From 12dd3629db42d7546e52023b797ce26d61fdee6f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Jul 2022 18:07:36 +0200 Subject: [PATCH] 5.10-stable patches added patches: bpf-x86-respect-x86_feature_retpoline.patch bpf-x86-simplify-computing-label-offsets.patch crypto-x86-poly1305-fixup-sls.patch intel_idle-disable-ibrs-during-long-idle.patch kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch kvm-vmx-convert-launched-argument-to-flags.patch kvm-vmx-fix-ibrs-handling-after-vmexit.patch kvm-vmx-flatten-__vmx_vcpu_run.patch kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch objtool-add-alt_group-struct.patch objtool-add-elf_create_reloc-helper.patch objtool-add-elf_create_undef_symbol.patch objtool-add-entry-unret-validation.patch objtool-add-straight-line-speculation-validation.patch objtool-assume-only-elf-functions-do-sibling-calls.patch objtool-cache-instruction-relocs.patch objtool-classify-symbols.patch objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch objtool-correctly-handle-retpoline-thunk-calls.patch objtool-create-reloc-sections-implicitly.patch objtool-default-ignore-int3-for-unreachable.patch objtool-don-t-make-.altinstructions-writable.patch objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch objtool-extract-elf_strtab_concat.patch objtool-extract-elf_symbol_add.patch objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch objtool-fix-code-relocs-vs-weak-symbols.patch objtool-fix-objtool-regression-on-x32-systems.patch objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch objtool-fix-symbol-creation.patch objtool-fix-type-of-reloc-addend.patch objtool-handle-__sanitize_cov-tail-calls.patch objtool-handle-per-arch-retpoline-naming.patch 
objtool-introduce-cfi-hash.patch objtool-keep-track-of-retpoline-call-sites.patch objtool-make-.altinstructions-section-entry-size-consistent.patch objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch objtool-print-out-the-symbol-type-when-complaining-about-it.patch objtool-re-add-unwind_hint_-save_restore.patch objtool-refactor-orc-section-generation.patch objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch objtool-rework-the-elf_rebuild_reloc_section-logic.patch objtool-skip-magical-retpoline-.altinstr_replacement.patch objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch objtool-support-asm-jump-tables.patch objtool-support-retpoline-jump-detection-for-vmlinux.o.patch objtool-support-stack-layout-changes-in-alternatives.patch objtool-teach-get_alt_entry-about-more-relocation-types.patch objtool-treat-.text.__x86.-as-noinstr.patch objtool-update-retpoline-validation.patch objtool-x86-ignore-__x86_indirect_alt_-symbols.patch objtool-x86-replace-alternatives-with-.retpoline_sites.patch objtool-x86-rewrite-retpoline-thunk-calls.patch tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch x86-add-insn_decode_kernel.patch x86-add-magic-amd-return-thunk.patch x86-add-straight-line-speculation-mitigation.patch x86-alternative-add-debug-prints-to-apply_retpolines.patch x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch x86-alternative-implement-.retpoline_sites-support.patch x86-alternative-merge-include-files.patch x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch x86-alternative-relax-text_poke_bp-constraint.patch x86-alternative-support-alternative_ternary.patch x86-alternative-support-not-feature.patch x86-alternative-try-inline-spectre_v2-retpoline-amd.patch x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch x86-alternative-use-insn_decode.patch x86-alternatives-optimize-optimize_nops.patch x86-asm-fix-register-order.patch 
x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch x86-bpf-use-alternative-ret-encoding.patch x86-bugs-add-amd-retbleed-boot-parameter.patch x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch x86-bugs-add-retbleed-ibpb.patch x86-bugs-do-ibpb-fallback-check-only-once.patch x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch x86-bugs-enable-stibp-for-jmp2ret.patch x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch x86-bugs-optimize-spec_ctrl-msr-writes.patch x86-bugs-report-amd-retbleed-vulnerability.patch x86-bugs-report-intel-retbleed-vulnerability.patch x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch x86-common-stamp-out-the-stepping-madness.patch x86-cpu-amd-add-spectral-chicken.patch x86-cpu-amd-enumerate-btc_no.patch x86-cpufeatures-move-retpoline-flags-to-word-11.patch x86-entry-add-kernel-ibrs-implementation.patch x86-entry-remove-skip_r11rcx.patch x86-ftrace-use-alternative-ret-encoding.patch x86-insn-add-a-__ignore_sync_check__-marker.patch x86-insn-add-an-insn_decode-api.patch x86-insn-eval-handle-return-values-from-the-decoder.patch x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch x86-kexec-disable-ret-on-kexec.patch x86-kvm-fix-setcc-emulation-for-return-thunks.patch x86-kvm-vmx-make-noinstr-clean.patch x86-lib-atomic64_386_32-rename-things.patch x86-objtool-create-.return_sites.patch x86-prepare-asm-files-for-straight-line-speculation.patch x86-prepare-inline-asm-for-straight-line-speculation.patch x86-realmode-build-with-d__disable_exports.patch x86-retbleed-add-fine-grained-kconfig-knobs.patch x86-retpoline-cleanup-some-ifdefery.patch x86-retpoline-create-a-retpoline-thunk-array.patch x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch x86-retpoline-remove-unused-replacement-symbols.patch x86-retpoline-simplify-retpolines.patch x86-retpoline-swizzle-retpoline-thunk.patch x86-retpoline-use-mfunction-return.patch 
x86-sev-avoid-using-__x86_return_thunk.patch x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch x86-speculation-disable-rrsba-behavior.patch x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch x86-speculation-remove-x86_spec_ctrl_mask.patch x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch x86-static_call-serialize-__static_call_fixup-properly.patch x86-static_call-use-alternative-ret-encoding.patch x86-undo-return-thunk-damage.patch x86-use-return-thunk-in-asm-code.patch x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch x86-xen-rename-sys-entry-points.patch x86-xen-support-objtool-validation-in-xen-asm.s.patch x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch --- ...pf-x86-respect-x86_feature_retpoline.patch | 253 ++ ...x86-simplify-computing-label-offsets.patch | 261 ++ .../crypto-x86-poly1305-fixup-sls.patch | 210 ++ ...l_idle-disable-ibrs-during-long-idle.patch | 183 + ...-emulation-function-offsets-with-sls.patch | 110 + ...u_run-in-nested_vmx_check_vmentry_hw.patch | 112 + ...x-convert-launched-argument-to-flags.patch | 171 + ...m-vmx-fix-ibrs-handling-after-vmexit.patch | 39 + .../kvm-vmx-flatten-__vmx_vcpu_run.patch | 197 + ...est-rsb-poisoning-attacks-with-eibrs.patch | 241 ++ ...eg-instead-of-cmp-0-reg-in-vmenter.s.patch | 33 + ...ags-based-on-config_cc_is_-clang-gcc.patch | 40 + queue-5.10/objtool-add-alt_group-struct.patch | 129 + .../objtool-add-elf_create_reloc-helper.patch | 302 ++ .../objtool-add-elf_create_undef_symbol.patch | 103 + .../objtool-add-entry-unret-validation.patch | 533 +++ ...straight-line-speculation-validation.patch | 135 + ...-only-elf-functions-do-sibling-calls.patch | 121 + .../objtool-cache-instruction-relocs.patch | 95 + queue-5.10/objtool-classify-symbols.patch | 128 + 
...hint_ret_offset-and-unwind_hint_func.patch | 239 ++ ...rrectly-handle-retpoline-thunk-calls.patch | 44 + ...ool-create-reloc-sections-implicitly.patch | 90 + ...-default-ignore-int3-for-unreachable.patch | 57 + ...don-t-make-.altinstructions-writable.patch | 40 + ...ifying-code-in-.altinstr_replacement.patch | 98 + .../objtool-extract-elf_strtab_concat.patch | 112 + .../objtool-extract-elf_symbol_add.patch | 112 + ...handling-for-elf_create_undef_symbol.patch | 73 + ...tool-fix-code-relocs-vs-weak-symbols.patch | 358 ++ ...ix-objtool-regression-on-x32-systems.patch | 103 + ...ation-for-kcov-tail-call-replacement.patch | 62 + queue-5.10/objtool-fix-symbol-creation.patch | 350 ++ .../objtool-fix-type-of-reloc-addend.patch | 92 + ...ool-handle-__sanitize_cov-tail-calls.patch | 256 ++ ...ool-handle-per-arch-retpoline-naming.patch | 77 + queue-5.10/objtool-introduce-cfi-hash.patch | 466 +++ ...l-keep-track-of-retpoline-call-sites.patch | 177 + ...ctions-section-entry-size-consistent.patch | 49 + ...-unconditional-retpoline-thunk-calls.patch | 43 + ...ymbol-type-when-complaining-about-it.patch | 65 + ...ool-re-add-unwind_hint_-save_restore.patch | 185 + ...tool-refactor-orc-section-generation.patch | 439 +++ ...-symbol-type-checks-in-get_alt_entry.patch | 91 + ...-the-elf_rebuild_reloc_section-logic.patch | 126 + ...ical-retpoline-.altinstr_replacement.patch | 51 + ...tions-when-adding-return-thunk-sites.patch | 37 + .../objtool-support-asm-jump-tables.patch | 59 + ...tpoline-jump-detection-for-vmlinux.o.patch | 51 + ...stack-layout-changes-in-alternatives.patch | 514 +++ ...lt_entry-about-more-relocation-types.patch | 94 + ...bjtool-treat-.text.__x86.-as-noinstr.patch | 37 + .../objtool-update-retpoline-validation.patch | 112 + ...6-ignore-__x86_indirect_alt_-symbols.patch | 45 + ...e-alternatives-with-.retpoline_sites.patch | 494 +++ ...ol-x86-rewrite-retpoline-thunk-calls.patch | 262 ++ queue-5.10/series | 130 + ...copies-used-in-perf-bench-mem-memcpy.patch | 120 + 
queue-5.10/x86-add-insn_decode_kernel.patch | 52 + .../x86-add-magic-amd-return-thunk.patch | 348 ++ ...straight-line-speculation-mitigation.patch | 200 + ...add-debug-prints-to-apply_retpolines.patch | 49 + ...handle-jcc-__x86_indirect_thunk_-reg.patch | 97 + ...e-implement-.retpoline_sites-support.patch | 283 ++ .../x86-alternative-merge-include-files.patch | 433 +++ ...e-byte-nops-at-an-arbitrary-position.patch | 134 + ...native-relax-text_poke_bp-constraint.patch | 172 + ...ernative-support-alternative_ternary.patch | 67 + .../x86-alternative-support-not-feature.patch | 91 + ...-try-inline-spectre_v2-retpoline-amd.patch | 97 + ...ternative_ternary-in-_static_cpu_has.patch | 79 + .../x86-alternative-use-insn_decode.patch | 40 + ...-alternatives-optimize-optimize_nops.patch | 216 ++ queue-5.10/x86-asm-fix-register-order.patch | 59 + ...m-fixup-odd-gen-for-each-reg.h-usage.patch | 53 + ...x86-bpf-use-alternative-ret-encoding.patch | 66 + ...bugs-add-amd-retbleed-boot-parameter.patch | 208 + ...n-lake-to-retbleed-affected-cpu-list.patch | 30 + queue-5.10/x86-bugs-add-retbleed-ibpb.patch | 255 ++ ...ugs-do-ibpb-fallback-check-only-once.patch | 49 + ...-on-entry-when-ibpb-is-not-supported.patch | 48 + .../x86-bugs-enable-stibp-for-jmp2ret.patch | 143 + ...-keep-a-per-cpu-ia32_spec_ctrl-value.patch | 119 + ...6-bugs-optimize-spec_ctrl-msr-writes.patch | 109 + ...gs-report-amd-retbleed-vulnerability.patch | 170 + ...-report-intel-retbleed-vulnerability.patch | 175 + ...nd-spectre_v2_user_select_mitigation.patch | 103 + ...ommon-stamp-out-the-stepping-madness.patch | 78 + .../x86-cpu-amd-add-spectral-chicken.patch | 108 + queue-5.10/x86-cpu-amd-enumerate-btc_no.patch | 86 + ...ures-move-retpoline-flags-to-word-11.patch | 52 + ...entry-add-kernel-ibrs-implementation.patch | 355 ++ queue-5.10/x86-entry-remove-skip_r11rcx.patch | 69 + ...-ftrace-use-alternative-ret-encoding.patch | 47 + ...n-add-a-__ignore_sync_check__-marker.patch | 199 + 
.../x86-insn-add-an-insn_decode-api.patch | 965 +++++ ...andle-return-values-from-the-decoder.patch | 117 + ...insn_decode-to-insn_decode_from_regs.patch | 82 + .../x86-kexec-disable-ret-on-kexec.patch | 173 + ...ix-setcc-emulation-for-return-thunks.patch | 99 + .../x86-kvm-vmx-make-noinstr-clean.patch | 75 + ...86-lib-atomic64_386_32-rename-things.patch | 248 ++ .../x86-objtool-create-.return_sites.patch | 200 + ...-files-for-straight-line-speculation.patch | 3331 +++++++++++++++++ ...ne-asm-for-straight-line-speculation.patch | 191 + ...almode-build-with-d__disable_exports.patch | 29 + ...bleed-add-fine-grained-kconfig-knobs.patch | 594 +++ .../x86-retpoline-cleanup-some-ifdefery.patch | 51 + ...oline-create-a-retpoline-thunk-array.patch | 105 + ...hunk-declarations-to-nospec-branch.h.patch | 73 + ...ne-remove-unused-replacement-symbols.patch | 97 + .../x86-retpoline-simplify-retpolines.patch | 217 ++ ...86-retpoline-swizzle-retpoline-thunk.patch | 41 + .../x86-retpoline-use-mfunction-return.patch | 79 + ...6-sev-avoid-using-__x86_return_thunk.patch | 48 + ...2-ibrs-option-to-support-kernel-ibrs.patch | 209 ++ ...6-speculation-disable-rrsba-behavior.patch | 154 + ...culation-fill-rsb-on-vmexit-for-ibrs.patch | 135 + ...ix-firmware-entry-spec_ctrl-handling.patch | 46 + ...-rsb-filling-with-config_retpoline-n.patch | 78 + ...-spec_ctrl-write-on-smt-state-change.patch | 34 + ...peculation-remove-x86_spec_ctrl_mask.patch | 88 + ...spec_ctrl-value-for-guest-entry-exit.patch | 57 + ...rialize-__static_call_fixup-properly.patch | 73 + ...ic_call-use-alternative-ret-encoding.patch | 184 + queue-5.10/x86-undo-return-thunk-damage.patch | 195 + .../x86-use-return-thunk-in-asm-code.patch | 95 + ...-don-t-use-ret-in-vsyscall-emulation.patch | 48 + .../x86-xen-rename-sys-entry-points.patch | 134 + ...port-objtool-validation-in-xen-asm.s.patch | 138 + ...l-vmlinux.o-validation-in-xen-head.s.patch | 42 + 131 files changed, 22670 insertions(+) create mode 100644 
queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch create mode 100644 queue-5.10/bpf-x86-simplify-computing-label-offsets.patch create mode 100644 queue-5.10/crypto-x86-poly1305-fixup-sls.patch create mode 100644 queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch create mode 100644 queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch create mode 100644 queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch create mode 100644 queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch create mode 100644 queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch create mode 100644 queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch create mode 100644 queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch create mode 100644 queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch create mode 100644 queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch create mode 100644 queue-5.10/objtool-add-alt_group-struct.patch create mode 100644 queue-5.10/objtool-add-elf_create_reloc-helper.patch create mode 100644 queue-5.10/objtool-add-elf_create_undef_symbol.patch create mode 100644 queue-5.10/objtool-add-entry-unret-validation.patch create mode 100644 queue-5.10/objtool-add-straight-line-speculation-validation.patch create mode 100644 queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch create mode 100644 queue-5.10/objtool-cache-instruction-relocs.patch create mode 100644 queue-5.10/objtool-classify-symbols.patch create mode 100644 queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch create mode 100644 queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch create mode 100644 queue-5.10/objtool-create-reloc-sections-implicitly.patch create mode 100644 queue-5.10/objtool-default-ignore-int3-for-unreachable.patch create mode 100644 queue-5.10/objtool-don-t-make-.altinstructions-writable.patch create mode 100644 
queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch create mode 100644 queue-5.10/objtool-extract-elf_strtab_concat.patch create mode 100644 queue-5.10/objtool-extract-elf_symbol_add.patch create mode 100644 queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch create mode 100644 queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch create mode 100644 queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch create mode 100644 queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch create mode 100644 queue-5.10/objtool-fix-symbol-creation.patch create mode 100644 queue-5.10/objtool-fix-type-of-reloc-addend.patch create mode 100644 queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch create mode 100644 queue-5.10/objtool-handle-per-arch-retpoline-naming.patch create mode 100644 queue-5.10/objtool-introduce-cfi-hash.patch create mode 100644 queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch create mode 100644 queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch create mode 100644 queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch create mode 100644 queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch create mode 100644 queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch create mode 100644 queue-5.10/objtool-refactor-orc-section-generation.patch create mode 100644 queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch create mode 100644 queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch create mode 100644 queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch create mode 100644 queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch create mode 100644 queue-5.10/objtool-support-asm-jump-tables.patch create mode 100644 queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch create mode 100644 
queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch create mode 100644 queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch create mode 100644 queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch create mode 100644 queue-5.10/objtool-update-retpoline-validation.patch create mode 100644 queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch create mode 100644 queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch create mode 100644 queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch create mode 100644 queue-5.10/series create mode 100644 queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch create mode 100644 queue-5.10/x86-add-insn_decode_kernel.patch create mode 100644 queue-5.10/x86-add-magic-amd-return-thunk.patch create mode 100644 queue-5.10/x86-add-straight-line-speculation-mitigation.patch create mode 100644 queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch create mode 100644 queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch create mode 100644 queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch create mode 100644 queue-5.10/x86-alternative-merge-include-files.patch create mode 100644 queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch create mode 100644 queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch create mode 100644 queue-5.10/x86-alternative-support-alternative_ternary.patch create mode 100644 queue-5.10/x86-alternative-support-not-feature.patch create mode 100644 queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch create mode 100644 queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch create mode 100644 queue-5.10/x86-alternative-use-insn_decode.patch create mode 100644 queue-5.10/x86-alternatives-optimize-optimize_nops.patch create mode 100644 queue-5.10/x86-asm-fix-register-order.patch 
create mode 100644 queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch create mode 100644 queue-5.10/x86-bpf-use-alternative-ret-encoding.patch create mode 100644 queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch create mode 100644 queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch create mode 100644 queue-5.10/x86-bugs-add-retbleed-ibpb.patch create mode 100644 queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch create mode 100644 queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch create mode 100644 queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch create mode 100644 queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch create mode 100644 queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch create mode 100644 queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch create mode 100644 queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch create mode 100644 queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch create mode 100644 queue-5.10/x86-common-stamp-out-the-stepping-madness.patch create mode 100644 queue-5.10/x86-cpu-amd-add-spectral-chicken.patch create mode 100644 queue-5.10/x86-cpu-amd-enumerate-btc_no.patch create mode 100644 queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch create mode 100644 queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch create mode 100644 queue-5.10/x86-entry-remove-skip_r11rcx.patch create mode 100644 queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch create mode 100644 queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch create mode 100644 queue-5.10/x86-insn-add-an-insn_decode-api.patch create mode 100644 queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch create mode 100644 queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch create mode 100644 queue-5.10/x86-kexec-disable-ret-on-kexec.patch create mode 100644 
queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch create mode 100644 queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch create mode 100644 queue-5.10/x86-lib-atomic64_386_32-rename-things.patch create mode 100644 queue-5.10/x86-objtool-create-.return_sites.patch create mode 100644 queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch create mode 100644 queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch create mode 100644 queue-5.10/x86-realmode-build-with-d__disable_exports.patch create mode 100644 queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch create mode 100644 queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch create mode 100644 queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch create mode 100644 queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch create mode 100644 queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch create mode 100644 queue-5.10/x86-retpoline-simplify-retpolines.patch create mode 100644 queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch create mode 100644 queue-5.10/x86-retpoline-use-mfunction-return.patch create mode 100644 queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch create mode 100644 queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch create mode 100644 queue-5.10/x86-speculation-disable-rrsba-behavior.patch create mode 100644 queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch create mode 100644 queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch create mode 100644 queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch create mode 100644 queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch create mode 100644 queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch create mode 100644 queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch create mode 100644 
queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch create mode 100644 queue-5.10/x86-static_call-use-alternative-ret-encoding.patch create mode 100644 queue-5.10/x86-undo-return-thunk-damage.patch create mode 100644 queue-5.10/x86-use-return-thunk-in-asm-code.patch create mode 100644 queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch create mode 100644 queue-5.10/x86-xen-rename-sys-entry-points.patch create mode 100644 queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch create mode 100644 queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch diff --git a/queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch b/queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch new file mode 100644 index 00000000000..64ff53e7f9f --- /dev/null +++ b/queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch @@ -0,0 +1,253 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:48 +0200 +Subject: bpf,x86: Respect X86_FEATURE_RETPOLINE* + +From: Peter Zijlstra + +commit 87c87ecd00c54ecd677798cb49ef27329e0fab41 upstream. + +Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and +unconditionally emits a thunk call, this is sub-optimal and doesn't +match the regular, compiler generated, code. + +Update the i386 JIT to emit code equal to what the compiler emits for +the regular kernel text (IOW. a plain THUNK call). + +Update the x86_64 JIT to emit code similar to the result of compiler +and kernel rewrites as according to X86_FEATURE_RETPOLINE* flags. +Inlining RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg), +while doing a THUNK call for RETPOLINE. + +This removes the hard-coded retpoline thunks and shrinks the generated +code. Leaving a single retpoline thunk definition in the kernel. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Alexei Starovoitov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.614772675@infradead.org +[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: add the necessary cnt variable to + emit_indirect_jump()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 59 ----------------------------------- + arch/x86/net/bpf_jit_comp.c | 49 +++++++++++++---------------- + arch/x86/net/bpf_jit_comp32.c | 22 +++++++++++-- + 3 files changed, 42 insertions(+), 88 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -318,63 +318,4 @@ static inline void mds_idle_clear_cpu_bu + + #endif /* __ASSEMBLY__ */ + +-/* +- * Below is used in the eBPF JIT compiler and emits the byte sequence +- * for the following assembly: +- * +- * With retpolines configured: +- * +- * callq do_rop +- * spec_trap: +- * pause +- * lfence +- * jmp spec_trap +- * do_rop: +- * mov %rcx,(%rsp) for x86_64 +- * mov %edx,(%esp) for x86_32 +- * retq +- * +- * Without retpolines configured: +- * +- * jmp *%rcx for x86_64 +- * jmp *%edx for x86_32 +- */ +-#ifdef CONFIG_RETPOLINE +-# ifdef CONFIG_X86_64 +-# define RETPOLINE_RCX_BPF_JIT_SIZE 17 +-# define RETPOLINE_RCX_BPF_JIT() \ +-do { \ +- EMIT1_off32(0xE8, 7); /* callq do_rop */ \ +- /* spec_trap: */ \ +- EMIT2(0xF3, 0x90); /* pause */ \ +- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ +- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ +- /* do_rop: */ \ +- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \ +- EMIT1(0xC3); /* retq */ \ +-} while (0) +-# else /* !CONFIG_X86_64 */ +-# define RETPOLINE_EDX_BPF_JIT() \ +-do { \ +- EMIT1_off32(0xE8, 7); /* call do_rop */ \ +- /* spec_trap: */ \ +- EMIT2(0xF3, 0x90); /* pause */ \ +- EMIT3(0x0F, 0xAE, 0xE8); 
/* lfence */ \ +- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ +- /* do_rop: */ \ +- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ +- EMIT1(0xC3); /* ret */ \ +-} while (0) +-# endif +-#else /* !CONFIG_RETPOLINE */ +-# ifdef CONFIG_X86_64 +-# define RETPOLINE_RCX_BPF_JIT_SIZE 2 +-# define RETPOLINE_RCX_BPF_JIT() \ +- EMIT2(0xFF, 0xE1); /* jmp *%rcx */ +-# else /* !CONFIG_X86_64 */ +-# define RETPOLINE_EDX_BPF_JIT() \ +- EMIT2(0xFF, 0xE2) /* jmp *%edx */ +-# endif +-#endif +- + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -379,6 +379,26 @@ int bpf_arch_text_poke(void *ip, enum bp + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); + } + ++#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) ++ ++static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ int cnt = 0; ++ ++#ifdef CONFIG_RETPOLINE ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { ++ EMIT_LFENCE(); ++ EMIT2(0xFF, 0xE0 + reg); ++ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { ++ emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); ++ } else ++#endif ++ EMIT2(0xFF, 0xE0 + reg); ++ ++ *pprog = prog; ++} ++ + /* + * Generate the following code: + * +@@ -460,7 +480,7 @@ static void emit_bpf_tail_call_indirect( + * rdi == ctx (1st arg) + * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET + */ +- RETPOLINE_RCX_BPF_JIT(); ++ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); + + /* out: */ + ctx->tail_call_indirect_label = prog - start; +@@ -1099,8 +1119,7 @@ static int do_jit(struct bpf_prog *bpf_p + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) +- /* Emit 'lfence' */ +- EMIT3(0x0F, 0xAE, 0xE8); ++ EMIT_LFENCE(); + break; + + /* ST: *(u8*)(dst_reg + off) = imm */ +@@ -1878,26 +1897,6 @@ cleanup: + return ret; + } + +-static int emit_fallback_jump(u8 **pprog) +-{ +- u8 *prog = *pprog; +- int err = 0; +- +-#ifdef CONFIG_RETPOLINE +- /* 
Note that this assumes the the compiler uses external +- * thunks for indirect calls. Both clang and GCC use the same +- * naming convention for external thunks. +- */ +- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog); +-#else +- int cnt = 0; +- +- EMIT2(0xFF, 0xE2); /* jmp rdx */ +-#endif +- *pprog = prog; +- return err; +-} +- + static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) + { + u8 *jg_reloc, *prog = *pprog; +@@ -1919,9 +1918,7 @@ static int emit_bpf_dispatcher(u8 **ppro + if (err) + return err; + +- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */ +- if (err) +- return err; ++ emit_indirect_jump(&prog, 2 /* rdx */, prog); + + *pprog = prog; + return 0; +--- a/arch/x86/net/bpf_jit_comp32.c ++++ b/arch/x86/net/bpf_jit_comp32.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u3 + *pprog = prog; + } + ++static int emit_jmp_edx(u8 **pprog, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ int cnt = 0; ++ ++#ifdef CONFIG_RETPOLINE ++ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5)); ++#else ++ EMIT2(0xFF, 0xE2); ++#endif ++ *pprog = prog; ++ ++ return cnt; ++} ++ + /* + * Generate the following code: + * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 
+@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u3 + * goto *(prog->bpf_func + prologue_size); + * out: + */ +-static void emit_bpf_tail_call(u8 **pprog) ++static void emit_bpf_tail_call(u8 **pprog, u8 *ip) + { + u8 *prog = *pprog; + int cnt = 0; +@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **ppro + * eax == ctx (1st arg) + * edx == prog->bpf_func + prologue_size + */ +- RETPOLINE_EDX_BPF_JIT(); ++ cnt += emit_jmp_edx(&prog, ip + cnt); + + if (jmp_label1 == -1) + jmp_label1 = cnt; +@@ -1929,7 +1945,7 @@ static int do_jit(struct bpf_prog *bpf_p + break; + } + case BPF_JMP | BPF_TAIL_CALL: +- emit_bpf_tail_call(&prog); ++ emit_bpf_tail_call(&prog, image + addrs[i - 1]); + break; + + /* cond jump */ diff --git a/queue-5.10/bpf-x86-simplify-computing-label-offsets.patch b/queue-5.10/bpf-x86-simplify-computing-label-offsets.patch new file mode 100644 index 00000000000..7f05f0f585a --- /dev/null +++ b/queue-5.10/bpf-x86-simplify-computing-label-offsets.patch @@ -0,0 +1,261 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:47 +0200 +Subject: bpf,x86: Simplify computing label offsets + +From: Peter Zijlstra + +commit dceba0817ca329868a15e2e1dd46eb6340b69206 upstream. + +Take an idea from the 32bit JIT, which uses the multi-pass nature of +the JIT to compute the instruction offsets on a prior pass in order to +compute the relative jump offsets on a later pass. + +Application to the x86_64 JIT is slightly more involved because the +offsets depend on program variables (such as callee_regs_used and +stack_depth) and hence the computed offsets need to be kept in the +context of the JIT. + +This removes, IMO quite fragile, code that hard-codes the offsets and +tries to compute the length of variable parts of it. + +Convert both emit_bpf_tail_call_*() functions which have an out: label +at the end. 
Additionally emit_bpf_tail_call_direct() also has a poke +table entry, for which it computes the offset from the end (and thus +already relies on the previous pass to have computed addrs[i]), also +convert this to be a forward based offset. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Alexei Starovoitov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.552304864@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: keep the cnt variable in + emit_bpf_tail_call_{,in}direct()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 125 ++++++++++++++------------------------------ + 1 file changed, 42 insertions(+), 83 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -212,6 +212,14 @@ static void jit_fill_hole(void *area, un + + struct jit_context { + int cleanup_addr; /* Epilogue code offset */ ++ ++ /* ++ * Program specific offsets of labels in the code; these rely on the ++ * JIT doing at least 2 passes, recording the position on the first ++ * pass, only to generate the correct offset on the second pass.
++ */ ++ int tail_call_direct_label; ++ int tail_call_indirect_label; + }; + + /* Maximum number of bytes emitted while JITing one eBPF insn */ +@@ -371,22 +379,6 @@ int bpf_arch_text_poke(void *ip, enum bp + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); + } + +-static int get_pop_bytes(bool *callee_regs_used) +-{ +- int bytes = 0; +- +- if (callee_regs_used[3]) +- bytes += 2; +- if (callee_regs_used[2]) +- bytes += 2; +- if (callee_regs_used[1]) +- bytes += 2; +- if (callee_regs_used[0]) +- bytes += 1; +- +- return bytes; +-} +- + /* + * Generate the following code: + * +@@ -402,30 +394,12 @@ static int get_pop_bytes(bool *callee_re + * out: + */ + static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, +- u32 stack_depth) ++ u32 stack_depth, u8 *ip, ++ struct jit_context *ctx) + { + int tcc_off = -4 - round_up(stack_depth, 8); +- u8 *prog = *pprog; +- int pop_bytes = 0; +- int off1 = 42; +- int off2 = 31; +- int off3 = 9; +- int cnt = 0; +- +- /* count the additional bytes used for popping callee regs from stack +- * that need to be taken into account for each of the offsets that +- * are used for bailing out of the tail call +- */ +- pop_bytes = get_pop_bytes(callee_regs_used); +- off1 += pop_bytes; +- off2 += pop_bytes; +- off3 += pop_bytes; +- +- if (stack_depth) { +- off1 += 7; +- off2 += 7; +- off3 += 7; +- } ++ u8 *prog = *pprog, *start = *pprog; ++ int cnt = 0, offset; + + /* + * rdi - pointer to ctx +@@ -440,8 +414,9 @@ static void emit_bpf_tail_call_indirect( + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ + offsetof(struct bpf_array, map.max_entries)); +-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */ +- EMIT2(X86_JBE, OFFSET1); /* jbe out */ ++ ++ offset = ctx->tail_call_indirect_label - (prog + 2 - start); ++ EMIT2(X86_JBE, offset); /* jbe out */ + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) +@@ -449,8 +424,9 @@ static void 
emit_bpf_tail_call_indirect( + */ + EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE) +- EMIT2(X86_JA, OFFSET2); /* ja out */ ++ ++ offset = ctx->tail_call_indirect_label - (prog + 2 - start); ++ EMIT2(X86_JA, offset); /* ja out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ + +@@ -463,12 +439,11 @@ static void emit_bpf_tail_call_indirect( + * goto out; + */ + EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */ +-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE) +- EMIT2(X86_JE, OFFSET3); /* je out */ + +- *pprog = prog; +- pop_callee_regs(pprog, callee_regs_used); +- prog = *pprog; ++ offset = ctx->tail_call_indirect_label - (prog + 2 - start); ++ EMIT2(X86_JE, offset); /* je out */ ++ ++ pop_callee_regs(&prog, callee_regs_used); + + EMIT1(0x58); /* pop rax */ + if (stack_depth) +@@ -488,39 +463,18 @@ static void emit_bpf_tail_call_indirect( + RETPOLINE_RCX_BPF_JIT(); + + /* out: */ ++ ctx->tail_call_indirect_label = prog - start; + *pprog = prog; + } + + static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, +- u8 **pprog, int addr, u8 *image, +- bool *callee_regs_used, u32 stack_depth) ++ u8 **pprog, u8 *ip, ++ bool *callee_regs_used, u32 stack_depth, ++ struct jit_context *ctx) + { + int tcc_off = -4 - round_up(stack_depth, 8); +- u8 *prog = *pprog; +- int pop_bytes = 0; +- int off1 = 20; +- int poke_off; +- int cnt = 0; +- +- /* count the additional bytes used for popping callee regs to stack +- * that need to be taken into account for jump offset that is used for +- * bailing out from of the tail call when limit is reached +- */ +- pop_bytes = get_pop_bytes(callee_regs_used); +- off1 += pop_bytes; +- +- /* +- * total bytes for: +- * - nop5/ jmpq $off +- * - pop callee regs +- * - sub rsp, $val if depth > 0 +- * - pop 
rax +- */ +- poke_off = X86_PATCH_SIZE + pop_bytes + 1; +- if (stack_depth) { +- poke_off += 7; +- off1 += 7; +- } ++ u8 *prog = *pprog, *start = *pprog; ++ int cnt = 0, offset; + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) +@@ -528,28 +482,30 @@ static void emit_bpf_tail_call_direct(st + */ + EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +- EMIT2(X86_JA, off1); /* ja out */ ++ ++ offset = ctx->tail_call_direct_label - (prog + 2 - start); ++ EMIT2(X86_JA, offset); /* ja out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ + +- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE); ++ poke->tailcall_bypass = ip + (prog - start); + poke->adj_off = X86_TAIL_CALL_OFFSET; +- poke->tailcall_target = image + (addr - X86_PATCH_SIZE); ++ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE; + poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; + + emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, + poke->tailcall_bypass); + +- *pprog = prog; +- pop_callee_regs(pprog, callee_regs_used); +- prog = *pprog; ++ pop_callee_regs(&prog, callee_regs_used); + EMIT1(0x58); /* pop rax */ + if (stack_depth) + EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); + + memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); + prog += X86_PATCH_SIZE; ++ + /* out: */ ++ ctx->tail_call_direct_label = prog - start; + + *pprog = prog; + } +@@ -1274,13 +1230,16 @@ xadd: if (is_imm8(insn->off)) + case BPF_JMP | BPF_TAIL_CALL: + if (imm32) + emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], +- &prog, addrs[i], image, ++ &prog, image + addrs[i - 1], + callee_regs_used, +- bpf_prog->aux->stack_depth); ++ bpf_prog->aux->stack_depth, ++ ctx); + else + emit_bpf_tail_call_indirect(&prog, + callee_regs_used, +- bpf_prog->aux->stack_depth); ++ 
bpf_prog->aux->stack_depth, ++ image + addrs[i - 1], ++ ctx); + break; + + /* cond jump */ diff --git a/queue-5.10/crypto-x86-poly1305-fixup-sls.patch b/queue-5.10/crypto-x86-poly1305-fixup-sls.patch new file mode 100644 index 00000000000..f902493e9ee --- /dev/null +++ b/queue-5.10/crypto-x86-poly1305-fixup-sls.patch @@ -0,0 +1,210 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Thu, 24 Mar 2022 00:05:55 +0100 +Subject: crypto: x86/poly1305 - Fixup SLS + +From: Peter Zijlstra + +commit 7ed7aa4de9421229be6d331ed52d5cd09c99f409 upstream. + +Due to being a perl generated asm file, it got missed by the mass +conversion script. + +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_init_x86_64()+0x3a: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_x86_64()+0xf2: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_emit_x86_64()+0x37: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: __poly1305_block()+0x6d: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: __poly1305_init_avx()+0x1e8: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx()+0x18a: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx()+0xaf8: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_emit_avx()+0x99: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx2()+0x18a: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx2()+0x776: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x18a: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o:
warning: objtool: poly1305_blocks_avx512()+0x796: missing int3 after ret +arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x10bd: missing int3 after ret + +Fixes: f94909ceb1ed ("x86: Prepare asm files for straight-line-speculation") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 38 +++++++++++++------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl ++++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +@@ -297,7 +297,7 @@ ___ + $code.=<<___; + mov \$1,%eax + .Lno_key: +- ret ++ RET + ___ + &end_function("poly1305_init_x86_64"); + +@@ -373,7 +373,7 @@ $code.=<<___; + .cfi_adjust_cfa_offset -48 + .Lno_data: + .Lblocks_epilogue: +- ret ++ RET + .cfi_endproc + ___ + &end_function("poly1305_blocks_x86_64"); +@@ -399,7 +399,7 @@ $code.=<<___; + mov %rax,0($mac) # write result + mov %rcx,8($mac) + +- ret ++ RET + ___ + &end_function("poly1305_emit_x86_64"); + if ($avx) { +@@ -429,7 +429,7 @@ ___ + &poly1305_iteration(); + $code.=<<___; + pop $ctx +- ret ++ RET + .size __poly1305_block,.-__poly1305_block + + .type __poly1305_init_avx,\@abi-omnipotent +@@ -594,7 +594,7 @@ __poly1305_init_avx: + + lea -48-64($ctx),$ctx # size [de-]optimization + pop %rbp +- ret ++ RET + .size __poly1305_init_avx,.-__poly1305_init_avx + ___ + +@@ -747,7 +747,7 @@ $code.=<<___; + .cfi_restore %rbp + .Lno_data_avx: + .Lblocks_avx_epilogue: +- ret ++ RET + .cfi_endproc + + .align 32 +@@ -1452,7 +1452,7 @@ $code.=<<___ if (!$win64); + ___ + $code.=<<___; + vzeroupper +- ret ++ RET + .cfi_endproc + ___ + &end_function("poly1305_blocks_avx"); +@@ -1508,7 +1508,7 @@ $code.=<<___; + mov %rax,0($mac) # write result + mov %rcx,8($mac) + +- ret ++ RET + ___ + &end_function("poly1305_emit_avx"); + +@@ 
-1675,7 +1675,7 @@ $code.=<<___; + .cfi_restore %rbp + .Lno_data_avx2$suffix: + .Lblocks_avx2_epilogue$suffix: +- ret ++ RET + .cfi_endproc + + .align 32 +@@ -2201,7 +2201,7 @@ $code.=<<___ if (!$win64); + ___ + $code.=<<___; + vzeroupper +- ret ++ RET + .cfi_endproc + ___ + if($avx > 2 && $avx512) { +@@ -2792,7 +2792,7 @@ $code.=<<___ if (!$win64); + .cfi_def_cfa_register %rsp + ___ + $code.=<<___; +- ret ++ RET + .cfi_endproc + ___ + +@@ -2893,7 +2893,7 @@ $code.=<<___ if ($flavour =~ /elf32/); + ___ + $code.=<<___; + mov \$1,%eax +- ret ++ RET + .size poly1305_init_base2_44,.-poly1305_init_base2_44 + ___ + { +@@ -3010,7 +3010,7 @@ poly1305_blocks_vpmadd52: + jnz .Lblocks_vpmadd52_4x + + .Lno_data_vpmadd52: +- ret ++ RET + .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 + ___ + } +@@ -3451,7 +3451,7 @@ poly1305_blocks_vpmadd52_4x: + vzeroall + + .Lno_data_vpmadd52_4x: +- ret ++ RET + .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x + ___ + } +@@ -3824,7 +3824,7 @@ $code.=<<___; + vzeroall + + .Lno_data_vpmadd52_8x: +- ret ++ RET + .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x + ___ + } +@@ -3861,7 +3861,7 @@ poly1305_emit_base2_44: + mov %rax,0($mac) # write result + mov %rcx,8($mac) + +- ret ++ RET + .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 + ___ + } } } +@@ -3916,7 +3916,7 @@ xor128_encrypt_n_pad: + + .Ldone_enc: + mov $otp,%rax +- ret ++ RET + .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad + + .globl xor128_decrypt_n_pad +@@ -3967,7 +3967,7 @@ xor128_decrypt_n_pad: + + .Ldone_dec: + mov $otp,%rax +- ret ++ RET + .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad + ___ + } +@@ -4109,7 +4109,7 @@ avx_handler: + pop %rbx + pop %rdi + pop %rsi +- ret ++ RET + .size avx_handler,.-avx_handler + + .section .pdata diff --git a/queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch b/queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch new file mode 100644 index 00000000000..836e6b9a857 --- /dev/null +++ 
b/queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch @@ -0,0 +1,183 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:58 +0200 +Subject: intel_idle: Disable IBRS during long idle + +From: Peter Zijlstra + +commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream. + +Having IBRS enabled while the SMT sibling is idle unnecessarily slows +down the running sibling. OTOH, disabling IBRS around idle takes two +MSR writes, which will increase the idle latency. + +Therefore, only disable IBRS around deeper idle states. Shallow idle +states are bounded by the tick in duration, since NOHZ is not allowed +for them by virtue of their short target residency. + +Only do this for mwait-driven idle, since that keeps interrupts disabled +across idle, which makes disabling IBRS vs IRQ-entry a non-issue. + +Note: C6 is a random threshold, most importantly C1 probably shouldn't +disable IBRS, benchmarking needed. + +Suggested-by: Tim Chen +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: no CPUIDLE_FLAG_IRQ_ENABLE] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 6 ++++ + drivers/idle/intel_idle.c | 43 ++++++++++++++++++++++++++++++----- + 3 files changed, 44 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -256,6 +256,7 @@ static inline void indirect_branch_predi + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; + extern void write_spec_ctrl_current(u64 val, bool force); ++extern u64 spec_ctrl_current(void); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -78,6 +78,12 @@ void 
write_spec_ctrl_current(u64 val, bo + wrmsrl(MSR_IA32_SPEC_CTRL, val); + } + ++u64 spec_ctrl_current(void) ++{ ++ return this_cpu_read(x86_spec_ctrl_current); ++} ++EXPORT_SYMBOL_GPL(spec_ctrl_current); ++ + /* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -47,11 +47,13 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + #include ++#include + #include + #include + +@@ -94,6 +96,12 @@ static unsigned int mwait_substates __in + #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) + + /* ++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE ++ * above. ++ */ ++#define CPUIDLE_FLAG_IBRS BIT(16) ++ ++/* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. +@@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct c + return index; + } + ++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, ++ struct cpuidle_driver *drv, int index) ++{ ++ bool smt_active = sched_smt_active(); ++ u64 spec_ctrl = spec_ctrl_current(); ++ int ret; ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ ++ ret = intel_idle(dev, drv, index); ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); ++ ++ return ret; ++} ++ + /** + * intel_idle_s2idle - Ask the processor to enter the given idle state. + * @dev: cpuidle device of the target CPU. 
+@@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 85, + .target_residency = 200, + .enter = &intel_idle, +@@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C7s", + .desc = "MWAIT 0x33", +- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 124, + .target_residency = 800, + .enter = &intel_idle, +@@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C8", + .desc = "MWAIT 0x40", +- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 200, + .target_residency = 800, + .enter = &intel_idle, +@@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C9", + .desc = "MWAIT 0x50", +- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, +@@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[ + { + .name = "C10", + .desc = "MWAIT 0x60", +- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 890, + .target_residency = 5000, + .enter = &intel_idle, +@@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[ + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 133, + .target_residency = 600, + .enter = &intel_idle, +@@ -1501,6 +1527,11 @@ static void __init intel_idle_init_cstat + /* Structure copy. 
*/ + drv->states[drv->state_count] = cpuidle_state_table[cstate]; + ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && ++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { ++ drv->states[drv->state_count].enter = intel_idle_ibrs; ++ } ++ + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && diff --git a/queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch b/queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch new file mode 100644 index 00000000000..b505df4f47b --- /dev/null +++ b/queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch @@ -0,0 +1,110 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Wed, 16 Mar 2022 22:05:52 +0100 +Subject: kvm/emulate: Fix SETcc emulation function offsets with SLS + +From: Borislav Petkov + +commit fe83f5eae432ccc8e90082d6ed506d5233547473 upstream. + +The commit in Fixes started adding INT3 after RETs as a mitigation +against straight-line speculation. + +The fastop SETcc implementation in kvm's insn emulator uses macro magic +to generate all possible SETcc functions and to jump to them when +emulating the respective instruction. + +However, it hardcodes the size and alignment of those functions to 4: a +three-byte SETcc insn and a single-byte RET. BUT, with SLS, there's an +INT3 that gets slapped after the RET, which brings the whole scheme out +of alignment: + + 15: 0f 90 c0 seto %al + 18: c3 ret + 19: cc int3 + 1a: 0f 1f 00 nopl (%rax) + 1d: 0f 91 c0 setno %al + 20: c3 ret + 21: cc int3 + 22: 0f 1f 00 nopl (%rax) + 25: 0f 92 c0 setb %al + 28: c3 ret + 29: cc int3 + +and this explodes like this: + + int3: 0000 [#1] PREEMPT SMP PTI + CPU: 0 PID: 2435 Comm: qemu-system-x86 Not tainted 5.17.0-rc8-sls #1 + Hardware name: Dell Inc. 
Precision WorkStation T3400 /0TP412, BIOS A14 04/30/2012 + RIP: 0010:setc+0x5/0x8 [kvm] + Code: 00 00 0f 1f 00 0f b6 05 43 24 06 00 c3 cc 0f 1f 80 00 00 00 00 0f 90 c0 c3 cc 0f \ + 1f 00 0f 91 c0 c3 cc 0f 1f 00 0f 92 c0 c3 cc <0f> 1f 00 0f 93 c0 c3 cc 0f 1f 00 \ + 0f 94 c0 c3 cc 0f 1f 00 0f 95 c0 + Call Trace: + + ? x86_emulate_insn [kvm] + ? x86_emulate_instruction [kvm] + ? vmx_handle_exit [kvm_intel] + ? kvm_arch_vcpu_ioctl_run [kvm] + ? kvm_vcpu_ioctl [kvm] + ? __x64_sys_ioctl + ? do_syscall_64 + ? entry_SYSCALL_64_after_hwframe + + +Raise the alignment value when SLS is enabled and use a macro for that +instead of hard-coding naked numbers. + +Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") +Reported-by: Jamie Heilman +Signed-off-by: Borislav Petkov +Acked-by: Peter Zijlstra (Intel) +Tested-by: Jamie Heilman +Link: https://lore.kernel.org/r/YjGzJwjrvxg5YZ0Z@audible.transient.net +[Add a comment and a bit of safety checking, since this is going to be changed + again for IBT support. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -428,8 +428,23 @@ static int fastop(struct x86_emulate_ctx + FOP_END + + /* Special case for SETcc - 1 instruction per cc */ ++ ++/* ++ * Depending on .config the SETcc functions look like: ++ * ++ * SETcc %al [3 bytes] ++ * RET [1 byte] ++ * INT3 [1 byte; CONFIG_SLS] ++ * ++ * Which gives possible sizes 4 or 5. When rounded up to the ++ * next power-of-two alignment they become 4 or 8. 
++ */ ++#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) ++#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) ++static_assert(SETCC_LENGTH <= SETCC_ALIGN); ++ + #define FOP_SETCC(op) \ +- ".align 4 \n\t" \ ++ ".align " __stringify(SETCC_ALIGN) " \n\t" \ + ".type " #op ", @function \n\t" \ + #op ": \n\t" \ + #op " %al \n\t" \ +@@ -1055,7 +1070,7 @@ static int em_bsr_c(struct x86_emulate_c + static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) + { + u8 rc; +- void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); ++ void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); + + flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; + asm("push %[flags]; popf; " CALL_NOSPEC diff --git a/queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch b/queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch new file mode 100644 index 00000000000..2a859f52ffb --- /dev/null +++ b/queue-5.10/kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch @@ -0,0 +1,112 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Uros Bizjak +Date: Wed, 30 Dec 2020 16:26:57 -0800 +Subject: KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw + +From: Uros Bizjak + +commit 150f17bfab37e981ba03b37440638138ff2aa9ec upstream. + +Replace inline assembly in nested_vmx_check_vmentry_hw +with a call to __vmx_vcpu_run. The function is not +performance critical, so (double) GPR save/restore +in __vmx_vcpu_run can be tolerated, as far as performance +effects are concerned. 
+ +Cc: Paolo Bonzini +Cc: Sean Christopherson +Reviewed-and-tested-by: Sean Christopherson +Signed-off-by: Uros Bizjak +[sean: dropped versioning info from changelog] +Signed-off-by: Sean Christopherson +Message-Id: <20201231002702.2223707-5-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 32 +++----------------------------- + arch/x86/kvm/vmx/vmenter.S | 2 +- + arch/x86/kvm/vmx/vmx.c | 2 -- + arch/x86/kvm/vmx/vmx.h | 1 + + 4 files changed, 5 insertions(+), 32 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -12,6 +12,7 @@ + #include "nested.h" + #include "pmu.h" + #include "trace.h" ++#include "vmx.h" + #include "x86.h" + + static bool __read_mostly enable_shadow_vmcs = 1; +@@ -3075,35 +3076,8 @@ static int nested_vmx_check_vmentry_hw(s + vmx->loaded_vmcs->host_state.cr4 = cr4; + } + +- asm( +- "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ +- "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" +- "je 1f \n\t" +- __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" +- "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" +- "1: \n\t" +- "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ +- +- /* Check if vmlaunch or vmresume is needed */ +- "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" +- +- /* +- * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set +- * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail +- * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the +- * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. 
+- */ +- "call vmx_vmenter\n\t" +- +- CC_SET(be) +- : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) +- : [HOST_RSP]"r"((unsigned long)HOST_RSP), +- [loaded_vmcs]"r"(vmx->loaded_vmcs), +- [launched]"i"(offsetof(struct loaded_vmcs, launched)), +- [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), +- [wordsize]"i"(sizeof(ulong)) +- : "memory" +- ); ++ vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, ++ vmx->loaded_vmcs->launched); + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -44,7 +44,7 @@ + * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump + * to vmx_vmexit. + */ +-SYM_FUNC_START(vmx_vmenter) ++SYM_FUNC_START_LOCAL(vmx_vmenter) + /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ + je 2f + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6687,8 +6687,6 @@ static fastpath_t vmx_exit_handlers_fast + } + } + +-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +- + static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, + struct vcpu_vmx *vmx) + { +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -365,6 +365,7 @@ void vmx_set_virtual_apic_mode(struct kv + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); ++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); + int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); + diff --git a/queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch b/queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch new file mode 100644 index 00000000000..8ba2501187b --- /dev/null +++ b/queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch @@ -0,0 +1,171 @@ +From foo@baz 
Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:12 +0200 +Subject: KVM: VMX: Convert launched argument to flags + +From: Josh Poimboeuf + +commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream. + +Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in +preparation for doing SPEC_CTRL handling immediately after vmexit, which +will need another flag. + +This is much easier than adding a fourth argument, because this code +supports both 32-bit and 64-bit, and the fourth argument on 32-bit would +have to be pushed on the stack. + +Note that __vmx_vcpu_run_flags() is called outside of the noinstr +critical section because it will soon start calling potentially +traceable functions. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 2 +- + arch/x86/kvm/vmx/run_flags.h | 7 +++++++ + arch/x86/kvm/vmx/vmenter.S | 9 +++++---- + arch/x86/kvm/vmx/vmx.c | 17 ++++++++++++++--- + arch/x86/kvm/vmx/vmx.h | 5 ++++- + 5 files changed, 31 insertions(+), 9 deletions(-) + create mode 100644 arch/x86/kvm/vmx/run_flags.h + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3077,7 +3077,7 @@ static int nested_vmx_check_vmentry_hw(s + } + + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ __vmx_vcpu_run_flags(vmx)); + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); +--- /dev/null ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __KVM_X86_VMX_RUN_FLAGS_H ++#define __KVM_X86_VMX_RUN_FLAGS_H ++ ++#define VMX_RUN_VMRESUME (1 << 0) ++ ++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -5,6 +5,7 @@ + #include + #include 
+ #include ++#include "run_flags.h" + + #define WORD_SIZE (BITS_PER_LONG / 8) + +@@ -34,7 +35,7 @@ + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @regs: unsigned long * (to guest registers) +- * @launched: %true if the VMCS has been launched ++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + */ + push %_ASM_ARG2 + +- /* Copy @launched to BL, _ASM_ARG3 is volatile. */ ++ /* Copy @flags to BL, _ASM_ARG3 is volatile. */ + mov %_ASM_ARG3B, %bl + + lea (%_ASM_SP), %_ASM_ARG2 +@@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- testb %bl, %bl ++ testb $VMX_RUN_VMRESUME, %bl + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX +@@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + + /* Check EFLAGS.ZF from 'testb' above */ +- je .Lvmlaunch ++ jz .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -936,6 +936,16 @@ static bool msr_write_intercepted(struct + return true; + } + ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) ++{ ++ unsigned int flags = 0; ++ ++ if (vmx->loaded_vmcs->launched) ++ flags |= VMX_RUN_VMRESUME; ++ ++ return flags; ++} ++ + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) + { +@@ -6688,7 +6698,8 @@ static fastpath_t vmx_exit_handlers_fast + } + + static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, +- struct vcpu_vmx *vmx) ++ struct vcpu_vmx *vmx, ++ unsigned long flags) + { + /* + * VMENTER enables interrupts (host state), but the kernel state is +@@ -6725,7 +6736,7 @@ static noinstr void vmx_vcpu_enter_exit( + 
native_write_cr2(vcpu->arch.cr2); + + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ flags); + + vcpu->arch.cr2 = native_read_cr2(); + +@@ -6824,7 +6835,7 @@ reenter_guest: + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ +- vmx_vcpu_enter_exit(vcpu, vmx); ++ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + + /* + * We do not use IBRS in the kernel. If this vCPU has used the +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -13,6 +13,7 @@ + #include "vmcs.h" + #include "vmx_ops.h" + #include "cpuid.h" ++#include "run_flags.h" + + extern const u32 vmx_msr_index[]; + +@@ -365,7 +366,9 @@ void vmx_set_virtual_apic_mode(struct kv + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); ++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, ++ unsigned int flags); + int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); + diff --git a/queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch b/queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch new file mode 100644 index 00000000000..1ece0eafd07 --- /dev/null +++ b/queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:14 +0200 +Subject: KVM: VMX: Fix IBRS handling after vmexit + +From: Josh Poimboeuf + +commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream. + +For legacy IBRS to work, the IBRS bit needs to be always re-written +after vmexit, even if it's already on. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6706,8 +6706,13 @@ void noinstr vmx_spec_ctrl_restore_host( + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. ++ * ++ * For legacy IBRS, the IBRS bit always needs to be written after ++ * transitioning from a less privileged predictor mode, regardless of ++ * whether the guest/host values differ. + */ +- if (vmx->spec_ctrl != hostval) ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || ++ vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); diff --git a/queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch b/queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch new file mode 100644 index 00000000000..add91ea3ec1 --- /dev/null +++ b/queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch @@ -0,0 +1,197 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:11 +0200 +Subject: KVM: VMX: Flatten __vmx_vcpu_run() + +From: Josh Poimboeuf + +commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream. + +Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run(). This +will make it easier to do the spec_ctrl handling before the first RET. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +[cascardo: remove ENDBR] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmenter.S | 118 +++++++++++++++++---------------------------- + 1 file changed, 45 insertions(+), 73 deletions(-) + +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -31,68 +31,6 @@ + .section .noinstr.text, "ax" + + /** +- * vmx_vmenter - VM-Enter the current loaded VMCS +- * +- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME +- * +- * Returns: +- * %RFLAGS.CF is set on VM-Fail Invalid +- * %RFLAGS.ZF is set on VM-Fail Valid +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * Note that VMRESUME/VMLAUNCH fall-through and return directly if +- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump +- * to vmx_vmexit. +- */ +-SYM_FUNC_START_LOCAL(vmx_vmenter) +- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ +- je 2f +- +-1: vmresume +- RET +- +-2: vmlaunch +- RET +- +-3: cmpb $0, kvm_rebooting +- je 4f +- RET +-4: ud2 +- +- _ASM_EXTABLE(1b, 3b) +- _ASM_EXTABLE(2b, 3b) +- +-SYM_FUNC_END(vmx_vmenter) +- +-/** +- * vmx_vmexit - Handle a VMX VM-Exit +- * +- * Returns: +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump +- * here after hardware loads the host's state, i.e. this is the destination +- * referred to by VMCS.HOST_RIP. +- */ +-SYM_FUNC_START(vmx_vmexit) +-#ifdef CONFIG_RETPOLINE +- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE +- /* Preserve guest's RAX, it's used to stuff the RSB. */ +- push %_ASM_AX +- +- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ +- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. 
*/ +- or $1, %_ASM_AX +- +- pop %_ASM_AX +-.Lvmexit_skip_rsb: +-#endif +- RET +-SYM_FUNC_END(vmx_vmexit) +- +-/** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @regs: unsigned long * (to guest registers) +@@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + mov %_ASM_ARG3B, %bl + +- /* Adjust RSP to account for the CALL to vmx_vmenter(). */ +- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 ++ lea (%_ASM_SP), %_ASM_ARG2 + call vmx_update_host_rsp + + /* Load @regs to RAX. */ +@@ -154,11 +91,36 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + +- /* Enter guest mode */ +- call vmx_vmenter ++ /* Check EFLAGS.ZF from 'testb' above */ ++ je .Lvmlaunch + +- /* Jump on VM-Fail. */ +- jbe 2f ++ /* ++ * After a successful VMRESUME/VMLAUNCH, control flow "magically" ++ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. ++ * So this isn't a typical function and objtool needs to be told to ++ * save the unwind state here and restore it below. ++ */ ++ UNWIND_HINT_SAVE ++ ++/* ++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at ++ * the 'vmx_vmexit' label below. ++ */ ++.Lvmresume: ++ vmresume ++ jmp .Lvmfail ++ ++.Lvmlaunch: ++ vmlaunch ++ jmp .Lvmfail ++ ++ _ASM_EXTABLE(.Lvmresume, .Lfixup) ++ _ASM_EXTABLE(.Lvmlaunch, .Lfixup) ++ ++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ++ ++ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ ++ UNWIND_HINT_RESTORE + + /* Temporarily save guest's RAX. */ + push %_ASM_AX +@@ -185,9 +147,13 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + ++ /* IMPORTANT: RSB must be stuffed before the first return. */ ++ FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ + /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). 
*/ + xor %eax, %eax + ++.Lclear_regs: + /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded +@@ -197,7 +163,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + * free. RSP and RAX are exempt as RSP is restored by hardware during + * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + */ +-1: xor %ecx, %ecx ++ xor %ecx, %ecx + xor %edx, %edx + xor %ebx, %ebx + xor %ebp, %ebp +@@ -216,8 +182,8 @@ SYM_FUNC_START(__vmx_vcpu_run) + + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP +- pop %_ASM_BX + ++ pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 + pop %r13 +@@ -230,9 +196,15 @@ SYM_FUNC_START(__vmx_vcpu_run) + pop %_ASM_BP + RET + +- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ +-2: mov $1, %eax +- jmp 1b ++.Lfixup: ++ cmpb $0, kvm_rebooting ++ jne .Lvmfail ++ ud2 ++.Lvmfail: ++ /* VM-Fail: set return value to 1 */ ++ mov $1, %eax ++ jmp .Lclear_regs ++ + SYM_FUNC_END(__vmx_vcpu_run) + + diff --git a/queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch b/queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch new file mode 100644 index 00000000000..cface601f83 --- /dev/null +++ b/queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch @@ -0,0 +1,241 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:13 +0200 +Subject: KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS + +From: Josh Poimboeuf + +commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream. + +On eIBRS systems, the returns in the vmexit return path from +__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks. + +Fix that by moving the post-vmexit spec_ctrl handling to immediately +after the vmexit. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 4 ++ + arch/x86/kvm/vmx/run_flags.h | 1 + arch/x86/kvm/vmx/vmenter.S | 49 +++++++++++++++++++++++++++-------- + arch/x86/kvm/vmx/vmx.c | 48 ++++++++++++++++++++-------------- + arch/x86/kvm/vmx/vmx.h | 1 + 6 files changed, 73 insertions(+), 31 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -275,6 +275,7 @@ static inline void indirect_branch_predi + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern u64 x86_spec_ctrl_current; + extern void write_spec_ctrl_current(u64 val, bool force); + extern u64 spec_ctrl_current(void); + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -186,6 +186,10 @@ void __init check_bugs(void) + #endif + } + ++/* ++ * NOTE: For VMX, this function is not called in the vmexit path. ++ * It uses vmx_spec_ctrl_restore_host() instead. 
++ */ + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +--- a/arch/x86/kvm/vmx/run_flags.h ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -3,5 +3,6 @@ + #define __KVM_X86_VMX_RUN_FLAGS_H + + #define VMX_RUN_VMRESUME (1 << 0) ++#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + + #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -33,9 +33,10 @@ + + /** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode +- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) ++ * @vmx: struct vcpu_vmx * + * @regs: unsigned long * (to guest registers) +- * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run) + #endif + push %_ASM_BX + ++ /* Save @vmx for SPEC_CTRL handling */ ++ push %_ASM_ARG1 ++ ++ /* Save @flags for SPEC_CTRL handling */ ++ push %_ASM_ARG3 ++ + /* + * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and + * @regs is needed after VM-Exit to save the guest's register values. +@@ -148,25 +155,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + mov %r15, VCPU_R15(%_ASM_AX) + #endif + +- /* IMPORTANT: RSB must be stuffed before the first return. */ +- FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ +- xor %eax, %eax ++ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ ++ xor %ebx, %ebx + + .Lclear_regs: + /* +- * Clear all general purpose registers except RSP and RAX to prevent ++ * Clear all general purpose registers except RSP and RBX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. 
In theory, an L1 cache miss when restoring registers + * could lead to speculative execution with the guest's values. + * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially + * free. RSP and RAX are exempt as RSP is restored by hardware during +- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. ++ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return ++ * value. + */ ++ xor %eax, %eax + xor %ecx, %ecx + xor %edx, %edx +- xor %ebx, %ebx + xor %ebp, %ebp + xor %esi, %esi + xor %edi, %edi +@@ -184,6 +189,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP + ++ /* ++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before ++ * the first unbalanced RET after vmexit! ++ * ++ * For retpoline, RSB filling is needed to prevent poisoned RSB entries ++ * and (in some cases) RSB underflow. ++ * ++ * eIBRS has its own protection against poisoned RSB, so it doesn't ++ * need the RSB filling sequence. But it does need to be enabled ++ * before the first unbalanced RET. ++ */ ++ ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ ++ pop %_ASM_ARG2 /* @flags */ ++ pop %_ASM_ARG1 /* @vmx */ ++ ++ call vmx_spec_ctrl_restore_host ++ ++ /* Put return value in AX */ ++ mov %_ASM_BX, %_ASM_AX ++ + pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 +@@ -203,7 +230,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + ud2 + .Lvmfail: + /* VM-Fail: set return value to 1 */ +- mov $1, %eax ++ mov $1, %_ASM_BX + jmp .Lclear_regs + + SYM_FUNC_END(__vmx_vcpu_run) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -943,6 +943,14 @@ unsigned int __vmx_vcpu_run_flags(struct + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + ++ /* ++ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free ++ * to change it directly without causing a vmexit. In that case read ++ * it after vmexit and store it in vmx->spec_ctrl. 
++ */ ++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) ++ flags |= VMX_RUN_SAVE_SPEC_CTRL; ++ + return flags; + } + +@@ -6685,6 +6693,26 @@ void noinstr vmx_update_host_rsp(struct + } + } + ++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, ++ unsigned int flags) ++{ ++ u64 hostval = this_cpu_read(x86_spec_ctrl_current); ++ ++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) ++ return; ++ ++ if (flags & VMX_RUN_SAVE_SPEC_CTRL) ++ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); ++ ++ /* ++ * If the guest/host SPEC_CTRL values differ, restore the host value. ++ */ ++ if (vmx->spec_ctrl != hostval) ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); ++ ++ barrier_nospec(); ++} ++ + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { + switch (to_vmx(vcpu)->exit_reason.basic) { +@@ -6837,26 +6865,6 @@ reenter_guest: + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + +- /* +- * We do not use IBRS in the kernel. If this vCPU has used the +- * SPEC_CTRL MSR it may have left it on; save the value and +- * turn it off. This is much more efficient than blindly adding +- * it to the atomic save/restore list. Especially as the former +- * (Saving guest MSRs on vmexit) doesn't even exist in KVM. +- * +- * For non-nested case: +- * If the L01 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- * +- * For nested case: +- * If the L02 MSR bitmap does not intercept the MSR, then we need to +- * save it. 
+- */ +- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) +- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); +- +- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); +- + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_clean_fields |= +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -366,6 +366,7 @@ void vmx_set_virtual_apic_mode(struct kv + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); ++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); + unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); + bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); diff --git a/queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch b/queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch new file mode 100644 index 00000000000..64171a46680 --- /dev/null +++ b/queue-5.10/kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch @@ -0,0 +1,33 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Uros Bizjak +Date: Thu, 29 Oct 2020 15:04:57 +0100 +Subject: KVM/VMX: Use TEST %REG,%REG instead of CMP $0,%REG in vmenter.S + +From: Uros Bizjak + +commit 6c44221b05236cc65d76cb5dc2463f738edff39d upstream. + +Saves one byte in __vmx_vcpu_run for the same functionality. 
+ +Cc: Paolo Bonzini +Cc: Sean Christopherson +Signed-off-by: Uros Bizjak +Message-Id: <20201029140457.126965-1-ubizjak@gmail.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmenter.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- cmpb $0, %bl ++ testb %bl, %bl + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX diff --git a/queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch b/queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch new file mode 100644 index 00000000000..130b6350f10 --- /dev/null +++ b/queue-5.10/makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Ben Hutchings +Date: Mon, 11 Jul 2022 00:31:38 +0200 +Subject: Makefile: Set retpoline cflags based on CONFIG_CC_IS_{CLANG,GCC} + +From: Ben Hutchings + +This was done as part of commit 7d73c3e9c51400d3e0e755488050804e4d44737a +"Makefile: remove stale cc-option checks" upstream, and is needed to +support backporting further retpoline changes. 
+ +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/Makefile ++++ b/Makefile +@@ -670,12 +670,14 @@ ifdef CONFIG_FUNCTION_TRACER + CC_FLAGS_FTRACE := -pg + endif + +-RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register +-RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register +-RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk +-RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline +-RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) +-RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG))) ++ifdef CONFIG_CC_IS_GCC ++RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ++RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) ++endif ++ifdef CONFIG_CC_IS_CLANG ++RETPOLINE_CFLAGS := -mretpoline-external-thunk ++RETPOLINE_VDSO_CFLAGS := -mretpoline ++endif + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS + diff --git a/queue-5.10/objtool-add-alt_group-struct.patch b/queue-5.10/objtool-add-alt_group-struct.patch new file mode 100644 index 00000000000..96c7b98299d --- /dev/null +++ b/queue-5.10/objtool-add-alt_group-struct.patch @@ -0,0 +1,129 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Fri, 18 Dec 2020 14:19:32 -0600 +Subject: objtool: Add 'alt_group' struct + +From: Josh Poimboeuf + +commit b23cc71c62747f2e4c3e56138872cf47e1294f8a upstream. + +Create a new struct associated with each group of alternatives +instructions. This will help with the removal of fake jumps, and more +importantly with adding support for stack layout changes in +alternatives. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 29 +++++++++++++++++++++++------ + tools/objtool/check.h | 13 ++++++++++++- + 2 files changed, 35 insertions(+), 7 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1012,20 +1012,28 @@ static int handle_group_alt(struct objto + struct instruction *orig_insn, + struct instruction **new_insn) + { +- static unsigned int alt_group_next_index = 1; + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; +- unsigned int alt_group = alt_group_next_index++; ++ struct alt_group *orig_alt_group, *new_alt_group; + unsigned long dest_off; + ++ ++ orig_alt_group = malloc(sizeof(*orig_alt_group)); ++ if (!orig_alt_group) { ++ WARN("malloc failed"); ++ return -1; ++ } + last_orig_insn = NULL; + insn = orig_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->orig_off + special_alt->orig_len) + break; + +- insn->alt_group = alt_group; ++ insn->alt_group = orig_alt_group; + last_orig_insn = insn; + } ++ orig_alt_group->orig_group = NULL; ++ orig_alt_group->first_insn = orig_insn; ++ orig_alt_group->last_insn = last_orig_insn; + + if (next_insn_same_sec(file, last_orig_insn)) { + fake_jump = malloc(sizeof(*fake_jump)); +@@ -1056,8 +1064,13 @@ static int handle_group_alt(struct objto + return 0; + } + ++ new_alt_group = malloc(sizeof(*new_alt_group)); ++ if (!new_alt_group) { ++ WARN("malloc failed"); ++ return -1; ++ } ++ + last_new_insn = NULL; +- alt_group = alt_group_next_index++; + insn = *new_insn; + sec_for_each_insn_from(file, insn) { + struct reloc *alt_reloc; +@@ -1069,7 +1082,7 @@ static int handle_group_alt(struct objto + + insn->ignore = orig_insn->ignore_alts; + insn->func = orig_insn->func; +- insn->alt_group = alt_group; ++ insn->alt_group = new_alt_group; + + /* + * Since alternative replacement code is copy/pasted by the +@@ -1118,6 +1131,10 @@ static int 
handle_group_alt(struct objto + return -1; + } + ++ new_alt_group->orig_group = orig_alt_group; ++ new_alt_group->first_insn = *new_insn; ++ new_alt_group->last_insn = last_new_insn; ++ + if (fake_jump) + list_add(&fake_jump->list, &last_new_insn->list); + +@@ -2440,7 +2457,7 @@ static int validate_return(struct symbol + static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) + { + struct instruction *first_insn = insn; +- int alt_group = insn->alt_group; ++ struct alt_group *alt_group = insn->alt_group; + + sec_for_each_insn_continue(file, insn) { + if (insn->alt_group != alt_group) +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -19,6 +19,17 @@ struct insn_state { + s8 instr; + }; + ++struct alt_group { ++ /* ++ * Pointer from a replacement group to the original group. NULL if it ++ * *is* the original group. ++ */ ++ struct alt_group *orig_group; ++ ++ /* First and last instructions in the group */ ++ struct instruction *first_insn, *last_insn; ++}; ++ + struct instruction { + struct list_head list; + struct hlist_node hash; +@@ -34,7 +45,7 @@ struct instruction { + s8 instr; + u8 visited; + u8 ret_offset; +- int alt_group; ++ struct alt_group *alt_group; + struct symbol *call_dest; + struct instruction *jump_dest; + struct instruction *first_jump_src; diff --git a/queue-5.10/objtool-add-elf_create_reloc-helper.patch b/queue-5.10/objtool-add-elf_create_reloc-helper.patch new file mode 100644 index 00000000000..74d92a23b58 --- /dev/null +++ b/queue-5.10/objtool-add-elf_create_reloc-helper.patch @@ -0,0 +1,302 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:07 +0100 +Subject: objtool: Add elf_create_reloc() helper + +From: Peter Zijlstra + +commit ef47cc01cb4abcd760d8ac66b9361d6ade4d0846 upstream. + +We have 4 instances of adding a relocation. Create a common helper +to avoid growing even more. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151259.817438847@infradead.org +[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 43 +++++------------------- + tools/objtool/elf.c | 86 +++++++++++++++++++++++++++++++----------------- + tools/objtool/elf.h | 10 +++-- + tools/objtool/orc_gen.c | 30 +++------------- + 4 files changed, 79 insertions(+), 90 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -433,8 +433,7 @@ reachable: + + static int create_static_call_sections(struct objtool_file *file) + { +- struct section *sec, *reloc_sec; +- struct reloc *reloc; ++ struct section *sec; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; +@@ -460,8 +459,7 @@ static int create_static_call_sections(s + if (!sec) + return -1; + +- reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); +- if (!reloc_sec) ++ if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) + return -1; + + idx = 0; +@@ -471,25 +469,11 @@ static int create_static_call_sections(s + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ +- reloc = malloc(sizeof(*reloc)); +- +- if (!reloc) { +- perror("malloc"); ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(struct static_call_site), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) + return -1; +- } +- memset(reloc, 0, sizeof(*reloc)); +- +- insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); +- if (!reloc->sym) { +- WARN_FUNC("static call tramp: missing containing symbol", +- insn->sec, insn->offset); +- return -1; +- } +- +- reloc->type = R_X86_64_PC32; +- reloc->offset = idx * sizeof(struct static_call_site); +- reloc->sec = reloc_sec; +- elf_add_reloc(file->elf, reloc); + + /* find key symbol 
*/ + key_name = strdup(insn->call_dest->name); +@@ -526,18 +510,11 @@ static int create_static_call_sections(s + free(key_name); + + /* populate reloc for 'key' */ +- reloc = malloc(sizeof(*reloc)); +- if (!reloc) { +- perror("malloc"); ++ if (elf_add_reloc(file->elf, sec, ++ idx * sizeof(struct static_call_site) + 4, ++ R_X86_64_PC32, key_sym, ++ is_sibling_call(insn) * STATIC_CALL_SITE_TAIL)) + return -1; +- } +- memset(reloc, 0, sizeof(*reloc)); +- reloc->sym = key_sym; +- reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; +- reloc->type = R_X86_64_PC32; +- reloc->offset = idx * sizeof(struct static_call_site) + 4; +- reloc->sec = reloc_sec; +- elf_add_reloc(file->elf, reloc); + + idx++; + } +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -262,32 +262,6 @@ struct reloc *find_reloc_by_dest(const s + return find_reloc_by_dest_range(elf, sec, offset, 1); + } + +-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, +- struct reloc *reloc) +-{ +- if (sec->sym) { +- reloc->sym = sec->sym; +- reloc->addend = offset; +- return; +- } +- +- /* +- * The Clang assembler strips section symbols, so we have to reference +- * the function symbol instead: +- */ +- reloc->sym = find_symbol_containing(sec, offset); +- if (!reloc->sym) { +- /* +- * Hack alert. This happens when we need to reference the NOP +- * pad insn immediately after the function. 
+- */ +- reloc->sym = find_symbol_containing(sec, offset - 1); +- } +- +- if (reloc->sym) +- reloc->addend = offset - reloc->sym->offset; +-} +- + static int read_sections(struct elf *elf) + { + Elf_Scn *s = NULL; +@@ -524,14 +498,66 @@ err: + return -1; + } + +-void elf_add_reloc(struct elf *elf, struct reloc *reloc) ++int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, ++ unsigned int type, struct symbol *sym, int addend) + { +- struct section *sec = reloc->sec; ++ struct reloc *reloc; + +- list_add_tail(&reloc->list, &sec->reloc_list); ++ reloc = malloc(sizeof(*reloc)); ++ if (!reloc) { ++ perror("malloc"); ++ return -1; ++ } ++ memset(reloc, 0, sizeof(*reloc)); ++ ++ reloc->sec = sec->reloc; ++ reloc->offset = offset; ++ reloc->type = type; ++ reloc->sym = sym; ++ reloc->addend = addend; ++ ++ list_add_tail(&reloc->list, &sec->reloc->reloc_list); + elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + +- sec->changed = true; ++ sec->reloc->changed = true; ++ ++ return 0; ++} ++ ++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, ++ unsigned long offset, unsigned int type, ++ struct section *insn_sec, unsigned long insn_off) ++{ ++ struct symbol *sym; ++ int addend; ++ ++ if (insn_sec->sym) { ++ sym = insn_sec->sym; ++ addend = insn_off; ++ ++ } else { ++ /* ++ * The Clang assembler strips section symbols, so we have to ++ * reference the function symbol instead: ++ */ ++ sym = find_symbol_containing(insn_sec, insn_off); ++ if (!sym) { ++ /* ++ * Hack alert. This happens when we need to reference ++ * the NOP pad insn immediately after the function. 
++ */ ++ sym = find_symbol_containing(insn_sec, insn_off - 1); ++ } ++ ++ if (!sym) { ++ WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off); ++ return -1; ++ } ++ ++ addend = insn_off - sym->offset; ++ } ++ ++ return elf_add_reloc(elf, sec, offset, type, sym, addend); + } + + static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx) +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -123,7 +123,13 @@ static inline u32 reloc_hash(struct relo + struct elf *elf_open_read(const char *name, int flags); + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); + struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); +-void elf_add_reloc(struct elf *elf, struct reloc *reloc); ++ ++int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, ++ unsigned int type, struct symbol *sym, int addend); ++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, ++ unsigned long offset, unsigned int type, ++ struct section *insn_sec, unsigned long insn_off); ++ + int elf_write_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int len, + const char *insn); +@@ -140,8 +146,6 @@ struct reloc *find_reloc_by_dest(const s + struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, + unsigned long offset, unsigned int len); + struct symbol *find_func_containing(struct section *sec, unsigned long offset); +-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, +- struct reloc *reloc); + + #define for_each_sec(file, sec) \ + list_for_each_entry(sec, &file->elf->sections, list) +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -81,37 +81,20 @@ static int init_orc_entry(struct orc_ent + } + + static int write_orc_entry(struct elf *elf, struct section *orc_sec, +- struct section *ip_rsec, unsigned int idx, ++ struct 
section *ip_sec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off, + struct orc_entry *o) + { + struct orc_entry *orc; +- struct reloc *reloc; + + /* populate ORC data */ + orc = (struct orc_entry *)orc_sec->data->d_buf + idx; + memcpy(orc, o, sizeof(*orc)); + + /* populate reloc for ip */ +- reloc = malloc(sizeof(*reloc)); +- if (!reloc) { +- perror("malloc"); ++ if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32, ++ insn_sec, insn_off)) + return -1; +- } +- memset(reloc, 0, sizeof(*reloc)); +- +- insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); +- if (!reloc->sym) { +- WARN("missing symbol for insn at offset 0x%lx", +- insn_off); +- return -1; +- } +- +- reloc->type = R_X86_64_PC32; +- reloc->offset = idx * sizeof(int); +- reloc->sec = ip_rsec; +- +- elf_add_reloc(elf, reloc); + + return 0; + } +@@ -150,7 +133,7 @@ static unsigned long alt_group_len(struc + + int orc_create(struct objtool_file *file) + { +- struct section *sec, *ip_rsec, *orc_sec; ++ struct section *sec, *orc_sec; + unsigned int nr = 0, idx = 0; + struct orc_list_entry *entry; + struct list_head orc_list; +@@ -239,13 +222,12 @@ int orc_create(struct objtool_file *file + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); + if (!sec) + return -1; +- ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); +- if (!ip_rsec) ++ if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) + return -1; + + /* Write ORC entries to sections: */ + list_for_each_entry(entry, &orc_list, list) { +- if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++, ++ if (write_orc_entry(file->elf, orc_sec, sec, idx++, + entry->insn_sec, entry->insn_off, + &entry->orc)) + return -1; diff --git a/queue-5.10/objtool-add-elf_create_undef_symbol.patch b/queue-5.10/objtool-add-elf_create_undef_symbol.patch new file mode 100644 index 00000000000..55b3afc2375 --- /dev/null +++ b/queue-5.10/objtool-add-elf_create_undef_symbol.patch @@ -0,0 +1,103 @@ +From 
foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:11 +0100 +Subject: objtool: Add elf_create_undef_symbol() + +From: Peter Zijlstra + +commit 2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046 upstream. + +Allow objtool to create undefined symbols; this allows creating +relocations to symbols not currently in the symbol table. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151300.064743095@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/elf.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + tools/objtool/elf.h | 1 + 2 files changed, 61 insertions(+) + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -766,6 +766,66 @@ static int elf_add_string(struct elf *el + return len; + } + ++struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) ++{ ++ struct section *symtab; ++ struct symbol *sym; ++ Elf_Data *data; ++ Elf_Scn *s; ++ ++ sym = malloc(sizeof(*sym)); ++ if (!sym) { ++ perror("malloc"); ++ return NULL; ++ } ++ memset(sym, 0, sizeof(*sym)); ++ ++ sym->name = strdup(name); ++ ++ sym->sym.st_name = elf_add_string(elf, NULL, sym->name); ++ if (sym->sym.st_name == -1) ++ return NULL; ++ ++ sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); ++ // st_other 0 ++ // st_shndx 0 ++ // st_value 0 ++ // st_size 0 ++ ++ symtab = find_section_by_name(elf, ".symtab"); ++ if (!symtab) { ++ WARN("can't find .symtab"); ++ return NULL; ++ } ++ ++ s = elf_getscn(elf->elf, symtab->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return NULL; ++ } ++ ++ data = elf_newdata(s); ++ if (!data) { ++ WARN_ELF("elf_newdata"); ++ return NULL; ++ } ++ ++ data->d_buf = &sym->sym; ++ data->d_size = sizeof(sym->sym); ++ data->d_align = 1; ++ ++ sym->idx = symtab->len / sizeof(sym->sym); ++ ++ symtab->len += data->d_size; ++ symtab->changed = true; 
++ ++ sym->sec = find_section_by_index(elf, 0); ++ ++ elf_add_symbol(elf, sym); ++ ++ return sym; ++} ++ + struct section *elf_create_section(struct elf *elf, const char *name, + unsigned int sh_flags, size_t entsize, int nr) + { +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, stru + unsigned long offset, unsigned int len, + const char *insn); + int elf_write_reloc(struct elf *elf, struct reloc *reloc); ++struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); + int elf_write(struct elf *elf); + void elf_close(struct elf *elf); + diff --git a/queue-5.10/objtool-add-entry-unret-validation.patch b/queue-5.10/objtool-add-entry-unret-validation.patch new file mode 100644 index 00000000000..f65919c2ec8 --- /dev/null +++ b/queue-5.10/objtool-add-entry-unret-validation.patch @@ -0,0 +1,533 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:03 +0200 +Subject: objtool: Add entry UNRET validation + +From: Peter Zijlstra + +commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream. + +Since entry asm is tricky, add a validation pass that ensures the +retbleed mitigation has been done before the first actual RET +instruction. + +Entry points are those that either have UNWIND_HINT_ENTRY, which acts +as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or +those that have UNWIND_HINT_IRET_REGS at +0. + +This is basically a variant of validate_branch() that is +intra-function and it will simply follow all branches from marked +entry points and ensures that all paths lead to ANNOTATE_UNRET_END. + +If a path hits RET or an indirection the path is a fail and will be +reported.
+ +There are 3 ANNOTATE_UNRET_END instances: + + - UNTRAIN_RET itself + - exception from-kernel; this path doesn't need UNTRAIN_RET + - all early exceptions; these also don't need UNTRAIN_RET + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: arch/x86/entry/entry_64.S no pt_regs return at .Lerror_entry_done_lfence] +[cascardo: tools/objtool/builtin-check.c no link option validation] +[cascardo: tools/objtool/check.c opts.ibt is ibt] +[cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts] +[cascardo: objtool is still called from scripts/link-vmlinux.sh] +[cascardo: no IBT support] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: + - In scripts/link-vmlinux.sh, use "test -n" instead of is_enabled + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 3 + arch/x86/entry/entry_64_compat.S | 6 - + arch/x86/include/asm/nospec-branch.h | 12 ++ + arch/x86/include/asm/unwind_hints.h | 4 + arch/x86/kernel/head_64.S | 5 + + arch/x86/xen/xen-asm.S | 10 +- + include/linux/objtool.h | 3 + scripts/link-vmlinux.sh | 3 + tools/include/linux/objtool.h | 3 + tools/objtool/builtin-check.c | 3 + tools/objtool/builtin.h | 2 + tools/objtool/check.c | 172 ++++++++++++++++++++++++++++++++++- + tools/objtool/check.h | 6 + + 13 files changed, 217 insertions(+), 15 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -93,7 +93,7 @@ SYM_CODE_END(native_usergs_sysret64) + */ + + SYM_CODE_START(entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + + swapgs + /* tss.sp2 is scratch space. 
*/ +@@ -1094,6 +1094,7 @@ SYM_CODE_START_LOCAL(error_entry) + */ + .Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY ++ ANNOTATE_UNRET_END + RET + + .Lbstep_iret: +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -49,7 +49,7 @@ + * 0(%ebp) arg6 + */ + SYM_CODE_START(entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* Interrupts are off on entry. */ + SWAPGS + +@@ -202,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat) + * 0(%esp) arg6 + */ + SYM_CODE_START(entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* Interrupts are off on entry. */ + swapgs + +@@ -349,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat) + * ebp arg6 + */ + SYM_CODE_START(entry_INT80_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* + * Interrupts are off on entry. + */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -82,6 +82,17 @@ + #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + + /* ++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should ++ * eventually turn into it's own annotation. ++ */ ++.macro ANNOTATE_UNRET_END ++#ifdef CONFIG_DEBUG_ENTRY ++ ANNOTATE_RETPOLINE_SAFE ++ nop ++#endif ++.endm ++ ++/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. 
+@@ -131,6 +142,7 @@ + */ + .macro UNTRAIN_RET + #ifdef CONFIG_RETPOLINE ++ ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + "call zen_untrain_ret", X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -11,6 +11,10 @@ + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + .endm + ++.macro UNWIND_HINT_ENTRY ++ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 ++.endm ++ + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 + .if \base == %rsp + .if \indirect +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -321,6 +321,8 @@ SYM_CODE_END(start_cpu0) + SYM_CODE_START_NOALIGN(vc_boot_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +@@ -378,6 +380,7 @@ SYM_CODE_START(early_idt_handler_array) + SYM_CODE_END(early_idt_handler_array) + + SYM_CODE_START_LOCAL(early_idt_handler_common) ++ ANNOTATE_UNRET_END + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. 
+@@ -424,6 +427,8 @@ SYM_CODE_END(early_idt_handler_common) + SYM_CODE_START_NOALIGN(vc_no_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -148,7 +148,7 @@ SYM_FUNC_END(xen_read_cr2_direct); + + .macro xen_pv_trap name + SYM_CODE_START(xen_\name) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + pop %rcx + pop %r11 + jmp \name +@@ -277,7 +277,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu + + /* Normal 64-bit system call target */ + SYM_CODE_START(xen_entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + popq %rcx + popq %r11 + +@@ -296,7 +296,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64) + + /* 32-bit compat syscall target */ + SYM_CODE_START(xen_entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + popq %rcx + popq %r11 + +@@ -313,7 +313,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ + SYM_CODE_START(xen_entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. +@@ -336,7 +336,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat) + + SYM_CODE_START(xen_entry_SYSCALL_compat) + SYM_CODE_START(xen_entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -32,11 +32,14 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. 
+ */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 + + #ifdef CONFIG_STACK_VALIDATION + +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -65,6 +65,9 @@ objtool_link() + + if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then + objtoolopt="check" ++ if [ -n "${CONFIG_RETPOLINE}" ]; then ++ objtoolopt="${objtoolopt} --unret" ++ fi + if [ -z "${CONFIG_FRAME_POINTER}" ]; then + objtoolopt="${objtoolopt} --no-fp" + fi +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -32,11 +32,14 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 + + #ifdef CONFIG_STACK_VALIDATION + +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -19,7 +19,7 @@ + #include "objtool.h" + + bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, sls; ++ validate_dup, vmlinux, sls, unret; + + static const char * const check_usage[] = { + "objtool check [] file.o", +@@ -30,6 +30,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), + OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), ++ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), + 
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), +--- a/tools/objtool/builtin.h ++++ b/tools/objtool/builtin.h +@@ -9,7 +9,7 @@ + + extern const struct option check_options[]; + extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, sls; ++ validate_dup, vmlinux, sls, unret; + + extern int cmd_check(int argc, const char **argv); + extern int cmd_orc(int argc, const char **argv); +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1752,6 +1752,19 @@ static int read_unwind_hints(struct objt + + insn->hint = true; + ++ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { ++ struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); ++ ++ if (sym && sym->bind == STB_GLOBAL) { ++ insn->entry = 1; ++ } ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_ENTRY) { ++ hint->type = UNWIND_HINT_TYPE_CALL; ++ insn->entry = 1; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + insn->cfi = &func_cfi; + continue; +@@ -1800,8 +1813,9 @@ static int read_retpoline_hints(struct o + + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && +- insn->type != INSN_RETURN) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", ++ insn->type != INSN_RETURN && ++ insn->type != INSN_NOP) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", + insn->sec, insn->offset); + return -1; + } +@@ -2818,8 +2832,8 @@ static int validate_branch(struct objtoo + return 1; + } + +- visited = 1 << state.uaccess; +- if (insn->visited) { ++ visited = VISITED_BRANCH << state.uaccess; ++ if (insn->visited & VISITED_BRANCH_MASK) { + if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) + return 1; + +@@ -3045,6 +3059,145 @@ static int validate_unwind_hints(struct + return warnings; + } + ++/* ++ * Validate rethunk entry constraint: must untrain RET before the first RET. 
++ * ++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes ++ * before an actual RET instruction. ++ */ ++static int validate_entry(struct objtool_file *file, struct instruction *insn) ++{ ++ struct instruction *next, *dest; ++ int ret, warnings = 0; ++ ++ for (;;) { ++ next = next_insn_to_validate(file, insn); ++ ++ if (insn->visited & VISITED_ENTRY) ++ return 0; ++ ++ insn->visited |= VISITED_ENTRY; ++ ++ if (!insn->ignore_alts && !list_empty(&insn->alts)) { ++ struct alternative *alt; ++ bool skip_orig = false; ++ ++ list_for_each_entry(alt, &insn->alts, list) { ++ if (alt->skip_orig) ++ skip_orig = true; ++ ++ ret = validate_entry(file, alt->insn); ++ if (ret) { ++ if (backtrace) ++ BT_FUNC("(alt)", insn); ++ return ret; ++ } ++ } ++ ++ if (skip_orig) ++ return 0; ++ } ++ ++ switch (insn->type) { ++ ++ case INSN_CALL_DYNAMIC: ++ case INSN_JUMP_DYNAMIC: ++ case INSN_JUMP_DYNAMIC_CONDITIONAL: ++ WARN_FUNC("early indirect call", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_JUMP_UNCONDITIONAL: ++ case INSN_JUMP_CONDITIONAL: ++ if (!is_sibling_call(insn)) { ++ if (!insn->jump_dest) { ++ WARN_FUNC("unresolved jump target after linking?!?", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ret = validate_entry(file, insn->jump_dest); ++ if (ret) { ++ if (backtrace) { ++ BT_FUNC("(branch%s)", insn, ++ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); ++ } ++ return ret; ++ } ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) ++ return 0; ++ ++ break; ++ } ++ ++ /* fallthrough */ ++ case INSN_CALL: ++ dest = find_insn(file, insn->call_dest->sec, ++ insn->call_dest->offset); ++ if (!dest) { ++ WARN("Unresolved function after linking!?: %s", ++ insn->call_dest->name); ++ return -1; ++ } ++ ++ ret = validate_entry(file, dest); ++ if (ret) { ++ if (backtrace) ++ BT_FUNC("(call)", insn); ++ return ret; ++ } ++ /* ++ * If a call returns without error, it must have seen UNTRAIN_RET. ++ * Therefore any non-error return is a success. 
++ */ ++ return 0; ++ ++ case INSN_RETURN: ++ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_NOP: ++ if (insn->retpoline_safe) ++ return 0; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!next) { ++ WARN_FUNC("teh end!", insn->sec, insn->offset); ++ return -1; ++ } ++ insn = next; ++ } ++ ++ return warnings; ++} ++ ++/* ++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END ++ * before RET. ++ */ ++static int validate_unret(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ int ret, warnings = 0; ++ ++ for_each_insn(file, insn) { ++ if (!insn->entry) ++ continue; ++ ++ ret = validate_entry(file, insn); ++ if (ret < 0) { ++ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); ++ return ret; ++ } ++ warnings += ret; ++ } ++ ++ return warnings; ++} ++ + static int validate_retpoline(struct objtool_file *file) + { + struct instruction *insn; +@@ -3312,6 +3465,17 @@ int check(struct objtool_file *file) + goto out; + warnings += ret; + ++ if (unret) { ++ /* ++ * Must be after validate_branch() and friends, it plays ++ * further games with insn->visited. 
++ */ ++ ret = validate_unret(file); ++ if (ret < 0) ++ return ret; ++ warnings += ret; ++ } ++ + if (!warnings) { + ret = validate_reachable_instructions(file); + if (ret < 0) +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -48,6 +48,7 @@ struct instruction { + bool dead_end, ignore, ignore_alts; + bool hint; + bool retpoline_safe; ++ bool entry; + s8 instr; + u8 visited; + struct alt_group *alt_group; +@@ -62,6 +63,11 @@ struct instruction { + struct cfi_state *cfi; + }; + ++#define VISITED_BRANCH 0x01 ++#define VISITED_BRANCH_UACCESS 0x02 ++#define VISITED_BRANCH_MASK 0x03 ++#define VISITED_ENTRY 0x04 ++ + static inline bool is_static_jump(struct instruction *insn) + { + return insn->type == INSN_JUMP_CONDITIONAL || diff --git a/queue-5.10/objtool-add-straight-line-speculation-validation.patch b/queue-5.10/objtool-add-straight-line-speculation-validation.patch new file mode 100644 index 00000000000..4ac487691be --- /dev/null +++ b/queue-5.10/objtool-add-straight-line-speculation-validation.patch @@ -0,0 +1,135 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sat, 4 Dec 2021 14:43:42 +0100 +Subject: objtool: Add straight-line-speculation validation + +From: Peter Zijlstra + +commit 1cc1e4c8aab4213bd4e6353dec2620476a233d6d upstream. + +Teach objtool to validate the straight-line-speculation constraints: + + - speculation trap after indirect calls + - speculation trap after RET + +Notable: when an instruction is annotated RETPOLINE_SAFE, indicating + speculation isn't a problem, also don't care about sls for that + instruction. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20211204134908.023037659@infradead.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +[bwh: Backported to 5.10: adjust filenames, context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch.h | 1 + + tools/objtool/arch/x86/decode.c | 13 +++++++++---- + tools/objtool/builtin-check.c | 4 +++- + tools/objtool/builtin.h | 3 ++- + tools/objtool/check.c | 14 ++++++++++++++ + 5 files changed, 29 insertions(+), 6 deletions(-) + +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -26,6 +26,7 @@ enum insn_type { + INSN_CLAC, + INSN_STD, + INSN_CLD, ++ INSN_TRAP, + INSN_OTHER, + }; + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -456,6 +456,11 @@ int arch_decode_instruction(const struct + + break; + ++ case 0xcc: ++ /* int3 */ ++ *type = INSN_TRAP; ++ break; ++ + case 0xe3: + /* jecxz/jrcxz */ + *type = INSN_JUMP_CONDITIONAL; +@@ -592,10 +597,10 @@ const char *arch_ret_insn(int len) + { + static const char ret[5][5] = { + { BYTE_RET }, +- { BYTE_RET, 0x90 }, +- { BYTE_RET, 0x66, 0x90 }, +- { BYTE_RET, 0x0f, 0x1f, 0x00 }, +- { BYTE_RET, 0x0f, 0x1f, 0x40, 0x00 }, ++ { BYTE_RET, 0xcc }, ++ { BYTE_RET, 0xcc, 0x90 }, ++ { BYTE_RET, 0xcc, 0x66, 0x90 }, ++ { BYTE_RET, 0xcc, 0x0f, 0x1f, 0x00 }, + }; + + if (len < 1 || len > 5) { +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -18,7 +18,8 @@ + #include "builtin.h" + #include "objtool.h" + +-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; ++bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, ++ validate_dup, vmlinux, sls; + + static const char * const check_usage[] = { + "objtool check [] file.o", +@@ -35,6 +36,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('s', "stats", &stats, "print statistics"), + OPT_BOOLEAN('d', 
"duplicate", &validate_dup, "duplicate validation for vmlinux.o"), + OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), ++ OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), + OPT_END(), + }; + +--- a/tools/objtool/builtin.h ++++ b/tools/objtool/builtin.h +@@ -8,7 +8,8 @@ + #include + + extern const struct option check_options[]; +-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; ++extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, ++ validate_dup, vmlinux, sls; + + extern int cmd_check(int argc, const char **argv); + extern int cmd_orc(int argc, const char **argv); +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -2775,6 +2775,12 @@ static int validate_branch(struct objtoo + switch (insn->type) { + + case INSN_RETURN: ++ if (next_insn && next_insn->type == INSN_TRAP) { ++ next_insn->ignore = true; ++ } else if (sls && !insn->retpoline_safe) { ++ WARN_FUNC("missing int3 after ret", ++ insn->sec, insn->offset); ++ } + return validate_return(func, insn, &state); + + case INSN_CALL: +@@ -2818,6 +2824,14 @@ static int validate_branch(struct objtoo + break; + + case INSN_JUMP_DYNAMIC: ++ if (next_insn && next_insn->type == INSN_TRAP) { ++ next_insn->ignore = true; ++ } else if (sls && !insn->retpoline_safe) { ++ WARN_FUNC("missing int3 after indirect jump", ++ insn->sec, insn->offset); ++ } ++ ++ /* fallthrough */ + case INSN_JUMP_DYNAMIC_CONDITIONAL: + if (is_sibling_call(insn)) { + ret = validate_sibling_call(insn, &state); diff --git a/queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch b/queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch new file mode 100644 index 00000000000..373bc657840 --- /dev/null +++ b/queue-5.10/objtool-assume-only-elf-functions-do-sibling-calls.patch @@ -0,0 +1,121 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Thu, 21 Jan 2021 15:29:22 -0600 +Subject: 
objtool: Assume only ELF functions do sibling calls + +From: Josh Poimboeuf + +commit ecf11ba4d066fe527586c6edd6ca68457ca55cf4 upstream. + +There's an inconsistency in how sibling calls are detected in +non-function asm code, depending on the scope of the object. If the +target code is external to the object, objtool considers it a sibling +call. If the target code is internal but not a function, objtool +*doesn't* consider it a sibling call. + +This can cause some inconsistencies between per-object and vmlinux.o +validation. + +Instead, assume only ELF functions can do sibling calls. This generally +matches existing reality, and makes sibling call validation consistent +between vmlinux.o and per-object. + +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/0e9ab6f3628cc7bf3bde7aa6762d54d7df19ad78.1611263461.git.jpoimboe@redhat.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -109,15 +109,20 @@ static struct instruction *prev_insn_sam + + static bool is_sibling_call(struct instruction *insn) + { ++ /* ++ * Assume only ELF functions can make sibling calls. This ensures ++ * sibling call detection consistency between vmlinux.o and individual ++ * objects. ++ */ ++ if (!insn->func) ++ return false; ++ + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return list_empty(&insn->alts); + +- if (!is_static_jump(insn)) +- return false; +- + /* add_jump_destinations() sets insn->call_dest for sibling calls. 
*/ +- return !!insn->call_dest; ++ return (is_static_jump(insn) && insn->call_dest); + } + + /* +@@ -788,7 +793,7 @@ static int add_jump_destinations(struct + continue; + + reloc = find_reloc_by_dest_range(file->elf, insn->sec, +- insn->offset, insn->len); ++ insn->offset, insn->len); + if (!reloc) { + dest_sec = insn->sec; + dest_off = arch_jump_destination(insn); +@@ -808,18 +813,21 @@ static int add_jump_destinations(struct + + insn->retpoline_safe = true; + continue; +- } else if (reloc->sym->sec->idx) { +- dest_sec = reloc->sym->sec; +- dest_off = reloc->sym->sym.st_value + +- arch_dest_reloc_offset(reloc->addend); +- } else { +- /* external sibling call */ ++ } else if (insn->func) { ++ /* internal or external sibling call (with reloc) */ + insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + continue; ++ } else if (reloc->sym->sec->idx) { ++ dest_sec = reloc->sym->sec; ++ dest_off = reloc->sym->sym.st_value + ++ arch_dest_reloc_offset(reloc->addend); ++ } else { ++ /* non-func asm code jumping to another file */ ++ continue; + } + + insn->jump_dest = find_insn(file, dest_sec, dest_off); +@@ -868,7 +876,7 @@ static int add_jump_destinations(struct + } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && + insn->jump_dest->offset == insn->jump_dest->func->offset) { + +- /* internal sibling call */ ++ /* internal sibling call (without reloc) */ + insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, +@@ -2570,7 +2578,7 @@ static int validate_branch(struct objtoo + + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: +- if (func && is_sibling_call(insn)) { ++ if (is_sibling_call(insn)) { + ret = validate_sibling_call(insn, &state); + if (ret) + return ret; +@@ -2592,7 +2600,7 @@ static int validate_branch(struct objtoo + + case INSN_JUMP_DYNAMIC: + case 
INSN_JUMP_DYNAMIC_CONDITIONAL: +- if (func && is_sibling_call(insn)) { ++ if (is_sibling_call(insn)) { + ret = validate_sibling_call(insn, &state); + if (ret) + return ret; diff --git a/queue-5.10/objtool-cache-instruction-relocs.patch b/queue-5.10/objtool-cache-instruction-relocs.patch new file mode 100644 index 00000000000..33986a8040a --- /dev/null +++ b/queue-5.10/objtool-cache-instruction-relocs.patch @@ -0,0 +1,95 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:13 +0100 +Subject: objtool: Cache instruction relocs + +From: Peter Zijlstra + +commit 7bd2a600f3e9d27286bbf23c83d599e9cc7cf245 upstream. + +Track the reloc of instructions in the new instruction->reloc field +to avoid having to look them up again later. + +( Technically x86 instructions can have two relocations, but not jumps + and calls, for which we're using this. ) + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151300.195441549@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 28 ++++++++++++++++++++++------ + tools/objtool/check.h | 1 + + 2 files changed, 23 insertions(+), 6 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -754,6 +754,25 @@ __weak bool arch_is_retpoline(struct sym + return false; + } + ++#define NEGATIVE_RELOC ((void *)-1L) ++ ++static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) ++{ ++ if (insn->reloc == NEGATIVE_RELOC) ++ return NULL; ++ ++ if (!insn->reloc) { ++ insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, ++ insn->offset, insn->len); ++ if (!insn->reloc) { ++ insn->reloc = NEGATIVE_RELOC; ++ return NULL; ++ } ++ } ++ ++ return insn->reloc; ++} ++ + /* + * Find the destination instructions for all jumps. 
+ */ +@@ -768,8 +787,7 @@ static int add_jump_destinations(struct + if (!is_static_jump(insn)) + continue; + +- reloc = find_reloc_by_dest_range(file->elf, insn->sec, +- insn->offset, insn->len); ++ reloc = insn_reloc(file, insn); + if (!reloc) { + dest_sec = insn->sec; + dest_off = arch_jump_destination(insn); +@@ -901,8 +919,7 @@ static int add_call_destinations(struct + if (insn->type != INSN_CALL) + continue; + +- reloc = find_reloc_by_dest_range(file->elf, insn->sec, +- insn->offset, insn->len); ++ reloc = insn_reloc(file, insn); + if (!reloc) { + dest_off = arch_jump_destination(insn); + insn->call_dest = find_call_destination(insn->sec, dest_off); +@@ -1085,8 +1102,7 @@ static int handle_group_alt(struct objto + * alternatives code can adjust the relative offsets + * accordingly. + */ +- alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, +- insn->offset, insn->len); ++ alt_reloc = insn_reloc(file, insn); + if (alt_reloc && + !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { + +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -55,6 +55,7 @@ struct instruction { + struct instruction *jump_dest; + struct instruction *first_jump_src; + struct reloc *jump_table; ++ struct reloc *reloc; + struct list_head alts; + struct symbol *func; + struct list_head stack_ops; diff --git a/queue-5.10/objtool-classify-symbols.patch b/queue-5.10/objtool-classify-symbols.patch new file mode 100644 index 00000000000..b5957a534b1 --- /dev/null +++ b/queue-5.10/objtool-classify-symbols.patch @@ -0,0 +1,128 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:33 +0200 +Subject: objtool: Classify symbols + +From: Peter Zijlstra + +commit 1739c66eb7bd5f27f1b69a5a26e10e8327d1e136 upstream. + +In order to avoid calling str*cmp() on symbol names, over and over, do +them all once upfront and store the result. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.658539311@infradead.org +[cascardo: no pv_target on struct symbol, because of missing + db2b0c5d7b6f19b3c2cab08c531b65342eb5252b] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: objtool doesn't have any mcount handling] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 32 +++++++++++++++++++++----------- + tools/objtool/elf.h | 7 +++++-- + 2 files changed, 26 insertions(+), 13 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -889,8 +889,7 @@ static void add_call_dest(struct objtool + * so they need a little help, NOP out any KCOV calls from noinstr + * text. + */ +- if (insn->sec->noinstr && +- !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { ++ if (insn->sec->noinstr && insn->call_dest->kcov) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); +@@ -935,7 +934,7 @@ static int add_jump_destinations(struct + } else if (reloc->sym->type == STT_SECTION) { + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); +- } else if (arch_is_retpoline(reloc->sym)) { ++ } else if (reloc->sym->retpoline_thunk) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. +@@ -1076,7 +1075,7 @@ static int add_call_destinations(struct + + add_call_dest(file, insn, dest, false); + +- } else if (arch_is_retpoline(reloc->sym)) { ++ } else if (reloc->sym->retpoline_thunk) { + /* + * Retpoline calls are really dynamic calls in + * disguise, so convert them accordingly. 
+@@ -1733,17 +1732,28 @@ static int read_intra_function_calls(str + return 0; + } + +-static int read_static_call_tramps(struct objtool_file *file) ++static int classify_symbols(struct objtool_file *file) + { + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->bind == STB_GLOBAL && +- !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, ++ if (func->bind != STB_GLOBAL) ++ continue; ++ ++ if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; ++ ++ if (arch_is_retpoline(func)) ++ func->retpoline_thunk = true; ++ ++ if (!strcmp(func->name, "__fentry__")) ++ func->fentry = true; ++ ++ if (!strncmp(func->name, "__sanitizer_cov_", 16)) ++ func->kcov = true; + } + } + +@@ -1805,7 +1815,7 @@ static int decode_sections(struct objtoo + /* + * Must be before add_{jump_call}_destination. + */ +- ret = read_static_call_tramps(file); ++ ret = classify_symbols(file); + if (ret) + return ret; + +@@ -1863,9 +1873,9 @@ static int decode_sections(struct objtoo + + static bool is_fentry_call(struct instruction *insn) + { +- if (insn->type == INSN_CALL && insn->call_dest && +- insn->call_dest->type == STT_NOTYPE && +- !strcmp(insn->call_dest->name, "__fentry__")) ++ if (insn->type == INSN_CALL && ++ insn->call_dest && ++ insn->call_dest->fentry) + return true; + + return false; +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -55,8 +55,11 @@ struct symbol { + unsigned long offset; + unsigned int len; + struct symbol *pfunc, *cfunc, *alias; +- bool uaccess_safe; +- bool static_call_tramp; ++ u8 uaccess_safe : 1; ++ u8 static_call_tramp : 1; ++ u8 retpoline_thunk : 1; ++ u8 fentry : 1; ++ u8 kcov : 1; + }; + + struct reloc { diff --git a/queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch b/queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch new file mode 100644 
index 00000000000..27797626f5c --- /dev/null +++ b/queue-5.10/objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch @@ -0,0 +1,239 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Thu, 21 Jan 2021 15:29:24 -0600 +Subject: objtool: Combine UNWIND_HINT_RET_OFFSET and UNWIND_HINT_FUNC + +From: Josh Poimboeuf + +commit b735bd3e68824316655252a931a3353a6ebc036f upstream. + +The ORC metadata generated for UNWIND_HINT_FUNC isn't actually very +func-like. With certain usages it can cause stack state mismatches +because it doesn't set the return address (CFI_RA). + +Also, users of UNWIND_HINT_RET_OFFSET no longer need to set a custom +return stack offset. Instead they just need to specify a func-like +situation, so the current ret_offset code is hacky for no good reason. + +Solve both problems by simplifying the RET_OFFSET handling and +converting it into a more useful UNWIND_HINT_FUNC. + +If we end up needing the old 'ret_offset' functionality again in the +future, we should be able to support it pretty easily with the addition +of a custom 'sp_offset' in UNWIND_HINT_FUNC. 
+ +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/db9d1f5d79dddfbb3725ef6d8ec3477ad199948d.1611263462.git.jpoimboe@redhat.com +[bwh: Backported to 5.10: + - Don't use bswap_if_needed() since we don't have any of the other fixes + for mixed-endian cross-compilation + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/unwind_hints.h | 13 +----------- + arch/x86/kernel/ftrace_64.S | 2 - + arch/x86/lib/retpoline.S | 2 - + include/linux/objtool.h | 5 +++- + tools/include/linux/objtool.h | 5 +++- + tools/objtool/arch/x86/decode.c | 4 +-- + tools/objtool/check.c | 37 ++++++++++++++---------------------- + tools/objtool/check.h | 1 + 8 files changed, 29 insertions(+), 40 deletions(-) + +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -48,17 +48,8 @@ + UNWIND_HINT_REGS base=\base offset=\offset partial=1 + .endm + +-.macro UNWIND_HINT_FUNC sp_offset=8 +- UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL +-.endm +- +-/* +- * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN +- * and sibling calls. On these, sp_offset denotes the expected offset from +- * initial_func_cfi. 
+- */ +-.macro UNWIND_HINT_RET_OFFSET sp_offset=8 +- UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset ++.macro UNWIND_HINT_FUNC ++ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC + .endm + + #endif /* __ASSEMBLY__ */ +--- a/arch/x86/kernel/ftrace_64.S ++++ b/arch/x86/kernel/ftrace_64.S +@@ -265,7 +265,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, + restore_mcount_regs 8 + /* Restore flags */ + popfq +- UNWIND_HINT_RET_OFFSET ++ UNWIND_HINT_FUNC + jmp ftrace_epilogue + + SYM_FUNC_END(ftrace_regs_caller) +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -28,7 +28,7 @@ SYM_FUNC_START_NOALIGN(__x86_retpoline_\ + jmp .Lspec_trap_\@ + .Ldo_rop_\@: + mov %\reg, (%_ASM_SP) +- UNWIND_HINT_RET_OFFSET ++ UNWIND_HINT_FUNC + ret + SYM_FUNC_END(__x86_retpoline_\reg) + +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -29,11 +29,14 @@ struct unwind_hint { + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. ++ * ++ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. ++ * Useful for code which doesn't have an ELF function annotation. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 +-#define UNWIND_HINT_TYPE_RET_OFFSET 3 ++#define UNWIND_HINT_TYPE_FUNC 3 + + #ifdef CONFIG_STACK_VALIDATION + +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -29,11 +29,14 @@ struct unwind_hint { + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. ++ * ++ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. ++ * Useful for code which doesn't have an ELF function annotation. 
+ */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 +-#define UNWIND_HINT_TYPE_RET_OFFSET 3 ++#define UNWIND_HINT_TYPE_FUNC 3 + + #ifdef CONFIG_STACK_VALIDATION + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -563,8 +563,8 @@ void arch_initial_func_cfi_state(struct + state->cfa.offset = 8; + + /* initial RA (return address) */ +- state->regs[16].base = CFI_CFA; +- state->regs[16].offset = -8; ++ state->regs[CFI_RA].base = CFI_CFA; ++ state->regs[CFI_RA].offset = -8; + } + + const char *arch_nop_insn(int len) +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1423,13 +1423,20 @@ static int add_jump_table_alts(struct ob + return 0; + } + ++static void set_func_state(struct cfi_state *state) ++{ ++ state->cfa = initial_func_cfi.cfa; ++ memcpy(&state->regs, &initial_func_cfi.regs, ++ CFI_NUM_REGS * sizeof(struct cfi_reg)); ++ state->stack_size = initial_func_cfi.cfa.offset; ++} ++ + static int read_unwind_hints(struct objtool_file *file) + { + struct section *sec, *relocsec; + struct reloc *reloc; + struct unwind_hint *hint; + struct instruction *insn; +- struct cfi_reg *cfa; + int i; + + sec = find_section_by_name(file->elf, ".discard.unwind_hints"); +@@ -1464,22 +1471,20 @@ static int read_unwind_hints(struct objt + return -1; + } + +- cfa = &insn->cfi.cfa; ++ insn->hint = true; + +- if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { +- insn->ret_offset = hint->sp_offset; ++ if (hint->type == UNWIND_HINT_TYPE_FUNC) { ++ set_func_state(&insn->cfi); + continue; + } + +- insn->hint = true; +- + if (arch_decode_hint_reg(insn, hint->sp_reg)) { + WARN_FUNC("unsupported unwind_hint sp base reg %d", + insn->sec, insn->offset, hint->sp_reg); + return -1; + } + +- cfa->offset = hint->sp_offset; ++ insn->cfi.cfa.offset = hint->sp_offset; + insn->cfi.type = hint->type; + insn->cfi.end = hint->end; + } +@@ -1742,27 +1747,18 @@ static bool is_fentry_call(struct instru + + 
static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state) + { +- u8 ret_offset = insn->ret_offset; + struct cfi_state *cfi = &state->cfi; + int i; + + if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap) + return true; + +- if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset) ++ if (cfi->cfa.offset != initial_func_cfi.cfa.offset) + return true; + +- if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset) ++ if (cfi->stack_size != initial_func_cfi.cfa.offset) + return true; + +- /* +- * If there is a ret offset hint then don't check registers +- * because a callee-saved register might have been pushed on +- * the stack. +- */ +- if (ret_offset) +- return false; +- + for (i = 0; i < CFI_NUM_REGS; i++) { + if (cfi->regs[i].base != initial_func_cfi.regs[i].base || + cfi->regs[i].offset != initial_func_cfi.regs[i].offset) +@@ -2863,10 +2859,7 @@ static int validate_section(struct objto + continue; + + init_insn_state(&state, sec); +- state.cfi.cfa = initial_func_cfi.cfa; +- memcpy(&state.cfi.regs, &initial_func_cfi.regs, +- CFI_NUM_REGS * sizeof(struct cfi_reg)); +- state.cfi.stack_size = initial_func_cfi.cfa.offset; ++ set_func_state(&state.cfi); + + warnings += validate_symbol(file, sec, func, &state); + } +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -50,7 +50,6 @@ struct instruction { + bool retpoline_safe; + s8 instr; + u8 visited; +- u8 ret_offset; + struct alt_group *alt_group; + struct symbol *call_dest; + struct instruction *jump_dest; diff --git a/queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch b/queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch new file mode 100644 index 00000000000..f5e28041f11 --- /dev/null +++ b/queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch @@ -0,0 +1,44 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:03 +0100 +Subject: objtool: Correctly handle retpoline thunk 
calls + +From: Peter Zijlstra + +commit bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32 upstream. + +Just like JMP handling, convert a direct CALL to a retpoline thunk +into a retpoline safe indirect CALL. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151259.567568238@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -953,6 +953,18 @@ static int add_call_destinations(struct + dest_off); + return -1; + } ++ ++ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { ++ /* ++ * Retpoline calls are really dynamic calls in ++ * disguise, so convert them accordingly. ++ */ ++ insn->type = INSN_CALL_DYNAMIC; ++ insn->retpoline_safe = true; ++ ++ remove_insn_ops(insn); ++ continue; ++ + } else + insn->call_dest = reloc->sym; + diff --git a/queue-5.10/objtool-create-reloc-sections-implicitly.patch b/queue-5.10/objtool-create-reloc-sections-implicitly.patch new file mode 100644 index 00000000000..029650d4fbe --- /dev/null +++ b/queue-5.10/objtool-create-reloc-sections-implicitly.patch @@ -0,0 +1,90 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:08 +0100 +Subject: objtool: Create reloc sections implicitly + +From: Peter Zijlstra + +commit d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e upstream. + +Have elf_add_reloc() create the relocation section implicitly. 
+ +Suggested-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151259.880174448@infradead.org +[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 3 --- + tools/objtool/elf.c | 9 ++++++++- + tools/objtool/elf.h | 1 - + tools/objtool/orc_gen.c | 2 -- + 4 files changed, 8 insertions(+), 7 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -459,9 +459,6 @@ static int create_static_call_sections(s + if (!sec) + return -1; + +- if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) +- return -1; +- + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -498,11 +498,18 @@ err: + return -1; + } + ++static struct section *elf_create_reloc_section(struct elf *elf, ++ struct section *base, ++ int reltype); ++ + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, + unsigned int type, struct symbol *sym, int addend) + { + struct reloc *reloc; + ++ if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA)) ++ return -1; ++ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); +@@ -880,7 +887,7 @@ static struct section *elf_create_rela_r + return sec; + } + +-struct section *elf_create_reloc_section(struct elf *elf, ++static struct section *elf_create_reloc_section(struct elf *elf, + struct section *base, + int reltype) + { +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct relo + + struct elf *elf_open_read(const char *name, int flags); + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); +-struct section *elf_create_reloc_section(struct elf 
*elf, struct section *base, int reltype); + + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, + unsigned int type, struct symbol *sym, int addend); +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -222,8 +222,6 @@ int orc_create(struct objtool_file *file + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); + if (!sec) + return -1; +- if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) +- return -1; + + /* Write ORC entries to sections: */ + list_for_each_entry(entry, &orc_list, list) { diff --git a/queue-5.10/objtool-default-ignore-int3-for-unreachable.patch b/queue-5.10/objtool-default-ignore-int3-for-unreachable.patch new file mode 100644 index 00000000000..32cd3bca9f4 --- /dev/null +++ b/queue-5.10/objtool-default-ignore-int3-for-unreachable.patch @@ -0,0 +1,57 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 8 Mar 2022 16:30:14 +0100 +Subject: objtool: Default ignore INT3 for unreachable + +From: Peter Zijlstra + +commit 1ffbe4e935f9b7308615c75be990aec07464d1e7 upstream. + +Ignore all INT3 instructions for unreachable code warnings, similar to NOP. +This allows using INT3 for various paddings instead of NOPs. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/20220308154317.343312938@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -2775,9 +2775,8 @@ static int validate_branch(struct objtoo + switch (insn->type) { + + case INSN_RETURN: +- if (next_insn && next_insn->type == INSN_TRAP) { +- next_insn->ignore = true; +- } else if (sls && !insn->retpoline_safe) { ++ if (sls && !insn->retpoline_safe && ++ next_insn && next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + } +@@ -2824,9 +2823,8 @@ static int validate_branch(struct objtoo + break; + + case INSN_JUMP_DYNAMIC: +- if (next_insn && next_insn->type == INSN_TRAP) { +- next_insn->ignore = true; +- } else if (sls && !insn->retpoline_safe) { ++ if (sls && !insn->retpoline_safe && ++ next_insn && next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + } +@@ -2997,7 +2995,7 @@ static bool ignore_unreachable_insn(stru + int i; + struct instruction *prev_insn; + +- if (insn->ignore || insn->type == INSN_NOP) ++ if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) + return true; + + /* diff --git a/queue-5.10/objtool-don-t-make-.altinstructions-writable.patch b/queue-5.10/objtool-don-t-make-.altinstructions-writable.patch new file mode 100644 index 00000000000..2dd17b90da0 --- /dev/null +++ b/queue-5.10/objtool-don-t-make-.altinstructions-writable.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Wed, 23 Jun 2021 10:42:28 -0500 +Subject: objtool: Don't make .altinstructions writable + +From: Josh Poimboeuf + +commit e31694e0a7a709293319475d8001e05e31f2178c upstream. 
+ +When objtool creates the .altinstructions section, it sets the SHF_WRITE +flag to make the section writable -- unless the section had already been +previously created by the kernel. The mismatch between kernel-created +and objtool-created section flags can cause failures with external +tooling (kpatch-build). And the section doesn't need to be writable +anyway. + +Make the section flags consistent with the kernel's. + +Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") +Reported-by: Joe Lawrence +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Link: https://lore.kernel.org/r/6c284ae89717889ea136f9f0064d914cd8329d31.1624462939.git.jpoimboe@redhat.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch/x86/decode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -611,7 +611,7 @@ static int elf_add_alternative(struct el + sec = find_section_by_name(elf, ".altinstructions"); + if (!sec) { + sec = elf_create_section(elf, ".altinstructions", +- SHF_WRITE, size, 0); ++ SHF_ALLOC, size, 0); + + if (!sec) { + WARN_ELF("elf_create_section"); diff --git a/queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch b/queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch new file mode 100644 index 00000000000..088e69c3359 --- /dev/null +++ b/queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch @@ -0,0 +1,98 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:34 +0200 +Subject: objtool: Explicitly avoid self modifying code in .altinstr_replacement + +From: Peter Zijlstra + +commit dd003edeffa3cb87bc9862582004f405d77d7670 upstream. + +Assume ALTERNATIVE()s know what they're doing and do not change, or +cause to change, instructions in .altinstr_replacement sections. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.722511775@infradead.org +[cascardo: context adjustment] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: objtool doesn't have any mcount handling] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 36 ++++++++++++++++++++++++++++-------- + 1 file changed, 28 insertions(+), 8 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -870,18 +870,27 @@ static void remove_insn_ops(struct instr + } + } + +-static void add_call_dest(struct objtool_file *file, struct instruction *insn, +- struct symbol *dest, bool sibling) ++static void annotate_call_site(struct objtool_file *file, ++ struct instruction *insn, bool sibling) + { + struct reloc *reloc = insn_reloc(file, insn); ++ struct symbol *sym = insn->call_dest; + +- insn->call_dest = dest; +- if (!dest) ++ if (!sym) ++ sym = reloc->sym; ++ ++ /* ++ * Alternative replacement code is just template code which is ++ * sometimes copied to the original instruction. For now, don't ++ * annotate it. (In the future we might consider annotating the ++ * original instruction if/when it ever makes sense to do so.) ++ */ ++ if (!strcmp(insn->sec->name, ".altinstr_replacement")) + return; + +- if (insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->call_node, +- &file->static_call_list); ++ if (sym->static_call_tramp) { ++ list_add_tail(&insn->call_node, &file->static_call_list); ++ return; + } + + /* +@@ -889,7 +898,7 @@ static void add_call_dest(struct objtool + * so they need a little help, NOP out any KCOV calls from noinstr + * text. 
+ */ +- if (insn->sec->noinstr && insn->call_dest->kcov) { ++ if (insn->sec->noinstr && sym->kcov) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); +@@ -901,7 +910,16 @@ static void add_call_dest(struct objtool + : arch_nop_insn(insn->len)); + + insn->type = sibling ? INSN_RETURN : INSN_NOP; ++ return; + } ++} ++ ++static void add_call_dest(struct objtool_file *file, struct instruction *insn, ++ struct symbol *dest, bool sibling) ++{ ++ insn->call_dest = dest; ++ if (!dest) ++ return; + + /* + * Whatever stack impact regular CALLs have, should be undone +@@ -911,6 +929,8 @@ static void add_call_dest(struct objtool + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); ++ ++ annotate_call_site(file, insn, sibling); + } + + /* diff --git a/queue-5.10/objtool-extract-elf_strtab_concat.patch b/queue-5.10/objtool-extract-elf_strtab_concat.patch new file mode 100644 index 00000000000..5ad92942db3 --- /dev/null +++ b/queue-5.10/objtool-extract-elf_strtab_concat.patch @@ -0,0 +1,112 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:09 +0100 +Subject: objtool: Extract elf_strtab_concat() + +From: Peter Zijlstra + +commit 417a4dc91e559f92404c2544f785b02ce75784c3 upstream. + +Create a common helper to append strings to a strtab. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151259.941474004@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/elf.c | 60 ++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 38 insertions(+), 22 deletions(-) + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -724,13 +724,48 @@ err: + return NULL; + } + ++static int elf_add_string(struct elf *elf, struct section *strtab, char *str) ++{ ++ Elf_Data *data; ++ Elf_Scn *s; ++ int len; ++ ++ if (!strtab) ++ strtab = find_section_by_name(elf, ".strtab"); ++ if (!strtab) { ++ WARN("can't find .strtab section"); ++ return -1; ++ } ++ ++ s = elf_getscn(elf->elf, strtab->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } ++ ++ data = elf_newdata(s); ++ if (!data) { ++ WARN_ELF("elf_newdata"); ++ return -1; ++ } ++ ++ data->d_buf = str; ++ data->d_size = strlen(str) + 1; ++ data->d_align = 1; ++ ++ len = strtab->len; ++ strtab->len += data->d_size; ++ strtab->changed = true; ++ ++ return len; ++} ++ + struct section *elf_create_section(struct elf *elf, const char *name, + unsigned int sh_flags, size_t entsize, int nr) + { + struct section *sec, *shstrtab; + size_t size = entsize * nr; + Elf_Scn *s; +- Elf_Data *data; + + sec = malloc(sizeof(*sec)); + if (!sec) { +@@ -787,7 +822,6 @@ struct section *elf_create_section(struc + sec->sh.sh_addralign = 1; + sec->sh.sh_flags = SHF_ALLOC | sh_flags; + +- + /* Add section name to .shstrtab (or .strtab for Clang) */ + shstrtab = find_section_by_name(elf, ".shstrtab"); + if (!shstrtab) +@@ -796,27 +830,9 @@ struct section *elf_create_section(struc + WARN("can't find .shstrtab or .strtab section"); + return NULL; + } +- +- s = elf_getscn(elf->elf, shstrtab->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return NULL; +- } +- +- data = elf_newdata(s); +- if (!data) { +- 
WARN_ELF("elf_newdata"); ++ sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name); ++ if (sec->sh.sh_name == -1) + return NULL; +- } +- +- data->d_buf = sec->name; +- data->d_size = strlen(name) + 1; +- data->d_align = 1; +- +- sec->sh.sh_name = shstrtab->len; +- +- shstrtab->len += strlen(name) + 1; +- shstrtab->changed = true; + + list_add_tail(&sec->list, &elf->sections); + elf_hash_add(elf->section_hash, &sec->hash, sec->idx); diff --git a/queue-5.10/objtool-extract-elf_symbol_add.patch b/queue-5.10/objtool-extract-elf_symbol_add.patch new file mode 100644 index 00000000000..17d139b7d3c --- /dev/null +++ b/queue-5.10/objtool-extract-elf_symbol_add.patch @@ -0,0 +1,112 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:10 +0100 +Subject: objtool: Extract elf_symbol_add() + +From: Peter Zijlstra + +commit 9a7827b7789c630c1efdb121daa42c6e77dce97f upstream. + +Create a common helper to add symbols. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151300.003468981@infradead.org +[bwh: Backported to 5.10: rb_add() parameter order is different] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/elf.c | 56 ++++++++++++++++++++++++++++------------------------ + 1 file changed, 31 insertions(+), 25 deletions(-) + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -341,12 +341,39 @@ static int read_sections(struct elf *elf + return 0; + } + ++static void elf_add_symbol(struct elf *elf, struct symbol *sym) ++{ ++ struct list_head *entry; ++ struct rb_node *pnode; ++ ++ sym->type = GELF_ST_TYPE(sym->sym.st_info); ++ sym->bind = GELF_ST_BIND(sym->sym.st_info); ++ ++ sym->offset = sym->sym.st_value; ++ sym->len = sym->sym.st_size; ++ ++ rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset); ++ pnode = rb_prev(&sym->node); ++ if (pnode) ++ entry = 
&rb_entry(pnode, struct symbol, node)->list; ++ else ++ entry = &sym->sec->symbol_list; ++ list_add(&sym->list, entry); ++ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); ++ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); ++ ++ /* ++ * Don't store empty STT_NOTYPE symbols in the rbtree. They ++ * can exist within a function, confusing the sorting. ++ */ ++ if (!sym->len) ++ rb_erase(&sym->node, &sym->sec->symbol_tree); ++} ++ + static int read_symbols(struct elf *elf) + { + struct section *symtab, *symtab_shndx, *sec; + struct symbol *sym, *pfunc; +- struct list_head *entry; +- struct rb_node *pnode; + int symbols_nr, i; + char *coldstr; + Elf_Data *shndx_data = NULL; +@@ -391,9 +418,6 @@ static int read_symbols(struct elf *elf) + goto err; + } + +- sym->type = GELF_ST_TYPE(sym->sym.st_info); +- sym->bind = GELF_ST_BIND(sym->sym.st_info); +- + if ((sym->sym.st_shndx > SHN_UNDEF && + sym->sym.st_shndx < SHN_LORESERVE) || + (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { +@@ -406,32 +430,14 @@ static int read_symbols(struct elf *elf) + sym->name); + goto err; + } +- if (sym->type == STT_SECTION) { ++ if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { + sym->name = sym->sec->name; + sym->sec->sym = sym; + } + } else + sym->sec = find_section_by_index(elf, 0); + +- sym->offset = sym->sym.st_value; +- sym->len = sym->sym.st_size; +- +- rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset); +- pnode = rb_prev(&sym->node); +- if (pnode) +- entry = &rb_entry(pnode, struct symbol, node)->list; +- else +- entry = &sym->sec->symbol_list; +- list_add(&sym->list, entry); +- elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); +- elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); +- +- /* +- * Don't store empty STT_NOTYPE symbols in the rbtree. They +- * can exist within a function, confusing the sorting. 
+- */ +- if (!sym->len) +- rb_erase(&sym->node, &sym->sec->symbol_tree); ++ elf_add_symbol(elf, sym); + } + + if (stats) diff --git a/queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch b/queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch new file mode 100644 index 00000000000..a7b01c13df4 --- /dev/null +++ b/queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Mon, 7 Jun 2021 11:45:58 +0200 +Subject: objtool: Fix .symtab_shndx handling for elf_create_undef_symbol() + +From: Peter Zijlstra + +commit 584fd3b31889852d0d6f3dd1e3d8e9619b660d2c upstream. + +When an ELF object uses extended symbol section indexes (IOW it has a +.symtab_shndx section), these must be kept in sync with the regular +symbol table (.symtab). + +So for every new symbol we emit, make sure to also emit a +.symtab_shndx value to keep the arrays of equal size. + +Note: since we're writing an UNDEF symbol, most GElf_Sym fields will +be 0 and we can repurpose one (st_size) to host the 0 for the xshndx +value. 
+ +Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()") +Reported-by: Nick Desaulniers +Suggested-by: Fangrui Song +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Nick Desaulniers +Link: https://lkml.kernel.org/r/YL3q1qFO9QIRL/BA@hirez.programming.kicks-ass.net +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/elf.c | 25 ++++++++++++++++++++++++- + 1 file changed, 24 insertions(+), 1 deletion(-) + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -768,7 +768,7 @@ static int elf_add_string(struct elf *el + + struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) + { +- struct section *symtab; ++ struct section *symtab, *symtab_shndx; + struct symbol *sym; + Elf_Data *data; + Elf_Scn *s; +@@ -819,6 +819,29 @@ struct symbol *elf_create_undef_symbol(s + symtab->len += data->d_size; + symtab->changed = true; + ++ symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); ++ if (symtab_shndx) { ++ s = elf_getscn(elf->elf, symtab_shndx->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return NULL; ++ } ++ ++ data = elf_newdata(s); ++ if (!data) { ++ WARN_ELF("elf_newdata"); ++ return NULL; ++ } ++ ++ data->d_buf = &sym->sym.st_size; /* conveniently 0 */ ++ data->d_size = sizeof(Elf32_Word); ++ data->d_align = 4; ++ data->d_type = ELF_T_WORD; ++ ++ symtab_shndx->len += 4; ++ symtab_shndx->changed = true; ++ } ++ + sym->sec = find_section_by_index(elf, 0); + + elf_add_symbol(elf, sym); diff --git a/queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch b/queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch new file mode 100644 index 00000000000..0c344503de1 --- /dev/null +++ b/queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch @@ -0,0 +1,358 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sun, 17 Apr 2022 17:03:36 +0200 +Subject: objtool: Fix code relocs vs weak symbols + +From: Peter Zijlstra + +commit 4abff6d48dbcea8200c7ea35ba70c242d128ebf3 upstream. 
+ +Occasionally objtool driven code patching (think .static_call_sites +.retpoline_sites etc..) goes sideways and it tries to patch an +instruction that doesn't match. + +Much head-scratching and cursing later the problem is as outlined below +and affects every section that objtool generates for us, very much +including the ORC data. The below uses .static_call_sites because it's +convenient for demonstration purposes, but as mentioned the ORC +sections, .retpoline_sites and __mcount_loc are all similarly affected. + +Consider: + +foo-weak.c: + + extern void __SCT__foo(void); + + __attribute__((weak)) void foo(void) + { + return __SCT__foo(); + } + +foo.c: + + extern void __SCT__foo(void); + extern void my_foo(void); + + void foo(void) + { + my_foo(); + return __SCT__foo(); + } + +These generate the obvious code +(gcc -O2 -fcf-protection=none -fno-asynchronous-unwind-tables -c foo*.c): + +foo-weak.o: +0000000000000000 <foo>: + 0: e9 00 00 00 00 jmpq 5 <foo+0x5> 1: R_X86_64_PLT32 __SCT__foo-0x4 + +foo.o: +0000000000000000 <foo>: + 0: 48 83 ec 08 sub $0x8,%rsp + 4: e8 00 00 00 00 callq 9 <foo+0x9> 5: R_X86_64_PLT32 my_foo-0x4 + 9: 48 83 c4 08 add $0x8,%rsp + d: e9 00 00 00 00 jmpq 12 <foo+0x12> e: R_X86_64_PLT32 __SCT__foo-0x4 + +Now, when we link these two files together, you get something like +(ld -r -o foos.o foo-weak.o foo.o): + +foos.o: +0000000000000000 <foo-0x10>: + 0: e9 00 00 00 00 jmpq 5 <foo-0xb> 1: R_X86_64_PLT32 __SCT__foo-0x4 + 5: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1) + f: 90 nop + +0000000000000010 <foo>: + 10: 48 83 ec 08 sub $0x8,%rsp + 14: e8 00 00 00 00 callq 19 <foo+0x9> 15: R_X86_64_PLT32 my_foo-0x4 + 19: 48 83 c4 08 add $0x8,%rsp + 1d: e9 00 00 00 00 jmpq 22 <foo+0x12> 1e: R_X86_64_PLT32 __SCT__foo-0x4 + +Noting that ld preserves the weak function text, but strips the symbol +off of it (hence objdump doing that funny negative offset thing). This +does lead to 'interesting' unused code issues with objtool when run on +linked objects, but that seems to be working (fingers crossed). + +So far so good..
Now lets consider the objtool static_call output +section (readelf output, old binutils): + +foo-weak.o: + +Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry: + Offset Info Type Symbol's Value Symbol's Name + Addend +0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + 0 +0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 + +foo.o: + +Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries: + Offset Info Type Symbol's Value Symbol's Name + Addend +0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + d +0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 + +foos.o: + +Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries: + Offset Info Type Symbol's Value Symbol's Name + Addend +0000000000000000 0000000100000002 R_X86_64_PC32 0000000000000000 .text + 0 +0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 +0000000000000008 0000000100000002 R_X86_64_PC32 0000000000000000 .text + 1d +000000000000000c 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 + +So we have two patch sites, one in the dead code of the weak foo and one +in the real foo. All is well. 
+ +*HOWEVER*, when the toolchain strips unused section symbols it +generates things like this (using new enough binutils): + +foo-weak.o: + +Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry: + Offset Info Type Symbol's Value Symbol's Name + Addend +0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 foo + 0 +0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 + +foo.o: + +Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries: + Offset Info Type Symbol's Value Symbol's Name + Addend +0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 foo + d +0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 + +foos.o: + +Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries: + Offset Info Type Symbol's Value Symbol's Name + Addend +0000000000000000 0000000100000002 R_X86_64_PC32 0000000000000000 foo + 0 +0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 +0000000000000008 0000000100000002 R_X86_64_PC32 0000000000000000 foo + d +000000000000000c 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 + +And now we can see how that foos.o .static_call_sites goes side-ways, we +now have _two_ patch sites in foo. One for the weak symbol at foo+0 +(which is no longer a static_call site!) and one at foo+d which is in +fact the right location. + +This seems to happen when objtool cannot find a section symbol, in which +case it falls back to any other symbol to key off of, however in this +case that goes terribly wrong! + +As such, teach objtool to create a section symbol when there isn't +one. 
+ +Fixes: 44f6a7c0755d ("objtool: Fix seg fault with Clang non-section symbols") +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Josh Poimboeuf +Link: https://lkml.kernel.org/r/20220419203807.655552918@infradead.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/elf.c | 187 +++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 165 insertions(+), 22 deletions(-) + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -537,37 +537,180 @@ int elf_add_reloc(struct elf *elf, struc + return 0; + } + +-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, +- unsigned long offset, unsigned int type, +- struct section *insn_sec, unsigned long insn_off) ++/* ++ * Ensure that any reloc section containing references to @sym is marked ++ * changed such that it will get re-generated in elf_rebuild_reloc_sections() ++ * with the new symbol index. ++ */ ++static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) ++{ ++ struct section *sec; ++ ++ list_for_each_entry(sec, &elf->sections, list) { ++ struct reloc *reloc; ++ ++ if (sec->changed) ++ continue; ++ ++ list_for_each_entry(reloc, &sec->reloc_list, list) { ++ if (reloc->sym == sym) { ++ sec->changed = true; ++ break; ++ } ++ } ++ } ++} ++ ++/* ++ * Move the first global symbol, as per sh_info, into a new, higher symbol ++ * index. This fees up the shndx for a new local symbol. 
++ */ ++static int elf_move_global_symbol(struct elf *elf, struct section *symtab, ++ struct section *symtab_shndx) + { ++ Elf_Data *data, *shndx_data = NULL; ++ Elf32_Word first_non_local; + struct symbol *sym; +- int addend; ++ Elf_Scn *s; + +- if (insn_sec->sym) { +- sym = insn_sec->sym; +- addend = insn_off; ++ first_non_local = symtab->sh.sh_info; + +- } else { +- /* +- * The Clang assembler strips section symbols, so we have to +- * reference the function symbol instead: +- */ +- sym = find_symbol_containing(insn_sec, insn_off); +- if (!sym) { +- /* +- * Hack alert. This happens when we need to reference +- * the NOP pad insn immediately after the function. +- */ +- sym = find_symbol_containing(insn_sec, insn_off - 1); ++ sym = find_symbol_by_index(elf, first_non_local); ++ if (!sym) { ++ WARN("no non-local symbols !?"); ++ return first_non_local; ++ } ++ ++ s = elf_getscn(elf->elf, symtab->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } ++ ++ data = elf_newdata(s); ++ if (!data) { ++ WARN_ELF("elf_newdata"); ++ return -1; ++ } ++ ++ data->d_buf = &sym->sym; ++ data->d_size = sizeof(sym->sym); ++ data->d_align = 1; ++ data->d_type = ELF_T_SYM; ++ ++ sym->idx = symtab->sh.sh_size / sizeof(sym->sym); ++ elf_dirty_reloc_sym(elf, sym); ++ ++ symtab->sh.sh_info += 1; ++ symtab->sh.sh_size += data->d_size; ++ symtab->changed = true; ++ ++ if (symtab_shndx) { ++ s = elf_getscn(elf->elf, symtab_shndx->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return -1; + } + +- if (!sym) { +- WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off); ++ shndx_data = elf_newdata(s); ++ if (!shndx_data) { ++ WARN_ELF("elf_newshndx_data"); + return -1; + } + +- addend = insn_off - sym->offset; ++ shndx_data->d_buf = &sym->sec->idx; ++ shndx_data->d_size = sizeof(Elf32_Word); ++ shndx_data->d_align = 4; ++ shndx_data->d_type = ELF_T_WORD; ++ ++ symtab_shndx->sh.sh_size += 4; ++ symtab_shndx->changed = true; ++ } ++ ++ return first_non_local; ++} ++ 
++static struct symbol * ++elf_create_section_symbol(struct elf *elf, struct section *sec) ++{ ++ struct section *symtab, *symtab_shndx; ++ Elf_Data *shndx_data = NULL; ++ struct symbol *sym; ++ Elf32_Word shndx; ++ ++ symtab = find_section_by_name(elf, ".symtab"); ++ if (symtab) { ++ symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); ++ if (symtab_shndx) ++ shndx_data = symtab_shndx->data; ++ } else { ++ WARN("no .symtab"); ++ return NULL; ++ } ++ ++ sym = malloc(sizeof(*sym)); ++ if (!sym) { ++ perror("malloc"); ++ return NULL; ++ } ++ memset(sym, 0, sizeof(*sym)); ++ ++ sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx); ++ if (sym->idx < 0) { ++ WARN("elf_move_global_symbol"); ++ return NULL; ++ } ++ ++ sym->name = sec->name; ++ sym->sec = sec; ++ ++ // st_name 0 ++ sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); ++ // st_other 0 ++ // st_value 0 ++ // st_size 0 ++ shndx = sec->idx; ++ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { ++ sym->sym.st_shndx = shndx; ++ if (!shndx_data) ++ shndx = 0; ++ } else { ++ sym->sym.st_shndx = SHN_XINDEX; ++ if (!shndx_data) { ++ WARN("no .symtab_shndx"); ++ return NULL; ++ } ++ } ++ ++ if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) { ++ WARN_ELF("gelf_update_symshndx"); ++ return NULL; ++ } ++ ++ elf_add_symbol(elf, sym); ++ ++ return sym; ++} ++ ++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, ++ unsigned long offset, unsigned int type, ++ struct section *insn_sec, unsigned long insn_off) ++{ ++ struct symbol *sym = insn_sec->sym; ++ int addend = insn_off; ++ ++ if (!sym) { ++ /* ++ * Due to how weak functions work, we must use section based ++ * relocations. Symbol based relocations would result in the ++ * weak and non-weak function annotations being overlaid on the ++ * non-weak function after linking. 
++ */ ++ sym = elf_create_section_symbol(elf, insn_sec); ++ if (!sym) ++ return -1; ++ ++ insn_sec->sym = sym; + } + + return elf_add_reloc(elf, sec, offset, type, sym, addend); diff --git a/queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch b/queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch new file mode 100644 index 00000000000..3e3a60dd299 --- /dev/null +++ b/queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch @@ -0,0 +1,103 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Mikulas Patocka +Date: Mon, 16 May 2022 11:06:36 -0400 +Subject: objtool: Fix objtool regression on x32 systems + +From: Mikulas Patocka + +commit 22682a07acc308ef78681572e19502ce8893c4d4 upstream. + +Commit c087c6e7b551 ("objtool: Fix type of reloc::addend") failed to +appreciate cross building from ILP32 hosts, where 'int' == 'long' and +the issue persists. + +As such, use s64/int64_t/Elf64_Sxword for this field and suffer the +pain that is ISO C99 printf formats for it. 
+ +Fixes: c087c6e7b551 ("objtool: Fix type of reloc::addend") +Signed-off-by: Mikulas Patocka +[peterz: reword changelog, s/long long/s64/] +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Cc: +Link: https://lkml.kernel.org/r/alpine.LRH.2.02.2205161041260.11556@file01.intranet.prod.int.rdu2.redhat.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 9 +++++---- + tools/objtool/elf.c | 2 +- + tools/objtool/elf.h | 4 ++-- + 3 files changed, 8 insertions(+), 7 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include + #include + + #include "builtin.h" +@@ -467,12 +468,12 @@ static int add_dead_ends(struct objtool_ + else if (reloc->addend == reloc->sym->sec->len) { + insn = find_last_insn(file, reloc->sym->sec); + if (!insn) { +- WARN("can't find unreachable insn at %s+0x%lx", ++ WARN("can't find unreachable insn at %s+0x%" PRIx64, + reloc->sym->sec->name, reloc->addend); + return -1; + } + } else { +- WARN("can't find unreachable insn at %s+0x%lx", ++ WARN("can't find unreachable insn at %s+0x%" PRIx64, + reloc->sym->sec->name, reloc->addend); + return -1; + } +@@ -502,12 +503,12 @@ reachable: + else if (reloc->addend == reloc->sym->sec->len) { + insn = find_last_insn(file, reloc->sym->sec); + if (!insn) { +- WARN("can't find reachable insn at %s+0x%lx", ++ WARN("can't find reachable insn at %s+0x%" PRIx64, + reloc->sym->sec->name, reloc->addend); + return -1; + } + } else { +- WARN("can't find reachable insn at %s+0x%lx", ++ WARN("can't find reachable insn at %s+0x%" PRIx64, + reloc->sym->sec->name, reloc->addend); + return -1; + } +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -510,7 +510,7 @@ static struct section *elf_create_reloc_ + int reltype); + + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, +- unsigned int type, struct symbol *sym, long addend) 
++ unsigned int type, struct symbol *sym, s64 addend) + { + struct reloc *reloc; + +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -73,7 +73,7 @@ struct reloc { + struct symbol *sym; + unsigned long offset; + unsigned int type; +- long addend; ++ s64 addend; + int idx; + bool jump_table_start; + }; +@@ -127,7 +127,7 @@ struct elf *elf_open_read(const char *na + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); + + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, +- unsigned int type, struct symbol *sym, long addend); ++ unsigned int type, struct symbol *sym, s64 addend); + int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int type, + struct section *insn_sec, unsigned long insn_off); diff --git a/queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch b/queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch new file mode 100644 index 00000000000..bf49353fac7 --- /dev/null +++ b/queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch @@ -0,0 +1,62 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Wed, 23 Mar 2022 23:35:01 +0100 +Subject: objtool: Fix SLS validation for kcov tail-call replacement + +From: Peter Zijlstra + +commit 7a53f408902d913cd541b4f8ad7dbcd4961f5b82 upstream. + +Since not all compilers have a function attribute to disable KCOV +instrumentation, objtool can rewrite KCOV instrumentation in noinstr +functions as per commit: + + f56dae88a81f ("objtool: Handle __sanitize_cov*() tail calls") + +However, this has subtle interaction with the SLS validation from +commit: + + 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation") + +In that when a tail-call instrucion is replaced with a RET an +additional INT3 instruction is also written, but is not represented in +the decoded instruction stream. 
+ +This then leads to false positive missing INT3 objtool warnings in +noinstr code. + +Instead of adding additional struct instruction objects, mark the RET +instruction with retpoline_safe to suppress the warning (since we know +there really is an INT3). + +Fixes: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation") +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20220323230712.GA8939@worktop.programming.kicks-ass.net +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -961,6 +961,17 @@ static void annotate_call_site(struct ob + : arch_nop_insn(insn->len)); + + insn->type = sibling ? INSN_RETURN : INSN_NOP; ++ ++ if (sibling) { ++ /* ++ * We've replaced the tail-call JMP insn by two new ++ * insn: RET; INT3, except we only have a single struct ++ * insn here. Mark it retpoline_safe to avoid the SLS ++ * warning, instead of adding another insn. ++ */ ++ insn->retpoline_safe = true; ++ } ++ + return; + } + } diff --git a/queue-5.10/objtool-fix-symbol-creation.patch b/queue-5.10/objtool-fix-symbol-creation.patch new file mode 100644 index 00000000000..fa16132987e --- /dev/null +++ b/queue-5.10/objtool-fix-symbol-creation.patch @@ -0,0 +1,350 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 17 May 2022 17:42:04 +0200 +Subject: objtool: Fix symbol creation + +From: Peter Zijlstra + +commit ead165fa1042247b033afad7be4be9b815d04ade upstream. + +Nathan reported objtool failing with the following messages: + + warning: objtool: no non-local symbols !? + warning: objtool: gelf_update_symshndx: invalid section index + +The problem is due to commit 4abff6d48dbc ("objtool: Fix code relocs +vs weak symbols") failing to consider the case where an object would +have no non-local symbols. 
+ +The problem that commit tries to address is adding a STB_LOCAL symbol +to the symbol table in light of the ELF spec's requirement that: + + In each symbol table, all symbols with STB_LOCAL binding precede the + weak and global symbols. As ``Sections'' above describes, a symbol + table section's sh_info section header member holds the symbol table + index for the first non-local symbol. + +The approach taken is to find this first non-local symbol, move that +to the end and then re-use the freed spot to insert a new local symbol +and increment sh_info. + +Except it never considered the case of object files without global +symbols and got a whole bunch of details wrong -- so many in fact that +it is a wonder it ever worked :/ + +Specifically: + + - It failed to re-hash the symbol on the new index, so a subsequent + find_symbol_by_index() would not find it at the new location and a + query for the old location would now return a non-deterministic + choice between the old and new symbol. + + - It failed to appreciate that the GElf wrappers are not a valid disk + format (it works because GElf is basically Elf64 and we only + support x86_64 atm.) + + - It failed to fully appreciate how horrible the libelf API really is + and got the gelf_update_symshndx() call pretty much completely + wrong; with the direct consequence that if inserting a second + STB_LOCAL symbol would require moving the same STB_GLOBAL symbol + again it would completely come unstuck. + +Write a new elf_update_symbol() function that wraps all the magic +required to update or create a new symbol at a given index. + +Specifically, gelf_update_sym*() require an @ndx argument that is +relative to the @data argument; this means you have to manually +iterate the section data descriptor list and update @ndx.
+ +Fixes: 4abff6d48dbc ("objtool: Fix code relocs vs weak symbols") +Reported-by: Nathan Chancellor +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Nathan Chancellor +Cc: +Link: https://lkml.kernel.org/r/YoPCTEYjoPqE4ZxB@hirez.programming.kicks-ass.net +Signed-off-by: Greg Kroah-Hartman +[bwh: Backported to 5.10: elf_hash_add() takes a hash table pointer, + not just a name] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/elf.c | 196 +++++++++++++++++++++++++++++++++------------------- + 1 file changed, 128 insertions(+), 68 deletions(-) + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -346,6 +346,8 @@ static void elf_add_symbol(struct elf *e + struct list_head *entry; + struct rb_node *pnode; + ++ sym->alias = sym; ++ + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + +@@ -401,7 +403,6 @@ static int read_symbols(struct elf *elf) + return -1; + } + memset(sym, 0, sizeof(*sym)); +- sym->alias = sym; + + sym->idx = i; + +@@ -562,24 +563,21 @@ static void elf_dirty_reloc_sym(struct e + } + + /* +- * Move the first global symbol, as per sh_info, into a new, higher symbol +- * index. This fees up the shndx for a new local symbol. ++ * The libelf API is terrible; gelf_update_sym*() takes a data block relative ++ * index value, *NOT* the symbol index. As such, iterate the data blocks and ++ * adjust index until it fits. ++ * ++ * If no data block is found, allow adding a new data block provided the index ++ * is only one past the end. 
+ */ +-static int elf_move_global_symbol(struct elf *elf, struct section *symtab, +- struct section *symtab_shndx) ++static int elf_update_symbol(struct elf *elf, struct section *symtab, ++ struct section *symtab_shndx, struct symbol *sym) + { +- Elf_Data *data, *shndx_data = NULL; +- Elf32_Word first_non_local; +- struct symbol *sym; +- Elf_Scn *s; +- +- first_non_local = symtab->sh.sh_info; +- +- sym = find_symbol_by_index(elf, first_non_local); +- if (!sym) { +- WARN("no non-local symbols !?"); +- return first_non_local; +- } ++ Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF; ++ Elf_Data *symtab_data = NULL, *shndx_data = NULL; ++ Elf64_Xword entsize = symtab->sh.sh_entsize; ++ int max_idx, idx = sym->idx; ++ Elf_Scn *s, *t = NULL; + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { +@@ -587,79 +585,124 @@ static int elf_move_global_symbol(struct + return -1; + } + +- data = elf_newdata(s); +- if (!data) { +- WARN_ELF("elf_newdata"); +- return -1; ++ if (symtab_shndx) { ++ t = elf_getscn(elf->elf, symtab_shndx->idx); ++ if (!t) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } + } + +- data->d_buf = &sym->sym; +- data->d_size = sizeof(sym->sym); +- data->d_align = 1; +- data->d_type = ELF_T_SYM; ++ for (;;) { ++ /* get next data descriptor for the relevant sections */ ++ symtab_data = elf_getdata(s, symtab_data); ++ if (t) ++ shndx_data = elf_getdata(t, shndx_data); ++ ++ /* end-of-list */ ++ if (!symtab_data) { ++ void *buf; ++ ++ if (idx) { ++ /* we don't do holes in symbol tables */ ++ WARN("index out of range"); ++ return -1; ++ } + +- sym->idx = symtab->sh.sh_size / sizeof(sym->sym); +- elf_dirty_reloc_sym(elf, sym); ++ /* if @idx == 0, it's the next contiguous entry, create it */ ++ symtab_data = elf_newdata(s); ++ if (t) ++ shndx_data = elf_newdata(t); ++ ++ buf = calloc(1, entsize); ++ if (!buf) { ++ WARN("malloc"); ++ return -1; ++ } + +- symtab->sh.sh_info += 1; +- symtab->sh.sh_size += data->d_size; +- symtab->changed = true; ++ 
symtab_data->d_buf = buf; ++ symtab_data->d_size = entsize; ++ symtab_data->d_align = 1; ++ symtab_data->d_type = ELF_T_SYM; ++ ++ symtab->sh.sh_size += entsize; ++ symtab->changed = true; ++ ++ if (t) { ++ shndx_data->d_buf = &sym->sec->idx; ++ shndx_data->d_size = sizeof(Elf32_Word); ++ shndx_data->d_align = sizeof(Elf32_Word); ++ shndx_data->d_type = ELF_T_WORD; + +- if (symtab_shndx) { +- s = elf_getscn(elf->elf, symtab_shndx->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); ++ symtab_shndx->sh.sh_size += sizeof(Elf32_Word); ++ symtab_shndx->changed = true; ++ } ++ ++ break; ++ } ++ ++ /* empty blocks should not happen */ ++ if (!symtab_data->d_size) { ++ WARN("zero size data"); + return -1; + } + +- shndx_data = elf_newdata(s); ++ /* is this the right block? */ ++ max_idx = symtab_data->d_size / entsize; ++ if (idx < max_idx) ++ break; ++ ++ /* adjust index and try again */ ++ idx -= max_idx; ++ } ++ ++ /* something went side-ways */ ++ if (idx < 0) { ++ WARN("negative index"); ++ return -1; ++ } ++ ++ /* setup extended section index magic and write the symbol */ ++ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { ++ sym->sym.st_shndx = shndx; ++ if (!shndx_data) ++ shndx = 0; ++ } else { ++ sym->sym.st_shndx = SHN_XINDEX; + if (!shndx_data) { +- WARN_ELF("elf_newshndx_data"); ++ WARN("no .symtab_shndx"); + return -1; + } ++ } + +- shndx_data->d_buf = &sym->sec->idx; +- shndx_data->d_size = sizeof(Elf32_Word); +- shndx_data->d_align = 4; +- shndx_data->d_type = ELF_T_WORD; +- +- symtab_shndx->sh.sh_size += 4; +- symtab_shndx->changed = true; ++ if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) { ++ WARN_ELF("gelf_update_symshndx"); ++ return -1; + } + +- return first_non_local; ++ return 0; + } + + static struct symbol * + elf_create_section_symbol(struct elf *elf, struct section *sec) + { + struct section *symtab, *symtab_shndx; +- Elf_Data *shndx_data = NULL; +- struct symbol *sym; +- Elf32_Word shndx; ++ Elf32_Word first_non_local, 
new_idx; ++ struct symbol *sym, *old; + + symtab = find_section_by_name(elf, ".symtab"); + if (symtab) { + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); +- if (symtab_shndx) +- shndx_data = symtab_shndx->data; + } else { + WARN("no .symtab"); + return NULL; + } + +- sym = malloc(sizeof(*sym)); ++ sym = calloc(1, sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } +- memset(sym, 0, sizeof(*sym)); +- +- sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx); +- if (sym->idx < 0) { +- WARN("elf_move_global_symbol"); +- return NULL; +- } + + sym->name = sec->name; + sym->sec = sec; +@@ -669,24 +712,41 @@ elf_create_section_symbol(struct elf *el + // st_other 0 + // st_value 0 + // st_size 0 +- shndx = sec->idx; +- if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { +- sym->sym.st_shndx = shndx; +- if (!shndx_data) +- shndx = 0; +- } else { +- sym->sym.st_shndx = SHN_XINDEX; +- if (!shndx_data) { +- WARN("no .symtab_shndx"); ++ ++ /* ++ * Move the first global symbol, as per sh_info, into a new, higher ++ * symbol index. This fees up a spot for a new local symbol. ++ */ ++ first_non_local = symtab->sh.sh_info; ++ new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; ++ old = find_symbol_by_index(elf, first_non_local); ++ if (old) { ++ old->idx = new_idx; ++ ++ hlist_del(&old->hash); ++ elf_hash_add(elf->symbol_hash, &old->hash, old->idx); ++ ++ elf_dirty_reloc_sym(elf, old); ++ ++ if (elf_update_symbol(elf, symtab, symtab_shndx, old)) { ++ WARN("elf_update_symbol move"); + return NULL; + } ++ ++ new_idx = first_non_local; + } + +- if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) { +- WARN_ELF("gelf_update_symshndx"); ++ sym->idx = new_idx; ++ if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { ++ WARN("elf_update_symbol"); + return NULL; + } + ++ /* ++ * Either way, we added a LOCAL symbol. 
++ */ ++ symtab->sh.sh_info += 1; ++ + elf_add_symbol(elf, sym); + + return sym; diff --git a/queue-5.10/objtool-fix-type-of-reloc-addend.patch b/queue-5.10/objtool-fix-type-of-reloc-addend.patch new file mode 100644 index 00000000000..4998d7e8d54 --- /dev/null +++ b/queue-5.10/objtool-fix-type-of-reloc-addend.patch @@ -0,0 +1,92 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sun, 17 Apr 2022 17:03:40 +0200 +Subject: objtool: Fix type of reloc::addend + +From: Peter Zijlstra + +commit c087c6e7b551b7f208c0b852304f044954cf2bb3 upstream. + +Elf{32,64}_Rela::r_addend is of type: Elf{32,64}_Sword, that means +that our reloc::addend needs to be long or face tuncation issues when +we do elf_rebuild_reloc_section(): + + - 107: 48 b8 00 00 00 00 00 00 00 00 movabs $0x0,%rax 109: R_X86_64_64 level4_kernel_pgt+0x80000067 + + 107: 48 b8 00 00 00 00 00 00 00 00 movabs $0x0,%rax 109: R_X86_64_64 level4_kernel_pgt-0x7fffff99 + +Fixes: 627fce14809b ("objtool: Add ORC unwind table generation") +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Josh Poimboeuf +Link: https://lkml.kernel.org/r/20220419203807.596871927@infradead.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 8 ++++---- + tools/objtool/elf.c | 2 +- + tools/objtool/elf.h | 4 ++-- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -467,12 +467,12 @@ static int add_dead_ends(struct objtool_ + else if (reloc->addend == reloc->sym->sec->len) { + insn = find_last_insn(file, reloc->sym->sec); + if (!insn) { +- WARN("can't find unreachable insn at %s+0x%x", ++ WARN("can't find unreachable insn at %s+0x%lx", + reloc->sym->sec->name, reloc->addend); + return -1; + } + } else { +- WARN("can't find unreachable insn at %s+0x%x", ++ WARN("can't find unreachable insn at %s+0x%lx", + reloc->sym->sec->name, reloc->addend); + return -1; + } +@@ -502,12 
+502,12 @@ reachable: + else if (reloc->addend == reloc->sym->sec->len) { + insn = find_last_insn(file, reloc->sym->sec); + if (!insn) { +- WARN("can't find reachable insn at %s+0x%x", ++ WARN("can't find reachable insn at %s+0x%lx", + reloc->sym->sec->name, reloc->addend); + return -1; + } + } else { +- WARN("can't find reachable insn at %s+0x%x", ++ WARN("can't find reachable insn at %s+0x%lx", + reloc->sym->sec->name, reloc->addend); + return -1; + } +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -509,7 +509,7 @@ static struct section *elf_create_reloc_ + int reltype); + + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, +- unsigned int type, struct symbol *sym, int addend) ++ unsigned int type, struct symbol *sym, long addend) + { + struct reloc *reloc; + +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -73,7 +73,7 @@ struct reloc { + struct symbol *sym; + unsigned long offset; + unsigned int type; +- int addend; ++ long addend; + int idx; + bool jump_table_start; + }; +@@ -127,7 +127,7 @@ struct elf *elf_open_read(const char *na + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); + + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, +- unsigned int type, struct symbol *sym, int addend); ++ unsigned int type, struct symbol *sym, long addend); + int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int type, + struct section *insn_sec, unsigned long insn_off); diff --git a/queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch b/queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch new file mode 100644 index 00000000000..d05e1873498 --- /dev/null +++ b/queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch @@ -0,0 +1,256 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Thu, 24 Jun 2021 11:41:02 +0200 +Subject: objtool: Handle 
__sanitize_cov*() tail calls + +From: Peter Zijlstra + +commit f56dae88a81fded66adf2bea9922d1d98d1da14f upstream. + +Turns out the compilers also generate tail calls to __sanitize_cov*(), +make sure to also patch those out in noinstr code. + +Fixes: 0f1441b44e82 ("objtool: Fix noinstr vs KCOV") +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Marco Elver +Link: https://lore.kernel.org/r/20210624095147.818783799@infradead.org +Signed-off-by: Sasha Levin +[bwh: Backported to 5.10: + - objtool doesn't have any mcount handling + - Write the NOPs as hex literals since we can't use ] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch.h | 1 + tools/objtool/arch/x86/decode.c | 20 ++++++ + tools/objtool/check.c | 123 +++++++++++++++++++++------------------- + 3 files changed, 86 insertions(+), 58 deletions(-) + +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -83,6 +83,7 @@ unsigned long arch_jump_destination(stru + unsigned long arch_dest_reloc_offset(int addend); + + const char *arch_nop_insn(int len); ++const char *arch_ret_insn(int len); + + int arch_decode_hint_reg(u8 sp_reg, int *base); + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -586,6 +586,26 @@ const char *arch_nop_insn(int len) + return nops[len-1]; + } + ++#define BYTE_RET 0xC3 ++ ++const char *arch_ret_insn(int len) ++{ ++ static const char ret[5][5] = { ++ { BYTE_RET }, ++ { BYTE_RET, 0x90 }, ++ { BYTE_RET, 0x66, 0x90 }, ++ { BYTE_RET, 0x0f, 0x1f, 0x00 }, ++ { BYTE_RET, 0x0f, 0x1f, 0x40, 0x00 }, ++ }; ++ ++ if (len < 1 || len > 5) { ++ WARN("invalid RET size: %d\n", len); ++ return NULL; ++ } ++ ++ return ret[len-1]; ++} ++ + /* asm/alternative.h ? 
*/ + + #define ALTINSTR_FLAG_INV (1 << 15) +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -860,6 +860,60 @@ static struct reloc *insn_reloc(struct o + return insn->reloc; + } + ++static void remove_insn_ops(struct instruction *insn) ++{ ++ struct stack_op *op, *tmp; ++ ++ list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { ++ list_del(&op->list); ++ free(op); ++ } ++} ++ ++static void add_call_dest(struct objtool_file *file, struct instruction *insn, ++ struct symbol *dest, bool sibling) ++{ ++ struct reloc *reloc = insn_reloc(file, insn); ++ ++ insn->call_dest = dest; ++ if (!dest) ++ return; ++ ++ if (insn->call_dest->static_call_tramp) { ++ list_add_tail(&insn->call_node, ++ &file->static_call_list); ++ } ++ ++ /* ++ * Many compilers cannot disable KCOV with a function attribute ++ * so they need a little help, NOP out any KCOV calls from noinstr ++ * text. ++ */ ++ if (insn->sec->noinstr && ++ !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { ++ if (reloc) { ++ reloc->type = R_NONE; ++ elf_write_reloc(file->elf, reloc); ++ } ++ ++ elf_write_insn(file->elf, insn->sec, ++ insn->offset, insn->len, ++ sibling ? arch_ret_insn(insn->len) ++ : arch_nop_insn(insn->len)); ++ ++ insn->type = sibling ? INSN_RETURN : INSN_NOP; ++ } ++ ++ /* ++ * Whatever stack impact regular CALLs have, should be undone ++ * by the RETURN of the called function. ++ * ++ * Annotated intra-function calls retain the stack_ops but ++ * are converted to JUMP, see read_intra_function_calls(). ++ */ ++ remove_insn_ops(insn); ++} ++ + /* + * Find the destination instructions for all jumps. 
+ */ +@@ -898,11 +952,7 @@ static int add_jump_destinations(struct + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ +- insn->call_dest = reloc->sym; +- if (insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->call_node, +- &file->static_call_list); +- } ++ add_call_dest(file, insn, reloc->sym, true); + continue; + } else if (reloc->sym->sec->idx) { + dest_sec = reloc->sym->sec; +@@ -958,13 +1008,8 @@ static int add_jump_destinations(struct + + } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && + insn->jump_dest->offset == insn->jump_dest->func->offset) { +- + /* internal sibling call (without reloc) */ +- insn->call_dest = insn->jump_dest->func; +- if (insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->call_node, +- &file->static_call_list); +- } ++ add_call_dest(file, insn, insn->jump_dest->func, true); + } + } + } +@@ -972,16 +1017,6 @@ static int add_jump_destinations(struct + return 0; + } + +-static void remove_insn_ops(struct instruction *insn) +-{ +- struct stack_op *op, *tmp; +- +- list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { +- list_del(&op->list); +- free(op); +- } +-} +- + static struct symbol *find_call_destination(struct section *sec, unsigned long offset) + { + struct symbol *call_dest; +@@ -1000,6 +1035,7 @@ static int add_call_destinations(struct + { + struct instruction *insn; + unsigned long dest_off; ++ struct symbol *dest; + struct reloc *reloc; + + for_each_insn(file, insn) { +@@ -1009,7 +1045,9 @@ static int add_call_destinations(struct + reloc = insn_reloc(file, insn); + if (!reloc) { + dest_off = arch_jump_destination(insn); +- insn->call_dest = find_call_destination(insn->sec, dest_off); ++ dest = find_call_destination(insn->sec, dest_off); ++ ++ add_call_dest(file, insn, dest, false); + + if (insn->ignore) + continue; +@@ -1027,9 +1065,8 @@ static int add_call_destinations(struct + + } else if (reloc->sym->type == STT_SECTION) { + dest_off = 
arch_dest_reloc_offset(reloc->addend); +- insn->call_dest = find_call_destination(reloc->sym->sec, +- dest_off); +- if (!insn->call_dest) { ++ dest = find_call_destination(reloc->sym->sec, dest_off); ++ if (!dest) { + WARN_FUNC("can't find call dest symbol at %s+0x%lx", + insn->sec, insn->offset, + reloc->sym->sec->name, +@@ -1037,6 +1074,8 @@ static int add_call_destinations(struct + return -1; + } + ++ add_call_dest(file, insn, dest, false); ++ + } else if (arch_is_retpoline(reloc->sym)) { + /* + * Retpoline calls are really dynamic calls in +@@ -1052,39 +1091,7 @@ static int add_call_destinations(struct + continue; + + } else +- insn->call_dest = reloc->sym; +- +- if (insn->call_dest && insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->call_node, +- &file->static_call_list); +- } +- +- /* +- * Many compilers cannot disable KCOV with a function attribute +- * so they need a little help, NOP out any KCOV calls from noinstr +- * text. +- */ +- if (insn->sec->noinstr && +- !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { +- if (reloc) { +- reloc->type = R_NONE; +- elf_write_reloc(file->elf, reloc); +- } +- +- elf_write_insn(file->elf, insn->sec, +- insn->offset, insn->len, +- arch_nop_insn(insn->len)); +- insn->type = INSN_NOP; +- } +- +- /* +- * Whatever stack impact regular CALLs have, should be undone +- * by the RETURN of the called function. +- * +- * Annotated intra-function calls retain the stack_ops but +- * are converted to JUMP, see read_intra_function_calls(). 
+- */ +- remove_insn_ops(insn); ++ add_call_dest(file, insn, reloc->sym, false); + } + + return 0; diff --git a/queue-5.10/objtool-handle-per-arch-retpoline-naming.patch b/queue-5.10/objtool-handle-per-arch-retpoline-naming.patch new file mode 100644 index 00000000000..fd753c0d11e --- /dev/null +++ b/queue-5.10/objtool-handle-per-arch-retpoline-naming.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:04 +0100 +Subject: objtool: Handle per arch retpoline naming + +From: Peter Zijlstra + +commit 530b4ddd9dd92b263081f5c7786d39a8129c8b2d upstream. + +The __x86_indirect_ naming is obviously not generic. Shorten to allow +matching some additional magic names later. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151259.630296706@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch.h | 2 ++ + tools/objtool/arch/x86/decode.c | 5 +++++ + tools/objtool/check.c | 9 +++++++-- + 3 files changed, 14 insertions(+), 2 deletions(-) + +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -86,4 +86,6 @@ const char *arch_nop_insn(int len); + + int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); + ++bool arch_is_retpoline(struct symbol *sym); ++ + #endif /* _ARCH_H */ +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -620,3 +620,8 @@ int arch_decode_hint_reg(struct instruct + + return 0; + } ++ ++bool arch_is_retpoline(struct symbol *sym) ++{ ++ return !strncmp(sym->name, "__x86_indirect_", 15); ++} +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -778,6 +778,11 @@ static int add_ignore_alternatives(struc + return 0; + } + ++__weak bool arch_is_retpoline(struct symbol *sym) ++{ ++ return false; ++} ++ + /* + * Find the destination instructions for all jumps. 
+ */ +@@ -800,7 +805,7 @@ static int add_jump_destinations(struct + } else if (reloc->sym->type == STT_SECTION) { + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); +- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { ++ } else if (arch_is_retpoline(reloc->sym)) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. +@@ -954,7 +959,7 @@ static int add_call_destinations(struct + return -1; + } + +- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { ++ } else if (arch_is_retpoline(reloc->sym)) { + /* + * Retpoline calls are really dynamic calls in + * disguise, so convert them accordingly. diff --git a/queue-5.10/objtool-introduce-cfi-hash.patch b/queue-5.10/objtool-introduce-cfi-hash.patch new file mode 100644 index 00000000000..9fc42d5120d --- /dev/null +++ b/queue-5.10/objtool-introduce-cfi-hash.patch @@ -0,0 +1,466 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Thu, 24 Jun 2021 11:41:01 +0200 +Subject: objtool: Introduce CFI hash + +From: Peter Zijlstra + +commit 8b946cc38e063f0f7bb67789478c38f6d7d457c9 upstream. + +Andi reported that objtool on vmlinux.o consumes more memory than his +system has, leading to horrific performance. + +This is in part because we keep a struct instruction for every +instruction in the file in-memory. Shrink struct instruction by +removing the CFI state (which includes full register state) from it +and demand allocating it. + +Given most instructions don't actually change CFI state, there's lots +of repetition there, so add a hash table to find previous CFI +instances. 
+ +Reduces memory consumption (and runtime) for processing an +x86_64-allyesconfig: + + pre: 4:40.84 real, 143.99 user, 44.18 sys, 30624988 mem + post: 2:14.61 real, 108.58 user, 25.04 sys, 16396184 mem + +Suggested-by: Andi Kleen +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20210624095147.756759107@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: + - Don't use bswap_if_needed() since we don't have any of the other fixes + for mixed-endian cross-compilation + - Since we don't have "objtool: Rewrite hashtable sizing", make + cfi_hash_alloc() set the number of bits similarly to elf_hash_bits() + - objtool doesn't have any mcount handling + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch.h | 2 + tools/objtool/arch/x86/decode.c | 20 ++--- + tools/objtool/cfi.h | 2 + tools/objtool/check.c | 154 +++++++++++++++++++++++++++++++++++----- + tools/objtool/check.h | 2 + tools/objtool/orc_gen.c | 15 ++- + 6 files changed, 160 insertions(+), 35 deletions(-) + +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -84,7 +84,7 @@ unsigned long arch_dest_reloc_offset(int + + const char *arch_nop_insn(int len); + +-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); ++int arch_decode_hint_reg(u8 sp_reg, int *base); + + bool arch_is_retpoline(struct symbol *sym); + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -706,34 +706,32 @@ int arch_rewrite_retpolines(struct objto + return 0; + } + +-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) ++int arch_decode_hint_reg(u8 sp_reg, int *base) + { +- struct cfi_reg *cfa = &insn->cfi.cfa; +- + switch (sp_reg) { + case ORC_REG_UNDEFINED: +- cfa->base = CFI_UNDEFINED; ++ *base = CFI_UNDEFINED; + break; + case ORC_REG_SP: +- cfa->base = CFI_SP; ++ *base = CFI_SP; + break; + case ORC_REG_BP: +- cfa->base = CFI_BP; ++ *base = CFI_BP; + break; + case 
ORC_REG_SP_INDIRECT: +- cfa->base = CFI_SP_INDIRECT; ++ *base = CFI_SP_INDIRECT; + break; + case ORC_REG_R10: +- cfa->base = CFI_R10; ++ *base = CFI_R10; + break; + case ORC_REG_R13: +- cfa->base = CFI_R13; ++ *base = CFI_R13; + break; + case ORC_REG_DI: +- cfa->base = CFI_DI; ++ *base = CFI_DI; + break; + case ORC_REG_DX: +- cfa->base = CFI_DX; ++ *base = CFI_DX; + break; + default: + return -1; +--- a/tools/objtool/cfi.h ++++ b/tools/objtool/cfi.h +@@ -7,6 +7,7 @@ + #define _OBJTOOL_CFI_H + + #include "cfi_regs.h" ++#include + + #define CFI_UNDEFINED -1 + #define CFI_CFA -2 +@@ -24,6 +25,7 @@ struct cfi_init_state { + }; + + struct cfi_state { ++ struct hlist_node hash; /* must be first, cficmp() */ + struct cfi_reg regs[CFI_NUM_REGS]; + struct cfi_reg vals[CFI_NUM_REGS]; + struct cfi_reg cfa; +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include + + #include "builtin.h" + #include "cfi.h" +@@ -25,7 +26,11 @@ struct alternative { + bool skip_orig; + }; + +-struct cfi_init_state initial_func_cfi; ++static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; ++ ++static struct cfi_init_state initial_func_cfi; ++static struct cfi_state init_cfi; ++static struct cfi_state func_cfi; + + struct instruction *find_insn(struct objtool_file *file, + struct section *sec, unsigned long offset) +@@ -265,6 +270,78 @@ static void init_insn_state(struct insn_ + state->noinstr = sec->noinstr; + } + ++static struct cfi_state *cfi_alloc(void) ++{ ++ struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); ++ if (!cfi) { ++ WARN("calloc failed"); ++ exit(1); ++ } ++ nr_cfi++; ++ return cfi; ++} ++ ++static int cfi_bits; ++static struct hlist_head *cfi_hash; ++ ++static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2) ++{ ++ return memcmp((void *)cfi1 + sizeof(cfi1->hash), ++ (void *)cfi2 + sizeof(cfi2->hash), ++ sizeof(struct cfi_state) - sizeof(struct hlist_node)); ++} ++ ++static inline u32 cfi_key(struct 
cfi_state *cfi) ++{ ++ return jhash((void *)cfi + sizeof(cfi->hash), ++ sizeof(*cfi) - sizeof(cfi->hash), 0); ++} ++ ++static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi) ++{ ++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; ++ struct cfi_state *obj; ++ ++ hlist_for_each_entry(obj, head, hash) { ++ if (!cficmp(cfi, obj)) { ++ nr_cfi_cache++; ++ return obj; ++ } ++ } ++ ++ obj = cfi_alloc(); ++ *obj = *cfi; ++ hlist_add_head(&obj->hash, head); ++ ++ return obj; ++} ++ ++static void cfi_hash_add(struct cfi_state *cfi) ++{ ++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; ++ ++ hlist_add_head(&cfi->hash, head); ++} ++ ++static void *cfi_hash_alloc(void) ++{ ++ cfi_bits = vmlinux ? ELF_HASH_BITS - 3 : 13; ++ cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits, ++ PROT_READ|PROT_WRITE, ++ MAP_PRIVATE|MAP_ANON, -1, 0); ++ if (cfi_hash == (void *)-1L) { ++ WARN("mmap fail cfi_hash"); ++ cfi_hash = NULL; ++ } else if (stats) { ++ printf("cfi_bits: %d\n", cfi_bits); ++ } ++ ++ return cfi_hash; ++} ++ ++static unsigned long nr_insns; ++static unsigned long nr_insns_visited; ++ + /* + * Call the arch-specific instruction decoder for all the instructions and add + * them to the global instruction list. 
+@@ -275,7 +352,6 @@ static int decode_instructions(struct ob + struct symbol *func; + unsigned long offset; + struct instruction *insn; +- unsigned long nr_insns = 0; + int ret; + + for_each_sec(file, sec) { +@@ -301,7 +377,6 @@ static int decode_instructions(struct ob + memset(insn, 0, sizeof(*insn)); + INIT_LIST_HEAD(&insn->alts); + INIT_LIST_HEAD(&insn->stack_ops); +- init_cfi_state(&insn->cfi); + + insn->sec = sec; + insn->offset = offset; +@@ -1077,7 +1152,6 @@ static int handle_group_alt(struct objto + memset(nop, 0, sizeof(*nop)); + INIT_LIST_HEAD(&nop->alts); + INIT_LIST_HEAD(&nop->stack_ops); +- init_cfi_state(&nop->cfi); + + nop->sec = special_alt->new_sec; + nop->offset = special_alt->new_off + special_alt->new_len; +@@ -1454,10 +1528,11 @@ static void set_func_state(struct cfi_st + + static int read_unwind_hints(struct objtool_file *file) + { ++ struct cfi_state cfi = init_cfi; + struct section *sec, *relocsec; +- struct reloc *reloc; + struct unwind_hint *hint; + struct instruction *insn; ++ struct reloc *reloc; + int i; + + sec = find_section_by_name(file->elf, ".discard.unwind_hints"); +@@ -1495,19 +1570,24 @@ static int read_unwind_hints(struct objt + insn->hint = true; + + if (hint->type == UNWIND_HINT_TYPE_FUNC) { +- set_func_state(&insn->cfi); ++ insn->cfi = &func_cfi; + continue; + } + +- if (arch_decode_hint_reg(insn, hint->sp_reg)) { ++ if (insn->cfi) ++ cfi = *(insn->cfi); ++ ++ if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { + WARN_FUNC("unsupported unwind_hint sp base reg %d", + insn->sec, insn->offset, hint->sp_reg); + return -1; + } + +- insn->cfi.cfa.offset = hint->sp_offset; +- insn->cfi.type = hint->type; +- insn->cfi.end = hint->end; ++ cfi.cfa.offset = hint->sp_offset; ++ cfi.type = hint->type; ++ cfi.end = hint->end; ++ ++ insn->cfi = cfi_hash_find_or_add(&cfi); + } + + return 0; +@@ -2283,13 +2363,18 @@ static int propagate_alt_cfi(struct objt + if (!insn->alt_group) + return 0; + ++ if (!insn->cfi) { ++ WARN("CFI 
missing"); ++ return -1; ++ } ++ + alt_cfi = insn->alt_group->cfi; + group_off = insn->offset - insn->alt_group->first_insn->offset; + + if (!alt_cfi[group_off]) { +- alt_cfi[group_off] = &insn->cfi; ++ alt_cfi[group_off] = insn->cfi; + } else { +- if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { ++ if (cficmp(alt_cfi[group_off], insn->cfi)) { + WARN_FUNC("stack layout conflict in alternatives", + insn->sec, insn->offset); + return -1; +@@ -2335,9 +2420,14 @@ static int handle_insn_ops(struct instru + + static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) + { +- struct cfi_state *cfi1 = &insn->cfi; ++ struct cfi_state *cfi1 = insn->cfi; + int i; + ++ if (!cfi1) { ++ WARN("CFI missing"); ++ return false; ++ } ++ + if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { + + WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", +@@ -2522,7 +2612,7 @@ static int validate_branch(struct objtoo + struct instruction *insn, struct insn_state state) + { + struct alternative *alt; +- struct instruction *next_insn; ++ struct instruction *next_insn, *prev_insn = NULL; + struct section *sec; + u8 visited; + int ret; +@@ -2551,15 +2641,25 @@ static int validate_branch(struct objtoo + + if (insn->visited & visited) + return 0; ++ } else { ++ nr_insns_visited++; + } + + if (state.noinstr) + state.instr += insn->instr; + +- if (insn->hint) +- state.cfi = insn->cfi; +- else +- insn->cfi = state.cfi; ++ if (insn->hint) { ++ state.cfi = *insn->cfi; ++ } else { ++ /* XXX track if we actually changed state.cfi */ ++ ++ if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) { ++ insn->cfi = prev_insn->cfi; ++ nr_cfi_reused++; ++ } else { ++ insn->cfi = cfi_hash_find_or_add(&state.cfi); ++ } ++ } + + insn->visited |= visited; + +@@ -2709,6 +2809,7 @@ static int validate_branch(struct objtoo + return 1; + } + ++ prev_insn = insn; + insn = next_insn; + } + +@@ -2964,10 +3065,20 @@ int check(struct objtool_file *file) + int ret, warnings = 0; 
+ + arch_initial_func_cfi_state(&initial_func_cfi); ++ init_cfi_state(&init_cfi); ++ init_cfi_state(&func_cfi); ++ set_func_state(&func_cfi); ++ ++ if (!cfi_hash_alloc()) ++ goto out; ++ ++ cfi_hash_add(&init_cfi); ++ cfi_hash_add(&func_cfi); + + ret = decode_sections(file); + if (ret < 0) + goto out; ++ + warnings += ret; + + if (list_empty(&file->insn_list)) +@@ -3011,6 +3122,13 @@ int check(struct objtool_file *file) + goto out; + warnings += ret; + ++ if (stats) { ++ printf("nr_insns_visited: %ld\n", nr_insns_visited); ++ printf("nr_cfi: %ld\n", nr_cfi); ++ printf("nr_cfi_reused: %ld\n", nr_cfi_reused); ++ printf("nr_cfi_cache: %ld\n", nr_cfi_cache); ++ } ++ + out: + /* + * For now, don't fail the kernel build on fatal warnings. These +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -59,7 +59,7 @@ struct instruction { + struct list_head alts; + struct symbol *func; + struct list_head stack_ops; +- struct cfi_state cfi; ++ struct cfi_state *cfi; + }; + + static inline bool is_static_jump(struct instruction *insn) +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -12,13 +12,19 @@ + #include "check.h" + #include "warn.h" + +-static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) ++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, ++ struct instruction *insn) + { +- struct instruction *insn = container_of(cfi, struct instruction, cfi); + struct cfi_reg *bp = &cfi->regs[CFI_BP]; + + memset(orc, 0, sizeof(*orc)); + ++ if (!cfi) { ++ orc->end = 0; ++ orc->sp_reg = ORC_REG_UNDEFINED; ++ return 0; ++ } ++ + orc->end = cfi->end; + + if (cfi->cfa.base == CFI_UNDEFINED) { +@@ -159,7 +165,7 @@ int orc_create(struct objtool_file *file + int i; + + if (!alt_group) { +- if (init_orc_entry(&orc, &insn->cfi)) ++ if (init_orc_entry(&orc, insn->cfi, insn)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; +@@ -183,7 +189,8 @@ int orc_create(struct objtool_file *file + struct cfi_state *cfi = 
alt_group->cfi[i]; + if (!cfi) + continue; +- if (init_orc_entry(&orc, cfi)) ++ /* errors are reported on the original insn */ ++ if (init_orc_entry(&orc, cfi, insn)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; diff --git a/queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch b/queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch new file mode 100644 index 00000000000..4621cc33b58 --- /dev/null +++ b/queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch @@ -0,0 +1,177 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:12 +0100 +Subject: objtool: Keep track of retpoline call sites + +From: Peter Zijlstra + +commit 43d5430ad74ef5156353af7aec352426ec7a8e57 upstream. + +Provide infrastructure for architectures to rewrite/augment compiler +generated retpoline calls. Similar to what we do for static_call()s, +keep track of the instructions that are retpoline calls. + +Use the same list_head, since a retpoline call cannot also be a +static_call. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151300.130805730@infradead.org +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch.h | 2 ++ + tools/objtool/check.c | 34 +++++++++++++++++++++++++++++----- + tools/objtool/check.h | 2 +- + tools/objtool/objtool.c | 1 + + tools/objtool/objtool.h | 1 + + 5 files changed, 34 insertions(+), 6 deletions(-) + +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -88,4 +88,6 @@ int arch_decode_hint_reg(struct instruct + + bool arch_is_retpoline(struct symbol *sym); + ++int arch_rewrite_retpolines(struct objtool_file *file); ++ + #endif /* _ARCH_H */ +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -451,7 +451,7 @@ static int create_static_call_sections(s + return 0; + + idx = 0; +- list_for_each_entry(insn, &file->static_call_list, static_call_node) ++ list_for_each_entry(insn, &file->static_call_list, call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, +@@ -460,7 +460,7 @@ static int create_static_call_sections(s + return -1; + + idx = 0; +- list_for_each_entry(insn, &file->static_call_list, static_call_node) { ++ list_for_each_entry(insn, &file->static_call_list, call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); +@@ -786,13 +786,16 @@ static int add_jump_destinations(struct + else + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + ++ list_add_tail(&insn->call_node, ++ &file->retpoline_call_list); ++ + insn->retpoline_safe = true; + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ + insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->static_call_node, ++ list_add_tail(&insn->call_node, + 
&file->static_call_list); + } + continue; +@@ -854,7 +857,7 @@ static int add_jump_destinations(struct + /* internal sibling call (without reloc) */ + insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->static_call_node, ++ list_add_tail(&insn->call_node, + &file->static_call_list); + } + } +@@ -938,6 +941,9 @@ static int add_call_destinations(struct + insn->type = INSN_CALL_DYNAMIC; + insn->retpoline_safe = true; + ++ list_add_tail(&insn->call_node, ++ &file->retpoline_call_list); ++ + remove_insn_ops(insn); + continue; + +@@ -945,7 +951,7 @@ static int add_call_destinations(struct + insn->call_dest = reloc->sym; + + if (insn->call_dest && insn->call_dest->static_call_tramp) { +- list_add_tail(&insn->static_call_node, ++ list_add_tail(&insn->call_node, + &file->static_call_list); + } + +@@ -1655,6 +1661,11 @@ static void mark_rodata(struct objtool_f + file->rodata = found; + } + ++__weak int arch_rewrite_retpolines(struct objtool_file *file) ++{ ++ return 0; ++} ++ + static int decode_sections(struct objtool_file *file) + { + int ret; +@@ -1683,6 +1694,10 @@ static int decode_sections(struct objtoo + if (ret) + return ret; + ++ /* ++ * Must be before add_special_section_alts() as that depends on ++ * jump_dest being set. ++ */ + ret = add_jump_destinations(file); + if (ret) + return ret; +@@ -1719,6 +1734,15 @@ static int decode_sections(struct objtoo + if (ret) + return ret; + ++ /* ++ * Must be after add_special_section_alts(), since this will emit ++ * alternatives. Must be after add_{jump,call}_destination(), since ++ * those create the call insn lists. 
++ */ ++ ret = arch_rewrite_retpolines(file); ++ if (ret) ++ return ret; ++ + return 0; + } + +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -39,7 +39,7 @@ struct alt_group { + struct instruction { + struct list_head list; + struct hlist_node hash; +- struct list_head static_call_node; ++ struct list_head call_node; + struct section *sec; + unsigned long offset; + unsigned int len; +--- a/tools/objtool/objtool.c ++++ b/tools/objtool/objtool.c +@@ -61,6 +61,7 @@ struct objtool_file *objtool_open_read(c + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); ++ INIT_LIST_HEAD(&file.retpoline_call_list); + INIT_LIST_HEAD(&file.static_call_list); + file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); + file.ignore_unreachables = no_unreachable; +--- a/tools/objtool/objtool.h ++++ b/tools/objtool/objtool.h +@@ -18,6 +18,7 @@ struct objtool_file { + struct elf *elf; + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); ++ struct list_head retpoline_call_list; + struct list_head static_call_list; + bool ignore_unreachables, c_file, hints, rodata; + }; diff --git a/queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch b/queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch new file mode 100644 index 00000000000..6d27abf25dd --- /dev/null +++ b/queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Joe Lawrence +Date: Sun, 22 Aug 2021 18:50:36 -0400 +Subject: objtool: Make .altinstructions section entry size consistent + +From: Joe Lawrence + +commit dc02368164bd0ec603e3f5b3dd8252744a667b8a upstream. + +Commit e31694e0a7a7 ("objtool: Don't make .altinstructions writable") +aligned objtool-created and kernel-created .altinstructions section +flags, but there remains a minor discrepancy in their use of a section +entry size: objtool sets one while the kernel build does not. 
+ +While sh_entsize of sizeof(struct alt_instr) seems intuitive, this small +deviation can cause failures with external tooling (kpatch-build). + +Fix this by creating new .altinstructions sections with sh_entsize of 0 +and then later updating sec->sh_size as alternatives are added to the +section. An added benefit is avoiding the data descriptor and buffer +created by elf_create_section(), but previously unused by +elf_add_alternative(). + +Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") +Signed-off-by: Joe Lawrence +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/20210822225037.54620-2-joe.lawrence@redhat.com +Cc: Andy Lavr +Cc: Peter Zijlstra +Cc: x86@kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch/x86/decode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -611,7 +611,7 @@ static int elf_add_alternative(struct el + sec = find_section_by_name(elf, ".altinstructions"); + if (!sec) { + sec = elf_create_section(elf, ".altinstructions", +- SHF_ALLOC, size, 0); ++ SHF_ALLOC, 0, 0); + + if (!sec) { + WARN_ELF("elf_create_section"); diff --git a/queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch b/queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch new file mode 100644 index 00000000000..4ecdcc4a42b --- /dev/null +++ b/queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Thu, 10 Jun 2021 09:04:29 +0200 +Subject: objtool: Only rewrite unconditional retpoline thunk calls + +From: Peter Zijlstra + +commit 2d49b721dc18c113d5221f4cf5a6104eb66cb7f2 upstream. 
+ +It turns out that the compilers generate conditional branches to the +retpoline thunks like: + + 5d5: 0f 85 00 00 00 00 jne 5db + 5d7: R_X86_64_PLT32 __x86_indirect_thunk_r11-0x4 + +while the rewrite can only handle JMP/CALL to the thunks. The result +is the alternative wrecking the code. Make sure to skip writing the +alternatives for conditional branches. + +Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") +Reported-by: Lukasz Majczak +Reported-by: Nathan Chancellor +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Nathan Chancellor +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch/x86/decode.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -674,6 +674,10 @@ int arch_rewrite_retpolines(struct objto + + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + ++ if (insn->type != INSN_JUMP_DYNAMIC && ++ insn->type != INSN_CALL_DYNAMIC) ++ continue; ++ + if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) + continue; + diff --git a/queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch b/queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch new file mode 100644 index 00000000000..786987fecca --- /dev/null +++ b/queue-5.10/objtool-print-out-the-symbol-type-when-complaining-about-it.patch @@ -0,0 +1,65 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Linus Torvalds +Date: Sun, 3 Oct 2021 13:45:48 -0700 +Subject: objtool: print out the symbol type when complaining about it + +From: Linus Torvalds + +commit 7fab1c12bde926c5a8c7d5984c551d0854d7e0b3 upstream. 
+ +The objtool warning that the kvm instruction emulation code triggered +wasn't very useful: + + arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception + +in that it helpfully tells you which symbol name it had trouble figuring +out the relocation for, but it doesn't actually say what the unknown +symbol type was that triggered it all. + +In this case it was because of missing type information (type 0, aka +STT_NOTYPE), but on the whole it really should just have printed that +out as part of the message. + +Because if this warning triggers, that's very much the first thing you +want to know - why did reloc2sec_off() return failure for that symbol? + +So rather than just saying you can't handle some type of symbol without +saying what the type _was_, just print out the type number too. + +Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types") +Link: https://lore.kernel.org/lkml/CAHk-=wiZwq-0LknKhXN4M+T8jbxn_2i9mcKpO+OaBSSq_Eh7tg@mail.gmail.com/ +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/special.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -106,8 +106,10 @@ static int get_alt_entry(struct elf *elf + return -1; + } + if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) { +- WARN_FUNC("don't know how to handle reloc symbol type: %s", +- sec, offset + entry->orig, orig_reloc->sym->name); ++ WARN_FUNC("don't know how to handle reloc symbol type %d: %s", ++ sec, offset + entry->orig, ++ orig_reloc->sym->type, ++ orig_reloc->sym->name); + return -1; + } + +@@ -128,8 +130,10 @@ static int get_alt_entry(struct elf *elf + return 1; + + if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) { +- WARN_FUNC("don't know how to handle reloc symbol type: %s", +- sec, offset + entry->new, 
new_reloc->sym->name); ++ WARN_FUNC("don't know how to handle reloc symbol type %d: %s", ++ sec, offset + entry->new, ++ new_reloc->sym->type, ++ new_reloc->sym->name); + return -1; + } + diff --git a/queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch b/queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch new file mode 100644 index 00000000000..3a789a54fb5 --- /dev/null +++ b/queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch @@ -0,0 +1,185 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Fri, 24 Jun 2022 12:52:40 +0200 +Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} + +From: Josh Poimboeuf + +commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream. + +Commit + + c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") + +removed the save/restore unwind hints because they were no longer +needed. Now they're going to be needed again so re-add them. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/unwind_hints.h | 12 +++++++++- + include/linux/objtool.h | 6 +++-- + tools/include/linux/objtool.h | 6 +++-- + tools/objtool/check.c | 40 ++++++++++++++++++++++++++++++++++++ + tools/objtool/check.h | 1 + 5 files changed, 59 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -8,11 +8,11 @@ + #ifdef __ASSEMBLY__ + + .macro UNWIND_HINT_EMPTY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 + .endm + + .macro UNWIND_HINT_ENTRY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 + .endm + + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 +@@ -56,6 +56,14 @@ + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 
type=UNWIND_HINT_TYPE_FUNC + .endm + ++.macro UNWIND_HINT_SAVE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE ++.endm ++ ++.macro UNWIND_HINT_RESTORE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE ++.endm ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_UNWIND_HINTS_H */ +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -40,6 +40,8 @@ struct unwind_hint { + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 + #define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -102,7 +104,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -126,7 +128,7 @@ struct unwind_hint { + #define STACK_FRAME_NON_STANDARD(func) + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + #endif + +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -40,6 +40,8 @@ struct unwind_hint { + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 + #define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -102,7 +104,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. 
+ */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -126,7 +128,7 @@ struct unwind_hint { + #define STACK_FRAME_NON_STANDARD(func) + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + #endif + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1752,6 +1752,17 @@ static int read_unwind_hints(struct objt + + insn->hint = true; + ++ if (hint->type == UNWIND_HINT_TYPE_SAVE) { ++ insn->hint = false; ++ insn->save = true; ++ continue; ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_RESTORE) { ++ insn->restore = true; ++ continue; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + +@@ -2847,6 +2858,35 @@ static int validate_branch(struct objtoo + state.instr += insn->instr; + + if (insn->hint) { ++ if (insn->restore) { ++ struct instruction *save_insn, *i; ++ ++ i = insn; ++ save_insn = NULL; ++ ++ sym_for_each_insn_continue_reverse(file, func, i) { ++ if (i->save) { ++ save_insn = i; ++ break; ++ } ++ } ++ ++ if (!save_insn) { ++ WARN_FUNC("no corresponding CFI save for CFI restore", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ if (!save_insn->visited) { ++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ insn->cfi = save_insn->cfi; ++ nr_cfi_reused++; ++ } ++ + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -47,6 +47,7 @@ struct instruction { + unsigned long immediate; + bool dead_end, ignore, ignore_alts; + bool hint; ++ bool save, restore; + bool retpoline_safe; + bool entry; + s8 instr; diff --git 
a/queue-5.10/objtool-refactor-orc-section-generation.patch b/queue-5.10/objtool-refactor-orc-section-generation.patch new file mode 100644 index 00000000000..f7bde5994a9 --- /dev/null +++ b/queue-5.10/objtool-refactor-orc-section-generation.patch @@ -0,0 +1,439 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Thu, 17 Dec 2020 15:02:42 -0600 +Subject: objtool: Refactor ORC section generation + +From: Josh Poimboeuf + +commit ab4e0744e99b87e1a223e89fc3c9ae44f727c9a6 upstream. + +Decouple ORC entries from instructions. This simplifies the +control/data flow, and is going to make it easier to support alternative +instructions which change the stack layout. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/Makefile | 4 + tools/objtool/arch.h | 4 + tools/objtool/builtin-orc.c | 6 + tools/objtool/check.h | 3 + tools/objtool/objtool.h | 3 + tools/objtool/orc_gen.c | 274 ++++++++++++++++++++++---------------------- + tools/objtool/weak.c | 7 - + 7 files changed, 141 insertions(+), 160 deletions(-) + +--- a/tools/objtool/Makefile ++++ b/tools/objtool/Makefile +@@ -46,10 +46,6 @@ ifeq ($(SRCARCH),x86) + SUBCMD_ORC := y + endif + +-ifeq ($(SUBCMD_ORC),y) +- CFLAGS += -DINSN_USE_ORC +-endif +- + export SUBCMD_CHECK SUBCMD_ORC + export srctree OUTPUT CFLAGS SRCARCH AWK + include $(srctree)/tools/build/Makefile.include +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -11,10 +11,6 @@ + #include "objtool.h" + #include "cfi.h" + +-#ifdef INSN_USE_ORC +-#include +-#endif +- + enum insn_type { + INSN_JUMP_CONDITIONAL, + INSN_JUMP_UNCONDITIONAL, +--- a/tools/objtool/builtin-orc.c ++++ b/tools/objtool/builtin-orc.c +@@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv) + if (list_empty(&file->insn_list)) + return 0; + +- ret = create_orc(file); +- if (ret) +- return ret; +- +- ret = create_orc_sections(file); ++ ret = orc_create(file); + if (ret) + return ret; + +--- 
a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -43,9 +43,6 @@ struct instruction { + struct symbol *func; + struct list_head stack_ops; + struct cfi_state cfi; +-#ifdef INSN_USE_ORC +- struct orc_entry orc; +-#endif + }; + + static inline bool is_static_jump(struct instruction *insn) +--- a/tools/objtool/objtool.h ++++ b/tools/objtool/objtool.h +@@ -26,7 +26,6 @@ struct objtool_file *objtool_open_read(c + + int check(struct objtool_file *file); + int orc_dump(const char *objname); +-int create_orc(struct objtool_file *file); +-int create_orc_sections(struct objtool_file *file); ++int orc_create(struct objtool_file *file); + + #endif /* _OBJTOOL_H */ +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -12,89 +12,84 @@ + #include "check.h" + #include "warn.h" + +-int create_orc(struct objtool_file *file) ++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) + { +- struct instruction *insn; ++ struct instruction *insn = container_of(cfi, struct instruction, cfi); ++ struct cfi_reg *bp = &cfi->regs[CFI_BP]; + +- for_each_insn(file, insn) { +- struct orc_entry *orc = &insn->orc; +- struct cfi_reg *cfa = &insn->cfi.cfa; +- struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; ++ memset(orc, 0, sizeof(*orc)); + +- if (!insn->sec->text) +- continue; +- +- orc->end = insn->cfi.end; +- +- if (cfa->base == CFI_UNDEFINED) { +- orc->sp_reg = ORC_REG_UNDEFINED; +- continue; +- } +- +- switch (cfa->base) { +- case CFI_SP: +- orc->sp_reg = ORC_REG_SP; +- break; +- case CFI_SP_INDIRECT: +- orc->sp_reg = ORC_REG_SP_INDIRECT; +- break; +- case CFI_BP: +- orc->sp_reg = ORC_REG_BP; +- break; +- case CFI_BP_INDIRECT: +- orc->sp_reg = ORC_REG_BP_INDIRECT; +- break; +- case CFI_R10: +- orc->sp_reg = ORC_REG_R10; +- break; +- case CFI_R13: +- orc->sp_reg = ORC_REG_R13; +- break; +- case CFI_DI: +- orc->sp_reg = ORC_REG_DI; +- break; +- case CFI_DX: +- orc->sp_reg = ORC_REG_DX; +- break; +- default: +- WARN_FUNC("unknown CFA base reg %d", +- insn->sec, 
insn->offset, cfa->base); +- return -1; +- } ++ orc->end = cfi->end; + +- switch(bp->base) { +- case CFI_UNDEFINED: +- orc->bp_reg = ORC_REG_UNDEFINED; +- break; +- case CFI_CFA: +- orc->bp_reg = ORC_REG_PREV_SP; +- break; +- case CFI_BP: +- orc->bp_reg = ORC_REG_BP; +- break; +- default: +- WARN_FUNC("unknown BP base reg %d", +- insn->sec, insn->offset, bp->base); +- return -1; +- } ++ if (cfi->cfa.base == CFI_UNDEFINED) { ++ orc->sp_reg = ORC_REG_UNDEFINED; ++ return 0; ++ } ++ ++ switch (cfi->cfa.base) { ++ case CFI_SP: ++ orc->sp_reg = ORC_REG_SP; ++ break; ++ case CFI_SP_INDIRECT: ++ orc->sp_reg = ORC_REG_SP_INDIRECT; ++ break; ++ case CFI_BP: ++ orc->sp_reg = ORC_REG_BP; ++ break; ++ case CFI_BP_INDIRECT: ++ orc->sp_reg = ORC_REG_BP_INDIRECT; ++ break; ++ case CFI_R10: ++ orc->sp_reg = ORC_REG_R10; ++ break; ++ case CFI_R13: ++ orc->sp_reg = ORC_REG_R13; ++ break; ++ case CFI_DI: ++ orc->sp_reg = ORC_REG_DI; ++ break; ++ case CFI_DX: ++ orc->sp_reg = ORC_REG_DX; ++ break; ++ default: ++ WARN_FUNC("unknown CFA base reg %d", ++ insn->sec, insn->offset, cfi->cfa.base); ++ return -1; ++ } + +- orc->sp_offset = cfa->offset; +- orc->bp_offset = bp->offset; +- orc->type = insn->cfi.type; ++ switch (bp->base) { ++ case CFI_UNDEFINED: ++ orc->bp_reg = ORC_REG_UNDEFINED; ++ break; ++ case CFI_CFA: ++ orc->bp_reg = ORC_REG_PREV_SP; ++ break; ++ case CFI_BP: ++ orc->bp_reg = ORC_REG_BP; ++ break; ++ default: ++ WARN_FUNC("unknown BP base reg %d", ++ insn->sec, insn->offset, bp->base); ++ return -1; + } + ++ orc->sp_offset = cfi->cfa.offset; ++ orc->bp_offset = bp->offset; ++ orc->type = cfi->type; ++ + return 0; + } + +-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, +- unsigned int idx, struct section *insn_sec, +- unsigned long insn_off, struct orc_entry *o) ++static int write_orc_entry(struct elf *elf, struct section *orc_sec, ++ struct section *ip_rsec, unsigned int idx, ++ struct section *insn_sec, unsigned long 
insn_off, ++ struct orc_entry *o) + { + struct orc_entry *orc; + struct reloc *reloc; + + /* populate ORC data */ +- orc = (struct orc_entry *)u_sec->data->d_buf + idx; ++ orc = (struct orc_entry *)orc_sec->data->d_buf + idx; + memcpy(orc, o, sizeof(*orc)); + + /* populate reloc for ip */ +@@ -114,102 +109,109 @@ static int create_orc_entry(struct elf * + + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(int); +- reloc->sec = ip_relocsec; ++ reloc->sec = ip_rsec; + + elf_add_reloc(elf, reloc); + + return 0; + } + +-int create_orc_sections(struct objtool_file *file) ++struct orc_list_entry { ++ struct list_head list; ++ struct orc_entry orc; ++ struct section *insn_sec; ++ unsigned long insn_off; ++}; ++ ++static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc, ++ struct section *sec, unsigned long offset) ++{ ++ struct orc_list_entry *entry = malloc(sizeof(*entry)); ++ ++ if (!entry) { ++ WARN("malloc failed"); ++ return -1; ++ } ++ ++ entry->orc = *orc; ++ entry->insn_sec = sec; ++ entry->insn_off = offset; ++ ++ list_add_tail(&entry->list, orc_list); ++ return 0; ++} ++ ++int orc_create(struct objtool_file *file) + { +- struct instruction *insn, *prev_insn; +- struct section *sec, *u_sec, *ip_relocsec; +- unsigned int idx; ++ struct section *sec, *ip_rsec, *orc_sec; ++ unsigned int nr = 0, idx = 0; ++ struct orc_list_entry *entry; ++ struct list_head orc_list; + +- struct orc_entry empty = { +- .sp_reg = ORC_REG_UNDEFINED, ++ struct orc_entry null = { ++ .sp_reg = ORC_REG_UNDEFINED, + .bp_reg = ORC_REG_UNDEFINED, + .type = UNWIND_HINT_TYPE_CALL, + }; + +- sec = find_section_by_name(file->elf, ".orc_unwind"); +- if (sec) { +- WARN("file already has .orc_unwind section, skipping"); +- return -1; +- } +- +- /* count the number of needed orcs */ +- idx = 0; ++ /* Build a deduplicated list of ORC entries: */ ++ INIT_LIST_HEAD(&orc_list); + for_each_sec(file, sec) { ++ struct orc_entry orc, prev_orc = {0}; ++ struct instruction *insn; ++ 
bool empty = true; ++ + if (!sec->text) + continue; + +- prev_insn = NULL; + sec_for_each_insn(file, sec, insn) { +- if (!prev_insn || +- memcmp(&insn->orc, &prev_insn->orc, +- sizeof(struct orc_entry))) { +- idx++; +- } +- prev_insn = insn; ++ if (init_orc_entry(&orc, &insn->cfi)) ++ return -1; ++ if (!memcmp(&prev_orc, &orc, sizeof(orc))) ++ continue; ++ if (orc_list_add(&orc_list, &orc, sec, insn->offset)) ++ return -1; ++ nr++; ++ prev_orc = orc; ++ empty = false; + } + +- /* section terminator */ +- if (prev_insn) +- idx++; ++ /* Add a section terminator */ ++ if (!empty) { ++ orc_list_add(&orc_list, &null, sec, sec->len); ++ nr++; ++ } + } +- if (!idx) +- return -1; ++ if (!nr) ++ return 0; + ++ /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */ ++ sec = find_section_by_name(file->elf, ".orc_unwind"); ++ if (sec) { ++ WARN("file already has .orc_unwind section, skipping"); ++ return -1; ++ } ++ orc_sec = elf_create_section(file->elf, ".orc_unwind", 0, ++ sizeof(struct orc_entry), nr); ++ if (!orc_sec) ++ return -1; + +- /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ +- sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); ++ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); + if (!sec) + return -1; +- +- ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); +- if (!ip_relocsec) ++ ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); ++ if (!ip_rsec) + return -1; + +- /* create .orc_unwind section */ +- u_sec = elf_create_section(file->elf, ".orc_unwind", 0, +- sizeof(struct orc_entry), idx); +- +- /* populate sections */ +- idx = 0; +- for_each_sec(file, sec) { +- if (!sec->text) +- continue; +- +- prev_insn = NULL; +- sec_for_each_insn(file, sec, insn) { +- if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, +- sizeof(struct orc_entry))) { +- +- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, +- insn->sec, insn->offset, +- &insn->orc)) +- 
return -1; +- +- idx++; +- } +- prev_insn = insn; +- } +- +- /* section terminator */ +- if (prev_insn) { +- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, +- prev_insn->sec, +- prev_insn->offset + prev_insn->len, +- &empty)) +- return -1; +- +- idx++; +- } ++ /* Write ORC entries to sections: */ ++ list_for_each_entry(entry, &orc_list, list) { ++ if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++, ++ entry->insn_sec, entry->insn_off, ++ &entry->orc)) ++ return -1; + } + +- if (elf_rebuild_reloc_section(file->elf, ip_relocsec)) ++ if (elf_rebuild_reloc_section(file->elf, ip_rsec)) + return -1; + + return 0; +--- a/tools/objtool/weak.c ++++ b/tools/objtool/weak.c +@@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname + UNSUPPORTED("orc"); + } + +-int __weak create_orc(struct objtool_file *file) +-{ +- UNSUPPORTED("orc"); +-} +- +-int __weak create_orc_sections(struct objtool_file *file) ++int __weak orc_create(struct objtool_file *file) + { + UNSUPPORTED("orc"); + } diff --git a/queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch b/queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch new file mode 100644 index 00000000000..5b2fb245fa6 --- /dev/null +++ b/queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch @@ -0,0 +1,91 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Mon, 4 Oct 2021 10:07:50 -0700 +Subject: objtool: Remove reloc symbol type checks in get_alt_entry() + +From: Josh Poimboeuf + +commit 4d8b35968bbf9e42b6b202eedb510e2c82ad8b38 upstream. + +Converting a special section's relocation reference to a symbol is +straightforward. No need for objtool to complain that it doesn't know +how to handle it. Just handle it. 
+ +This fixes the following warning: + + arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception + +Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types") +Reported-by: Linus Torvalds +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/feadbc3dfb3440d973580fad8d3db873cbfe1694.1633367242.git.jpoimboe@redhat.com +Cc: Peter Zijlstra +Cc: x86@kernel.org +Cc: Miroslav Benes +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/special.c | 36 +++++++----------------------------- + 1 file changed, 7 insertions(+), 29 deletions(-) + +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -55,22 +55,11 @@ void __weak arch_handle_alternative(unsi + { + } + +-static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off) ++static void reloc_to_sec_off(struct reloc *reloc, struct section **sec, ++ unsigned long *off) + { +- switch (reloc->sym->type) { +- case STT_FUNC: +- *sec = reloc->sym->sec; +- *off = reloc->sym->offset + reloc->addend; +- return true; +- +- case STT_SECTION: +- *sec = reloc->sym->sec; +- *off = reloc->addend; +- return true; +- +- default: +- return false; +- } ++ *sec = reloc->sym->sec; ++ *off = reloc->sym->offset + reloc->addend; + } + + static int get_alt_entry(struct elf *elf, struct special_entry *entry, +@@ -105,13 +94,8 @@ static int get_alt_entry(struct elf *elf + WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); + return -1; + } +- if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) { +- WARN_FUNC("don't know how to handle reloc symbol type %d: %s", +- sec, offset + entry->orig, +- orig_reloc->sym->type, +- orig_reloc->sym->name); +- return -1; +- } ++ ++ reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); + + if (!entry->group || alt->new_len) { + new_reloc = find_reloc_by_dest(elf, sec, offset + 
entry->new); +@@ -129,13 +113,7 @@ static int get_alt_entry(struct elf *elf + if (arch_is_retpoline(new_reloc->sym)) + return 1; + +- if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) { +- WARN_FUNC("don't know how to handle reloc symbol type %d: %s", +- sec, offset + entry->new, +- new_reloc->sym->type, +- new_reloc->sym->name); +- return -1; +- } ++ reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); + + /* _ASM_EXTABLE_EX hack */ + if (alt->new_off >= 0x7ffffff0) diff --git a/queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch b/queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch new file mode 100644 index 00000000000..90bc0fc85c6 --- /dev/null +++ b/queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch @@ -0,0 +1,126 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:06 +0100 +Subject: objtool: Rework the elf_rebuild_reloc_section() logic + +From: Peter Zijlstra + +commit 3a647607b57ad8346e659ddd3b951ac292c83690 upstream. + +Instead of manually calling elf_rebuild_reloc_section() on sections +we've called elf_add_reloc() on, have elf_write() DTRT. + +This makes it easier to add random relocations in places without +carefully tracking when we're done and need to flush what section. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151259.754213408@infradead.org +[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 3 --- + tools/objtool/elf.c | 20 ++++++++++++++------ + tools/objtool/elf.h | 1 - + tools/objtool/orc_gen.c | 3 --- + 4 files changed, 14 insertions(+), 13 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -542,9 +542,6 @@ static int create_static_call_sections(s + idx++; + } + +- if (elf_rebuild_reloc_section(file->elf, reloc_sec)) +- return -1; +- + return 0; + } + +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -530,6 +530,8 @@ void elf_add_reloc(struct elf *elf, stru + + list_add_tail(&reloc->list, &sec->reloc_list); + elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); ++ ++ sec->changed = true; + } + + static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx) +@@ -609,7 +611,9 @@ static int read_relocs(struct elf *elf) + return -1; + } + +- elf_add_reloc(elf, reloc); ++ list_add_tail(&reloc->list, &sec->reloc_list); ++ elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); ++ + nr_reloc++; + } + max_reloc = max(max_reloc, nr_reloc); +@@ -920,14 +924,11 @@ static int elf_rebuild_rela_reloc_sectio + return 0; + } + +-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) ++static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) + { + struct reloc *reloc; + int nr; + +- sec->changed = true; +- elf->changed = true; +- + nr = 0; + list_for_each_entry(reloc, &sec->reloc_list, list) + nr++; +@@ -991,9 +992,15 @@ int elf_write(struct elf *elf) + struct section *sec; + Elf_Scn *s; + +- /* Update section headers for changed sections: */ ++ /* Update changed relocation sections 
and section headers: */ + list_for_each_entry(sec, &elf->sections, list) { + if (sec->changed) { ++ if (sec->base && ++ elf_rebuild_reloc_section(elf, sec)) { ++ WARN("elf_rebuild_reloc_section"); ++ return -1; ++ } ++ + s = elf_getscn(elf->elf, sec->idx); + if (!s) { + WARN_ELF("elf_getscn"); +@@ -1005,6 +1012,7 @@ int elf_write(struct elf *elf) + } + + sec->changed = false; ++ elf->changed = true; + } + } + +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(c + struct symbol *find_func_containing(struct section *sec, unsigned long offset); + void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc); +-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); + + #define for_each_sec(file, sec) \ + list_for_each_entry(sec, &file->elf->sections, list) +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -251,8 +251,5 @@ int orc_create(struct objtool_file *file + return -1; + } + +- if (elf_rebuild_reloc_section(file->elf, ip_rsec)) +- return -1; +- + return 0; + } diff --git a/queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch b/queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch new file mode 100644 index 00000000000..a1ca660321f --- /dev/null +++ b/queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:14 +0100 +Subject: objtool: Skip magical retpoline .altinstr_replacement + +From: Peter Zijlstra + +commit 50e7b4a1a1b264fc7df0698f2defb93cadf19a7b upstream. + +When the .altinstr_replacement is a retpoline, skip the alternative. +We already special case retpolines anyway. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151300.259429287@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/special.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -104,6 +104,14 @@ static int get_alt_entry(struct elf *elf + return -1; + } + ++ /* ++ * Skip retpoline .altinstr_replacement... we already rewrite the ++ * instructions for retpolines anyway, see arch_is_retpoline() ++ * usage in add_{call,jump}_destinations(). ++ */ ++ if (arch_is_retpoline(new_reloc->sym)) ++ return 1; ++ + alt->new_sec = new_reloc->sym->sec; + alt->new_off = (unsigned int)new_reloc->addend; + +@@ -152,7 +160,9 @@ int special_get_alts(struct elf *elf, st + memset(alt, 0, sizeof(*alt)); + + ret = get_alt_entry(elf, entry, sec, idx, alt); +- if (ret) ++ if (ret > 0) ++ continue; ++ if (ret < 0) + return ret; + + list_add_tail(&alt->list, alts); diff --git a/queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch b/queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch new file mode 100644 index 00000000000..ee3474ab9d0 --- /dev/null +++ b/queue-5.10/objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch @@ -0,0 +1,37 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Fri, 1 Jul 2022 09:00:45 -0300 +Subject: objtool: skip non-text sections when adding return-thunk sites + +From: Thadeu Lima de Souza Cascardo + +The .discard.text section is added in order to reserve BRK, with a +temporary function just so it can give it a size. This adds a relocation to +the return thunk, which objtool will add to the .return_sites section. +Linking will then fail as there are references to the .discard.text +section. 
+ +Do not add instructions from non-text sections to the list of return thunk +calls, avoiding the reference to .discard.text. + +Signed-off-by: Thadeu Lima de Souza Cascardo +Acked-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1090,7 +1090,9 @@ static void add_return_call(struct objto + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + +- list_add_tail(&insn->call_node, &file->return_thunk_list); ++ /* Skip the non-text sections, specially .discard ones */ ++ if (insn->sec->text) ++ list_add_tail(&insn->call_node, &file->return_thunk_list); + } + + /* diff --git a/queue-5.10/objtool-support-asm-jump-tables.patch b/queue-5.10/objtool-support-asm-jump-tables.patch new file mode 100644 index 00000000000..f3f0ad6ddce --- /dev/null +++ b/queue-5.10/objtool-support-asm-jump-tables.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Wed, 24 Feb 2021 10:29:14 -0600 +Subject: objtool: Support asm jump tables + +From: Josh Poimboeuf + +commit 99033461e685b48549ec77608b4bda75ddf772ce upstream. + +Objtool detection of asm jump tables would normally just work, except +for the fact that asm retpolines use alternatives. Objtool thinks the +alternative code path (a jump to the retpoline) is a sibling call. + +Don't treat alternative indirect branches as sibling calls when the +original instruction has a jump table. 
+ +Signed-off-by: Josh Poimboeuf +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Tested-by: Sami Tolvanen +Acked-by: Peter Zijlstra (Intel) +Acked-by: Herbert Xu +Link: https://lore.kernel.org/r/460cf4dc675d64e1124146562cabd2c05aa322e8.1614182415.git.jpoimboe@redhat.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -107,6 +107,18 @@ static struct instruction *prev_insn_sam + for (insn = next_insn_same_sec(file, insn); insn; \ + insn = next_insn_same_sec(file, insn)) + ++static bool is_jump_table_jump(struct instruction *insn) ++{ ++ struct alt_group *alt_group = insn->alt_group; ++ ++ if (insn->jump_table) ++ return true; ++ ++ /* Retpoline alternative for a jump table? */ ++ return alt_group && alt_group->orig_group && ++ alt_group->orig_group->first_insn->jump_table; ++} ++ + static bool is_sibling_call(struct instruction *insn) + { + /* +@@ -119,7 +131,7 @@ static bool is_sibling_call(struct instr + + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) +- return list_empty(&insn->alts); ++ return !is_jump_table_jump(insn); + + /* add_jump_destinations() sets insn->call_dest for sibling calls. */ + return (is_static_jump(insn) && insn->call_dest); diff --git a/queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch b/queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch new file mode 100644 index 00000000000..e4c850372ea --- /dev/null +++ b/queue-5.10/objtool-support-retpoline-jump-detection-for-vmlinux.o.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Thu, 21 Jan 2021 15:29:20 -0600 +Subject: objtool: Support retpoline jump detection for vmlinux.o + +From: Josh Poimboeuf + +commit 31a7424bc58063a8e0466c3c10f31a52ec2be4f6 upstream. 
+ +Objtool converts direct retpoline jumps to type INSN_JUMP_DYNAMIC, since +that's what they are semantically. + +That conversion doesn't work in vmlinux.o validation because the +indirect thunk function is present in the object, so the intra-object +jump check succeeds before the retpoline jump check gets a chance. + +Rearrange the checks: check for a retpoline jump before checking for an +intra-object jump. + +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/4302893513770dde68ddc22a9d6a2a04aca491dd.1611263461.git.jpoimboe@redhat.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -795,10 +795,6 @@ static int add_jump_destinations(struct + } else if (reloc->sym->type == STT_SECTION) { + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); +- } else if (reloc->sym->sec->idx) { +- dest_sec = reloc->sym->sec; +- dest_off = reloc->sym->sym.st_value + +- arch_dest_reloc_offset(reloc->addend); + } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || + !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { + /* +@@ -812,6 +808,10 @@ static int add_jump_destinations(struct + + insn->retpoline_safe = true; + continue; ++ } else if (reloc->sym->sec->idx) { ++ dest_sec = reloc->sym->sec; ++ dest_off = reloc->sym->sym.st_value + ++ arch_dest_reloc_offset(reloc->addend); + } else { + /* external sibling call */ + insn->call_dest = reloc->sym; diff --git a/queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch b/queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch new file mode 100644 index 00000000000..0738bc3838a --- /dev/null +++ b/queue-5.10/objtool-support-stack-layout-changes-in-alternatives.patch @@ -0,0 +1,514 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Fri, 18 Dec 2020 14:26:21 
-0600 +Subject: objtool: Support stack layout changes in alternatives + +From: Josh Poimboeuf + +commit c9c324dc22aab1687da37001b321b6dfa93a0699 upstream. + +The ORC unwinder showed a warning [1] which revealed the stack layout +didn't match what was expected. The problem was that paravirt patching +had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed +the stack layout between the PUSHF and the POP, so unwinding from an +interrupt which occurred between those two instructions would fail. + +Part of the agreed upon solution was to rework the custom paravirt +patching code to use alternatives instead, since objtool already knows +how to read alternatives (and converging runtime patching infrastructure +is always a good thing anyway). But the main problem still remains, +which is that runtime patching can change the stack layout. + +Making stack layout changes in alternatives was disallowed with commit +7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt +is going to be doing it, it needs to be supported. + +One way to do so would be to modify the ORC table when the code gets +patched. But ORC is simple -- a good thing! -- and it's best to leave +it alone. + +Instead, support stack layout changes by "flattening" all possible stack +states (CFI) from parallel alternative code streams into a single set of +linear states. The only necessary limitation is that CFI conflicts are +disallowed at all possible instruction boundaries. + +For example, this scenario is allowed: + + Alt1 Alt2 Alt3 + + 0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF + 0x01 POP %RAX + 0x02 NOP + ... + 0x05 NOP + ... + 0x07 + +The unwind information for offset-0x00 is identical for all 3 +alternatives. Similarly offset-0x05 and higher also are identical (and +the same as 0x00). However offset-0x01 has deviating CFI, but that is +only relevant for Alt3, neither of the other alternative instruction +streams will ever hit that offset. 
+ +This scenario is NOT allowed: + + Alt1 Alt2 + + 0x00 CALL *pv_ops.save_fl PUSHF + 0x01 NOP6 + ... + 0x07 NOP POP %RAX + +The problem here is that offset-0x7, which is an instruction boundary in +both possible instruction patch streams, has two conflicting stack +layouts. + +[ The above examples were stolen from Peter Zijlstra. ] + +The new flattened CFI array is used both for the detection of conflicts +(like the second example above) and the generation of linear ORC +entries. + +BTW, another benefit of these changes is that, thanks to some related +cleanups (new fake nops and alt_group struct) objtool can finally be rid +of fake jumps, which were a constant source of headaches. + +[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble + +Cc: Shinichiro Kawasaki +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/Documentation/stack-validation.txt | 14 - + tools/objtool/check.c | 196 +++++++++++------------ + tools/objtool/check.h | 6 + tools/objtool/orc_gen.c | 56 +++++- + 4 files changed, 160 insertions(+), 112 deletions(-) + +--- a/tools/objtool/Documentation/stack-validation.txt ++++ b/tools/objtool/Documentation/stack-validation.txt +@@ -315,13 +315,15 @@ they mean, and suggestions for how to fi + function tracing inserts additional calls, which is not obvious from the + sources). + +-10. file.o: warning: func()+0x5c: alternative modifies stack ++10. file.o: warning: func()+0x5c: stack layout conflict in alternatives + +- This means that an alternative includes instructions that modify the +- stack. The problem is that there is only one ORC unwind table, this means +- that the ORC unwind entries must be valid for each of the alternatives. +- The easiest way to enforce this is to ensure alternatives do not contain +- any ORC entries, which in turn implies the above constraint. 
++ This means that in the use of the alternative() or ALTERNATIVE() ++ macro, the code paths have conflicting modifications to the stack. ++ The problem is that there is only one ORC unwind table, which means ++ that the ORC unwind entries must be consistent for all possible ++ instruction boundaries regardless of which code has been patched. ++ This limitation can be overcome by massaging the alternatives with ++ NOPs to shift the stack changes around so they no longer conflict. + + 11. file.o: warning: unannotated intra-function call + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -19,8 +19,6 @@ + #include + #include + +-#define FAKE_JUMP_OFFSET -1 +- + struct alternative { + struct list_head list; + struct instruction *insn; +@@ -789,9 +787,6 @@ static int add_jump_destinations(struct + if (!is_static_jump(insn)) + continue; + +- if (insn->offset == FAKE_JUMP_OFFSET) +- continue; +- + reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!reloc) { +@@ -991,28 +986,15 @@ static int add_call_destinations(struct + } + + /* +- * The .alternatives section requires some extra special care, over and above +- * what other special sections require: +- * +- * 1. Because alternatives are patched in-place, we need to insert a fake jump +- * instruction at the end so that validate_branch() skips all the original +- * replaced instructions when validating the new instruction path. +- * +- * 2. An added wrinkle is that the new instruction length might be zero. In +- * that case the old instructions are replaced with noops. We simulate that +- * by creating a fake jump as the only new instruction. +- * +- * 3. In some cases, the alternative section includes an instruction which +- * conditionally jumps to the _end_ of the entry. We have to modify these +- * jumps' destinations to point back to .text rather than the end of the +- * entry in .altinstr_replacement. 
++ * The .alternatives section requires some extra special care over and above ++ * other special sections because alternatives are patched in place. + */ + static int handle_group_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) + { +- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; ++ struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL; + struct alt_group *orig_alt_group, *new_alt_group; + unsigned long dest_off; + +@@ -1022,6 +1004,13 @@ static int handle_group_alt(struct objto + WARN("malloc failed"); + return -1; + } ++ orig_alt_group->cfi = calloc(special_alt->orig_len, ++ sizeof(struct cfi_state *)); ++ if (!orig_alt_group->cfi) { ++ WARN("calloc failed"); ++ return -1; ++ } ++ + last_orig_insn = NULL; + insn = orig_insn; + sec_for_each_insn_from(file, insn) { +@@ -1035,42 +1024,45 @@ static int handle_group_alt(struct objto + orig_alt_group->first_insn = orig_insn; + orig_alt_group->last_insn = last_orig_insn; + +- if (next_insn_same_sec(file, last_orig_insn)) { +- fake_jump = malloc(sizeof(*fake_jump)); +- if (!fake_jump) { +- WARN("malloc failed"); +- return -1; +- } +- memset(fake_jump, 0, sizeof(*fake_jump)); +- INIT_LIST_HEAD(&fake_jump->alts); +- INIT_LIST_HEAD(&fake_jump->stack_ops); +- init_cfi_state(&fake_jump->cfi); +- +- fake_jump->sec = special_alt->new_sec; +- fake_jump->offset = FAKE_JUMP_OFFSET; +- fake_jump->type = INSN_JUMP_UNCONDITIONAL; +- fake_jump->jump_dest = list_next_entry(last_orig_insn, list); +- fake_jump->func = orig_insn->func; ++ ++ new_alt_group = malloc(sizeof(*new_alt_group)); ++ if (!new_alt_group) { ++ WARN("malloc failed"); ++ return -1; + } + +- if (!special_alt->new_len) { +- if (!fake_jump) { +- WARN("%s: empty alternative at end of section", +- special_alt->orig_sec->name); ++ if (special_alt->new_len < special_alt->orig_len) { ++ /* ++ * Insert a fake nop at the end to make the 
replacement ++ * alt_group the same size as the original. This is needed to ++ * allow propagate_alt_cfi() to do its magic. When the last ++ * instruction affects the stack, the instruction after it (the ++ * nop) will propagate the new state to the shared CFI array. ++ */ ++ nop = malloc(sizeof(*nop)); ++ if (!nop) { ++ WARN("malloc failed"); + return -1; + } +- +- *new_insn = fake_jump; +- return 0; ++ memset(nop, 0, sizeof(*nop)); ++ INIT_LIST_HEAD(&nop->alts); ++ INIT_LIST_HEAD(&nop->stack_ops); ++ init_cfi_state(&nop->cfi); ++ ++ nop->sec = special_alt->new_sec; ++ nop->offset = special_alt->new_off + special_alt->new_len; ++ nop->len = special_alt->orig_len - special_alt->new_len; ++ nop->type = INSN_NOP; ++ nop->func = orig_insn->func; ++ nop->alt_group = new_alt_group; ++ nop->ignore = orig_insn->ignore_alts; + } + +- new_alt_group = malloc(sizeof(*new_alt_group)); +- if (!new_alt_group) { +- WARN("malloc failed"); +- return -1; ++ if (!special_alt->new_len) { ++ *new_insn = nop; ++ goto end; + } + +- last_new_insn = NULL; + insn = *new_insn; + sec_for_each_insn_from(file, insn) { + struct reloc *alt_reloc; +@@ -1109,14 +1101,8 @@ static int handle_group_alt(struct objto + continue; + + dest_off = arch_jump_destination(insn); +- if (dest_off == special_alt->new_off + special_alt->new_len) { +- if (!fake_jump) { +- WARN("%s: alternative jump to end of section", +- special_alt->orig_sec->name); +- return -1; +- } +- insn->jump_dest = fake_jump; +- } ++ if (dest_off == special_alt->new_off + special_alt->new_len) ++ insn->jump_dest = next_insn_same_sec(file, last_orig_insn); + + if (!insn->jump_dest) { + WARN_FUNC("can't find alternative jump destination", +@@ -1131,13 +1117,13 @@ static int handle_group_alt(struct objto + return -1; + } + ++ if (nop) ++ list_add(&nop->list, &last_new_insn->list); ++end: + new_alt_group->orig_group = orig_alt_group; + new_alt_group->first_insn = *new_insn; +- new_alt_group->last_insn = last_new_insn; +- +- if (fake_jump) +- 
list_add(&fake_jump->list, &last_new_insn->list); +- ++ new_alt_group->last_insn = nop ? : last_new_insn; ++ new_alt_group->cfi = orig_alt_group->cfi; + return 0; + } + +@@ -2237,22 +2223,47 @@ static int update_cfi_state(struct instr + return 0; + } + +-static int handle_insn_ops(struct instruction *insn, struct insn_state *state) ++/* ++ * The stack layouts of alternatives instructions can sometimes diverge when ++ * they have stack modifications. That's fine as long as the potential stack ++ * layouts don't conflict at any given potential instruction boundary. ++ * ++ * Flatten the CFIs of the different alternative code streams (both original ++ * and replacement) into a single shared CFI array which can be used to detect ++ * conflicts and nicely feed a linear array of ORC entries to the unwinder. ++ */ ++static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn) + { +- struct stack_op *op; ++ struct cfi_state **alt_cfi; ++ int group_off; + +- list_for_each_entry(op, &insn->stack_ops, list) { +- struct cfi_state old_cfi = state->cfi; +- int res; ++ if (!insn->alt_group) ++ return 0; + +- res = update_cfi_state(insn, &state->cfi, op); +- if (res) +- return res; ++ alt_cfi = insn->alt_group->cfi; ++ group_off = insn->offset - insn->alt_group->first_insn->offset; + +- if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { +- WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); ++ if (!alt_cfi[group_off]) { ++ alt_cfi[group_off] = &insn->cfi; ++ } else { ++ if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { ++ WARN_FUNC("stack layout conflict in alternatives", ++ insn->sec, insn->offset); + return -1; + } ++ } ++ ++ return 0; ++} ++ ++static int handle_insn_ops(struct instruction *insn, struct insn_state *state) ++{ ++ struct stack_op *op; ++ ++ list_for_each_entry(op, &insn->stack_ops, list) { ++ ++ if (update_cfi_state(insn, &state->cfi, op)) ++ return 1; + + if (op->dest.type == 
OP_DEST_PUSHF) { + if (!state->uaccess_stack) { +@@ -2442,28 +2453,20 @@ static int validate_return(struct symbol + return 0; + } + +-/* +- * Alternatives should not contain any ORC entries, this in turn means they +- * should not contain any CFI ops, which implies all instructions should have +- * the same same CFI state. +- * +- * It is possible to constuct alternatives that have unreachable holes that go +- * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED +- * states which then results in ORC entries, which we just said we didn't want. +- * +- * Avoid them by copying the CFI entry of the first instruction into the whole +- * alternative. +- */ +-static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) ++static struct instruction *next_insn_to_validate(struct objtool_file *file, ++ struct instruction *insn) + { +- struct instruction *first_insn = insn; + struct alt_group *alt_group = insn->alt_group; + +- sec_for_each_insn_continue(file, insn) { +- if (insn->alt_group != alt_group) +- break; +- insn->cfi = first_insn->cfi; +- } ++ /* ++ * Simulate the fact that alternatives are patched in-place. When the ++ * end of a replacement alt_group is reached, redirect objtool flow to ++ * the end of the original alt_group. 
++ */ ++ if (alt_group && insn == alt_group->last_insn && alt_group->orig_group) ++ return next_insn_same_sec(file, alt_group->orig_group->last_insn); ++ ++ return next_insn_same_sec(file, insn); + } + + /* +@@ -2484,7 +2487,7 @@ static int validate_branch(struct objtoo + sec = insn->sec; + + while (1) { +- next_insn = next_insn_same_sec(file, insn); ++ next_insn = next_insn_to_validate(file, insn); + + if (file->c_file && func && insn->func && func != insn->func->pfunc) { + WARN("%s() falls through to next function %s()", +@@ -2517,6 +2520,9 @@ static int validate_branch(struct objtoo + + insn->visited |= visited; + ++ if (propagate_alt_cfi(file, insn)) ++ return 1; ++ + if (!insn->ignore_alts && !list_empty(&insn->alts)) { + bool skip_orig = false; + +@@ -2532,9 +2538,6 @@ static int validate_branch(struct objtoo + } + } + +- if (insn->alt_group) +- fill_alternative_cfi(file, insn); +- + if (skip_orig) + return 0; + } +@@ -2767,9 +2770,6 @@ static bool ignore_unreachable_insn(stru + !strcmp(insn->sec->name, ".altinstr_aux")) + return true; + +- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET) +- return true; +- + if (!insn->func) + return false; + +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -28,6 +28,12 @@ struct alt_group { + + /* First and last instructions in the group */ + struct instruction *first_insn, *last_insn; ++ ++ /* ++ * Byte-offset-addressed len-sized array of pointers to CFI structs. ++ * This is shared with the other alt_groups in the same alternative. 
++ */ ++ struct cfi_state **cfi; + }; + + struct instruction { +--- a/tools/objtool/orc_gen.c ++++ b/tools/objtool/orc_gen.c +@@ -141,6 +141,13 @@ static int orc_list_add(struct list_head + return 0; + } + ++static unsigned long alt_group_len(struct alt_group *alt_group) ++{ ++ return alt_group->last_insn->offset + ++ alt_group->last_insn->len - ++ alt_group->first_insn->offset; ++} ++ + int orc_create(struct objtool_file *file) + { + struct section *sec, *ip_rsec, *orc_sec; +@@ -165,15 +172,48 @@ int orc_create(struct objtool_file *file + continue; + + sec_for_each_insn(file, sec, insn) { +- if (init_orc_entry(&orc, &insn->cfi)) +- return -1; +- if (!memcmp(&prev_orc, &orc, sizeof(orc))) ++ struct alt_group *alt_group = insn->alt_group; ++ int i; ++ ++ if (!alt_group) { ++ if (init_orc_entry(&orc, &insn->cfi)) ++ return -1; ++ if (!memcmp(&prev_orc, &orc, sizeof(orc))) ++ continue; ++ if (orc_list_add(&orc_list, &orc, sec, ++ insn->offset)) ++ return -1; ++ nr++; ++ prev_orc = orc; ++ empty = false; + continue; +- if (orc_list_add(&orc_list, &orc, sec, insn->offset)) +- return -1; +- nr++; +- prev_orc = orc; +- empty = false; ++ } ++ ++ /* ++ * Alternatives can have different stack layout ++ * possibilities (but they shouldn't conflict). ++ * Instead of traversing the instructions, use the ++ * alt_group's flattened byte-offset-addressed CFI ++ * array. 
++ */ ++ for (i = 0; i < alt_group_len(alt_group); i++) { ++ struct cfi_state *cfi = alt_group->cfi[i]; ++ if (!cfi) ++ continue; ++ if (init_orc_entry(&orc, cfi)) ++ return -1; ++ if (!memcmp(&prev_orc, &orc, sizeof(orc))) ++ continue; ++ if (orc_list_add(&orc_list, &orc, insn->sec, ++ insn->offset + i)) ++ return -1; ++ nr++; ++ prev_orc = orc; ++ empty = false; ++ } ++ ++ /* Skip to the end of the alt_group */ ++ insn = alt_group->last_insn; + } + + /* Add a section terminator */ diff --git a/queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch b/queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch new file mode 100644 index 00000000000..79fe95a57bd --- /dev/null +++ b/queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch @@ -0,0 +1,94 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Thu, 30 Sep 2021 12:43:10 +0200 +Subject: objtool: Teach get_alt_entry() about more relocation types + +From: Peter Zijlstra + +commit 24ff652573754fe4c03213ebd26b17e86842feb3 upstream. + +Occasionally objtool encounters symbol (as opposed to section) +relocations in .altinstructions. Typically they are the alternatives +written by elf_add_alternative() as encountered on a noinstr +validation run on vmlinux after having already ran objtool on the +individual .o files. + +Basically this is the counterpart of commit 44f6a7c0755d ("objtool: +Fix seg fault with Clang non-section symbols"), because when these new +assemblers (binutils now also does this) strip the section symbols, +elf_add_reloc_to_insn() is forced to emit symbol based relocations. + +As such, teach get_alt_entry() about different relocation types. 
+ +Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") +Reported-by: Stephen Rothwell +Reported-by: Borislav Petkov +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Josh Poimboeuf +Tested-by: Nathan Chancellor +Link: https://lore.kernel.org/r/YVWUvknIEVNkPvnP@hirez.programming.kicks-ass.net +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/special.c | 32 +++++++++++++++++++++++++------- + 1 file changed, 25 insertions(+), 7 deletions(-) + +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -55,6 +55,24 @@ void __weak arch_handle_alternative(unsi + { + } + ++static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off) ++{ ++ switch (reloc->sym->type) { ++ case STT_FUNC: ++ *sec = reloc->sym->sec; ++ *off = reloc->sym->offset + reloc->addend; ++ return true; ++ ++ case STT_SECTION: ++ *sec = reloc->sym->sec; ++ *off = reloc->addend; ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ + static int get_alt_entry(struct elf *elf, struct special_entry *entry, + struct section *sec, int idx, + struct special_alt *alt) +@@ -87,15 +105,12 @@ static int get_alt_entry(struct elf *elf + WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); + return -1; + } +- if (orig_reloc->sym->type != STT_SECTION) { +- WARN_FUNC("don't know how to handle non-section reloc symbol %s", ++ if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) { ++ WARN_FUNC("don't know how to handle reloc symbol type: %s", + sec, offset + entry->orig, orig_reloc->sym->name); + return -1; + } + +- alt->orig_sec = orig_reloc->sym->sec; +- alt->orig_off = orig_reloc->addend; +- + if (!entry->group || alt->new_len) { + new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); + if (!new_reloc) { +@@ -112,8 +127,11 @@ static int get_alt_entry(struct elf *elf + if (arch_is_retpoline(new_reloc->sym)) + return 1; + +- alt->new_sec = new_reloc->sym->sec; +- alt->new_off = (unsigned 
int)new_reloc->addend; ++ if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) { ++ WARN_FUNC("don't know how to handle reloc symbol type: %s", ++ sec, offset + entry->new, new_reloc->sym->name); ++ return -1; ++ } + + /* _ASM_EXTABLE_EX hack */ + if (alt->new_off >= 0x7ffffff0) diff --git a/queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch b/queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch new file mode 100644 index 00000000000..3762e418716 --- /dev/null +++ b/queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch @@ -0,0 +1,37 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:47 +0200 +Subject: objtool: Treat .text.__x86.* as noinstr + +From: Peter Zijlstra + +commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream. + +Needed because zen_untrain_ret() will be called from noinstr code. + +Also makes sense since the thunks MUST NOT contain instrumentation nor +be poked with dynamic instrumentation. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -366,7 +366,8 @@ static int decode_instructions(struct ob + sec->text = true; + + if (!strcmp(sec->name, ".noinstr.text") || +- !strcmp(sec->name, ".entry.text")) ++ !strcmp(sec->name, ".entry.text") || ++ !strncmp(sec->name, ".text.__x86.", 12)) + sec->noinstr = true; + + for (offset = 0; offset < sec->len; offset += insn->len) { diff --git a/queue-5.10/objtool-update-retpoline-validation.patch b/queue-5.10/objtool-update-retpoline-validation.patch new file mode 100644 index 00000000000..ddde9ca39b4 --- /dev/null +++ b/queue-5.10/objtool-update-retpoline-validation.patch @@ -0,0 +1,112 @@ +From foo@baz Tue Jul 
12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:59 +0200 +Subject: objtool: Update Retpoline validation + +From: Peter Zijlstra + +commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream. + +Update retpoline validation with the new CONFIG_RETPOLINE requirement of +not having bare naked RET instructions. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict fixup at arch/x86/xen/xen-head.S] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 6 ++++++ + arch/x86/mm/mem_encrypt_boot.S | 2 ++ + arch/x86/xen/xen-head.S | 1 + + tools/objtool/check.c | 19 +++++++++++++------ + 4 files changed, 22 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -76,6 +76,12 @@ + .endm + + /* ++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions ++ * vs RETBleed validation. ++ */ ++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE ++ ++/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. 
+--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute) + pop %rbp + + /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE + ret + int3 + SYM_FUNC_END(sme_encrypt_execute) +@@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy) + pop %r15 + + /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE + ret + int3 + .L__enc_copy_end: +--- a/arch/x86/xen/xen-head.S ++++ b/arch/x86/xen/xen-head.S +@@ -70,6 +70,7 @@ SYM_CODE_START(hypercall_page) + .rept (PAGE_SIZE / 32) + UNWIND_HINT_FUNC + .skip 31, 0x90 ++ ANNOTATE_UNRET_SAFE + RET + .endr + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1799,8 +1799,9 @@ static int read_retpoline_hints(struct o + } + + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call", ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", + insn->sec, insn->offset); + return -1; + } +@@ -3051,7 +3052,8 @@ static int validate_retpoline(struct obj + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) + continue; + + if (insn->retpoline_safe) +@@ -3066,9 +3068,14 @@ static int validate_retpoline(struct obj + if (!strcmp(insn->sec->name, ".init.text") && !module) + continue; + +- WARN_FUNC("indirect %s found in RETPOLINE build", +- insn->sec, insn->offset, +- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ if (insn->type == INSN_RETURN) { ++ WARN_FUNC("'naked' return found in RETPOLINE build", ++ insn->sec, insn->offset); ++ } else { ++ WARN_FUNC("indirect %s found in RETPOLINE build", ++ insn->sec, insn->offset, ++ insn->type == INSN_JUMP_DYNAMIC ? 
"jump" : "call"); ++ } + + warnings++; + } diff --git a/queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch b/queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch new file mode 100644 index 00000000000..6c4585c7561 --- /dev/null +++ b/queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Mon, 21 Jun 2021 16:13:55 +0200 +Subject: objtool/x86: Ignore __x86_indirect_alt_* symbols + +From: Peter Zijlstra + +commit 31197d3a0f1caeb60fb01f6755e28347e4f44037 upstream. + +Because the __x86_indirect_alt* symbols are just that, objtool will +try and validate them as regular symbols, instead of the alternative +replacements that they are. + +This goes sideways for FRAME_POINTER=y builds; which generate a fair +amount of warnings. + +Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Link: https://lore.kernel.org/r/YNCgxwLBiK9wclYJ@hirez.programming.kicks-ass.net +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/lib/retpoline.S | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_al + 2: .skip 5-(2b-1b), 0x90 + SYM_FUNC_END(__x86_indirect_alt_call_\reg) + ++STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg) ++ + SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) + ANNOTATE_RETPOLINE_SAFE + 1: jmp *%\reg + 2: .skip 5-(2b-1b), 0x90 + SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) + ++STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) ++ + .endm + + /* diff --git a/queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch b/queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch new file mode 100644 index 00000000000..f815a74cc1c --- /dev/null +++ 
b/queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch @@ -0,0 +1,494 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:36 +0200 +Subject: objtool,x86: Replace alternatives with .retpoline_sites + +From: Peter Zijlstra + +commit 134ab5bd1883312d7a4b3033b05c6b5a1bb8889b upstream. + +Instead of writing complete alternatives, simply provide a list of all +the retpoline thunk calls. Then the kernel is free to do with them as +it pleases. Simpler code all-round. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.850007165@infradead.org +[cascardo: fixed conflict because of missing + 8b946cc38e063f0f7bb67789478c38f6d7d457c9] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: deleted functions had slightly different code] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/vmlinux.lds.S | 14 ++++ + tools/objtool/arch/x86/decode.c | 120 ------------------------------------ + tools/objtool/check.c | 132 ++++++++++++++++++++++++++++------------ + tools/objtool/elf.c | 83 ------------------------- + tools/objtool/elf.h | 1 + tools/objtool/special.c | 8 -- + 6 files changed, 107 insertions(+), 251 deletions(-) + +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -272,6 +272,20 @@ SECTIONS + __parainstructions_end = .; + } + ++#ifdef CONFIG_RETPOLINE ++ /* ++ * List of instructions that call/jmp/jcc to retpoline thunks ++ * __x86_indirect_thunk_*(). These instructions can be patched along ++ * with alternatives, after which the section can be freed. ++ */ ++ . = ALIGN(8); ++ .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) { ++ __retpoline_sites = .; ++ *(.retpoline_sites) ++ __retpoline_sites_end = .; ++ } ++#endif ++ + /* + * struct alt_inst entries. 
From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -606,126 +606,6 @@ const char *arch_ret_insn(int len) + return ret[len-1]; + } + +-/* asm/alternative.h ? */ +- +-#define ALTINSTR_FLAG_INV (1 << 15) +-#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) +- +-struct alt_instr { +- s32 instr_offset; /* original instruction */ +- s32 repl_offset; /* offset to replacement instruction */ +- u16 cpuid; /* cpuid bit set for replacement */ +- u8 instrlen; /* length of original instruction */ +- u8 replacementlen; /* length of new instruction */ +-} __packed; +- +-static int elf_add_alternative(struct elf *elf, +- struct instruction *orig, struct symbol *sym, +- int cpuid, u8 orig_len, u8 repl_len) +-{ +- const int size = sizeof(struct alt_instr); +- struct alt_instr *alt; +- struct section *sec; +- Elf_Scn *s; +- +- sec = find_section_by_name(elf, ".altinstructions"); +- if (!sec) { +- sec = elf_create_section(elf, ".altinstructions", +- SHF_ALLOC, 0, 0); +- +- if (!sec) { +- WARN_ELF("elf_create_section"); +- return -1; +- } +- } +- +- s = elf_getscn(elf->elf, sec->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return -1; +- } +- +- sec->data = elf_newdata(s); +- if (!sec->data) { +- WARN_ELF("elf_newdata"); +- return -1; +- } +- +- sec->data->d_size = size; +- sec->data->d_align = 1; +- +- alt = sec->data->d_buf = malloc(size); +- if (!sec->data->d_buf) { +- perror("malloc"); +- return -1; +- } +- memset(sec->data->d_buf, 0, size); +- +- if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, +- R_X86_64_PC32, orig->sec, orig->offset)) { +- WARN("elf_create_reloc: alt_instr::instr_offset"); +- return -1; +- } +- +- if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, +- R_X86_64_PC32, sym, 0)) { +- WARN("elf_create_reloc: alt_instr::repl_offset"); +- return -1; +- } +- +- alt->cpuid = cpuid; +- alt->instrlen = orig_len; +- alt->replacementlen 
= repl_len; +- +- sec->sh.sh_size += size; +- sec->changed = true; +- +- return 0; +-} +- +-#define X86_FEATURE_RETPOLINE ( 7*32+12) +- +-int arch_rewrite_retpolines(struct objtool_file *file) +-{ +- struct instruction *insn; +- struct reloc *reloc; +- struct symbol *sym; +- char name[32] = ""; +- +- list_for_each_entry(insn, &file->retpoline_call_list, call_node) { +- +- if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) +- continue; +- +- if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) +- continue; +- +- reloc = insn->reloc; +- +- sprintf(name, "__x86_indirect_alt_%s_%s", +- insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call", +- reloc->sym->name + 21); +- +- sym = find_symbol_by_name(file->elf, name); +- if (!sym) { +- sym = elf_create_undef_symbol(file->elf, name); +- if (!sym) { +- WARN("elf_create_undef_symbol"); +- return -1; +- } +- } +- +- if (elf_add_alternative(file->elf, insn, sym, +- ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { +- WARN("elf_add_alternative"); +- return -1; +- } +- } +- +- return 0; +-} +- + int arch_decode_hint_reg(u8 sp_reg, int *base) + { + switch (sp_reg) { +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -606,6 +606,52 @@ static int create_static_call_sections(s + return 0; + } + ++static int create_retpoline_sites_sections(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ int idx; ++ ++ sec = find_section_by_name(file->elf, ".retpoline_sites"); ++ if (sec) { ++ WARN("file already has .retpoline_sites, skipping"); ++ return 0; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) ++ idx++; ++ ++ if (!idx) ++ return 0; ++ ++ sec = elf_create_section(file->elf, ".retpoline_sites", 0, ++ sizeof(int), idx); ++ if (!sec) { ++ WARN("elf_create_section: .retpoline_sites"); ++ return -1; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) { ++ ++ int *site = (int *)sec->data->d_buf + idx; ++ 
*site = 0; ++ ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(int), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) { ++ WARN("elf_add_reloc_to_insn: .retpoline_sites"); ++ return -1; ++ } ++ ++ idx++; ++ } ++ ++ return 0; ++} ++ + /* + * Warnings shouldn't be reported for ignored functions. + */ +@@ -893,6 +939,11 @@ static void annotate_call_site(struct ob + return; + } + ++ if (sym->retpoline_thunk) { ++ list_add_tail(&insn->call_node, &file->retpoline_call_list); ++ return; ++ } ++ + /* + * Many compilers cannot disable KCOV with a function attribute + * so they need a little help, NOP out any KCOV calls from noinstr +@@ -933,6 +984,39 @@ static void add_call_dest(struct objtool + annotate_call_site(file, insn, sibling); + } + ++static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) ++{ ++ /* ++ * Retpoline calls/jumps are really dynamic calls/jumps in disguise, ++ * so convert them accordingly. ++ */ ++ switch (insn->type) { ++ case INSN_CALL: ++ insn->type = INSN_CALL_DYNAMIC; ++ break; ++ case INSN_JUMP_UNCONDITIONAL: ++ insn->type = INSN_JUMP_DYNAMIC; ++ break; ++ case INSN_JUMP_CONDITIONAL: ++ insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; ++ break; ++ default: ++ return; ++ } ++ ++ insn->retpoline_safe = true; ++ ++ /* ++ * Whatever stack impact regular CALLs have, should be undone ++ * by the RETURN of the called function. ++ * ++ * Annotated intra-function calls retain the stack_ops but ++ * are converted to JUMP, see read_intra_function_calls(). ++ */ ++ remove_insn_ops(insn); ++ ++ annotate_call_site(file, insn, false); ++} + /* + * Find the destination instructions for all jumps. + */ +@@ -955,19 +1039,7 @@ static int add_jump_destinations(struct + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); + } else if (reloc->sym->retpoline_thunk) { +- /* +- * Retpoline jumps are really dynamic jumps in +- * disguise, so convert them accordingly. 
+- */ +- if (insn->type == INSN_JUMP_UNCONDITIONAL) +- insn->type = INSN_JUMP_DYNAMIC; +- else +- insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; +- +- list_add_tail(&insn->call_node, +- &file->retpoline_call_list); +- +- insn->retpoline_safe = true; ++ add_retpoline_call(file, insn); + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ +@@ -1096,18 +1168,7 @@ static int add_call_destinations(struct + add_call_dest(file, insn, dest, false); + + } else if (reloc->sym->retpoline_thunk) { +- /* +- * Retpoline calls are really dynamic calls in +- * disguise, so convert them accordingly. +- */ +- insn->type = INSN_CALL_DYNAMIC; +- insn->retpoline_safe = true; +- +- list_add_tail(&insn->call_node, +- &file->retpoline_call_list); +- +- remove_insn_ops(insn); +- continue; ++ add_retpoline_call(file, insn); + + } else + add_call_dest(file, insn, reloc->sym, false); +@@ -1806,11 +1867,6 @@ static void mark_rodata(struct objtool_f + file->rodata = found; + } + +-__weak int arch_rewrite_retpolines(struct objtool_file *file) +-{ +- return 0; +-} +- + static int decode_sections(struct objtool_file *file) + { + int ret; +@@ -1879,15 +1935,6 @@ static int decode_sections(struct objtoo + if (ret) + return ret; + +- /* +- * Must be after add_special_section_alts(), since this will emit +- * alternatives. Must be after add_{jump,call}_destination(), since +- * those create the call insn lists. 
+- */ +- ret = arch_rewrite_retpolines(file); +- if (ret) +- return ret; +- + return 0; + } + +@@ -3159,6 +3206,13 @@ int check(struct objtool_file *file) + goto out; + warnings += ret; + ++ if (retpoline) { ++ ret = create_retpoline_sites_sections(file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ } ++ + if (stats) { + printf("nr_insns_visited: %ld\n", nr_insns_visited); + printf("nr_cfi: %ld\n", nr_cfi); +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -766,89 +766,6 @@ static int elf_add_string(struct elf *el + return len; + } + +-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) +-{ +- struct section *symtab, *symtab_shndx; +- struct symbol *sym; +- Elf_Data *data; +- Elf_Scn *s; +- +- sym = malloc(sizeof(*sym)); +- if (!sym) { +- perror("malloc"); +- return NULL; +- } +- memset(sym, 0, sizeof(*sym)); +- +- sym->name = strdup(name); +- +- sym->sym.st_name = elf_add_string(elf, NULL, sym->name); +- if (sym->sym.st_name == -1) +- return NULL; +- +- sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); +- // st_other 0 +- // st_shndx 0 +- // st_value 0 +- // st_size 0 +- +- symtab = find_section_by_name(elf, ".symtab"); +- if (!symtab) { +- WARN("can't find .symtab"); +- return NULL; +- } +- +- s = elf_getscn(elf->elf, symtab->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return NULL; +- } +- +- data = elf_newdata(s); +- if (!data) { +- WARN_ELF("elf_newdata"); +- return NULL; +- } +- +- data->d_buf = &sym->sym; +- data->d_size = sizeof(sym->sym); +- data->d_align = 1; +- +- sym->idx = symtab->len / sizeof(sym->sym); +- +- symtab->len += data->d_size; +- symtab->changed = true; +- +- symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); +- if (symtab_shndx) { +- s = elf_getscn(elf->elf, symtab_shndx->idx); +- if (!s) { +- WARN_ELF("elf_getscn"); +- return NULL; +- } +- +- data = elf_newdata(s); +- if (!data) { +- WARN_ELF("elf_newdata"); +- return NULL; +- } +- +- data->d_buf = &sym->sym.st_size; /* conveniently 
0 */ +- data->d_size = sizeof(Elf32_Word); +- data->d_align = 4; +- data->d_type = ELF_T_WORD; +- +- symtab_shndx->len += 4; +- symtab_shndx->changed = true; +- } +- +- sym->sec = find_section_by_index(elf, 0); +- +- elf_add_symbol(elf, sym); +- +- return sym; +-} +- + struct section *elf_create_section(struct elf *elf, const char *name, + unsigned int sh_flags, size_t entsize, int nr) + { +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -136,7 +136,6 @@ int elf_write_insn(struct elf *elf, stru + unsigned long offset, unsigned int len, + const char *insn); + int elf_write_reloc(struct elf *elf, struct reloc *reloc); +-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); + int elf_write(struct elf *elf); + void elf_close(struct elf *elf); + +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -105,14 +105,6 @@ static int get_alt_entry(struct elf *elf + return -1; + } + +- /* +- * Skip retpoline .altinstr_replacement... we already rewrite the +- * instructions for retpolines anyway, see arch_is_retpoline() +- * usage in add_{call,jump}_destinations(). +- */ +- if (arch_is_retpoline(new_reloc->sym)) +- return 1; +- + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); + + /* _ASM_EXTABLE_EX hack */ diff --git a/queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch b/queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch new file mode 100644 index 00000000000..953817e9df4 --- /dev/null +++ b/queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch @@ -0,0 +1,262 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:15 +0100 +Subject: objtool/x86: Rewrite retpoline thunk calls + +From: Peter Zijlstra + +commit 9bc0bb50727c8ac69fbb33fb937431cf3518ff37 upstream. 
+ +When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an +indirect call, have objtool rewrite it to: + + ALTERNATIVE "call __x86_indirect_thunk_\reg", + "call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE) + +Additionally, in order to not emit endless identical +.altinstr_replacement chunks, use a global symbol for them, see +__x86_indirect_alt_*. + +This also saves objtool from having to do code generation. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Reviewed-by: Miroslav Benes +Link: https://lkml.kernel.org/r/20210326151300.320177914@infradead.org +[bwh: Backported to 5.10: include "arch_elf.h" instead of "arch/elf.h"] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 12 ++- + arch/x86/lib/retpoline.S | 41 +++++++++++ + tools/objtool/arch/x86/decode.c | 117 ++++++++++++++++++++++++++++++++++ + 3 files changed, 167 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void); + + #ifdef CONFIG_RETPOLINE + +-#define DECL_INDIRECT_THUNK(reg) \ ++#undef GEN ++#define GEN(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); ++#include ++ ++#undef GEN ++#define GEN(reg) \ ++ extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); ++#include + + #undef GEN +-#define GEN(reg) DECL_INDIRECT_THUNK(reg) ++#define GEN(reg) \ ++ extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); + #include + + #endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -10,6 +10,8 @@ + #include + #include + ++ .section .text.__x86.indirect_thunk ++ + .macro RETPOLINE reg + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop_\@ +@@ -25,9 +27,9 @@ + .endm + + .macro THUNK reg +- .section .text.__x86.indirect_thunk + + .align 32 ++ + SYM_FUNC_START(__x86_indirect_thunk_\reg) + + ALTERNATIVE_2
__stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ +@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + .endm + + /* ++ * This generates .altinstr_replacement symbols for use by objtool. They, ++ * however, must not actually live in .altinstr_replacement since that will be ++ * discarded after init, but module alternatives will also reference these ++ * symbols. ++ * ++ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines. ++ */ ++.macro ALT_THUNK reg ++ ++ .align 1 ++ ++SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) ++ ANNOTATE_RETPOLINE_SAFE ++1: call *%\reg ++2: .skip 5-(2b-1b), 0x90 ++SYM_FUNC_END(__x86_indirect_alt_call_\reg) ++ ++SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) ++ ANNOTATE_RETPOLINE_SAFE ++1: jmp *%\reg ++2: .skip 5-(2b-1b), 0x90 ++SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) ++ ++.endm ++ ++/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather +@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + #define GEN(reg) EXPORT_THUNK(reg) + #include + ++#undef GEN ++#define GEN(reg) ALT_THUNK reg ++#include ++ ++#undef GEN ++#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg) ++#include ++ ++#undef GEN ++#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg) ++#include +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -16,6 +16,7 @@ + #include "../../arch.h" + #include "../../warn.h" + #include ++#include "arch_elf.h" + + static unsigned char op_to_cfi_reg[][2] = { + {CFI_AX, CFI_R8}, +@@ -585,6 +586,122 @@ const char *arch_nop_insn(int len) + return nops[len-1]; + } + ++/* asm/alternative.h ? 
*/ ++ ++#define ALTINSTR_FLAG_INV (1 << 15) ++#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) ++ ++struct alt_instr { ++ s32 instr_offset; /* original instruction */ ++ s32 repl_offset; /* offset to replacement instruction */ ++ u16 cpuid; /* cpuid bit set for replacement */ ++ u8 instrlen; /* length of original instruction */ ++ u8 replacementlen; /* length of new instruction */ ++} __packed; ++ ++static int elf_add_alternative(struct elf *elf, ++ struct instruction *orig, struct symbol *sym, ++ int cpuid, u8 orig_len, u8 repl_len) ++{ ++ const int size = sizeof(struct alt_instr); ++ struct alt_instr *alt; ++ struct section *sec; ++ Elf_Scn *s; ++ ++ sec = find_section_by_name(elf, ".altinstructions"); ++ if (!sec) { ++ sec = elf_create_section(elf, ".altinstructions", ++ SHF_WRITE, size, 0); ++ ++ if (!sec) { ++ WARN_ELF("elf_create_section"); ++ return -1; ++ } ++ } ++ ++ s = elf_getscn(elf->elf, sec->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } ++ ++ sec->data = elf_newdata(s); ++ if (!sec->data) { ++ WARN_ELF("elf_newdata"); ++ return -1; ++ } ++ ++ sec->data->d_size = size; ++ sec->data->d_align = 1; ++ ++ alt = sec->data->d_buf = malloc(size); ++ if (!sec->data->d_buf) { ++ perror("malloc"); ++ return -1; ++ } ++ memset(sec->data->d_buf, 0, size); ++ ++ if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, ++ R_X86_64_PC32, orig->sec, orig->offset)) { ++ WARN("elf_create_reloc: alt_instr::instr_offset"); ++ return -1; ++ } ++ ++ if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, ++ R_X86_64_PC32, sym, 0)) { ++ WARN("elf_create_reloc: alt_instr::repl_offset"); ++ return -1; ++ } ++ ++ alt->cpuid = cpuid; ++ alt->instrlen = orig_len; ++ alt->replacementlen = repl_len; ++ ++ sec->sh.sh_size += size; ++ sec->changed = true; ++ ++ return 0; ++} ++ ++#define X86_FEATURE_RETPOLINE ( 7*32+12) ++ ++int arch_rewrite_retpolines(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct reloc *reloc; ++ struct symbol *sym; ++ char name[32] 
= ""; ++ ++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) { ++ ++ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) ++ continue; ++ ++ reloc = insn->reloc; ++ ++ sprintf(name, "__x86_indirect_alt_%s_%s", ++ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call", ++ reloc->sym->name + 21); ++ ++ sym = find_symbol_by_name(file->elf, name); ++ if (!sym) { ++ sym = elf_create_undef_symbol(file->elf, name); ++ if (!sym) { ++ WARN("elf_create_undef_symbol"); ++ return -1; ++ } ++ } ++ ++ if (elf_add_alternative(file->elf, insn, sym, ++ ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { ++ WARN("elf_add_alternative"); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ + int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) + { + struct cfi_reg *cfa = &insn->cfi.cfa; diff --git a/queue-5.10/series b/queue-5.10/series new file mode 100644 index 00000000000..91daa3e414c --- /dev/null +++ b/queue-5.10/series @@ -0,0 +1,130 @@ +kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch +kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch +objtool-refactor-orc-section-generation.patch +objtool-add-alt_group-struct.patch +objtool-support-stack-layout-changes-in-alternatives.patch +objtool-support-retpoline-jump-detection-for-vmlinux.o.patch +objtool-assume-only-elf-functions-do-sibling-calls.patch +objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch +x86-xen-support-objtool-validation-in-xen-asm.s.patch +x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch +x86-alternative-merge-include-files.patch +x86-alternative-support-not-feature.patch +x86-alternative-support-alternative_ternary.patch +x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch +x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch +x86-insn-add-a-__ignore_sync_check__-marker.patch +x86-insn-add-an-insn_decode-api.patch +x86-insn-eval-handle-return-values-from-the-decoder.patch +x86-alternative-use-insn_decode.patch 
+x86-add-insn_decode_kernel.patch +x86-alternatives-optimize-optimize_nops.patch +x86-retpoline-simplify-retpolines.patch +objtool-correctly-handle-retpoline-thunk-calls.patch +objtool-handle-per-arch-retpoline-naming.patch +objtool-rework-the-elf_rebuild_reloc_section-logic.patch +objtool-add-elf_create_reloc-helper.patch +objtool-create-reloc-sections-implicitly.patch +objtool-extract-elf_strtab_concat.patch +objtool-extract-elf_symbol_add.patch +objtool-add-elf_create_undef_symbol.patch +objtool-keep-track-of-retpoline-call-sites.patch +objtool-cache-instruction-relocs.patch +objtool-skip-magical-retpoline-.altinstr_replacement.patch +objtool-x86-rewrite-retpoline-thunk-calls.patch +objtool-support-asm-jump-tables.patch +x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch +objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch +objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch +objtool-x86-ignore-__x86_indirect_alt_-symbols.patch +objtool-don-t-make-.altinstructions-writable.patch +objtool-teach-get_alt_entry-about-more-relocation-types.patch +objtool-print-out-the-symbol-type-when-complaining-about-it.patch +objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch +objtool-make-.altinstructions-section-entry-size-consistent.patch +objtool-introduce-cfi-hash.patch +objtool-handle-__sanitize_cov-tail-calls.patch +objtool-classify-symbols.patch +objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch +objtool-x86-replace-alternatives-with-.retpoline_sites.patch +x86-retpoline-remove-unused-replacement-symbols.patch +x86-asm-fix-register-order.patch +x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch +x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch +x86-retpoline-create-a-retpoline-thunk-array.patch +x86-alternative-implement-.retpoline_sites-support.patch +x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch 
+x86-alternative-try-inline-spectre_v2-retpoline-amd.patch +x86-alternative-add-debug-prints-to-apply_retpolines.patch +bpf-x86-simplify-computing-label-offsets.patch +bpf-x86-respect-x86_feature_retpoline.patch +x86-lib-atomic64_386_32-rename-things.patch +x86-prepare-asm-files-for-straight-line-speculation.patch +x86-prepare-inline-asm-for-straight-line-speculation.patch +x86-alternative-relax-text_poke_bp-constraint.patch +objtool-add-straight-line-speculation-validation.patch +x86-add-straight-line-speculation-mitigation.patch +tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch +kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch +objtool-default-ignore-int3-for-unreachable.patch +crypto-x86-poly1305-fixup-sls.patch +objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch +objtool-fix-code-relocs-vs-weak-symbols.patch +objtool-fix-type-of-reloc-addend.patch +objtool-fix-symbol-creation.patch +x86-entry-remove-skip_r11rcx.patch +objtool-fix-objtool-regression-on-x32-systems.patch +x86-realmode-build-with-d__disable_exports.patch +x86-kvm-vmx-make-noinstr-clean.patch +x86-cpufeatures-move-retpoline-flags-to-word-11.patch +x86-retpoline-cleanup-some-ifdefery.patch +x86-retpoline-swizzle-retpoline-thunk.patch +makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch +x86-retpoline-use-mfunction-return.patch +x86-undo-return-thunk-damage.patch +x86-objtool-create-.return_sites.patch +objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch +x86-static_call-use-alternative-ret-encoding.patch +x86-ftrace-use-alternative-ret-encoding.patch +x86-bpf-use-alternative-ret-encoding.patch +x86-kvm-fix-setcc-emulation-for-return-thunks.patch +x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch +x86-sev-avoid-using-__x86_return_thunk.patch +x86-use-return-thunk-in-asm-code.patch +objtool-treat-.text.__x86.-as-noinstr.patch +x86-add-magic-amd-return-thunk.patch 
+x86-bugs-report-amd-retbleed-vulnerability.patch +x86-bugs-add-amd-retbleed-boot-parameter.patch +x86-bugs-enable-stibp-for-jmp2ret.patch +x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch +x86-entry-add-kernel-ibrs-implementation.patch +x86-bugs-optimize-spec_ctrl-msr-writes.patch +x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch +x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch +x86-bugs-report-intel-retbleed-vulnerability.patch +intel_idle-disable-ibrs-during-long-idle.patch +objtool-update-retpoline-validation.patch +x86-xen-rename-sys-entry-points.patch +x86-bugs-add-retbleed-ibpb.patch +x86-bugs-do-ibpb-fallback-check-only-once.patch +objtool-add-entry-unret-validation.patch +x86-cpu-amd-add-spectral-chicken.patch +x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch +x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch +x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch +x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch +x86-speculation-remove-x86_spec_ctrl_mask.patch +objtool-re-add-unwind_hint_-save_restore.patch +kvm-vmx-flatten-__vmx_vcpu_run.patch +kvm-vmx-convert-launched-argument-to-flags.patch +kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch +kvm-vmx-fix-ibrs-handling-after-vmexit.patch +x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch +x86-common-stamp-out-the-stepping-madness.patch +x86-cpu-amd-enumerate-btc_no.patch +x86-retbleed-add-fine-grained-kconfig-knobs.patch +x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch +x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch +x86-kexec-disable-ret-on-kexec.patch +x86-speculation-disable-rrsba-behavior.patch +x86-static_call-serialize-__static_call_fixup-properly.patch diff --git a/queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch 
b/queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch new file mode 100644 index 00000000000..ed1d040f478 --- /dev/null +++ b/queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch @@ -0,0 +1,120 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Arnaldo Carvalho de Melo +Date: Sun, 9 May 2021 10:19:37 -0300 +Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' + +From: Arnaldo Carvalho de Melo + +commit 35cb8c713a496e8c114eed5e2a5a30b359876df2 upstream. + +To bring in the change made in this cset: + + f94909ceb1ed4bfd ("x86: Prepare asm files for straight-line-speculation") + +It silences these perf tools build warnings, no change in the tools: + + Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' + diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S + Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' + diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S + +The code generated was checked before and after using 'objdump -d /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o', +no changes. 
+ +Cc: Borislav Petkov +Cc: Peter Zijlstra +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/arch/x86/lib/memcpy_64.S | 12 ++++++------ + tools/arch/x86/lib/memset_64.S | 6 +++--- + 2 files changed, 9 insertions(+), 9 deletions(-) + +--- a/tools/arch/x86/lib/memcpy_64.S ++++ b/tools/arch/x86/lib/memcpy_64.S +@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) + rep movsq + movl %edx, %ecx + rep movsb +- ret ++ RET + SYM_FUNC_END(memcpy) + SYM_FUNC_END_ALIAS(__memcpy) + EXPORT_SYMBOL(memcpy) +@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) + movq %rdi, %rax + movq %rdx, %rcx + rep movsb +- ret ++ RET + SYM_FUNC_END(memcpy_erms) + + SYM_FUNC_START_LOCAL(memcpy_orig) +@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movq %r9, 1*8(%rdi) + movq %r10, -2*8(%rdi, %rdx) + movq %r11, -1*8(%rdi, %rdx) +- retq ++ RET + .p2align 4 + .Lless_16bytes: + cmpl $8, %edx +@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movq -1*8(%rsi, %rdx), %r9 + movq %r8, 0*8(%rdi) + movq %r9, -1*8(%rdi, %rdx) +- retq ++ RET + .p2align 4 + .Lless_8bytes: + cmpl $4, %edx +@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movl -4(%rsi, %rdx), %r8d + movl %ecx, (%rdi) + movl %r8d, -4(%rdi, %rdx) +- retq ++ RET + .p2align 4 + .Lless_3bytes: + subl $1, %edx +@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movb %cl, (%rdi) + + .Lend: +- retq ++ RET + SYM_FUNC_END(memcpy_orig) + + .popsection +--- a/tools/arch/x86/lib/memset_64.S ++++ b/tools/arch/x86/lib/memset_64.S +@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) + movl %edx,%ecx + rep stosb + movq %r9,%rax +- ret ++ RET + SYM_FUNC_END(__memset) + SYM_FUNC_END_ALIAS(memset) + EXPORT_SYMBOL(memset) +@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) + movq %rdx,%rcx + rep stosb + movq %r9,%rax +- ret ++ RET + SYM_FUNC_END(memset_erms) + + SYM_FUNC_START_LOCAL(memset_orig) +@@ -125,7 +125,7 @@ 
SYM_FUNC_START_LOCAL(memset_orig) + + .Lende: + movq %r10,%rax +- ret ++ RET + + .Lbad_alignment: + cmpq $7,%rdx diff --git a/queue-5.10/x86-add-insn_decode_kernel.patch b/queue-5.10/x86-add-insn_decode_kernel.patch new file mode 100644 index 00000000000..0c7b32216b4 --- /dev/null +++ b/queue-5.10/x86-add-insn_decode_kernel.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Ben Hutchings +Date: Mon, 11 Jul 2022 00:43:31 +0200 +Subject: x86: Add insn_decode_kernel() + +From: Ben Hutchings + +This was done by commit 52fa82c21f64e900a72437269a5cc9e0034b424e +upstream, but this backport avoids changing all callers of the +old decoder API. + +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/insn.h | 2 ++ + arch/x86/kernel/alternative.c | 2 +- + tools/arch/x86/include/asm/insn.h | 2 ++ + 3 files changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/insn.h ++++ b/arch/x86/include/asm/insn.h +@@ -105,6 +105,8 @@ enum insn_mode { + + extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + ++#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) ++ + /* Attribute will be determined after getting ModRM (for opcode groups) */ + static inline void insn_get_attribute(struct insn *insn) + { +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -1290,7 +1290,7 @@ static void text_poke_loc_init(struct te + if (!emulate) + emulate = opcode; + +- ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN); ++ ret = insn_decode_kernel(&insn, emulate); + + BUG_ON(ret < 0); + BUG_ON(len != insn.length); +--- a/tools/arch/x86/include/asm/insn.h ++++ b/tools/arch/x86/include/asm/insn.h +@@ -105,6 +105,8 @@ enum insn_mode { + + extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + ++#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), 
MAX_INSN_SIZE, INSN_MODE_KERN) ++ + /* Attribute will be determined after getting ModRM (for opcode groups) */ + static inline void insn_get_attribute(struct insn *insn) + { diff --git a/queue-5.10/x86-add-magic-amd-return-thunk.patch b/queue-5.10/x86-add-magic-amd-return-thunk.patch new file mode 100644 index 00000000000..fb329bdd869 --- /dev/null +++ b/queue-5.10/x86-add-magic-amd-return-thunk.patch @@ -0,0 +1,348 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:48 +0200 +Subject: x86: Add magic AMD return-thunk + +From: Peter Zijlstra + +commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream. + +Note: needs to be in a section distinct from Retpolines such that the +Retpoline RET substitution cannot possibly use immediate jumps. + +ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a +little tricky but works due to the fact that zen_untrain_ret() doesn't +have any stack ops and as such will emit a single ORC entry at the +start (+0x3f). + +Meanwhile, unwinding an IP, including the __x86_return_thunk() one +(+0x40) will search for the largest ORC entry smaller than or equal to the +IP; this will find the one ORC entry (+0x3f) and everything works. + + [ Alexandre: SVM part. ] + [ bp: Build fix, massages.
] + +Suggested-by: Andrew Cooper +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflicts at arch/x86/entry/entry_64_compat.S] +[cascardo: there is no ANNOTATE_NOENDBR] +[cascardo: objtool commit 34c861e806478ac2ea4032721defbf1d6967df08 missing] +[cascardo: conflict fixup] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: SEV-ES is not supported, so drop the change + in arch/x86/kvm/svm/vmenter.S] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 6 ++ + arch/x86/entry/entry_64_compat.S | 4 + + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 - + arch/x86/include/asm/nospec-branch.h | 17 ++++++++ + arch/x86/kernel/vmlinux.lds.S | 2 + arch/x86/kvm/svm/vmenter.S | 9 ++++ + arch/x86/lib/retpoline.S | 63 +++++++++++++++++++++++++++++-- + tools/objtool/check.c | 20 ++++++++- + 9 files changed, 117 insertions(+), 8 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -102,6 +102,7 @@ SYM_CODE_START(entry_SYSCALL_64) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ +@@ -675,6 +676,7 @@ native_irq_return_ldt: + pushq %rdi /* Stash user RDI */ + swapgs /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ ++ UNTRAIN_RET + + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ +@@ -910,6 +912,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * be retrieved from a kernel internal table. + */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ++ UNTRAIN_RET + + /* + * Handling GSBASE depends on the availability of FSGSBASE. +@@ -1022,6 +1025,7 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. 
Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ UNTRAIN_RET + + .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ +@@ -1077,6 +1081,7 @@ SYM_CODE_START_LOCAL(error_entry) + SWAPGS + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ UNTRAIN_RET + + /* + * Pretend that the exception came from user mode: set up pt_regs +@@ -1171,6 +1176,7 @@ SYM_CODE_START(asm_exc_nmi) + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 ++ UNTRAIN_RET + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -71,6 +72,7 @@ SYM_CODE_START(entry_SYSENTER_compat) + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ + SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* + * User tracing code (ptrace or signal handlers) might assume that +@@ -211,6 +213,7 @@ SYM_CODE_START(entry_SYSCALL_compat) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ +@@ -377,6 +380,7 @@ SYM_CODE_START(entry_INT80_compat) + pushq (%rdi) /* pt_regs->di */ + .Lint80_keep_stack: + ++ UNTRAIN_RET + pushq %rsi /* pt_regs->si */ + xorl %esi, %esi /* nospec si */ + pushq %rdx /* pt_regs->dx */ +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -297,6 +297,7 @@ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use 
REturn THUNK */ ++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -61,7 +61,8 @@ + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ +- (1 << (X86_FEATURE_RETHUNK & 31))) ++ (1 << (X86_FEATURE_RETHUNK & 31)) | \ ++ (1 << (X86_FEATURE_UNRET & 31))) + #endif + + /* Force disable because it's broken beyond repair */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -112,6 +112,22 @@ + #endif + .endm + ++/* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the ++ * return thunk isn't mapped into the userspace tables (then again, AMD ++ * typically has NO_MELTDOWN). ++ * ++ * Doesn't clobber any registers but does require a stable stack. ++ * ++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point ++ * where we have a stack but before any RET instruction. 
++ */ ++.macro UNTRAIN_RET ++#ifdef CONFIG_RETPOLINE ++ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET ++#endif ++.endm ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -121,6 +137,7 @@ + ".popsection\n\t" + + extern void __x86_return_thunk(void); ++extern void zen_untrain_ret(void); + + #ifdef CONFIG_RETPOLINE + +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -142,7 +142,7 @@ SECTIONS + + #ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; +- *(.text.__x86.indirect_thunk) ++ *(.text.__x86.*) + __indirect_thunk_end = .; + #endif + } :text =0xcccc +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -129,6 +129,15 @@ SYM_FUNC_START(__svm_vcpu_run) + #endif + + /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize 'ret' if the return is ++ * from the kernel. ++ */ ++ UNTRAIN_RET ++ ++ /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. In theory, an L1 cache miss when restoring registers +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -71,10 +71,67 @@ SYM_CODE_END(__x86_indirect_thunk_array) + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +-SYM_CODE_START(__x86_return_thunk) +- UNWIND_HINT_EMPTY ++ .section .text.__x86.return_thunk ++ ++/* ++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture: ++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for ++ * alignment within the BTB. ++ * 2) The instruction at zen_untrain_ret must contain, and not ++ * end with, the 0xc3 byte of the RET. 
++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread ++ * from re-poisioning the BTB prediction. ++ */ ++ .align 64 ++ .skip 63, 0xcc ++SYM_FUNC_START_NOALIGN(zen_untrain_ret); ++ ++ /* ++ * As executed from zen_untrain_ret, this is: ++ * ++ * TEST $0xcc, %bl ++ * LFENCE ++ * JMP __x86_return_thunk ++ * ++ * Executing the TEST instruction has a side effect of evicting any BTB ++ * prediction (potentially attacker controlled) attached to the RET, as ++ * __x86_return_thunk + 1 isn't an instruction boundary at the moment. ++ */ ++ .byte 0xf6 ++ ++ /* ++ * As executed from __x86_return_thunk, this is a plain RET. ++ * ++ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. ++ * ++ * We subsequently jump backwards and architecturally execute the RET. ++ * This creates a correct BTB prediction (type=ret), but in the ++ * meantime we suffer Straight Line Speculation (because the type was ++ * no branch) which is halted by the INT3. ++ * ++ * With SMT enabled and STIBP active, a sibling thread cannot poison ++ * RET's prediction to a type of its choice, but can evict the ++ * prediction due to competitive sharing. If the prediction is ++ * evicted, __x86_return_thunk will suffer Straight Line Speculation ++ * which will be contained safely by the INT3. ++ */ ++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 + SYM_CODE_END(__x86_return_thunk) + +-__EXPORT_THUNK(__x86_return_thunk) ++ /* ++ * Ensure the TEST decoding / BTB invalidation is complete. ++ */ ++ lfence ++ ++ /* ++ * Jump back and execute the RET in the middle of the TEST instruction. ++ * INT3 is for SLS protection. 
++ */ ++ jmp __x86_return_thunk ++ int3 ++SYM_FUNC_END(zen_untrain_ret) ++__EXPORT_THUNK(zen_untrain_ret) ++ ++EXPORT_SYMBOL(__x86_return_thunk) +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1082,7 +1082,7 @@ static void add_retpoline_call(struct ob + annotate_call_site(file, insn, false); + } + +-static void add_return_call(struct objtool_file *file, struct instruction *insn) ++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) + { + /* + * Return thunk tail calls are really just returns in disguise, +@@ -1092,7 +1092,7 @@ static void add_return_call(struct objto + insn->retpoline_safe = true; + + /* Skip the non-text sections, specially .discard ones */ +- if (insn->sec->text) ++ if (add && insn->sec->text) + list_add_tail(&insn->call_node, &file->return_thunk_list); + } + +@@ -1121,7 +1121,7 @@ static int add_jump_destinations(struct + add_retpoline_call(file, insn); + continue; + } else if (reloc->sym->return_thunk) { +- add_return_call(file, insn); ++ add_return_call(file, insn, true); + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ +@@ -1138,6 +1138,7 @@ static int add_jump_destinations(struct + + insn->jump_dest = find_insn(file, dest_sec, dest_off); + if (!insn->jump_dest) { ++ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); + + /* + * This is a special case where an alt instruction +@@ -1147,6 +1148,19 @@ static int add_jump_destinations(struct + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + ++ /* ++ * This is a special case for zen_untrain_ret(). ++ * It jumps to __x86_return_thunk(), but objtool ++ * can't find the thunk's starting RET ++ * instruction, because the RET is also in the ++ * middle of another instruction. Objtool only ++ * knows about the outer instruction. 
++ */ ++ if (sym && sym->return_thunk) { ++ add_return_call(file, insn, false); ++ continue; ++ } ++ + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); diff --git a/queue-5.10/x86-add-straight-line-speculation-mitigation.patch b/queue-5.10/x86-add-straight-line-speculation-mitigation.patch new file mode 100644 index 00000000000..8ba8355c3fb --- /dev/null +++ b/queue-5.10/x86-add-straight-line-speculation-mitigation.patch @@ -0,0 +1,200 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sat, 4 Dec 2021 14:43:44 +0100 +Subject: x86: Add straight-line-speculation mitigation + +From: Peter Zijlstra + +commit e463a09af2f0677b9485a7e8e4e70b396b2ffb6f upstream. + +Make use of an upcoming GCC feature to mitigate +straight-line-speculation for x86: + + https://gcc.gnu.org/g:53a643f8568067d7700a9f2facc8ba39974973d3 + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952 + https://bugs.llvm.org/show_bug.cgi?id=52323 + +It's built tested on x86_64-allyesconfig using GCC-12 and GCC-11. + +Maintenance overhead of this should be fairly low due to objtool +validation. + +Size overhead of all these additional int3 instructions comes to: + + text data bss dec hex filename + 22267751 6933356 2011368 31212475 1dc43bb defconfig-build/vmlinux + 22804126 6933356 1470696 31208178 1dc32f2 defconfig-build/vmlinux.sls + +Or roughly 2.4% additional text. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20211204134908.140103474@infradead.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +[bwh: Backported to 5.10: + - In scripts/Makefile.build, add the objtool option with an ifdef + block, same as for other options + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/Kconfig | 12 ++++++++++++ + arch/x86/Makefile | 6 +++++- + arch/x86/include/asm/linkage.h | 10 ++++++++++ + arch/x86/include/asm/static_call.h | 2 +- + arch/x86/kernel/ftrace.c | 2 +- + arch/x86/kernel/static_call.c | 5 +++-- + arch/x86/lib/memmove_64.S | 2 +- + arch/x86/lib/retpoline.S | 2 +- + scripts/Makefile.build | 3 +++ + scripts/link-vmlinux.sh | 3 +++ + 10 files changed, 40 insertions(+), 7 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -462,6 +462,18 @@ config RETPOLINE + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + ++config CC_HAS_SLS ++ def_bool $(cc-option,-mharden-sls=all) ++ ++config SLS ++ bool "Mitigate Straight-Line-Speculation" ++ depends on CC_HAS_SLS && X86_64 ++ default n ++ help ++ Compile the kernel with straight-line-speculation options to guard ++ against straight line speculation. The kernel image might be slightly ++ larger. 
++ + config X86_CPU_RESCTRL + bool "x86 CPU resource control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -196,7 +196,11 @@ ifdef CONFIG_RETPOLINE + endif + endif + +-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) ++ifdef CONFIG_SLS ++ KBUILD_CFLAGS += -mharden-sls=all ++endif ++ ++KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) + + ifdef CONFIG_X86_NEED_RELOCS + LDFLAGS_vmlinux := --emit-relocs --discard-none +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -18,9 +18,19 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + ++#ifdef CONFIG_SLS ++#define RET ret; int3 ++#else ++#define RET ret ++#endif ++ + #else /* __ASSEMBLY__ */ + ++#ifdef CONFIG_SLS ++#define ASM_RET "ret; int3\n\t" ++#else + #define ASM_RET "ret\n\t" ++#endif + + #endif /* __ASSEMBLY__ */ + +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -35,7 +35,7 @@ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. 
+ 4)") + + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ +- __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop") ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") + + + #define ARCH_ADD_TRAMP_KEY(name) \ +--- a/arch/x86/kernel/ftrace.c ++++ b/arch/x86/kernel/ftrace.c +@@ -308,7 +308,7 @@ union ftrace_op_code_union { + } __attribute__((packed)); + }; + +-#define RET_SIZE 1 ++#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) + + static unsigned long + create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -11,6 +11,8 @@ enum insn_type { + RET = 3, /* tramp / site cond-tail-call */ + }; + ++static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; ++ + static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) + { + int size = CALL_INSN_SIZE; +@@ -30,8 +32,7 @@ static void __ref __static_call_transfor + break; + + case RET: +- code = text_gen_insn(RET_INSN_OPCODE, insn, func); +- size = RET_INSN_SIZE; ++ code = &retinsn; + break; + } + +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) + /* FSRM implies ERMS => no length checks, do the copy directly */ + .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM +- ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS ++ ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS + + /* + * movsq instruction have many startup latency +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\re + + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ +- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), 
X86_FEATURE_RETPOLINE_LFENCE + + .endm + +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -230,6 +230,9 @@ endif + ifdef CONFIG_X86_SMAP + objtool_args += --uaccess + endif ++ifdef CONFIG_SLS ++ objtool_args += --sls ++endif + + # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory + # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -77,6 +77,9 @@ objtool_link() + if [ -n "${CONFIG_X86_SMAP}" ]; then + objtoolopt="${objtoolopt} --uaccess" + fi ++ if [ -n "${CONFIG_SLS}" ]; then ++ objtoolopt="${objtoolopt} --sls" ++ fi + info OBJTOOL ${1} + tools/objtool/objtool ${objtoolopt} ${1} + fi diff --git a/queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch b/queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch new file mode 100644 index 00000000000..2c19ce26817 --- /dev/null +++ b/queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:45 +0200 +Subject: x86/alternative: Add debug prints to apply_retpolines() + +From: Peter Zijlstra + +commit d4b5a5c993009ffeb5febe3b701da3faab6adb96 upstream. + +Make sure we can see the text changes when booting with +'debug-alternative'. 
+ +Example output: + + [ ] SMP alternatives: retpoline at: __traceiter_initcall_level+0x1f/0x30 (ffffffff8100066f) len: 5 to: __x86_indirect_thunk_rax+0x0/0x20 + [ ] SMP alternatives: ffffffff82603e58: [2:5) optimized NOPs: ff d0 0f 1f 00 + [ ] SMP alternatives: ffffffff8100066f: orig: e8 cc 30 00 01 + [ ] SMP alternatives: ffffffff8100066f: repl: ff d0 0f 1f 00 + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.422273830@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -647,9 +647,15 @@ void __init_or_module noinline apply_ret + continue; + } + ++ DPRINTK("retpoline at: %pS (%px) len: %d to: %pS", ++ addr, addr, insn.length, ++ addr + insn.length + insn.immediate.value); ++ + len = patch_retpoline(addr, &insn, bytes); + if (len == insn.length) { + optimize_nops(bytes, len); ++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); ++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } diff --git a/queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch b/queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch new file mode 100644 index 00000000000..b424a27e21c --- /dev/null +++ b/queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch @@ -0,0 +1,97 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:43 +0200 +Subject: x86/alternative: Handle Jcc __x86_indirect_thunk_\reg + +From: Peter Zijlstra + +commit 2f0cbb2a8e5bbf101e9de118fc0eb168111a5e1e upstream. 
+ +Handle the rare cases where the compiler (clang) does an indirect +conditional tail-call using: + + Jcc __x86_indirect_thunk_\reg + +For the !RETPOLINE case this can be rewritten to fit the original (6 +byte) instruction like: + + Jncc.d8 1f + JMP *%\reg + NOP +1: + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.296470217@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 40 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 36 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -548,7 +548,8 @@ static int emit_indirect(int op, int reg + static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) + { + retpoline_thunk_t *target; +- int reg, i = 0; ++ int reg, ret, i = 0; ++ u8 op, cc; + + target = addr + insn->length + insn->immediate.value; + reg = target - __x86_indirect_thunk_array; +@@ -562,9 +563,36 @@ static int patch_retpoline(void *addr, s + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + return -1; + +- i = emit_indirect(insn->opcode.bytes[0], reg, bytes); +- if (i < 0) +- return i; ++ op = insn->opcode.bytes[0]; ++ ++ /* ++ * Convert: ++ * ++ * Jcc.d32 __x86_indirect_thunk_\reg ++ * ++ * into: ++ * ++ * Jncc.d8 1f ++ * JMP *%\reg ++ * NOP ++ * 1: ++ */ ++ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ ++ if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { ++ cc = insn->opcode.bytes[1] & 0xf; ++ cc ^= 1; /* invert condition */ ++ ++ bytes[i++] = 0x70 + cc; /* Jcc.d8 */ ++ bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */ ++ ++ /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */ ++ op = JMP32_INSN_OPCODE; ++ } ++ ++ ret = emit_indirect(op, reg, bytes + i); ++ if (ret < 0) ++ return ret; ++ i += ret; + + for (; i 
< insn->length;) + bytes[i++] = 0x90; +@@ -598,6 +626,10 @@ void __init_or_module noinline apply_ret + case JMP32_INSN_OPCODE: + break; + ++ case 0x0f: /* escape */ ++ if (op2 >= 0x80 && op2 <= 0x8f) ++ break; ++ fallthrough; + default: + WARN_ON_ONCE(1); + continue; diff --git a/queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch b/queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch new file mode 100644 index 00000000000..de3d9097373 --- /dev/null +++ b/queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch @@ -0,0 +1,283 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:42 +0200 +Subject: x86/alternative: Implement .retpoline_sites support + +From: Peter Zijlstra + +commit 7508500900814d14e2e085cdc4e28142721abbdf upstream. + +Rewrite retpoline thunk call sites to be indirect calls for +spectre_v2=off. This ensures spectre_v2=off is as near to a +RETPOLINE=n build as possible. + +This is the replacement for objtool writing alternative entries to +ensure the same and achieves feature-parity with the previous +approach. + +One noteworthy feature is that it relies on the thunks to be in +machine order to compute the register index. + +Specifically, this does not yet address the Jcc __x86_indirect_thunk_* +calls generated by clang, a future patch will add this. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org +[cascardo: small conflict fixup at arch/x86/kernel/module.c] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: + - Use hex literal instead of BYTES_NOP1 + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/um/kernel/um_arch.c | 4 + + arch/x86/include/asm/alternative.h | 1 + arch/x86/kernel/alternative.c | 141 +++++++++++++++++++++++++++++++++++-- + arch/x86/kernel/module.c | 9 ++ + 4 files changed, 150 insertions(+), 5 deletions(-) + +--- a/arch/um/kernel/um_arch.c ++++ b/arch/um/kernel/um_arch.c +@@ -358,6 +358,10 @@ void __init check_bugs(void) + os_check_bugs(); + } + ++void apply_retpolines(s32 *start, s32 *end) ++{ ++} ++ + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) + { + } +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -75,6 +75,7 @@ extern int alternatives_patched; + + extern void alternative_instructions(void); + extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); ++extern void apply_retpolines(s32 *start, s32 *end); + + struct module; + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + int __read_mostly alternatives_patched; + +@@ -268,6 +269,7 @@ static void __init_or_module add_nops(vo + } + } + ++extern s32 __retpoline_sites[], __retpoline_sites_end[]; + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + extern s32 __smp_locks[], __smp_locks_end[]; + void text_poke_early(void *addr, const void *opcode, size_t len); +@@ -376,7 +378,7 @@ static __always_inline int optimize_nops + * "noinline" to cause control flow change and thus invalidate I$ and + * cause refetch after modification. 
+ */ +-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) ++static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) + { + struct insn insn; + int i = 0; +@@ -394,11 +396,11 @@ static void __init_or_module noinline op + * optimized. + */ + if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) +- i += optimize_nops_range(instr, a->instrlen, i); ++ i += optimize_nops_range(instr, len, i); + else + i += insn.length; + +- if (i >= a->instrlen) ++ if (i >= len) + return; + } + } +@@ -486,10 +488,135 @@ void __init_or_module noinline apply_alt + text_poke_early(instr, insn_buff, insn_buff_sz); + + next: +- optimize_nops(a, instr); ++ optimize_nops(instr, a->instrlen); + } + } + ++#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION) ++ ++/* ++ * CALL/JMP *%\reg ++ */ ++static int emit_indirect(int op, int reg, u8 *bytes) ++{ ++ int i = 0; ++ u8 modrm; ++ ++ switch (op) { ++ case CALL_INSN_OPCODE: ++ modrm = 0x10; /* Reg = 2; CALL r/m */ ++ break; ++ ++ case JMP32_INSN_OPCODE: ++ modrm = 0x20; /* Reg = 4; JMP r/m */ ++ break; ++ ++ default: ++ WARN_ON_ONCE(1); ++ return -1; ++ } ++ ++ if (reg >= 8) { ++ bytes[i++] = 0x41; /* REX.B prefix */ ++ reg -= 8; ++ } ++ ++ modrm |= 0xc0; /* Mod = 3 */ ++ modrm += reg; ++ ++ bytes[i++] = 0xff; /* opcode */ ++ bytes[i++] = modrm; ++ ++ return i; ++} ++ ++/* ++ * Rewrite the compiler generated retpoline thunk calls. ++ * ++ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate ++ * indirect instructions, avoiding the extra indirection. 
++ * ++ * For example, convert: ++ * ++ * CALL __x86_indirect_thunk_\reg ++ * ++ * into: ++ * ++ * CALL *%\reg ++ * ++ */ ++static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) ++{ ++ retpoline_thunk_t *target; ++ int reg, i = 0; ++ ++ target = addr + insn->length + insn->immediate.value; ++ reg = target - __x86_indirect_thunk_array; ++ ++ if (WARN_ON_ONCE(reg & ~0xf)) ++ return -1; ++ ++ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */ ++ BUG_ON(reg == 4); ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) ++ return -1; ++ ++ i = emit_indirect(insn->opcode.bytes[0], reg, bytes); ++ if (i < 0) ++ return i; ++ ++ for (; i < insn->length;) ++ bytes[i++] = 0x90; ++ ++ return i; ++} ++ ++/* ++ * Generated by 'objtool --retpoline'. ++ */ ++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *addr = (void *)s + *s; ++ struct insn insn; ++ int len, ret; ++ u8 bytes[16]; ++ u8 op1, op2; ++ ++ ret = insn_decode_kernel(&insn, addr); ++ if (WARN_ON_ONCE(ret < 0)) ++ continue; ++ ++ op1 = insn.opcode.bytes[0]; ++ op2 = insn.opcode.bytes[1]; ++ ++ switch (op1) { ++ case CALL_INSN_OPCODE: ++ case JMP32_INSN_OPCODE: ++ break; ++ ++ default: ++ WARN_ON_ONCE(1); ++ continue; ++ } ++ ++ len = patch_retpoline(addr, &insn, bytes); ++ if (len == insn.length) { ++ optimize_nops(bytes, len); ++ text_poke_early(addr, bytes, len); ++ } ++ } ++} ++ ++#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ ++ ++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } ++ ++#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ ++ + #ifdef CONFIG_SMP + static void alternatives_smp_lock(const s32 *start, const s32 *end, + u8 *text, u8 *text_end) +@@ -774,6 +901,12 @@ void __init alternative_instructions(voi + * patching. + */ + ++ /* ++ * Rewrite the retpolines, must be done before alternatives since ++ * those can rewrite the retpoline thunks. 
++ */ ++ apply_retpolines(__retpoline_sites, __retpoline_sites_end); ++ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + #ifdef CONFIG_SMP +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr, + struct module *me) + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, +- *para = NULL, *orc = NULL, *orc_ip = NULL; ++ *para = NULL, *orc = NULL, *orc_ip = NULL, ++ *retpolines = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { +@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr, + orc = s; + if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) + orc_ip = s; ++ if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) ++ retpolines = s; + } + ++ if (retpolines) { ++ void *rseg = (void *)retpolines->sh_addr; ++ apply_retpolines(rseg, rseg + retpolines->sh_size); ++ } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; diff --git a/queue-5.10/x86-alternative-merge-include-files.patch b/queue-5.10/x86-alternative-merge-include-files.patch new file mode 100644 index 00000000000..8a848cd00ba --- /dev/null +++ b/queue-5.10/x86-alternative-merge-include-files.patch @@ -0,0 +1,433 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Juergen Gross +Date: Thu, 11 Mar 2021 15:23:06 +0100 +Subject: x86/alternative: Merge include files + +From: Juergen Gross + +commit 5e21a3ecad1500e35b46701e7f3f232e15d78e69 upstream. + +Merge arch/x86/include/asm/alternative-asm.h into +arch/x86/include/asm/alternative.h in order to make it easier to use +common definitions later. 
+ +Signed-off-by: Juergen Gross +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210311142319.4723-2-jgross@suse.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_32.S | 2 + arch/x86/entry/vdso/vdso32/system_call.S | 2 + arch/x86/include/asm/alternative-asm.h | 114 ------------------------------- + arch/x86/include/asm/alternative.h | 112 +++++++++++++++++++++++++++++- + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/include/asm/smap.h | 5 - + arch/x86/lib/atomic64_386_32.S | 2 + arch/x86/lib/atomic64_cx8_32.S | 2 + arch/x86/lib/copy_page_64.S | 2 + arch/x86/lib/copy_user_64.S | 2 + arch/x86/lib/memcpy_64.S | 2 + arch/x86/lib/memmove_64.S | 2 + arch/x86/lib/memset_64.S | 2 + arch/x86/lib/retpoline.S | 2 + 14 files changed, 120 insertions(+), 132 deletions(-) + delete mode 100644 arch/x86/include/asm/alternative-asm.h + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -40,7 +40,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +--- a/arch/x86/entry/vdso/vdso32/system_call.S ++++ b/arch/x86/entry/vdso/vdso32/system_call.S +@@ -6,7 +6,7 @@ + #include + #include + #include +-#include ++#include + + .text + .globl __kernel_vsyscall +--- a/arch/x86/include/asm/alternative-asm.h ++++ /dev/null +@@ -1,114 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef _ASM_X86_ALTERNATIVE_ASM_H +-#define _ASM_X86_ALTERNATIVE_ASM_H +- +-#ifdef __ASSEMBLY__ +- +-#include +- +-#ifdef CONFIG_SMP +- .macro LOCK_PREFIX +-672: lock +- .pushsection .smp_locks,"a" +- .balign 4 +- .long 672b - . +- .popsection +- .endm +-#else +- .macro LOCK_PREFIX +- .endm +-#endif +- +-/* +- * objtool annotation to ignore the alternatives and only consider the original +- * instruction(s). +- */ +-.macro ANNOTATE_IGNORE_ALTERNATIVE +- .Lannotate_\@: +- .pushsection .discard.ignore_alts +- .long .Lannotate_\@ - . 
+- .popsection +-.endm +- +-/* +- * Issue one struct alt_instr descriptor entry (need to put it into +- * the section .altinstructions, see below). This entry contains +- * enough information for the alternatives patching code to patch an +- * instruction. See apply_alternatives(). +- */ +-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len +- .long \orig - . +- .long \alt - . +- .word \feature +- .byte \orig_len +- .byte \alt_len +- .byte \pad_len +-.endm +- +-/* +- * Define an alternative between two instructions. If @feature is +- * present, early code in apply_alternatives() replaces @oldinstr with +- * @newinstr. ".skip" directive takes care of proper instruction padding +- * in case @newinstr is longer than @oldinstr. +- */ +-.macro ALTERNATIVE oldinstr, newinstr, feature +-140: +- \oldinstr +-141: +- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 +-142: +- +- .pushsection .altinstructions,"a" +- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b +- .popsection +- +- .pushsection .altinstr_replacement,"ax" +-143: +- \newinstr +-144: +- .popsection +-.endm +- +-#define old_len 141b-140b +-#define new_len1 144f-143f +-#define new_len2 145f-144f +- +-/* +- * gas compatible max based on the idea from: +- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax +- * +- * The additional "-" is needed because gas uses a "true" value of -1. +- */ +-#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) +- +- +-/* +- * Same as ALTERNATIVE macro above but for two alternatives. If CPU +- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has +- * @feature2, it replaces @oldinstr with @feature2. 
+- */ +-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +-140: +- \oldinstr +-141: +- .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ +- (alt_max_short(new_len1, new_len2) - (old_len)),0x90 +-142: +- +- .pushsection .altinstructions,"a" +- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b +- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b +- .popsection +- +- .pushsection .altinstr_replacement,"ax" +-143: +- \newinstr1 +-144: +- \newinstr2 +-145: +- .popsection +-.endm +- +-#endif /* __ASSEMBLY__ */ +- +-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -2,13 +2,14 @@ + #ifndef _ASM_X86_ALTERNATIVE_H + #define _ASM_X86_ALTERNATIVE_H + +-#ifndef __ASSEMBLY__ +- + #include +-#include + #include + #include + ++#ifndef __ASSEMBLY__ ++ ++#include ++ + /* + * Alternative inline assembly for SMP. + * +@@ -271,6 +272,111 @@ static inline int alternatives_text_rese + */ + #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr + ++#else /* __ASSEMBLY__ */ ++ ++#ifdef CONFIG_SMP ++ .macro LOCK_PREFIX ++672: lock ++ .pushsection .smp_locks,"a" ++ .balign 4 ++ .long 672b - . ++ .popsection ++ .endm ++#else ++ .macro LOCK_PREFIX ++ .endm ++#endif ++ ++/* ++ * objtool annotation to ignore the alternatives and only consider the original ++ * instruction(s). ++ */ ++.macro ANNOTATE_IGNORE_ALTERNATIVE ++ .Lannotate_\@: ++ .pushsection .discard.ignore_alts ++ .long .Lannotate_\@ - . ++ .popsection ++.endm ++ ++/* ++ * Issue one struct alt_instr descriptor entry (need to put it into ++ * the section .altinstructions, see below). This entry contains ++ * enough information for the alternatives patching code to patch an ++ * instruction. See apply_alternatives(). ++ */ ++.macro altinstruction_entry orig alt feature orig_len alt_len pad_len ++ .long \orig - . ++ .long \alt - . 
++ .word \feature ++ .byte \orig_len ++ .byte \alt_len ++ .byte \pad_len ++.endm ++ ++/* ++ * Define an alternative between two instructions. If @feature is ++ * present, early code in apply_alternatives() replaces @oldinstr with ++ * @newinstr. ".skip" directive takes care of proper instruction padding ++ * in case @newinstr is longer than @oldinstr. ++ */ ++.macro ALTERNATIVE oldinstr, newinstr, feature ++140: ++ \oldinstr ++141: ++ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ++142: ++ ++ .pushsection .altinstructions,"a" ++ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b ++ .popsection ++ ++ .pushsection .altinstr_replacement,"ax" ++143: ++ \newinstr ++144: ++ .popsection ++.endm ++ ++#define old_len 141b-140b ++#define new_len1 144f-143f ++#define new_len2 145f-144f ++ ++/* ++ * gas compatible max based on the idea from: ++ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax ++ * ++ * The additional "-" is needed because gas uses a "true" value of -1. ++ */ ++#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) ++ ++ ++/* ++ * Same as ALTERNATIVE macro above but for two alternatives. If CPU ++ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has ++ * @feature2, it replaces @oldinstr with @feature2. 
++ */ ++.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 ++140: ++ \oldinstr ++141: ++ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ ++ (alt_max_short(new_len1, new_len2) - (old_len)),0x90 ++142: ++ ++ .pushsection .altinstructions,"a" ++ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b ++ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b ++ .popsection ++ ++ .pushsection .altinstr_replacement,"ax" ++143: ++ \newinstr1 ++144: ++ \newinstr2 ++145: ++ .popsection ++.endm ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_ALTERNATIVE_H */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -7,7 +7,6 @@ + #include + + #include +-#include + #include + #include + #include +--- a/arch/x86/include/asm/smap.h ++++ b/arch/x86/include/asm/smap.h +@@ -11,6 +11,7 @@ + + #include + #include ++#include + + /* "Raw" instruction opcodes */ + #define __ASM_CLAC ".byte 0x0f,0x01,0xca" +@@ -18,8 +19,6 @@ + + #ifdef __ASSEMBLY__ + +-#include +- + #ifdef CONFIG_X86_SMAP + + #define ASM_CLAC \ +@@ -37,8 +36,6 @@ + + #else /* __ASSEMBLY__ */ + +-#include +- + #ifdef CONFIG_X86_SMAP + + static __always_inline void clac(void) +--- a/arch/x86/lib/atomic64_386_32.S ++++ b/arch/x86/lib/atomic64_386_32.S +@@ -6,7 +6,7 @@ + */ + + #include +-#include ++#include + + /* if you want SMP support, implement these with real spinlocks */ + .macro LOCK reg +--- a/arch/x86/lib/atomic64_cx8_32.S ++++ b/arch/x86/lib/atomic64_cx8_32.S +@@ -6,7 +6,7 @@ + */ + + #include +-#include ++#include + + .macro read64 reg + movl %ebx, %eax +--- a/arch/x86/lib/copy_page_64.S ++++ b/arch/x86/lib/copy_page_64.S +@@ -3,7 +3,7 @@ + + #include + #include +-#include ++#include + #include + + /* +--- a/arch/x86/lib/copy_user_64.S ++++ b/arch/x86/lib/copy_user_64.S +@@ -11,7 +11,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +--- 
a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -4,7 +4,7 @@ + #include + #include + #include +-#include ++#include + #include + + .pushsection .noinstr.text, "ax" +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -8,7 +8,7 @@ + */ + #include + #include +-#include ++#include + #include + + #undef memmove +--- a/arch/x86/lib/memset_64.S ++++ b/arch/x86/lib/memset_64.S +@@ -3,7 +3,7 @@ + + #include + #include +-#include ++#include + #include + + /* +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -4,7 +4,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include diff --git a/queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch b/queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch new file mode 100644 index 00000000000..73995ad115e --- /dev/null +++ b/queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch @@ -0,0 +1,134 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Tue, 1 Jun 2021 17:51:22 +0200 +Subject: x86/alternative: Optimize single-byte NOPs at an arbitrary position + +From: Borislav Petkov + +commit 2b31e8ed96b260ce2c22bd62ecbb9458399e3b62 upstream. + +Up until now the assumption was that an alternative patching site would +have some instructions at the beginning and trailing single-byte NOPs +(0x90) padding. Therefore, the patching machinery would go and optimize +those single-byte NOPs into longer ones. + +However, this assumption is broken on 32-bit when code like +hv_do_hypercall() in hyperv_init() would use the ratpoline speculation +killer CALL_NOSPEC. The 32-bit version of that macro would align certain +insns to 16 bytes, leading to the compiler issuing a one or more +single-byte NOPs, depending on the holes it needs to fill for alignment. 
+ +That would lead to the warning in optimize_nops() to fire: + + ------------[ cut here ]------------ + Not a NOP at 0xc27fb598 + WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13 + +due to that function verifying whether all of the following bytes really +are single-byte NOPs. + +Therefore, carve out the NOP padding into a separate function and call +it for each NOP range beginning with a single-byte NOP. + +Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()") +Reported-by: Richard Narron +Signed-off-by: Borislav Petkov +Acked-by: Peter Zijlstra (Intel) +Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301 +Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 64 ++++++++++++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 18 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -338,41 +338,69 @@ done: + } + + /* ++ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) ++ * ++ * @instr: instruction byte stream ++ * @instrlen: length of the above ++ * @off: offset within @instr where the first NOP has been detected ++ * ++ * Return: number of NOPs found (and replaced). ++ */ ++static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) ++{ ++ unsigned long flags; ++ int i = off, nnops; ++ ++ while (i < instrlen) { ++ if (instr[i] != 0x90) ++ break; ++ ++ i++; ++ } ++ ++ nnops = i - off; ++ ++ if (nnops <= 1) ++ return nnops; ++ ++ local_irq_save(flags); ++ add_nops(instr + off, nnops); ++ local_irq_restore(flags); ++ ++ DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); ++ ++ return nnops; ++} ++ ++/* + * "noinline" to cause control flow change and thus invalidate I$ and + * cause refetch after modification. 
+ */ + static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) + { +- unsigned long flags; + struct insn insn; +- int nop, i = 0; ++ int i = 0; + + /* +- * Jump over the non-NOP insns, the remaining bytes must be single-byte +- * NOPs, optimize them. ++ * Jump over the non-NOP insns and optimize single-byte NOPs into bigger ++ * ones. + */ + for (;;) { + if (insn_decode_kernel(&insn, &instr[i])) + return; + ++ /* ++ * See if this and any potentially following NOPs can be ++ * optimized. ++ */ + if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) +- break; ++ i += optimize_nops_range(instr, a->instrlen, i); ++ else ++ i += insn.length; + +- if ((i += insn.length) >= a->instrlen) ++ if (i >= a->instrlen) + return; + } +- +- for (nop = i; i < a->instrlen; i++) { +- if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i])) +- return; +- } +- +- local_irq_save(flags); +- add_nops(instr + nop, i - nop); +- local_irq_restore(flags); +- +- DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", +- instr, nop, a->instrlen); + } + + /* diff --git a/queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch b/queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch new file mode 100644 index 00000000000..e1b4915562b --- /dev/null +++ b/queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch @@ -0,0 +1,172 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sat, 4 Dec 2021 14:43:43 +0100 +Subject: x86/alternative: Relax text_poke_bp() constraint + +From: Peter Zijlstra + +commit 26c44b776dba4ac692a0bf5a3836feb8a63fea6b upstream. + +Currently, text_poke_bp() is very strict to only allow patching a +single instruction; however with straight-line-speculation it will be +required to patch: ret; int3, which is two instructions. 
+ +As such, relax the constraints a little to allow int3 padding for all +instructions that do not imply the execution of the next instruction, +ie: RET, JMP.d8 and JMP.d32. + +While there, rename the text_poke_loc::rel32 field to ::disp. + +Note: this fills up the text_poke_loc structure which is now a round + 16 bytes big. + + [ bp: Put comments on top instead of on the side. ] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20211204134908.082342723@infradead.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 49 +++++++++++++++++++++++++++++------------- + 1 file changed, 34 insertions(+), 15 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -1243,10 +1243,13 @@ void text_poke_sync(void) + } + + struct text_poke_loc { +- s32 rel_addr; /* addr := _stext + rel_addr */ +- s32 rel32; ++ /* addr := _stext + rel_addr */ ++ s32 rel_addr; ++ s32 disp; ++ u8 len; + u8 opcode; + const u8 text[POKE_MAX_OPCODE_SIZE]; ++ /* see text_poke_bp_batch() */ + u8 old; + }; + +@@ -1261,7 +1264,8 @@ static struct bp_patching_desc *bp_desc; + static __always_inline + struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) + { +- struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */ ++ /* rcu_dereference */ ++ struct bp_patching_desc *desc = __READ_ONCE(*descp); + + if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) + return NULL; +@@ -1295,7 +1299,7 @@ noinstr int poke_int3_handler(struct pt_ + { + struct bp_patching_desc *desc; + struct text_poke_loc *tp; +- int len, ret = 0; ++ int ret = 0; + void *ip; + + if (user_mode(regs)) +@@ -1335,8 +1339,7 @@ noinstr int poke_int3_handler(struct pt_ + goto out_put; + } + +- len = text_opcode_size(tp->opcode); +- ip += len; ++ ip += tp->len; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: 
+@@ -1351,12 +1354,12 @@ noinstr int poke_int3_handler(struct pt_ + break; + + case CALL_INSN_OPCODE: +- int3_emulate_call(regs, (long)ip + tp->rel32); ++ int3_emulate_call(regs, (long)ip + tp->disp); + break; + + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: +- int3_emulate_jmp(regs, (long)ip + tp->rel32); ++ int3_emulate_jmp(regs, (long)ip + tp->disp); + break; + + default: +@@ -1431,7 +1434,7 @@ static void text_poke_bp_batch(struct te + */ + for (do_sync = 0, i = 0; i < nr_entries; i++) { + u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, }; +- int len = text_opcode_size(tp[i].opcode); ++ int len = tp[i].len; + + if (len - INT3_INSN_SIZE > 0) { + memcpy(old + INT3_INSN_SIZE, +@@ -1508,21 +1511,37 @@ static void text_poke_loc_init(struct te + const void *opcode, size_t len, const void *emulate) + { + struct insn insn; +- int ret; ++ int ret, i; + + memcpy((void *)tp->text, opcode, len); + if (!emulate) + emulate = opcode; + + ret = insn_decode_kernel(&insn, emulate); +- + BUG_ON(ret < 0); +- BUG_ON(len != insn.length); + + tp->rel_addr = addr - (void *)_stext; ++ tp->len = len; + tp->opcode = insn.opcode.bytes[0]; + + switch (tp->opcode) { ++ case RET_INSN_OPCODE: ++ case JMP32_INSN_OPCODE: ++ case JMP8_INSN_OPCODE: ++ /* ++ * Control flow instructions without implied execution of the ++ * next instruction can be padded with INT3. 
++ */ ++ for (i = insn.length; i < len; i++) ++ BUG_ON(tp->text[i] != INT3_INSN_OPCODE); ++ break; ++ ++ default: ++ BUG_ON(len != insn.length); ++ }; ++ ++ ++ switch (tp->opcode) { + case INT3_INSN_OPCODE: + case RET_INSN_OPCODE: + break; +@@ -1530,7 +1549,7 @@ static void text_poke_loc_init(struct te + case CALL_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: +- tp->rel32 = insn.immediate.value; ++ tp->disp = insn.immediate.value; + break; + + default: /* assume NOP */ +@@ -1538,13 +1557,13 @@ static void text_poke_loc_init(struct te + case 2: /* NOP2 -- emulate as JMP8+0 */ + BUG_ON(memcmp(emulate, ideal_nops[len], len)); + tp->opcode = JMP8_INSN_OPCODE; +- tp->rel32 = 0; ++ tp->disp = 0; + break; + + case 5: /* NOP5 -- emulate as JMP32+0 */ + BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); + tp->opcode = JMP32_INSN_OPCODE; +- tp->rel32 = 0; ++ tp->disp = 0; + break; + + default: /* unknown instruction */ diff --git a/queue-5.10/x86-alternative-support-alternative_ternary.patch b/queue-5.10/x86-alternative-support-alternative_ternary.patch new file mode 100644 index 00000000000..6e17377f240 --- /dev/null +++ b/queue-5.10/x86-alternative-support-alternative_ternary.patch @@ -0,0 +1,67 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Juergen Gross +Date: Thu, 11 Mar 2021 15:23:11 +0100 +Subject: x86/alternative: Support ALTERNATIVE_TERNARY + +From: Juergen Gross + +commit e208b3c4a9748b2c17aa09ba663b5096ccf82dce upstream. + +Add ALTERNATIVE_TERNARY support for replacing an initial instruction +with either of two instructions depending on a feature: + + ALTERNATIVE_TERNARY "default_instr", FEATURE_NR, + "feature_on_instr", "feature_off_instr" + +which will start with "default_instr" and at patch time will, +depending on FEATURE_NR being set or not, patch that with either +"feature_on_instr" or "feature_off_instr". + + [ bp: Add comment on top. 
] + +Signed-off-by: Juergen Gross +Signed-off-by: Borislav Petkov +Acked-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20210311142319.4723-7-jgross@suse.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/alternative.h | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -179,6 +179,11 @@ static inline int alternatives_text_rese + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".popsection\n" + ++/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ ++#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ ++ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ ++ newinstr_yes, feature) ++ + #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ + OLDINSTR_3(oldinsn, 1, 2, 3) \ + ".pushsection .altinstructions,\"a\"\n" \ +@@ -210,6 +215,9 @@ static inline int alternatives_text_rese + #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") + ++#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \ ++ asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory") ++ + /* + * Alternative inline assembly with input. + * +@@ -380,6 +388,11 @@ static inline int alternatives_text_rese + .popsection + .endm + ++/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. 
*/ ++#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ ++ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ ++ newinstr_yes, feature ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/queue-5.10/x86-alternative-support-not-feature.patch b/queue-5.10/x86-alternative-support-not-feature.patch new file mode 100644 index 00000000000..1258bd6bc9e --- /dev/null +++ b/queue-5.10/x86-alternative-support-not-feature.patch @@ -0,0 +1,91 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Juergen Gross +Date: Thu, 11 Mar 2021 15:23:10 +0100 +Subject: x86/alternative: Support not-feature + +From: Juergen Gross + +commit dda7bb76484978316bb412a353789ebc5901de36 upstream. + +Add support for alternative patching for the case a feature is not +present on the current CPU. For users of ALTERNATIVE() and friends, an +inverted feature is specified by applying the ALT_NOT() macro to it, +e.g.: + + ALTERNATIVE(old, new, ALT_NOT(feature)); + +Committer note: + +The decision to encode the NOT-bit in the feature bit itself is because +a future change which would make objtool generate such alternative +calls, would keep the code in objtool itself fairly simple. + +Also, this allows for the alternative macros to support the NOT feature +without having to change them. + +Finally, the u16 cpuid member encoding the X86_FEATURE_ flags is not an +ABI so if more bits are needed, cpuid itself can be enlarged or a flags +field can be added to struct alt_instr after having considered the size +growth in either cases. 
+ +Signed-off-by: Juergen Gross +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210311142319.4723-6-jgross@suse.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/alternative.h | 3 +++ + arch/x86/kernel/alternative.c | 20 +++++++++++++++----- + 2 files changed, 18 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -6,6 +6,9 @@ + #include + #include + ++#define ALTINSTR_FLAG_INV (1 << 15) ++#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) ++ + #ifndef __ASSEMBLY__ + + #include +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -388,21 +388,31 @@ void __init_or_module noinline apply_alt + */ + for (a = start; a < end; a++) { + int insn_buff_sz = 0; ++ /* Mask away "NOT" flag bit for feature to test. */ ++ u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV; + + instr = (u8 *)&a->instr_offset + a->instr_offset; + replacement = (u8 *)&a->repl_offset + a->repl_offset; + BUG_ON(a->instrlen > sizeof(insn_buff)); +- BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); +- if (!boot_cpu_has(a->cpuid)) { ++ BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32); ++ ++ /* ++ * Patch if either: ++ * - feature is present ++ * - feature not present but ALTINSTR_FLAG_INV is set to mean, ++ * patch if feature is *NOT* present. ++ */ ++ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) { + if (a->padlen > 1) + optimize_nops(a, instr); + + continue; + } + +- DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", +- a->cpuid >> 5, +- a->cpuid & 0x1f, ++ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", ++ (a->cpuid & ALTINSTR_FLAG_INV) ? "!" 
: "", ++ feature >> 5, ++ feature & 0x1f, + instr, instr, a->instrlen, + replacement, a->replacementlen, a->padlen); + diff --git a/queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch b/queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch new file mode 100644 index 00000000000..80fca8577cc --- /dev/null +++ b/queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch @@ -0,0 +1,97 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:44 +0200 +Subject: x86/alternative: Try inline spectre_v2=retpoline,amd + +From: Peter Zijlstra + +commit bbe2df3f6b6da7848398d55b1311d58a16ec21e4 upstream. + +Try and replace retpoline thunk calls with: + + LFENCE + CALL *%\reg + +for spectre_v2=retpoline,amd. + +Specifically, the sequence above is 5 bytes for the low 8 registers, +but 6 bytes for the high 8 registers. This means that unless the +compilers prefix stuff the call with higher registers this replacement +will fail. + +Luckily GCC strongly favours RAX for the indirect calls and most (95%+ +for defconfig-x86_64) will be converted. OTOH clang strongly favours +R11 and almost nothing gets converted. + +Note: it will also generate a correct replacement for the Jcc.d32 +case, except unless the compilers start to prefix stuff that, it'll +never fit. Specifically: + + Jncc.d8 1f + LFENCE + JMP *%\reg +1: + +is 7-8 bytes long, where the original instruction in unpadded form is +only 6 bytes. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.359986601@infradead.org +[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -544,6 +544,7 @@ static int emit_indirect(int op, int reg + * + * CALL *%\reg + * ++ * It also tries to inline spectre_v2=retpoline,amd when size permits. + */ + static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) + { +@@ -560,7 +561,8 @@ static int patch_retpoline(void *addr, s + /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */ + BUG_ON(reg == 4); + +- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && ++ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) + return -1; + + op = insn->opcode.bytes[0]; +@@ -573,8 +575,9 @@ static int patch_retpoline(void *addr, s + * into: + * + * Jncc.d8 1f ++ * [ LFENCE ] + * JMP *%\reg +- * NOP ++ * [ NOP ] + * 1: + */ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ +@@ -589,6 +592,15 @@ static int patch_retpoline(void *addr, s + op = JMP32_INSN_OPCODE; + } + ++ /* ++ * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE. 
++ */ ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { ++ bytes[i++] = 0x0f; ++ bytes[i++] = 0xae; ++ bytes[i++] = 0xe8; /* LFENCE */ ++ } ++ + ret = emit_indirect(op, reg, bytes + i); + if (ret < 0) + return ret; diff --git a/queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch b/queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch new file mode 100644 index 00000000000..4464ee31d39 --- /dev/null +++ b/queue-5.10/x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch @@ -0,0 +1,79 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Juergen Gross +Date: Thu, 11 Mar 2021 15:23:12 +0100 +Subject: x86/alternative: Use ALTERNATIVE_TERNARY() in _static_cpu_has() + +From: Juergen Gross + +commit 2fe2a2c7a97c9bc32acc79154b75e754280f7867 upstream. + +_static_cpu_has() contains a completely open coded version of +ALTERNATIVE_TERNARY(). Replace that with the macro instead. + +Signed-off-by: Juergen Gross +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210311142319.4723-8-jgross@suse.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeature.h | 41 ++++++++------------------------------ + 1 file changed, 9 insertions(+), 32 deletions(-) + +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -8,6 +8,7 @@ + + #include + #include ++#include + + enum cpuid_leafs + { +@@ -172,39 +173,15 @@ extern void clear_cpu_cap(struct cpuinfo + */ + static __always_inline bool _static_cpu_has(u16 bit) + { +- asm_volatile_goto("1: jmp 6f\n" +- "2:\n" +- ".skip -(((5f-4f) - (2b-1b)) > 0) * " +- "((5f-4f) - (2b-1b)),0x90\n" +- "3:\n" +- ".section .altinstructions,\"a\"\n" +- " .long 1b - .\n" /* src offset */ +- " .long 4f - .\n" /* repl offset */ +- " .word %P[always]\n" /* always replace */ +- " .byte 3b - 1b\n" /* src len */ +- " .byte 5f - 4f\n" /* repl len */ +- " .byte 3b - 2b\n" /* pad len */ +- ".previous\n" +- 
".section .altinstr_replacement,\"ax\"\n" +- "4: jmp %l[t_no]\n" +- "5:\n" +- ".previous\n" +- ".section .altinstructions,\"a\"\n" +- " .long 1b - .\n" /* src offset */ +- " .long 0\n" /* no replacement */ +- " .word %P[feature]\n" /* feature bit */ +- " .byte 3b - 1b\n" /* src len */ +- " .byte 0\n" /* repl len */ +- " .byte 0\n" /* pad len */ +- ".previous\n" +- ".section .altinstr_aux,\"ax\"\n" +- "6:\n" +- " testb %[bitnum],%[cap_byte]\n" +- " jnz %l[t_yes]\n" +- " jmp %l[t_no]\n" +- ".previous\n" ++ asm_volatile_goto( ++ ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ++ ".section .altinstr_aux,\"ax\"\n" ++ "6:\n" ++ " testb %[bitnum],%[cap_byte]\n" ++ " jnz %l[t_yes]\n" ++ " jmp %l[t_no]\n" ++ ".previous\n" + : : [feature] "i" (bit), +- [always] "i" (X86_FEATURE_ALWAYS), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); diff --git a/queue-5.10/x86-alternative-use-insn_decode.patch b/queue-5.10/x86-alternative-use-insn_decode.patch new file mode 100644 index 00000000000..db84d540054 --- /dev/null +++ b/queue-5.10/x86-alternative-use-insn_decode.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Fri, 6 Nov 2020 19:37:25 +0100 +Subject: x86/alternative: Use insn_decode() + +From: Borislav Petkov + +commit 63c66cde7bbcc79aac14b25861c5b2495eede57b upstream. + +No functional changes, just simplification. 
+ +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210304174237.31945-10-bp@alien8.de +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/alternative.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -1284,15 +1284,15 @@ static void text_poke_loc_init(struct te + const void *opcode, size_t len, const void *emulate) + { + struct insn insn; ++ int ret; + + memcpy((void *)tp->text, opcode, len); + if (!emulate) + emulate = opcode; + +- kernel_insn_init(&insn, emulate, MAX_INSN_SIZE); +- insn_get_length(&insn); ++ ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN); + +- BUG_ON(!insn_complete(&insn)); ++ BUG_ON(ret < 0); + BUG_ON(len != insn.length); + + tp->rel_addr = addr - (void *)_stext; diff --git a/queue-5.10/x86-alternatives-optimize-optimize_nops.patch b/queue-5.10/x86-alternatives-optimize-optimize_nops.patch new file mode 100644 index 00000000000..4115ae45a68 --- /dev/null +++ b/queue-5.10/x86-alternatives-optimize-optimize_nops.patch @@ -0,0 +1,216 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:01 +0100 +Subject: x86/alternatives: Optimize optimize_nops() + +From: Peter Zijlstra + +commit 23c1ad538f4f371bdb67d8a112314842d5db7e5a upstream. + +Currently, optimize_nops() scans to see if the alternative starts with +NOPs. However, the emit pattern is: + + 141: \oldinstr + 142: .skip (len-(142b-141b)), 0x90 + +That is, when 'oldinstr' is short, the tail is padded with NOPs. This case +never gets optimized. + +Rewrite optimize_nops() to replace any trailing string of NOPs inside +the alternative to larger NOPs. Also run it irrespective of patching, +replacing NOPs in both the original and replaced code. + +A direct consequence is that 'padlen' becomes superfluous, so remove it. 
+ + [ bp: + - Adjust commit message + - remove a stale comment about needing to pad + - add a comment in optimize_nops() + - exit early if the NOP verif. loop catches a mismatch - function + should not add NOPs in that case + - fix the "optimized NOPs" offsets output ] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Link: https://lkml.kernel.org/r/20210326151259.442992235@infradead.org +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/alternative.h | 17 ++------- + arch/x86/kernel/alternative.c | 49 ++++++++++++++++---------- + tools/objtool/arch/x86/include/arch_special.h | 2 - + 3 files changed, 37 insertions(+), 31 deletions(-) + +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -65,7 +65,6 @@ struct alt_instr { + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +- u8 padlen; /* length of build-time padding */ + } __packed; + + /* +@@ -104,7 +103,6 @@ static inline int alternatives_text_rese + + #define alt_end_marker "663" + #define alt_slen "662b-661b" +-#define alt_pad_len alt_end_marker"b-662b" + #define alt_total_slen alt_end_marker"b-661b" + #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +@@ -151,8 +149,7 @@ static inline int alternatives_text_rese + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ +- " .byte " alt_rlen(num) "\n" /* replacement len */ \ +- " .byte " alt_pad_len "\n" /* pad len */ ++ " .byte " alt_rlen(num) "\n" /* replacement len */ + + #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + "# ALT: replacement " #num "\n" \ +@@ -224,9 +221,6 @@ static inline int alternatives_text_rese + * Peculiarities: + * No memory clobber here. 
+ * Argument numbers start with 1. +- * Best is to use constraints that are fixed size (like (%1) ... "r") +- * If you use variable sized constraints like "m" or "g" in the +- * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. + */ + #define alternative_input(oldinstr, newinstr, feature, input...) \ +@@ -315,13 +309,12 @@ static inline int alternatives_text_rese + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len ++.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .word \feature + .byte \orig_len + .byte \alt_len +- .byte \pad_len + .endm + + /* +@@ -338,7 +331,7 @@ static inline int alternatives_text_rese + 142: + + .pushsection .altinstructions,"a" +- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b ++ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f + .popsection + + .pushsection .altinstr_replacement,"ax" +@@ -375,8 +368,8 @@ static inline int alternatives_text_rese + 142: + + .pushsection .altinstructions,"a" +- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b +- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b ++ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f ++ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + .popsection + + .pushsection .altinstr_replacement,"ax" +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -344,19 +344,35 @@ done: + static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) + { + unsigned long flags; +- int i; ++ struct insn insn; ++ int nop, i = 0; + +- for (i = 0; i < a->padlen; i++) { +- if (instr[i] != 0x90) ++ /* ++ * Jump over the non-NOP insns, the remaining bytes must be single-byte ++ * NOPs, optimize them. 
++ */ ++ for (;;) { ++ if (insn_decode_kernel(&insn, &instr[i])) ++ return; ++ ++ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) ++ break; ++ ++ if ((i += insn.length) >= a->instrlen) ++ return; ++ } ++ ++ for (nop = i; i < a->instrlen; i++) { ++ if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i])) + return; + } + + local_irq_save(flags); +- add_nops(instr + (a->instrlen - a->padlen), a->padlen); ++ add_nops(instr + nop, i - nop); + local_irq_restore(flags); + + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", +- instr, a->instrlen - a->padlen, a->padlen); ++ instr, nop, a->instrlen); + } + + /* +@@ -402,19 +418,15 @@ void __init_or_module noinline apply_alt + * - feature not present but ALTINSTR_FLAG_INV is set to mean, + * patch if feature is *NOT* present. + */ +- if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) { +- if (a->padlen > 1) +- optimize_nops(a, instr); +- +- continue; +- } ++ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) ++ goto next; + +- DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", ++ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", + (a->cpuid & ALTINSTR_FLAG_INV) ? "!" 
: "", + feature >> 5, + feature & 0x1f, + instr, instr, a->instrlen, +- replacement, a->replacementlen, a->padlen); ++ replacement, a->replacementlen); + + DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); +@@ -438,14 +450,15 @@ void __init_or_module noinline apply_alt + if (a->replacementlen && is_jmp(replacement[0])) + recompute_jump(a, instr, replacement, insn_buff); + +- if (a->instrlen > a->replacementlen) { +- add_nops(insn_buff + a->replacementlen, +- a->instrlen - a->replacementlen); +- insn_buff_sz += a->instrlen - a->replacementlen; +- } ++ for (; insn_buff_sz < a->instrlen; insn_buff_sz++) ++ insn_buff[insn_buff_sz] = 0x90; ++ + DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); + + text_poke_early(instr, insn_buff, insn_buff_sz); ++ ++next: ++ optimize_nops(a, instr); + } + } + +--- a/tools/objtool/arch/x86/include/arch_special.h ++++ b/tools/objtool/arch/x86/include/arch_special.h +@@ -10,7 +10,7 @@ + #define JUMP_ORIG_OFFSET 0 + #define JUMP_NEW_OFFSET 4 + +-#define ALT_ENTRY_SIZE 13 ++#define ALT_ENTRY_SIZE 12 + #define ALT_ORIG_OFFSET 0 + #define ALT_NEW_OFFSET 4 + #define ALT_FEATURE_OFFSET 8 diff --git a/queue-5.10/x86-asm-fix-register-order.patch b/queue-5.10/x86-asm-fix-register-order.patch new file mode 100644 index 00000000000..cbc6c1d6c78 --- /dev/null +++ b/queue-5.10/x86-asm-fix-register-order.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:38 +0200 +Subject: x86/asm: Fix register order + +From: Peter Zijlstra + +commit a92ede2d584a2e070def59c7e47e6b6f6341c55c upstream. + +Ensure the register order is correct; this allows for easy translation +between register number and trampoline and vice-versa. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.978573921@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/GEN-for-each-reg.h | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/GEN-for-each-reg.h ++++ b/arch/x86/include/asm/GEN-for-each-reg.h +@@ -1,11 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * These are in machine order; things rely on that. ++ */ + #ifdef CONFIG_64BIT + GEN(rax) +-GEN(rbx) + GEN(rcx) + GEN(rdx) ++GEN(rbx) ++GEN(rsp) ++GEN(rbp) + GEN(rsi) + GEN(rdi) +-GEN(rbp) + GEN(r8) + GEN(r9) + GEN(r10) +@@ -16,10 +21,11 @@ GEN(r14) + GEN(r15) + #else + GEN(eax) +-GEN(ebx) + GEN(ecx) + GEN(edx) ++GEN(ebx) ++GEN(esp) ++GEN(ebp) + GEN(esi) + GEN(edi) +-GEN(ebp) + #endif diff --git a/queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch b/queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch new file mode 100644 index 00000000000..3f09f942560 --- /dev/null +++ b/queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch @@ -0,0 +1,53 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:39 +0200 +Subject: x86/asm: Fixup odd GEN-for-each-reg.h usage + +From: Peter Zijlstra + +commit b6d3d9944bd7c9e8c06994ead3c9952f673f2a66 upstream. + +Currently GEN-for-each-reg.h usage leaves GEN defined, relying on any +subsequent usage to start with #undef, which is rude. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.041792350@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 2 +- + arch/x86/lib/retpoline.S | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -19,9 +19,9 @@ extern void cmpxchg8b_emu(void); + + #ifdef CONFIG_RETPOLINE + +-#undef GEN + #define GEN(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + #include ++#undef GEN + + #endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -55,10 +55,10 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) + #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + +-#undef GEN + #define GEN(reg) THUNK reg + #include +- + #undef GEN ++ + #define GEN(reg) EXPORT_THUNK(reg) + #include ++#undef GEN diff --git a/queue-5.10/x86-bpf-use-alternative-ret-encoding.patch b/queue-5.10/x86-bpf-use-alternative-ret-encoding.patch new file mode 100644 index 00000000000..fc47d497097 --- /dev/null +++ b/queue-5.10/x86-bpf-use-alternative-ret-encoding.patch @@ -0,0 +1,66 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:41 +0200 +Subject: x86/bpf: Use alternative RET encoding + +From: Peter Zijlstra + +commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream. + +Use the return thunk in eBPF generated code, if needed. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: add the necessary cnt variable to emit_return()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -399,6 +399,22 @@ static void emit_indirect_jump(u8 **ppro + *pprog = prog; + } + ++static void emit_return(u8 **pprog, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ int cnt = 0; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { ++ emit_jump(&prog, &__x86_return_thunk, ip); ++ } else { ++ EMIT1(0xC3); /* ret */ ++ if (IS_ENABLED(CONFIG_SLS)) ++ EMIT1(0xCC); /* int3 */ ++ } ++ ++ *pprog = prog; ++} ++ + /* + * Generate the following code: + * +@@ -1443,7 +1459,7 @@ emit_jmp: + ctx->cleanup_addr = proglen; + pop_callee_regs(&prog, callee_regs_used); + EMIT1(0xC9); /* leave */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); + break; + + default: +@@ -1884,7 +1900,7 @@ int arch_prepare_bpf_trampoline(struct b + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, prog); + /* Make sure the trampoline generation logic doesn't overflow */ + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; diff --git a/queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch b/queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch new file mode 100644 index 00000000000..dec71ddd487 --- /dev/null +++ b/queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch @@ -0,0 +1,208 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Alexandre Chartre +Date: Tue, 14 Jun 2022 23:15:50 +0200 +Subject: 
x86/bugs: Add AMD retbleed= boot parameter + +From: Alexandre Chartre + +commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 upstream. + +Add the "retbleed=" boot parameter to select a mitigation for +RETBleed. Possible values are "off", "auto" and "unret" +(JMP2RET mitigation). The default value is "auto". + +Currently, "retbleed=auto" will select the unret mitigation on +AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on +Intel). + + [peterz: rebase; add hygon] + [jpoimboe: cleanups] + +Signed-off-by: Alexandre Chartre +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 15 +++ + arch/x86/Kconfig | 3 + arch/x86/kernel/cpu/bugs.c | 108 +++++++++++++++++++++++- + 3 files changed, 125 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4656,6 +4656,21 @@ + + retain_initrd [RAM] Keep initrd memory after extraction + ++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary ++ Speculative Code Execution with Return Instructions) ++ vulnerability. ++ ++ off - unconditionally disable ++ auto - automatically select a migitation ++ unret - force enable untrained return thunks, ++ only effective on AMD Zen {1,2} ++ based systems. ++ ++ Selecting 'auto' will choose a mitigation method at run ++ time according to the CPU. ++ ++ Not specifying this option is equivalent to retbleed=auto. ++ + rfkill.default_state= + 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, + etc. communication is blocked by default. 
+--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -465,6 +465,9 @@ config RETPOLINE + config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ + config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -37,6 +37,7 @@ + #include "cpu.h" + + static void __init spectre_v1_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); +@@ -112,6 +113,12 @@ void __init check_bugs(void) + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); ++ retbleed_select_mitigation(); ++ /* ++ * spectre_v2_select_mitigation() relies on the state set by ++ * retbleed_select_mitigation(); specifically the STIBP selection is ++ * forced for UNRET. 
++ */ + spectre_v2_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); +@@ -709,6 +716,100 @@ static int __init nospectre_v1_cmdline(c + early_param("nospectre_v1", nospectre_v1_cmdline); + + #undef pr_fmt ++#define pr_fmt(fmt) "RETBleed: " fmt ++ ++enum retbleed_mitigation { ++ RETBLEED_MITIGATION_NONE, ++ RETBLEED_MITIGATION_UNRET, ++}; ++ ++enum retbleed_mitigation_cmd { ++ RETBLEED_CMD_OFF, ++ RETBLEED_CMD_AUTO, ++ RETBLEED_CMD_UNRET, ++}; ++ ++const char * const retbleed_strings[] = { ++ [RETBLEED_MITIGATION_NONE] = "Vulnerable", ++ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++}; ++ ++static enum retbleed_mitigation retbleed_mitigation __ro_after_init = ++ RETBLEED_MITIGATION_NONE; ++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = ++ RETBLEED_CMD_AUTO; ++ ++static int __init retbleed_parse_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ else if (!strcmp(str, "auto")) ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ else if (!strcmp(str, "unret")) ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ else ++ pr_err("Unknown retbleed option (%s). 
Defaulting to 'auto'\n", str); ++ ++ return 0; ++} ++early_param("retbleed", retbleed_parse_cmdline); ++ ++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" ++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++ ++static void __init retbleed_select_mitigation(void) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) ++ return; ++ ++ switch (retbleed_cmd) { ++ case RETBLEED_CMD_OFF: ++ return; ++ ++ case RETBLEED_CMD_UNRET: ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ break; ++ ++ case RETBLEED_CMD_AUTO: ++ default: ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) ++ break; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ break; ++ } ++ ++ switch (retbleed_mitigation) { ++ case RETBLEED_MITIGATION_UNRET: ++ ++ if (!IS_ENABLED(CONFIG_RETPOLINE) || ++ !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { ++ pr_err(RETBLEED_COMPILER_MSG); ++ retbleed_mitigation = RETBLEED_MITIGATION_NONE; ++ break; ++ } ++ ++ setup_force_cpu_cap(X86_FEATURE_RETHUNK); ++ setup_force_cpu_cap(X86_FEATURE_UNRET); ++ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ pr_err(RETBLEED_UNTRAIN_MSG); ++ break; ++ ++ default: ++ break; ++ } ++ ++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]); ++} ++ ++#undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + + static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = +@@ -1919,7 +2020,12 @@ static ssize_t srbds_show_state(char *bu + + static ssize_t retbleed_show_state(char *buf) + { +- return sprintf(buf, "Vulnerable\n"); ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && ++ (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) ++ return sprintf(buf, "Vulnerable: 
untrained return thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + } + + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch b/queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch new file mode 100644 index 00000000000..dd5a58fa115 --- /dev/null +++ b/queue-5.10/x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch @@ -0,0 +1,30 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Pawan Gupta +Date: Wed, 6 Jul 2022 15:01:15 -0700 +Subject: x86/bugs: Add Cannon lake to RETBleed affected CPU list + +From: Pawan Gupta + +commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream. + +Cannon lake is also affected by RETBleed, add it to the list. + +Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability") +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1129,6 +1129,7 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), diff --git a/queue-5.10/x86-bugs-add-retbleed-ibpb.patch b/queue-5.10/x86-bugs-add-retbleed-ibpb.patch new file mode 100644 index 00000000000..4640bf05c42 --- /dev/null +++ b/queue-5.10/x86-bugs-add-retbleed-ibpb.patch @@ -0,0 
+1,255 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:02 +0200 +Subject: x86/bugs: Add retbleed=ibpb + +From: Peter Zijlstra + +commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream. + +jmp2ret mitigates the easy-to-attack case at relatively low overhead. +It mitigates the long speculation windows after a mispredicted RET, but +it does not mitigate the short speculation window from arbitrary +instruction boundaries. + +On Zen2, there is a chicken bit which needs setting, which mitigates +"arbitrary instruction boundaries" down to just "basic block boundaries". + +But there is no fix for the short speculation window on basic block +boundaries, other than to flush the entire BTB to evict all attacker +predictions. + +On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP +or no-SMT): + + 1) Nothing System wide open + 2) jmp2ret May stop a script kiddy + 3) jmp2ret+chickenbit Raises the bar rather further + 4) IBPB Only thing which can count as "safe". + +Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit +on Zen1 according to lmbench. + + [ bp: Fixup feature bit comments, document option, 32-bit build fix. 
] + +Suggested-by: Andrew Cooper +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 3 + + arch/x86/entry/Makefile | 2 - + arch/x86/entry/entry.S | 22 ++++++++++++ + arch/x86/include/asm/cpufeatures.h | 2 - + arch/x86/include/asm/nospec-branch.h | 8 +++- + arch/x86/kernel/cpu/bugs.c | 43 ++++++++++++++++++------ + 6 files changed, 67 insertions(+), 13 deletions(-) + create mode 100644 arch/x86/entry/entry.S + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4666,6 +4666,9 @@ + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). ++ ibpb - mitigate short speculation windows on ++ basic block boundaries too. Safe, highest ++ perf impact. + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. +--- a/arch/x86/entry/Makefile ++++ b/arch/x86/entry/Makefile +@@ -21,7 +21,7 @@ CFLAGS_syscall_64.o += $(call cc-option + CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) + CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,) + +-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o ++obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o + obj-y += common.o + + obj-y += vdso/ +--- /dev/null ++++ b/arch/x86/entry/entry.S +@@ -0,0 +1,22 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Common place for both 32- and 64-bit entry routines. 
++ */ ++ ++#include ++#include ++#include ++ ++.pushsection .noinstr.text, "ax" ++ ++SYM_FUNC_START(entry_ibpb) ++ movl $MSR_IA32_PRED_CMD, %ecx ++ movl $PRED_CMD_IBPB, %eax ++ xorl %edx, %edx ++ wrmsr ++ RET ++SYM_FUNC_END(entry_ibpb) ++/* For KVM */ ++EXPORT_SYMBOL_GPL(entry_ibpb); ++ ++.popsection +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -292,7 +292,7 @@ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ + /* FREE! (11*32+ 8) */ + /* FREE! (11*32+ 9) */ +-/* FREE! (11*32+10) */ ++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ + /* FREE! (11*32+11) */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -123,14 +123,17 @@ + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * +- * Doesn't clobber any registers but does require a stable stack. ++ * While zen_untrain_ret() doesn't clobber anything but requires stack, ++ * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. 
+ */ + .macro UNTRAIN_RET + #ifdef CONFIG_RETPOLINE +- ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET ++ ALTERNATIVE_2 "", \ ++ "call zen_untrain_ret", X86_FEATURE_UNRET, \ ++ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm + +@@ -144,6 +147,7 @@ + + extern void __x86_return_thunk(void); + extern void zen_untrain_ret(void); ++extern void entry_ibpb(void); + + #ifdef CONFIG_RETPOLINE + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -761,6 +761,7 @@ static enum spectre_v2_mitigation spectr + enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, + }; +@@ -769,11 +770,13 @@ enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, ++ RETBLEED_CMD_IBPB, + }; + + const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + }; +@@ -803,6 +806,8 @@ static int __init retbleed_parse_cmdline + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "ibpb")) { ++ retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { +@@ -817,11 +822,13 @@ static int __init retbleed_parse_cmdline + early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to 
IBPB!\n" + #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) + { ++ bool mitigate_smt = false; ++ + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + +@@ -833,11 +840,21 @@ static void __init retbleed_select_mitig + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + ++ case RETBLEED_CMD_IBPB: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ break; ++ + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ ++ if (IS_ENABLED(CONFIG_RETPOLINE) && ++ IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ else ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } + + /* + * The Intel mitigation (IBRS) was already selected in +@@ -853,26 +870,34 @@ static void __init retbleed_select_mitig + if (!IS_ENABLED(CONFIG_RETPOLINE) || + !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { + pr_err(RETBLEED_COMPILER_MSG); +- retbleed_mitigation = RETBLEED_MITIGATION_NONE; +- break; ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ goto retbleed_force_ibpb; + } + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +- if (!boot_cpu_has(X86_FEATURE_STIBP) && +- (retbleed_nosmt || cpu_mitigations_auto_nosmt())) +- cpu_smt_disable(false); +- + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); ++ ++ mitigate_smt = true; ++ break; ++ ++ case RETBLEED_MITIGATION_IBPB: ++retbleed_force_ibpb: ++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); ++ mitigate_smt = true; + break; + + default: + break; + } + ++ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || 
cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. diff --git a/queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch b/queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch new file mode 100644 index 00000000000..6968a78d04b --- /dev/null +++ b/queue-5.10/x86-bugs-do-ibpb-fallback-check-only-once.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 15:07:19 -0700 +Subject: x86/bugs: Do IBPB fallback check only once + +From: Josh Poimboeuf + +commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream. + +When booting with retbleed=auto, if the kernel wasn't built with +CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB. Make +sure a warning is printed in that case. The IBPB fallback check is done +twice, but it really only needs to be done once. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -847,18 +847,13 @@ static void __init retbleed_select_mitig + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { +- +- if (IS_ENABLED(CONFIG_RETPOLINE) && +- IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; +- else +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; +- } ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + + /* +- * The Intel mitigation (IBRS) was already selected in +- * spectre_v2_select_mitigation(). 
++ * The Intel mitigation (IBRS or eIBRS) was already selected in ++ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will ++ * be set accordingly below. + */ + + break; diff --git a/queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch b/queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch new file mode 100644 index 00000000000..81b2029327d --- /dev/null +++ b/queue-5.10/x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Thu, 7 Jul 2022 13:41:52 -0300 +Subject: x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported + +From: Thadeu Lima de Souza Cascardo + +commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream. + +There are some VM configurations which have a Skylake model but do not +support IBPB. In those cases, when using retbleed=ibpb, userspace is going +to be killed and the kernel is going to panic. + +If the CPU does not support IBPB, warn and proceed with the auto option. Also, +do not fall back to IBPB on AMD/Hygon systems if it is not supported. 
+ +Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb") +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Borislav Petkov +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -821,7 +821,10 @@ static void __init retbleed_select_mitig + break; + + case RETBLEED_CMD_IBPB: +- if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ if (!boot_cpu_has(X86_FEATURE_IBPB)) { ++ pr_err("WARNING: CPU does not support IBPB.\n"); ++ goto do_cmd_auto; ++ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); +@@ -836,7 +839,7 @@ do_cmd_auto: + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; +- else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + diff --git a/queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch b/queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch new file mode 100644 index 00000000000..b1cf04ed956 --- /dev/null +++ b/queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch @@ -0,0 +1,143 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Kim Phillips +Date: Tue, 14 Jun 2022 23:15:51 +0200 +Subject: x86/bugs: Enable STIBP for JMP2RET + +From: Kim Phillips + +commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream. + +For untrained return thunks to be fully effective, STIBP must be enabled +or SMT disabled. 
+ +Co-developed-by: Josh Poimboeuf +Signed-off-by: Josh Poimboeuf +Signed-off-by: Kim Phillips +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 16 ++++-- + arch/x86/kernel/cpu/bugs.c | 58 +++++++++++++++++++----- + 2 files changed, 57 insertions(+), 17 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4660,11 +4660,17 @@ + Speculative Code Execution with Return Instructions) + vulnerability. + +- off - unconditionally disable +- auto - automatically select a migitation +- unret - force enable untrained return thunks, +- only effective on AMD Zen {1,2} +- based systems. ++ off - no mitigation ++ auto - automatically select a migitation ++ auto,nosmt - automatically select a mitigation, ++ disabling SMT if necessary for ++ the full mitigation (only on Zen1 ++ and older without STIBP). ++ unret - force enable untrained return thunks, ++ only effective on AMD f15h-f17h ++ based systems. ++ unret,nosmt - like unret, will disable SMT when STIBP ++ is not available. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -739,19 +739,34 @@ static enum retbleed_mitigation retbleed + static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + ++static int __ro_after_init retbleed_nosmt = false; ++ + static int __init retbleed_parse_cmdline(char *str) + { + if (!str) + return -EINVAL; + +- if (!strcmp(str, "off")) +- retbleed_cmd = RETBLEED_CMD_OFF; +- else if (!strcmp(str, "auto")) +- retbleed_cmd = RETBLEED_CMD_AUTO; +- else if (!strcmp(str, "unret")) +- retbleed_cmd = RETBLEED_CMD_UNRET; +- else +- pr_err("Unknown retbleed option (%s). 
Defaulting to 'auto'\n", str); ++ while (str) { ++ char *next = strchr(str, ','); ++ if (next) { ++ *next = 0; ++ next++; ++ } ++ ++ if (!strcmp(str, "off")) { ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ } else if (!strcmp(str, "auto")) { ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ } else if (!strcmp(str, "unret")) { ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "nosmt")) { ++ retbleed_nosmt = true; ++ } else { ++ pr_err("Ignoring unknown retbleed option (%s).", str); ++ } ++ ++ str = next; ++ } + + return 0; + } +@@ -797,6 +812,10 @@ static void __init retbleed_select_mitig + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + ++ if (!boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); +@@ -1043,6 +1062,13 @@ spectre_v2_user_select_mitigation(enum s + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (mode != SPECTRE_V2_USER_STRICT && ++ mode != SPECTRE_V2_USER_STRICT_PREFERRED) ++ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); ++ mode = SPECTRE_V2_USER_STRICT_PREFERRED; ++ } ++ + spectre_v2_user_stibp = mode; + + set_mode: +@@ -2020,10 +2046,18 @@ static ssize_t srbds_show_state(char *bu + + static ssize_t retbleed_show_state(char *buf) + { +- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && +- (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && +- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) +- return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ return sprintf(buf, "Vulnerable: untrained return 
thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, "%s; SMT %s\n", ++ retbleed_strings[retbleed_mitigation], ++ !sched_smt_active() ? "disabled" : ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? ++ "enabled with STIBP protection" : "vulnerable"); ++ } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + } diff --git a/queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch b/queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch new file mode 100644 index 00000000000..27c601d5262 --- /dev/null +++ b/queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch @@ -0,0 +1,119 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:52 +0200 +Subject: x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value + +From: Peter Zijlstra + +commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream. + +Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can +differ from x86_spec_ctrl_base. As such, keep a per-CPU value +reflecting the current task's MSR content. 
+ + [jpoimboe: rename] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++++++----- + arch/x86/kernel/process.c | 2 +- + 3 files changed, 25 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -254,6 +254,7 @@ static inline void indirect_branch_predi + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern void write_spec_ctrl_current(u64 val); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -48,12 +48,30 @@ static void __init taa_select_mitigation + static void __init mmio_select_mitigation(void); + static void __init srbds_select_mitigation(void); + +-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ ++/* The base value of the SPEC_CTRL MSR without task-specific bits set */ + u64 x86_spec_ctrl_base; + EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); ++ ++/* The current value of the SPEC_CTRL MSR with task-specific bits set */ ++DEFINE_PER_CPU(u64, x86_spec_ctrl_current); ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); ++ + static DEFINE_MUTEX(spec_ctrl_mutex); + + /* ++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ ++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). ++ */ ++void write_spec_ctrl_current(u64 val) ++{ ++ if (this_cpu_read(x86_spec_ctrl_current) == val) ++ return; ++ ++ this_cpu_write(x86_spec_ctrl_current, val); ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); ++} ++ ++/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. 
+ */ +@@ -1235,7 +1253,7 @@ static void __init spectre_v2_select_mit + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + + switch (mode) { +@@ -1290,7 +1308,7 @@ static void __init spectre_v2_select_mit + + static void update_stibp_msr(void * __unused) + { +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1533,7 +1551,7 @@ static enum ssb_mitigation __init __ssb_ + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + } + +@@ -1751,7 +1769,7 @@ int arch_prctl_spec_ctrl_get(struct task + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -556,7 +556,7 @@ static __always_inline void __speculatio + } + + if (updmsr) +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ write_spec_ctrl_current(msr); + } + + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch b/queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch new file mode 100644 index 00000000000..0176b4a57e6 --- /dev/null +++ b/queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch @@ -0,0 +1,109 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:54 +0200 +Subject: x86/bugs: Optimize SPEC_CTRL MSR writes + +From: Peter Zijlstra + +commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream. 
+ +When changing SPEC_CTRL for user control, the WRMSR can be delayed +until return-to-user when KERNEL_IBRS has been enabled. + +This avoids an MSR write during context switch. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 2 +- + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ + arch/x86/kernel/process.c | 2 +- + 3 files changed, 14 insertions(+), 8 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -254,7 +254,7 @@ static inline void indirect_branch_predi + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; +-extern void write_spec_ctrl_current(u64 val); ++extern void write_spec_ctrl_current(u64 val, bool force); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -62,13 +62,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex); + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). + */ +-void write_spec_ctrl_current(u64 val) ++void write_spec_ctrl_current(u64 val, bool force) + { + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); +- wrmsrl(MSR_IA32_SPEC_CTRL, val); ++ ++ /* ++ * When KERNEL_IBRS this MSR is written on return-to-user, unless ++ * forced the update can be delayed until that time. 
++ */ ++ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); + } + + /* +@@ -1253,7 +1259,7 @@ static void __init spectre_v2_select_mit + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + switch (mode) { +@@ -1308,7 +1314,7 @@ static void __init spectre_v2_select_mit + + static void update_stibp_msr(void * __unused) + { +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1551,7 +1557,7 @@ static enum ssb_mitigation __init __ssb_ + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + } + +@@ -1769,7 +1775,7 @@ int arch_prctl_spec_ctrl_get(struct task + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -556,7 +556,7 @@ static __always_inline void __speculatio + } + + if (updmsr) +- write_spec_ctrl_current(msr); ++ write_spec_ctrl_current(msr, false); + } + + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch b/queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch new file mode 100644 index 00000000000..52037632bef --- /dev/null +++ b/queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch @@ -0,0 +1,170 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Alexandre Chartre +Date: Tue, 14 Jun 2022 23:15:49 +0200 
+Subject: x86/bugs: Report AMD retbleed vulnerability + +From: Alexandre Chartre + +commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream. + +Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary +Speculative Code Execution with Return Instructions) attack. + + [peterz: add hygon] + [kim: invert parity; fam15h] + +Co-developed-by: Kim Phillips +Signed-off-by: Kim Phillips +Signed-off-by: Alexandre Chartre +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++ + arch/x86/kernel/cpu/common.c | 19 +++++++++++++++++++ + drivers/base/cpu.c | 8 ++++++++ + include/linux/cpu.h | 2 ++ + 5 files changed, 43 insertions(+) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -426,5 +426,6 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ ++#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1917,6 +1917,11 @@ static ssize_t srbds_show_state(char *bu + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + } + ++static ssize_t retbleed_show_state(char *buf) ++{ ++ return sprintf(buf, "Vulnerable\n"); ++} ++ + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@ -1962,6 +1967,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + 
++ case X86_BUG_RETBLEED: ++ return retbleed_show_state(buf); ++ + default: + break; + } +@@ -2018,4 +2026,9 @@ ssize_t cpu_show_mmio_stale_data(struct + { + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } ++ ++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); ++} + #endif +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1092,16 +1092,27 @@ static const __initconst struct x86_cpu_ + {} + }; + ++#define VULNBL(vendor, family, model, blacklist) \ ++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) ++ + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + ++#define VULNBL_AMD(family, blacklist) \ ++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) ++ ++#define VULNBL_HYGON(family, blacklist) \ ++ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) ++ + #define SRBDS BIT(0) + /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ + #define MMIO BIT(1) + /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ + #define MMIO_SBDS BIT(2) ++/* CPU is affected by RETbleed, speculating where you would not expect it */ ++#define RETBLEED BIT(3) + + static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), +@@ -1134,6 +1145,11 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ ++ VULNBL_AMD(0x15, RETBLEED), ++ VULNBL_AMD(0x16, RETBLEED), ++ VULNBL_AMD(0x17, RETBLEED), ++ VULNBL_HYGON(0x18, RETBLEED), + {} + }; + +@@ -1235,6 +1251,9 @@ static void __init 
cpu_set_bug_bits(stru + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -572,6 +572,12 @@ ssize_t __weak cpu_show_mmio_stale_data( + return sysfs_emit(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sysfs_emit(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +@@ -582,6 +588,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444 + static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); + static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); + static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); ++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -594,6 +601,7 @@ static struct attribute *cpu_root_vulner + &dev_attr_itlb_multihit.attr, + &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, ++ &dev_attr_retbleed.attr, + NULL + }; + +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct dev + extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); ++extern ssize_t cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch b/queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch new file mode 100644 index 
00000000000..9ae6f437464 --- /dev/null +++ b/queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch @@ -0,0 +1,175 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 24 Jun 2022 13:48:58 +0200 +Subject: x86/bugs: Report Intel retbleed vulnerability + +From: Peter Zijlstra + +commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream. + +Skylake suffers from RSB underflow speculation issues; report this +vulnerability and it's mitigation (spectre_v2=ibrs). + + [jpoimboe: cleanups, eibrs] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kernel/cpu/bugs.c | 39 +++++++++++++++++++++++++++++++++------ + arch/x86/kernel/cpu/common.c | 24 ++++++++++++------------ + 3 files changed, 46 insertions(+), 18 deletions(-) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -91,6 +91,7 @@ + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ + #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ ++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ + #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ + #define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -746,12 +746,17 @@ static int __init nospectre_v1_cmdline(c + } + early_param("nospectre_v1", nospectre_v1_cmdline); + ++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = ++ SPECTRE_V2_NONE; ++ + #undef pr_fmt + #define pr_fmt(fmt) "RETBleed: " fmt + + enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBRS, ++ 
RETBLEED_MITIGATION_EIBRS, + }; + + enum retbleed_mitigation_cmd { +@@ -763,6 +768,8 @@ enum retbleed_mitigation_cmd { + const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", ++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + }; + + static enum retbleed_mitigation retbleed_mitigation __ro_after_init = +@@ -805,6 +812,7 @@ early_param("retbleed", retbleed_parse_c + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" + #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) + { +@@ -821,12 +829,15 @@ static void __init retbleed_select_mitig + + case RETBLEED_CMD_AUTO: + default: +- if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) +- break; +- + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ ++ /* ++ * The Intel mitigation (IBRS) was already selected in ++ * spectre_v2_select_mitigation(). ++ */ ++ + break; + } + +@@ -856,15 +867,31 @@ static void __init retbleed_select_mitig + break; + } + ++ /* ++ * Let IBRS trump all on Intel without affecting the effects of the ++ * retbleed= cmdline option. 
++ */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ switch (spectre_v2_enabled) { ++ case SPECTRE_V2_IBRS: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS; ++ break; ++ case SPECTRE_V2_EIBRS: ++ case SPECTRE_V2_EIBRS_RETPOLINE: ++ case SPECTRE_V2_EIBRS_LFENCE: ++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; ++ break; ++ default: ++ pr_err(RETBLEED_INTEL_MSG); ++ } ++ } ++ + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); + } + + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + +-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = +- SPECTRE_V2_NONE; +- + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; + static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1124,24 +1124,24 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), ++ BIT(7) | BIT(0xB), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 
0xD), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), +@@ -1251,7 +1251,7 @@ static void __init cpu_set_bug_bits(stru + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + +- if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) ++ if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) + setup_force_cpu_bug(X86_BUG_RETBLEED); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) diff --git 
a/queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch b/queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch new file mode 100644 index 00000000000..52997341057 --- /dev/null +++ b/queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch @@ -0,0 +1,103 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:56 +0200 +Subject: x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation() + +From: Peter Zijlstra + +commit 166115c08a9b0b846b783088808a27d739be6e8d upstream. + +retbleed will depend on spectre_v2, while spectre_v2_user depends on +retbleed. Break this cycle. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -37,8 +37,9 @@ + #include "cpu.h" + + static void __init spectre_v1_select_mitigation(void); +-static void __init retbleed_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); ++static void __init spectre_v2_user_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + static void __init mds_select_mitigation(void); +@@ -137,13 +138,19 @@ void __init check_bugs(void) + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); ++ spectre_v2_select_mitigation(); ++ /* ++ * retbleed_select_mitigation() relies on the state set by ++ * spectre_v2_select_mitigation(); 
specifically it wants to know about ++ * spectre_v2=ibrs. ++ */ + retbleed_select_mitigation(); + /* +- * spectre_v2_select_mitigation() relies on the state set by ++ * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ +- spectre_v2_select_mitigation(); ++ spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + md_clear_select_mitigation(); +@@ -969,13 +976,15 @@ static void __init spec_v2_user_print_co + pr_info("spectre_v2_user=%s forced on command line.\n", reason); + } + ++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; ++ + static enum spectre_v2_user_cmd __init +-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_parse_user_cmdline(void) + { + char arg[20]; + int ret, i; + +- switch (v2_cmd) { ++ switch (spectre_v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: +@@ -1010,7 +1019,7 @@ static inline bool spectre_v2_in_ibrs_mo + } + + static void __init +-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_user_select_mitigation(void) + { + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); +@@ -1023,7 +1032,7 @@ spectre_v2_user_select_mitigation(enum s + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + +- cmd = spectre_v2_parse_user_cmdline(v2_cmd); ++ cmd = spectre_v2_parse_user_cmdline(); + switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; +@@ -1347,7 +1356,7 @@ static void __init spectre_v2_select_mit + } + + /* Set up IBPB and STIBP depending on the general spectre V2 command */ +- spectre_v2_user_select_mitigation(cmd); ++ spectre_v2_cmd = cmd; + } + + static void update_stibp_msr(void * __unused) diff --git a/queue-5.10/x86-common-stamp-out-the-stepping-madness.patch 
b/queue-5.10/x86-common-stamp-out-the-stepping-madness.patch new file mode 100644 index 00000000000..5128f8a70b3 --- /dev/null +++ b/queue-5.10/x86-common-stamp-out-the-stepping-madness.patch @@ -0,0 +1,78 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 24 Jun 2022 14:03:25 +0200 +Subject: x86/common: Stamp out the stepping madness + +From: Peter Zijlstra + +commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream. + +The whole MMIO/RETBLEED enumeration went overboard on steppings. Get +rid of all that and simply use ANY. + +If a future stepping of these models would not be affected, it had +better set the relevant ARCH_CAP_$FOO_NO bit in +IA32_ARCH_CAPABILITIES. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Acked-by: Dave Hansen +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++--------------------- + 1 file changed, 16 insertions(+), 21 deletions(-) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1119,32 +1119,27 @@ static const struct x86_cpu_id cpu_vuln_ + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), +- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), ++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), 
+- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), +- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | 
MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), diff --git a/queue-5.10/x86-cpu-amd-add-spectral-chicken.patch b/queue-5.10/x86-cpu-amd-add-spectral-chicken.patch new file mode 100644 index 00000000000..3957fc0684e --- /dev/null +++ b/queue-5.10/x86-cpu-amd-add-spectral-chicken.patch @@ -0,0 +1,108 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:04 +0200 +Subject: x86/cpu/amd: Add Spectral Chicken + +From: Peter Zijlstra + +commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream. + +Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken +bit for some speculation behaviour. It needs setting. + +Note: very belatedly AMD released naming; it's now officially called + MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT + but shall remain the SPECTRAL CHICKEN. 
+ +Suggested-by: Andrew Cooper +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 3 +++ + arch/x86/kernel/cpu/amd.c | 23 ++++++++++++++++++++++- + arch/x86/kernel/cpu/cpu.h | 2 ++ + arch/x86/kernel/cpu/hygon.c | 6 ++++++ + 4 files changed, 33 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -508,6 +508,9 @@ + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + ++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 ++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) ++ + /* Fam 16h MSRs */ + #define MSR_F16H_L2I_PERF_CTL 0xc0010230 + #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -914,6 +914,26 @@ static void init_amd_bd(struct cpuinfo_x + clear_rdrand_cpuid_bit(c); + } + ++void init_spectral_chicken(struct cpuinfo_x86 *c) ++{ ++ u64 value; ++ ++ /* ++ * On Zen2 we offer this chicken (bit) on the altar of Speculation. ++ * ++ * This suppresses speculation from the middle of a basic block, i.e. it ++ * suppresses non-branch predictions. 
++ * ++ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H ++ */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { ++ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { ++ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; ++ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); ++ } ++ } ++} ++ + static void init_amd_zn(struct cpuinfo_x86 *c) + { + set_cpu_cap(c, X86_FEATURE_ZEN); +@@ -959,7 +979,8 @@ static void init_amd(struct cpuinfo_x86 + case 0x12: init_amd_ln(c); break; + case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; +- case 0x17: fallthrough; ++ case 0x17: init_spectral_chicken(c); ++ fallthrough; + case 0x19: init_amd_zn(c); break; + } + +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -60,6 +60,8 @@ extern void tsx_disable(void); + static inline void tsx_init(void) { } + #endif /* CONFIG_CPU_SUP_INTEL */ + ++extern void init_spectral_chicken(struct cpuinfo_x86 *c); ++ + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -318,6 +318,12 @@ static void init_hygon(struct cpuinfo_x8 + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); + ++ /* ++ * XXX someone from Hygon needs to confirm this DTRT ++ * ++ init_spectral_chicken(c); ++ */ ++ + set_cpu_cap(c, X86_FEATURE_ZEN); + set_cpu_cap(c, X86_FEATURE_CPB); + diff --git a/queue-5.10/x86-cpu-amd-enumerate-btc_no.patch b/queue-5.10/x86-cpu-amd-enumerate-btc_no.patch new file mode 100644 index 00000000000..0eb427e9777 --- /dev/null +++ b/queue-5.10/x86-cpu-amd-enumerate-btc_no.patch @@ -0,0 +1,86 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Andrew Cooper +Date: Fri, 24 Jun 2022 14:41:21 +0100 +Subject: x86/cpu/amd: Enumerate BTC_NO + +From: Andrew Cooper + +commit 
26aae8ccbc1972233afd08fb3f368947c0314265 upstream. + +BTC_NO indicates that hardware is not susceptible to Branch Type Confusion. + +Zen3 CPUs don't suffer BTC. + +Hypervisors are expected to synthesise BTC_NO when it is appropriate +given the migration pool, to prevent kernels using heuristics. + + [ bp: Massage. ] + +Signed-off-by: Andrew Cooper +Signed-off-by: Borislav Petkov +[cascardo: no X86_FEATURE_BRS] +[cascardo: no X86_FEATURE_CPPC] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/amd.c | 21 +++++++++++++++------ + arch/x86/kernel/cpu/common.c | 6 ++++-- + 3 files changed, 20 insertions(+), 8 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -316,6 +316,7 @@ + #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ ++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -942,12 +942,21 @@ static void init_amd_zn(struct cpuinfo_x + node_reclaim_distance = 32; + #endif + +- /* +- * Fix erratum 1076: CPB feature bit not being set in CPUID. +- * Always set it, except when running under a hypervisor. +- */ +- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) +- set_cpu_cap(c, X86_FEATURE_CPB); ++ /* Fix up CPUID bits, but only if not virtualised. */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { ++ ++ /* Erratum 1076: CPB feature bit not being set in CPUID. 
*/ ++ if (!cpu_has(c, X86_FEATURE_CPB)) ++ set_cpu_cap(c, X86_FEATURE_CPB); ++ ++ /* ++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to ++ * Branch Type Confusion, but predate the allocation of the ++ * BTC_NO bit. ++ */ ++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) ++ set_cpu_cap(c, X86_FEATURE_BTC_NO); ++ } + } + + static void init_amd(struct cpuinfo_x86 *c) +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1246,8 +1246,10 @@ static void __init cpu_set_bug_bits(stru + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + +- if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) +- setup_force_cpu_bug(X86_BUG_RETBLEED); ++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) { ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ } + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; diff --git a/queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch b/queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch new file mode 100644 index 00000000000..8f3f1632f1b --- /dev/null +++ b/queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:33 +0200 +Subject: x86/cpufeatures: Move RETPOLINE flags to word 11 + +From: Peter Zijlstra + +commit a883d624aed463c84c22596006e5a96f5b44db31 upstream. + +In order to extend the RETPOLINE features to 4, move them to word 11 +where there is still room. This mostly keeps DISABLE_RETPOLINE +simple. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: bits 8 and 9 of word 11 are also free here, + so comment them accordingly] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,8 +203,8 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++/* FREE! ( 7*32+12) */ ++/* FREE! ( 7*32+13) */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +@@ -290,6 +290,12 @@ + #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ + #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ ++/* FREE! (11*32+ 8) */ ++/* FREE! (11*32+ 9) */ ++/* FREE! (11*32+10) */ ++/* FREE! 
(11*32+11) */ ++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch b/queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch new file mode 100644 index 00000000000..cee660aebb1 --- /dev/null +++ b/queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch @@ -0,0 +1,355 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:53 +0200 +Subject: x86/entry: Add kernel IBRS implementation + +From: Peter Zijlstra + +commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream. + +Implement Kernel IBRS - currently the only known option to mitigate RSB +underflow speculation issues on Skylake hardware. + +Note: since IBRS_ENTER requires fuller context established than +UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET +itself implies a RET, it must come after IBRS_ENTER. This means +IBRS_ENTER needs to also move UNTRAIN_RET. + +Note 2: KERNEL_IBRS is sub-optimal for XenPV. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict at arch/x86/entry/entry_64.S, skip_r11rcx] +[cascardo: conflict at arch/x86/entry/entry_64_compat.S] +[cascardo: conflict fixups, no ANNOTATE_NOENDBR] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/calling.h | 58 +++++++++++++++++++++++++++++++++++++ + arch/x86/entry/entry_64.S | 44 ++++++++++++++++++++++++---- + arch/x86/entry/entry_64_compat.S | 17 ++++++++-- + arch/x86/include/asm/cpufeatures.h | 2 - + 4 files changed, 111 insertions(+), 10 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -6,6 +6,8 @@ + #include + #include + #include ++#include ++#include + + /* + +@@ -309,6 +311,62 @@ For 32-bit we have the following convent + #endif + + /* ++ * IBRS kernel mitigation for Spectre_v2. ++ * ++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers ++ * the regs it uses (AX, CX, DX). Must be called before the first RET ++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction) ++ * ++ * The optional argument is used to save/restore the current value, ++ * which is used on the paranoid paths. ++ * ++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. ++ */ ++.macro IBRS_ENTER save_reg ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ rdmsr ++ shl $32, %rdx ++ or %rdx, %rax ++ mov %rax, \save_reg ++ test $SPEC_CTRL_IBRS, %eax ++ jz .Ldo_wrmsr_\@ ++ lfence ++ jmp .Lend_\@ ++.Ldo_wrmsr_\@: ++.endif ++ ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++.endm ++ ++/* ++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) ++ * regs. Must be called after the last RET. 
++ */ ++.macro IBRS_EXIT save_reg ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ mov \save_reg, %rdx ++.else ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ andl $(~SPEC_CTRL_IBRS), %edx ++.endif ++ ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++.endm ++ ++/* + * Mitigate Spectre v1 for conditional swapgs code paths. + * + * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -102,7 +102,6 @@ SYM_CODE_START(entry_SYSCALL_64) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ +@@ -118,6 +117,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + /* IRQs are off. */ + movq %rax, %rdi + movq %rsp, %rsi ++ ++ /* clobbers %rax, make sure it is after saving the syscall nr */ ++ IBRS_ENTER ++ UNTRAIN_RET ++ + call do_syscall_64 /* returns with IRQs disabled */ + + /* +@@ -192,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + * perf profiles. Nothing jumps here. + */ + syscall_return_via_sysret: ++ IBRS_EXIT + POP_REGS pop_rdi=0 + + /* +@@ -569,6 +574,7 @@ __irqentry_text_end: + + SYM_CODE_START_LOCAL(common_interrupt_return) + SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ++ IBRS_EXIT + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testb $3, CS(%rsp) +@@ -889,6 +895,9 @@ SYM_CODE_END(xen_failsafe_callback) + * 1 -> no SWAPGS on exit + * + * Y GSBASE value at entry, must be restored in paranoid_exit ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_entry) + UNWIND_HINT_FUNC +@@ -912,7 +921,6 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * be retrieved from a kernel internal table. 
+ */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 +- UNTRAIN_RET + + /* + * Handling GSBASE depends on the availability of FSGSBASE. +@@ -934,7 +942,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * is needed here. + */ + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx +- RET ++ jmp .Lparanoid_gsbase_done + + .Lparanoid_entry_checkgs: + /* EBX = 1 -> kernel GSBASE active, no restore required */ +@@ -953,8 +961,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) + xorl %ebx, %ebx + swapgs + .Lparanoid_kernel_gsbase: +- + FENCE_SWAPGS_KERNEL_ENTRY ++.Lparanoid_gsbase_done: ++ ++ /* ++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like ++ * CR3 above, keep the old value in a callee saved register. ++ */ ++ IBRS_ENTER save_reg=%r15 ++ UNTRAIN_RET ++ + RET + SYM_CODE_END(paranoid_entry) + +@@ -976,9 +992,19 @@ SYM_CODE_END(paranoid_entry) + * 1 -> no SWAPGS on exit + * + * Y User space GSBASE, must be restored unconditionally ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_exit) + UNWIND_HINT_REGS ++ ++ /* ++ * Must restore IBRS state before both CR3 and %GS since we need access ++ * to the per-CPU x86_spec_ctrl_shadow variable. ++ */ ++ IBRS_EXIT save_reg=%r15 ++ + /* + * The order of operations is important. RESTORE_CR3 requires + * kernel GSBASE. +@@ -1025,9 +1051,11 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER + UNTRAIN_RET + + .Lerror_entry_from_usermode_after_swapgs: ++ + /* Put us onto the real thread stack. 
*/ + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ +@@ -1081,6 +1109,7 @@ SYM_CODE_START_LOCAL(error_entry) + SWAPGS + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER + UNTRAIN_RET + + /* +@@ -1176,7 +1205,6 @@ SYM_CODE_START(asm_exc_nmi) + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 +- UNTRAIN_RET + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ +@@ -1187,6 +1215,9 @@ SYM_CODE_START(asm_exc_nmi) + PUSH_AND_CLEAR_REGS rdx=(%rdx) + ENCODE_FRAME_POINTER + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're +@@ -1409,6 +1440,9 @@ end_repeat_nmi: + movq $-1, %rsi + call exc_nmi + ++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ ++ IBRS_EXIT save_reg=%r15 ++ + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -4,7 +4,6 @@ + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. + */ +-#include "calling.h" + #include + #include + #include +@@ -18,6 +17,8 @@ + #include + #include + ++#include "calling.h" ++ + .section .entry.text, "ax" + + /* +@@ -72,7 +73,6 @@ SYM_CODE_START(entry_SYSENTER_compat) + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ + SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* + * User tracing code (ptrace or signal handlers) might assume that +@@ -114,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_af + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC + * ourselves. 
To save a few cycles, we can check whether +@@ -213,7 +216,6 @@ SYM_CODE_START(entry_SYSCALL_compat) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ +@@ -255,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft + + UNWIND_HINT_REGS + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_fast_syscall_32 + /* XEN PV guests always use IRET path */ +@@ -269,6 +274,8 @@ sysret32_from_system_call: + */ + STACKLEAK_ERASE + ++ IBRS_EXIT ++ + movq RBX(%rsp), %rbx /* pt_regs->rbx */ + movq RBP(%rsp), %rbp /* pt_regs->rbp */ + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ +@@ -380,7 +387,6 @@ SYM_CODE_START(entry_INT80_compat) + pushq (%rdi) /* pt_regs->di */ + .Lint80_keep_stack: + +- UNTRAIN_RET + pushq %rsi /* pt_regs->si */ + xorl %esi, %esi /* nospec si */ + pushq %rdx /* pt_regs->dx */ +@@ -413,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat) + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_int80_syscall_32 + jmp swapgs_restore_regs_and_return_to_usermode +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,7 +203,7 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-/* FREE! ( 7*32+12) */ ++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ + /* FREE! 
( 7*32+13) */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ diff --git a/queue-5.10/x86-entry-remove-skip_r11rcx.patch b/queue-5.10/x86-entry-remove-skip_r11rcx.patch new file mode 100644 index 00000000000..3a74d23286f --- /dev/null +++ b/queue-5.10/x86-entry-remove-skip_r11rcx.patch @@ -0,0 +1,69 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 6 May 2022 14:14:35 +0200 +Subject: x86/entry: Remove skip_r11rcx + +From: Peter Zijlstra + +commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream. + +Yes, r11 and rcx have been restored previously, but since they're being +popped anyway (into rsi) might as well pop them into their own regs -- +setting them to the value they already are. + +Less magical code. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/calling.h | 10 +--------- + arch/x86/entry/entry_64.S | 3 +-- + 2 files changed, 2 insertions(+), 11 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -146,27 +146,19 @@ For 32-bit we have the following convent + + .endm + +-.macro POP_REGS pop_rdi=1 skip_r11rcx=0 ++.macro POP_REGS pop_rdi=1 + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx +- .if \skip_r11rcx +- popq %rsi +- .else + popq %r11 +- .endif + popq %r10 + popq %r9 + popq %r8 + popq %rax +- .if \skip_r11rcx +- popq %rsi +- .else + popq %rcx +- .endif + popq %rdx + popq %rsi + .if \pop_rdi +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + * perf profiles. Nothing jumps here. 
+ */ + syscall_return_via_sysret: +- /* rcx and r11 are already restored (see code above) */ +- POP_REGS pop_rdi=0 skip_r11rcx=1 ++ POP_REGS pop_rdi=0 + + /* + * Now all regs are restored except RSP and RDI. diff --git a/queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch b/queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch new file mode 100644 index 00000000000..3f7ebbc1f2b --- /dev/null +++ b/queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:40 +0200 +Subject: x86/ftrace: Use alternative RET encoding + +From: Peter Zijlstra + +commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream. + +Use the return thunk in ftrace trampolines, if needed. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: still copy return from ftrace_stub] +[cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/ftrace.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/ftrace.c ++++ b/arch/x86/kernel/ftrace.c +@@ -308,7 +308,7 @@ union ftrace_op_code_union { + } __attribute__((packed)); + }; + +-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) ++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 
5 : 1 + IS_ENABLED(CONFIG_SLS)) + + static unsigned long + create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +@@ -367,7 +367,10 @@ create_trampoline(struct ftrace_ops *ops + + /* The trampoline ends with ret(q) */ + retq = (unsigned long)ftrace_stub; +- ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE); ++ else ++ ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); + if (WARN_ON(ret < 0)) + goto fail; + diff --git a/queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch b/queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch new file mode 100644 index 00000000000..8db17947df5 --- /dev/null +++ b/queue-5.10/x86-insn-add-a-__ignore_sync_check__-marker.patch @@ -0,0 +1,199 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Mon, 22 Feb 2021 13:34:40 +0100 +Subject: x86/insn: Add a __ignore_sync_check__ marker + +From: Borislav Petkov + +commit d30c7b820be5c4777fe6c3b0c21f9d0064251e51 upstream. + +Add an explicit __ignore_sync_check__ marker which will be used to mark +lines which are supposed to be ignored by file synchronization check +scripts, its advantage being that it explicitly denotes such lines in +the code. 
+ +Signed-off-by: Borislav Petkov +Reviewed-by: Masami Hiramatsu +Link: https://lkml.kernel.org/r/20210304174237.31945-4-bp@alien8.de +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/inat.h | 2 +- + arch/x86/include/asm/insn.h | 2 +- + arch/x86/lib/inat.c | 2 +- + arch/x86/lib/insn.c | 6 +++--- + tools/arch/x86/include/asm/inat.h | 2 +- + tools/arch/x86/include/asm/insn.h | 2 +- + tools/arch/x86/lib/inat.c | 2 +- + tools/arch/x86/lib/insn.c | 6 +++--- + tools/objtool/sync-check.sh | 17 +++++++++++++---- + tools/perf/check-headers.sh | 15 +++++++++++---- + 10 files changed, 36 insertions(+), 20 deletions(-) + +--- a/arch/x86/include/asm/inat.h ++++ b/arch/x86/include/asm/inat.h +@@ -6,7 +6,7 @@ + * + * Written by Masami Hiramatsu + */ +-#include ++#include /* __ignore_sync_check__ */ + + /* + * Internal bits. Don't use bitmasks directly, because these bits are +--- a/arch/x86/include/asm/insn.h ++++ b/arch/x86/include/asm/insn.h +@@ -8,7 +8,7 @@ + */ + + /* insn_attr_t is defined in inat.h */ +-#include ++#include /* __ignore_sync_check__ */ + + struct insn_field { + union { +--- a/arch/x86/lib/inat.c ++++ b/arch/x86/lib/inat.c +@@ -4,7 +4,7 @@ + * + * Written by Masami Hiramatsu + */ +-#include ++#include /* __ignore_sync_check__ */ + + /* Attribute tables are generated from opcode map */ + #include "inat-tables.c" +--- a/arch/x86/lib/insn.c ++++ b/arch/x86/lib/insn.c +@@ -10,10 +10,10 @@ + #else + #include + #endif +-#include +-#include ++#include /*__ignore_sync_check__ */ ++#include /* __ignore_sync_check__ */ + +-#include ++#include /* __ignore_sync_check__ */ + + /* Verify next sizeof(t) bytes can be on the same instruction */ + #define validate_next(t, insn, n) \ +--- a/tools/arch/x86/include/asm/inat.h ++++ b/tools/arch/x86/include/asm/inat.h +@@ -6,7 +6,7 @@ + * + * Written by Masami Hiramatsu + */ +-#include "inat_types.h" ++#include "inat_types.h" /* __ignore_sync_check__ */ + + /* + * Internal bits. 
Don't use bitmasks directly, because these bits are +--- a/tools/arch/x86/include/asm/insn.h ++++ b/tools/arch/x86/include/asm/insn.h +@@ -8,7 +8,7 @@ + */ + + /* insn_attr_t is defined in inat.h */ +-#include "inat.h" ++#include "inat.h" /* __ignore_sync_check__ */ + + struct insn_field { + union { +--- a/tools/arch/x86/lib/inat.c ++++ b/tools/arch/x86/lib/inat.c +@@ -4,7 +4,7 @@ + * + * Written by Masami Hiramatsu + */ +-#include "../include/asm/insn.h" ++#include "../include/asm/insn.h" /* __ignore_sync_check__ */ + + /* Attribute tables are generated from opcode map */ + #include "inat-tables.c" +--- a/tools/arch/x86/lib/insn.c ++++ b/tools/arch/x86/lib/insn.c +@@ -10,10 +10,10 @@ + #else + #include + #endif +-#include "../include/asm/inat.h" +-#include "../include/asm/insn.h" ++#include "../include/asm/inat.h" /* __ignore_sync_check__ */ ++#include "../include/asm/insn.h" /* __ignore_sync_check__ */ + +-#include "../include/asm/emulate_prefix.h" ++#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ + + /* Verify next sizeof(t) bytes can be on the same instruction */ + #define validate_next(t, insn, n) \ +--- a/tools/objtool/sync-check.sh ++++ b/tools/objtool/sync-check.sh +@@ -16,11 +16,14 @@ arch/x86/include/asm/emulate_prefix.h + arch/x86/lib/x86-opcode-map.txt + arch/x86/tools/gen-insn-attr-x86.awk + include/linux/static_call_types.h +-arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]' +-arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]' +-arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]' +-arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]' + " ++ ++SYNC_CHECK_FILES=' ++arch/x86/include/asm/inat.h ++arch/x86/include/asm/insn.h ++arch/x86/lib/inat.c ++arch/x86/lib/insn.c ++' + fi + + check_2 () { +@@ -63,3 +66,9 @@ while read -r file_entry; do + done <string syscall +@@ -129,6 +136,10 @@ 
for i in $FILES; do + check $i -B + done + ++for i in $SYNC_CHECK_FILES; do ++ check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"' ++done ++ + # diff with extra ignore lines + check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' + check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' +@@ -137,10 +148,6 @@ check include/uapi/linux/mman.h '- + check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' + check include/linux/ctype.h '-I "isdigit("' + check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include " -B' +-check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' +-check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' +-check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' +-check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' + + # diff non-symmetric files + check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/queue-5.10/x86-insn-add-an-insn_decode-api.patch b/queue-5.10/x86-insn-add-an-insn_decode-api.patch new file mode 100644 index 00000000000..1e542ed54fc --- /dev/null +++ b/queue-5.10/x86-insn-add-an-insn_decode-api.patch @@ -0,0 +1,965 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Tue, 3 Nov 2020 17:28:30 +0100 +Subject: x86/insn: Add an insn_decode() API + +From: Borislav Petkov + +commit 93281c4a96572a34504244969b938e035204778d upstream. + +Users of the instruction decoder should use this to decode instruction +bytes. For that, have insn*() helpers return an int value to denote +success/failure. When there's an error fetching the next insn byte and +the insn falls short, return -ENODATA to denote that. 
+ +While at it, make insn_get_opcode() stricter as to whether what has been +seen so far is a valid insn or not. + +Copy linux/kconfig.h for the tools-version of the decoder so that it can +use IS_ENABLED(). + +Also, cast the INSN_MODE_KERN dummy define value to (enum insn_mode) +for tools use of the decoder because perf tool builds with -Werror and +errors out with -Werror=sign-compare otherwise. + +Signed-off-by: Borislav Petkov +Acked-by: Masami Hiramatsu +Link: https://lkml.kernel.org/r/20210304174237.31945-5-bp@alien8.de +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/insn.h | 24 ++-- + arch/x86/lib/insn.c | 216 +++++++++++++++++++++++++++++------- + tools/arch/x86/include/asm/insn.h | 24 ++-- + tools/arch/x86/lib/insn.c | 222 +++++++++++++++++++++++++++++--------- + tools/include/linux/kconfig.h | 73 ++++++++++++ + 5 files changed, 452 insertions(+), 107 deletions(-) + create mode 100644 tools/include/linux/kconfig.h + +--- a/arch/x86/include/asm/insn.h ++++ b/arch/x86/include/asm/insn.h +@@ -87,13 +87,23 @@ struct insn { + #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + + extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +-extern void insn_get_prefixes(struct insn *insn); +-extern void insn_get_opcode(struct insn *insn); +-extern void insn_get_modrm(struct insn *insn); +-extern void insn_get_sib(struct insn *insn); +-extern void insn_get_displacement(struct insn *insn); +-extern void insn_get_immediate(struct insn *insn); +-extern void insn_get_length(struct insn *insn); ++extern int insn_get_prefixes(struct insn *insn); ++extern int insn_get_opcode(struct insn *insn); ++extern int insn_get_modrm(struct insn *insn); ++extern int insn_get_sib(struct insn *insn); ++extern int insn_get_displacement(struct insn *insn); ++extern int insn_get_immediate(struct insn *insn); ++extern int insn_get_length(struct insn *insn); ++ ++enum insn_mode { ++ INSN_MODE_32, ++
INSN_MODE_64, ++ /* Mode is determined by the current kernel build. */ ++ INSN_MODE_KERN, ++ INSN_NUM_MODES, ++}; ++ ++extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + + /* Attribute will be determined after getting ModRM (for opcode groups) */ + static inline void insn_get_attribute(struct insn *insn) +--- a/arch/x86/lib/insn.c ++++ b/arch/x86/lib/insn.c +@@ -13,6 +13,9 @@ + #include /*__ignore_sync_check__ */ + #include /* __ignore_sync_check__ */ + ++#include ++#include ++ + #include /* __ignore_sync_check__ */ + + /* Verify next sizeof(t) bytes can be on the same instruction */ +@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. ++ * ++ * * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_prefixes(struct insn *insn) ++int insn_get_prefixes(struct insn *insn) + { + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; +@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn + int i, nb; + + if (prefixes->got) +- return; ++ return 0; + + insn_get_emulate_prefix(insn); + +@@ -217,8 +224,10 @@ vex_end: + + prefixes->got = 1; + ++ return 0; ++ + err_out: +- return; ++ return -ENODATA; + } + + /** +@@ -230,16 +239,25 @@ err_out: + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. 
++ * ++ * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_opcode(struct insn *insn) ++int insn_get_opcode(struct insn *insn) + { + struct insn_field *opcode = &insn->opcode; ++ int pfx_id, ret; + insn_byte_t op; +- int pfx_id; ++ + if (opcode->got) +- return; +- if (!insn->prefixes.got) +- insn_get_prefixes(insn); ++ return 0; ++ ++ if (!insn->prefixes.got) { ++ ret = insn_get_prefixes(insn); ++ if (ret) ++ return ret; ++ } + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); +@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn) + insn->attr = inat_get_avx_attribute(op, m, p); + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && +- !inat_is_group(insn->attr))) +- insn->attr = 0; /* This instruction is bad */ +- goto end; /* VEX has only 1 byte for opcode */ ++ !inat_is_group(insn->attr))) { ++ /* This instruction is bad */ ++ insn->attr = 0; ++ return -EINVAL; ++ } ++ /* VEX has only 1 byte for opcode */ ++ goto end; + } + + insn->attr = inat_get_opcode_attribute(op); +@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn) + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } +- if (inat_must_vex(insn->attr)) +- insn->attr = 0; /* This instruction is bad */ ++ ++ if (inat_must_vex(insn->attr)) { ++ /* This instruction is bad */ ++ insn->attr = 0; ++ return -EINVAL; ++ } + end: + opcode->got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + /** +@@ -283,15 +310,25 @@ err_out: + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 
++ * ++ * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_modrm(struct insn *insn) ++int insn_get_modrm(struct insn *insn) + { + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; ++ int ret; ++ + if (modrm->got) +- return; +- if (!insn->opcode.got) +- insn_get_opcode(insn); ++ return 0; ++ ++ if (!insn->opcode.got) { ++ ret = insn_get_opcode(insn); ++ if (ret) ++ return ret; ++ } + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); +@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn) + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); +- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) +- insn->attr = 0; /* This is bad */ ++ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { ++ /* Bad insn */ ++ insn->attr = 0; ++ return -EINVAL; ++ } + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; ++ + modrm->got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + +@@ -325,11 +367,16 @@ err_out: + int insn_rip_relative(struct insn *insn) + { + struct insn_field *modrm = &insn->modrm; ++ int ret; + + if (!insn->x86_64) + return 0; +- if (!modrm->got) +- insn_get_modrm(insn); ++ ++ if (!modrm->got) { ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return 0; ++ } + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. +@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn) + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. ++ * ++ * Returns: ++ * 0: if decoding succeeded ++ * < 0: otherwise. 
+ */ +-void insn_get_sib(struct insn *insn) ++int insn_get_sib(struct insn *insn) + { + insn_byte_t modrm; ++ int ret; + + if (insn->sib.got) +- return; +- if (!insn->modrm.got) +- insn_get_modrm(insn); ++ return 0; ++ ++ if (!insn->modrm.got) { ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return ret; ++ } ++ + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && +@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn) + } + insn->sib.got = 1; + ++ return 0; ++ + err_out: +- return; ++ return -ENODATA; + } + + +@@ -374,15 +433,25 @@ err_out: + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. ++ * ++ * * Returns: ++ * 0: if decoding succeeded ++ * < 0: otherwise. + */ +-void insn_get_displacement(struct insn *insn) ++int insn_get_displacement(struct insn *insn) + { + insn_byte_t mod, rm, base; ++ int ret; + + if (insn->displacement.got) +- return; +- if (!insn->sib.got) +- insn_get_sib(insn); ++ return 0; ++ ++ if (!insn->sib.got) { ++ ret = insn_get_sib(insn); ++ if (ret) ++ return ret; ++ } ++ + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: +@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn * + } + out: + insn->displacement.got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + /* Decode moffset16/32/64. Return 0 if failed */ +@@ -538,20 +608,30 @@ err_out: + } + + /** +- * insn_get_immediate() - Get the immediates of instruction ++ * insn_get_immediate() - Get the immediate in an instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. 
Unsigned-value can be +- * get by bit masking with ((1 << (nbytes * 8)) - 1) ++ * computed by bit masking with ((1 << (nbytes * 8)) - 1) ++ * ++ * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_immediate(struct insn *insn) ++int insn_get_immediate(struct insn *insn) + { ++ int ret; ++ + if (insn->immediate.got) +- return; +- if (!insn->displacement.got) +- insn_get_displacement(insn); ++ return 0; ++ ++ if (!insn->displacement.got) { ++ ret = insn_get_displacement(insn); ++ if (ret) ++ return ret; ++ } + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) +@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins + } + done: + insn->immediate.got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + /** +@@ -615,13 +696,58 @@ err_out: + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. +- */ +-void insn_get_length(struct insn *insn) ++ * ++ * Returns: ++ * - 0 on success ++ * - < 0 on error ++*/ ++int insn_get_length(struct insn *insn) + { ++ int ret; ++ + if (insn->length) +- return; +- if (!insn->immediate.got) +- insn_get_immediate(insn); ++ return 0; ++ ++ if (!insn->immediate.got) { ++ ret = insn_get_immediate(insn); ++ if (ret) ++ return ret; ++ } ++ + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); ++ ++ return 0; ++} ++ ++/** ++ * insn_decode() - Decode an x86 instruction ++ * @insn: &struct insn to be initialized ++ * @kaddr: address (in kernel memory) of instruction (or copy thereof) ++ * @buf_len: length of the insn buffer at @kaddr ++ * @m: insn mode, see enum insn_mode ++ * ++ * Returns: ++ * 0: if decoding succeeded ++ * < 0: otherwise. 
++ */ ++int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) ++{ ++ int ret; ++ ++/* #define INSN_MODE_KERN -1 __ignore_sync_check__ mode is only valid in the kernel */ ++ ++ if (m == INSN_MODE_KERN) ++ insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); ++ else ++ insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); ++ ++ ret = insn_get_length(insn); ++ if (ret) ++ return ret; ++ ++ if (insn_complete(insn)) ++ return 0; ++ ++ return -EINVAL; + } +--- a/tools/arch/x86/include/asm/insn.h ++++ b/tools/arch/x86/include/asm/insn.h +@@ -87,13 +87,23 @@ struct insn { + #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + + extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +-extern void insn_get_prefixes(struct insn *insn); +-extern void insn_get_opcode(struct insn *insn); +-extern void insn_get_modrm(struct insn *insn); +-extern void insn_get_sib(struct insn *insn); +-extern void insn_get_displacement(struct insn *insn); +-extern void insn_get_immediate(struct insn *insn); +-extern void insn_get_length(struct insn *insn); ++extern int insn_get_prefixes(struct insn *insn); ++extern int insn_get_opcode(struct insn *insn); ++extern int insn_get_modrm(struct insn *insn); ++extern int insn_get_sib(struct insn *insn); ++extern int insn_get_displacement(struct insn *insn); ++extern int insn_get_immediate(struct insn *insn); ++extern int insn_get_length(struct insn *insn); ++ ++enum insn_mode { ++ INSN_MODE_32, ++ INSN_MODE_64, ++ /* Mode is determined by the current kernel build. 
*/ ++ INSN_MODE_KERN, ++ INSN_NUM_MODES, ++}; ++ ++extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + + /* Attribute will be determined after getting ModRM (for opcode groups) */ + static inline void insn_get_attribute(struct insn *insn) +--- a/tools/arch/x86/lib/insn.c ++++ b/tools/arch/x86/lib/insn.c +@@ -10,10 +10,13 @@ + #else + #include + #endif +-#include "../include/asm/inat.h" /* __ignore_sync_check__ */ +-#include "../include/asm/insn.h" /* __ignore_sync_check__ */ ++#include /* __ignore_sync_check__ */ ++#include /* __ignore_sync_check__ */ + +-#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ ++#include ++#include ++ ++#include /* __ignore_sync_check__ */ + + /* Verify next sizeof(t) bytes can be on the same instruction */ + #define validate_next(t, insn, n) \ +@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. ++ * ++ * * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_prefixes(struct insn *insn) ++int insn_get_prefixes(struct insn *insn) + { + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; +@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn + int i, nb; + + if (prefixes->got) +- return; ++ return 0; + + insn_get_emulate_prefix(insn); + +@@ -217,8 +224,10 @@ vex_end: + + prefixes->got = 1; + ++ return 0; ++ + err_out: +- return; ++ return -ENODATA; + } + + /** +@@ -230,16 +239,25 @@ err_out: + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. 
++ * ++ * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_opcode(struct insn *insn) ++int insn_get_opcode(struct insn *insn) + { + struct insn_field *opcode = &insn->opcode; ++ int pfx_id, ret; + insn_byte_t op; +- int pfx_id; ++ + if (opcode->got) +- return; +- if (!insn->prefixes.got) +- insn_get_prefixes(insn); ++ return 0; ++ ++ if (!insn->prefixes.got) { ++ ret = insn_get_prefixes(insn); ++ if (ret) ++ return ret; ++ } + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); +@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn) + insn->attr = inat_get_avx_attribute(op, m, p); + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && +- !inat_is_group(insn->attr))) +- insn->attr = 0; /* This instruction is bad */ +- goto end; /* VEX has only 1 byte for opcode */ ++ !inat_is_group(insn->attr))) { ++ /* This instruction is bad */ ++ insn->attr = 0; ++ return -EINVAL; ++ } ++ /* VEX has only 1 byte for opcode */ ++ goto end; + } + + insn->attr = inat_get_opcode_attribute(op); +@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn) + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } +- if (inat_must_vex(insn->attr)) +- insn->attr = 0; /* This instruction is bad */ ++ ++ if (inat_must_vex(insn->attr)) { ++ /* This instruction is bad */ ++ insn->attr = 0; ++ return -EINVAL; ++ } + end: + opcode->got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + /** +@@ -283,15 +310,25 @@ err_out: + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 
++ * ++ * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_modrm(struct insn *insn) ++int insn_get_modrm(struct insn *insn) + { + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; ++ int ret; ++ + if (modrm->got) +- return; +- if (!insn->opcode.got) +- insn_get_opcode(insn); ++ return 0; ++ ++ if (!insn->opcode.got) { ++ ret = insn_get_opcode(insn); ++ if (ret) ++ return ret; ++ } + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); +@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn) + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); +- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) +- insn->attr = 0; /* This is bad */ ++ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { ++ /* Bad insn */ ++ insn->attr = 0; ++ return -EINVAL; ++ } + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; ++ + modrm->got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + +@@ -325,11 +367,16 @@ err_out: + int insn_rip_relative(struct insn *insn) + { + struct insn_field *modrm = &insn->modrm; ++ int ret; + + if (!insn->x86_64) + return 0; +- if (!modrm->got) +- insn_get_modrm(insn); ++ ++ if (!modrm->got) { ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return 0; ++ } + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. +@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn) + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. ++ * ++ * Returns: ++ * 0: if decoding succeeded ++ * < 0: otherwise. 
+ */ +-void insn_get_sib(struct insn *insn) ++int insn_get_sib(struct insn *insn) + { + insn_byte_t modrm; ++ int ret; + + if (insn->sib.got) +- return; +- if (!insn->modrm.got) +- insn_get_modrm(insn); ++ return 0; ++ ++ if (!insn->modrm.got) { ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return ret; ++ } ++ + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && +@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn) + } + insn->sib.got = 1; + ++ return 0; ++ + err_out: +- return; ++ return -ENODATA; + } + + +@@ -374,15 +433,25 @@ err_out: + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. ++ * ++ * * Returns: ++ * 0: if decoding succeeded ++ * < 0: otherwise. + */ +-void insn_get_displacement(struct insn *insn) ++int insn_get_displacement(struct insn *insn) + { + insn_byte_t mod, rm, base; ++ int ret; + + if (insn->displacement.got) +- return; +- if (!insn->sib.got) +- insn_get_sib(insn); ++ return 0; ++ ++ if (!insn->sib.got) { ++ ret = insn_get_sib(insn); ++ if (ret) ++ return ret; ++ } ++ + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: +@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn * + } + out: + insn->displacement.got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + /* Decode moffset16/32/64. Return 0 if failed */ +@@ -538,20 +608,30 @@ err_out: + } + + /** +- * insn_get_immediate() - Get the immediates of instruction ++ * insn_get_immediate() - Get the immediate in an instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. 
Unsigned-value can be +- * get by bit masking with ((1 << (nbytes * 8)) - 1) ++ * computed by bit masking with ((1 << (nbytes * 8)) - 1) ++ * ++ * Returns: ++ * 0: on success ++ * < 0: on error + */ +-void insn_get_immediate(struct insn *insn) ++int insn_get_immediate(struct insn *insn) + { ++ int ret; ++ + if (insn->immediate.got) +- return; +- if (!insn->displacement.got) +- insn_get_displacement(insn); ++ return 0; ++ ++ if (!insn->displacement.got) { ++ ret = insn_get_displacement(insn); ++ if (ret) ++ return ret; ++ } + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) +@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins + } + done: + insn->immediate.got = 1; ++ return 0; + + err_out: +- return; ++ return -ENODATA; + } + + /** +@@ -615,13 +696,58 @@ err_out: + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. +- */ +-void insn_get_length(struct insn *insn) ++ * ++ * Returns: ++ * - 0 on success ++ * - < 0 on error ++*/ ++int insn_get_length(struct insn *insn) + { ++ int ret; ++ + if (insn->length) +- return; +- if (!insn->immediate.got) +- insn_get_immediate(insn); ++ return 0; ++ ++ if (!insn->immediate.got) { ++ ret = insn_get_immediate(insn); ++ if (ret) ++ return ret; ++ } ++ + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); ++ ++ return 0; ++} ++ ++/** ++ * insn_decode() - Decode an x86 instruction ++ * @insn: &struct insn to be initialized ++ * @kaddr: address (in kernel memory) of instruction (or copy thereof) ++ * @buf_len: length of the insn buffer at @kaddr ++ * @m: insn mode, see enum insn_mode ++ * ++ * Returns: ++ * 0: if decoding succeeded ++ * < 0: otherwise. 
++ */ ++int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) ++{ ++ int ret; ++ ++#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */ ++ ++ if (m == INSN_MODE_KERN) ++ insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); ++ else ++ insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); ++ ++ ret = insn_get_length(insn); ++ if (ret) ++ return ret; ++ ++ if (insn_complete(insn)) ++ return 0; ++ ++ return -EINVAL; + } +--- /dev/null ++++ b/tools/include/linux/kconfig.h +@@ -0,0 +1,73 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _TOOLS_LINUX_KCONFIG_H ++#define _TOOLS_LINUX_KCONFIG_H ++ ++/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ ++ ++#ifdef CONFIG_CPU_BIG_ENDIAN ++#define __BIG_ENDIAN 4321 ++#else ++#define __LITTLE_ENDIAN 1234 ++#endif ++ ++#define __ARG_PLACEHOLDER_1 0, ++#define __take_second_arg(__ignored, val, ...) val ++ ++/* ++ * The use of "&&" / "||" is limited in certain expressions. ++ * The following enable to calculate "and" / "or" with macro expansion only. ++ */ ++#define __and(x, y) ___and(x, y) ++#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y) ++#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0) ++ ++#define __or(x, y) ___or(x, y) ++#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y) ++#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y) ++ ++/* ++ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that ++ * these only work with boolean and tristate options. ++ */ ++ ++/* ++ * Getting something that works in C and CPP for an arg that may or may ++ * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1" ++ * we match on the placeholder define, insert the "0," for arg1 and generate ++ * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one). ++ * When CONFIG_BOOGER is not defined, we generate a (... 
1, 0) pair, and when ++ * the last step cherry picks the 2nd arg, we get a zero. ++ */ ++#define __is_defined(x) ___is_defined(x) ++#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) ++#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) ++ ++/* ++ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 ++ * otherwise. For boolean options, this is equivalent to ++ * IS_ENABLED(CONFIG_FOO). ++ */ ++#define IS_BUILTIN(option) __is_defined(option) ++ ++/* ++ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 ++ * otherwise. ++ */ ++#define IS_MODULE(option) __is_defined(option##_MODULE) ++ ++/* ++ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled ++ * code can call a function defined in code compiled based on CONFIG_FOO. ++ * This is similar to IS_ENABLED(), but returns false when invoked from ++ * built-in code when CONFIG_FOO is set to 'm'. ++ */ ++#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \ ++ __and(IS_MODULE(option), __is_defined(MODULE))) ++ ++/* ++ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', ++ * 0 otherwise. ++ */ ++#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) ++ ++#endif /* _TOOLS_LINUX_KCONFIG_H */ diff --git a/queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch b/queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch new file mode 100644 index 00000000000..49437d693b6 --- /dev/null +++ b/queue-5.10/x86-insn-eval-handle-return-values-from-the-decoder.patch @@ -0,0 +1,117 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Thu, 19 Nov 2020 19:20:18 +0100 +Subject: x86/insn-eval: Handle return values from the decoder + +From: Borislav Petkov + +commit 6e8c83d2a3afbfd5ee019ec720b75a42df515caa upstream. + +Now that the different instruction-inspecting functions return a value, +test that and return early from callers if error has been encountered. 
+ +While at it, do not call insn_get_modrm() when calling +insn_get_displacement() because latter will make sure to call +insn_get_modrm() if ModRM hasn't been parsed yet. + +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210304174237.31945-6-bp@alien8.de +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/lib/insn-eval.c | 34 +++++++++++++++++++++------------- + 1 file changed, 21 insertions(+), 13 deletions(-) + +--- a/arch/x86/lib/insn-eval.c ++++ b/arch/x86/lib/insn-eval.c +@@ -928,10 +928,11 @@ static int get_seg_base_limit(struct ins + static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs, + int *regoff, long *eff_addr) + { +- insn_get_modrm(insn); ++ int ret; + +- if (!insn->modrm.nbytes) +- return -EINVAL; ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return ret; + + if (X86_MODRM_MOD(insn->modrm.value) != 3) + return -EINVAL; +@@ -977,14 +978,14 @@ static int get_eff_addr_modrm(struct ins + int *regoff, long *eff_addr) + { + long tmp; ++ int ret; + + if (insn->addr_bytes != 8 && insn->addr_bytes != 4) + return -EINVAL; + +- insn_get_modrm(insn); +- +- if (!insn->modrm.nbytes) +- return -EINVAL; ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return ret; + + if (X86_MODRM_MOD(insn->modrm.value) > 2) + return -EINVAL; +@@ -1106,18 +1107,21 @@ static int get_eff_addr_modrm_16(struct + * @base_offset will have a register, as an offset from the base of pt_regs, + * that can be used to resolve the associated segment. + * +- * -EINVAL on error. ++ * Negative value on error. 
+ */ + static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs, + int *base_offset, long *eff_addr) + { + long base, indx; + int indx_offset; ++ int ret; + + if (insn->addr_bytes != 8 && insn->addr_bytes != 4) + return -EINVAL; + +- insn_get_modrm(insn); ++ ret = insn_get_modrm(insn); ++ if (ret) ++ return ret; + + if (!insn->modrm.nbytes) + return -EINVAL; +@@ -1125,7 +1129,9 @@ static int get_eff_addr_sib(struct insn + if (X86_MODRM_MOD(insn->modrm.value) > 2) + return -EINVAL; + +- insn_get_sib(insn); ++ ret = insn_get_sib(insn); ++ if (ret) ++ return ret; + + if (!insn->sib.nbytes) + return -EINVAL; +@@ -1194,8 +1200,8 @@ static void __user *get_addr_ref_16(stru + short eff_addr; + long tmp; + +- insn_get_modrm(insn); +- insn_get_displacement(insn); ++ if (insn_get_displacement(insn)) ++ goto out; + + if (insn->addr_bytes != 2) + goto out; +@@ -1529,7 +1535,9 @@ bool insn_decode_from_regs(struct insn * + insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); + insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); + +- insn_get_length(insn); ++ if (insn_get_length(insn)) ++ return false; ++ + if (buf_size < insn->length) + return false; + diff --git a/queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch b/queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch new file mode 100644 index 00000000000..a5dde9d2407 --- /dev/null +++ b/queue-5.10/x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch @@ -0,0 +1,82 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Borislav Petkov +Date: Mon, 2 Nov 2020 18:47:34 +0100 +Subject: x86/insn: Rename insn_decode() to insn_decode_from_regs() + +From: Borislav Petkov + +commit 9e761296c52dcdb1aaa151b65bd39accb05740d9 upstream. + +Rename insn_decode() to insn_decode_from_regs() to denote that it +receives regs as param and uses registers from there during decoding. +Free the former name for a more generic version of the function. + +No functional changes. 
+ +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210304174237.31945-2-bp@alien8.de +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/insn-eval.h | 4 ++-- + arch/x86/kernel/sev-es.c | 2 +- + arch/x86/kernel/umip.c | 2 +- + arch/x86/lib/insn-eval.c | 6 +++--- + 4 files changed, 7 insertions(+), 7 deletions(-) + +--- a/arch/x86/include/asm/insn-eval.h ++++ b/arch/x86/include/asm/insn-eval.h +@@ -26,7 +26,7 @@ int insn_fetch_from_user(struct pt_regs + unsigned char buf[MAX_INSN_SIZE]); + int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +-bool insn_decode(struct insn *insn, struct pt_regs *regs, +- unsigned char buf[MAX_INSN_SIZE], int buf_size); ++bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, ++ unsigned char buf[MAX_INSN_SIZE], int buf_size); + + #endif /* _ASM_X86_INSN_EVAL_H */ +--- a/arch/x86/kernel/sev-es.c ++++ b/arch/x86/kernel/sev-es.c +@@ -236,7 +236,7 @@ static enum es_result vc_decode_insn(str + return ES_EXCEPTION; + } + +- if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res)) ++ if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) + return ES_DECODE_FAILED; + } else { + res = vc_fetch_insn_kernel(ctxt, buffer); +--- a/arch/x86/kernel/umip.c ++++ b/arch/x86/kernel/umip.c +@@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs + if (!nr_copied) + return false; + +- if (!insn_decode(&insn, regs, buf, nr_copied)) ++ if (!insn_decode_from_regs(&insn, regs, buf, nr_copied)) + return false; + + umip_inst = identify_insn(&insn); +--- a/arch/x86/lib/insn-eval.c ++++ b/arch/x86/lib/insn-eval.c +@@ -1492,7 +1492,7 @@ int insn_fetch_from_user_inatomic(struct + } + + /** +- * insn_decode() - Decode an instruction ++ * insn_decode_from_regs() - Decode an instruction + * @insn: Structure to store decoded instruction + * @regs: Structure with register values as seen when entering kernel mode + * @buf: Buffer containing the 
instruction bytes +@@ -1505,8 +1505,8 @@ int insn_fetch_from_user_inatomic(struct + * + * True if instruction was decoded, False otherwise. + */ +-bool insn_decode(struct insn *insn, struct pt_regs *regs, +- unsigned char buf[MAX_INSN_SIZE], int buf_size) ++bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, ++ unsigned char buf[MAX_INSN_SIZE], int buf_size) + { + int seg_defs; + diff --git a/queue-5.10/x86-kexec-disable-ret-on-kexec.patch b/queue-5.10/x86-kexec-disable-ret-on-kexec.patch new file mode 100644 index 00000000000..d8a1b04ffa9 --- /dev/null +++ b/queue-5.10/x86-kexec-disable-ret-on-kexec.patch @@ -0,0 +1,173 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Konrad Rzeszutek Wilk +Date: Fri, 8 Jul 2022 19:10:11 +0200 +Subject: x86/kexec: Disable RET on kexec + +From: Konrad Rzeszutek Wilk + +commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream. + +All the invocations unroll to __x86_return_thunk and this file +must be PIC independent. + +This fixes kexec on 64-bit AMD boxes. + + [ bp: Fix 32-bit build. ] + +Reported-by: Edward Tran +Reported-by: Awais Tanveer +Suggested-by: Ankur Arora +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Alexandre Chartre +Signed-off-by: Borislav Petkov +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/relocate_kernel_32.S | 25 +++++++++++++++++++------ + arch/x86/kernel/relocate_kernel_64.S | 23 +++++++++++++++++------ + 2 files changed, 36 insertions(+), 12 deletions(-) + +--- a/arch/x86/kernel/relocate_kernel_32.S ++++ b/arch/x86/kernel/relocate_kernel_32.S +@@ -7,10 +7,12 @@ + #include + #include + #include ++#include + #include + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. 
+ */ + + #define PTR(x) (x << 2) +@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + 1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp + 2: ++ ANNOTATE_RETPOLINE_SAFE + call *%edx + + /* get the re-entry point of the peer system */ +@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map + popl %edi + popl %esi + popl %ebx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + popl %edi + popl %ebx + popl %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -13,7 +13,8 @@ + #include + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. 
+ */ + + #define PTR(x) (x << 3) +@@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + /* jump to identity mapped page */ + addq $(identity_mapped - relocate_kernel), %r8 + pushq %r8 +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + xorl %r14d, %r14d + xorl %r15d, %r15d + +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + + 1: + popq %rdx +@@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + call swap_pages + movq $virtual_mapped, %rax + pushq %rax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map + popq %r12 + popq %rbp + popq %rbx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + lea PAGE_SIZE(%rax), %rsi + jmp 0b + 3: +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size diff --git a/queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch b/queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch new file mode 100644 index 00000000000..a0c98f6b3c7 --- /dev/null +++ b/queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch @@ -0,0 +1,99 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:42 +0200 +Subject: x86/kvm: Fix SETcc emulation for return thunks + +From: Peter Zijlstra + +commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream. + +Prepare the SETcc fastop stuff for when RET can be larger still. + +The tricky bit here is that the expressions should not only be +constant C expressions, but also absolute GAS expressions. This means +no ?: and 'true' is ~0. 
+ +Also ensure em_setcc() has the same alignment as the actual FOP_SETCC() +ops, this ensures there cannot be an alignment hole between em_setcc() +and the first op. + +Additionally, add a .skip directive to the FOP_SETCC() macro to fill +any remaining space with INT3 traps; however the primary purpose of +this directive is to generate AS warnings when the remaining space +goes negative. Which is a very good indication the alignment magic +went side-ways. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: ignore ENDBR when computing SETCC_LENGTH] +[cascardo: conflict fixup] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -322,13 +322,15 @@ static int fastop(struct x86_emulate_ctx + #define FOP_RET(name) \ + __FOP_RET(#name) + +-#define FOP_START(op) \ ++#define __FOP_START(op, align) \ + extern void em_##op(struct fastop *fake); \ + asm(".pushsection .text, \"ax\" \n\t" \ + ".global em_" #op " \n\t" \ +- ".align " __stringify(FASTOP_SIZE) " \n\t" \ ++ ".align " __stringify(align) " \n\t" \ + "em_" #op ":\n\t" + ++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) ++ + #define FOP_END \ + ".popsection") + +@@ -432,15 +434,14 @@ static int fastop(struct x86_emulate_ctx + /* + * Depending on .config the SETcc functions look like: + * +- * SETcc %al [3 bytes] +- * RET [1 byte] +- * INT3 [1 byte; CONFIG_SLS] +- * +- * Which gives possible sizes 4 or 5. When rounded up to the +- * next power-of-two alignment they become 4 or 8. 
++ * SETcc %al [3 bytes] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] ++ * INT3 [1 byte; CONFIG_SLS] + */ +-#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) +-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ ++ IS_ENABLED(CONFIG_SLS)) ++#define SETCC_LENGTH (3 + RET_LENGTH) ++#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) + static_assert(SETCC_LENGTH <= SETCC_ALIGN); + + #define FOP_SETCC(op) \ +@@ -448,14 +449,15 @@ static_assert(SETCC_LENGTH <= SETCC_ALIG + ".type " #op ", @function \n\t" \ + #op ": \n\t" \ + #op " %al \n\t" \ +- __FOP_RET(#op) ++ __FOP_RET(#op) \ ++ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + + asm(".pushsection .fixup, \"ax\"\n" + ".global kvm_fastop_exception \n" + "kvm_fastop_exception: xor %esi, %esi; " ASM_RET + ".popsection"); + +-FOP_START(setcc) ++__FOP_START(setcc, SETCC_ALIGN) + FOP_SETCC(seto) + FOP_SETCC(setno) + FOP_SETCC(setc) diff --git a/queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch b/queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch new file mode 100644 index 00000000000..bfe019c4125 --- /dev/null +++ b/queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch @@ -0,0 +1,75 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:32 +0200 +Subject: x86/kvm/vmx: Make noinstr clean + +From: Peter Zijlstra + +commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream. + +The recent mmio_stale_data fixes broke the noinstr constraints: + + vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section + vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section + +make it all happy again. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 6 +++--- + arch/x86/kvm/x86.c | 4 ++-- + include/linux/kvm_host.h | 2 +- + 3 files changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -380,9 +380,9 @@ static __always_inline void vmx_disable_ + if (!vmx->disable_fb_clear) + return; + +- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; + } +@@ -393,7 +393,7 @@ static __always_inline void vmx_enable_f + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + } + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -11171,9 +11171,9 @@ void kvm_arch_end_assignment(struct kvm + } + EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +-bool kvm_arch_has_assigned_device(struct kvm *kvm) ++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) + { +- return atomic_read(&kvm->arch.assigned_device_count); ++ return arch_atomic_read(&kvm->arch.assigned_device_count); + } + EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -988,7 +988,7 @@ static inline void kvm_arch_end_assignme + { + } + +-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) ++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) + { + return false; + } diff --git a/queue-5.10/x86-lib-atomic64_386_32-rename-things.patch 
b/queue-5.10/x86-lib-atomic64_386_32-rename-things.patch new file mode 100644 index 00000000000..6b6051a0ebd --- /dev/null +++ b/queue-5.10/x86-lib-atomic64_386_32-rename-things.patch @@ -0,0 +1,248 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sat, 4 Dec 2021 14:43:39 +0100 +Subject: x86/lib/atomic64_386_32: Rename things + +From: Peter Zijlstra + +commit 22da5a07c75e1104caf6a42f189c97b83d070073 upstream. + +Principally, in order to get rid of #define RET in this code to make +place for a new RET, but also to clarify the code, rename a bunch of +things: + + s/UNLOCK/IRQ_RESTORE/ + s/LOCK/IRQ_SAVE/ + s/BEGIN/BEGIN_IRQ_SAVE/ + s/\/RET_IRQ_RESTORE/ + s/RET_ENDP/\tRET_IRQ_RESTORE\rENDP/ + +which then leaves RET unused so it can be removed. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20211204134907.841623970@infradead.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/lib/atomic64_386_32.S | 84 ++++++++++++++++++++++------------------- + 1 file changed, 46 insertions(+), 38 deletions(-) + +--- a/arch/x86/lib/atomic64_386_32.S ++++ b/arch/x86/lib/atomic64_386_32.S +@@ -9,81 +9,83 @@ + #include + + /* if you want SMP support, implement these with real spinlocks */ +-.macro LOCK reg ++.macro IRQ_SAVE reg + pushfl + cli + .endm + +-.macro UNLOCK reg ++.macro IRQ_RESTORE reg + popfl + .endm + +-#define BEGIN(op) \ ++#define BEGIN_IRQ_SAVE(op) \ + .macro endp; \ + SYM_FUNC_END(atomic64_##op##_386); \ + .purgem endp; \ + .endm; \ + SYM_FUNC_START(atomic64_##op##_386); \ +- LOCK v; ++ IRQ_SAVE v; + + #define ENDP endp + +-#define RET \ +- UNLOCK v; \ ++#define RET_IRQ_RESTORE \ ++ IRQ_RESTORE v; \ + ret + +-#define RET_ENDP \ +- RET; \ +- ENDP +- + #define v %ecx +-BEGIN(read) ++BEGIN_IRQ_SAVE(read) + movl (v), %eax + movl 4(v), %edx +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + 
#define v %esi +-BEGIN(set) ++BEGIN_IRQ_SAVE(set) + movl %ebx, (v) + movl %ecx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %esi +-BEGIN(xchg) ++BEGIN_IRQ_SAVE(xchg) + movl (v), %eax + movl 4(v), %edx + movl %ebx, (v) + movl %ecx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %ecx +-BEGIN(add) ++BEGIN_IRQ_SAVE(add) + addl %eax, (v) + adcl %edx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %ecx +-BEGIN(add_return) ++BEGIN_IRQ_SAVE(add_return) + addl (v), %eax + adcl 4(v), %edx + movl %eax, (v) + movl %edx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %ecx +-BEGIN(sub) ++BEGIN_IRQ_SAVE(sub) + subl %eax, (v) + sbbl %edx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %ecx +-BEGIN(sub_return) ++BEGIN_IRQ_SAVE(sub_return) + negl %edx + negl %eax + sbbl $0, %edx +@@ -91,47 +93,52 @@ BEGIN(sub_return) + adcl 4(v), %edx + movl %eax, (v) + movl %edx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %esi +-BEGIN(inc) ++BEGIN_IRQ_SAVE(inc) + addl $1, (v) + adcl $0, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %esi +-BEGIN(inc_return) ++BEGIN_IRQ_SAVE(inc_return) + movl (v), %eax + movl 4(v), %edx + addl $1, %eax + adcl $0, %edx + movl %eax, (v) + movl %edx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %esi +-BEGIN(dec) ++BEGIN_IRQ_SAVE(dec) + subl $1, (v) + sbbl $0, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %esi +-BEGIN(dec_return) ++BEGIN_IRQ_SAVE(dec_return) + movl (v), %eax + movl 4(v), %edx + subl $1, %eax + sbbl $0, %edx + movl %eax, (v) + movl %edx, 4(v) +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v + + #define v %esi +-BEGIN(add_unless) ++BEGIN_IRQ_SAVE(add_unless) + addl %eax, %ecx + adcl %edx, %edi + addl (v), %eax +@@ -143,7 +150,7 @@ BEGIN(add_unless) + movl %edx, 4(v) + movl $1, %eax + 2: +- RET ++ RET_IRQ_RESTORE + 3: + cmpl %edx, %edi + jne 1b +@@ -153,7 +160,7 @@ ENDP 
+ #undef v + + #define v %esi +-BEGIN(inc_not_zero) ++BEGIN_IRQ_SAVE(inc_not_zero) + movl (v), %eax + movl 4(v), %edx + testl %eax, %eax +@@ -165,7 +172,7 @@ BEGIN(inc_not_zero) + movl %edx, 4(v) + movl $1, %eax + 2: +- RET ++ RET_IRQ_RESTORE + 3: + testl %edx, %edx + jne 1b +@@ -174,7 +181,7 @@ ENDP + #undef v + + #define v %esi +-BEGIN(dec_if_positive) ++BEGIN_IRQ_SAVE(dec_if_positive) + movl (v), %eax + movl 4(v), %edx + subl $1, %eax +@@ -183,5 +190,6 @@ BEGIN(dec_if_positive) + movl %eax, (v) + movl %edx, 4(v) + 1: +-RET_ENDP ++ RET_IRQ_RESTORE ++ENDP + #undef v diff --git a/queue-5.10/x86-objtool-create-.return_sites.patch b/queue-5.10/x86-objtool-create-.return_sites.patch new file mode 100644 index 00000000000..acce887d26e --- /dev/null +++ b/queue-5.10/x86-objtool-create-.return_sites.patch @@ -0,0 +1,200 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:38 +0200 +Subject: x86,objtool: Create .return_sites + +From: Peter Zijlstra + +commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream. + +Find all the return-thunk sites and record them in a .return_sites +section such that the kernel can undo this. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict fixup because of functions added to support IBT] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/arch.h | 1 + tools/objtool/arch/x86/decode.c | 5 ++ + tools/objtool/check.c | 75 ++++++++++++++++++++++++++++++++++++++++ + tools/objtool/elf.h | 1 + tools/objtool/objtool.c | 1 + tools/objtool/objtool.h | 1 + 6 files changed, 84 insertions(+) + +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len); + int arch_decode_hint_reg(u8 sp_reg, int *base); + + bool arch_is_retpoline(struct symbol *sym); ++bool arch_is_rethunk(struct symbol *sym); + + int arch_rewrite_retpolines(struct objtool_file *file); + +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -649,3 +649,8 @@ bool arch_is_retpoline(struct symbol *sy + { + return !strncmp(sym->name, "__x86_indirect_", 15); + } ++ ++bool arch_is_rethunk(struct symbol *sym) ++{ ++ return !strcmp(sym->name, "__x86_return_thunk"); ++} +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -653,6 +653,52 @@ static int create_retpoline_sites_sectio + return 0; + } + ++static int create_return_sites_sections(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ int idx; ++ ++ sec = find_section_by_name(file->elf, ".return_sites"); ++ if (sec) { ++ WARN("file already has .return_sites, skipping"); ++ return 0; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) ++ idx++; ++ ++ if (!idx) ++ return 0; ++ ++ sec = elf_create_section(file->elf, ".return_sites", 0, ++ sizeof(int), idx); ++ if (!sec) { ++ WARN("elf_create_section: .return_sites"); ++ return -1; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, 
&file->return_thunk_list, call_node) { ++ ++ int *site = (int *)sec->data->d_buf + idx; ++ *site = 0; ++ ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(int), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) { ++ WARN("elf_add_reloc_to_insn: .return_sites"); ++ return -1; ++ } ++ ++ idx++; ++ } ++ ++ return 0; ++} ++ + /* + * Warnings shouldn't be reported for ignored functions. + */ +@@ -888,6 +934,11 @@ __weak bool arch_is_retpoline(struct sym + return false; + } + ++__weak bool arch_is_rethunk(struct symbol *sym) ++{ ++ return false; ++} ++ + #define NEGATIVE_RELOC ((void *)-1L) + + static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +@@ -1029,6 +1080,19 @@ static void add_retpoline_call(struct ob + + annotate_call_site(file, insn, false); + } ++ ++static void add_return_call(struct objtool_file *file, struct instruction *insn) ++{ ++ /* ++ * Return thunk tail calls are really just returns in disguise, ++ * so convert them accordingly. ++ */ ++ insn->type = INSN_RETURN; ++ insn->retpoline_safe = true; ++ ++ list_add_tail(&insn->call_node, &file->return_thunk_list); ++} ++ + /* + * Find the destination instructions for all jumps. 
+ */ +@@ -1053,6 +1117,9 @@ static int add_jump_destinations(struct + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); + continue; ++ } else if (reloc->sym->return_thunk) { ++ add_return_call(file, insn); ++ continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ + add_call_dest(file, insn, reloc->sym, true); +@@ -1842,6 +1909,9 @@ static int classify_symbols(struct objto + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + ++ if (arch_is_rethunk(func)) ++ func->return_thunk = true; ++ + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + +@@ -3235,6 +3305,11 @@ int check(struct objtool_file *file) + if (ret < 0) + goto out; + warnings += ret; ++ ++ ret = create_return_sites_sections(file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; + } + + if (stats) { +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -58,6 +58,7 @@ struct symbol { + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; ++ u8 return_thunk : 1; + u8 fentry : 1; + u8 kcov : 1; + }; +--- a/tools/objtool/objtool.c ++++ b/tools/objtool/objtool.c +@@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(c + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.retpoline_call_list); ++ INIT_LIST_HEAD(&file.return_thunk_list); + INIT_LIST_HEAD(&file.static_call_list); + file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); + file.ignore_unreachables = no_unreachable; +--- a/tools/objtool/objtool.h ++++ b/tools/objtool/objtool.h +@@ -19,6 +19,7 @@ struct objtool_file { + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); + struct list_head retpoline_call_list; ++ struct list_head return_thunk_list; + struct list_head static_call_list; + bool ignore_unreachables, c_file, hints, rodata; + }; diff --git a/queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch 
b/queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch new file mode 100644 index 00000000000..e6a126e3759 --- /dev/null +++ b/queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch @@ -0,0 +1,3331 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sat, 4 Dec 2021 14:43:40 +0100 +Subject: x86: Prepare asm files for straight-line-speculation + +From: Peter Zijlstra + +commit f94909ceb1ed4bfdb2ada72f93236305e6d6951f upstream. + +Replace all ret/retq instructions with RET in preparation of making +RET a macro. Since AS is case insensitive it's a big no-op without +RET defined. + + find arch/x86/ -name \*.S | while read file + do + sed -i 's/\<ret[q]*\>/RET/' $file + done + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20211204134907.905503893@infradead.org +[bwh: Backported to 5.10: ran the above command] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/efi_thunk_64.S | 2 - + arch/x86/boot/compressed/head_64.S | 4 +- + arch/x86/boot/compressed/mem_encrypt.S | 4 +- + arch/x86/crypto/aegis128-aesni-asm.S | 48 ++++++++++++------------ + arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 2 - + arch/x86/crypto/aesni-intel_asm.S | 52 +++++++++++++-------------- + arch/x86/crypto/aesni-intel_avx-x86_64.S | 40 ++++++++++---------- + arch/x86/crypto/blake2s-core.S | 4 +- + arch/x86/crypto/blowfish-x86_64-asm_64.S | 12 +++--- + arch/x86/crypto/camellia-aesni-avx-asm_64.S | 18 ++++----- + arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 18 ++++----- + arch/x86/crypto/camellia-x86_64-asm_64.S | 12 +++--- + arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 12 +++--- + arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 16 ++++---- + arch/x86/crypto/chacha-avx2-x86_64.S | 6 +-- + arch/x86/crypto/chacha-avx512vl-x86_64.S | 6 +-- + arch/x86/crypto/chacha-ssse3-x86_64.S | 8 ++-- + arch/x86/crypto/crc32-pclmul_asm.S | 2 - + 
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 - + arch/x86/crypto/crct10dif-pcl-asm_64.S | 2 - + arch/x86/crypto/des3_ede-asm_64.S | 4 +- + arch/x86/crypto/ghash-clmulni-intel_asm.S | 6 +-- + arch/x86/crypto/nh-avx2-x86_64.S | 2 - + arch/x86/crypto/nh-sse2-x86_64.S | 2 - + arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 16 ++++---- + arch/x86/crypto/serpent-avx2-asm_64.S | 16 ++++---- + arch/x86/crypto/serpent-sse2-i586-asm_32.S | 6 +-- + arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 6 +-- + arch/x86/crypto/sha1_avx2_x86_64_asm.S | 2 - + arch/x86/crypto/sha1_ni_asm.S | 2 - + arch/x86/crypto/sha1_ssse3_asm.S | 2 - + arch/x86/crypto/sha256-avx-asm.S | 2 - + arch/x86/crypto/sha256-avx2-asm.S | 2 - + arch/x86/crypto/sha256-ssse3-asm.S | 2 - + arch/x86/crypto/sha256_ni_asm.S | 2 - + arch/x86/crypto/sha512-avx-asm.S | 2 - + arch/x86/crypto/sha512-avx2-asm.S | 2 - + arch/x86/crypto/sha512-ssse3-asm.S | 2 - + arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 16 ++++---- + arch/x86/crypto/twofish-i586-asm_32.S | 4 +- + arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 6 +-- + arch/x86/crypto/twofish-x86_64-asm_64.S | 4 +- + arch/x86/entry/entry_32.S | 2 - + arch/x86/entry/entry_64.S | 12 +++--- + arch/x86/entry/thunk_32.S | 2 - + arch/x86/entry/thunk_64.S | 2 - + arch/x86/entry/vdso/vdso32/system_call.S | 2 - + arch/x86/entry/vsyscall/vsyscall_emu_64.S | 6 +-- + arch/x86/kernel/acpi/wakeup_32.S | 6 +-- + arch/x86/kernel/ftrace_32.S | 6 +-- + arch/x86/kernel/ftrace_64.S | 10 ++--- + arch/x86/kernel/head_32.S | 2 - + arch/x86/kernel/irqflags.S | 4 +- + arch/x86/kernel/relocate_kernel_32.S | 10 ++--- + arch/x86/kernel/relocate_kernel_64.S | 10 ++--- + arch/x86/kernel/sev_verify_cbit.S | 2 - + arch/x86/kernel/verify_cpu.S | 4 +- + arch/x86/kvm/svm/vmenter.S | 2 - + arch/x86/kvm/vmx/vmenter.S | 14 +++---- + arch/x86/lib/atomic64_386_32.S | 2 - + arch/x86/lib/atomic64_cx8_32.S | 16 ++++---- + arch/x86/lib/checksum_32.S | 8 ++-- + arch/x86/lib/clear_page_64.S | 6 +-- + 
arch/x86/lib/cmpxchg16b_emu.S | 4 +- + arch/x86/lib/cmpxchg8b_emu.S | 4 +- + arch/x86/lib/copy_mc_64.S | 6 +-- + arch/x86/lib/copy_page_64.S | 4 +- + arch/x86/lib/copy_user_64.S | 12 +++--- + arch/x86/lib/csum-copy_64.S | 2 - + arch/x86/lib/getuser.S | 22 +++++------ + arch/x86/lib/hweight.S | 6 +-- + arch/x86/lib/iomap_copy_64.S | 2 - + arch/x86/lib/memcpy_64.S | 12 +++--- + arch/x86/lib/memmove_64.S | 4 +- + arch/x86/lib/memset_64.S | 6 +-- + arch/x86/lib/msr-reg.S | 4 +- + arch/x86/lib/putuser.S | 6 +-- + arch/x86/lib/retpoline.S | 2 - + arch/x86/math-emu/div_Xsig.S | 2 - + arch/x86/math-emu/div_small.S | 2 - + arch/x86/math-emu/mul_Xsig.S | 6 +-- + arch/x86/math-emu/polynom_Xsig.S | 2 - + arch/x86/math-emu/reg_norm.S | 6 +-- + arch/x86/math-emu/reg_round.S | 2 - + arch/x86/math-emu/reg_u_add.S | 2 - + arch/x86/math-emu/reg_u_div.S | 2 - + arch/x86/math-emu/reg_u_mul.S | 2 - + arch/x86/math-emu/reg_u_sub.S | 2 - + arch/x86/math-emu/round_Xsig.S | 4 +- + arch/x86/math-emu/shr_Xsig.S | 8 ++-- + arch/x86/math-emu/wm_shrx.S | 16 ++++---- + arch/x86/mm/mem_encrypt_boot.S | 4 +- + arch/x86/platform/efi/efi_stub_32.S | 2 - + arch/x86/platform/efi/efi_stub_64.S | 2 - + arch/x86/platform/efi/efi_thunk_64.S | 2 - + arch/x86/platform/olpc/xo1-wakeup.S | 6 +-- + arch/x86/power/hibernate_asm_32.S | 4 +- + arch/x86/power/hibernate_asm_64.S | 4 +- + arch/x86/um/checksum_32.S | 4 +- + arch/x86/um/setjmp_32.S | 2 - + arch/x86/um/setjmp_64.S | 2 - + arch/x86/xen/xen-asm.S | 14 +++---- + arch/x86/xen/xen-head.S | 2 - + 103 files changed, 353 insertions(+), 353 deletions(-) + +--- a/arch/x86/boot/compressed/efi_thunk_64.S ++++ b/arch/x86/boot/compressed/efi_thunk_64.S +@@ -89,7 +89,7 @@ SYM_FUNC_START(__efi64_thunk) + + pop %rbx + pop %rbp +- ret ++ RET + SYM_FUNC_END(__efi64_thunk) + + .code32 +--- a/arch/x86/boot/compressed/head_64.S ++++ b/arch/x86/boot/compressed/head_64.S +@@ -786,7 +786,7 @@ SYM_FUNC_START(efi32_pe_entry) + 2: popl %edi // restore callee-save registers + popl 
%ebx + leave +- ret ++ RET + SYM_FUNC_END(efi32_pe_entry) + + .section ".rodata" +@@ -868,7 +868,7 @@ SYM_FUNC_START(startup32_check_sev_cbit) + popl %ebx + popl %eax + #endif +- ret ++ RET + SYM_FUNC_END(startup32_check_sev_cbit) + + /* +--- a/arch/x86/boot/compressed/mem_encrypt.S ++++ b/arch/x86/boot/compressed/mem_encrypt.S +@@ -58,7 +58,7 @@ SYM_FUNC_START(get_sev_encryption_bit) + + #endif /* CONFIG_AMD_MEM_ENCRYPT */ + +- ret ++ RET + SYM_FUNC_END(get_sev_encryption_bit) + + .code64 +@@ -99,7 +99,7 @@ SYM_FUNC_START(set_sev_encryption_mask) + #endif + + xor %rax, %rax +- ret ++ RET + SYM_FUNC_END(set_sev_encryption_mask) + + .data +--- a/arch/x86/crypto/aegis128-aesni-asm.S ++++ b/arch/x86/crypto/aegis128-aesni-asm.S +@@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial) + pxor T0, MSG + + .Lld_partial_8: +- ret ++ RET + SYM_FUNC_END(__load_partial) + + /* +@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial) + mov %r10b, (%r9) + + .Lst_partial_1: +- ret ++ RET + SYM_FUNC_END(__store_partial) + + /* +@@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ini + movdqu STATE4, 0x40(STATEP) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_init) + + /* +@@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lad_out_1: + movdqu STATE4, 0x00(STATEP) +@@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lad_out_2: + movdqu STATE3, 0x00(STATEP) +@@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lad_out_3: + movdqu STATE2, 0x00(STATEP) +@@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lad_out_4: + movdqu STATE1, 0x00(STATEP) +@@ -373,11 +373,11 @@ 
SYM_FUNC_START(crypto_aegis128_aesni_ad) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lad_out: + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_ad) + + .macro encrypt_block a s0 s1 s2 s3 s4 i +@@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lenc_out_1: + movdqu STATE3, 0x00(STATEP) +@@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lenc_out_2: + movdqu STATE2, 0x00(STATEP) +@@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lenc_out_3: + movdqu STATE1, 0x00(STATEP) +@@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lenc_out_4: + movdqu STATE0, 0x00(STATEP) +@@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Lenc_out: + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_enc) + + /* +@@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc + movdqu STATE3, 0x40(STATEP) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) + + .macro decrypt_block a s0 s1 s2 s3 s4 i +@@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Ldec_out_1: + movdqu STATE3, 0x00(STATEP) +@@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Ldec_out_2: + movdqu STATE2, 0x00(STATEP) +@@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + 
FRAME_END +- ret ++ RET + + .Ldec_out_3: + movdqu STATE1, 0x00(STATEP) +@@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Ldec_out_4: + movdqu STATE0, 0x00(STATEP) +@@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END +- ret ++ RET + + .Ldec_out: + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_dec) + + /* +@@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec + movdqu STATE3, 0x40(STATEP) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) + + /* +@@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_fin + movdqu MSG, (%rsi) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(crypto_aegis128_aesni_final) +--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S ++++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +@@ -525,7 +525,7 @@ ddq_add_8: + /* return updated IV */ + vpshufb xbyteswap, xcounter, xcounter + vmovdqu xcounter, (p_iv) +- ret ++ RET + .endm + + /* +--- a/arch/x86/crypto/aesni-intel_asm.S ++++ b/arch/x86/crypto/aesni-intel_asm.S +@@ -1598,7 +1598,7 @@ SYM_FUNC_START(aesni_gcm_dec) + GCM_ENC_DEC dec + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_dec) + + +@@ -1687,7 +1687,7 @@ SYM_FUNC_START(aesni_gcm_enc) + + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_enc) + + /***************************************************************************** +@@ -1705,7 +1705,7 @@ SYM_FUNC_START(aesni_gcm_init) + FUNC_SAVE + GCM_INIT %arg3, %arg4,%arg5, %arg6 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_init) + + /***************************************************************************** +@@ -1720,7 +1720,7 @@ SYM_FUNC_START(aesni_gcm_enc_update) + FUNC_SAVE + GCM_ENC_DEC enc + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_enc_update) + + 
/***************************************************************************** +@@ -1735,7 +1735,7 @@ SYM_FUNC_START(aesni_gcm_dec_update) + FUNC_SAVE + GCM_ENC_DEC dec + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_dec_update) + + /***************************************************************************** +@@ -1750,7 +1750,7 @@ SYM_FUNC_START(aesni_gcm_finalize) + FUNC_SAVE + GCM_COMPLETE %arg3 %arg4 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_finalize) + + #endif +@@ -1766,7 +1766,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a + pxor %xmm1, %xmm0 + movaps %xmm0, (TKEYP) + add $0x10, TKEYP +- ret ++ RET + SYM_FUNC_END(_key_expansion_256a) + SYM_FUNC_END_ALIAS(_key_expansion_128) + +@@ -1791,7 +1791,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a + shufps $0b01001110, %xmm2, %xmm1 + movaps %xmm1, 0x10(TKEYP) + add $0x20, TKEYP +- ret ++ RET + SYM_FUNC_END(_key_expansion_192a) + + SYM_FUNC_START_LOCAL(_key_expansion_192b) +@@ -1810,7 +1810,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b + + movaps %xmm0, (TKEYP) + add $0x10, TKEYP +- ret ++ RET + SYM_FUNC_END(_key_expansion_192b) + + SYM_FUNC_START_LOCAL(_key_expansion_256b) +@@ -1822,7 +1822,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b + pxor %xmm1, %xmm2 + movaps %xmm2, (TKEYP) + add $0x10, TKEYP +- ret ++ RET + SYM_FUNC_END(_key_expansion_256b) + + /* +@@ -1937,7 +1937,7 @@ SYM_FUNC_START(aesni_set_key) + popl KEYP + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_set_key) + + /* +@@ -1961,7 +1961,7 @@ SYM_FUNC_START(aesni_enc) + popl KEYP + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_enc) + + /* +@@ -2018,7 +2018,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1) + aesenc KEY, STATE + movaps 0x70(TKEYP), KEY + aesenclast KEY, STATE +- ret ++ RET + SYM_FUNC_END(_aesni_enc1) + + /* +@@ -2126,7 +2126,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4) + aesenclast KEY, STATE2 + aesenclast KEY, STATE3 + aesenclast KEY, STATE4 +- ret ++ RET + SYM_FUNC_END(_aesni_enc4) + + /* +@@ -2151,7 +2151,7 @@ 
SYM_FUNC_START(aesni_dec) + popl KEYP + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_dec) + + /* +@@ -2208,7 +2208,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1) + aesdec KEY, STATE + movaps 0x70(TKEYP), KEY + aesdeclast KEY, STATE +- ret ++ RET + SYM_FUNC_END(_aesni_dec1) + + /* +@@ -2316,7 +2316,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4) + aesdeclast KEY, STATE2 + aesdeclast KEY, STATE3 + aesdeclast KEY, STATE4 +- ret ++ RET + SYM_FUNC_END(_aesni_dec4) + + /* +@@ -2376,7 +2376,7 @@ SYM_FUNC_START(aesni_ecb_enc) + popl LEN + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_ecb_enc) + + /* +@@ -2437,7 +2437,7 @@ SYM_FUNC_START(aesni_ecb_dec) + popl LEN + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_ecb_dec) + + /* +@@ -2481,7 +2481,7 @@ SYM_FUNC_START(aesni_cbc_enc) + popl IVP + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_cbc_enc) + + /* +@@ -2574,7 +2574,7 @@ SYM_FUNC_START(aesni_cbc_dec) + popl IVP + #endif + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_cbc_dec) + + #ifdef __x86_64__ +@@ -2602,7 +2602,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init) + mov $1, TCTR_LOW + movq TCTR_LOW, INC + movq CTR, TCTR_LOW +- ret ++ RET + SYM_FUNC_END(_aesni_inc_init) + + /* +@@ -2630,7 +2630,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc) + .Linc_low: + movaps CTR, IV + pshufb BSWAP_MASK, IV +- ret ++ RET + SYM_FUNC_END(_aesni_inc) + + /* +@@ -2693,7 +2693,7 @@ SYM_FUNC_START(aesni_ctr_enc) + movups IV, (IVP) + .Lctr_enc_just_ret: + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_ctr_enc) + + /* +@@ -2778,7 +2778,7 @@ SYM_FUNC_START(aesni_xts_encrypt) + movups IV, (IVP) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_xts_encrypt) + + /* +@@ -2846,7 +2846,7 @@ SYM_FUNC_START(aesni_xts_decrypt) + movups IV, (IVP) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(aesni_xts_decrypt) + + #endif +--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S ++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S +@@ -1777,7 +1777,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen2) + FUNC_SAVE + INIT GHASH_MUL_AVX, 
PRECOMPUTE_AVX + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_init_avx_gen2) + + ############################################################################### +@@ -1798,15 +1798,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_ + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11 + FUNC_RESTORE +- ret ++ RET + key_128_enc_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9 + FUNC_RESTORE +- ret ++ RET + key_256_enc_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2) + + ############################################################################### +@@ -1827,15 +1827,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_ + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11 + FUNC_RESTORE +- ret ++ RET + key_128_dec_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9 + FUNC_RESTORE +- ret ++ RET + key_256_dec_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2) + + ############################################################################### +@@ -1856,15 +1856,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_ge + # must be 192 + GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4 + FUNC_RESTORE +- ret ++ RET + key_128_finalize: + GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4 + FUNC_RESTORE +- ret ++ RET + key_256_finalize: + GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) + + ############################################################################### +@@ -2745,7 +2745,7 @@ 
SYM_FUNC_START(aesni_gcm_init_avx_gen4) + FUNC_SAVE + INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_init_avx_gen4) + + ############################################################################### +@@ -2766,15 +2766,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_ + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11 + FUNC_RESTORE +- ret ++ RET + key_128_enc_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9 + FUNC_RESTORE +- ret ++ RET + key_256_enc_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4) + + ############################################################################### +@@ -2795,15 +2795,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_ + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11 + FUNC_RESTORE +- ret ++ RET + key_128_dec_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9 + FUNC_RESTORE +- ret ++ RET + key_256_dec_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4) + + ############################################################################### +@@ -2824,13 +2824,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_ge + # must be 192 + GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4 + FUNC_RESTORE +- ret ++ RET + key_128_finalize4: + GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4 + FUNC_RESTORE +- ret ++ RET + key_256_finalize4: + GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 + FUNC_RESTORE +- ret ++ RET + SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) +--- 
a/arch/x86/crypto/blake2s-core.S ++++ b/arch/x86/crypto/blake2s-core.S +@@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3) + movdqu %xmm1,0x10(%rdi) + movdqu %xmm14,0x20(%rdi) + .Lendofloop: +- ret ++ RET + SYM_FUNC_END(blake2s_compress_ssse3) + + #ifdef CONFIG_AS_AVX512 +@@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512) + vmovdqu %xmm1,0x10(%rdi) + vmovdqu %xmm4,0x20(%rdi) + vzeroupper +- retq ++ RET + SYM_FUNC_END(blake2s_compress_avx512) + #endif /* CONFIG_AS_AVX512 */ +--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S ++++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S +@@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk) + jnz .L__enc_xor; + + write_block(); +- ret; ++ RET; + .L__enc_xor: + xor_block(); +- ret; ++ RET; + SYM_FUNC_END(__blowfish_enc_blk) + + SYM_FUNC_START(blowfish_dec_blk) +@@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk) + + movq %r11, %r12; + +- ret; ++ RET; + SYM_FUNC_END(blowfish_dec_blk) + + /********************************************************************** +@@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way) + + popq %rbx; + popq %r12; +- ret; ++ RET; + + .L__enc_xor4: + xor_block4(); + + popq %rbx; + popq %r12; +- ret; ++ RET; + SYM_FUNC_END(__blowfish_enc_blk_4way) + + SYM_FUNC_START(blowfish_dec_blk_4way) +@@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way) + popq %rbx; + popq %r12; + +- ret; ++ RET; + SYM_FUNC_END(blowfish_dec_blk_4way) +--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S ++++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S +@@ -193,7 +193,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_ + roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, + %rcx, (%r9)); +- ret; ++ RET; + SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) + + .align 8 +@@ -201,7 +201,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_ + roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, + %xmm12, %xmm13, 
%xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, + %rax, (%r9)); +- ret; ++ RET; + SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) + + /* +@@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk1 + %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); + + FRAME_END +- ret; ++ RET; + + .align 8 + .Lenc_max32: +@@ -874,7 +874,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk1 + %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); + + FRAME_END +- ret; ++ RET; + + .align 8 + .Ldec_max32: +@@ -915,7 +915,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way) + %xmm8, %rsi); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_ecb_enc_16way) + + SYM_FUNC_START(camellia_ecb_dec_16way) +@@ -945,7 +945,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way) + %xmm8, %rsi); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_ecb_dec_16way) + + SYM_FUNC_START(camellia_cbc_dec_16way) +@@ -996,7 +996,7 @@ SYM_FUNC_START(camellia_cbc_dec_16way) + %xmm8, %rsi); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_cbc_dec_16way) + + #define inc_le128(x, minus_one, tmp) \ +@@ -1109,7 +1109,7 @@ SYM_FUNC_START(camellia_ctr_16way) + %xmm8, %rsi); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_ctr_16way) + + #define gf128mul_x_ble(iv, mask, tmp) \ +@@ -1253,7 +1253,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_ + %xmm8, %rsi); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_xts_crypt_16way) + + SYM_FUNC_START(camellia_xts_enc_16way) +--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S ++++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +@@ -227,7 +227,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_ + roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, + %rcx, (%r9)); +- ret; ++ RET; + SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) + + .align 8 +@@ -235,7 +235,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_ + roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, 
%ymm3, + %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, + %rax, (%r9)); +- ret; ++ RET; + SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) + + /* +@@ -825,7 +825,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk3 + %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); + + FRAME_END +- ret; ++ RET; + + .align 8 + .Lenc_max32: +@@ -912,7 +912,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk3 + %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); + + FRAME_END +- ret; ++ RET; + + .align 8 + .Ldec_max32: +@@ -957,7 +957,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_ecb_enc_32way) + + SYM_FUNC_START(camellia_ecb_dec_32way) +@@ -991,7 +991,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_ecb_dec_32way) + + SYM_FUNC_START(camellia_cbc_dec_32way) +@@ -1059,7 +1059,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_cbc_dec_32way) + + #define inc_le128(x, minus_one, tmp) \ +@@ -1199,7 +1199,7 @@ SYM_FUNC_START(camellia_ctr_32way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_ctr_32way) + + #define gf128mul_x_ble(iv, mask, tmp) \ +@@ -1366,7 +1366,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_ + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(camellia_xts_crypt_32way) + + SYM_FUNC_START(camellia_xts_enc_32way) +--- a/arch/x86/crypto/camellia-x86_64-asm_64.S ++++ b/arch/x86/crypto/camellia-x86_64-asm_64.S +@@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk) + enc_outunpack(mov, RT1); + + movq RR12, %r12; +- ret; ++ RET; + + .L__enc_xor: + enc_outunpack(xor, RT1); + + movq RR12, %r12; +- ret; ++ RET; + SYM_FUNC_END(__camellia_enc_blk) + + SYM_FUNC_START(camellia_dec_blk) +@@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk) + dec_outunpack(); + + movq RR12, %r12; +- ret; ++ RET; + SYM_FUNC_END(camellia_dec_blk) + + 
/********************************************************************** +@@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way) + + movq RR12, %r12; + popq %rbx; +- ret; ++ RET; + + .L__enc2_xor: + enc_outunpack2(xor, RT2); + + movq RR12, %r12; + popq %rbx; +- ret; ++ RET; + SYM_FUNC_END(__camellia_enc_blk_2way) + + SYM_FUNC_START(camellia_dec_blk_2way) +@@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way) + + movq RR12, %r12; + movq RXOR, %rbx; +- ret; ++ RET; + SYM_FUNC_END(camellia_dec_blk_2way) +--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S ++++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +@@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) + outunpack_blocks(RR3, RL3, RTMP, RX, RKM); + outunpack_blocks(RR4, RL4, RTMP, RX, RKM); + +- ret; ++ RET; + SYM_FUNC_END(__cast5_enc_blk16) + + .align 16 +@@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16) + outunpack_blocks(RR3, RL3, RTMP, RX, RKM); + outunpack_blocks(RR4, RL4, RTMP, RX, RKM); + +- ret; ++ RET; + + .L__skip_dec: + vpsrldq $4, RKR, RKR; +@@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way) + + popq %r15; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast5_ecb_enc_16way) + + SYM_FUNC_START(cast5_ecb_dec_16way) +@@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way) + + popq %r15; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast5_ecb_dec_16way) + + SYM_FUNC_START(cast5_cbc_dec_16way) +@@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way) + popq %r15; + popq %r12; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast5_cbc_dec_16way) + + SYM_FUNC_START(cast5_ctr_16way) +@@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way) + popq %r15; + popq %r12; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast5_ctr_16way) +--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S ++++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +@@ -291,7 +291,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8) + outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + 
+- ret; ++ RET; + SYM_FUNC_END(__cast6_enc_blk8) + + .align 8 +@@ -338,7 +338,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8) + outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + +- ret; ++ RET; + SYM_FUNC_END(__cast6_dec_blk8) + + SYM_FUNC_START(cast6_ecb_enc_8way) +@@ -361,7 +361,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way) + + popq %r15; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast6_ecb_enc_8way) + + SYM_FUNC_START(cast6_ecb_dec_8way) +@@ -384,7 +384,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way) + + popq %r15; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast6_ecb_dec_8way) + + SYM_FUNC_START(cast6_cbc_dec_8way) +@@ -410,7 +410,7 @@ SYM_FUNC_START(cast6_cbc_dec_8way) + popq %r15; + popq %r12; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast6_cbc_dec_8way) + + SYM_FUNC_START(cast6_ctr_8way) +@@ -438,7 +438,7 @@ SYM_FUNC_START(cast6_ctr_8way) + popq %r15; + popq %r12; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast6_ctr_8way) + + SYM_FUNC_START(cast6_xts_enc_8way) +@@ -465,7 +465,7 @@ SYM_FUNC_START(cast6_xts_enc_8way) + + popq %r15; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast6_xts_enc_8way) + + SYM_FUNC_START(cast6_xts_dec_8way) +@@ -492,5 +492,5 @@ SYM_FUNC_START(cast6_xts_dec_8way) + + popq %r15; + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(cast6_xts_dec_8way) +--- a/arch/x86/crypto/chacha-avx2-x86_64.S ++++ b/arch/x86/crypto/chacha-avx2-x86_64.S +@@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2) + + .Ldone2: + vzeroupper +- ret ++ RET + + .Lxorpart2: + # xor remaining bytes from partial register into output +@@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2) + + .Ldone4: + vzeroupper +- ret ++ RET + + .Lxorpart4: + # xor remaining bytes from partial register into output +@@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2) + .Ldone8: + vzeroupper + lea -8(%r10),%rsp +- ret ++ RET + + .Lxorpart8: + # xor remaining bytes from partial register into output +--- 
a/arch/x86/crypto/chacha-avx512vl-x86_64.S ++++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S +@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512v + + .Ldone2: + vzeroupper +- ret ++ RET + + .Lxorpart2: + # xor remaining bytes from partial register into output +@@ -432,7 +432,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512v + + .Ldone4: + vzeroupper +- ret ++ RET + + .Lxorpart4: + # xor remaining bytes from partial register into output +@@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512v + + .Ldone8: + vzeroupper +- ret ++ RET + + .Lxorpart8: + # xor remaining bytes from partial register into output +--- a/arch/x86/crypto/chacha-ssse3-x86_64.S ++++ b/arch/x86/crypto/chacha-ssse3-x86_64.S +@@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute) + sub $2,%r8d + jnz .Ldoubleround + +- ret ++ RET + SYM_FUNC_END(chacha_permute) + + SYM_FUNC_START(chacha_block_xor_ssse3) +@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3) + + .Ldone: + FRAME_END +- ret ++ RET + + .Lxorpart: + # xor remaining bytes from partial register into output +@@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3) + movdqu %xmm3,0x10(%rsi) + + FRAME_END +- ret ++ RET + SYM_FUNC_END(hchacha_block_ssse3) + + SYM_FUNC_START(chacha_4block_xor_ssse3) +@@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3) + + .Ldone4: + lea -8(%r10),%rsp +- ret ++ RET + + .Lxorpart4: + # xor remaining bytes from partial register into output +--- a/arch/x86/crypto/crc32-pclmul_asm.S ++++ b/arch/x86/crypto/crc32-pclmul_asm.S +@@ -236,5 +236,5 @@ fold_64: + pxor %xmm2, %xmm1 + pextrd $0x01, %xmm1, %eax + +- ret ++ RET + SYM_FUNC_END(crc32_pclmul_le_16) +--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S ++++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +@@ -309,7 +309,7 @@ do_return: + popq %rsi + popq %rdi + popq %rbx +- ret ++ RET + SYM_FUNC_END(crc_pcl) + + .section .rodata, "a", @progbits +--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S ++++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S +@@ -257,7 +257,7 
@@ SYM_FUNC_START(crc_t10dif_pcl) + # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0. + + pextrw $0, %xmm0, %eax +- ret ++ RET + + .align 16 + .Lless_than_256_bytes: +--- a/arch/x86/crypto/des3_ede-asm_64.S ++++ b/arch/x86/crypto/des3_ede-asm_64.S +@@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk + popq %r12; + popq %rbx; + +- ret; ++ RET; + SYM_FUNC_END(des3_ede_x86_64_crypt_blk) + + /*********************************************************************** +@@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk + popq %r12; + popq %rbx; + +- ret; ++ RET; + SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way) + + .section .rodata, "a", @progbits +--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S ++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S +@@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_bl + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA +- ret ++ RET + SYM_FUNC_END(__clmul_gf128mul_ble) + + /* void clmul_ghash_mul(char *dst, const u128 *shash) */ +@@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul) + pshufb BSWAP, DATA + movups DATA, (%rdi) + FRAME_END +- ret ++ RET + SYM_FUNC_END(clmul_ghash_mul) + + /* +@@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update) + movups DATA, (%rdi) + .Lupdate_just_ret: + FRAME_END +- ret ++ RET + SYM_FUNC_END(clmul_ghash_update) +--- a/arch/x86/crypto/nh-avx2-x86_64.S ++++ b/arch/x86/crypto/nh-avx2-x86_64.S +@@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2) + vpaddq T1, T0, T0 + vpaddq T4, T0, T0 + vmovdqu T0, (HASH) +- ret ++ RET + SYM_FUNC_END(nh_avx2) +--- a/arch/x86/crypto/nh-sse2-x86_64.S ++++ b/arch/x86/crypto/nh-sse2-x86_64.S +@@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2) + paddq PASS2_SUMS, T1 + movdqu T0, 0x00(HASH) + movdqu T1, 0x10(HASH) +- ret ++ RET + SYM_FUNC_END(nh_sse2) +--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S ++++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +@@ -605,7 +605,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_ + write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + 
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__serpent_enc_blk8_avx) + + .align 8 +@@ -659,7 +659,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_ + write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__serpent_dec_blk8_avx) + + SYM_FUNC_START(serpent_ecb_enc_8way_avx) +@@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx) + store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_ecb_enc_8way_avx) + + SYM_FUNC_START(serpent_ecb_dec_8way_avx) +@@ -695,7 +695,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx) + store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_ecb_dec_8way_avx) + + SYM_FUNC_START(serpent_cbc_dec_8way_avx) +@@ -713,7 +713,7 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx) + store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_cbc_dec_8way_avx) + + SYM_FUNC_START(serpent_ctr_8way_avx) +@@ -733,7 +733,7 @@ SYM_FUNC_START(serpent_ctr_8way_avx) + store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_ctr_8way_avx) + + SYM_FUNC_START(serpent_xts_enc_8way_avx) +@@ -755,7 +755,7 @@ SYM_FUNC_START(serpent_xts_enc_8way_avx) + store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_xts_enc_8way_avx) + + SYM_FUNC_START(serpent_xts_dec_8way_avx) +@@ -777,5 +777,5 @@ SYM_FUNC_START(serpent_xts_dec_8way_avx) + store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_xts_dec_8way_avx) +--- a/arch/x86/crypto/serpent-avx2-asm_64.S ++++ b/arch/x86/crypto/serpent-avx2-asm_64.S +@@ -611,7 +611,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16 + write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + 
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__serpent_enc_blk16) + + .align 8 +@@ -665,7 +665,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16 + write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__serpent_dec_blk16) + + SYM_FUNC_START(serpent_ecb_enc_16way) +@@ -687,7 +687,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_ecb_enc_16way) + + SYM_FUNC_START(serpent_ecb_dec_16way) +@@ -709,7 +709,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_ecb_dec_16way) + + SYM_FUNC_START(serpent_cbc_dec_16way) +@@ -732,7 +732,7 @@ SYM_FUNC_START(serpent_cbc_dec_16way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_cbc_dec_16way) + + SYM_FUNC_START(serpent_ctr_16way) +@@ -757,7 +757,7 @@ SYM_FUNC_START(serpent_ctr_16way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_ctr_16way) + + SYM_FUNC_START(serpent_xts_enc_16way) +@@ -783,7 +783,7 @@ SYM_FUNC_START(serpent_xts_enc_16way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_xts_enc_16way) + + SYM_FUNC_START(serpent_xts_dec_16way) +@@ -809,5 +809,5 @@ SYM_FUNC_START(serpent_xts_dec_16way) + vzeroupper; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(serpent_xts_dec_16way) +--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S ++++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S +@@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way) + + write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + +- ret; ++ RET; + + .L__enc_xor4: + xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + +- ret; ++ RET; + SYM_FUNC_END(__serpent_enc_blk_4way) + + SYM_FUNC_START(serpent_dec_blk_4way) +@@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way) + movl arg_dst(%esp), %eax; + write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); + +- ret; ++ RET; + 
SYM_FUNC_END(serpent_dec_blk_4way) +--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S ++++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +@@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way) + write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + +- ret; ++ RET; + + .L__enc_xor8: + xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__serpent_enc_blk_8way) + + SYM_FUNC_START(serpent_dec_blk_8way) +@@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way) + write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); + +- ret; ++ RET; + SYM_FUNC_END(serpent_dec_blk_8way) +--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S ++++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S +@@ -674,7 +674,7 @@ _loop3: + pop %r12 + pop %rbx + +- ret ++ RET + + SYM_FUNC_END(\name) + .endm +--- a/arch/x86/crypto/sha1_ni_asm.S ++++ b/arch/x86/crypto/sha1_ni_asm.S +@@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform) + .Ldone_hash: + mov RSPSAVE, %rsp + +- ret ++ RET + SYM_FUNC_END(sha1_ni_transform) + + .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +--- a/arch/x86/crypto/sha1_ssse3_asm.S ++++ b/arch/x86/crypto/sha1_ssse3_asm.S +@@ -99,7 +99,7 @@ + pop %rbp + pop %r12 + pop %rbx +- ret ++ RET + + SYM_FUNC_END(\name) + .endm +--- a/arch/x86/crypto/sha256-avx-asm.S ++++ b/arch/x86/crypto/sha256-avx-asm.S +@@ -458,7 +458,7 @@ done_hash: + popq %r13 + popq %r12 + popq %rbx +- ret ++ RET + SYM_FUNC_END(sha256_transform_avx) + + .section .rodata.cst256.K256, "aM", @progbits, 256 +--- a/arch/x86/crypto/sha256-avx2-asm.S ++++ b/arch/x86/crypto/sha256-avx2-asm.S +@@ -711,7 +711,7 @@ done_hash: + popq %r13 + popq %r12 + popq %rbx +- ret ++ RET + SYM_FUNC_END(sha256_transform_rorx) + + .section .rodata.cst512.K256, "aM", @progbits, 512 +--- a/arch/x86/crypto/sha256-ssse3-asm.S ++++ 
b/arch/x86/crypto/sha256-ssse3-asm.S +@@ -472,7 +472,7 @@ done_hash: + popq %r12 + popq %rbx + +- ret ++ RET + SYM_FUNC_END(sha256_transform_ssse3) + + .section .rodata.cst256.K256, "aM", @progbits, 256 +--- a/arch/x86/crypto/sha256_ni_asm.S ++++ b/arch/x86/crypto/sha256_ni_asm.S +@@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform) + + .Ldone_hash: + +- ret ++ RET + SYM_FUNC_END(sha256_ni_transform) + + .section .rodata.cst256.K256, "aM", @progbits, 256 +--- a/arch/x86/crypto/sha512-avx-asm.S ++++ b/arch/x86/crypto/sha512-avx-asm.S +@@ -364,7 +364,7 @@ updateblock: + mov frame_RSPSAVE(%rsp), %rsp + + nowork: +- ret ++ RET + SYM_FUNC_END(sha512_transform_avx) + + ######################################################################## +--- a/arch/x86/crypto/sha512-avx2-asm.S ++++ b/arch/x86/crypto/sha512-avx2-asm.S +@@ -681,7 +681,7 @@ done_hash: + + # Restore Stack Pointer + mov frame_RSPSAVE(%rsp), %rsp +- ret ++ RET + SYM_FUNC_END(sha512_transform_rorx) + + ######################################################################## +--- a/arch/x86/crypto/sha512-ssse3-asm.S ++++ b/arch/x86/crypto/sha512-ssse3-asm.S +@@ -366,7 +366,7 @@ updateblock: + mov frame_RSPSAVE(%rsp), %rsp + + nowork: +- ret ++ RET + SYM_FUNC_END(sha512_transform_ssse3) + + ######################################################################## +--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S ++++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +@@ -272,7 +272,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8) + outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__twofish_enc_blk8) + + .align 8 +@@ -312,7 +312,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8) + outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); + +- ret; ++ RET; + SYM_FUNC_END(__twofish_dec_blk8) + + SYM_FUNC_START(twofish_ecb_enc_8way) +@@ -332,7 +332,7 @@ 
SYM_FUNC_START(twofish_ecb_enc_8way) + store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(twofish_ecb_enc_8way) + + SYM_FUNC_START(twofish_ecb_dec_8way) +@@ -352,7 +352,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way) + store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(twofish_ecb_dec_8way) + + SYM_FUNC_START(twofish_cbc_dec_8way) +@@ -377,7 +377,7 @@ SYM_FUNC_START(twofish_cbc_dec_8way) + popq %r12; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(twofish_cbc_dec_8way) + + SYM_FUNC_START(twofish_ctr_8way) +@@ -404,7 +404,7 @@ SYM_FUNC_START(twofish_ctr_8way) + popq %r12; + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(twofish_ctr_8way) + + SYM_FUNC_START(twofish_xts_enc_8way) +@@ -428,7 +428,7 @@ SYM_FUNC_START(twofish_xts_enc_8way) + store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(twofish_xts_enc_8way) + + SYM_FUNC_START(twofish_xts_dec_8way) +@@ -452,5 +452,5 @@ SYM_FUNC_START(twofish_xts_dec_8way) + store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END +- ret; ++ RET; + SYM_FUNC_END(twofish_xts_dec_8way) +--- a/arch/x86/crypto/twofish-i586-asm_32.S ++++ b/arch/x86/crypto/twofish-i586-asm_32.S +@@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk) + pop %ebx + pop %ebp + mov $1, %eax +- ret ++ RET + SYM_FUNC_END(twofish_enc_blk) + + SYM_FUNC_START(twofish_dec_blk) +@@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk) + pop %ebx + pop %ebp + mov $1, %eax +- ret ++ RET + SYM_FUNC_END(twofish_dec_blk) +--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S ++++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +@@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) + popq %rbx; + popq %r12; + popq %r13; +- ret; ++ RET; + + .L__enc_xor3: + outunpack_enc3(xor); +@@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) + popq %rbx; + popq %r12; + popq %r13; +- ret; ++ RET; + 
SYM_FUNC_END(__twofish_enc_blk_3way) + + SYM_FUNC_START(twofish_dec_blk_3way) +@@ -301,5 +301,5 @@ SYM_FUNC_START(twofish_dec_blk_3way) + popq %rbx; + popq %r12; + popq %r13; +- ret; ++ RET; + SYM_FUNC_END(twofish_dec_blk_3way) +--- a/arch/x86/crypto/twofish-x86_64-asm_64.S ++++ b/arch/x86/crypto/twofish-x86_64-asm_64.S +@@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk) + + popq R1 + movl $1,%eax +- ret ++ RET + SYM_FUNC_END(twofish_enc_blk) + + SYM_FUNC_START(twofish_dec_blk) +@@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk) + + popq R1 + movl $1,%eax +- ret ++ RET + SYM_FUNC_END(twofish_dec_blk) +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -821,7 +821,7 @@ SYM_FUNC_START(schedule_tail_wrapper) + popl %eax + + FRAME_END +- ret ++ RET + SYM_FUNC_END(schedule_tail_wrapper) + .popsection + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -740,7 +740,7 @@ SYM_FUNC_START(asm_load_gs_index) + 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE + swapgs + FRAME_END +- ret ++ RET + SYM_FUNC_END(asm_load_gs_index) + EXPORT_SYMBOL(asm_load_gs_index) + +@@ -799,7 +799,7 @@ SYM_INNER_LABEL(asm_call_irq_on_stack, S + + /* Restore the previous stack pointer from RBP. */ + leaveq +- ret ++ RET + SYM_FUNC_END(asm_call_on_stack) + + #ifdef CONFIG_XEN_PV +@@ -932,7 +932,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * is needed here. 
+ */ + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx +- ret ++ RET + + .Lparanoid_entry_checkgs: + /* EBX = 1 -> kernel GSBASE active, no restore required */ +@@ -953,7 +953,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + .Lparanoid_kernel_gsbase: + + FENCE_SWAPGS_KERNEL_ENTRY +- ret ++ RET + SYM_CODE_END(paranoid_entry) + + /* +@@ -1032,7 +1032,7 @@ SYM_CODE_START_LOCAL(error_entry) + movq %rax, %rsp /* switch stack */ + ENCODE_FRAME_POINTER + pushq %r12 +- ret ++ RET + + /* + * There are two places in the kernel that can potentially fault with +@@ -1063,7 +1063,7 @@ SYM_CODE_START_LOCAL(error_entry) + */ + .Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY +- ret ++ RET + + .Lbstep_iret: + /* Fix truncated RIP */ +--- a/arch/x86/entry/thunk_32.S ++++ b/arch/x86/entry/thunk_32.S +@@ -24,7 +24,7 @@ SYM_CODE_START_NOALIGN(\name) + popl %edx + popl %ecx + popl %eax +- ret ++ RET + _ASM_NOKPROBE(\name) + SYM_CODE_END(\name) + .endm +--- a/arch/x86/entry/thunk_64.S ++++ b/arch/x86/entry/thunk_64.S +@@ -55,7 +55,7 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_res + popq %rsi + popq %rdi + popq %rbp +- ret ++ RET + _ASM_NOKPROBE(__thunk_restore) + SYM_CODE_END(__thunk_restore) + #endif +--- a/arch/x86/entry/vdso/vdso32/system_call.S ++++ b/arch/x86/entry/vdso/vdso32/system_call.S +@@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L + popl %ecx + CFI_RESTORE ecx + CFI_ADJUST_CFA_OFFSET -4 +- ret ++ RET + CFI_ENDPROC + + .size __kernel_vsyscall,.-__kernel_vsyscall +--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S ++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +@@ -19,17 +19,17 @@ __vsyscall_page: + + mov $__NR_gettimeofday, %rax + syscall +- ret ++ RET + + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall +- ret ++ RET + + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall +- ret ++ RET + + .balign 4096, 0xcc + +--- a/arch/x86/kernel/acpi/wakeup_32.S ++++ b/arch/x86/kernel/acpi/wakeup_32.S +@@ -60,7 +60,7 @@ save_registers: + popl saved_context_eflags + + movl 
$ret_point, saved_eip +- ret ++ RET + + + restore_registers: +@@ -70,7 +70,7 @@ restore_registers: + movl saved_context_edi, %edi + pushl saved_context_eflags + popfl +- ret ++ RET + + SYM_CODE_START(do_suspend_lowlevel) + call save_processor_state +@@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel) + ret_point: + call restore_registers + call restore_processor_state +- ret ++ RET + SYM_CODE_END(do_suspend_lowlevel) + + .data +--- a/arch/x86/kernel/ftrace_32.S ++++ b/arch/x86/kernel/ftrace_32.S +@@ -19,7 +19,7 @@ + #endif + + SYM_FUNC_START(__fentry__) +- ret ++ RET + SYM_FUNC_END(__fentry__) + EXPORT_SYMBOL(__fentry__) + +@@ -84,7 +84,7 @@ ftrace_graph_call: + + /* This is weak to keep gas from relaxing the jumps */ + SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) +- ret ++ RET + SYM_CODE_END(ftrace_caller) + + SYM_CODE_START(ftrace_regs_caller) +@@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller) + popl %edx + popl %ecx + popl %eax +- ret ++ RET + SYM_CODE_END(ftrace_graph_caller) + + .globl return_to_handler +--- a/arch/x86/kernel/ftrace_64.S ++++ b/arch/x86/kernel/ftrace_64.S +@@ -132,7 +132,7 @@ + #ifdef CONFIG_DYNAMIC_FTRACE + + SYM_FUNC_START(__fentry__) +- retq ++ RET + SYM_FUNC_END(__fentry__) + EXPORT_SYMBOL(__fentry__) + +@@ -170,10 +170,10 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L + + /* + * This is weak to keep gas from relaxing the jumps. +- * It is also used to copy the retq for trampolines. ++ * It is also used to copy the RET for trampolines. 
+ */ + SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) +- retq ++ RET + SYM_FUNC_END(ftrace_epilogue) + + SYM_FUNC_START(ftrace_regs_caller) +@@ -287,7 +287,7 @@ fgraph_trace: + #endif + + SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) +- retq ++ RET + + trace: + /* save_mcount_regs fills in first two parameters */ +@@ -319,7 +319,7 @@ SYM_FUNC_START(ftrace_graph_caller) + + restore_mcount_regs + +- retq ++ RET + SYM_FUNC_END(ftrace_graph_caller) + + SYM_CODE_START(return_to_handler) +--- a/arch/x86/kernel/head_32.S ++++ b/arch/x86/kernel/head_32.S +@@ -354,7 +354,7 @@ setup_once: + #endif + + andl $0,setup_once_ref /* Once is enough, thanks */ +- ret ++ RET + + SYM_FUNC_START(early_idt_handler_array) + # 36(%esp) %eflags +--- a/arch/x86/kernel/irqflags.S ++++ b/arch/x86/kernel/irqflags.S +@@ -10,7 +10,7 @@ + SYM_FUNC_START(native_save_fl) + pushf + pop %_ASM_AX +- ret ++ RET + SYM_FUNC_END(native_save_fl) + EXPORT_SYMBOL(native_save_fl) + +@@ -21,6 +21,6 @@ EXPORT_SYMBOL(native_save_fl) + SYM_FUNC_START(native_restore_fl) + push %_ASM_ARG1 + popf +- ret ++ RET + SYM_FUNC_END(native_restore_fl) + EXPORT_SYMBOL(native_restore_fl) +--- a/arch/x86/kernel/relocate_kernel_32.S ++++ b/arch/x86/kernel/relocate_kernel_32.S +@@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax +- ret ++ RET + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -159,7 +159,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp +- ret ++ RET + 1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp +@@ -190,7 +190,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax +- ret ++ RET + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -208,7 +208,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map + popl %edi + popl %esi + popl %ebx +- ret ++ RET + 
SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -271,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + popl %edi + popl %ebx + popl %ebp +- ret ++ RET + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -104,7 +104,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + /* jump to identity mapped page */ + addq $(identity_mapped - relocate_kernel), %r8 + pushq %r8 +- ret ++ RET + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + xorl %r14d, %r14d + xorl %r15d, %r15d + +- ret ++ RET + + 1: + popq %rdx +@@ -210,7 +210,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma + call swap_pages + movq $virtual_mapped, %rax + pushq %rax +- ret ++ RET + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map + popq %r12 + popq %rbp + popq %rbx +- ret ++ RET + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -288,7 +288,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + lea PAGE_SIZE(%rax), %rsi + jmp 0b + 3: +- ret ++ RET + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size +--- a/arch/x86/kernel/sev_verify_cbit.S ++++ b/arch/x86/kernel/sev_verify_cbit.S +@@ -85,5 +85,5 @@ SYM_FUNC_START(sev_verify_cbit) + #endif + /* Return page-table pointer */ + movq %rdi, %rax +- ret ++ RET + SYM_FUNC_END(sev_verify_cbit) +--- a/arch/x86/kernel/verify_cpu.S ++++ b/arch/x86/kernel/verify_cpu.S +@@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu) + .Lverify_cpu_no_longmode: + popf # Restore caller passed flags + movl $1,%eax +- ret ++ RET + .Lverify_cpu_sse_ok: + popf # Restore caller passed flags + xorl %eax, %eax +- ret ++ RET + SYM_FUNC_END(verify_cpu) +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -166,5 +166,5 @@ SYM_FUNC_START(__svm_vcpu_run) + pop %edi + #endif + pop 
%_ASM_BP +- ret ++ RET + SYM_FUNC_END(__svm_vcpu_run) +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -49,14 +49,14 @@ SYM_FUNC_START_LOCAL(vmx_vmenter) + je 2f + + 1: vmresume +- ret ++ RET + + 2: vmlaunch +- ret ++ RET + + 3: cmpb $0, kvm_rebooting + je 4f +- ret ++ RET + 4: ud2 + + _ASM_EXTABLE(1b, 3b) +@@ -89,7 +89,7 @@ SYM_FUNC_START(vmx_vmexit) + pop %_ASM_AX + .Lvmexit_skip_rsb: + #endif +- ret ++ RET + SYM_FUNC_END(vmx_vmexit) + + /** +@@ -228,7 +228,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + pop %edi + #endif + pop %_ASM_BP +- ret ++ RET + + /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ + 2: mov $1, %eax +@@ -293,7 +293,7 @@ SYM_FUNC_START(vmread_error_trampoline) + pop %_ASM_AX + pop %_ASM_BP + +- ret ++ RET + SYM_FUNC_END(vmread_error_trampoline) + + SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) +@@ -326,5 +326,5 @@ SYM_FUNC_START(vmx_do_interrupt_nmi_irqo + */ + mov %_ASM_BP, %_ASM_SP + pop %_ASM_BP +- ret ++ RET + SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff) +--- a/arch/x86/lib/atomic64_386_32.S ++++ b/arch/x86/lib/atomic64_386_32.S +@@ -30,7 +30,7 @@ SYM_FUNC_START(atomic64_##op##_386); \ + + #define RET_IRQ_RESTORE \ + IRQ_RESTORE v; \ +- ret ++ RET + + #define v %ecx + BEGIN_IRQ_SAVE(read) +--- a/arch/x86/lib/atomic64_cx8_32.S ++++ b/arch/x86/lib/atomic64_cx8_32.S +@@ -18,7 +18,7 @@ + + SYM_FUNC_START(atomic64_read_cx8) + read64 %ecx +- ret ++ RET + SYM_FUNC_END(atomic64_read_cx8) + + SYM_FUNC_START(atomic64_set_cx8) +@@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8) + cmpxchg8b (%esi) + jne 1b + +- ret ++ RET + SYM_FUNC_END(atomic64_set_cx8) + + SYM_FUNC_START(atomic64_xchg_cx8) +@@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8) + cmpxchg8b (%esi) + jne 1b + +- ret ++ RET + SYM_FUNC_END(atomic64_xchg_cx8) + + .macro addsub_return func ins insc +@@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_ + popl %esi + popl %ebx + popl %ebp +- ret ++ RET + SYM_FUNC_END(atomic64_\func\()_return_cx8) + .endm + +@@ -93,7 
+93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_ + movl %ebx, %eax + movl %ecx, %edx + popl %ebx +- ret ++ RET + SYM_FUNC_END(atomic64_\func\()_return_cx8) + .endm + +@@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_ + movl %ebx, %eax + movl %ecx, %edx + popl %ebx +- ret ++ RET + SYM_FUNC_END(atomic64_dec_if_positive_cx8) + + SYM_FUNC_START(atomic64_add_unless_cx8) +@@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8) + addl $8, %esp + popl %ebx + popl %ebp +- ret ++ RET + 4: + cmpl %edx, 4(%esp) + jne 2b +@@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8 + movl $1, %eax + 3: + popl %ebx +- ret ++ RET + SYM_FUNC_END(atomic64_inc_not_zero_cx8) +--- a/arch/x86/lib/checksum_32.S ++++ b/arch/x86/lib/checksum_32.S +@@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial) + 8: + popl %ebx + popl %esi +- ret ++ RET + SYM_FUNC_END(csum_partial) + + #else +@@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial) + 90: + popl %ebx + popl %esi +- ret ++ RET + SYM_FUNC_END(csum_partial) + + #endif +@@ -371,7 +371,7 @@ EXC( movb %cl, (%edi) ) + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp +- ret ++ RET + SYM_FUNC_END(csum_partial_copy_generic) + + #else +@@ -447,7 +447,7 @@ EXC( movb %dl, (%edi) ) + popl %esi + popl %edi + popl %ebx +- ret ++ RET + SYM_FUNC_END(csum_partial_copy_generic) + + #undef ROUND +--- a/arch/x86/lib/clear_page_64.S ++++ b/arch/x86/lib/clear_page_64.S +@@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep) + movl $4096/8,%ecx + xorl %eax,%eax + rep stosq +- ret ++ RET + SYM_FUNC_END(clear_page_rep) + EXPORT_SYMBOL_GPL(clear_page_rep) + +@@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig) + leaq 64(%rdi),%rdi + jnz .Lloop + nop +- ret ++ RET + SYM_FUNC_END(clear_page_orig) + EXPORT_SYMBOL_GPL(clear_page_orig) + +@@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms) + movl $4096,%ecx + xorl %eax,%eax + rep stosb +- ret ++ RET + SYM_FUNC_END(clear_page_erms) + EXPORT_SYMBOL_GPL(clear_page_erms) +--- a/arch/x86/lib/cmpxchg16b_emu.S ++++ 
b/arch/x86/lib/cmpxchg16b_emu.S +@@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) + + popfq + mov $1, %al +- ret ++ RET + + .Lnot_same: + popfq + xor %al,%al +- ret ++ RET + + SYM_FUNC_END(this_cpu_cmpxchg16b_emu) +--- a/arch/x86/lib/cmpxchg8b_emu.S ++++ b/arch/x86/lib/cmpxchg8b_emu.S +@@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu) + movl %ecx, 4(%esi) + + popfl +- ret ++ RET + + .Lnot_same: + movl (%esi), %eax +@@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu) + movl 4(%esi), %edx + + popfl +- ret ++ RET + + SYM_FUNC_END(cmpxchg8b_emu) + EXPORT_SYMBOL(cmpxchg8b_emu) +--- a/arch/x86/lib/copy_mc_64.S ++++ b/arch/x86/lib/copy_mc_64.S +@@ -86,7 +86,7 @@ SYM_FUNC_START(copy_mc_fragile) + .L_done_memcpy_trap: + xorl %eax, %eax + .L_done: +- ret ++ RET + SYM_FUNC_END(copy_mc_fragile) + EXPORT_SYMBOL_GPL(copy_mc_fragile) + +@@ -142,7 +142,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_str + rep movsb + /* Copy successful. Return zero */ + xorl %eax, %eax +- ret ++ RET + SYM_FUNC_END(copy_mc_enhanced_fast_string) + + .section .fixup, "ax" +@@ -155,7 +155,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_strin + * user-copy routines. 
+ */ + movq %rcx, %rax +- ret ++ RET + + .previous + +--- a/arch/x86/lib/copy_page_64.S ++++ b/arch/x86/lib/copy_page_64.S +@@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page) + ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD + movl $4096/8, %ecx + rep movsq +- ret ++ RET + SYM_FUNC_END(copy_page) + EXPORT_SYMBOL(copy_page) + +@@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs) + movq (%rsp), %rbx + movq 1*8(%rsp), %r12 + addq $2*8, %rsp +- ret ++ RET + SYM_FUNC_END(copy_page_regs) +--- a/arch/x86/lib/copy_user_64.S ++++ b/arch/x86/lib/copy_user_64.S +@@ -105,7 +105,7 @@ SYM_FUNC_START(copy_user_generic_unrolle + jnz 21b + 23: xor %eax,%eax + ASM_CLAC +- ret ++ RET + + .section .fixup,"ax" + 30: shll $6,%ecx +@@ -173,7 +173,7 @@ SYM_FUNC_START(copy_user_generic_string) + movsb + xorl %eax,%eax + ASM_CLAC +- ret ++ RET + + .section .fixup,"ax" + 11: leal (%rdx,%rcx,8),%ecx +@@ -207,7 +207,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_s + movsb + xorl %eax,%eax + ASM_CLAC +- ret ++ RET + + .section .fixup,"ax" + 12: movl %ecx,%edx /* ecx is zerorest also */ +@@ -239,7 +239,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_ + 1: rep movsb + 2: mov %ecx,%eax + ASM_CLAC +- ret ++ RET + + /* + * Return zero to pretend that this copy succeeded. 
This +@@ -250,7 +250,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_ + */ + 3: xorl %eax,%eax + ASM_CLAC +- ret ++ RET + + _ASM_EXTABLE_CPY(1b, 2b) + SYM_CODE_END(.Lcopy_user_handle_tail) +@@ -361,7 +361,7 @@ SYM_FUNC_START(__copy_user_nocache) + xorl %eax,%eax + ASM_CLAC + sfence +- ret ++ RET + + .section .fixup,"ax" + .L_fixup_4x8b_copy: +--- a/arch/x86/lib/csum-copy_64.S ++++ b/arch/x86/lib/csum-copy_64.S +@@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic + movq 3*8(%rsp), %r13 + movq 4*8(%rsp), %r15 + addq $5*8, %rsp +- ret ++ RET + .Lshort: + movl %ecx, %r10d + jmp .L1 +--- a/arch/x86/lib/getuser.S ++++ b/arch/x86/lib/getuser.S +@@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1) + 1: movzbl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_1) + EXPORT_SYMBOL(__get_user_1) + +@@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2) + 2: movzwl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_2) + EXPORT_SYMBOL(__get_user_2) + +@@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4) + 3: movl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_4) + EXPORT_SYMBOL(__get_user_4) + +@@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8) + 4: movq (%_ASM_AX),%rdx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + #else + LOAD_TASK_SIZE_MINUS_N(7) + cmp %_ASM_DX,%_ASM_AX +@@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8) + 5: movl 4(%_ASM_AX),%ecx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + #endif + SYM_FUNC_END(__get_user_8) + EXPORT_SYMBOL(__get_user_8) +@@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1) + 6: movzbl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_nocheck_1) + EXPORT_SYMBOL(__get_user_nocheck_1) + +@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2) + 7: movzwl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_nocheck_2) + EXPORT_SYMBOL(__get_user_nocheck_2) + +@@ -144,7 +144,7 @@ 
SYM_FUNC_START(__get_user_nocheck_4) + 8: movl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_nocheck_4) + EXPORT_SYMBOL(__get_user_nocheck_4) + +@@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8) + #endif + xor %eax,%eax + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__get_user_nocheck_8) + EXPORT_SYMBOL(__get_user_nocheck_8) + +@@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac + bad_get_user: + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX +- ret ++ RET + SYM_CODE_END(.Lbad_get_user_clac) + + #ifdef CONFIG_X86_32 +@@ -179,7 +179,7 @@ bad_get_user_8: + xor %edx,%edx + xor %ecx,%ecx + mov $(-EFAULT),%_ASM_AX +- ret ++ RET + SYM_CODE_END(.Lbad_get_user_8_clac) + #endif + +--- a/arch/x86/lib/hweight.S ++++ b/arch/x86/lib/hweight.S +@@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32) + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) +- ret ++ RET + SYM_FUNC_END(__sw_hweight32) + EXPORT_SYMBOL(__sw_hweight32) + +@@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64) + + popq %rdx + popq %rdi +- ret ++ RET + #else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx +@@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64) + addl %ecx, %eax # result + + popl %ecx +- ret ++ RET + #endif + SYM_FUNC_END(__sw_hweight64) + EXPORT_SYMBOL(__sw_hweight64) +--- a/arch/x86/lib/iomap_copy_64.S ++++ b/arch/x86/lib/iomap_copy_64.S +@@ -11,5 +11,5 @@ + SYM_FUNC_START(__iowrite32_copy) + movl %edx,%ecx + rep movsd +- ret ++ RET + SYM_FUNC_END(__iowrite32_copy) +--- a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) + rep movsq + movl %edx, %ecx + rep movsb +- ret ++ RET + SYM_FUNC_END(memcpy) + SYM_FUNC_END_ALIAS(__memcpy) + EXPORT_SYMBOL(memcpy) +@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) + movq %rdi, %rax + movq %rdx, %rcx + rep movsb +- ret ++ RET + 
SYM_FUNC_END(memcpy_erms) + + SYM_FUNC_START_LOCAL(memcpy_orig) +@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movq %r9, 1*8(%rdi) + movq %r10, -2*8(%rdi, %rdx) + movq %r11, -1*8(%rdi, %rdx) +- retq ++ RET + .p2align 4 + .Lless_16bytes: + cmpl $8, %edx +@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movq -1*8(%rsi, %rdx), %r9 + movq %r8, 0*8(%rdi) + movq %r9, -1*8(%rdi, %rdx) +- retq ++ RET + .p2align 4 + .Lless_8bytes: + cmpl $4, %edx +@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movl -4(%rsi, %rdx), %r8d + movl %ecx, (%rdi) + movl %r8d, -4(%rdi, %rdx) +- retq ++ RET + .p2align 4 + .Lless_3bytes: + subl $1, %edx +@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + movb %cl, (%rdi) + + .Lend: +- retq ++ RET + SYM_FUNC_END(memcpy_orig) + + .popsection +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) + /* FSRM implies ERMS => no length checks, do the copy directly */ + .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM +- ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS ++ ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS + + /* + * movsq instruction have many startup latency +@@ -205,7 +205,7 @@ SYM_FUNC_START(__memmove) + movb (%rsi), %r11b + movb %r11b, (%rdi) + 13: +- retq ++ RET + SYM_FUNC_END(__memmove) + SYM_FUNC_END_ALIAS(memmove) + EXPORT_SYMBOL(__memmove) +--- a/arch/x86/lib/memset_64.S ++++ b/arch/x86/lib/memset_64.S +@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) + movl %edx,%ecx + rep stosb + movq %r9,%rax +- ret ++ RET + SYM_FUNC_END(__memset) + SYM_FUNC_END_ALIAS(memset) + EXPORT_SYMBOL(memset) +@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) + movq %rdx,%rcx + rep stosb + movq %r9,%rax +- ret ++ RET + SYM_FUNC_END(memset_erms) + + SYM_FUNC_START_LOCAL(memset_orig) +@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) + + .Lende: + movq %r10,%rax +- ret ++ RET + + .Lbad_alignment: + 
cmpq $7,%rdx +--- a/arch/x86/lib/msr-reg.S ++++ b/arch/x86/lib/msr-reg.S +@@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs) + movl %edi, 28(%r10) + popq %r12 + popq %rbx +- ret ++ RET + 3: + movl $-EIO, %r11d + jmp 2b +@@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs) + popl %esi + popl %ebp + popl %ebx +- ret ++ RET + 3: + movl $-EIO, 4(%esp) + jmp 2b +--- a/arch/x86/lib/putuser.S ++++ b/arch/x86/lib/putuser.S +@@ -52,7 +52,7 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SY + 1: movb %al,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__put_user_1) + EXPORT_SYMBOL(__put_user_1) + EXPORT_SYMBOL(__put_user_nocheck_1) +@@ -66,7 +66,7 @@ SYM_INNER_LABEL(__put_user_nocheck_2, SY + 2: movw %ax,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__put_user_2) + EXPORT_SYMBOL(__put_user_2) + EXPORT_SYMBOL(__put_user_nocheck_2) +@@ -80,7 +80,7 @@ SYM_INNER_LABEL(__put_user_nocheck_4, SY + 3: movl %eax,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC +- ret ++ RET + SYM_FUNC_END(__put_user_4) + EXPORT_SYMBOL(__put_user_4) + EXPORT_SYMBOL(__put_user_nocheck_4) +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -23,7 +23,7 @@ + .Ldo_rop_\@: + mov %\reg, (%_ASM_SP) + UNWIND_HINT_FUNC +- ret ++ RET + .endm + + .macro THUNK reg +--- a/arch/x86/math-emu/div_Xsig.S ++++ b/arch/x86/math-emu/div_Xsig.S +@@ -341,7 +341,7 @@ L_exit: + popl %esi + + leave +- ret ++ RET + + + #ifdef PARANOID +--- a/arch/x86/math-emu/div_small.S ++++ b/arch/x86/math-emu/div_small.S +@@ -44,5 +44,5 @@ SYM_FUNC_START(FPU_div_small) + popl %esi + + leave +- ret ++ RET + SYM_FUNC_END(FPU_div_small) +--- a/arch/x86/math-emu/mul_Xsig.S ++++ b/arch/x86/math-emu/mul_Xsig.S +@@ -62,7 +62,7 @@ SYM_FUNC_START(mul32_Xsig) + + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(mul32_Xsig) + + +@@ -115,7 +115,7 @@ SYM_FUNC_START(mul64_Xsig) + + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(mul64_Xsig) + + +@@ -175,5 +175,5 @@ SYM_FUNC_START(mul_Xsig_Xsig) + + popl %esi + leave +- ret 
++ RET + SYM_FUNC_END(mul_Xsig_Xsig) +--- a/arch/x86/math-emu/polynom_Xsig.S ++++ b/arch/x86/math-emu/polynom_Xsig.S +@@ -133,5 +133,5 @@ L_accum_done: + popl %edi + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(polynomial_Xsig) +--- a/arch/x86/math-emu/reg_norm.S ++++ b/arch/x86/math-emu/reg_norm.S +@@ -72,7 +72,7 @@ L_exit_valid: + L_exit: + popl %ebx + leave +- ret ++ RET + + + L_zero: +@@ -138,7 +138,7 @@ L_exit_nuo_valid: + + popl %ebx + leave +- ret ++ RET + + L_exit_nuo_zero: + movl TAG_Zero,%eax +@@ -146,5 +146,5 @@ L_exit_nuo_zero: + + popl %ebx + leave +- ret ++ RET + SYM_FUNC_END(FPU_normalize_nuo) +--- a/arch/x86/math-emu/reg_round.S ++++ b/arch/x86/math-emu/reg_round.S +@@ -437,7 +437,7 @@ fpu_Arith_exit: + popl %edi + popl %esi + leave +- ret ++ RET + + + /* +--- a/arch/x86/math-emu/reg_u_add.S ++++ b/arch/x86/math-emu/reg_u_add.S +@@ -164,6 +164,6 @@ L_exit: + popl %edi + popl %esi + leave +- ret ++ RET + #endif /* PARANOID */ + SYM_FUNC_END(FPU_u_add) +--- a/arch/x86/math-emu/reg_u_div.S ++++ b/arch/x86/math-emu/reg_u_div.S +@@ -468,7 +468,7 @@ L_exit: + popl %esi + + leave +- ret ++ RET + #endif /* PARANOID */ + + SYM_FUNC_END(FPU_u_div) +--- a/arch/x86/math-emu/reg_u_mul.S ++++ b/arch/x86/math-emu/reg_u_mul.S +@@ -144,7 +144,7 @@ L_exit: + popl %edi + popl %esi + leave +- ret ++ RET + #endif /* PARANOID */ + + SYM_FUNC_END(FPU_u_mul) +--- a/arch/x86/math-emu/reg_u_sub.S ++++ b/arch/x86/math-emu/reg_u_sub.S +@@ -270,5 +270,5 @@ L_exit: + popl %edi + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(FPU_u_sub) +--- a/arch/x86/math-emu/round_Xsig.S ++++ b/arch/x86/math-emu/round_Xsig.S +@@ -78,7 +78,7 @@ L_exit: + popl %esi + popl %ebx + leave +- ret ++ RET + SYM_FUNC_END(round_Xsig) + + +@@ -138,5 +138,5 @@ L_n_exit: + popl %esi + popl %ebx + leave +- ret ++ RET + SYM_FUNC_END(norm_Xsig) +--- a/arch/x86/math-emu/shr_Xsig.S ++++ b/arch/x86/math-emu/shr_Xsig.S +@@ -45,7 +45,7 @@ SYM_FUNC_START(shr_Xsig) + popl %ebx + popl %esi + leave +- ret ++ RET + + 
L_more_than_31: + cmpl $64,%ecx +@@ -61,7 +61,7 @@ L_more_than_31: + movl $0,8(%esi) + popl %esi + leave +- ret ++ RET + + L_more_than_63: + cmpl $96,%ecx +@@ -76,7 +76,7 @@ L_more_than_63: + movl %edx,8(%esi) + popl %esi + leave +- ret ++ RET + + L_more_than_95: + xorl %eax,%eax +@@ -85,5 +85,5 @@ L_more_than_95: + movl %eax,8(%esi) + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(shr_Xsig) +--- a/arch/x86/math-emu/wm_shrx.S ++++ b/arch/x86/math-emu/wm_shrx.S +@@ -55,7 +55,7 @@ SYM_FUNC_START(FPU_shrx) + popl %ebx + popl %esi + leave +- ret ++ RET + + L_more_than_31: + cmpl $64,%ecx +@@ -70,7 +70,7 @@ L_more_than_31: + movl $0,4(%esi) + popl %esi + leave +- ret ++ RET + + L_more_than_63: + cmpl $96,%ecx +@@ -84,7 +84,7 @@ L_more_than_63: + movl %edx,4(%esi) + popl %esi + leave +- ret ++ RET + + L_more_than_95: + xorl %eax,%eax +@@ -92,7 +92,7 @@ L_more_than_95: + movl %eax,4(%esi) + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(FPU_shrx) + + +@@ -146,7 +146,7 @@ SYM_FUNC_START(FPU_shrxs) + popl %ebx + popl %esi + leave +- ret ++ RET + + /* Shift by [0..31] bits */ + Ls_less_than_32: +@@ -163,7 +163,7 @@ Ls_less_than_32: + popl %ebx + popl %esi + leave +- ret ++ RET + + /* Shift by [64..95] bits */ + Ls_more_than_63: +@@ -189,7 +189,7 @@ Ls_more_than_63: + popl %ebx + popl %esi + leave +- ret ++ RET + + Ls_more_than_95: + /* Shift by [96..inf) bits */ +@@ -203,5 +203,5 @@ Ls_more_than_95: + popl %ebx + popl %esi + leave +- ret ++ RET + SYM_FUNC_END(FPU_shrxs) +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -65,7 +65,7 @@ SYM_FUNC_START(sme_encrypt_execute) + movq %rbp, %rsp /* Restore original stack pointer */ + pop %rbp + +- ret ++ RET + SYM_FUNC_END(sme_encrypt_execute) + + SYM_FUNC_START(__enc_copy) +@@ -151,6 +151,6 @@ SYM_FUNC_START(__enc_copy) + pop %r12 + pop %r15 + +- ret ++ RET + .L__enc_copy_end: + SYM_FUNC_END(__enc_copy) +--- a/arch/x86/platform/efi/efi_stub_32.S ++++ b/arch/x86/platform/efi/efi_stub_32.S +@@ -56,5 +56,5 
@@ SYM_FUNC_START(efi_call_svam) + + movl 16(%esp), %ebx + leave +- ret ++ RET + SYM_FUNC_END(efi_call_svam) +--- a/arch/x86/platform/efi/efi_stub_64.S ++++ b/arch/x86/platform/efi/efi_stub_64.S +@@ -23,5 +23,5 @@ SYM_FUNC_START(__efi_call) + mov %rsi, %rcx + CALL_NOSPEC rdi + leave +- ret ++ RET + SYM_FUNC_END(__efi_call) +--- a/arch/x86/platform/efi/efi_thunk_64.S ++++ b/arch/x86/platform/efi/efi_thunk_64.S +@@ -63,7 +63,7 @@ SYM_CODE_START(__efi64_thunk) + 1: movq 24(%rsp), %rsp + pop %rbx + pop %rbp +- retq ++ RET + + .code32 + 2: pushl $__KERNEL_CS +--- a/arch/x86/platform/olpc/xo1-wakeup.S ++++ b/arch/x86/platform/olpc/xo1-wakeup.S +@@ -77,7 +77,7 @@ save_registers: + pushfl + popl saved_context_eflags + +- ret ++ RET + + restore_registers: + movl saved_context_ebp, %ebp +@@ -88,7 +88,7 @@ restore_registers: + pushl saved_context_eflags + popfl + +- ret ++ RET + + SYM_CODE_START(do_olpc_suspend_lowlevel) + call save_processor_state +@@ -109,7 +109,7 @@ ret_point: + + call restore_registers + call restore_processor_state +- ret ++ RET + SYM_CODE_END(do_olpc_suspend_lowlevel) + + .data +--- a/arch/x86/power/hibernate_asm_32.S ++++ b/arch/x86/power/hibernate_asm_32.S +@@ -32,7 +32,7 @@ SYM_FUNC_START(swsusp_arch_suspend) + FRAME_BEGIN + call swsusp_save + FRAME_END +- ret ++ RET + SYM_FUNC_END(swsusp_arch_suspend) + + SYM_CODE_START(restore_image) +@@ -108,5 +108,5 @@ SYM_FUNC_START(restore_registers) + /* tell the hibernation core that we've just restored the memory */ + movl %eax, in_suspend + +- ret ++ RET + SYM_FUNC_END(restore_registers) +--- a/arch/x86/power/hibernate_asm_64.S ++++ b/arch/x86/power/hibernate_asm_64.S +@@ -49,7 +49,7 @@ SYM_FUNC_START(swsusp_arch_suspend) + FRAME_BEGIN + call swsusp_save + FRAME_END +- ret ++ RET + SYM_FUNC_END(swsusp_arch_suspend) + + SYM_CODE_START(restore_image) +@@ -143,5 +143,5 @@ SYM_FUNC_START(restore_registers) + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + +- 
ret ++ RET + SYM_FUNC_END(restore_registers) +--- a/arch/x86/um/checksum_32.S ++++ b/arch/x86/um/checksum_32.S +@@ -110,7 +110,7 @@ csum_partial: + 7: + popl %ebx + popl %esi +- ret ++ RET + + #else + +@@ -208,7 +208,7 @@ csum_partial: + 80: + popl %ebx + popl %esi +- ret ++ RET + + #endif + EXPORT_SYMBOL(csum_partial) +--- a/arch/x86/um/setjmp_32.S ++++ b/arch/x86/um/setjmp_32.S +@@ -34,7 +34,7 @@ kernel_setjmp: + movl %esi,12(%edx) + movl %edi,16(%edx) + movl %ecx,20(%edx) # Return address +- ret ++ RET + + .size kernel_setjmp,.-kernel_setjmp + +--- a/arch/x86/um/setjmp_64.S ++++ b/arch/x86/um/setjmp_64.S +@@ -33,7 +33,7 @@ kernel_setjmp: + movq %r14,40(%rdi) + movq %r15,48(%rdi) + movq %rsi,56(%rdi) # Return address +- ret ++ RET + + .size kernel_setjmp,.-kernel_setjmp + +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -45,7 +45,7 @@ SYM_FUNC_START(xen_irq_enable_direct) + call check_events + 1: + FRAME_END +- ret ++ RET + SYM_FUNC_END(xen_irq_enable_direct) + + +@@ -55,7 +55,7 @@ SYM_FUNC_END(xen_irq_enable_direct) + */ + SYM_FUNC_START(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask +- ret ++ RET + SYM_FUNC_END(xen_irq_disable_direct) + + /* +@@ -71,7 +71,7 @@ SYM_FUNC_START(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + setz %ah + addb %ah, %ah +- ret ++ RET + SYM_FUNC_END(xen_save_fl_direct) + + +@@ -98,7 +98,7 @@ SYM_FUNC_START(xen_restore_fl_direct) + call check_events + 1: + FRAME_END +- ret ++ RET + SYM_FUNC_END(xen_restore_fl_direct) + + +@@ -128,7 +128,7 @@ SYM_FUNC_START(check_events) + pop %rcx + pop %rax + FRAME_END +- ret ++ RET + SYM_FUNC_END(check_events) + + SYM_FUNC_START(xen_read_cr2) +@@ -136,14 +136,14 @@ SYM_FUNC_START(xen_read_cr2) + _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX + _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX + FRAME_END +- ret ++ RET + SYM_FUNC_END(xen_read_cr2); + + SYM_FUNC_START(xen_read_cr2_direct) + FRAME_BEGIN + _ASM_MOV 
PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX + FRAME_END +- ret ++ RET + SYM_FUNC_END(xen_read_cr2_direct); + + .macro xen_pv_trap name +--- a/arch/x86/xen/xen-head.S ++++ b/arch/x86/xen/xen-head.S +@@ -70,7 +70,7 @@ SYM_CODE_START(hypercall_page) + .rept (PAGE_SIZE / 32) + UNWIND_HINT_FUNC + .skip 31, 0x90 +- ret ++ RET + .endr + + #define HYPERCALL(n) \ diff --git a/queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch b/queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch new file mode 100644 index 00000000000..86245a8370c --- /dev/null +++ b/queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch @@ -0,0 +1,191 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Sat, 4 Dec 2021 14:43:41 +0100 +Subject: x86: Prepare inline-asm for straight-line-speculation + +From: Peter Zijlstra + +commit b17c2baa305cccbd16bafa289fd743cc2db77966 upstream. + +Replace all ret/retq instructions with ASM_RET in preparation of +making it more than a single instruction. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Link: https://lore.kernel.org/r/20211204134907.964635458@infradead.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/linkage.h | 4 ++++ + arch/x86/include/asm/paravirt.h | 2 +- + arch/x86/include/asm/qspinlock_paravirt.h | 4 ++-- + arch/x86/kernel/alternative.c | 2 +- + arch/x86/kernel/kprobes/core.c | 2 +- + arch/x86/kernel/paravirt.c | 2 +- + arch/x86/kvm/emulate.c | 4 ++-- + arch/x86/lib/error-inject.c | 3 ++- + samples/ftrace/ftrace-direct-modify.c | 4 ++-- + samples/ftrace/ftrace-direct-too.c | 2 +- + samples/ftrace/ftrace-direct.c | 2 +- + 11 files changed, 18 insertions(+), 13 deletions(-) + +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -18,6 +18,10 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + ++#else /* __ASSEMBLY__ */ ++ ++#define ASM_RET "ret\n\t" ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_LINKAGE_H */ +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -630,7 +630,7 @@ bool __raw_callee_save___native_vcpu_is_ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + FRAME_END \ +- "ret;" \ ++ ASM_RET \ + ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ + ".popsection") + +--- a/arch/x86/include/asm/qspinlock_paravirt.h ++++ b/arch/x86/include/asm/qspinlock_paravirt.h +@@ -48,7 +48,7 @@ asm (".pushsection .text;" + "jne .slowpath;" + "pop %rdx;" + FRAME_END +- "ret;" ++ ASM_RET + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" +@@ -56,7 +56,7 @@ asm (".pushsection .text;" + "pop %rsi;" + "pop %rdx;" + FRAME_END +- "ret;" ++ ASM_RET + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -869,7 +869,7 @@ asm ( + " .type int3_magic, @function\n" + 
"int3_magic:\n" + " movl $1, (%" _ASM_ARG1 ")\n" +-" ret\n" ++ ASM_RET + " .size int3_magic, .-int3_magic\n" + " .popsection\n" + ); +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -768,7 +768,7 @@ asm( + RESTORE_REGS_STRING + " popfl\n" + #endif +- " ret\n" ++ ASM_RET + ".size kretprobe_trampoline, .-kretprobe_trampoline\n" + ); + NOKPROBE_SYMBOL(kretprobe_trampoline); +--- a/arch/x86/kernel/paravirt.c ++++ b/arch/x86/kernel/paravirt.c +@@ -40,7 +40,7 @@ extern void _paravirt_nop(void); + asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" +- "ret\n\t" ++ ASM_RET + ".size _paravirt_nop, . - _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -316,7 +316,7 @@ static int fastop(struct x86_emulate_ctx + __FOP_FUNC(#name) + + #define __FOP_RET(name) \ +- "ret \n\t" \ ++ ASM_RET \ + ".size " name ", .-" name "\n\t" + + #define FOP_RET(name) \ +@@ -437,7 +437,7 @@ static int fastop(struct x86_emulate_ctx + + asm(".pushsection .fixup, \"ax\"\n" + ".global kvm_fastop_exception \n" +- "kvm_fastop_exception: xor %esi, %esi; ret\n" ++ "kvm_fastop_exception: xor %esi, %esi; " ASM_RET + ".popsection"); + + FOP_START(setcc) +--- a/arch/x86/lib/error-inject.c ++++ b/arch/x86/lib/error-inject.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + ++#include + #include + #include + +@@ -10,7 +11,7 @@ asm( + ".type just_return_func, @function\n" + ".globl just_return_func\n" + "just_return_func:\n" +- " ret\n" ++ ASM_RET + ".size just_return_func, .-just_return_func\n" + ); + +--- a/samples/ftrace/ftrace-direct-modify.c ++++ b/samples/ftrace/ftrace-direct-modify.c +@@ -31,7 +31,7 @@ asm ( + " call my_direct_func1\n" + " leave\n" + " .size my_tramp1, .-my_tramp1\n" +-" ret\n" ++ ASM_RET + " .type my_tramp2, @function\n" + " .globl my_tramp2\n" + " my_tramp2:" +@@ -39,7 +39,7 @@ asm ( + " movq %rsp, %rbp\n" + " call 
my_direct_func2\n" + " leave\n" +-" ret\n" ++ ASM_RET + " .size my_tramp2, .-my_tramp2\n" + " .popsection\n" + ); +--- a/samples/ftrace/ftrace-direct-too.c ++++ b/samples/ftrace/ftrace-direct-too.c +@@ -31,7 +31,7 @@ asm ( + " popq %rsi\n" + " popq %rdi\n" + " leave\n" +-" ret\n" ++ ASM_RET + " .size my_tramp, .-my_tramp\n" + " .popsection\n" + ); +--- a/samples/ftrace/ftrace-direct.c ++++ b/samples/ftrace/ftrace-direct.c +@@ -24,7 +24,7 @@ asm ( + " call my_direct_func\n" + " popq %rdi\n" + " leave\n" +-" ret\n" ++ ASM_RET + " .size my_tramp, .-my_tramp\n" + " .popsection\n" + ); diff --git a/queue-5.10/x86-realmode-build-with-d__disable_exports.patch b/queue-5.10/x86-realmode-build-with-d__disable_exports.patch new file mode 100644 index 00000000000..a62e4dd1380 --- /dev/null +++ b/queue-5.10/x86-realmode-build-with-d__disable_exports.patch @@ -0,0 +1,29 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Thadeu Lima de Souza Cascardo +Date: Fri, 1 Jul 2022 11:21:20 -0300 +Subject: x86/realmode: build with -D__DISABLE_EXPORTS + +From: Thadeu Lima de Souza Cascardo + +Commit 156ff4a544ae ("x86/ibt: Base IBT bits") added this option when +building realmode in order to disable IBT there. This is also needed in +order to disable return thunks. 
+ +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -31,7 +31,7 @@ endif + CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h + M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS)) + +-REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \ ++REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ + -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ + -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ + -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) diff --git a/queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch b/queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch new file mode 100644 index 00000000000..3023941d42f --- /dev/null +++ b/queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch @@ -0,0 +1,594 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Mon, 27 Jun 2022 22:21:17 +0000 +Subject: x86/retbleed: Add fine grained Kconfig knobs + +From: Peter Zijlstra + +commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream. + +Do fine-grained Kconfig for all the various retbleed parts. + +NOTE: if your compiler doesn't support return thunks this will +silently 'upgrade' your mitigation to IBPB, you might not like this. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +[cascardo: there is no CONFIG_OBJTOOL] +[cascardo: objtool calling and option parsing has changed] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: + - In scripts/Makefile.build, add the objtool option with an ifdef + block, same as for other options + - Adjust filename, context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 8 +- + arch/x86/Kconfig | 106 +++++++++++++++++++++++-------- + arch/x86/entry/calling.h | 4 + + arch/x86/include/asm/disabled-features.h | 18 ++++- + arch/x86/include/asm/linkage.h | 4 - + arch/x86/include/asm/nospec-branch.h | 10 ++ + arch/x86/include/asm/static_call.h | 2 + arch/x86/kernel/alternative.c | 5 + + arch/x86/kernel/cpu/amd.c | 2 + arch/x86/kernel/cpu/bugs.c | 42 +++++++----- + arch/x86/kernel/static_call.c | 2 + arch/x86/kvm/emulate.c | 4 - + arch/x86/lib/retpoline.S | 4 + + scripts/Makefile.build | 3 + scripts/link-vmlinux.sh | 2 + security/Kconfig | 11 --- + tools/objtool/builtin-check.c | 3 + tools/objtool/builtin.h | 2 + tools/objtool/check.c | 9 ++ + 19 files changed, 172 insertions(+), 69 deletions(-) + +--- a/Makefile ++++ b/Makefile +@@ -672,14 +672,18 @@ endif + + ifdef CONFIG_CC_IS_GCC + RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) + endif + ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline +-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + endif ++ ++ifdef CONFIG_RETHUNK ++RETHUNK_CFLAGS := -mfunction-return=thunk-extern ++RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) ++endif ++ + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ 
-453,30 +453,6 @@ config GOLDFISH + def_bool y + depends on X86_GOLDFISH + +-config RETPOLINE +- bool "Avoid speculative indirect branches in kernel" +- default y +- help +- Compile kernel with the retpoline compiler options to guard against +- kernel-to-user data leaks by avoiding speculative indirect +- branches. Requires a compiler with -mindirect-branch=thunk-extern +- support for full protection. The kernel may run slower. +- +-config CC_HAS_SLS +- def_bool $(cc-option,-mharden-sls=all) +- +-config CC_HAS_RETURN_THUNK +- def_bool $(cc-option,-mfunction-return=thunk-extern) +- +-config SLS +- bool "Mitigate Straight-Line-Speculation" +- depends on CC_HAS_SLS && X86_64 +- default n +- help +- Compile the kernel with straight-line-speculation options to guard +- against straight line speculation. The kernel image might be slightly +- larger. +- + config X86_CPU_RESCTRL + bool "x86 CPU resource control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) +@@ -2430,6 +2406,88 @@ source "kernel/livepatch/Kconfig" + + endmenu + ++config CC_HAS_SLS ++ def_bool $(cc-option,-mharden-sls=all) ++ ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ ++menuconfig SPECULATION_MITIGATIONS ++ bool "Mitigations for speculative execution vulnerabilities" ++ default y ++ help ++ Say Y here to enable options which enable mitigations for ++ speculative execution hardware vulnerabilities. ++ ++ If you say N, all mitigations will be disabled. You really ++ should know what you are doing to say so. ++ ++if SPECULATION_MITIGATIONS ++ ++config PAGE_TABLE_ISOLATION ++ bool "Remove the kernel mapping in user mode" ++ default y ++ depends on (X86_64 || X86_PAE) ++ help ++ This feature reduces the number of hardware side channels by ++ ensuring that the majority of kernel addresses are not mapped ++ into userspace. ++ ++ See Documentation/x86/pti.rst for more details. 
++ ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ default y ++ help ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config RETHUNK ++ bool "Enable return-thunks" ++ depends on RETPOLINE && CC_HAS_RETURN_THUNK ++ default y ++ help ++ Compile the kernel with the return-thunks compiler option to guard ++ against kernel-to-user data leaks by avoiding return speculation. ++ Requires a compiler with -mfunction-return=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config CPU_UNRET_ENTRY ++ bool "Enable UNRET on kernel entry" ++ depends on CPU_SUP_AMD && RETHUNK ++ default y ++ help ++ Compile the kernel with support for the retbleed=unret mitigation. ++ ++config CPU_IBPB_ENTRY ++ bool "Enable IBPB on kernel entry" ++ depends on CPU_SUP_AMD ++ default y ++ help ++ Compile the kernel with support for the retbleed=ibpb mitigation. ++ ++config CPU_IBRS_ENTRY ++ bool "Enable IBRS on kernel entry" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Compile the kernel with support for the spectre_v2=ibrs mitigation. ++ This mitigates both spectre_v2 and retbleed at great cost to ++ performance. ++ ++config SLS ++ bool "Mitigate Straight-Line-Speculation" ++ depends on CC_HAS_SLS && X86_64 ++ default n ++ help ++ Compile the kernel with straight-line-speculation options to guard ++ against straight line speculation. The kernel image might be slightly ++ larger. ++ ++endif ++ + config ARCH_HAS_ADD_PAGES + def_bool y + depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -323,6 +323,7 @@ For 32-bit we have the following convent + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. 
+ */ + .macro IBRS_ENTER save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +@@ -343,6 +344,7 @@ For 32-bit we have the following convent + shr $32, %rdx + wrmsr + .Lend_\@: ++#endif + .endm + + /* +@@ -350,6 +352,7 @@ For 32-bit we have the following convent + * regs. Must be called after the last RET. + */ + .macro IBRS_EXIT save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +@@ -364,6 +367,7 @@ For 32-bit we have the following convent + shr $32, %rdx + wrmsr + .Lend_\@: ++#endif + .endm + + /* +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -60,9 +60,19 @@ + # define DISABLE_RETPOLINE 0 + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ +- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ +- (1 << (X86_FEATURE_RETHUNK & 31)) | \ +- (1 << (X86_FEATURE_UNRET & 31))) ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ ++#ifdef CONFIG_RETHUNK ++# define DISABLE_RETHUNK 0 ++#else ++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) ++#endif ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY ++# define DISABLE_UNRET 0 ++#else ++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) + #endif + + /* Force disable because it's broken beyond repair */ +@@ -82,7 +92,7 @@ + #define DISABLED_MASK8 0 + #define DISABLED_MASK9 (DISABLE_SMAP) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 (DISABLE_RETPOLINE) ++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) + #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -18,7 +18,7 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + +-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#if defined(CONFIG_RETHUNK) && 
!defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) + #define RET jmp __x86_return_thunk + #else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS +@@ -30,7 +30,7 @@ + + #else /* __ASSEMBLY__ */ + +-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) + #define ASM_RET "jmp __x86_return_thunk\n\t" + #else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -127,6 +127,12 @@ + .Lskip_rsb_\@: + .endm + ++#ifdef CONFIG_CPU_UNRET_ENTRY ++#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" ++#else ++#define CALL_ZEN_UNTRAIN_RET "" ++#endif ++ + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD +@@ -139,10 +145,10 @@ + * where we have a stack but before any RET instruction. + */ + .macro UNTRAIN_RET +-#ifdef CONFIG_RETPOLINE ++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ +- "call zen_untrain_ret", X86_FEATURE_UNRET, \ ++ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -44,7 +44,7 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + +-#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_RETHUNK + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") + #else +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -662,6 +662,7 @@ void __init_or_module noinline apply_ret + } + } + ++#ifdef CONFIG_RETHUNK + /* + * Rewrite the compiler generated return thunk tail-calls. 
+ * +@@ -723,6 +724,10 @@ void __init_or_module noinline apply_ret + } + } + } ++#else ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } ++#endif /* CONFIG_RETHUNK */ ++ + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -916,6 +916,7 @@ static void init_amd_bd(struct cpuinfo_x + + void init_spectral_chicken(struct cpuinfo_x86 *c) + { ++#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* +@@ -932,6 +933,7 @@ void init_spectral_chicken(struct cpuinf + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } ++#endif + } + + static void init_amd_zn(struct cpuinfo_x86 *c) +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -798,7 +798,6 @@ static int __init retbleed_parse_cmdline + early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" + #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) +@@ -813,18 +812,33 @@ static void __init retbleed_select_mitig + return; + + case RETBLEED_CMD_UNRET: +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); ++ goto do_cmd_auto; ++ } + break; + + case RETBLEED_CMD_IBPB: +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); ++ goto 
do_cmd_auto; ++ } + break; + ++do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in +@@ -837,14 +851,6 @@ static void __init retbleed_select_mitig + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: +- +- if (!IS_ENABLED(CONFIG_RETPOLINE) || +- !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { +- pr_err(RETBLEED_COMPILER_MSG); +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; +- goto retbleed_force_ibpb; +- } +- + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +@@ -856,7 +862,6 @@ static void __init retbleed_select_mitig + break; + + case RETBLEED_MITIGATION_IBPB: +-retbleed_force_ibpb: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; +@@ -1227,6 +1232,12 @@ static enum spectre_v2_mitigation_cmd __ + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. 
Switching to AUTO select\n", + mitigation_options[i].option); +@@ -1284,7 +1295,8 @@ static void __init spectre_v2_select_mit + break; + } + +- if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && ++ boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -108,7 +108,7 @@ void arch_static_call_transform(void *si + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); + +-#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_RETHUNK + /* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -435,10 +435,10 @@ static int fastop(struct x86_emulate_ctx + * Depending on .config the SETcc functions look like: + * + * SETcc %al [3 bytes] +- * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] + */ +-#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) + #define SETCC_LENGTH (3 + RET_LENGTH) + #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -71,6 +71,8 @@ SYM_CODE_END(__x86_indirect_thunk_array) + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. 
+ */ ++#ifdef CONFIG_RETHUNK ++ + .section .text.__x86.return_thunk + + /* +@@ -135,3 +137,5 @@ SYM_FUNC_END(zen_untrain_ret) + __EXPORT_THUNK(zen_untrain_ret) + + EXPORT_SYMBOL(__x86_return_thunk) ++ ++#endif /* CONFIG_RETHUNK */ +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -227,6 +227,9 @@ endif + ifdef CONFIG_RETPOLINE + objtool_args += --retpoline + endif ++ifdef CONFIG_RETHUNK ++ objtool_args += --rethunk ++endif + ifdef CONFIG_X86_SMAP + objtool_args += --uaccess + endif +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -65,7 +65,7 @@ objtool_link() + + if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then + objtoolopt="check" +- if [ -n "${CONFIG_RETPOLINE}" ]; then ++ if [ -n "${CONFIG_CPU_UNRET_ENTRY}" ]; then + objtoolopt="${objtoolopt} --unret" + fi + if [ -z "${CONFIG_FRAME_POINTER}" ]; then +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -54,17 +54,6 @@ config SECURITY_NETWORK + implement socket and networking access controls. + If you are unsure how to answer this question, answer N. + +-config PAGE_TABLE_ISOLATION +- bool "Remove the kernel mapping in user mode" +- default y +- depends on (X86_64 || X86_PAE) && !UML +- help +- This feature reduces the number of hardware side channels by +- ensuring that the majority of kernel addresses are not mapped +- into userspace. +- +- See Documentation/x86/pti.rst for more details. 
+- + config SECURITY_INFINIBAND + bool "Infiniband Security Hooks" + depends on SECURITY && INFINIBAND +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -19,7 +19,7 @@ + #include "objtool.h" + + bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, sls, unret; ++ validate_dup, vmlinux, sls, unret, rethunk; + + static const char * const check_usage[] = { + "objtool check [] file.o", +@@ -30,6 +30,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), + OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), ++ OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"), + OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), +--- a/tools/objtool/builtin.h ++++ b/tools/objtool/builtin.h +@@ -9,7 +9,7 @@ + + extern const struct option check_options[]; + extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +- validate_dup, vmlinux, sls, unret; ++ validate_dup, vmlinux, sls, unret, rethunk; + + extern int cmd_check(int argc, const char **argv); + extern int cmd_orc(int argc, const char **argv); +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -3262,8 +3262,11 @@ static int validate_retpoline(struct obj + continue; + + if (insn->type == INSN_RETURN) { +- WARN_FUNC("'naked' return found in RETPOLINE build", +- insn->sec, insn->offset); ++ if (rethunk) { ++ WARN_FUNC("'naked' return found in RETHUNK build", ++ insn->sec, insn->offset); ++ } else ++ continue; + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, +@@ -3533,7 +3536,9 @@ int check(struct objtool_file 
*file) + if (ret < 0) + goto out; + warnings += ret; ++ } + ++ if (rethunk) { + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; diff --git a/queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch b/queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch new file mode 100644 index 00000000000..fb6f6b69b6a --- /dev/null +++ b/queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:34 +0200 +Subject: x86/retpoline: Cleanup some #ifdefery + +From: Peter Zijlstra + +commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream. + +On its own not much of a cleanup but it prepares for more/similar +code. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: conflict fixup because of DISABLE_ENQCMD] +[cascardo: no changes at nospec-branch.h and bpf_jit_comp.c] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/disabled-features.h | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -56,6 +56,13 @@ + # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) + #endif + ++#ifdef CONFIG_RETPOLINE ++# define DISABLE_RETPOLINE 0 ++#else ++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ + /* Force disable because it's broken beyond repair */ + #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) + +@@ -73,7 +80,7 @@ + #define DISABLED_MASK8 0 + #define DISABLED_MASK9 (DISABLE_SMAP) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 0 ++#define DISABLED_MASK11 (DISABLE_RETPOLINE) + #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 
diff --git a/queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch b/queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch new file mode 100644 index 00000000000..f53fd2b3c4f --- /dev/null +++ b/queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch @@ -0,0 +1,105 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:41 +0200 +Subject: x86/retpoline: Create a retpoline thunk array + +From: Peter Zijlstra + +commit 1a6f74429c42a3854980359a758e222005712aee upstream. + +Stick all the retpolines in a single symbol and have the individual +thunks as inner labels, this should guarantee thunk order and layout. + +Previously there were 16 (or rather 15 without rsp) separate symbols and +a toolchain might reasonably expect it could displace them however it +liked, with disregard for their relative position. + +However, now they're part of a larger symbol. Any change to their +relative position would disrupt this larger _array symbol and thus not +be sound. + +This is the same reasoning used for data symbols. On their own there +is no guarantee about their relative position with respect to one another, but +we're still able to do arrays because an array as a whole is a single +larger symbol. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.169659320@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 8 +++++++- + arch/x86/lib/retpoline.S | 14 +++++++++----- + 2 files changed, 16 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -12,6 +12,8 @@ + #include + #include + ++#define RETPOLINE_THUNK_SIZE 32 ++ + /* + * Fill the CPU return stack buffer. 
+ * +@@ -120,11 +122,15 @@ + + #ifdef CONFIG_RETPOLINE + ++typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; ++ + #define GEN(reg) \ +- extern asmlinkage void __x86_indirect_thunk_ ## reg (void); ++ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; + #include + #undef GEN + ++extern retpoline_thunk_t __x86_indirect_thunk_array[]; ++ + #ifdef CONFIG_X86_64 + + /* +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -28,16 +28,14 @@ + + .macro THUNK reg + +- .align 32 +- +-SYM_FUNC_START(__x86_indirect_thunk_\reg) ++ .align RETPOLINE_THUNK_SIZE ++SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) ++ UNWIND_HINT_EMPTY + + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE + +-SYM_FUNC_END(__x86_indirect_thunk_\reg) +- + .endm + + /* +@@ -55,10 +53,16 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) + #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + ++ .align RETPOLINE_THUNK_SIZE ++SYM_CODE_START(__x86_indirect_thunk_array) ++ + #define GEN(reg) THUNK reg + #include + #undef GEN + ++ .align RETPOLINE_THUNK_SIZE ++SYM_CODE_END(__x86_indirect_thunk_array) ++ + #define GEN(reg) EXPORT_THUNK(reg) + #include + #undef GEN diff --git a/queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch b/queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch new file mode 100644 index 00000000000..b7a5f20e8bd --- /dev/null +++ b/queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:40 +0200 +Subject: x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h + +From: Peter Zijlstra + +commit 
6fda8a38865607db739be3e567a2387376222dbd upstream. + +Because it makes no sense to split the retpoline gunk over multiple +headers. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120310.106290934@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 8 -------- + arch/x86/include/asm/nospec-branch.h | 7 +++++++ + arch/x86/net/bpf_jit_comp.c | 1 - + 3 files changed, 7 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -17,11 +17,3 @@ + extern void cmpxchg8b_emu(void); + #endif + +-#ifdef CONFIG_RETPOLINE +- +-#define GEN(reg) \ +- extern asmlinkage void __x86_indirect_thunk_ ## reg (void); +-#include +-#undef GEN +- +-#endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -5,6 +5,7 @@ + + #include + #include ++#include + + #include + #include +@@ -118,6 +119,12 @@ + ".popsection\n\t" + + #ifdef CONFIG_RETPOLINE ++ ++#define GEN(reg) \ ++ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); ++#include ++#undef GEN ++ + #ifdef CONFIG_X86_64 + + /* +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -15,7 +15,6 @@ + #include + #include + #include +-#include + + static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) + { diff --git a/queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch b/queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch new file mode 100644 index 00000000000..c7f190402d6 --- /dev/null +++ b/queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch @@ -0,0 +1,97 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 26 Oct 2021 14:01:37 +0200 +Subject: x86/retpoline: Remove unused 
replacement symbols + +From: Peter Zijlstra + +commit 4fe79e710d9574a14993f8b4e16b7252da72d5e8 upstream. + +Now that objtool no longer creates alternatives, these replacement +symbols are no longer needed, remove them. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Borislav Petkov +Acked-by: Josh Poimboeuf +Tested-by: Alexei Starovoitov +Link: https://lore.kernel.org/r/20211026120309.915051744@infradead.org +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 10 -------- + arch/x86/lib/retpoline.S | 42 ---------------------------------- + 2 files changed, 52 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -24,14 +24,4 @@ extern void cmpxchg8b_emu(void); + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + #include + +-#undef GEN +-#define GEN(reg) \ +- extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); +-#include +- +-#undef GEN +-#define GEN(reg) \ +- extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); +-#include +- + #endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -41,36 +41,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) + .endm + + /* +- * This generates .altinstr_replacement symbols for use by objtool. They, +- * however, must not actually live in .altinstr_replacement since that will be +- * discarded after init, but module alternatives will also reference these +- * symbols. +- * +- * Their names matches the "__x86_indirect_" prefix to mark them as retpolines. 
+- */ +-.macro ALT_THUNK reg +- +- .align 1 +- +-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) +- ANNOTATE_RETPOLINE_SAFE +-1: call *%\reg +-2: .skip 5-(2b-1b), 0x90 +-SYM_FUNC_END(__x86_indirect_alt_call_\reg) +- +-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg) +- +-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) +- ANNOTATE_RETPOLINE_SAFE +-1: jmp *%\reg +-2: .skip 5-(2b-1b), 0x90 +-SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) +- +-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) +- +-.endm +- +-/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather +@@ -92,15 +62,3 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_ + #undef GEN + #define GEN(reg) EXPORT_THUNK(reg) + #include +- +-#undef GEN +-#define GEN(reg) ALT_THUNK reg +-#include +- +-#undef GEN +-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg) +-#include +- +-#undef GEN +-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg) +-#include diff --git a/queue-5.10/x86-retpoline-simplify-retpolines.patch b/queue-5.10/x86-retpoline-simplify-retpolines.patch new file mode 100644 index 00000000000..7ae493d02af --- /dev/null +++ b/queue-5.10/x86-retpoline-simplify-retpolines.patch @@ -0,0 +1,217 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Fri, 26 Mar 2021 16:12:02 +0100 +Subject: x86/retpoline: Simplify retpolines + +From: Peter Zijlstra + +commit 119251855f9adf9421cb5eb409933092141ab2c7 upstream. + +Due to: + + c9c324dc22aa ("objtool: Support stack layout changes in alternatives") + +it is now possible to simplify the retpolines. + +Currently our retpolines consist of 2 symbols: + + - __x86_indirect_thunk_\reg: the compiler target + - __x86_retpoline_\reg: the actual retpoline. 
+ +Both are consecutive in code and aligned such that for any one register +they both live in the same cacheline: + + 0000000000000000 <__x86_indirect_thunk_rax>: + 0: ff e0 jmpq *%rax + 2: 90 nop + 3: 90 nop + 4: 90 nop + + 0000000000000005 <__x86_retpoline_rax>: + 5: e8 07 00 00 00 callq 11 <__x86_retpoline_rax+0xc> + a: f3 90 pause + c: 0f ae e8 lfence + f: eb f9 jmp a <__x86_retpoline_rax+0x5> + 11: 48 89 04 24 mov %rax,(%rsp) + 15: c3 retq + 16: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1) + +The thunk is an alternative_2, where one option is a JMP to the +retpoline. This was done so that objtool didn't need to deal with +alternatives with stack ops. But that problem has been solved, so now +it is possible to fold the entire retpoline into the alternative to +simplify and consolidate unused bytes: + + 0000000000000000 <__x86_indirect_thunk_rax>: + 0: ff e0 jmpq *%rax + 2: 90 nop + 3: 90 nop + 4: 90 nop + 5: 90 nop + 6: 90 nop + 7: 90 nop + 8: 90 nop + 9: 90 nop + a: 90 nop + b: 90 nop + c: 90 nop + d: 90 nop + e: 90 nop + f: 90 nop + 10: 90 nop + 11: 66 66 2e 0f 1f 84 00 00 00 00 00 data16 nopw %cs:0x0(%rax,%rax,1) + 1c: 0f 1f 40 00 nopl 0x0(%rax) + +Notice that since the longest alternative sequence is now: + + 0: e8 07 00 00 00 callq c <.altinstr_replacement+0xc> + 5: f3 90 pause + 7: 0f ae e8 lfence + a: eb f9 jmp 5 <.altinstr_replacement+0x5> + c: 48 89 04 24 mov %rax,(%rsp) + 10: c3 retq + +17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW, if +we can shrink the retpoline by 1 byte we can pack it more densely). + + [ bp: Massage commit message. 
] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Ingo Molnar +Link: https://lkml.kernel.org/r/20210326151259.506071949@infradead.org +[bwh: Backported to 5.10: + - Use X86_FEATURE_RETPOLINE_LFENCE flag instead of + X86_FEATURE_RETPOLINE_AMD, since the later renaming of this flag + has already been applied + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/asm-prototypes.h | 7 ------- + arch/x86/include/asm/nospec-branch.h | 6 +++--- + arch/x86/lib/retpoline.S | 34 +++++++++++++++++----------------- + tools/objtool/check.c | 3 +-- + 4 files changed, 21 insertions(+), 29 deletions(-) + +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void); + #define DECL_INDIRECT_THUNK(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + +-#define DECL_RETPOLINE(reg) \ +- extern asmlinkage void __x86_retpoline_ ## reg (void); +- + #undef GEN + #define GEN(reg) DECL_INDIRECT_THUNK(reg) + #include + +-#undef GEN +-#define GEN(reg) DECL_RETPOLINE(reg) +-#include +- + #endif /* CONFIG_RETPOLINE */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -80,7 +80,7 @@ + .macro JMP_NOSPEC reg:req + #ifdef CONFIG_RETPOLINE + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ +- __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ ++ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE + #else + jmp *%\reg +@@ -90,7 +90,7 @@ + .macro CALL_NOSPEC reg:req + #ifdef CONFIG_RETPOLINE + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ +- __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ ++ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; 
ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE + #else + call *%\reg +@@ -128,7 +128,7 @@ + ALTERNATIVE_2( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ +- "call __x86_retpoline_%V[thunk_target]\n", \ ++ "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -10,27 +10,31 @@ + #include + #include + +-.macro THUNK reg +- .section .text.__x86.indirect_thunk +- +- .align 32 +-SYM_FUNC_START(__x86_indirect_thunk_\reg) +- JMP_NOSPEC \reg +-SYM_FUNC_END(__x86_indirect_thunk_\reg) +- +-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg) ++.macro RETPOLINE reg + ANNOTATE_INTRA_FUNCTION_CALL +- call .Ldo_rop_\@ ++ call .Ldo_rop_\@ + .Lspec_trap_\@: + UNWIND_HINT_EMPTY + pause + lfence +- jmp .Lspec_trap_\@ ++ jmp .Lspec_trap_\@ + .Ldo_rop_\@: +- mov %\reg, (%_ASM_SP) ++ mov %\reg, (%_ASM_SP) + UNWIND_HINT_FUNC + ret +-SYM_FUNC_END(__x86_retpoline_\reg) ++.endm ++ ++.macro THUNK reg ++ .section .text.__x86.indirect_thunk ++ ++ .align 32 ++SYM_FUNC_START(__x86_indirect_thunk_\reg) ++ ++ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ ++ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE ++ ++SYM_FUNC_END(__x86_indirect_thunk_\reg) + + .endm + +@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg) + + #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) + #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) +-#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg) + + #undef GEN + #define GEN(reg) THUNK reg +@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg) + #define GEN(reg) EXPORT_THUNK(reg) + #include + +-#undef GEN +-#define GEN(reg) EXPORT_RETPOLINE(reg) +-#include +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -800,8 +800,7 @@ static int 
add_jump_destinations(struct + } else if (reloc->sym->type == STT_SECTION) { + dest_sec = reloc->sym->sec; + dest_off = arch_dest_reloc_offset(reloc->addend); +- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || +- !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { ++ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. diff --git a/queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch b/queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch new file mode 100644 index 00000000000..3dd4e4a8f1b --- /dev/null +++ b/queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch @@ -0,0 +1,41 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:35 +0200 +Subject: x86/retpoline: Swizzle retpoline thunk + +From: Peter Zijlstra + +commit 00e1533325fd1fb5459229fe37f235462649f668 upstream. + +Put the actual retpoline thunk as the original code so that it can +become more complicated. Specifically, it allows RET to be a JMP, +which can't be .altinstr_replacement since that doesn't do relocations +(except for the very first instruction). 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/lib/retpoline.S | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -32,9 +32,9 @@ + SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + +- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ +- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ +- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE ++ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ ++ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) + + .endm + diff --git a/queue-5.10/x86-retpoline-use-mfunction-return.patch b/queue-5.10/x86-retpoline-use-mfunction-return.patch new file mode 100644 index 00000000000..3c14bbf2f38 --- /dev/null +++ b/queue-5.10/x86-retpoline-use-mfunction-return.patch @@ -0,0 +1,79 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:36 +0200 +Subject: x86/retpoline: Use -mfunction-return + +From: Peter Zijlstra + +commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream. + +Utilize -mfunction-return=thunk-extern when available to have the +compiler replace RET instructions with direct JMPs to the symbol +__x86_return_thunk. This does not affect assembler (.S) sources, only C +sources. + +-mfunction-return=thunk-extern has been available since gcc 7.3 and +clang 15. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Nick Desaulniers +Reviewed-by: Josh Poimboeuf +Tested-by: Nick Desaulniers +Signed-off-by: Borislav Petkov +[cascardo: RETPOLINE_CFLAGS is at Makefile] +[cascardo: remove ANNOTATE_NOENDBR from __x86_return_thunk] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 2 ++ + arch/x86/include/asm/nospec-branch.h | 2 ++ + arch/x86/lib/retpoline.S | 12 ++++++++++++ + 3 files changed, 16 insertions(+) + +--- a/Makefile ++++ b/Makefile +@@ -672,11 +672,13 @@ endif + + ifdef CONFIG_CC_IS_GCC + RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) + endif + ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline ++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + endif + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -120,6 +120,8 @@ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + ++extern void __x86_return_thunk(void); ++ + #ifdef CONFIG_RETPOLINE + + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -66,3 +66,15 @@ SYM_CODE_END(__x86_indirect_thunk_array) + #define GEN(reg) EXPORT_THUNK(reg) + #include + #undef GEN ++ ++/* ++ * This function name is magical and is used by -mfunction-return=thunk-extern ++ * for the compiler to generate JMPs to it. 
++ */ ++SYM_CODE_START(__x86_return_thunk) ++ UNWIND_HINT_EMPTY ++ ret ++ int3 ++SYM_CODE_END(__x86_return_thunk) ++ ++__EXPORT_THUNK(__x86_return_thunk) diff --git a/queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch b/queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch new file mode 100644 index 00000000000..c3c5c34b0a7 --- /dev/null +++ b/queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Kim Phillips +Date: Tue, 14 Jun 2022 23:15:44 +0200 +Subject: x86/sev: Avoid using __x86_return_thunk + +From: Kim Phillips + +commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream. + +Specifically, it's because __enc_copy() encrypts the kernel after +being relocated outside the kernel in sme_encrypt_execute(), and the +RET macro's jmp offset isn't amended prior to execution. + +Signed-off-by: Kim Phillips +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/mem_encrypt_boot.S | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute) + movq %rbp, %rsp /* Restore original stack pointer */ + pop %rbp + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ret ++ int3 + SYM_FUNC_END(sme_encrypt_execute) + + SYM_FUNC_START(__enc_copy) +@@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy) + pop %r12 + pop %r15 + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ret ++ int3 + .L__enc_copy_end: + SYM_FUNC_END(__enc_copy) diff --git a/queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch b/queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch new file mode 100644 index 
00000000000..850e41a89b3 --- /dev/null +++ b/queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch @@ -0,0 +1,209 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Pawan Gupta +Date: Tue, 14 Jun 2022 23:15:55 +0200 +Subject: x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS + +From: Pawan Gupta + +commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream. + +Extend spectre_v2= boot option with Kernel IBRS. + + [jpoimboe: no STIBP with IBRS] + +Signed-off-by: Pawan Gupta +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 1 + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 66 ++++++++++++++++++------ + 3 files changed, 54 insertions(+), 14 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5026,6 +5026,7 @@ + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE ++ ibrs - use IBRS to protect kernel + + Not specifying this option is equivalent to + spectre_v2=auto. 
+--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -212,6 +212,7 @@ enum spectre_v2_mitigation { + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, ++ SPECTRE_V2_IBRS, + }; + + /* The indirect branch speculation control variants */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -928,6 +928,7 @@ enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, ++ SPECTRE_V2_CMD_IBRS, + }; + + enum spectre_v2_user_cmd { +@@ -1000,11 +1001,12 @@ spectre_v2_parse_user_cmdline(enum spect + return SPECTRE_V2_USER_CMD_AUTO; + } + +-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) ++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) + { +- return (mode == SPECTRE_V2_EIBRS || +- mode == SPECTRE_V2_EIBRS_RETPOLINE || +- mode == SPECTRE_V2_EIBRS_LFENCE); ++ return mode == SPECTRE_V2_IBRS || ++ mode == SPECTRE_V2_EIBRS || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_EIBRS_LFENCE; + } + + static void __init +@@ -1069,12 +1071,12 @@ spectre_v2_user_select_mitigation(enum s + } + + /* +- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not +- * required. ++ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, ++ * STIBP is not required. 
+ */ + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || +- spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return; + + /* +@@ -1106,6 +1108,7 @@ static const char * const spectre_v2_str + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", ++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS", + }; + + static const struct { +@@ -1123,6 +1126,7 @@ static const struct { + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, ++ { "ibrs", SPECTRE_V2_CMD_IBRS, false }, + }; + + static void __init spec_v2_print_cond(const char *reason, bool secure) +@@ -1185,6 +1189,24 @@ static enum spectre_v2_mitigation_cmd __ + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { ++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { ++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { ++ pr_err("%s selected but running as XenPV guest. 
Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); + return cmd; +@@ -1224,6 +1246,14 @@ static void __init spectre_v2_select_mit + break; + } + ++ if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ retbleed_cmd != RETBLEED_CMD_OFF && ++ boot_cpu_has(X86_FEATURE_IBRS) && ++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ mode = SPECTRE_V2_IBRS; ++ break; ++ } ++ + mode = spectre_v2_select_retpoline(); + break; + +@@ -1240,6 +1270,10 @@ static void __init spectre_v2_select_mit + mode = spectre_v2_select_retpoline(); + break; + ++ case SPECTRE_V2_CMD_IBRS: ++ mode = SPECTRE_V2_IBRS; ++ break; ++ + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; +@@ -1256,7 +1290,7 @@ static void __init spectre_v2_select_mit + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + +- if (spectre_v2_in_eibrs_mode(mode)) { ++ if (spectre_v2_in_ibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); +@@ -1267,6 +1301,10 @@ static void __init spectre_v2_select_mit + case SPECTRE_V2_EIBRS: + break; + ++ case SPECTRE_V2_IBRS: ++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); ++ break; ++ + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); +@@ -1293,17 +1331,17 @@ static void __init spectre_v2_select_mit + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + + /* +- * Retpoline means the kernel is safe because it has no indirect +- * branches. Enhanced IBRS protects firmware too, so, enable restricted +- * speculation around firmware calls only when Enhanced IBRS isn't +- * supported. ++ * Retpoline protects the kernel, but doesn't protect firmware. 
IBRS ++ * and Enhanced IBRS protect firmware too, so enable IBRS around ++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise ++ * enabled. + * + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because + * the user might select retpoline on the kernel command line and if + * the CPU supports Enhanced IBRS, kernel might un-intentionally not + * enable IBRS around firmware calls. + */ +- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { ++ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } +@@ -2012,7 +2050,7 @@ static ssize_t mmio_stale_data_show_stat + + static char *stibp_state(void) + { +- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return ""; + + switch (spectre_v2_user_stibp) { diff --git a/queue-5.10/x86-speculation-disable-rrsba-behavior.patch b/queue-5.10/x86-speculation-disable-rrsba-behavior.patch new file mode 100644 index 00000000000..73c0f3b9832 --- /dev/null +++ b/queue-5.10/x86-speculation-disable-rrsba-behavior.patch @@ -0,0 +1,154 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Pawan Gupta +Date: Fri, 8 Jul 2022 13:36:09 -0700 +Subject: x86/speculation: Disable RRSBA behavior + +From: Pawan Gupta + +commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream. + +Some Intel processors may use alternate predictors for RETs on +RSB-underflow. This condition may be vulnerable to Branch History +Injection (BHI) and intramode-BTI. + +Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines, +eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against +such attacks. However, on RSB-underflow, RET target prediction may +fallback to alternate predictors. As a result, RET's predicted target +may get influenced by branch history. 
+ +A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback +behavior when in kernel mode. When set, RETs will not take predictions +from alternate predictors, hence mitigating RETs as well. Support for +this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2). + +For spectre v2 mitigation, when a user selects a mitigation that +protects indirect CALLs and JMPs against BHI and intramode-BTI, set +RRSBA_DIS_S also to protect RETs for RSB-underflow case. + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +[bwh: Backported to 5.15: adjust context in scattered.c] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/msr-index.h | 9 +++++++++ + arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++++++ + arch/x86/kernel/cpu/scattered.c | 1 + + tools/arch/x86/include/asm/msr-index.h | 9 +++++++++ + 5 files changed, 46 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -293,7 +293,7 @@ + /* FREE! (11*32+ 8) */ + /* FREE! (11*32+ 9) */ + #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +-/* FREE! 
(11*32+11) */ ++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -139,6 +141,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1274,6 +1274,22 @@ static enum spectre_v2_mitigation __init + return SPECTRE_V2_RETPOLINE; + } + ++/* Disable in-kernel use of non-RSB RET predictors */ ++static void __init spec_ctrl_disable_kernel_rrsba(void) ++{ ++ u64 ia32_cap; ++ ++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) ++ return; ++ ++ ia32_cap = x86_read_arch_cap_msr(); ++ ++ if (ia32_cap & ARCH_CAP_RRSBA) { ++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ } ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1368,6 +1384,16 @@ static void __init spectre_v2_select_mit + break; + } + ++ /* ++ * Disable alternate RSB predictions in kernel when indirect CALLs and ++ * JMPs gets protection against BHI and Intramode-BTI, but RET ++ * prediction from a non-RSB predictor is still a risk. 
++ */ ++ if (mode == SPECTRE_V2_EIBRS_LFENCE || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_RETPOLINE) ++ spec_ctrl_disable_kernel_rrsba(); ++ + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -26,6 +26,7 @@ struct cpuid_bit { + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, ++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -138,6 +140,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* diff --git a/queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch b/queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch new file mode 100644 index 00000000000..47e6b561d20 --- /dev/null +++ b/queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch @@ -0,0 +1,135 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:15 +0200 +Subject: x86/speculation: Fill RSB on vmexit for IBRS + +From: Josh Poimboeuf + +commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream. + +Prevent RSB underflow/poisoning attacks with RSB. While at it, add a +bunch of comments to attempt to document the current state of tribal +knowledge about RSB attacks and what exactly is being mitigated. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 2 - + arch/x86/kernel/cpu/bugs.c | 63 ++++++++++++++++++++++++++++++++++--- + arch/x86/kvm/vmx/vmenter.S | 6 +-- + 3 files changed, 62 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -204,7 +204,7 @@ + #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ + #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +-/* FREE! 
( 7*32+13) */ ++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1357,17 +1357,70 @@ static void __init spectre_v2_select_mit + pr_info("%s\n", spectre_v2_strings[mode]); + + /* +- * If spectre v2 protection has been enabled, unconditionally fill +- * RSB during a context switch; this protects against two independent +- * issues: ++ * If Spectre v2 protection has been enabled, fill the RSB during a ++ * context switch. In general there are two types of RSB attacks ++ * across context switches, for which the CALLs/RETs may be unbalanced. + * +- * - RSB underflow (and switch to BTB) on Skylake+ +- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs ++ * 1) RSB underflow ++ * ++ * Some Intel parts have "bottomless RSB". When the RSB is empty, ++ * speculated return targets may come from the branch predictor, ++ * which could have a user-poisoned BTB or BHB entry. ++ * ++ * AMD has it even worse: *all* returns are speculated from the BTB, ++ * regardless of the state of the RSB. ++ * ++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack ++ * scenario is mitigated by the IBRS branch prediction isolation ++ * properties, so the RSB buffer filling wouldn't be necessary to ++ * protect against this type of attack. ++ * ++ * The "user -> user" attack scenario is mitigated by RSB filling. ++ * ++ * 2) Poisoned RSB entry ++ * ++ * If the 'next' in-kernel return stack is shorter than 'prev', ++ * 'next' could be tricked into speculating with a user-poisoned RSB ++ * entry. ++ * ++ * The "user -> kernel" attack scenario is mitigated by SMEP and ++ * eIBRS. 
++ * ++ * The "user -> user" scenario, also known as SpectreBHB, requires ++ * RSB clearing. ++ * ++ * So to mitigate all cases, unconditionally fill RSB on context ++ * switches. ++ * ++ * FIXME: Is this pointless for retbleed-affected AMD? + */ + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + + /* ++ * Similar to context switches, there are two types of RSB attacks ++ * after vmexit: ++ * ++ * 1) RSB underflow ++ * ++ * 2) Poisoned RSB entry ++ * ++ * When retpoline is enabled, both are mitigated by filling/clearing ++ * the RSB. ++ * ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch ++ * prediction isolation protections, RSB still needs to be cleared ++ * because of #2. Note that SMEP provides no protection here, unlike ++ * user-space-poisoned RSB entries. ++ * ++ * eIBRS, on the other hand, has RSB-poisoning protections, so it ++ * doesn't need RSB clearing after vmexit. ++ */ ++ if (boot_cpu_has(X86_FEATURE_RETPOLINE) || ++ boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ ++ /* + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -193,15 +193,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * +- * For retpoline, RSB filling is needed to prevent poisoned RSB entries +- * and (in some cases) RSB underflow. ++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB ++ * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. 
But it does need to be enabled + * before the first unbalanced RET. + */ + +- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ diff --git a/queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch b/queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch new file mode 100644 index 00000000000..2f4a3d39107 --- /dev/null +++ b/queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:06 +0200 +Subject: x86/speculation: Fix firmware entry SPEC_CTRL handling + +From: Josh Poimboeuf + +commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream. + +The firmware entry code may accidentally clear STIBP or SSBD. Fix that. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -286,18 +286,16 @@ extern u64 spec_ctrl_current(void); + */ + #define firmware_restrict_branch_speculation_start() \ + do { \ +- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ +- \ + preempt_disable(); \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current() | SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ + } while (0) + + #define firmware_restrict_branch_speculation_end() \ + do { \ +- u64 val = x86_spec_ctrl_base; \ +- \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current(), \ + X86_FEATURE_USE_IBRS_FW); \ 
+ preempt_enable(); \ + } while (0) diff --git a/queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch b/queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch new file mode 100644 index 00000000000..896a4ffc2b8 --- /dev/null +++ b/queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch @@ -0,0 +1,78 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:05 +0200 +Subject: x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n + +From: Josh Poimboeuf + +commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream. + +If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants +to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be +silently disabled. + +There's nothing retpoline-specific about RSB buffer filling. Remove the +CONFIG_RETPOLINE guards around it. + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_32.S | 2 -- + arch/x86/entry/entry_64.S | 2 -- + arch/x86/include/asm/nospec-branch.h | 2 -- + 3 files changed, 6 deletions(-) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -782,7 +782,6 @@ SYM_CODE_START(__switch_to_asm) + movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -791,7 +790,6 @@ SYM_CODE_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* Restore flags or the incoming task to restore AC state. 
*/ + popfl +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -249,7 +249,6 @@ SYM_FUNC_START(__switch_to_asm) + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -258,7 +257,6 @@ SYM_FUNC_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* restore callee-saved registers */ + popq %r15 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -122,11 +122,9 @@ + * monstrosity above, manually. + */ + .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +-#ifdef CONFIG_RETPOLINE + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) + .Lskip_rsb_\@: +-#endif + .endm + + /* diff --git a/queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch b/queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch new file mode 100644 index 00000000000..b38f24d6384 --- /dev/null +++ b/queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:07 +0200 +Subject: x86/speculation: Fix SPEC_CTRL write on SMT state change + +From: Josh Poimboeuf + +commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream. + +If the SMT state changes, SSBD might get accidentally disabled. Fix +that. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1414,7 +1414,8 @@ static void __init spectre_v2_select_mit + + static void update_stibp_msr(void * __unused) + { +- write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); ++ write_spec_ctrl_current(val, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ diff --git a/queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch b/queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch new file mode 100644 index 00000000000..b4ac5bf90c1 --- /dev/null +++ b/queue-5.10/x86-speculation-remove-x86_spec_ctrl_mask.patch @@ -0,0 +1,88 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Fri, 17 Jun 2022 12:12:48 -0700 +Subject: x86/speculation: Remove x86_spec_ctrl_mask + +From: Josh Poimboeuf + +commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream. + +This mask has been made redundant by kvm_spec_ctrl_test_value(). And it +doesn't even work when MSR interception is disabled, as the guest can +just write to SPEC_CTRL directly. 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Reviewed-by: Paolo Bonzini +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 31 +------------------------------ + 1 file changed, 1 insertion(+), 30 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -85,12 +85,6 @@ u64 spec_ctrl_current(void) + EXPORT_SYMBOL_GPL(spec_ctrl_current); + + /* +- * The vendor and possibly platform specific bits which can be modified in +- * x86_spec_ctrl_base. +- */ +-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +- +-/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). + */ +@@ -138,10 +132,6 @@ void __init check_bugs(void) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + +- /* Allow STIBP in MSR_SPEC_CTRL if supported */ +- if (boot_cpu_has(X86_FEATURE_STIBP)) +- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; +- + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); +@@ -199,19 +189,10 @@ void __init check_bugs(void) + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = spec_ctrl_current(); ++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + +- /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { +- /* +- * Restrict guest_spec_ctrl to supported values. Clear the +- * modifiable bits in the host base value and or the +- * modifiable bits from the guest value. 
+- */ +- guestval = hostval & ~x86_spec_ctrl_mask; +- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; +- + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -1622,16 +1603,6 @@ static enum ssb_mitigation __init __ssb_ + } + + /* +- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper +- * bit in the mask to allow guests to use the mitigation even in the +- * case where the host does not enable it. +- */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) { +- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; +- } +- +- /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass diff --git a/queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch b/queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch new file mode 100644 index 00000000000..6304f785cf8 --- /dev/null +++ b/queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch @@ -0,0 +1,57 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Tue, 14 Jun 2022 23:16:08 +0200 +Subject: x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit + +From: Josh Poimboeuf + +commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream. + +There's no need to recalculate the host value for every entry/exit. +Just use the cached value in spec_ctrl_current(). 
+ +Signed-off-by: Josh Poimboeuf +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/bugs.c | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -199,7 +199,7 @@ void __init check_bugs(void) + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = x86_spec_ctrl_base; ++ u64 msrval, guestval, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + + /* Is MSR_SPEC_CTRL implemented ? */ +@@ -212,15 +212,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + +- /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) +- hostval |= ssbd_tif_to_spec_ctrl(ti->flags); +- +- /* Conditional STIBP enabled? */ +- if (static_branch_unlikely(&switch_to_cond_stibp)) +- hostval |= stibp_tif_to_spec_ctrl(ti->flags); +- + if (hostval != guestval) { + msrval = setguest ? 
guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -1353,7 +1344,6 @@ static void __init spectre_v2_select_mit + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_ibrs_mode(mode)) { +- /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } diff --git a/queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch b/queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch new file mode 100644 index 00000000000..36183cbae29 --- /dev/null +++ b/queue-5.10/x86-static_call-serialize-__static_call_fixup-properly.patch @@ -0,0 +1,73 @@ +From c27c753ea6fd1237f4f96abf8b623d7bab505513 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 12 Jul 2022 14:01:06 +0200 +Subject: x86/static_call: Serialize __static_call_fixup() properly + +From: Thomas Gleixner + +commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream. + +__static_call_fixup() invokes __static_call_transform() without holding +text_mutex, which causes lockdep to complain in text_poke_bp(). + +Adding the proper locking cures that, but as this is either used during +early boot or during module finalizing, it's not required to use +text_poke_bp(). Add an argument to __static_call_transform() which tells +it to use text_poke_early() for it. 
+ +Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/static_call.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -20,7 +20,8 @@ static const u8 tramp_ud[] = { 0x0f, 0xb + + static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + +-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) ++static void __ref __static_call_transform(void *insn, enum insn_type type, ++ void *func, bool modinit) + { + int size = CALL_INSN_SIZE; + const void *code; +@@ -49,7 +50,7 @@ static void __ref __static_call_transfor + if (memcmp(insn, code, size) == 0) + return; + +- if (unlikely(system_state == SYSTEM_BOOTING)) ++ if (system_state == SYSTEM_BOOTING || modinit) + return text_poke_early(insn, code, size); + + text_poke_bp(insn, code, size, NULL); +@@ -96,12 +97,12 @@ void arch_static_call_transform(void *si + + if (tramp) { + __static_call_validate(tramp, true); +- __static_call_transform(tramp, __sc_insn(!func, true), func); ++ __static_call_transform(tramp, __sc_insn(!func, true), func, false); + } + + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail); +- __static_call_transform(site, __sc_insn(!func, tail), func); ++ __static_call_transform(site, __sc_insn(!func, tail), func, false); + } + + mutex_unlock(&text_mutex); +@@ -127,8 +128,10 @@ bool __static_call_fixup(void *tramp, u8 + return false; + } + ++ mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) +- __static_call_transform(tramp, RET, NULL); ++ __static_call_transform(tramp, RET, NULL, true); ++ mutex_unlock(&text_mutex); + + return true; + } diff --git a/queue-5.10/x86-static_call-use-alternative-ret-encoding.patch 
b/queue-5.10/x86-static_call-use-alternative-ret-encoding.patch new file mode 100644 index 00000000000..9eb51d231b7 --- /dev/null +++ b/queue-5.10/x86-static_call-use-alternative-ret-encoding.patch @@ -0,0 +1,184 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:39 +0200 +Subject: x86,static_call: Use alternative RET encoding + +From: Peter Zijlstra + +commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream. + +In addition to teaching static_call about the new way to spell 'RET', +there is an added complication in that static_call() is allowed to +rewrite text before it is known which particular spelling is required. + +In order to deal with this; have a static_call specific fixup in the +apply_return() 'alternative' patching routine that will rewrite the +static_call trampoline to match the definite sequence. + +This in turn creates the problem of uniquely identifying static call +trampolines. Currently trampolines are 8 bytes, the first 5 being the +jmp.d32/ret sequence and the final 3 a byte sequence that spells out +'SCT'. + +This sequence is used in __static_call_validate() to ensure it is +patching a trampoline and not a random other jmp.d32. That is, +false-positives shouldn't be plenty, but aren't a big concern. + +OTOH the new __static_call_fixup() must not have false-positives, and +'SCT' decodes to the somewhat weird but semi plausible sequence: + + push %rbx + rex.XB push %r12 + +Additionally, there are SLS concerns with immediate jumps. Combined it +seems like a good moment to change the signature to a single 3 byte +trap instruction that is unique to this usage and will not ever get +generated by accident. 
+ +As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes +to: + + ud1 %esp, %ecx + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: skip validation as introduced by 2105a92748e8 ("static_call,x86: Robustify trampoline patching")] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/static_call.h | 17 ++++++++++++++++ + arch/x86/kernel/alternative.c | 12 +++++++---- + arch/x86/kernel/static_call.c | 38 ++++++++++++++++++++++++++++++++++++- + 3 files changed, 62 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -21,6 +21,16 @@ + * relative displacement across sections. + */ + ++/* ++ * The trampoline is 8 bytes and of the general form: ++ * ++ * jmp.d32 \func ++ * ud1 %esp, %ecx ++ * ++ * That trailing #UD provides both a speculation stop and serves as a unique ++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[] ++ * and __static_call_fixup(). ++ */ + #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ + asm(".pushsection .static_call.text, \"ax\" \n" \ + ".align 4 \n" \ +@@ -34,8 +44,13 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + ++#ifdef CONFIG_RETPOLINE ++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") ++#else + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") ++#endif + + + #define ARCH_ADD_TRAMP_KEY(name) \ +@@ -44,4 +59,6 @@ + ".long " STATIC_CALL_KEY_STR(name) " - . 
\n" \ + ".popsection \n") + ++extern bool __static_call_fixup(void *tramp, u8 op, void *dest); ++ + #endif /* _ASM_STATIC_CALL_H */ +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -693,18 +693,22 @@ void __init_or_module noinline apply_ret + s32 *s; + + for (s = start; s < end; s++) { +- void *addr = (void *)s + *s; ++ void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; +- u8 op1; ++ u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + +- op1 = insn.opcode.bytes[0]; +- if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) ++ op = insn.opcode.bytes[0]; ++ if (op == JMP32_INSN_OPCODE) ++ dest = addr + insn.length + insn.immediate.value; ++ ++ if (__static_call_fixup(addr, op, dest) || ++ WARN_ON_ONCE(dest != &__x86_return_thunk)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -11,6 +11,13 @@ enum insn_type { + RET = 3, /* tramp / site cond-tail-call */ + }; + ++/* ++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such ++ * that there is no false-positive trampoline identification while also being a ++ * speculation stop. 
++ */ ++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; ++ + static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + + static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +@@ -32,7 +39,10 @@ static void __ref __static_call_transfor + break; + + case RET: +- code = &retinsn; ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); ++ else ++ code = &retinsn; + break; + } + +@@ -97,3 +107,29 @@ void arch_static_call_transform(void *si + mutex_unlock(&text_mutex); + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); ++ ++#ifdef CONFIG_RETPOLINE ++/* ++ * This is called by apply_returns() to fix up static call trampolines, ++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as ++ * having a return trampoline. ++ * ++ * The problem is that static_call() is available before determining ++ * X86_FEATURE_RETHUNK and, by implication, running alternatives. ++ * ++ * This means that __static_call_transform() above can have overwritten the ++ * return trampoline and we now need to fix things up to be consistent. ++ */ ++bool __static_call_fixup(void *tramp, u8 op, void *dest) ++{ ++ if (memcmp(tramp+5, tramp_ud, 3)) { ++ /* Not a trampoline site, not our problem. */ ++ return false; ++ } ++ ++ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) ++ __static_call_transform(tramp, RET, NULL); ++ ++ return true; ++} ++#endif diff --git a/queue-5.10/x86-undo-return-thunk-damage.patch b/queue-5.10/x86-undo-return-thunk-damage.patch new file mode 100644 index 00000000000..f2be9eafa84 --- /dev/null +++ b/queue-5.10/x86-undo-return-thunk-damage.patch @@ -0,0 +1,195 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:37 +0200 +Subject: x86: Undo return-thunk damage + +From: Peter Zijlstra + +commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream. 
+ +Introduce X86_FEATURE_RETHUNK for those afflicted with needing this. + + [ bp: Do only INT3 padding - simpler. ] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL] +[cascardo: no IBT support] +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/alternative.h | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 + + arch/x86/kernel/alternative.c | 60 +++++++++++++++++++++++++++++++ + arch/x86/kernel/module.c | 8 +++- + arch/x86/kernel/vmlinux.lds.S | 7 +++ + 6 files changed, 78 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -76,6 +76,7 @@ extern int alternatives_patched; + extern void alternative_instructions(void); + extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + extern void apply_retpolines(s32 *start, s32 *end); ++extern void apply_returns(s32 *start, s32 *end); + + struct module; + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -296,6 +296,7 @@ + /* FREE! 
(11*32+11) */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -60,7 +60,8 @@ + # define DISABLE_RETPOLINE 0 + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ +- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ ++ (1 << (X86_FEATURE_RETHUNK & 31))) + #endif + + /* Force disable because it's broken beyond repair */ +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -270,6 +270,7 @@ static void __init_or_module add_nops(vo + } + + extern s32 __retpoline_sites[], __retpoline_sites_end[]; ++extern s32 __return_sites[], __return_sites_end[]; + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + extern s32 __smp_locks[], __smp_locks_end[]; + void text_poke_early(void *addr, const void *opcode, size_t len); +@@ -661,9 +662,67 @@ void __init_or_module noinline apply_ret + } + } + ++/* ++ * Rewrite the compiler generated return thunk tail-calls. 
++ * ++ * For example, convert: ++ * ++ * JMP __x86_return_thunk ++ * ++ * into: ++ * ++ * RET ++ */ ++static int patch_return(void *addr, struct insn *insn, u8 *bytes) ++{ ++ int i = 0; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ return -1; ++ ++ bytes[i++] = RET_INSN_OPCODE; ++ ++ for (; i < insn->length;) ++ bytes[i++] = INT3_INSN_OPCODE; ++ ++ return i; ++} ++ ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *addr = (void *)s + *s; ++ struct insn insn; ++ int len, ret; ++ u8 bytes[16]; ++ u8 op1; ++ ++ ret = insn_decode_kernel(&insn, addr); ++ if (WARN_ON_ONCE(ret < 0)) ++ continue; ++ ++ op1 = insn.opcode.bytes[0]; ++ if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) ++ continue; ++ ++ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", ++ addr, addr, insn.length, ++ addr + insn.length + insn.immediate.value); ++ ++ len = patch_return(addr, &insn, bytes); ++ if (len == insn.length) { ++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); ++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); ++ text_poke_early(addr, bytes, len); ++ } ++ } ++} + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } + + #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ + +@@ -956,6 +1015,7 @@ void __init alternative_instructions(voi + * those can rewrite the retpoline thunks. 
+ */ + apply_retpolines(__retpoline_sites, __retpoline_sites_end); ++ apply_returns(__return_sites, __return_sites_end); + + apply_alternatives(__alt_instructions, __alt_instructions_end); + +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -252,7 +252,7 @@ int module_finalize(const Elf_Ehdr *hdr, + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL, *orc = NULL, *orc_ip = NULL, +- *retpolines = NULL; ++ *retpolines = NULL, *returns = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { +@@ -270,12 +270,18 @@ int module_finalize(const Elf_Ehdr *hdr, + orc_ip = s; + if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) + retpolines = s; ++ if (!strcmp(".return_sites", secstrings + s->sh_name)) ++ returns = s; + } + + if (retpolines) { + void *rseg = (void *)retpolines->sh_addr; + apply_retpolines(rseg, rseg + retpolines->sh_size); + } ++ if (returns) { ++ void *rseg = (void *)returns->sh_addr; ++ apply_returns(rseg, rseg + returns->sh_size); ++ } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -284,6 +284,13 @@ SECTIONS + *(.retpoline_sites) + __retpoline_sites_end = .; + } ++ ++ . = ALIGN(8); ++ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { ++ __return_sites = .; ++ *(.return_sites) ++ __return_sites_end = .; ++ } + #endif + + /* diff --git a/queue-5.10/x86-use-return-thunk-in-asm-code.patch b/queue-5.10/x86-use-return-thunk-in-asm-code.patch new file mode 100644 index 00000000000..b1dc6fcee96 --- /dev/null +++ b/queue-5.10/x86-use-return-thunk-in-asm-code.patch @@ -0,0 +1,95 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:45 +0200 +Subject: x86: Use return-thunk in asm code + +From: Peter Zijlstra + +commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream. 
+ +Use the return thunk in asm code. If the thunk isn't needed, it will +get patched into a RET instruction during boot by apply_returns(). + +Since alternatives can't handle relocations outside of the first +instruction, putting a 'jmp __x86_return_thunk' in one is not valid, +therefore carve out the memmove ERMS path into a separate label and jump +to it. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +[cascardo: no RANDSTRUCT_CFLAGS] +Signed-off-by: Thadeu Lima de Souza Cascardo +[bwh: Backported to 5.10: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/vdso/Makefile | 1 + + arch/x86/include/asm/linkage.h | 8 ++++++++ + arch/x86/lib/memmove_64.S | 7 ++++++- + 3 files changed, 15 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/vdso/Makefile ++++ b/arch/x86/entry/vdso/Makefile +@@ -91,6 +91,7 @@ endif + endif + + $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) ++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO + + # + # vDSO code runs in userspace and -pg doesn't help with profiling anyway. 
+--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -18,19 +18,27 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + ++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define RET jmp __x86_return_thunk ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define RET ret; int3 + #else + #define RET ret + #endif ++#endif /* CONFIG_RETPOLINE */ + + #else /* __ASSEMBLY__ */ + ++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define ASM_RET "jmp __x86_return_thunk\n\t" ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define ASM_RET "ret; int3\n\t" + #else + #define ASM_RET "ret\n\t" + #endif ++#endif /* CONFIG_RETPOLINE */ + + #endif /* __ASSEMBLY__ */ + +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) + /* FSRM implies ERMS => no length checks, do the copy directly */ + .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM +- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS ++ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + + /* + * movsq instruction have many startup latency +@@ -206,6 +206,11 @@ SYM_FUNC_START(__memmove) + movb %r11b, (%rdi) + 13: + RET ++ ++.Lmemmove_erms: ++ movq %rdx, %rcx ++ rep movsb ++ RET + SYM_FUNC_END(__memmove) + SYM_FUNC_END_ALIAS(memmove) + EXPORT_SYMBOL(__memmove) diff --git a/queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch b/queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch new file mode 100644 index 00000000000..3328db0de54 --- /dev/null +++ b/queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:15:43 +0200 +Subject: x86/vsyscall_emu/64: Don't use RET in vsyscall emulation + +From: Peter Zijlstra + +commit 
15583e514eb16744b80be85dea0774ece153177d upstream. + +This is userspace code and doesn't play by the normal kernel rules. + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S ++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +@@ -19,17 +19,20 @@ __vsyscall_page: + + mov $__NR_gettimeofday, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall +- RET ++ ret ++ int3 + + .balign 4096, 0xcc + diff --git a/queue-5.10/x86-xen-rename-sys-entry-points.patch b/queue-5.10/x86-xen-rename-sys-entry-points.patch new file mode 100644 index 00000000000..b140c9a5bbe --- /dev/null +++ b/queue-5.10/x86-xen-rename-sys-entry-points.patch @@ -0,0 +1,134 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Peter Zijlstra +Date: Tue, 14 Jun 2022 23:16:00 +0200 +Subject: x86/xen: Rename SYS* entry points + +From: Peter Zijlstra + +commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream. + +Native SYS{CALL,ENTER} entry points are called +entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are +named consistently. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Borislav Petkov +Reviewed-by: Josh Poimboeuf +Signed-off-by: Borislav Petkov +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/xen/setup.c | 6 +++--- + arch/x86/xen/xen-asm.S | 20 ++++++++++---------- + arch/x86/xen/xen-ops.h | 6 +++--- + 3 files changed, 16 insertions(+), 16 deletions(-) + +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -922,7 +922,7 @@ void xen_enable_sysenter(void) + if (!boot_cpu_has(sysenter_feature)) + return; + +- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); ++ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); + if(ret != 0) + setup_clear_cpu_cap(sysenter_feature); + } +@@ -931,7 +931,7 @@ void xen_enable_syscall(void) + { + int ret; + +- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); ++ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); + if (ret != 0) { + printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); + /* Pretty fatal; 64-bit userspace has no other +@@ -940,7 +940,7 @@ void xen_enable_syscall(void) + + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { + ret = register_callback(CALLBACKTYPE_syscall32, +- xen_syscall32_target); ++ xen_entry_SYSCALL_compat); + if (ret != 0) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + } +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -276,7 +276,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu + */ + + /* Normal 64-bit system call target */ +-SYM_CODE_START(xen_syscall_target) ++SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_EMPTY + popq %rcx + popq %r11 +@@ -290,12 +290,12 @@ SYM_CODE_START(xen_syscall_target) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +-SYM_CODE_END(xen_syscall_target) ++SYM_CODE_END(xen_entry_SYSCALL_64) + + #ifdef CONFIG_IA32_EMULATION + + /* 32-bit compat syscall target */ 
+-SYM_CODE_START(xen_syscall32_target) ++SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_EMPTY + popq %rcx + popq %r11 +@@ -309,10 +309,10 @@ SYM_CODE_START(xen_syscall32_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ +-SYM_CODE_START(xen_sysenter_target) ++SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_EMPTY + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means +@@ -330,18 +330,18 @@ SYM_CODE_START(xen_sysenter_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +-SYM_CODE_END(xen_sysenter_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) + + #else /* !CONFIG_IA32_EMULATION */ + +-SYM_CODE_START(xen_syscall32_target) +-SYM_CODE_START(xen_sysenter_target) ++SYM_CODE_START(xen_entry_SYSCALL_compat) ++SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_EMPTY + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +-SYM_CODE_END(xen_sysenter_target) +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + #endif /* CONFIG_IA32_EMULATION */ +--- a/arch/x86/xen/xen-ops.h ++++ b/arch/x86/xen/xen-ops.h +@@ -10,10 +10,10 @@ + /* These are code, but not functions. 
Defined in entry.S */ + extern const char xen_failsafe_callback[]; + +-void xen_sysenter_target(void); ++void xen_entry_SYSENTER_compat(void); + #ifdef CONFIG_X86_64 +-void xen_syscall_target(void); +-void xen_syscall32_target(void); ++void xen_entry_SYSCALL_64(void); ++void xen_entry_SYSCALL_compat(void); + #endif + + extern void *xen_initial_gdt; diff --git a/queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch b/queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch new file mode 100644 index 00000000000..19c84cd28f8 --- /dev/null +++ b/queue-5.10/x86-xen-support-objtool-validation-in-xen-asm.s.patch @@ -0,0 +1,138 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Thu, 21 Jan 2021 15:29:28 -0600 +Subject: x86/xen: Support objtool validation in xen-asm.S + +From: Josh Poimboeuf + +commit cde07a4e4434ddfb9b1616ac971edf6d66329804 upstream. + +The OBJECT_FILES_NON_STANDARD annotation is used to tell objtool to +ignore a file. File-level ignores won't work when validating vmlinux.o. + +Tweak the ELF metadata and unwind hints to allow objtool to follow the +code. 
+ +Cc: Juergen Gross +Reviewed-by: Boris Ostrovsky +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/8b042a09c69e8645f3b133ef6653ba28f896807d.1611263462.git.jpoimboe@redhat.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/xen/Makefile | 1 - + arch/x86/xen/xen-asm.S | 29 +++++++++++++++++++---------- + 2 files changed, 19 insertions(+), 11 deletions(-) + +--- a/arch/x86/xen/Makefile ++++ b/arch/x86/xen/Makefile +@@ -1,5 +1,4 @@ + # SPDX-License-Identifier: GPL-2.0 +-OBJECT_FILES_NON_STANDARD_xen-asm.o := y + + ifdef CONFIG_FUNCTION_TRACER + # Do not profile debug and lowlevel utilities +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include + +@@ -147,6 +148,7 @@ SYM_FUNC_END(xen_read_cr2_direct); + + .macro xen_pv_trap name + SYM_CODE_START(xen_\name) ++ UNWIND_HINT_EMPTY + pop %rcx + pop %r11 + jmp \name +@@ -186,6 +188,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callb + SYM_CODE_START(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS ++ UNWIND_HINT_EMPTY + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE +@@ -212,11 +215,13 @@ hypercall_iret = hypercall_page + __HYPE + * rsp->rax } + */ + SYM_CODE_START(xen_iret) ++ UNWIND_HINT_EMPTY + pushq $0 + jmp hypercall_iret + SYM_CODE_END(xen_iret) + + SYM_CODE_START(xen_sysret64) ++ UNWIND_HINT_EMPTY + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back. 
+@@ -271,7 +276,8 @@ SYM_CODE_END(xenpv_restore_regs_and_retu + */ + + /* Normal 64-bit system call target */ +-SYM_FUNC_START(xen_syscall_target) ++SYM_CODE_START(xen_syscall_target) ++ UNWIND_HINT_EMPTY + popq %rcx + popq %r11 + +@@ -284,12 +290,13 @@ SYM_FUNC_START(xen_syscall_target) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +-SYM_FUNC_END(xen_syscall_target) ++SYM_CODE_END(xen_syscall_target) + + #ifdef CONFIG_IA32_EMULATION + + /* 32-bit compat syscall target */ +-SYM_FUNC_START(xen_syscall32_target) ++SYM_CODE_START(xen_syscall32_target) ++ UNWIND_HINT_EMPTY + popq %rcx + popq %r11 + +@@ -302,10 +309,11 @@ SYM_FUNC_START(xen_syscall32_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +-SYM_FUNC_END(xen_syscall32_target) ++SYM_CODE_END(xen_syscall32_target) + + /* 32-bit compat sysenter target */ +-SYM_FUNC_START(xen_sysenter_target) ++SYM_CODE_START(xen_sysenter_target) ++ UNWIND_HINT_EMPTY + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. 
+@@ -322,17 +330,18 @@ SYM_FUNC_START(xen_sysenter_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +-SYM_FUNC_END(xen_sysenter_target) ++SYM_CODE_END(xen_sysenter_target) + + #else /* !CONFIG_IA32_EMULATION */ + +-SYM_FUNC_START_ALIAS(xen_syscall32_target) +-SYM_FUNC_START(xen_sysenter_target) ++SYM_CODE_START(xen_syscall32_target) ++SYM_CODE_START(xen_sysenter_target) ++ UNWIND_HINT_EMPTY + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +-SYM_FUNC_END(xen_sysenter_target) +-SYM_FUNC_END_ALIAS(xen_syscall32_target) ++SYM_CODE_END(xen_sysenter_target) ++SYM_CODE_END(xen_syscall32_target) + + #endif /* CONFIG_IA32_EMULATION */ diff --git a/queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch b/queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch new file mode 100644 index 00000000000..a6d85db6b8d --- /dev/null +++ b/queue-5.10/x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Jul 12 05:07:35 PM CEST 2022 +From: Josh Poimboeuf +Date: Thu, 21 Jan 2021 15:29:29 -0600 +Subject: x86/xen: Support objtool vmlinux.o validation in xen-head.S + +From: Josh Poimboeuf + +commit f4b4bc10b0b85ec66f1a9bf5dddf475e6695b6d2 upstream. + +The Xen hypercall page is filled with zeros, causing objtool to fall +through all the empty hypercall functions until it reaches a real +function, resulting in a stack state mismatch. + +The build-time contents of the hypercall page don't matter because the +page gets rewritten by the hypervisor. Make it more palatable to +objtool by making each hypervisor function a true empty function, with +nops and a return. 
+ +Cc: Juergen Gross +Reviewed-by: Boris Ostrovsky +Signed-off-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/0883bde1d7a1fb3b6a4c952bc0200e873752f609.1611263462.git.jpoimboe@redhat.com +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/xen/xen-head.S | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/x86/xen/xen-head.S ++++ b/arch/x86/xen/xen-head.S +@@ -68,8 +68,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle) + .balign PAGE_SIZE + SYM_CODE_START(hypercall_page) + .rept (PAGE_SIZE / 32) +- UNWIND_HINT_EMPTY +- .skip 32 ++ UNWIND_HINT_FUNC ++ .skip 31, 0x90 ++ ret + .endr + + #define HYPERCALL(n) \ -- 2.47.3