--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:48 +0200
+Subject: bpf,x86: Respect X86_FEATURE_RETPOLINE*
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 87c87ecd00c54ecd677798cb49ef27329e0fab41 upstream.
+
+Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and
+unconditionally emits a thunk call, this is sub-optimal and doesn't
+match the regular, compiler generated, code.
+
+Update the i386 JIT to emit code equal to what the compiler emits for
+the regular kernel text (IOW. a plain THUNK call).
+
+Update the x86_64 JIT to emit code similar to the result of compiler
+and kernel rewrites as according to X86_FEATURE_RETPOLINE* flags.
+Inlining RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg),
+while doing a THUNK call for RETPOLINE.
+
+This removes the hard-coded retpoline thunks and shrinks the generated
+code. Leaving a single retpoline thunk definition in the kernel.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.614772675@infradead.org
+[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: add the necessary cnt variable to
+ emit_indirect_jump()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 59 -----------------------------------
+ arch/x86/net/bpf_jit_comp.c | 49 +++++++++++++----------------
+ arch/x86/net/bpf_jit_comp32.c | 22 +++++++++++--
+ 3 files changed, 42 insertions(+), 88 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -318,63 +318,4 @@ static inline void mds_idle_clear_cpu_bu
+
+ #endif /* __ASSEMBLY__ */
+
+-/*
+- * Below is used in the eBPF JIT compiler and emits the byte sequence
+- * for the following assembly:
+- *
+- * With retpolines configured:
+- *
+- * callq do_rop
+- * spec_trap:
+- * pause
+- * lfence
+- * jmp spec_trap
+- * do_rop:
+- * mov %rcx,(%rsp) for x86_64
+- * mov %edx,(%esp) for x86_32
+- * retq
+- *
+- * Without retpolines configured:
+- *
+- * jmp *%rcx for x86_64
+- * jmp *%edx for x86_32
+- */
+-#ifdef CONFIG_RETPOLINE
+-# ifdef CONFIG_X86_64
+-# define RETPOLINE_RCX_BPF_JIT_SIZE 17
+-# define RETPOLINE_RCX_BPF_JIT() \
+-do { \
+- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+- /* spec_trap: */ \
+- EMIT2(0xF3, 0x90); /* pause */ \
+- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+- /* do_rop: */ \
+- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
+- EMIT1(0xC3); /* retq */ \
+-} while (0)
+-# else /* !CONFIG_X86_64 */
+-# define RETPOLINE_EDX_BPF_JIT() \
+-do { \
+- EMIT1_off32(0xE8, 7); /* call do_rop */ \
+- /* spec_trap: */ \
+- EMIT2(0xF3, 0x90); /* pause */ \
+- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+- /* do_rop: */ \
+- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
+- EMIT1(0xC3); /* ret */ \
+-} while (0)
+-# endif
+-#else /* !CONFIG_RETPOLINE */
+-# ifdef CONFIG_X86_64
+-# define RETPOLINE_RCX_BPF_JIT_SIZE 2
+-# define RETPOLINE_RCX_BPF_JIT() \
+- EMIT2(0xFF, 0xE1); /* jmp *%rcx */
+-# else /* !CONFIG_X86_64 */
+-# define RETPOLINE_EDX_BPF_JIT() \
+- EMIT2(0xFF, 0xE2) /* jmp *%edx */
+-# endif
+-#endif
+-
+ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -379,6 +379,26 @@ int bpf_arch_text_poke(void *ip, enum bp
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ }
+
++#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
++
++static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
++{
++ u8 *prog = *pprog;
++ int cnt = 0;
++
++#ifdef CONFIG_RETPOLINE
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
++ EMIT_LFENCE();
++ EMIT2(0xFF, 0xE0 + reg);
++ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
++ emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
++ } else
++#endif
++ EMIT2(0xFF, 0xE0 + reg);
++
++ *pprog = prog;
++}
++
+ /*
+ * Generate the following code:
+ *
+@@ -460,7 +480,7 @@ static void emit_bpf_tail_call_indirect(
+ * rdi == ctx (1st arg)
+ * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
+ */
+- RETPOLINE_RCX_BPF_JIT();
++ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
+
+ /* out: */
+ ctx->tail_call_indirect_label = prog - start;
+@@ -1099,8 +1119,7 @@ static int do_jit(struct bpf_prog *bpf_p
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+- /* Emit 'lfence' */
+- EMIT3(0x0F, 0xAE, 0xE8);
++ EMIT_LFENCE();
+ break;
+
+ /* ST: *(u8*)(dst_reg + off) = imm */
+@@ -1878,26 +1897,6 @@ cleanup:
+ return ret;
+ }
+
+-static int emit_fallback_jump(u8 **pprog)
+-{
+- u8 *prog = *pprog;
+- int err = 0;
+-
+-#ifdef CONFIG_RETPOLINE
+- /* Note that this assumes the the compiler uses external
+- * thunks for indirect calls. Both clang and GCC use the same
+- * naming convention for external thunks.
+- */
+- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+-#else
+- int cnt = 0;
+-
+- EMIT2(0xFF, 0xE2); /* jmp rdx */
+-#endif
+- *pprog = prog;
+- return err;
+-}
+-
+ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+ {
+ u8 *jg_reloc, *prog = *pprog;
+@@ -1919,9 +1918,7 @@ static int emit_bpf_dispatcher(u8 **ppro
+ if (err)
+ return err;
+
+- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
+- if (err)
+- return err;
++ emit_indirect_jump(&prog, 2 /* rdx */, prog);
+
+ *pprog = prog;
+ return 0;
+--- a/arch/x86/net/bpf_jit_comp32.c
++++ b/arch/x86/net/bpf_jit_comp32.c
+@@ -15,6 +15,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/set_memory.h>
+ #include <asm/nospec-branch.h>
++#include <asm/asm-prototypes.h>
+ #include <linux/bpf.h>
+
+ /*
+@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u3
+ *pprog = prog;
+ }
+
++static int emit_jmp_edx(u8 **pprog, u8 *ip)
++{
++ u8 *prog = *pprog;
++ int cnt = 0;
++
++#ifdef CONFIG_RETPOLINE
++ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
++#else
++ EMIT2(0xFF, 0xE2);
++#endif
++ *pprog = prog;
++
++ return cnt;
++}
++
+ /*
+ * Generate the following code:
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u3
+ * goto *(prog->bpf_func + prologue_size);
+ * out:
+ */
+-static void emit_bpf_tail_call(u8 **pprog)
++static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
+ {
+ u8 *prog = *pprog;
+ int cnt = 0;
+@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **ppro
+ * eax == ctx (1st arg)
+ * edx == prog->bpf_func + prologue_size
+ */
+- RETPOLINE_EDX_BPF_JIT();
++ cnt += emit_jmp_edx(&prog, ip + cnt);
+
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+@@ -1929,7 +1945,7 @@ static int do_jit(struct bpf_prog *bpf_p
+ break;
+ }
+ case BPF_JMP | BPF_TAIL_CALL:
+- emit_bpf_tail_call(&prog);
++ emit_bpf_tail_call(&prog, image + addrs[i - 1]);
+ break;
+
+ /* cond jump */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:47 +0200
+Subject: bpf,x86: Simplify computing label offsets
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit dceba0817ca329868a15e2e1dd46eb6340b69206 upstream.
+
+Take an idea from the 32bit JIT, which uses the multi-pass nature of
+the JIT to compute the instruction offsets on a prior pass in order to
+compute the relative jump offsets on a later pass.
+
+Application to the x86_64 JIT is slightly more involved because the
+offsets depend on program variables (such as callee_regs_used and
+stack_depth) and hence the computed offsets need to be kept in the
+context of the JIT.
+
+This removes, IMO quite fragile, code that hard-codes the offsets and
+tries to compute the length of variable parts of it.
+
+Convert both emit_bpf_tail_call_*() functions which have an out: label
+at the end. Additionally emit_bpf_tail_call_direct() also has a poke
+table entry, for which it computes the offset from the end (and thus
+already relies on the previous pass to have computed addrs[i]), also
+convert this to be a forward based offset.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.552304864@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: keep the cnt variable in
+ emit_bpf_tail_call_{,in}direct()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 125 ++++++++++++++------------------------------
+ 1 file changed, 42 insertions(+), 83 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -212,6 +212,14 @@ static void jit_fill_hole(void *area, un
+
+ struct jit_context {
+ int cleanup_addr; /* Epilogue code offset */
++
++ /*
++ * Program specific offsets of labels in the code; these rely on the
++ * JIT doing at least 2 passes, recording the position on the first
++ * pass, only to generate the correct offset on the second pass.
++ */
++ int tail_call_direct_label;
++ int tail_call_indirect_label;
+ };
+
+ /* Maximum number of bytes emitted while JITing one eBPF insn */
+@@ -371,22 +379,6 @@ int bpf_arch_text_poke(void *ip, enum bp
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ }
+
+-static int get_pop_bytes(bool *callee_regs_used)
+-{
+- int bytes = 0;
+-
+- if (callee_regs_used[3])
+- bytes += 2;
+- if (callee_regs_used[2])
+- bytes += 2;
+- if (callee_regs_used[1])
+- bytes += 2;
+- if (callee_regs_used[0])
+- bytes += 1;
+-
+- return bytes;
+-}
+-
+ /*
+ * Generate the following code:
+ *
+@@ -402,30 +394,12 @@ static int get_pop_bytes(bool *callee_re
+ * out:
+ */
+ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+- u32 stack_depth)
++ u32 stack_depth, u8 *ip,
++ struct jit_context *ctx)
+ {
+ int tcc_off = -4 - round_up(stack_depth, 8);
+- u8 *prog = *pprog;
+- int pop_bytes = 0;
+- int off1 = 42;
+- int off2 = 31;
+- int off3 = 9;
+- int cnt = 0;
+-
+- /* count the additional bytes used for popping callee regs from stack
+- * that need to be taken into account for each of the offsets that
+- * are used for bailing out of the tail call
+- */
+- pop_bytes = get_pop_bytes(callee_regs_used);
+- off1 += pop_bytes;
+- off2 += pop_bytes;
+- off3 += pop_bytes;
+-
+- if (stack_depth) {
+- off1 += 7;
+- off2 += 7;
+- off3 += 7;
+- }
++ u8 *prog = *pprog, *start = *pprog;
++ int cnt = 0, offset;
+
+ /*
+ * rdi - pointer to ctx
+@@ -440,8 +414,9 @@ static void emit_bpf_tail_call_indirect(
+ EMIT2(0x89, 0xD2); /* mov edx, edx */
+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
+ offsetof(struct bpf_array, map.max_entries));
+-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
+- EMIT2(X86_JBE, OFFSET1); /* jbe out */
++
++ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++ EMIT2(X86_JBE, offset); /* jbe out */
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+@@ -449,8 +424,9 @@ static void emit_bpf_tail_call_indirect(
+ */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
+- EMIT2(X86_JA, OFFSET2); /* ja out */
++
++ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++ EMIT2(X86_JA, offset); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+
+@@ -463,12 +439,11 @@ static void emit_bpf_tail_call_indirect(
+ * goto out;
+ */
+ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
+-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
+- EMIT2(X86_JE, OFFSET3); /* je out */
+
+- *pprog = prog;
+- pop_callee_regs(pprog, callee_regs_used);
+- prog = *pprog;
++ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++ EMIT2(X86_JE, offset); /* je out */
++
++ pop_callee_regs(&prog, callee_regs_used);
+
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+@@ -488,39 +463,18 @@ static void emit_bpf_tail_call_indirect(
+ RETPOLINE_RCX_BPF_JIT();
+
+ /* out: */
++ ctx->tail_call_indirect_label = prog - start;
+ *pprog = prog;
+ }
+
+ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+- u8 **pprog, int addr, u8 *image,
+- bool *callee_regs_used, u32 stack_depth)
++ u8 **pprog, u8 *ip,
++ bool *callee_regs_used, u32 stack_depth,
++ struct jit_context *ctx)
+ {
+ int tcc_off = -4 - round_up(stack_depth, 8);
+- u8 *prog = *pprog;
+- int pop_bytes = 0;
+- int off1 = 20;
+- int poke_off;
+- int cnt = 0;
+-
+- /* count the additional bytes used for popping callee regs to stack
+- * that need to be taken into account for jump offset that is used for
+- * bailing out from of the tail call when limit is reached
+- */
+- pop_bytes = get_pop_bytes(callee_regs_used);
+- off1 += pop_bytes;
+-
+- /*
+- * total bytes for:
+- * - nop5/ jmpq $off
+- * - pop callee regs
+- * - sub rsp, $val if depth > 0
+- * - pop rax
+- */
+- poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+- if (stack_depth) {
+- poke_off += 7;
+- off1 += 7;
+- }
++ u8 *prog = *pprog, *start = *pprog;
++ int cnt = 0, offset;
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+@@ -528,28 +482,30 @@ static void emit_bpf_tail_call_direct(st
+ */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+- EMIT2(X86_JA, off1); /* ja out */
++
++ offset = ctx->tail_call_direct_label - (prog + 2 - start);
++ EMIT2(X86_JA, offset); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+
+- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
++ poke->tailcall_bypass = ip + (prog - start);
+ poke->adj_off = X86_TAIL_CALL_OFFSET;
+- poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
++ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
+ poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
+
+ emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
+ poke->tailcall_bypass);
+
+- *pprog = prog;
+- pop_callee_regs(pprog, callee_regs_used);
+- prog = *pprog;
++ pop_callee_regs(&prog, callee_regs_used);
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
+
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
++
+ /* out: */
++ ctx->tail_call_direct_label = prog - start;
+
+ *pprog = prog;
+ }
+@@ -1274,13 +1230,16 @@ xadd: if (is_imm8(insn->off))
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (imm32)
+ emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+- &prog, addrs[i], image,
++ &prog, image + addrs[i - 1],
+ callee_regs_used,
+- bpf_prog->aux->stack_depth);
++ bpf_prog->aux->stack_depth,
++ ctx);
+ else
+ emit_bpf_tail_call_indirect(&prog,
+ callee_regs_used,
+- bpf_prog->aux->stack_depth);
++ bpf_prog->aux->stack_depth,
++ image + addrs[i - 1],
++ ctx);
+ break;
+
+ /* cond jump */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 24 Mar 2022 00:05:55 +0100
+Subject: crypto: x86/poly1305 - Fixup SLS
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7ed7aa4de9421229be6d331ed52d5cd09c99f409 upstream.
+
+Due to being a perl generated asm file, it got missed by the mass
+convertion script.
+
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_init_x86_64()+0x3a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_x86_64()+0xf2: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_emit_x86_64()+0x37: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: __poly1305_block()+0x6d: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: __poly1305_init_avx()+0x1e8: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx()+0x18a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx()+0xaf8: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_emit_avx()+0x99: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx2()+0x18a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx2()+0x776: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x18a: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x796: missing int3 after ret
+arch/x86/crypto/poly1305-x86_64-cryptogams.o: warning: objtool: poly1305_blocks_avx512()+0x10bd: missing int3 after ret
+
+Fixes: f94909ceb1ed ("x86: Prepare asm files for straight-line-speculation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 38 +++++++++++++-------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
++++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+@@ -297,7 +297,7 @@ ___
+ $code.=<<___;
+ mov \$1,%eax
+ .Lno_key:
+- ret
++ RET
+ ___
+ &end_function("poly1305_init_x86_64");
+
+@@ -373,7 +373,7 @@ $code.=<<___;
+ .cfi_adjust_cfa_offset -48
+ .Lno_data:
+ .Lblocks_epilogue:
+- ret
++ RET
+ .cfi_endproc
+ ___
+ &end_function("poly1305_blocks_x86_64");
+@@ -399,7 +399,7 @@ $code.=<<___;
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+- ret
++ RET
+ ___
+ &end_function("poly1305_emit_x86_64");
+ if ($avx) {
+@@ -429,7 +429,7 @@ ___
+ &poly1305_iteration();
+ $code.=<<___;
+ pop $ctx
+- ret
++ RET
+ .size __poly1305_block,.-__poly1305_block
+
+ .type __poly1305_init_avx,\@abi-omnipotent
+@@ -594,7 +594,7 @@ __poly1305_init_avx:
+
+ lea -48-64($ctx),$ctx # size [de-]optimization
+ pop %rbp
+- ret
++ RET
+ .size __poly1305_init_avx,.-__poly1305_init_avx
+ ___
+
+@@ -747,7 +747,7 @@ $code.=<<___;
+ .cfi_restore %rbp
+ .Lno_data_avx:
+ .Lblocks_avx_epilogue:
+- ret
++ RET
+ .cfi_endproc
+
+ .align 32
+@@ -1452,7 +1452,7 @@ $code.=<<___ if (!$win64);
+ ___
+ $code.=<<___;
+ vzeroupper
+- ret
++ RET
+ .cfi_endproc
+ ___
+ &end_function("poly1305_blocks_avx");
+@@ -1508,7 +1508,7 @@ $code.=<<___;
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+- ret
++ RET
+ ___
+ &end_function("poly1305_emit_avx");
+
+@@ -1675,7 +1675,7 @@ $code.=<<___;
+ .cfi_restore %rbp
+ .Lno_data_avx2$suffix:
+ .Lblocks_avx2_epilogue$suffix:
+- ret
++ RET
+ .cfi_endproc
+
+ .align 32
+@@ -2201,7 +2201,7 @@ $code.=<<___ if (!$win64);
+ ___
+ $code.=<<___;
+ vzeroupper
+- ret
++ RET
+ .cfi_endproc
+ ___
+ if($avx > 2 && $avx512) {
+@@ -2792,7 +2792,7 @@ $code.=<<___ if (!$win64);
+ .cfi_def_cfa_register %rsp
+ ___
+ $code.=<<___;
+- ret
++ RET
+ .cfi_endproc
+ ___
+
+@@ -2893,7 +2893,7 @@ $code.=<<___ if ($flavour =~ /elf32/);
+ ___
+ $code.=<<___;
+ mov \$1,%eax
+- ret
++ RET
+ .size poly1305_init_base2_44,.-poly1305_init_base2_44
+ ___
+ {
+@@ -3010,7 +3010,7 @@ poly1305_blocks_vpmadd52:
+ jnz .Lblocks_vpmadd52_4x
+
+ .Lno_data_vpmadd52:
+- ret
++ RET
+ .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
+ ___
+ }
+@@ -3451,7 +3451,7 @@ poly1305_blocks_vpmadd52_4x:
+ vzeroall
+
+ .Lno_data_vpmadd52_4x:
+- ret
++ RET
+ .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
+ ___
+ }
+@@ -3824,7 +3824,7 @@ $code.=<<___;
+ vzeroall
+
+ .Lno_data_vpmadd52_8x:
+- ret
++ RET
+ .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
+ ___
+ }
+@@ -3861,7 +3861,7 @@ poly1305_emit_base2_44:
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+- ret
++ RET
+ .size poly1305_emit_base2_44,.-poly1305_emit_base2_44
+ ___
+ } } }
+@@ -3916,7 +3916,7 @@ xor128_encrypt_n_pad:
+
+ .Ldone_enc:
+ mov $otp,%rax
+- ret
++ RET
+ .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+
+ .globl xor128_decrypt_n_pad
+@@ -3967,7 +3967,7 @@ xor128_decrypt_n_pad:
+
+ .Ldone_dec:
+ mov $otp,%rax
+- ret
++ RET
+ .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
+ ___
+ }
+@@ -4109,7 +4109,7 @@ avx_handler:
+ pop %rbx
+ pop %rdi
+ pop %rsi
+- ret
++ RET
+ .size avx_handler,.-avx_handler
+
+ .section .pdata
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:58 +0200
+Subject: intel_idle: Disable IBRS during long idle
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream.
+
+Having IBRS enabled while the SMT sibling is idle unnecessarily slows
+down the running sibling. OTOH, disabling IBRS around idle takes two
+MSR writes, which will increase the idle latency.
+
+Therefore, only disable IBRS around deeper idle states. Shallow idle
+states are bounded by the tick in duration, since NOHZ is not allowed
+for them by virtue of their short target residency.
+
+Only do this for mwait-driven idle, since that keeps interrupts disabled
+across idle, which makes disabling IBRS vs IRQ-entry a non-issue.
+
+Note: C6 is a random threshold, most importantly C1 probably shouldn't
+disable IBRS, benchmarking needed.
+
+Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no CPUIDLE_FLAG_IRQ_ENABLE]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 1
+ arch/x86/kernel/cpu/bugs.c | 6 ++++
+ drivers/idle/intel_idle.c | 43 ++++++++++++++++++++++++++++++-----
+ 3 files changed, 44 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -256,6 +256,7 @@ static inline void indirect_branch_predi
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
+ extern void write_spec_ctrl_current(u64 val, bool force);
++extern u64 spec_ctrl_current(void);
+
+ /*
+ * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -78,6 +78,12 @@ void write_spec_ctrl_current(u64 val, bo
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ }
+
++u64 spec_ctrl_current(void)
++{
++ return this_cpu_read(x86_spec_ctrl_current);
++}
++EXPORT_SYMBOL_GPL(spec_ctrl_current);
++
+ /*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -47,11 +47,13 @@
+ #include <linux/tick.h>
+ #include <trace/events/power.h>
+ #include <linux/sched.h>
++#include <linux/sched/smt.h>
+ #include <linux/notifier.h>
+ #include <linux/cpu.h>
+ #include <linux/moduleparam.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
++#include <asm/nospec-branch.h>
+ #include <asm/mwait.h>
+ #include <asm/msr.h>
+
+@@ -94,6 +96,12 @@ static unsigned int mwait_substates __in
+ #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
+
+ /*
++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
++ * above.
++ */
++#define CPUIDLE_FLAG_IBRS BIT(16)
++
++/*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+@@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct c
+ return index;
+ }
+
++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
++ struct cpuidle_driver *drv, int index)
++{
++ bool smt_active = sched_smt_active();
++ u64 spec_ctrl = spec_ctrl_current();
++ int ret;
++
++ if (smt_active)
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
++ ret = intel_idle(dev, drv, index);
++
++ if (smt_active)
++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
++
++ return ret;
++}
++
+ /**
+ * intel_idle_s2idle - Ask the processor to enter the given idle state.
+ * @dev: cpuidle device of the target CPU.
+@@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 85,
+ .target_residency = 200,
+ .enter = &intel_idle,
+@@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[
+ {
+ .name = "C7s",
+ .desc = "MWAIT 0x33",
+- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 124,
+ .target_residency = 800,
+ .enter = &intel_idle,
+@@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 200,
+ .target_residency = 800,
+ .enter = &intel_idle,
+@@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[
+ {
+ .name = "C9",
+ .desc = "MWAIT 0x50",
+- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 480,
+ .target_residency = 5000,
+ .enter = &intel_idle,
+@@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 890,
+ .target_residency = 5000,
+ .enter = &intel_idle,
+@@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 133,
+ .target_residency = 600,
+ .enter = &intel_idle,
+@@ -1501,6 +1527,11 @@ static void __init intel_idle_init_cstat
+ /* Structure copy. */
+ drv->states[drv->state_count] = cpuidle_state_table[cstate];
+
++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
++ drv->states[drv->state_count].enter = intel_idle_ibrs;
++ }
++
+ if ((disabled_states_mask & BIT(drv->state_count)) ||
+ ((icpu->use_acpi || force_use_acpi) &&
+ intel_idle_off_by_default(mwait_hint) &&
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 16 Mar 2022 22:05:52 +0100
+Subject: kvm/emulate: Fix SETcc emulation function offsets with SLS
+
+From: Borislav Petkov <bp@suse.de>
+
+commit fe83f5eae432ccc8e90082d6ed506d5233547473 upstream.
+
+The commit in Fixes started adding INT3 after RETs as a mitigation
+against straight-line speculation.
+
+The fastop SETcc implementation in kvm's insn emulator uses macro magic
+to generate all possible SETcc functions and to jump to them when
+emulating the respective instruction.
+
+However, it hardcodes the size and alignment of those functions to 4: a
+three-byte SETcc insn and a single-byte RET. BUT, with SLS, there's an
+INT3 that gets slapped after the RET, which brings the whole scheme out
+of alignment:
+
+ 15: 0f 90 c0 seto %al
+ 18: c3 ret
+ 19: cc int3
+ 1a: 0f 1f 00 nopl (%rax)
+ 1d: 0f 91 c0 setno %al
+ 20: c3 ret
+ 21: cc int3
+ 22: 0f 1f 00 nopl (%rax)
+ 25: 0f 92 c0 setb %al
+ 28: c3 ret
+ 29: cc int3
+
+and this explodes like this:
+
+ int3: 0000 [#1] PREEMPT SMP PTI
+ CPU: 0 PID: 2435 Comm: qemu-system-x86 Not tainted 5.17.0-rc8-sls #1
+ Hardware name: Dell Inc. Precision WorkStation T3400 /0TP412, BIOS A14 04/30/2012
+ RIP: 0010:setc+0x5/0x8 [kvm]
+ Code: 00 00 0f 1f 00 0f b6 05 43 24 06 00 c3 cc 0f 1f 80 00 00 00 00 0f 90 c0 c3 cc 0f \
+ 1f 00 0f 91 c0 c3 cc 0f 1f 00 0f 92 c0 c3 cc <0f> 1f 00 0f 93 c0 c3 cc 0f 1f 00 \
+ 0f 94 c0 c3 cc 0f 1f 00 0f 95 c0
+ Call Trace:
+ <TASK>
+ ? x86_emulate_insn [kvm]
+ ? x86_emulate_instruction [kvm]
+ ? vmx_handle_exit [kvm_intel]
+ ? kvm_arch_vcpu_ioctl_run [kvm]
+ ? kvm_vcpu_ioctl [kvm]
+ ? __x64_sys_ioctl
+ ? do_syscall_64
+ ? entry_SYSCALL_64_after_hwframe
+ </TASK>
+
+Raise the alignment value when SLS is enabled and use a macro for that
+instead of hard-coding naked numbers.
+
+Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation")
+Reported-by: Jamie Heilman <jamie@audible.transient.net>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Jamie Heilman <jamie@audible.transient.net>
+Link: https://lore.kernel.org/r/YjGzJwjrvxg5YZ0Z@audible.transient.net
+[Add a comment and a bit of safety checking, since this is going to be changed
+ again for IBT support. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -428,8 +428,23 @@ static int fastop(struct x86_emulate_ctx
+ FOP_END
+
+ /* Special case for SETcc - 1 instruction per cc */
++
++/*
++ * Depending on .config the SETcc functions look like:
++ *
++ * SETcc %al [3 bytes]
++ * RET [1 byte]
++ * INT3 [1 byte; CONFIG_SLS]
++ *
++ * Which gives possible sizes 4 or 5. When rounded up to the
++ * next power-of-two alignment they become 4 or 8.
++ */
++#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS))
++#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS))
++static_assert(SETCC_LENGTH <= SETCC_ALIGN);
++
+ #define FOP_SETCC(op) \
+- ".align 4 \n\t" \
++ ".align " __stringify(SETCC_ALIGN) " \n\t" \
+ ".type " #op ", @function \n\t" \
+ #op ": \n\t" \
+ #op " %al \n\t" \
+@@ -1055,7 +1070,7 @@ static int em_bsr_c(struct x86_emulate_c
+ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
+ {
+ u8 rc;
+- void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
++ void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
+
+ flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ asm("push %[flags]; popf; " CALL_NOSPEC
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Wed, 30 Dec 2020 16:26:57 -0800
+Subject: KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 150f17bfab37e981ba03b37440638138ff2aa9ec upstream.
+
+Replace inline assembly in nested_vmx_check_vmentry_hw
+with a call to __vmx_vcpu_run. The function is not
+performance critical, so (double) GPR save/restore
+in __vmx_vcpu_run can be tolerated, as far as performance
+effects are concerned.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Reviewed-and-tested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+[sean: dropped versioning info from changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20201231002702.2223707-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 32 +++-----------------------------
+ arch/x86/kvm/vmx/vmenter.S | 2 +-
+ arch/x86/kvm/vmx/vmx.c | 2 --
+ arch/x86/kvm/vmx/vmx.h | 1 +
+ 4 files changed, 5 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -12,6 +12,7 @@
+ #include "nested.h"
+ #include "pmu.h"
+ #include "trace.h"
++#include "vmx.h"
+ #include "x86.h"
+
+ static bool __read_mostly enable_shadow_vmcs = 1;
+@@ -3075,35 +3076,8 @@ static int nested_vmx_check_vmentry_hw(s
+ vmx->loaded_vmcs->host_state.cr4 = cr4;
+ }
+
+- asm(
+- "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
+- "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+- "je 1f \n\t"
+- __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
+- "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+- "1: \n\t"
+- "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
+-
+- /* Check if vmlaunch or vmresume is needed */
+- "cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
+-
+- /*
+- * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
+- * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
+- * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the
+- * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
+- */
+- "call vmx_vmenter\n\t"
+-
+- CC_SET(be)
+- : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
+- : [HOST_RSP]"r"((unsigned long)HOST_RSP),
+- [loaded_vmcs]"r"(vmx->loaded_vmcs),
+- [launched]"i"(offsetof(struct loaded_vmcs, launched)),
+- [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
+- [wordsize]"i"(sizeof(ulong))
+- : "memory"
+- );
++ vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
++ vmx->loaded_vmcs->launched);
+
+ if (vmx->msr_autoload.host.nr)
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -44,7 +44,7 @@
+ * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+ * to vmx_vmexit.
+ */
+-SYM_FUNC_START(vmx_vmenter)
++SYM_FUNC_START_LOCAL(vmx_vmenter)
+ /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+ je 2f
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6687,8 +6687,6 @@ static fastpath_t vmx_exit_handlers_fast
+ }
+ }
+
+-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+-
+ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ struct vcpu_vmx *vmx)
+ {
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -365,6 +365,7 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:12 +0200
+Subject: KVM: VMX: Convert launched argument to flags
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream.
+
+Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in
+preparation for doing SPEC_CTRL handling immediately after vmexit, which
+will need another flag.
+
+This is much easier than adding a fourth argument, because this code
+supports both 32-bit and 64-bit, and the fourth argument on 32-bit would
+have to be pushed on the stack.
+
+Note that __vmx_vcpu_run_flags() is called outside of the noinstr
+critical section because it will soon start calling potentially
+traceable functions.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 2 +-
+ arch/x86/kvm/vmx/run_flags.h | 7 +++++++
+ arch/x86/kvm/vmx/vmenter.S | 9 +++++----
+ arch/x86/kvm/vmx/vmx.c | 17 ++++++++++++++---
+ arch/x86/kvm/vmx/vmx.h | 5 ++++-
+ 5 files changed, 31 insertions(+), 9 deletions(-)
+ create mode 100644 arch/x86/kvm/vmx/run_flags.h
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3077,7 +3077,7 @@ static int nested_vmx_check_vmentry_hw(s
+ }
+
+ vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+- vmx->loaded_vmcs->launched);
++ __vmx_vcpu_run_flags(vmx));
+
+ if (vmx->msr_autoload.host.nr)
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+--- /dev/null
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __KVM_X86_VMX_RUN_FLAGS_H
++#define __KVM_X86_VMX_RUN_FLAGS_H
++
++#define VMX_RUN_VMRESUME (1 << 0)
++
++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -5,6 +5,7 @@
+ #include <asm/kvm_vcpu_regs.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/segment.h>
++#include "run_flags.h"
+
+ #define WORD_SIZE (BITS_PER_LONG / 8)
+
+@@ -34,7 +35,7 @@
+ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+ * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+ * @regs: unsigned long * (to guest registers)
+- * @launched: %true if the VMCS has been launched
++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+ *
+ * Returns:
+ * 0 on VM-Exit, 1 on VM-Fail
+@@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ */
+ push %_ASM_ARG2
+
+- /* Copy @launched to BL, _ASM_ARG3 is volatile. */
++ /* Copy @flags to BL, _ASM_ARG3 is volatile. */
+ mov %_ASM_ARG3B, %bl
+
+ lea (%_ASM_SP), %_ASM_ARG2
+@@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov (%_ASM_SP), %_ASM_AX
+
+ /* Check if vmlaunch or vmresume is needed */
+- testb %bl, %bl
++ testb $VMX_RUN_VMRESUME, %bl
+
+ /* Load guest registers. Don't clobber flags. */
+ mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+@@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
+ /* Check EFLAGS.ZF from 'testb' above */
+- je .Lvmlaunch
++ jz .Lvmlaunch
+
+ /*
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -936,6 +936,16 @@ static bool msr_write_intercepted(struct
+ return true;
+ }
+
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
++{
++ unsigned int flags = 0;
++
++ if (vmx->loaded_vmcs->launched)
++ flags |= VMX_RUN_VMRESUME;
++
++ return flags;
++}
++
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+ unsigned long entry, unsigned long exit)
+ {
+@@ -6688,7 +6698,8 @@ static fastpath_t vmx_exit_handlers_fast
+ }
+
+ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+- struct vcpu_vmx *vmx)
++ struct vcpu_vmx *vmx,
++ unsigned long flags)
+ {
+ /*
+ * VMENTER enables interrupts (host state), but the kernel state is
+@@ -6725,7 +6736,7 @@ static noinstr void vmx_vcpu_enter_exit(
+ native_write_cr2(vcpu->arch.cr2);
+
+ vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+- vmx->loaded_vmcs->launched);
++ flags);
+
+ vcpu->arch.cr2 = native_read_cr2();
+
+@@ -6824,7 +6835,7 @@ reenter_guest:
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+
+ /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+- vmx_vcpu_enter_exit(vcpu, vmx);
++ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -13,6 +13,7 @@
+ #include "vmcs.h"
+ #include "vmx_ops.h"
+ #include "cpuid.h"
++#include "run_flags.h"
+
+ extern const u32 vmx_msr_index[];
+
+@@ -365,7 +366,9 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
++ unsigned int flags);
+ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:14 +0200
+Subject: KVM: VMX: Fix IBRS handling after vmexit
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream.
+
+For legacy IBRS to work, the IBRS bit needs to be always re-written
+after vmexit, even if it's already on.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6706,8 +6706,13 @@ void noinstr vmx_spec_ctrl_restore_host(
+
+ /*
+ * If the guest/host SPEC_CTRL values differ, restore the host value.
++ *
++ * For legacy IBRS, the IBRS bit always needs to be written after
++ * transitioning from a less privileged predictor mode, regardless of
++ * whether the guest/host values differ.
+ */
+- if (vmx->spec_ctrl != hostval)
++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
++ vmx->spec_ctrl != hostval)
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+
+ barrier_nospec();
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:11 +0200
+Subject: KVM: VMX: Flatten __vmx_vcpu_run()
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream.
+
+Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run(). This
+will make it easier to do the spec_ctrl handling before the first RET.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: remove ENDBR]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmenter.S | 118 +++++++++++++++++----------------------------
+ 1 file changed, 45 insertions(+), 73 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -31,68 +31,6 @@
+ .section .noinstr.text, "ax"
+
+ /**
+- * vmx_vmenter - VM-Enter the current loaded VMCS
+- *
+- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
+- *
+- * Returns:
+- * %RFLAGS.CF is set on VM-Fail Invalid
+- * %RFLAGS.ZF is set on VM-Fail Valid
+- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
+- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+- * to vmx_vmexit.
+- */
+-SYM_FUNC_START_LOCAL(vmx_vmenter)
+- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+- je 2f
+-
+-1: vmresume
+- RET
+-
+-2: vmlaunch
+- RET
+-
+-3: cmpb $0, kvm_rebooting
+- je 4f
+- RET
+-4: ud2
+-
+- _ASM_EXTABLE(1b, 3b)
+- _ASM_EXTABLE(2b, 3b)
+-
+-SYM_FUNC_END(vmx_vmenter)
+-
+-/**
+- * vmx_vmexit - Handle a VMX VM-Exit
+- *
+- * Returns:
+- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
+- * here after hardware loads the host's state, i.e. this is the destination
+- * referred to by VMCS.HOST_RIP.
+- */
+-SYM_FUNC_START(vmx_vmexit)
+-#ifdef CONFIG_RETPOLINE
+- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+- /* Preserve guest's RAX, it's used to stuff the RSB. */
+- push %_ASM_AX
+-
+- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+- or $1, %_ASM_AX
+-
+- pop %_ASM_AX
+-.Lvmexit_skip_rsb:
+-#endif
+- RET
+-SYM_FUNC_END(vmx_vmexit)
+-
+-/**
+ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+ * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+ * @regs: unsigned long * (to guest registers)
+@@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ /* Copy @launched to BL, _ASM_ARG3 is volatile. */
+ mov %_ASM_ARG3B, %bl
+
+- /* Adjust RSP to account for the CALL to vmx_vmenter(). */
+- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
++ lea (%_ASM_SP), %_ASM_ARG2
+ call vmx_update_host_rsp
+
+ /* Load @regs to RAX. */
+@@ -154,11 +91,36 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ /* Load guest RAX. This kills the @regs pointer! */
+ mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
+- /* Enter guest mode */
+- call vmx_vmenter
++ /* Check EFLAGS.ZF from 'testb' above */
++ je .Lvmlaunch
+
+- /* Jump on VM-Fail. */
+- jbe 2f
++ /*
++ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
++ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
++ * So this isn't a typical function and objtool needs to be told to
++ * save the unwind state here and restore it below.
++ */
++ UNWIND_HINT_SAVE
++
++/*
++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
++ * the 'vmx_vmexit' label below.
++ */
++.Lvmresume:
++ vmresume
++ jmp .Lvmfail
++
++.Lvmlaunch:
++ vmlaunch
++ jmp .Lvmfail
++
++ _ASM_EXTABLE(.Lvmresume, .Lfixup)
++ _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
++
++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
++
++ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
++ UNWIND_HINT_RESTORE
+
+ /* Temporarily save guest's RAX. */
+ push %_ASM_AX
+@@ -185,9 +147,13 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
+
++ /* IMPORTANT: RSB must be stuffed before the first return. */
++ FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++
+ /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+ xor %eax, %eax
+
++.Lclear_regs:
+ /*
+ * Clear all general purpose registers except RSP and RAX to prevent
+ * speculative use of the guest's values, even those that are reloaded
+@@ -197,7 +163,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ * free. RSP and RAX are exempt as RSP is restored by hardware during
+ * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+ */
+-1: xor %ecx, %ecx
++ xor %ecx, %ecx
+ xor %edx, %edx
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+@@ -216,8 +182,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
+
+ /* "POP" @regs. */
+ add $WORD_SIZE, %_ASM_SP
+- pop %_ASM_BX
+
++ pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+ pop %r12
+ pop %r13
+@@ -230,9 +196,15 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ pop %_ASM_BP
+ RET
+
+- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
+-2: mov $1, %eax
+- jmp 1b
++.Lfixup:
++ cmpb $0, kvm_rebooting
++ jne .Lvmfail
++ ud2
++.Lvmfail:
++ /* VM-Fail: set return value to 1 */
++ mov $1, %eax
++ jmp .Lclear_regs
++
+ SYM_FUNC_END(__vmx_vcpu_run)
+
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:13 +0200
+Subject: KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream.
+
+On eIBRS systems, the returns in the vmexit return path from
+__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks.
+
+Fix that by moving the post-vmexit spec_ctrl handling to immediately
+after the vmexit.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 1
+ arch/x86/kernel/cpu/bugs.c | 4 ++
+ arch/x86/kvm/vmx/run_flags.h | 1
+ arch/x86/kvm/vmx/vmenter.S | 49 +++++++++++++++++++++++++++--------
+ arch/x86/kvm/vmx/vmx.c | 48 ++++++++++++++++++++--------------
+ arch/x86/kvm/vmx/vmx.h | 1
+ 6 files changed, 73 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -275,6 +275,7 @@ static inline void indirect_branch_predi
+
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++extern u64 x86_spec_ctrl_current;
+ extern void write_spec_ctrl_current(u64 val, bool force);
+ extern u64 spec_ctrl_current(void);
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -186,6 +186,10 @@ void __init check_bugs(void)
+ #endif
+ }
+
++/*
++ * NOTE: For VMX, this function is not called in the vmexit path.
++ * It uses vmx_spec_ctrl_restore_host() instead.
++ */
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+--- a/arch/x86/kvm/vmx/run_flags.h
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -3,5 +3,6 @@
+ #define __KVM_X86_VMX_RUN_FLAGS_H
+
+ #define VMX_RUN_VMRESUME (1 << 0)
++#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+
+ #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -33,9 +33,10 @@
+
+ /**
+ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
++ * @vmx: struct vcpu_vmx *
+ * @regs: unsigned long * (to guest registers)
+- * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
++ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
+ *
+ * Returns:
+ * 0 on VM-Exit, 1 on VM-Fail
+@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ #endif
+ push %_ASM_BX
+
++ /* Save @vmx for SPEC_CTRL handling */
++ push %_ASM_ARG1
++
++ /* Save @flags for SPEC_CTRL handling */
++ push %_ASM_ARG3
++
+ /*
+ * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
+ * @regs is needed after VM-Exit to save the guest's register values.
+@@ -148,25 +155,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+ mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
+
+- /* IMPORTANT: RSB must be stuffed before the first return. */
+- FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+- xor %eax, %eax
++ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
++ xor %ebx, %ebx
+
+ .Lclear_regs:
+ /*
+- * Clear all general purpose registers except RSP and RAX to prevent
++ * Clear all general purpose registers except RSP and RBX to prevent
+ * speculative use of the guest's values, even those that are reloaded
+ * via the stack. In theory, an L1 cache miss when restoring registers
+ * could lead to speculative execution with the guest's values.
+ * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
+ * free. RSP and RAX are exempt as RSP is restored by hardware during
+- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
++ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
++ * value.
+ */
++ xor %eax, %eax
+ xor %ecx, %ecx
+ xor %edx, %edx
+- xor %ebx, %ebx
+ xor %ebp, %ebp
+ xor %esi, %esi
+ xor %edi, %edi
+@@ -184,6 +189,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+ /* "POP" @regs. */
+ add $WORD_SIZE, %_ASM_SP
+
++ /*
++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
++ * the first unbalanced RET after vmexit!
++ *
++ * For retpoline, RSB filling is needed to prevent poisoned RSB entries
++ * and (in some cases) RSB underflow.
++ *
++ * eIBRS has its own protection against poisoned RSB, so it doesn't
++ * need the RSB filling sequence. But it does need to be enabled
++ * before the first unbalanced RET.
++ */
++
++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++
++ pop %_ASM_ARG2 /* @flags */
++ pop %_ASM_ARG1 /* @vmx */
++
++ call vmx_spec_ctrl_restore_host
++
++ /* Put return value in AX */
++ mov %_ASM_BX, %_ASM_AX
++
+ pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+ pop %r12
+@@ -203,7 +230,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+ ud2
+ .Lvmfail:
+ /* VM-Fail: set return value to 1 */
+- mov $1, %eax
++ mov $1, %_ASM_BX
+ jmp .Lclear_regs
+
+ SYM_FUNC_END(__vmx_vcpu_run)
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -943,6 +943,14 @@ unsigned int __vmx_vcpu_run_flags(struct
+ if (vmx->loaded_vmcs->launched)
+ flags |= VMX_RUN_VMRESUME;
+
++ /*
++ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
++ * to change it directly without causing a vmexit. In that case read
++ * it after vmexit and store it in vmx->spec_ctrl.
++ */
++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
++ flags |= VMX_RUN_SAVE_SPEC_CTRL;
++
+ return flags;
+ }
+
+@@ -6685,6 +6693,26 @@ void noinstr vmx_update_host_rsp(struct
+ }
+ }
+
++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
++ unsigned int flags)
++{
++ u64 hostval = this_cpu_read(x86_spec_ctrl_current);
++
++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
++ return;
++
++ if (flags & VMX_RUN_SAVE_SPEC_CTRL)
++ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
++
++ /*
++ * If the guest/host SPEC_CTRL values differ, restore the host value.
++ */
++ if (vmx->spec_ctrl != hostval)
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
++
++ barrier_nospec();
++}
++
+ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+ {
+ switch (to_vmx(vcpu)->exit_reason.basic) {
+@@ -6837,26 +6865,6 @@ reenter_guest:
+ /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+
+- /*
+- * We do not use IBRS in the kernel. If this vCPU has used the
+- * SPEC_CTRL MSR it may have left it on; save the value and
+- * turn it off. This is much more efficient than blindly adding
+- * it to the atomic save/restore list. Especially as the former
+- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+- *
+- * For non-nested case:
+- * If the L01 MSR bitmap does not intercept the MSR, then we need to
+- * save it.
+- *
+- * For nested case:
+- * If the L02 MSR bitmap does not intercept the MSR, then we need to
+- * save it.
+- */
+- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+-
+- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+-
+ /* All fields are clean at this point */
+ if (static_branch_unlikely(&enable_evmcs))
+ current_evmcs->hv_clean_fields |=
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -366,6 +366,7 @@ void vmx_set_virtual_apic_mode(struct kv
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+ bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+ unsigned int flags);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Thu, 29 Oct 2020 15:04:57 +0100
+Subject: KVM/VMX: Use TEST %REG,%REG instead of CMP $0,%REG in vmenter.S
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 6c44221b05236cc65d76cb5dc2463f738edff39d upstream.
+
+Saves one byte in __vmx_vcpu_run for the same functionality.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Message-Id: <20201029140457.126965-1-ubizjak@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmenter.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov (%_ASM_SP), %_ASM_AX
+
+ /* Check if vmlaunch or vmresume is needed */
+- cmpb $0, %bl
++ testb %bl, %bl
+
+ /* Load guest registers. Don't clobber flags. */
+ mov VCPU_RCX(%_ASM_AX), %_ASM_CX
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 11 Jul 2022 00:31:38 +0200
+Subject: Makefile: Set retpoline cflags based on CONFIG_CC_IS_{CLANG,GCC}
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+This was done as part of commit 7d73c3e9c51400d3e0e755488050804e4d44737a
+"Makefile: remove stale cc-option checks" upstream, and is needed to
+support backporting further retpoline changes.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -670,12 +670,14 @@ ifdef CONFIG_FUNCTION_TRACER
+ CC_FLAGS_FTRACE := -pg
+ endif
+
+-RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+-RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register
+-RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+-RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline
+-RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+-RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG)))
++ifdef CONFIG_CC_IS_GCC
++RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
++endif
++ifdef CONFIG_CC_IS_CLANG
++RETPOLINE_CFLAGS := -mretpoline-external-thunk
++RETPOLINE_VDSO_CFLAGS := -mretpoline
++endif
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 18 Dec 2020 14:19:32 -0600
+Subject: objtool: Add 'alt_group' struct
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit b23cc71c62747f2e4c3e56138872cf47e1294f8a upstream.
+
+Create a new struct associated with each group of alternatives
+instructions. This will help with the removal of fake jumps, and more
+importantly with adding support for stack layout changes in
+alternatives.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 29 +++++++++++++++++++++++------
+ tools/objtool/check.h | 13 ++++++++++++-
+ 2 files changed, 35 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1012,20 +1012,28 @@ static int handle_group_alt(struct objto
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+ {
+- static unsigned int alt_group_next_index = 1;
+ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+- unsigned int alt_group = alt_group_next_index++;
++ struct alt_group *orig_alt_group, *new_alt_group;
+ unsigned long dest_off;
+
++
++ orig_alt_group = malloc(sizeof(*orig_alt_group));
++ if (!orig_alt_group) {
++ WARN("malloc failed");
++ return -1;
++ }
+ last_orig_insn = NULL;
+ insn = orig_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+ break;
+
+- insn->alt_group = alt_group;
++ insn->alt_group = orig_alt_group;
+ last_orig_insn = insn;
+ }
++ orig_alt_group->orig_group = NULL;
++ orig_alt_group->first_insn = orig_insn;
++ orig_alt_group->last_insn = last_orig_insn;
+
+ if (next_insn_same_sec(file, last_orig_insn)) {
+ fake_jump = malloc(sizeof(*fake_jump));
+@@ -1056,8 +1064,13 @@ static int handle_group_alt(struct objto
+ return 0;
+ }
+
++ new_alt_group = malloc(sizeof(*new_alt_group));
++ if (!new_alt_group) {
++ WARN("malloc failed");
++ return -1;
++ }
++
+ last_new_insn = NULL;
+- alt_group = alt_group_next_index++;
+ insn = *new_insn;
+ sec_for_each_insn_from(file, insn) {
+ struct reloc *alt_reloc;
+@@ -1069,7 +1082,7 @@ static int handle_group_alt(struct objto
+
+ insn->ignore = orig_insn->ignore_alts;
+ insn->func = orig_insn->func;
+- insn->alt_group = alt_group;
++ insn->alt_group = new_alt_group;
+
+ /*
+ * Since alternative replacement code is copy/pasted by the
+@@ -1118,6 +1131,10 @@ static int handle_group_alt(struct objto
+ return -1;
+ }
+
++ new_alt_group->orig_group = orig_alt_group;
++ new_alt_group->first_insn = *new_insn;
++ new_alt_group->last_insn = last_new_insn;
++
+ if (fake_jump)
+ list_add(&fake_jump->list, &last_new_insn->list);
+
+@@ -2440,7 +2457,7 @@ static int validate_return(struct symbol
+ static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+ {
+ struct instruction *first_insn = insn;
+- int alt_group = insn->alt_group;
++ struct alt_group *alt_group = insn->alt_group;
+
+ sec_for_each_insn_continue(file, insn) {
+ if (insn->alt_group != alt_group)
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -19,6 +19,17 @@ struct insn_state {
+ s8 instr;
+ };
+
++struct alt_group {
++ /*
++ * Pointer from a replacement group to the original group. NULL if it
++ * *is* the original group.
++ */
++ struct alt_group *orig_group;
++
++ /* First and last instructions in the group */
++ struct instruction *first_insn, *last_insn;
++};
++
+ struct instruction {
+ struct list_head list;
+ struct hlist_node hash;
+@@ -34,7 +45,7 @@ struct instruction {
+ s8 instr;
+ u8 visited;
+ u8 ret_offset;
+- int alt_group;
++ struct alt_group *alt_group;
+ struct symbol *call_dest;
+ struct instruction *jump_dest;
+ struct instruction *first_jump_src;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:07 +0100
+Subject: objtool: Add elf_create_reloc() helper
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ef47cc01cb4abcd760d8ac66b9361d6ade4d0846 upstream.
+
+We have 4 instances of adding a relocation. Create a common helper
+to avoid growing even more.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.817438847@infradead.org
+[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 43 +++++-------------------
+ tools/objtool/elf.c | 86 +++++++++++++++++++++++++++++++-----------------
+ tools/objtool/elf.h | 10 +++--
+ tools/objtool/orc_gen.c | 30 +++-------------
+ 4 files changed, 79 insertions(+), 90 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -433,8 +433,7 @@ reachable:
+
+ static int create_static_call_sections(struct objtool_file *file)
+ {
+- struct section *sec, *reloc_sec;
+- struct reloc *reloc;
++ struct section *sec;
+ struct static_call_site *site;
+ struct instruction *insn;
+ struct symbol *key_sym;
+@@ -460,8 +459,7 @@ static int create_static_call_sections(s
+ if (!sec)
+ return -1;
+
+- reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+- if (!reloc_sec)
++ if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+ return -1;
+
+ idx = 0;
+@@ -471,25 +469,11 @@ static int create_static_call_sections(s
+ memset(site, 0, sizeof(struct static_call_site));
+
+ /* populate reloc for 'addr' */
+- reloc = malloc(sizeof(*reloc));
+-
+- if (!reloc) {
+- perror("malloc");
++ if (elf_add_reloc_to_insn(file->elf, sec,
++ idx * sizeof(struct static_call_site),
++ R_X86_64_PC32,
++ insn->sec, insn->offset))
+ return -1;
+- }
+- memset(reloc, 0, sizeof(*reloc));
+-
+- insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
+- if (!reloc->sym) {
+- WARN_FUNC("static call tramp: missing containing symbol",
+- insn->sec, insn->offset);
+- return -1;
+- }
+-
+- reloc->type = R_X86_64_PC32;
+- reloc->offset = idx * sizeof(struct static_call_site);
+- reloc->sec = reloc_sec;
+- elf_add_reloc(file->elf, reloc);
+
+ /* find key symbol */
+ key_name = strdup(insn->call_dest->name);
+@@ -526,18 +510,11 @@ static int create_static_call_sections(s
+ free(key_name);
+
+ /* populate reloc for 'key' */
+- reloc = malloc(sizeof(*reloc));
+- if (!reloc) {
+- perror("malloc");
++ if (elf_add_reloc(file->elf, sec,
++ idx * sizeof(struct static_call_site) + 4,
++ R_X86_64_PC32, key_sym,
++ is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
+ return -1;
+- }
+- memset(reloc, 0, sizeof(*reloc));
+- reloc->sym = key_sym;
+- reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+- reloc->type = R_X86_64_PC32;
+- reloc->offset = idx * sizeof(struct static_call_site) + 4;
+- reloc->sec = reloc_sec;
+- elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -262,32 +262,6 @@ struct reloc *find_reloc_by_dest(const s
+ return find_reloc_by_dest_range(elf, sec, offset, 1);
+ }
+
+-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+- struct reloc *reloc)
+-{
+- if (sec->sym) {
+- reloc->sym = sec->sym;
+- reloc->addend = offset;
+- return;
+- }
+-
+- /*
+- * The Clang assembler strips section symbols, so we have to reference
+- * the function symbol instead:
+- */
+- reloc->sym = find_symbol_containing(sec, offset);
+- if (!reloc->sym) {
+- /*
+- * Hack alert. This happens when we need to reference the NOP
+- * pad insn immediately after the function.
+- */
+- reloc->sym = find_symbol_containing(sec, offset - 1);
+- }
+-
+- if (reloc->sym)
+- reloc->addend = offset - reloc->sym->offset;
+-}
+-
+ static int read_sections(struct elf *elf)
+ {
+ Elf_Scn *s = NULL;
+@@ -524,14 +498,66 @@ err:
+ return -1;
+ }
+
+-void elf_add_reloc(struct elf *elf, struct reloc *reloc)
++int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
++ unsigned int type, struct symbol *sym, int addend)
+ {
+- struct section *sec = reloc->sec;
++ struct reloc *reloc;
+
+- list_add_tail(&reloc->list, &sec->reloc_list);
++ reloc = malloc(sizeof(*reloc));
++ if (!reloc) {
++ perror("malloc");
++ return -1;
++ }
++ memset(reloc, 0, sizeof(*reloc));
++
++ reloc->sec = sec->reloc;
++ reloc->offset = offset;
++ reloc->type = type;
++ reloc->sym = sym;
++ reloc->addend = addend;
++
++ list_add_tail(&reloc->list, &sec->reloc->reloc_list);
+ elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
+- sec->changed = true;
++ sec->reloc->changed = true;
++
++ return 0;
++}
++
++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
++ unsigned long offset, unsigned int type,
++ struct section *insn_sec, unsigned long insn_off)
++{
++ struct symbol *sym;
++ int addend;
++
++ if (insn_sec->sym) {
++ sym = insn_sec->sym;
++ addend = insn_off;
++
++ } else {
++ /*
++ * The Clang assembler strips section symbols, so we have to
++ * reference the function symbol instead:
++ */
++ sym = find_symbol_containing(insn_sec, insn_off);
++ if (!sym) {
++ /*
++ * Hack alert. This happens when we need to reference
++ * the NOP pad insn immediately after the function.
++ */
++ sym = find_symbol_containing(insn_sec, insn_off - 1);
++ }
++
++ if (!sym) {
++ WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
++ return -1;
++ }
++
++ addend = insn_off - sym->offset;
++ }
++
++ return elf_add_reloc(elf, sec, offset, type, sym, addend);
+ }
+
+ static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -123,7 +123,13 @@ static inline u32 reloc_hash(struct relo
+ struct elf *elf_open_read(const char *name, int flags);
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+ struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
+-void elf_add_reloc(struct elf *elf, struct reloc *reloc);
++
++int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
++ unsigned int type, struct symbol *sym, int addend);
++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
++ unsigned long offset, unsigned int type,
++ struct section *insn_sec, unsigned long insn_off);
++
+ int elf_write_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len,
+ const char *insn);
+@@ -140,8 +146,6 @@ struct reloc *find_reloc_by_dest(const s
+ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len);
+ struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+- struct reloc *reloc);
+
+ #define for_each_sec(file, sec) \
+ list_for_each_entry(sec, &file->elf->sections, list)
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -81,37 +81,20 @@ static int init_orc_entry(struct orc_ent
+ }
+
+ static int write_orc_entry(struct elf *elf, struct section *orc_sec,
+- struct section *ip_rsec, unsigned int idx,
++ struct section *ip_sec, unsigned int idx,
+ struct section *insn_sec, unsigned long insn_off,
+ struct orc_entry *o)
+ {
+ struct orc_entry *orc;
+- struct reloc *reloc;
+
+ /* populate ORC data */
+ orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
+ memcpy(orc, o, sizeof(*orc));
+
+ /* populate reloc for ip */
+- reloc = malloc(sizeof(*reloc));
+- if (!reloc) {
+- perror("malloc");
++ if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32,
++ insn_sec, insn_off))
+ return -1;
+- }
+- memset(reloc, 0, sizeof(*reloc));
+-
+- insn_to_reloc_sym_addend(insn_sec, insn_off, reloc);
+- if (!reloc->sym) {
+- WARN("missing symbol for insn at offset 0x%lx",
+- insn_off);
+- return -1;
+- }
+-
+- reloc->type = R_X86_64_PC32;
+- reloc->offset = idx * sizeof(int);
+- reloc->sec = ip_rsec;
+-
+- elf_add_reloc(elf, reloc);
+
+ return 0;
+ }
+@@ -150,7 +133,7 @@ static unsigned long alt_group_len(struc
+
+ int orc_create(struct objtool_file *file)
+ {
+- struct section *sec, *ip_rsec, *orc_sec;
++ struct section *sec, *orc_sec;
+ unsigned int nr = 0, idx = 0;
+ struct orc_list_entry *entry;
+ struct list_head orc_list;
+@@ -239,13 +222,12 @@ int orc_create(struct objtool_file *file
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+ if (!sec)
+ return -1;
+- ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+- if (!ip_rsec)
++ if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+ return -1;
+
+ /* Write ORC entries to sections: */
+ list_for_each_entry(entry, &orc_list, list) {
+- if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
++ if (write_orc_entry(file->elf, orc_sec, sec, idx++,
+ entry->insn_sec, entry->insn_off,
+ &entry->orc))
+ return -1;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:11 +0100
+Subject: objtool: Add elf_create_undef_symbol()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046 upstream.
+
+Allow objtool to create undefined symbols; this allows creating
+relocations to symbols not currently in the symbol table.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.064743095@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ tools/objtool/elf.h | 1
+ 2 files changed, 61 insertions(+)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -766,6 +766,66 @@ static int elf_add_string(struct elf *el
+ return len;
+ }
+
++struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
++{
++ struct section *symtab;
++ struct symbol *sym;
++ Elf_Data *data;
++ Elf_Scn *s;
++
++ sym = malloc(sizeof(*sym));
++ if (!sym) {
++ perror("malloc");
++ return NULL;
++ }
++ memset(sym, 0, sizeof(*sym));
++
++ sym->name = strdup(name);
++
++ sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
++ if (sym->sym.st_name == -1)
++ return NULL;
++
++ sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
++ // st_other 0
++ // st_shndx 0
++ // st_value 0
++ // st_size 0
++
++ symtab = find_section_by_name(elf, ".symtab");
++ if (!symtab) {
++ WARN("can't find .symtab");
++ return NULL;
++ }
++
++ s = elf_getscn(elf->elf, symtab->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return NULL;
++ }
++
++ data = elf_newdata(s);
++ if (!data) {
++ WARN_ELF("elf_newdata");
++ return NULL;
++ }
++
++ data->d_buf = &sym->sym;
++ data->d_size = sizeof(sym->sym);
++ data->d_align = 1;
++
++ sym->idx = symtab->len / sizeof(sym->sym);
++
++ symtab->len += data->d_size;
++ symtab->changed = true;
++
++ sym->sec = find_section_by_index(elf, 0);
++
++ elf_add_symbol(elf, sym);
++
++ return sym;
++}
++
+ struct section *elf_create_section(struct elf *elf, const char *name,
+ unsigned int sh_flags, size_t entsize, int nr)
+ {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, stru
+ unsigned long offset, unsigned int len,
+ const char *insn);
+ int elf_write_reloc(struct elf *elf, struct reloc *reloc);
++struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
+ int elf_write(struct elf *elf);
+ void elf_close(struct elf *elf);
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:03 +0200
+Subject: objtool: Add entry UNRET validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream.
+
+Since entry asm is tricky, add a validation pass that ensures the
+retbleed mitigation has been done before the first actual RET
+instruction.
+
+Entry points are those that either have UNWIND_HINT_ENTRY, which acts
+as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or
+those that have UWIND_HINT_IRET_REGS at +0.
+
+This is basically a variant of validate_branch() that is
+intra-function and it will simply follow all branches from marked
+entry points and ensures that all paths lead to ANNOTATE_UNRET_END.
+
+If a path hits RET or an indirection the path is a fail and will be
+reported.
+
+There are 3 ANNOTATE_UNRET_END instances:
+
+ - UNTRAIN_RET itself
+ - exception from-kernel; this path doesn't need UNTRAIN_RET
+ - all early exceptions; these also don't need UNTRAIN_RET
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: arch/x86/entry/entry_64.S no pt_regs return at .Lerror_entry_done_lfence]
+[cascardo: tools/objtool/builtin-check.c no link option validation]
+[cascardo: tools/objtool/check.c opts.ibt is ibt]
+[cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts]
+[cascardo: objtool is still called from scripts/link-vmlinux.sh]
+[cascardo: no IBT support]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - In scripts/link-vmlinux.sh, use "test -n" instead of is_enabled
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 3
+ arch/x86/entry/entry_64_compat.S | 6 -
+ arch/x86/include/asm/nospec-branch.h | 12 ++
+ arch/x86/include/asm/unwind_hints.h | 4
+ arch/x86/kernel/head_64.S | 5 +
+ arch/x86/xen/xen-asm.S | 10 +-
+ include/linux/objtool.h | 3
+ scripts/link-vmlinux.sh | 3
+ tools/include/linux/objtool.h | 3
+ tools/objtool/builtin-check.c | 3
+ tools/objtool/builtin.h | 2
+ tools/objtool/check.c | 172 ++++++++++++++++++++++++++++++++++-
+ tools/objtool/check.h | 6 +
+ 13 files changed, 217 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -93,7 +93,7 @@ SYM_CODE_END(native_usergs_sysret64)
+ */
+
+ SYM_CODE_START(entry_SYSCALL_64)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+
+ swapgs
+ /* tss.sp2 is scratch space. */
+@@ -1094,6 +1094,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ */
+ .Lerror_entry_done_lfence:
+ FENCE_SWAPGS_KERNEL_ENTRY
++ ANNOTATE_UNRET_END
+ RET
+
+ .Lbstep_iret:
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -49,7 +49,7 @@
+ * 0(%ebp) arg6
+ */
+ SYM_CODE_START(entry_SYSENTER_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /* Interrupts are off on entry. */
+ SWAPGS
+
+@@ -202,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
+ * 0(%esp) arg6
+ */
+ SYM_CODE_START(entry_SYSCALL_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /* Interrupts are off on entry. */
+ swapgs
+
+@@ -349,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
+ * ebp arg6
+ */
+ SYM_CODE_START(entry_INT80_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /*
+ * Interrupts are off on entry.
+ */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -82,6 +82,17 @@
+ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+
+ /*
++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
++ * eventually turn into it's own annotation.
++ */
++.macro ANNOTATE_UNRET_END
++#ifdef CONFIG_DEBUG_ENTRY
++ ANNOTATE_RETPOLINE_SAFE
++ nop
++#endif
++.endm
++
++/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+@@ -131,6 +142,7 @@
+ */
+ .macro UNTRAIN_RET
+ #ifdef CONFIG_RETPOLINE
++ ANNOTATE_UNRET_END
+ ALTERNATIVE_2 "", \
+ "call zen_untrain_ret", X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -11,6 +11,10 @@
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ .endm
+
++.macro UNWIND_HINT_ENTRY
++ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1
++.endm
++
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+ .if \base == %rsp
+ .if \indirect
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -321,6 +321,8 @@ SYM_CODE_END(start_cpu0)
+ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
+ UNWIND_HINT_IRET_REGS offset=8
+
++ ANNOTATE_UNRET_END
++
+ /* Build pt_regs */
+ PUSH_AND_CLEAR_REGS
+
+@@ -378,6 +380,7 @@ SYM_CODE_START(early_idt_handler_array)
+ SYM_CODE_END(early_idt_handler_array)
+
+ SYM_CODE_START_LOCAL(early_idt_handler_common)
++ ANNOTATE_UNRET_END
+ /*
+ * The stack is the hardware frame, an error code or zero, and the
+ * vector number.
+@@ -424,6 +427,8 @@ SYM_CODE_END(early_idt_handler_common)
+ SYM_CODE_START_NOALIGN(vc_no_ghcb)
+ UNWIND_HINT_IRET_REGS offset=8
+
++ ANNOTATE_UNRET_END
++
+ /* Build pt_regs */
+ PUSH_AND_CLEAR_REGS
+
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -148,7 +148,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
+
+ .macro xen_pv_trap name
+ SYM_CODE_START(xen_\name)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ pop %rcx
+ pop %r11
+ jmp \name
+@@ -277,7 +277,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+
+ /* Normal 64-bit system call target */
+ SYM_CODE_START(xen_entry_SYSCALL_64)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ popq %rcx
+ popq %r11
+
+@@ -296,7 +296,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64)
+
+ /* 32-bit compat syscall target */
+ SYM_CODE_START(xen_entry_SYSCALL_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ popq %rcx
+ popq %r11
+
+@@ -313,7 +313,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat)
+
+ /* 32-bit compat sysenter target */
+ SYM_CODE_START(xen_entry_SYSENTER_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /*
+ * NB: Xen is polite and clears TF from EFLAGS for us. This means
+ * that we don't need to guard against single step exceptions here.
+@@ -336,7 +336,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat)
+
+ SYM_CODE_START(xen_entry_SYSCALL_compat)
+ SYM_CODE_START(xen_entry_SYSENTER_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
+ mov $-ENOSYS, %rax
+ pushq $0
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -32,11 +32,14 @@ struct unwind_hint {
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+ */
+ #define UNWIND_HINT_TYPE_CALL 0
+ #define UNWIND_HINT_TYPE_REGS 1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC 3
++#define UNWIND_HINT_TYPE_ENTRY 4
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -65,6 +65,9 @@ objtool_link()
+
+ if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+ objtoolopt="check"
++ if [ -n "${CONFIG_RETPOLINE}" ]; then
++ objtoolopt="${objtoolopt} --unret"
++ fi
+ if [ -z "${CONFIG_FRAME_POINTER}" ]; then
+ objtoolopt="${objtoolopt} --no-fp"
+ fi
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -32,11 +32,14 @@ struct unwind_hint {
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+ */
+ #define UNWIND_HINT_TYPE_CALL 0
+ #define UNWIND_HINT_TYPE_REGS 1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC 3
++#define UNWIND_HINT_TYPE_ENTRY 4
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -19,7 +19,7 @@
+ #include "objtool.h"
+
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+- validate_dup, vmlinux, sls;
++ validate_dup, vmlinux, sls, unret;
+
+ static const char * const check_usage[] = {
+ "objtool check [<options>] file.o",
+@@ -30,6 +30,7 @@ const struct option check_options[] = {
+ OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+ OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"),
+ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+ OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+ OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -9,7 +9,7 @@
+
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+- validate_dup, vmlinux, sls;
++ validate_dup, vmlinux, sls, unret;
+
+ extern int cmd_check(int argc, const char **argv);
+ extern int cmd_orc(int argc, const char **argv);
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1752,6 +1752,19 @@ static int read_unwind_hints(struct objt
+
+ insn->hint = true;
+
++ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
++ struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
++
++ if (sym && sym->bind == STB_GLOBAL) {
++ insn->entry = 1;
++ }
++ }
++
++ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
++ hint->type = UNWIND_HINT_TYPE_CALL;
++ insn->entry = 1;
++ }
++
+ if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+ insn->cfi = &func_cfi;
+ continue;
+@@ -1800,8 +1813,9 @@ static int read_retpoline_hints(struct o
+
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC &&
+- insn->type != INSN_RETURN) {
+- WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret",
++ insn->type != INSN_RETURN &&
++ insn->type != INSN_NOP) {
++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
+ insn->sec, insn->offset);
+ return -1;
+ }
+@@ -2818,8 +2832,8 @@ static int validate_branch(struct objtoo
+ return 1;
+ }
+
+- visited = 1 << state.uaccess;
+- if (insn->visited) {
++ visited = VISITED_BRANCH << state.uaccess;
++ if (insn->visited & VISITED_BRANCH_MASK) {
+ if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
+ return 1;
+
+@@ -3045,6 +3059,145 @@ static int validate_unwind_hints(struct
+ return warnings;
+ }
+
++/*
++ * Validate rethunk entry constraint: must untrain RET before the first RET.
++ *
++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
++ * before an actual RET instruction.
++ */
++static int validate_entry(struct objtool_file *file, struct instruction *insn)
++{
++ struct instruction *next, *dest;
++ int ret, warnings = 0;
++
++ for (;;) {
++ next = next_insn_to_validate(file, insn);
++
++ if (insn->visited & VISITED_ENTRY)
++ return 0;
++
++ insn->visited |= VISITED_ENTRY;
++
++ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
++ struct alternative *alt;
++ bool skip_orig = false;
++
++ list_for_each_entry(alt, &insn->alts, list) {
++ if (alt->skip_orig)
++ skip_orig = true;
++
++ ret = validate_entry(file, alt->insn);
++ if (ret) {
++ if (backtrace)
++ BT_FUNC("(alt)", insn);
++ return ret;
++ }
++ }
++
++ if (skip_orig)
++ return 0;
++ }
++
++ switch (insn->type) {
++
++ case INSN_CALL_DYNAMIC:
++ case INSN_JUMP_DYNAMIC:
++ case INSN_JUMP_DYNAMIC_CONDITIONAL:
++ WARN_FUNC("early indirect call", insn->sec, insn->offset);
++ return 1;
++
++ case INSN_JUMP_UNCONDITIONAL:
++ case INSN_JUMP_CONDITIONAL:
++ if (!is_sibling_call(insn)) {
++ if (!insn->jump_dest) {
++ WARN_FUNC("unresolved jump target after linking?!?",
++ insn->sec, insn->offset);
++ return -1;
++ }
++ ret = validate_entry(file, insn->jump_dest);
++ if (ret) {
++ if (backtrace) {
++ BT_FUNC("(branch%s)", insn,
++ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
++ }
++ return ret;
++ }
++
++ if (insn->type == INSN_JUMP_UNCONDITIONAL)
++ return 0;
++
++ break;
++ }
++
++ /* fallthrough */
++ case INSN_CALL:
++ dest = find_insn(file, insn->call_dest->sec,
++ insn->call_dest->offset);
++ if (!dest) {
++ WARN("Unresolved function after linking!?: %s",
++ insn->call_dest->name);
++ return -1;
++ }
++
++ ret = validate_entry(file, dest);
++ if (ret) {
++ if (backtrace)
++ BT_FUNC("(call)", insn);
++ return ret;
++ }
++ /*
++ * If a call returns without error, it must have seen UNTRAIN_RET.
++ * Therefore any non-error return is a success.
++ */
++ return 0;
++
++ case INSN_RETURN:
++ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
++ return 1;
++
++ case INSN_NOP:
++ if (insn->retpoline_safe)
++ return 0;
++ break;
++
++ default:
++ break;
++ }
++
++ if (!next) {
++ WARN_FUNC("teh end!", insn->sec, insn->offset);
++ return -1;
++ }
++ insn = next;
++ }
++
++ return warnings;
++}
++
++/*
++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
++ * before RET.
++ */
++static int validate_unret(struct objtool_file *file)
++{
++ struct instruction *insn;
++ int ret, warnings = 0;
++
++ for_each_insn(file, insn) {
++ if (!insn->entry)
++ continue;
++
++ ret = validate_entry(file, insn);
++ if (ret < 0) {
++ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
++ return ret;
++ }
++ warnings += ret;
++ }
++
++ return warnings;
++}
++
+ static int validate_retpoline(struct objtool_file *file)
+ {
+ struct instruction *insn;
+@@ -3312,6 +3465,17 @@ int check(struct objtool_file *file)
+ goto out;
+ warnings += ret;
+
++ if (unret) {
++ /*
++ * Must be after validate_branch() and friends, it plays
++ * further games with insn->visited.
++ */
++ ret = validate_unret(file);
++ if (ret < 0)
++ return ret;
++ warnings += ret;
++ }
++
+ if (!warnings) {
+ ret = validate_reachable_instructions(file);
+ if (ret < 0)
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -48,6 +48,7 @@ struct instruction {
+ bool dead_end, ignore, ignore_alts;
+ bool hint;
+ bool retpoline_safe;
++ bool entry;
+ s8 instr;
+ u8 visited;
+ struct alt_group *alt_group;
+@@ -62,6 +63,11 @@ struct instruction {
+ struct cfi_state *cfi;
+ };
+
++#define VISITED_BRANCH 0x01
++#define VISITED_BRANCH_UACCESS 0x02
++#define VISITED_BRANCH_MASK 0x03
++#define VISITED_ENTRY 0x04
++
+ static inline bool is_static_jump(struct instruction *insn)
+ {
+ return insn->type == INSN_JUMP_CONDITIONAL ||
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:42 +0100
+Subject: objtool: Add straight-line-speculation validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1cc1e4c8aab4213bd4e6353dec2620476a233d6d upstream.
+
+Teach objtool to validate the straight-line-speculation constraints:
+
+ - speculation trap after indirect calls
+ - speculation trap after RET
+
+Notable: when an instruction is annotated RETPOLINE_SAFE, indicating
+ speculation isn't a problem, also don't care about sls for that
+ instruction.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134908.023037659@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10: adjust filenames, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h | 1 +
+ tools/objtool/arch/x86/decode.c | 13 +++++++++----
+ tools/objtool/builtin-check.c | 4 +++-
+ tools/objtool/builtin.h | 3 ++-
+ tools/objtool/check.c | 14 ++++++++++++++
+ 5 files changed, 29 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -26,6 +26,7 @@ enum insn_type {
+ INSN_CLAC,
+ INSN_STD,
+ INSN_CLD,
++ INSN_TRAP,
+ INSN_OTHER,
+ };
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -456,6 +456,11 @@ int arch_decode_instruction(const struct
+
+ break;
+
++ case 0xcc:
++ /* int3 */
++ *type = INSN_TRAP;
++ break;
++
+ case 0xe3:
+ /* jecxz/jrcxz */
+ *type = INSN_JUMP_CONDITIONAL;
+@@ -592,10 +597,10 @@ const char *arch_ret_insn(int len)
+ {
+ static const char ret[5][5] = {
+ { BYTE_RET },
+- { BYTE_RET, 0x90 },
+- { BYTE_RET, 0x66, 0x90 },
+- { BYTE_RET, 0x0f, 0x1f, 0x00 },
+- { BYTE_RET, 0x0f, 0x1f, 0x40, 0x00 },
++ { BYTE_RET, 0xcc },
++ { BYTE_RET, 0xcc, 0x90 },
++ { BYTE_RET, 0xcc, 0x66, 0x90 },
++ { BYTE_RET, 0xcc, 0x0f, 0x1f, 0x00 },
+ };
+
+ if (len < 1 || len > 5) {
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -18,7 +18,8 @@
+ #include "builtin.h"
+ #include "objtool.h"
+
+-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
++bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
++ validate_dup, vmlinux, sls;
+
+ static const char * const check_usage[] = {
+ "objtool check [<options>] file.o",
+@@ -35,6 +36,7 @@ const struct option check_options[] = {
+ OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
+ OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+ OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
++ OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
+ OPT_END(),
+ };
+
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -8,7 +8,8 @@
+ #include <subcmd/parse-options.h>
+
+ extern const struct option check_options[];
+-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
++extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
++ validate_dup, vmlinux, sls;
+
+ extern int cmd_check(int argc, const char **argv);
+ extern int cmd_orc(int argc, const char **argv);
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2775,6 +2775,12 @@ static int validate_branch(struct objtoo
+ switch (insn->type) {
+
+ case INSN_RETURN:
++ if (next_insn && next_insn->type == INSN_TRAP) {
++ next_insn->ignore = true;
++ } else if (sls && !insn->retpoline_safe) {
++ WARN_FUNC("missing int3 after ret",
++ insn->sec, insn->offset);
++ }
+ return validate_return(func, insn, &state);
+
+ case INSN_CALL:
+@@ -2818,6 +2824,14 @@ static int validate_branch(struct objtoo
+ break;
+
+ case INSN_JUMP_DYNAMIC:
++ if (next_insn && next_insn->type == INSN_TRAP) {
++ next_insn->ignore = true;
++ } else if (sls && !insn->retpoline_safe) {
++ WARN_FUNC("missing int3 after indirect jump",
++ insn->sec, insn->offset);
++ }
++
++ /* fallthrough */
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ if (is_sibling_call(insn)) {
+ ret = validate_sibling_call(insn, &state);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:22 -0600
+Subject: objtool: Assume only ELF functions do sibling calls
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit ecf11ba4d066fe527586c6edd6ca68457ca55cf4 upstream.
+
+There's an inconsistency in how sibling calls are detected in
+non-function asm code, depending on the scope of the object. If the
+target code is external to the object, objtool considers it a sibling
+call. If the target code is internal but not a function, objtool
+*doesn't* consider it a sibling call.
+
+This can cause some inconsistencies between per-object and vmlinux.o
+validation.
+
+Instead, assume only ELF functions can do sibling calls. This generally
+matches existing reality, and makes sibling call validation consistent
+between vmlinux.o and per-object.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/0e9ab6f3628cc7bf3bde7aa6762d54d7df19ad78.1611263461.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 36 ++++++++++++++++++++++--------------
+ 1 file changed, 22 insertions(+), 14 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -109,15 +109,20 @@ static struct instruction *prev_insn_sam
+
+ static bool is_sibling_call(struct instruction *insn)
+ {
++ /*
++ * Assume only ELF functions can make sibling calls. This ensures
++ * sibling call detection consistency between vmlinux.o and individual
++ * objects.
++ */
++ if (!insn->func)
++ return false;
++
+ /* An indirect jump is either a sibling call or a jump to a table. */
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ return list_empty(&insn->alts);
+
+- if (!is_static_jump(insn))
+- return false;
+-
+ /* add_jump_destinations() sets insn->call_dest for sibling calls. */
+- return !!insn->call_dest;
++ return (is_static_jump(insn) && insn->call_dest);
+ }
+
+ /*
+@@ -788,7 +793,7 @@ static int add_jump_destinations(struct
+ continue;
+
+ reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+- insn->offset, insn->len);
++ insn->offset, insn->len);
+ if (!reloc) {
+ dest_sec = insn->sec;
+ dest_off = arch_jump_destination(insn);
+@@ -808,18 +813,21 @@ static int add_jump_destinations(struct
+
+ insn->retpoline_safe = true;
+ continue;
+- } else if (reloc->sym->sec->idx) {
+- dest_sec = reloc->sym->sec;
+- dest_off = reloc->sym->sym.st_value +
+- arch_dest_reloc_offset(reloc->addend);
+- } else {
+- /* external sibling call */
++ } else if (insn->func) {
++ /* internal or external sibling call (with reloc) */
+ insn->call_dest = reloc->sym;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
+ continue;
++ } else if (reloc->sym->sec->idx) {
++ dest_sec = reloc->sym->sec;
++ dest_off = reloc->sym->sym.st_value +
++ arch_dest_reloc_offset(reloc->addend);
++ } else {
++ /* non-func asm code jumping to another file */
++ continue;
+ }
+
+ insn->jump_dest = find_insn(file, dest_sec, dest_off);
+@@ -868,7 +876,7 @@ static int add_jump_destinations(struct
+ } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
+ insn->jump_dest->offset == insn->jump_dest->func->offset) {
+
+- /* internal sibling call */
++ /* internal sibling call (without reloc) */
+ insn->call_dest = insn->jump_dest->func;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+@@ -2570,7 +2578,7 @@ static int validate_branch(struct objtoo
+
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+- if (func && is_sibling_call(insn)) {
++ if (is_sibling_call(insn)) {
+ ret = validate_sibling_call(insn, &state);
+ if (ret)
+ return ret;
+@@ -2592,7 +2600,7 @@ static int validate_branch(struct objtoo
+
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+- if (func && is_sibling_call(insn)) {
++ if (is_sibling_call(insn)) {
+ ret = validate_sibling_call(insn, &state);
+ if (ret)
+ return ret;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:13 +0100
+Subject: objtool: Cache instruction relocs
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7bd2a600f3e9d27286bbf23c83d599e9cc7cf245 upstream.
+
+Track the reloc of instructions in the new instruction->reloc field
+to avoid having to look them up again later.
+
+( Technically x86 instructions can have two relocations, but not jumps
+ and calls, for which we're using this. )
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.195441549@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 28 ++++++++++++++++++++++------
+ tools/objtool/check.h | 1 +
+ 2 files changed, 23 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -754,6 +754,25 @@ __weak bool arch_is_retpoline(struct sym
+ return false;
+ }
+
++#define NEGATIVE_RELOC ((void *)-1L)
++
++static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
++{
++ if (insn->reloc == NEGATIVE_RELOC)
++ return NULL;
++
++ if (!insn->reloc) {
++ insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
++ insn->offset, insn->len);
++ if (!insn->reloc) {
++ insn->reloc = NEGATIVE_RELOC;
++ return NULL;
++ }
++ }
++
++ return insn->reloc;
++}
++
+ /*
+ * Find the destination instructions for all jumps.
+ */
+@@ -768,8 +787,7 @@ static int add_jump_destinations(struct
+ if (!is_static_jump(insn))
+ continue;
+
+- reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+- insn->offset, insn->len);
++ reloc = insn_reloc(file, insn);
+ if (!reloc) {
+ dest_sec = insn->sec;
+ dest_off = arch_jump_destination(insn);
+@@ -901,8 +919,7 @@ static int add_call_destinations(struct
+ if (insn->type != INSN_CALL)
+ continue;
+
+- reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+- insn->offset, insn->len);
++ reloc = insn_reloc(file, insn);
+ if (!reloc) {
+ dest_off = arch_jump_destination(insn);
+ insn->call_dest = find_call_destination(insn->sec, dest_off);
+@@ -1085,8 +1102,7 @@ static int handle_group_alt(struct objto
+ * alternatives code can adjust the relative offsets
+ * accordingly.
+ */
+- alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+- insn->offset, insn->len);
++ alt_reloc = insn_reloc(file, insn);
+ if (alt_reloc &&
+ !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
+
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -55,6 +55,7 @@ struct instruction {
+ struct instruction *jump_dest;
+ struct instruction *first_jump_src;
+ struct reloc *jump_table;
++ struct reloc *reloc;
+ struct list_head alts;
+ struct symbol *func;
+ struct list_head stack_ops;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:33 +0200
+Subject: objtool: Classify symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1739c66eb7bd5f27f1b69a5a26e10e8327d1e136 upstream.
+
+In order to avoid calling str*cmp() on symbol names, over and over, do
+them all once upfront and store the result.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.658539311@infradead.org
+[cascardo: no pv_target on struct symbol, because of missing
+ db2b0c5d7b6f19b3c2cab08c531b65342eb5252b]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: objtool doesn't have any mcount handling]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 32 +++++++++++++++++++++-----------
+ tools/objtool/elf.h | 7 +++++--
+ 2 files changed, 26 insertions(+), 13 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -889,8 +889,7 @@ static void add_call_dest(struct objtool
+ * so they need a little help, NOP out any KCOV calls from noinstr
+ * text.
+ */
+- if (insn->sec->noinstr &&
+- !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
++ if (insn->sec->noinstr && insn->call_dest->kcov) {
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+@@ -935,7 +934,7 @@ static int add_jump_destinations(struct
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- } else if (arch_is_retpoline(reloc->sym)) {
++ } else if (reloc->sym->retpoline_thunk) {
+ /*
+ * Retpoline jumps are really dynamic jumps in
+ * disguise, so convert them accordingly.
+@@ -1076,7 +1075,7 @@ static int add_call_destinations(struct
+
+ add_call_dest(file, insn, dest, false);
+
+- } else if (arch_is_retpoline(reloc->sym)) {
++ } else if (reloc->sym->retpoline_thunk) {
+ /*
+ * Retpoline calls are really dynamic calls in
+ * disguise, so convert them accordingly.
+@@ -1733,17 +1732,28 @@ static int read_intra_function_calls(str
+ return 0;
+ }
+
+-static int read_static_call_tramps(struct objtool_file *file)
++static int classify_symbols(struct objtool_file *file)
+ {
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+- if (func->bind == STB_GLOBAL &&
+- !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
++ if (func->bind != STB_GLOBAL)
++ continue;
++
++ if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
++
++ if (arch_is_retpoline(func))
++ func->retpoline_thunk = true;
++
++ if (!strcmp(func->name, "__fentry__"))
++ func->fentry = true;
++
++ if (!strncmp(func->name, "__sanitizer_cov_", 16))
++ func->kcov = true;
+ }
+ }
+
+@@ -1805,7 +1815,7 @@ static int decode_sections(struct objtoo
+ /*
+ * Must be before add_{jump_call}_destination.
+ */
+- ret = read_static_call_tramps(file);
++ ret = classify_symbols(file);
+ if (ret)
+ return ret;
+
+@@ -1863,9 +1873,9 @@ static int decode_sections(struct objtoo
+
+ static bool is_fentry_call(struct instruction *insn)
+ {
+- if (insn->type == INSN_CALL && insn->call_dest &&
+- insn->call_dest->type == STT_NOTYPE &&
+- !strcmp(insn->call_dest->name, "__fentry__"))
++ if (insn->type == INSN_CALL &&
++ insn->call_dest &&
++ insn->call_dest->fentry)
+ return true;
+
+ return false;
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -55,8 +55,11 @@ struct symbol {
+ unsigned long offset;
+ unsigned int len;
+ struct symbol *pfunc, *cfunc, *alias;
+- bool uaccess_safe;
+- bool static_call_tramp;
++ u8 uaccess_safe : 1;
++ u8 static_call_tramp : 1;
++ u8 retpoline_thunk : 1;
++ u8 fentry : 1;
++ u8 kcov : 1;
+ };
+
+ struct reloc {
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:24 -0600
+Subject: objtool: Combine UNWIND_HINT_RET_OFFSET and UNWIND_HINT_FUNC
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit b735bd3e68824316655252a931a3353a6ebc036f upstream.
+
+The ORC metadata generated for UNWIND_HINT_FUNC isn't actually very
+func-like. With certain usages it can cause stack state mismatches
+because it doesn't set the return address (CFI_RA).
+
+Also, users of UNWIND_HINT_RET_OFFSET no longer need to set a custom
+return stack offset. Instead they just need to specify a func-like
+situation, so the current ret_offset code is hacky for no good reason.
+
+Solve both problems by simplifying the RET_OFFSET handling and
+converting it into a more useful UNWIND_HINT_FUNC.
+
+If we end up needing the old 'ret_offset' functionality again in the
+future, we should be able to support it pretty easily with the addition
+of a custom 'sp_offset' in UNWIND_HINT_FUNC.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/db9d1f5d79dddfbb3725ef6d8ec3477ad199948d.1611263462.git.jpoimboe@redhat.com
+[bwh: Backported to 5.10:
+ - Don't use bswap_if_needed() since we don't have any of the other fixes
+ for mixed-endian cross-compilation
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/unwind_hints.h | 13 +-----------
+ arch/x86/kernel/ftrace_64.S | 2 -
+ arch/x86/lib/retpoline.S | 2 -
+ include/linux/objtool.h | 5 +++-
+ tools/include/linux/objtool.h | 5 +++-
+ tools/objtool/arch/x86/decode.c | 4 +--
+ tools/objtool/check.c | 37 ++++++++++++++----------------------
+ tools/objtool/check.h | 1
+ 8 files changed, 29 insertions(+), 40 deletions(-)
+
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -48,17 +48,8 @@
+ UNWIND_HINT_REGS base=\base offset=\offset partial=1
+ .endm
+
+-.macro UNWIND_HINT_FUNC sp_offset=8
+- UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL
+-.endm
+-
+-/*
+- * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN
+- * and sibling calls. On these, sp_offset denotes the expected offset from
+- * initial_func_cfi.
+- */
+-.macro UNWIND_HINT_RET_OFFSET sp_offset=8
+- UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
++.macro UNWIND_HINT_FUNC
++ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -265,7 +265,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end,
+ restore_mcount_regs 8
+ /* Restore flags */
+ popfq
+- UNWIND_HINT_RET_OFFSET
++ UNWIND_HINT_FUNC
+ jmp ftrace_epilogue
+
+ SYM_FUNC_END(ftrace_regs_caller)
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -28,7 +28,7 @@ SYM_FUNC_START_NOALIGN(__x86_retpoline_\
+ jmp .Lspec_trap_\@
+ .Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+- UNWIND_HINT_RET_OFFSET
++ UNWIND_HINT_FUNC
+ ret
+ SYM_FUNC_END(__x86_retpoline_\reg)
+
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -29,11 +29,14 @@ struct unwind_hint {
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
++ *
++ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
++ * Useful for code which doesn't have an ELF function annotation.
+ */
+ #define UNWIND_HINT_TYPE_CALL 0
+ #define UNWIND_HINT_TYPE_REGS 1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+-#define UNWIND_HINT_TYPE_RET_OFFSET 3
++#define UNWIND_HINT_TYPE_FUNC 3
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -29,11 +29,14 @@ struct unwind_hint {
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
++ *
++ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
++ * Useful for code which doesn't have an ELF function annotation.
+ */
+ #define UNWIND_HINT_TYPE_CALL 0
+ #define UNWIND_HINT_TYPE_REGS 1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+-#define UNWIND_HINT_TYPE_RET_OFFSET 3
++#define UNWIND_HINT_TYPE_FUNC 3
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -563,8 +563,8 @@ void arch_initial_func_cfi_state(struct
+ state->cfa.offset = 8;
+
+ /* initial RA (return address) */
+- state->regs[16].base = CFI_CFA;
+- state->regs[16].offset = -8;
++ state->regs[CFI_RA].base = CFI_CFA;
++ state->regs[CFI_RA].offset = -8;
+ }
+
+ const char *arch_nop_insn(int len)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1423,13 +1423,20 @@ static int add_jump_table_alts(struct ob
+ return 0;
+ }
+
++static void set_func_state(struct cfi_state *state)
++{
++ state->cfa = initial_func_cfi.cfa;
++ memcpy(&state->regs, &initial_func_cfi.regs,
++ CFI_NUM_REGS * sizeof(struct cfi_reg));
++ state->stack_size = initial_func_cfi.cfa.offset;
++}
++
+ static int read_unwind_hints(struct objtool_file *file)
+ {
+ struct section *sec, *relocsec;
+ struct reloc *reloc;
+ struct unwind_hint *hint;
+ struct instruction *insn;
+- struct cfi_reg *cfa;
+ int i;
+
+ sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+@@ -1464,22 +1471,20 @@ static int read_unwind_hints(struct objt
+ return -1;
+ }
+
+- cfa = &insn->cfi.cfa;
++ insn->hint = true;
+
+- if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
+- insn->ret_offset = hint->sp_offset;
++ if (hint->type == UNWIND_HINT_TYPE_FUNC) {
++ set_func_state(&insn->cfi);
+ continue;
+ }
+
+- insn->hint = true;
+-
+ if (arch_decode_hint_reg(insn, hint->sp_reg)) {
+ WARN_FUNC("unsupported unwind_hint sp base reg %d",
+ insn->sec, insn->offset, hint->sp_reg);
+ return -1;
+ }
+
+- cfa->offset = hint->sp_offset;
++ insn->cfi.cfa.offset = hint->sp_offset;
+ insn->cfi.type = hint->type;
+ insn->cfi.end = hint->end;
+ }
+@@ -1742,27 +1747,18 @@ static bool is_fentry_call(struct instru
+
+ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
+ {
+- u8 ret_offset = insn->ret_offset;
+ struct cfi_state *cfi = &state->cfi;
+ int i;
+
+ if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
+ return true;
+
+- if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
++ if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
+ return true;
+
+- if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
++ if (cfi->stack_size != initial_func_cfi.cfa.offset)
+ return true;
+
+- /*
+- * If there is a ret offset hint then don't check registers
+- * because a callee-saved register might have been pushed on
+- * the stack.
+- */
+- if (ret_offset)
+- return false;
+-
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
+ cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
+@@ -2863,10 +2859,7 @@ static int validate_section(struct objto
+ continue;
+
+ init_insn_state(&state, sec);
+- state.cfi.cfa = initial_func_cfi.cfa;
+- memcpy(&state.cfi.regs, &initial_func_cfi.regs,
+- CFI_NUM_REGS * sizeof(struct cfi_reg));
+- state.cfi.stack_size = initial_func_cfi.cfa.offset;
++ set_func_state(&state.cfi);
+
+ warnings += validate_symbol(file, sec, func, &state);
+ }
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -50,7 +50,6 @@ struct instruction {
+ bool retpoline_safe;
+ s8 instr;
+ u8 visited;
+- u8 ret_offset;
+ struct alt_group *alt_group;
+ struct symbol *call_dest;
+ struct instruction *jump_dest;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:03 +0100
+Subject: objtool: Correctly handle retpoline thunk calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32 upstream.
+
+Just like JMP handling, convert a direct CALL to a retpoline thunk
+into a retpoline safe indirect CALL.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.567568238@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -953,6 +953,18 @@ static int add_call_destinations(struct
+ dest_off);
+ return -1;
+ }
++
++ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
++ /*
++ * Retpoline calls are really dynamic calls in
++ * disguise, so convert them accordingly.
++ */
++ insn->type = INSN_CALL_DYNAMIC;
++ insn->retpoline_safe = true;
++
++ remove_insn_ops(insn);
++ continue;
++
+ } else
+ insn->call_dest = reloc->sym;
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:08 +0100
+Subject: objtool: Create reloc sections implicitly
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e upstream.
+
+Have elf_add_reloc() create the relocation section implicitly.
+
+Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.880174448@infradead.org
+[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 3 ---
+ tools/objtool/elf.c | 9 ++++++++-
+ tools/objtool/elf.h | 1 -
+ tools/objtool/orc_gen.c | 2 --
+ 4 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -459,9 +459,6 @@ static int create_static_call_sections(s
+ if (!sec)
+ return -1;
+
+- if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+- return -1;
+-
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -498,11 +498,18 @@ err:
+ return -1;
+ }
+
++static struct section *elf_create_reloc_section(struct elf *elf,
++ struct section *base,
++ int reltype);
++
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+ unsigned int type, struct symbol *sym, int addend)
+ {
+ struct reloc *reloc;
+
++ if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
++ return -1;
++
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+@@ -880,7 +887,7 @@ static struct section *elf_create_rela_r
+ return sec;
+ }
+
+-struct section *elf_create_reloc_section(struct elf *elf,
++static struct section *elf_create_reloc_section(struct elf *elf,
+ struct section *base,
+ int reltype)
+ {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct relo
+
+ struct elf *elf_open_read(const char *name, int flags);
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+-struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+ unsigned int type, struct symbol *sym, int addend);
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -222,8 +222,6 @@ int orc_create(struct objtool_file *file
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+ if (!sec)
+ return -1;
+- if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
+- return -1;
+
+ /* Write ORC entries to sections: */
+ list_for_each_entry(entry, &orc_list, list) {
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 8 Mar 2022 16:30:14 +0100
+Subject: objtool: Default ignore INT3 for unreachable
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1ffbe4e935f9b7308615c75be990aec07464d1e7 upstream.
+
+Ignore all INT3 instructions for unreachable code warnings, similar to NOP.
+This allows using INT3 for various paddings instead of NOPs.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/20220308154317.343312938@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2775,9 +2775,8 @@ static int validate_branch(struct objtoo
+ switch (insn->type) {
+
+ case INSN_RETURN:
+- if (next_insn && next_insn->type == INSN_TRAP) {
+- next_insn->ignore = true;
+- } else if (sls && !insn->retpoline_safe) {
++ if (sls && !insn->retpoline_safe &&
++ next_insn && next_insn->type != INSN_TRAP) {
+ WARN_FUNC("missing int3 after ret",
+ insn->sec, insn->offset);
+ }
+@@ -2824,9 +2823,8 @@ static int validate_branch(struct objtoo
+ break;
+
+ case INSN_JUMP_DYNAMIC:
+- if (next_insn && next_insn->type == INSN_TRAP) {
+- next_insn->ignore = true;
+- } else if (sls && !insn->retpoline_safe) {
++ if (sls && !insn->retpoline_safe &&
++ next_insn && next_insn->type != INSN_TRAP) {
+ WARN_FUNC("missing int3 after indirect jump",
+ insn->sec, insn->offset);
+ }
+@@ -2997,7 +2995,7 @@ static bool ignore_unreachable_insn(stru
+ int i;
+ struct instruction *prev_insn;
+
+- if (insn->ignore || insn->type == INSN_NOP)
++ if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP)
+ return true;
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 23 Jun 2021 10:42:28 -0500
+Subject: objtool: Don't make .altinstructions writable
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit e31694e0a7a709293319475d8001e05e31f2178c upstream.
+
+When objtool creates the .altinstructions section, it sets the SHF_WRITE
+flag to make the section writable -- unless the section had already been
+previously created by the kernel. The mismatch between kernel-created
+and objtool-created section flags can cause failures with external
+tooling (kpatch-build). And the section doesn't need to be writable
+anyway.
+
+Make the section flags consistent with the kernel's.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/6c284ae89717889ea136f9f0064d914cd8329d31.1624462939.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -611,7 +611,7 @@ static int elf_add_alternative(struct el
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+- SHF_WRITE, size, 0);
++ SHF_ALLOC, size, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:34 +0200
+Subject: objtool: Explicitly avoid self modifying code in .altinstr_replacement
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit dd003edeffa3cb87bc9862582004f405d77d7670 upstream.
+
+Assume ALTERNATIVE()s know what they're doing and do not change, or
+cause to change, instructions in .altinstr_replacement sections.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.722511775@infradead.org
+[cascardo: context adjustment]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: objtool doesn't have any mcount handling]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 36 ++++++++++++++++++++++++++++--------
+ 1 file changed, 28 insertions(+), 8 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -870,18 +870,27 @@ static void remove_insn_ops(struct instr
+ }
+ }
+
+-static void add_call_dest(struct objtool_file *file, struct instruction *insn,
+- struct symbol *dest, bool sibling)
++static void annotate_call_site(struct objtool_file *file,
++ struct instruction *insn, bool sibling)
+ {
+ struct reloc *reloc = insn_reloc(file, insn);
++ struct symbol *sym = insn->call_dest;
+
+- insn->call_dest = dest;
+- if (!dest)
++ if (!sym)
++ sym = reloc->sym;
++
++ /*
++ * Alternative replacement code is just template code which is
++ * sometimes copied to the original instruction. For now, don't
++ * annotate it. (In the future we might consider annotating the
++ * original instruction if/when it ever makes sense to do so.)
++ */
++ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ return;
+
+- if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
++ if (sym->static_call_tramp) {
++ list_add_tail(&insn->call_node, &file->static_call_list);
++ return;
+ }
+
+ /*
+@@ -889,7 +898,7 @@ static void add_call_dest(struct objtool
+ * so they need a little help, NOP out any KCOV calls from noinstr
+ * text.
+ */
+- if (insn->sec->noinstr && insn->call_dest->kcov) {
++ if (insn->sec->noinstr && sym->kcov) {
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+@@ -901,7 +910,16 @@ static void add_call_dest(struct objtool
+ : arch_nop_insn(insn->len));
+
+ insn->type = sibling ? INSN_RETURN : INSN_NOP;
++ return;
+ }
++}
++
++static void add_call_dest(struct objtool_file *file, struct instruction *insn,
++ struct symbol *dest, bool sibling)
++{
++ insn->call_dest = dest;
++ if (!dest)
++ return;
+
+ /*
+ * Whatever stack impact regular CALLs have, should be undone
+@@ -911,6 +929,8 @@ static void add_call_dest(struct objtool
+ * are converted to JUMP, see read_intra_function_calls().
+ */
+ remove_insn_ops(insn);
++
++ annotate_call_site(file, insn, sibling);
+ }
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:09 +0100
+Subject: objtool: Extract elf_strtab_concat()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 417a4dc91e559f92404c2544f785b02ce75784c3 upstream.
+
+Create a common helper to append strings to a strtab.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.941474004@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c | 60 ++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 38 insertions(+), 22 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -724,13 +724,48 @@ err:
+ return NULL;
+ }
+
++static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
++{
++ Elf_Data *data;
++ Elf_Scn *s;
++ int len;
++
++ if (!strtab)
++ strtab = find_section_by_name(elf, ".strtab");
++ if (!strtab) {
++ WARN("can't find .strtab section");
++ return -1;
++ }
++
++ s = elf_getscn(elf->elf, strtab->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return -1;
++ }
++
++ data = elf_newdata(s);
++ if (!data) {
++ WARN_ELF("elf_newdata");
++ return -1;
++ }
++
++ data->d_buf = str;
++ data->d_size = strlen(str) + 1;
++ data->d_align = 1;
++
++ len = strtab->len;
++ strtab->len += data->d_size;
++ strtab->changed = true;
++
++ return len;
++}
++
+ struct section *elf_create_section(struct elf *elf, const char *name,
+ unsigned int sh_flags, size_t entsize, int nr)
+ {
+ struct section *sec, *shstrtab;
+ size_t size = entsize * nr;
+ Elf_Scn *s;
+- Elf_Data *data;
+
+ sec = malloc(sizeof(*sec));
+ if (!sec) {
+@@ -787,7 +822,6 @@ struct section *elf_create_section(struc
+ sec->sh.sh_addralign = 1;
+ sec->sh.sh_flags = SHF_ALLOC | sh_flags;
+
+-
+ /* Add section name to .shstrtab (or .strtab for Clang) */
+ shstrtab = find_section_by_name(elf, ".shstrtab");
+ if (!shstrtab)
+@@ -796,27 +830,9 @@ struct section *elf_create_section(struc
+ WARN("can't find .shstrtab or .strtab section");
+ return NULL;
+ }
+-
+- s = elf_getscn(elf->elf, shstrtab->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return NULL;
+- }
+-
+- data = elf_newdata(s);
+- if (!data) {
+- WARN_ELF("elf_newdata");
++ sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
++ if (sec->sh.sh_name == -1)
+ return NULL;
+- }
+-
+- data->d_buf = sec->name;
+- data->d_size = strlen(name) + 1;
+- data->d_align = 1;
+-
+- sec->sh.sh_name = shstrtab->len;
+-
+- shstrtab->len += strlen(name) + 1;
+- shstrtab->changed = true;
+
+ list_add_tail(&sec->list, &elf->sections);
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:10 +0100
+Subject: objtool: Extract elf_symbol_add()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9a7827b7789c630c1efdb121daa42c6e77dce97f upstream.
+
+Create a common helper to add symbols.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.003468981@infradead.org
+[bwh: Backported to 5.10: rb_add() parameter order is different]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c | 56 ++++++++++++++++++++++++++++------------------------
+ 1 file changed, 31 insertions(+), 25 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -341,12 +341,39 @@ static int read_sections(struct elf *elf
+ return 0;
+ }
+
++static void elf_add_symbol(struct elf *elf, struct symbol *sym)
++{
++ struct list_head *entry;
++ struct rb_node *pnode;
++
++ sym->type = GELF_ST_TYPE(sym->sym.st_info);
++ sym->bind = GELF_ST_BIND(sym->sym.st_info);
++
++ sym->offset = sym->sym.st_value;
++ sym->len = sym->sym.st_size;
++
++ rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset);
++ pnode = rb_prev(&sym->node);
++ if (pnode)
++ entry = &rb_entry(pnode, struct symbol, node)->list;
++ else
++ entry = &sym->sec->symbol_list;
++ list_add(&sym->list, entry);
++ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
++ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
++
++ /*
++ * Don't store empty STT_NOTYPE symbols in the rbtree. They
++ * can exist within a function, confusing the sorting.
++ */
++ if (!sym->len)
++ rb_erase(&sym->node, &sym->sec->symbol_tree);
++}
++
+ static int read_symbols(struct elf *elf)
+ {
+ struct section *symtab, *symtab_shndx, *sec;
+ struct symbol *sym, *pfunc;
+- struct list_head *entry;
+- struct rb_node *pnode;
+ int symbols_nr, i;
+ char *coldstr;
+ Elf_Data *shndx_data = NULL;
+@@ -391,9 +418,6 @@ static int read_symbols(struct elf *elf)
+ goto err;
+ }
+
+- sym->type = GELF_ST_TYPE(sym->sym.st_info);
+- sym->bind = GELF_ST_BIND(sym->sym.st_info);
+-
+ if ((sym->sym.st_shndx > SHN_UNDEF &&
+ sym->sym.st_shndx < SHN_LORESERVE) ||
+ (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
+@@ -406,32 +430,14 @@ static int read_symbols(struct elf *elf)
+ sym->name);
+ goto err;
+ }
+- if (sym->type == STT_SECTION) {
++ if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
+ sym->name = sym->sec->name;
+ sym->sec->sym = sym;
+ }
+ } else
+ sym->sec = find_section_by_index(elf, 0);
+
+- sym->offset = sym->sym.st_value;
+- sym->len = sym->sym.st_size;
+-
+- rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset);
+- pnode = rb_prev(&sym->node);
+- if (pnode)
+- entry = &rb_entry(pnode, struct symbol, node)->list;
+- else
+- entry = &sym->sec->symbol_list;
+- list_add(&sym->list, entry);
+- elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+- elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+-
+- /*
+- * Don't store empty STT_NOTYPE symbols in the rbtree. They
+- * can exist within a function, confusing the sorting.
+- */
+- if (!sym->len)
+- rb_erase(&sym->node, &sym->sec->symbol_tree);
++ elf_add_symbol(elf, sym);
+ }
+
+ if (stats)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 7 Jun 2021 11:45:58 +0200
+Subject: objtool: Fix .symtab_shndx handling for elf_create_undef_symbol()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 584fd3b31889852d0d6f3dd1e3d8e9619b660d2c upstream.
+
+When an ELF object uses extended symbol section indexes (IOW it has a
+.symtab_shndx section), these must be kept in sync with the regular
+symbol table (.symtab).
+
+So for every new symbol we emit, make sure to also emit a
+.symtab_shndx value to keep the arrays of equal size.
+
+Note: since we're writing an UNDEF symbol, most GElf_Sym fields will
+be 0 and we can repurpose one (st_size) to host the 0 for the xshndx
+value.
+
+Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()")
+Reported-by: Nick Desaulniers <ndesaulniers@google.com>
+Suggested-by: Fangrui Song <maskray@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Link: https://lkml.kernel.org/r/YL3q1qFO9QIRL/BA@hirez.programming.kicks-ass.net
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c | 25 ++++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -768,7 +768,7 @@ static int elf_add_string(struct elf *el
+
+ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+ {
+- struct section *symtab;
++ struct section *symtab, *symtab_shndx;
+ struct symbol *sym;
+ Elf_Data *data;
+ Elf_Scn *s;
+@@ -819,6 +819,29 @@ struct symbol *elf_create_undef_symbol(s
+ symtab->len += data->d_size;
+ symtab->changed = true;
+
++ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
++ if (symtab_shndx) {
++ s = elf_getscn(elf->elf, symtab_shndx->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return NULL;
++ }
++
++ data = elf_newdata(s);
++ if (!data) {
++ WARN_ELF("elf_newdata");
++ return NULL;
++ }
++
++ data->d_buf = &sym->sym.st_size; /* conveniently 0 */
++ data->d_size = sizeof(Elf32_Word);
++ data->d_align = 4;
++ data->d_type = ELF_T_WORD;
++
++ symtab_shndx->len += 4;
++ symtab_shndx->changed = true;
++ }
++
+ sym->sec = find_section_by_index(elf, 0);
+
+ elf_add_symbol(elf, sym);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sun, 17 Apr 2022 17:03:36 +0200
+Subject: objtool: Fix code relocs vs weak symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4abff6d48dbcea8200c7ea35ba70c242d128ebf3 upstream.
+
+Occasionally objtool driven code patching (think .static_call_sites
+.retpoline_sites etc..) goes sideways and it tries to patch an
+instruction that doesn't match.
+
+Much head-scratching and cursing later the problem is as outlined below
+and affects every section that objtool generates for us, very much
+including the ORC data. The below uses .static_call_sites because it's
+convenient for demonstration purposes, but as mentioned the ORC
+sections, .retpoline_sites and __mcount_loc are all similarly affected.
+
+Consider:
+
+foo-weak.c:
+
+ extern void __SCT__foo(void);
+
+ __attribute__((weak)) void foo(void)
+ {
+ return __SCT__foo();
+ }
+
+foo.c:
+
+ extern void __SCT__foo(void);
+ extern void my_foo(void);
+
+ void foo(void)
+ {
+ my_foo();
+ return __SCT__foo();
+ }
+
+These generate the obvious code
+(gcc -O2 -fcf-protection=none -fno-asynchronous-unwind-tables -c foo*.c):
+
+foo-weak.o:
+0000000000000000 <foo>:
+ 0: e9 00 00 00 00 jmpq 5 <foo+0x5> 1: R_X86_64_PLT32 __SCT__foo-0x4
+
+foo.o:
+0000000000000000 <foo>:
+ 0: 48 83 ec 08 sub $0x8,%rsp
+ 4: e8 00 00 00 00 callq 9 <foo+0x9> 5: R_X86_64_PLT32 my_foo-0x4
+ 9: 48 83 c4 08 add $0x8,%rsp
+ d: e9 00 00 00 00 jmpq 12 <foo+0x12> e: R_X86_64_PLT32 __SCT__foo-0x4
+
+Now, when we link these two files together, you get something like
+(ld -r -o foos.o foo-weak.o foo.o):
+
+foos.o:
+0000000000000000 <foo-0x10>:
+ 0: e9 00 00 00 00 jmpq 5 <foo-0xb> 1: R_X86_64_PLT32 __SCT__foo-0x4
+ 5: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1)
+ f: 90 nop
+
+0000000000000010 <foo>:
+ 10: 48 83 ec 08 sub $0x8,%rsp
+ 14: e8 00 00 00 00 callq 19 <foo+0x9> 15: R_X86_64_PLT32 my_foo-0x4
+ 19: 48 83 c4 08 add $0x8,%rsp
+ 1d: e9 00 00 00 00 jmpq 22 <foo+0x12> 1e: R_X86_64_PLT32 __SCT__foo-0x4
+
+Noting that ld preserves the weak function text, but strips the symbol
+off of it (hence objdump doing that funny negative offset thing). This
+does lead to 'interesting' unused code issues with objtool when ran on
+linked objects, but that seems to be working (fingers crossed).
+
+So far so good.. Now lets consider the objtool static_call output
+section (readelf output, old binutils):
+
+foo-weak.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + 0
+0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+
+foo.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + d
+0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+
+foos.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000000 0000000100000002 R_X86_64_PC32 0000000000000000 .text + 0
+0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+0000000000000008 0000000100000002 R_X86_64_PC32 0000000000000000 .text + 1d
+000000000000000c 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+
+So we have two patch sites, one in the dead code of the weak foo and one
+in the real foo. All is well.
+
+*HOWEVER*, when the toolchain strips unused section symbols it
+generates things like this (using new enough binutils):
+
+foo-weak.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 foo + 0
+0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+
+foo.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 foo + d
+0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+
+foos.o:
+
+Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000000 0000000100000002 R_X86_64_PC32 0000000000000000 foo + 0
+0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+0000000000000008 0000000100000002 R_X86_64_PC32 0000000000000000 foo + d
+000000000000000c 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1
+
+And now we can see how that foos.o .static_call_sites goes side-ways, we
+now have _two_ patch sites in foo. One for the weak symbol at foo+0
+(which is no longer a static_call site!) and one at foo+d which is in
+fact the right location.
+
+This seems to happen when objtool cannot find a section symbol, in which
+case it falls back to any other symbol to key off of, however in this
+case that goes terribly wrong!
+
+As such, teach objtool to create a section symbol when there isn't
+one.
+
+Fixes: 44f6a7c0755d ("objtool: Fix seg fault with Clang non-section symbols")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lkml.kernel.org/r/20220419203807.655552918@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c | 187 +++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 165 insertions(+), 22 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -537,37 +537,180 @@ int elf_add_reloc(struct elf *elf, struc
+ return 0;
+ }
+
+-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+- unsigned long offset, unsigned int type,
+- struct section *insn_sec, unsigned long insn_off)
++/*
++ * Ensure that any reloc section containing references to @sym is marked
++ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
++ * with the new symbol index.
++ */
++static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
++{
++ struct section *sec;
++
++ list_for_each_entry(sec, &elf->sections, list) {
++ struct reloc *reloc;
++
++ if (sec->changed)
++ continue;
++
++ list_for_each_entry(reloc, &sec->reloc_list, list) {
++ if (reloc->sym == sym) {
++ sec->changed = true;
++ break;
++ }
++ }
++ }
++}
++
++/*
++ * Move the first global symbol, as per sh_info, into a new, higher symbol
++ * index. This fees up the shndx for a new local symbol.
++ */
++static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
++ struct section *symtab_shndx)
+ {
++ Elf_Data *data, *shndx_data = NULL;
++ Elf32_Word first_non_local;
+ struct symbol *sym;
+- int addend;
++ Elf_Scn *s;
+
+- if (insn_sec->sym) {
+- sym = insn_sec->sym;
+- addend = insn_off;
++ first_non_local = symtab->sh.sh_info;
+
+- } else {
+- /*
+- * The Clang assembler strips section symbols, so we have to
+- * reference the function symbol instead:
+- */
+- sym = find_symbol_containing(insn_sec, insn_off);
+- if (!sym) {
+- /*
+- * Hack alert. This happens when we need to reference
+- * the NOP pad insn immediately after the function.
+- */
+- sym = find_symbol_containing(insn_sec, insn_off - 1);
++ sym = find_symbol_by_index(elf, first_non_local);
++ if (!sym) {
++ WARN("no non-local symbols !?");
++ return first_non_local;
++ }
++
++ s = elf_getscn(elf->elf, symtab->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return -1;
++ }
++
++ data = elf_newdata(s);
++ if (!data) {
++ WARN_ELF("elf_newdata");
++ return -1;
++ }
++
++ data->d_buf = &sym->sym;
++ data->d_size = sizeof(sym->sym);
++ data->d_align = 1;
++ data->d_type = ELF_T_SYM;
++
++ sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
++ elf_dirty_reloc_sym(elf, sym);
++
++ symtab->sh.sh_info += 1;
++ symtab->sh.sh_size += data->d_size;
++ symtab->changed = true;
++
++ if (symtab_shndx) {
++ s = elf_getscn(elf->elf, symtab_shndx->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return -1;
+ }
+
+- if (!sym) {
+- WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
++ shndx_data = elf_newdata(s);
++ if (!shndx_data) {
++ WARN_ELF("elf_newshndx_data");
+ return -1;
+ }
+
+- addend = insn_off - sym->offset;
++ shndx_data->d_buf = &sym->sec->idx;
++ shndx_data->d_size = sizeof(Elf32_Word);
++ shndx_data->d_align = 4;
++ shndx_data->d_type = ELF_T_WORD;
++
++ symtab_shndx->sh.sh_size += 4;
++ symtab_shndx->changed = true;
++ }
++
++ return first_non_local;
++}
++
++static struct symbol *
++elf_create_section_symbol(struct elf *elf, struct section *sec)
++{
++ struct section *symtab, *symtab_shndx;
++ Elf_Data *shndx_data = NULL;
++ struct symbol *sym;
++ Elf32_Word shndx;
++
++ symtab = find_section_by_name(elf, ".symtab");
++ if (symtab) {
++ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
++ if (symtab_shndx)
++ shndx_data = symtab_shndx->data;
++ } else {
++ WARN("no .symtab");
++ return NULL;
++ }
++
++ sym = malloc(sizeof(*sym));
++ if (!sym) {
++ perror("malloc");
++ return NULL;
++ }
++ memset(sym, 0, sizeof(*sym));
++
++ sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
++ if (sym->idx < 0) {
++ WARN("elf_move_global_symbol");
++ return NULL;
++ }
++
++ sym->name = sec->name;
++ sym->sec = sec;
++
++ // st_name 0
++ sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
++ // st_other 0
++ // st_value 0
++ // st_size 0
++ shndx = sec->idx;
++ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
++ sym->sym.st_shndx = shndx;
++ if (!shndx_data)
++ shndx = 0;
++ } else {
++ sym->sym.st_shndx = SHN_XINDEX;
++ if (!shndx_data) {
++ WARN("no .symtab_shndx");
++ return NULL;
++ }
++ }
++
++ if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
++ WARN_ELF("gelf_update_symshndx");
++ return NULL;
++ }
++
++ elf_add_symbol(elf, sym);
++
++ return sym;
++}
++
++int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
++ unsigned long offset, unsigned int type,
++ struct section *insn_sec, unsigned long insn_off)
++{
++ struct symbol *sym = insn_sec->sym;
++ int addend = insn_off;
++
++ if (!sym) {
++ /*
++ * Due to how weak functions work, we must use section based
++ * relocations. Symbol based relocations would result in the
++ * weak and non-weak function annotations being overlaid on the
++ * non-weak function after linking.
++ */
++ sym = elf_create_section_symbol(elf, insn_sec);
++ if (!sym)
++ return -1;
++
++ insn_sec->sym = sym;
+ }
+
+ return elf_add_reloc(elf, sec, offset, type, sym, addend);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 16 May 2022 11:06:36 -0400
+Subject: objtool: Fix objtool regression on x32 systems
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 22682a07acc308ef78681572e19502ce8893c4d4 upstream.
+
+Commit c087c6e7b551 ("objtool: Fix type of reloc::addend") failed to
+appreciate cross building from ILP32 hosts, where 'int' == 'long' and
+the issue persists.
+
+As such, use s64/int64_t/Elf64_Sxword for this field and suffer the
+pain that is ISO C99 printf formats for it.
+
+Fixes: c087c6e7b551 ("objtool: Fix type of reloc::addend")
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+[peterz: reword changelog, s/long long/s64/]
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/alpine.LRH.2.02.2205161041260.11556@file01.intranet.prod.int.rdu2.redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 9 +++++----
+ tools/objtool/elf.c | 2 +-
+ tools/objtool/elf.h | 4 ++--
+ 3 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -5,6 +5,7 @@
+
+ #include <string.h>
+ #include <stdlib.h>
++#include <inttypes.h>
+ #include <sys/mman.h>
+
+ #include "builtin.h"
+@@ -467,12 +468,12 @@ static int add_dead_ends(struct objtool_
+ else if (reloc->addend == reloc->sym->sec->len) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
+- WARN("can't find unreachable insn at %s+0x%lx",
++ WARN("can't find unreachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+ } else {
+- WARN("can't find unreachable insn at %s+0x%lx",
++ WARN("can't find unreachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+@@ -502,12 +503,12 @@ reachable:
+ else if (reloc->addend == reloc->sym->sec->len) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
+- WARN("can't find reachable insn at %s+0x%lx",
++ WARN("can't find reachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+ } else {
+- WARN("can't find reachable insn at %s+0x%lx",
++ WARN("can't find reachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -510,7 +510,7 @@ static struct section *elf_create_reloc_
+ int reltype);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+- unsigned int type, struct symbol *sym, long addend)
++ unsigned int type, struct symbol *sym, s64 addend)
+ {
+ struct reloc *reloc;
+
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -73,7 +73,7 @@ struct reloc {
+ struct symbol *sym;
+ unsigned long offset;
+ unsigned int type;
+- long addend;
++ s64 addend;
+ int idx;
+ bool jump_table_start;
+ };
+@@ -127,7 +127,7 @@ struct elf *elf_open_read(const char *na
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+- unsigned int type, struct symbol *sym, long addend);
++ unsigned int type, struct symbol *sym, s64 addend);
+ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 23 Mar 2022 23:35:01 +0100
+Subject: objtool: Fix SLS validation for kcov tail-call replacement
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7a53f408902d913cd541b4f8ad7dbcd4961f5b82 upstream.
+
+Since not all compilers have a function attribute to disable KCOV
+instrumentation, objtool can rewrite KCOV instrumentation in noinstr
+functions as per commit:
+
+ f56dae88a81f ("objtool: Handle __sanitize_cov*() tail calls")
+
+However, this has subtle interaction with the SLS validation from
+commit:
+
+ 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation")
+
+In that when a tail-call instruction is replaced with a RET an
+additional INT3 instruction is also written, but is not represented in
+the decoded instruction stream.
+
+This then leads to false positive missing INT3 objtool warnings in
+noinstr code.
+
+Instead of adding additional struct instruction objects, mark the RET
+instruction with retpoline_safe to suppress the warning (since we know
+there really is an INT3).
+
+Fixes: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20220323230712.GA8939@worktop.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -961,6 +961,17 @@ static void annotate_call_site(struct ob
+ : arch_nop_insn(insn->len));
+
+ insn->type = sibling ? INSN_RETURN : INSN_NOP;
++
++ if (sibling) {
++ /*
++ * We've replaced the tail-call JMP insn by two new
++ * insn: RET; INT3, except we only have a single struct
++ * insn here. Mark it retpoline_safe to avoid the SLS
++ * warning, instead of adding another insn.
++ */
++ insn->retpoline_safe = true;
++ }
++
+ return;
+ }
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 17 May 2022 17:42:04 +0200
+Subject: objtool: Fix symbol creation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ead165fa1042247b033afad7be4be9b815d04ade upstream.
+
+Nathan reported objtool failing with the following messages:
+
+ warning: objtool: no non-local symbols !?
+ warning: objtool: gelf_update_symshndx: invalid section index
+
+The problem is due to commit 4abff6d48dbc ("objtool: Fix code relocs
+vs weak symbols") failing to consider the case where an object would
+have no non-local symbols.
+
+The problem that commit tries to address is adding a STB_LOCAL symbol
+to the symbol table in light of the ELF spec's requirement that:
+
+ In each symbol table, all symbols with STB_LOCAL binding precede the
+ weak and global symbols. As ``Sections'' above describes, a symbol
+ table section's sh_info section header member holds the symbol table
+ index for the first non-local symbol.
+
+The approach taken is to find this first non-local symbol, move that
+to the end and then re-use the freed spot to insert a new local symbol
+and increment sh_info.
+
+Except it never considered the case of object files without global
+symbols and got a whole bunch of details wrong -- so many in fact that
+it is a wonder it ever worked :/
+
+Specifically:
+
+ - It failed to re-hash the symbol on the new index, so a subsequent
+ find_symbol_by_index() would not find it at the new location and a
+ query for the old location would now return a non-deterministic
+ choice between the old and new symbol.
+
+ - It failed to appreciate that the GElf wrappers are not a valid disk
+ format (it works because GElf is basically Elf64 and we only
+ support x86_64 atm.)
+
+ - It failed to fully appreciate how horrible the libelf API really is
+ and got the gelf_update_symshndx() call pretty much completely
+ wrong; with the direct consequence that if inserting a second
+ STB_LOCAL symbol would require moving the same STB_GLOBAL symbol
+ again it would completely come unstuck.
+
+Write a new elf_update_symbol() function that wraps all the magic
+required to update or create a new symbol at a given index.
+
+Specifically, gelf_update_sym*() require an @ndx argument that is
+relative to the @data argument; this means you have to manually
+iterate the section data descriptor list and update @ndx.
+
+Fixes: 4abff6d48dbc ("objtool: Fix code relocs vs weak symbols")
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/YoPCTEYjoPqE4ZxB@hirez.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10: elf_hash_add() takes a hash table pointer,
+ not just a name]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/elf.c | 196 +++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 128 insertions(+), 68 deletions(-)
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -346,6 +346,8 @@ static void elf_add_symbol(struct elf *e
+ struct list_head *entry;
+ struct rb_node *pnode;
+
++ sym->alias = sym;
++
+ sym->type = GELF_ST_TYPE(sym->sym.st_info);
+ sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+@@ -401,7 +403,6 @@ static int read_symbols(struct elf *elf)
+ return -1;
+ }
+ memset(sym, 0, sizeof(*sym));
+- sym->alias = sym;
+
+ sym->idx = i;
+
+@@ -562,24 +563,21 @@ static void elf_dirty_reloc_sym(struct e
+ }
+
+ /*
+- * Move the first global symbol, as per sh_info, into a new, higher symbol
+- * index. This fees up the shndx for a new local symbol.
++ * The libelf API is terrible; gelf_update_sym*() takes a data block relative
++ * index value, *NOT* the symbol index. As such, iterate the data blocks and
++ * adjust index until it fits.
++ *
++ * If no data block is found, allow adding a new data block provided the index
++ * is only one past the end.
+ */
+-static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
+- struct section *symtab_shndx)
++static int elf_update_symbol(struct elf *elf, struct section *symtab,
++ struct section *symtab_shndx, struct symbol *sym)
+ {
+- Elf_Data *data, *shndx_data = NULL;
+- Elf32_Word first_non_local;
+- struct symbol *sym;
+- Elf_Scn *s;
+-
+- first_non_local = symtab->sh.sh_info;
+-
+- sym = find_symbol_by_index(elf, first_non_local);
+- if (!sym) {
+- WARN("no non-local symbols !?");
+- return first_non_local;
+- }
++ Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
++ Elf_Data *symtab_data = NULL, *shndx_data = NULL;
++ Elf64_Xword entsize = symtab->sh.sh_entsize;
++ int max_idx, idx = sym->idx;
++ Elf_Scn *s, *t = NULL;
+
+ s = elf_getscn(elf->elf, symtab->idx);
+ if (!s) {
+@@ -587,79 +585,124 @@ static int elf_move_global_symbol(struct
+ return -1;
+ }
+
+- data = elf_newdata(s);
+- if (!data) {
+- WARN_ELF("elf_newdata");
+- return -1;
++ if (symtab_shndx) {
++ t = elf_getscn(elf->elf, symtab_shndx->idx);
++ if (!t) {
++ WARN_ELF("elf_getscn");
++ return -1;
++ }
+ }
+
+- data->d_buf = &sym->sym;
+- data->d_size = sizeof(sym->sym);
+- data->d_align = 1;
+- data->d_type = ELF_T_SYM;
++ for (;;) {
++ /* get next data descriptor for the relevant sections */
++ symtab_data = elf_getdata(s, symtab_data);
++ if (t)
++ shndx_data = elf_getdata(t, shndx_data);
++
++ /* end-of-list */
++ if (!symtab_data) {
++ void *buf;
++
++ if (idx) {
++ /* we don't do holes in symbol tables */
++ WARN("index out of range");
++ return -1;
++ }
+
+- sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
+- elf_dirty_reloc_sym(elf, sym);
++ /* if @idx == 0, it's the next contiguous entry, create it */
++ symtab_data = elf_newdata(s);
++ if (t)
++ shndx_data = elf_newdata(t);
++
++ buf = calloc(1, entsize);
++ if (!buf) {
++ WARN("malloc");
++ return -1;
++ }
+
+- symtab->sh.sh_info += 1;
+- symtab->sh.sh_size += data->d_size;
+- symtab->changed = true;
++ symtab_data->d_buf = buf;
++ symtab_data->d_size = entsize;
++ symtab_data->d_align = 1;
++ symtab_data->d_type = ELF_T_SYM;
++
++ symtab->sh.sh_size += entsize;
++ symtab->changed = true;
++
++ if (t) {
++ shndx_data->d_buf = &sym->sec->idx;
++ shndx_data->d_size = sizeof(Elf32_Word);
++ shndx_data->d_align = sizeof(Elf32_Word);
++ shndx_data->d_type = ELF_T_WORD;
+
+- if (symtab_shndx) {
+- s = elf_getscn(elf->elf, symtab_shndx->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
++ symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
++ symtab_shndx->changed = true;
++ }
++
++ break;
++ }
++
++ /* empty blocks should not happen */
++ if (!symtab_data->d_size) {
++ WARN("zero size data");
+ return -1;
+ }
+
+- shndx_data = elf_newdata(s);
++ /* is this the right block? */
++ max_idx = symtab_data->d_size / entsize;
++ if (idx < max_idx)
++ break;
++
++ /* adjust index and try again */
++ idx -= max_idx;
++ }
++
++ /* something went side-ways */
++ if (idx < 0) {
++ WARN("negative index");
++ return -1;
++ }
++
++ /* setup extended section index magic and write the symbol */
++ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
++ sym->sym.st_shndx = shndx;
++ if (!shndx_data)
++ shndx = 0;
++ } else {
++ sym->sym.st_shndx = SHN_XINDEX;
+ if (!shndx_data) {
+- WARN_ELF("elf_newshndx_data");
++ WARN("no .symtab_shndx");
+ return -1;
+ }
++ }
+
+- shndx_data->d_buf = &sym->sec->idx;
+- shndx_data->d_size = sizeof(Elf32_Word);
+- shndx_data->d_align = 4;
+- shndx_data->d_type = ELF_T_WORD;
+-
+- symtab_shndx->sh.sh_size += 4;
+- symtab_shndx->changed = true;
++ if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) {
++ WARN_ELF("gelf_update_symshndx");
++ return -1;
+ }
+
+- return first_non_local;
++ return 0;
+ }
+
+ static struct symbol *
+ elf_create_section_symbol(struct elf *elf, struct section *sec)
+ {
+ struct section *symtab, *symtab_shndx;
+- Elf_Data *shndx_data = NULL;
+- struct symbol *sym;
+- Elf32_Word shndx;
++ Elf32_Word first_non_local, new_idx;
++ struct symbol *sym, *old;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (symtab) {
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+- if (symtab_shndx)
+- shndx_data = symtab_shndx->data;
+ } else {
+ WARN("no .symtab");
+ return NULL;
+ }
+
+- sym = malloc(sizeof(*sym));
++ sym = calloc(1, sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return NULL;
+ }
+- memset(sym, 0, sizeof(*sym));
+-
+- sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
+- if (sym->idx < 0) {
+- WARN("elf_move_global_symbol");
+- return NULL;
+- }
+
+ sym->name = sec->name;
+ sym->sec = sec;
+@@ -669,24 +712,41 @@ elf_create_section_symbol(struct elf *el
+ // st_other 0
+ // st_value 0
+ // st_size 0
+- shndx = sec->idx;
+- if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+- sym->sym.st_shndx = shndx;
+- if (!shndx_data)
+- shndx = 0;
+- } else {
+- sym->sym.st_shndx = SHN_XINDEX;
+- if (!shndx_data) {
+- WARN("no .symtab_shndx");
++
++ /*
++ * Move the first global symbol, as per sh_info, into a new, higher
++ * symbol index. This fees up a spot for a new local symbol.
++ */
++ first_non_local = symtab->sh.sh_info;
++ new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
++ old = find_symbol_by_index(elf, first_non_local);
++ if (old) {
++ old->idx = new_idx;
++
++ hlist_del(&old->hash);
++ elf_hash_add(elf->symbol_hash, &old->hash, old->idx);
++
++ elf_dirty_reloc_sym(elf, old);
++
++ if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
++ WARN("elf_update_symbol move");
+ return NULL;
+ }
++
++ new_idx = first_non_local;
+ }
+
+- if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
+- WARN_ELF("gelf_update_symshndx");
++ sym->idx = new_idx;
++ if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
++ WARN("elf_update_symbol");
+ return NULL;
+ }
+
++ /*
++ * Either way, we added a LOCAL symbol.
++ */
++ symtab->sh.sh_info += 1;
++
+ elf_add_symbol(elf, sym);
+
+ return sym;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sun, 17 Apr 2022 17:03:40 +0200
+Subject: objtool: Fix type of reloc::addend
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c087c6e7b551b7f208c0b852304f044954cf2bb3 upstream.
+
+Elf{32,64}_Rela::r_addend is of type: Elf{32,64}_Sword, that means
+that our reloc::addend needs to be long or face truncation issues when
+we do elf_rebuild_reloc_section():
+
+ - 107: 48 b8 00 00 00 00 00 00 00 00 movabs $0x0,%rax 109: R_X86_64_64 level4_kernel_pgt+0x80000067
+ + 107: 48 b8 00 00 00 00 00 00 00 00 movabs $0x0,%rax 109: R_X86_64_64 level4_kernel_pgt-0x7fffff99
+
+Fixes: 627fce14809b ("objtool: Add ORC unwind table generation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lkml.kernel.org/r/20220419203807.596871927@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 8 ++++----
+ tools/objtool/elf.c | 2 +-
+ tools/objtool/elf.h | 4 ++--
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -467,12 +467,12 @@ static int add_dead_ends(struct objtool_
+ else if (reloc->addend == reloc->sym->sec->len) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
+- WARN("can't find unreachable insn at %s+0x%x",
++ WARN("can't find unreachable insn at %s+0x%lx",
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+ } else {
+- WARN("can't find unreachable insn at %s+0x%x",
++ WARN("can't find unreachable insn at %s+0x%lx",
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+@@ -502,12 +502,12 @@ reachable:
+ else if (reloc->addend == reloc->sym->sec->len) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
+- WARN("can't find reachable insn at %s+0x%x",
++ WARN("can't find reachable insn at %s+0x%lx",
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+ } else {
+- WARN("can't find reachable insn at %s+0x%x",
++ WARN("can't find reachable insn at %s+0x%lx",
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -509,7 +509,7 @@ static struct section *elf_create_reloc_
+ int reltype);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+- unsigned int type, struct symbol *sym, int addend)
++ unsigned int type, struct symbol *sym, long addend)
+ {
+ struct reloc *reloc;
+
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -73,7 +73,7 @@ struct reloc {
+ struct symbol *sym;
+ unsigned long offset;
+ unsigned int type;
+- int addend;
++ long addend;
+ int idx;
+ bool jump_table_start;
+ };
+@@ -127,7 +127,7 @@ struct elf *elf_open_read(const char *na
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+- unsigned int type, struct symbol *sym, int addend);
++ unsigned int type, struct symbol *sym, long addend);
+ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 24 Jun 2021 11:41:02 +0200
+Subject: objtool: Handle __sanitize_cov*() tail calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f56dae88a81fded66adf2bea9922d1d98d1da14f upstream.
+
+Turns out the compilers also generate tail calls to __sanitize_cov*(),
+make sure to also patch those out in noinstr code.
+
+Fixes: 0f1441b44e82 ("objtool: Fix noinstr vs KCOV")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Marco Elver <elver@google.com>
+Link: https://lore.kernel.org/r/20210624095147.818783799@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+[bwh: Backported to 5.10:
+ - objtool doesn't have any mcount handling
+ - Write the NOPs as hex literals since we can't use <asm/nops.h>]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h | 1
+ tools/objtool/arch/x86/decode.c | 20 ++++++
+ tools/objtool/check.c | 123 +++++++++++++++++++++-------------------
+ 3 files changed, 86 insertions(+), 58 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -83,6 +83,7 @@ unsigned long arch_jump_destination(stru
+ unsigned long arch_dest_reloc_offset(int addend);
+
+ const char *arch_nop_insn(int len);
++const char *arch_ret_insn(int len);
+
+ int arch_decode_hint_reg(u8 sp_reg, int *base);
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -586,6 +586,26 @@ const char *arch_nop_insn(int len)
+ return nops[len-1];
+ }
+
++#define BYTE_RET 0xC3
++
++const char *arch_ret_insn(int len)
++{
++ static const char ret[5][5] = {
++ { BYTE_RET },
++ { BYTE_RET, 0x90 },
++ { BYTE_RET, 0x66, 0x90 },
++ { BYTE_RET, 0x0f, 0x1f, 0x00 },
++ { BYTE_RET, 0x0f, 0x1f, 0x40, 0x00 },
++ };
++
++ if (len < 1 || len > 5) {
++ WARN("invalid RET size: %d\n", len);
++ return NULL;
++ }
++
++ return ret[len-1];
++}
++
+ /* asm/alternative.h ? */
+
+ #define ALTINSTR_FLAG_INV (1 << 15)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -860,6 +860,60 @@ static struct reloc *insn_reloc(struct o
+ return insn->reloc;
+ }
+
++static void remove_insn_ops(struct instruction *insn)
++{
++ struct stack_op *op, *tmp;
++
++ list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
++ list_del(&op->list);
++ free(op);
++ }
++}
++
++static void add_call_dest(struct objtool_file *file, struct instruction *insn,
++ struct symbol *dest, bool sibling)
++{
++ struct reloc *reloc = insn_reloc(file, insn);
++
++ insn->call_dest = dest;
++ if (!dest)
++ return;
++
++ if (insn->call_dest->static_call_tramp) {
++ list_add_tail(&insn->call_node,
++ &file->static_call_list);
++ }
++
++ /*
++ * Many compilers cannot disable KCOV with a function attribute
++ * so they need a little help, NOP out any KCOV calls from noinstr
++ * text.
++ */
++ if (insn->sec->noinstr &&
++ !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
++ if (reloc) {
++ reloc->type = R_NONE;
++ elf_write_reloc(file->elf, reloc);
++ }
++
++ elf_write_insn(file->elf, insn->sec,
++ insn->offset, insn->len,
++ sibling ? arch_ret_insn(insn->len)
++ : arch_nop_insn(insn->len));
++
++ insn->type = sibling ? INSN_RETURN : INSN_NOP;
++ }
++
++ /*
++ * Whatever stack impact regular CALLs have, should be undone
++ * by the RETURN of the called function.
++ *
++ * Annotated intra-function calls retain the stack_ops but
++ * are converted to JUMP, see read_intra_function_calls().
++ */
++ remove_insn_ops(insn);
++}
++
+ /*
+ * Find the destination instructions for all jumps.
+ */
+@@ -898,11 +952,7 @@ static int add_jump_destinations(struct
+ continue;
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
+- insn->call_dest = reloc->sym;
+- if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
+- }
++ add_call_dest(file, insn, reloc->sym, true);
+ continue;
+ } else if (reloc->sym->sec->idx) {
+ dest_sec = reloc->sym->sec;
+@@ -958,13 +1008,8 @@ static int add_jump_destinations(struct
+
+ } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
+ insn->jump_dest->offset == insn->jump_dest->func->offset) {
+-
+ /* internal sibling call (without reloc) */
+- insn->call_dest = insn->jump_dest->func;
+- if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
+- }
++ add_call_dest(file, insn, insn->jump_dest->func, true);
+ }
+ }
+ }
+@@ -972,16 +1017,6 @@ static int add_jump_destinations(struct
+ return 0;
+ }
+
+-static void remove_insn_ops(struct instruction *insn)
+-{
+- struct stack_op *op, *tmp;
+-
+- list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+- list_del(&op->list);
+- free(op);
+- }
+-}
+-
+ static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+ {
+ struct symbol *call_dest;
+@@ -1000,6 +1035,7 @@ static int add_call_destinations(struct
+ {
+ struct instruction *insn;
+ unsigned long dest_off;
++ struct symbol *dest;
+ struct reloc *reloc;
+
+ for_each_insn(file, insn) {
+@@ -1009,7 +1045,9 @@ static int add_call_destinations(struct
+ reloc = insn_reloc(file, insn);
+ if (!reloc) {
+ dest_off = arch_jump_destination(insn);
+- insn->call_dest = find_call_destination(insn->sec, dest_off);
++ dest = find_call_destination(insn->sec, dest_off);
++
++ add_call_dest(file, insn, dest, false);
+
+ if (insn->ignore)
+ continue;
+@@ -1027,9 +1065,8 @@ static int add_call_destinations(struct
+
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- insn->call_dest = find_call_destination(reloc->sym->sec,
+- dest_off);
+- if (!insn->call_dest) {
++ dest = find_call_destination(reloc->sym->sec, dest_off);
++ if (!dest) {
+ WARN_FUNC("can't find call dest symbol at %s+0x%lx",
+ insn->sec, insn->offset,
+ reloc->sym->sec->name,
+@@ -1037,6 +1074,8 @@ static int add_call_destinations(struct
+ return -1;
+ }
+
++ add_call_dest(file, insn, dest, false);
++
+ } else if (arch_is_retpoline(reloc->sym)) {
+ /*
+ * Retpoline calls are really dynamic calls in
+@@ -1052,39 +1091,7 @@ static int add_call_destinations(struct
+ continue;
+
+ } else
+- insn->call_dest = reloc->sym;
+-
+- if (insn->call_dest && insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
+- }
+-
+- /*
+- * Many compilers cannot disable KCOV with a function attribute
+- * so they need a little help, NOP out any KCOV calls from noinstr
+- * text.
+- */
+- if (insn->sec->noinstr &&
+- !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
+- if (reloc) {
+- reloc->type = R_NONE;
+- elf_write_reloc(file->elf, reloc);
+- }
+-
+- elf_write_insn(file->elf, insn->sec,
+- insn->offset, insn->len,
+- arch_nop_insn(insn->len));
+- insn->type = INSN_NOP;
+- }
+-
+- /*
+- * Whatever stack impact regular CALLs have, should be undone
+- * by the RETURN of the called function.
+- *
+- * Annotated intra-function calls retain the stack_ops but
+- * are converted to JUMP, see read_intra_function_calls().
+- */
+- remove_insn_ops(insn);
++ add_call_dest(file, insn, reloc->sym, false);
+ }
+
+ return 0;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:04 +0100
+Subject: objtool: Handle per arch retpoline naming
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 530b4ddd9dd92b263081f5c7786d39a8129c8b2d upstream.
+
+The __x86_indirect_ naming is obviously not generic. Shorten to allow
+matching some additional magic names later.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.630296706@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h | 2 ++
+ tools/objtool/arch/x86/decode.c | 5 +++++
+ tools/objtool/check.c | 9 +++++++--
+ 3 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -86,4 +86,6 @@ const char *arch_nop_insn(int len);
+
+ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
+
++bool arch_is_retpoline(struct symbol *sym);
++
+ #endif /* _ARCH_H */
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -620,3 +620,8 @@ int arch_decode_hint_reg(struct instruct
+
+ return 0;
+ }
++
++bool arch_is_retpoline(struct symbol *sym)
++{
++ return !strncmp(sym->name, "__x86_indirect_", 15);
++}
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -778,6 +778,11 @@ static int add_ignore_alternatives(struc
+ return 0;
+ }
+
++__weak bool arch_is_retpoline(struct symbol *sym)
++{
++ return false;
++}
++
+ /*
+ * Find the destination instructions for all jumps.
+ */
+@@ -800,7 +805,7 @@ static int add_jump_destinations(struct
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
++ } else if (arch_is_retpoline(reloc->sym)) {
+ /*
+ * Retpoline jumps are really dynamic jumps in
+ * disguise, so convert them accordingly.
+@@ -954,7 +959,7 @@ static int add_call_destinations(struct
+ return -1;
+ }
+
+- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
++ } else if (arch_is_retpoline(reloc->sym)) {
+ /*
+ * Retpoline calls are really dynamic calls in
+ * disguise, so convert them accordingly.
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 24 Jun 2021 11:41:01 +0200
+Subject: objtool: Introduce CFI hash
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 8b946cc38e063f0f7bb67789478c38f6d7d457c9 upstream.
+
+Andi reported that objtool on vmlinux.o consumes more memory than his
+system has, leading to horrific performance.
+
+This is in part because we keep a struct instruction for every
+instruction in the file in-memory. Shrink struct instruction by
+removing the CFI state (which includes full register state) from it
+and demand allocating it.
+
+Given most instructions don't actually change CFI state, there's lots
+of repetition there, so add a hash table to find previous CFI
+instances.
+
+Reduces memory consumption (and runtime) for processing an
+x86_64-allyesconfig:
+
+ pre: 4:40.84 real, 143.99 user, 44.18 sys, 30624988 mem
+ post: 2:14.61 real, 108.58 user, 25.04 sys, 16396184 mem
+
+Suggested-by: Andi Kleen <andi@firstfloor.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20210624095147.756759107@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - Don't use bswap_if_needed() since we don't have any of the other fixes
+ for mixed-endian cross-compilation
+ - Since we don't have "objtool: Rewrite hashtable sizing", make
+ cfi_hash_alloc() set the number of bits similarly to elf_hash_bits()
+ - objtool doesn't have any mcount handling
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h | 2
+ tools/objtool/arch/x86/decode.c | 20 ++---
+ tools/objtool/cfi.h | 2
+ tools/objtool/check.c | 154 +++++++++++++++++++++++++++++++++++-----
+ tools/objtool/check.h | 2
+ tools/objtool/orc_gen.c | 15 ++-
+ 6 files changed, 160 insertions(+), 35 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -84,7 +84,7 @@ unsigned long arch_dest_reloc_offset(int
+
+ const char *arch_nop_insn(int len);
+
+-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
++int arch_decode_hint_reg(u8 sp_reg, int *base);
+
+ bool arch_is_retpoline(struct symbol *sym);
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -706,34 +706,32 @@ int arch_rewrite_retpolines(struct objto
+ return 0;
+ }
+
+-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
++int arch_decode_hint_reg(u8 sp_reg, int *base)
+ {
+- struct cfi_reg *cfa = &insn->cfi.cfa;
+-
+ switch (sp_reg) {
+ case ORC_REG_UNDEFINED:
+- cfa->base = CFI_UNDEFINED;
++ *base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+- cfa->base = CFI_SP;
++ *base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+- cfa->base = CFI_BP;
++ *base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+- cfa->base = CFI_SP_INDIRECT;
++ *base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+- cfa->base = CFI_R10;
++ *base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+- cfa->base = CFI_R13;
++ *base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+- cfa->base = CFI_DI;
++ *base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+- cfa->base = CFI_DX;
++ *base = CFI_DX;
+ break;
+ default:
+ return -1;
+--- a/tools/objtool/cfi.h
++++ b/tools/objtool/cfi.h
+@@ -7,6 +7,7 @@
+ #define _OBJTOOL_CFI_H
+
+ #include "cfi_regs.h"
++#include <linux/list.h>
+
+ #define CFI_UNDEFINED -1
+ #define CFI_CFA -2
+@@ -24,6 +25,7 @@ struct cfi_init_state {
+ };
+
+ struct cfi_state {
++ struct hlist_node hash; /* must be first, cficmp() */
+ struct cfi_reg regs[CFI_NUM_REGS];
+ struct cfi_reg vals[CFI_NUM_REGS];
+ struct cfi_reg cfa;
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -5,6 +5,7 @@
+
+ #include <string.h>
+ #include <stdlib.h>
++#include <sys/mman.h>
+
+ #include "builtin.h"
+ #include "cfi.h"
+@@ -25,7 +26,11 @@ struct alternative {
+ bool skip_orig;
+ };
+
+-struct cfi_init_state initial_func_cfi;
++static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
++
++static struct cfi_init_state initial_func_cfi;
++static struct cfi_state init_cfi;
++static struct cfi_state func_cfi;
+
+ struct instruction *find_insn(struct objtool_file *file,
+ struct section *sec, unsigned long offset)
+@@ -265,6 +270,78 @@ static void init_insn_state(struct insn_
+ state->noinstr = sec->noinstr;
+ }
+
++static struct cfi_state *cfi_alloc(void)
++{
++ struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1);
++ if (!cfi) {
++ WARN("calloc failed");
++ exit(1);
++ }
++ nr_cfi++;
++ return cfi;
++}
++
++static int cfi_bits;
++static struct hlist_head *cfi_hash;
++
++static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2)
++{
++ return memcmp((void *)cfi1 + sizeof(cfi1->hash),
++ (void *)cfi2 + sizeof(cfi2->hash),
++ sizeof(struct cfi_state) - sizeof(struct hlist_node));
++}
++
++static inline u32 cfi_key(struct cfi_state *cfi)
++{
++ return jhash((void *)cfi + sizeof(cfi->hash),
++ sizeof(*cfi) - sizeof(cfi->hash), 0);
++}
++
++static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi)
++{
++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
++ struct cfi_state *obj;
++
++ hlist_for_each_entry(obj, head, hash) {
++ if (!cficmp(cfi, obj)) {
++ nr_cfi_cache++;
++ return obj;
++ }
++ }
++
++ obj = cfi_alloc();
++ *obj = *cfi;
++ hlist_add_head(&obj->hash, head);
++
++ return obj;
++}
++
++static void cfi_hash_add(struct cfi_state *cfi)
++{
++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
++
++ hlist_add_head(&cfi->hash, head);
++}
++
++static void *cfi_hash_alloc(void)
++{
++ cfi_bits = vmlinux ? ELF_HASH_BITS - 3 : 13;
++ cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits,
++ PROT_READ|PROT_WRITE,
++ MAP_PRIVATE|MAP_ANON, -1, 0);
++ if (cfi_hash == (void *)-1L) {
++ WARN("mmap fail cfi_hash");
++ cfi_hash = NULL;
++ } else if (stats) {
++ printf("cfi_bits: %d\n", cfi_bits);
++ }
++
++ return cfi_hash;
++}
++
++static unsigned long nr_insns;
++static unsigned long nr_insns_visited;
++
+ /*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+@@ -275,7 +352,6 @@ static int decode_instructions(struct ob
+ struct symbol *func;
+ unsigned long offset;
+ struct instruction *insn;
+- unsigned long nr_insns = 0;
+ int ret;
+
+ for_each_sec(file, sec) {
+@@ -301,7 +377,6 @@ static int decode_instructions(struct ob
+ memset(insn, 0, sizeof(*insn));
+ INIT_LIST_HEAD(&insn->alts);
+ INIT_LIST_HEAD(&insn->stack_ops);
+- init_cfi_state(&insn->cfi);
+
+ insn->sec = sec;
+ insn->offset = offset;
+@@ -1077,7 +1152,6 @@ static int handle_group_alt(struct objto
+ memset(nop, 0, sizeof(*nop));
+ INIT_LIST_HEAD(&nop->alts);
+ INIT_LIST_HEAD(&nop->stack_ops);
+- init_cfi_state(&nop->cfi);
+
+ nop->sec = special_alt->new_sec;
+ nop->offset = special_alt->new_off + special_alt->new_len;
+@@ -1454,10 +1528,11 @@ static void set_func_state(struct cfi_st
+
+ static int read_unwind_hints(struct objtool_file *file)
+ {
++ struct cfi_state cfi = init_cfi;
+ struct section *sec, *relocsec;
+- struct reloc *reloc;
+ struct unwind_hint *hint;
+ struct instruction *insn;
++ struct reloc *reloc;
+ int i;
+
+ sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+@@ -1495,19 +1570,24 @@ static int read_unwind_hints(struct objt
+ insn->hint = true;
+
+ if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+- set_func_state(&insn->cfi);
++ insn->cfi = &func_cfi;
+ continue;
+ }
+
+- if (arch_decode_hint_reg(insn, hint->sp_reg)) {
++ if (insn->cfi)
++ cfi = *(insn->cfi);
++
++ if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) {
+ WARN_FUNC("unsupported unwind_hint sp base reg %d",
+ insn->sec, insn->offset, hint->sp_reg);
+ return -1;
+ }
+
+- insn->cfi.cfa.offset = hint->sp_offset;
+- insn->cfi.type = hint->type;
+- insn->cfi.end = hint->end;
++ cfi.cfa.offset = hint->sp_offset;
++ cfi.type = hint->type;
++ cfi.end = hint->end;
++
++ insn->cfi = cfi_hash_find_or_add(&cfi);
+ }
+
+ return 0;
+@@ -2283,13 +2363,18 @@ static int propagate_alt_cfi(struct objt
+ if (!insn->alt_group)
+ return 0;
+
++ if (!insn->cfi) {
++ WARN("CFI missing");
++ return -1;
++ }
++
+ alt_cfi = insn->alt_group->cfi;
+ group_off = insn->offset - insn->alt_group->first_insn->offset;
+
+ if (!alt_cfi[group_off]) {
+- alt_cfi[group_off] = &insn->cfi;
++ alt_cfi[group_off] = insn->cfi;
+ } else {
+- if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
++ if (cficmp(alt_cfi[group_off], insn->cfi)) {
+ WARN_FUNC("stack layout conflict in alternatives",
+ insn->sec, insn->offset);
+ return -1;
+@@ -2335,9 +2420,14 @@ static int handle_insn_ops(struct instru
+
+ static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
+ {
+- struct cfi_state *cfi1 = &insn->cfi;
++ struct cfi_state *cfi1 = insn->cfi;
+ int i;
+
++ if (!cfi1) {
++ WARN("CFI missing");
++ return false;
++ }
++
+ if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
+
+ WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+@@ -2522,7 +2612,7 @@ static int validate_branch(struct objtoo
+ struct instruction *insn, struct insn_state state)
+ {
+ struct alternative *alt;
+- struct instruction *next_insn;
++ struct instruction *next_insn, *prev_insn = NULL;
+ struct section *sec;
+ u8 visited;
+ int ret;
+@@ -2551,15 +2641,25 @@ static int validate_branch(struct objtoo
+
+ if (insn->visited & visited)
+ return 0;
++ } else {
++ nr_insns_visited++;
+ }
+
+ if (state.noinstr)
+ state.instr += insn->instr;
+
+- if (insn->hint)
+- state.cfi = insn->cfi;
+- else
+- insn->cfi = state.cfi;
++ if (insn->hint) {
++ state.cfi = *insn->cfi;
++ } else {
++ /* XXX track if we actually changed state.cfi */
++
++ if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
++ insn->cfi = prev_insn->cfi;
++ nr_cfi_reused++;
++ } else {
++ insn->cfi = cfi_hash_find_or_add(&state.cfi);
++ }
++ }
+
+ insn->visited |= visited;
+
+@@ -2709,6 +2809,7 @@ static int validate_branch(struct objtoo
+ return 1;
+ }
+
++ prev_insn = insn;
+ insn = next_insn;
+ }
+
+@@ -2964,10 +3065,20 @@ int check(struct objtool_file *file)
+ int ret, warnings = 0;
+
+ arch_initial_func_cfi_state(&initial_func_cfi);
++ init_cfi_state(&init_cfi);
++ init_cfi_state(&func_cfi);
++ set_func_state(&func_cfi);
++
++ if (!cfi_hash_alloc())
++ goto out;
++
++ cfi_hash_add(&init_cfi);
++ cfi_hash_add(&func_cfi);
+
+ ret = decode_sections(file);
+ if (ret < 0)
+ goto out;
++
+ warnings += ret;
+
+ if (list_empty(&file->insn_list))
+@@ -3011,6 +3122,13 @@ int check(struct objtool_file *file)
+ goto out;
+ warnings += ret;
+
++ if (stats) {
++ printf("nr_insns_visited: %ld\n", nr_insns_visited);
++ printf("nr_cfi: %ld\n", nr_cfi);
++ printf("nr_cfi_reused: %ld\n", nr_cfi_reused);
++ printf("nr_cfi_cache: %ld\n", nr_cfi_cache);
++ }
++
+ out:
+ /*
+ * For now, don't fail the kernel build on fatal warnings. These
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -59,7 +59,7 @@ struct instruction {
+ struct list_head alts;
+ struct symbol *func;
+ struct list_head stack_ops;
+- struct cfi_state cfi;
++ struct cfi_state *cfi;
+ };
+
+ static inline bool is_static_jump(struct instruction *insn)
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -12,13 +12,19 @@
+ #include "check.h"
+ #include "warn.h"
+
+-static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi,
++ struct instruction *insn)
+ {
+- struct instruction *insn = container_of(cfi, struct instruction, cfi);
+ struct cfi_reg *bp = &cfi->regs[CFI_BP];
+
+ memset(orc, 0, sizeof(*orc));
+
++ if (!cfi) {
++ orc->end = 0;
++ orc->sp_reg = ORC_REG_UNDEFINED;
++ return 0;
++ }
++
+ orc->end = cfi->end;
+
+ if (cfi->cfa.base == CFI_UNDEFINED) {
+@@ -159,7 +165,7 @@ int orc_create(struct objtool_file *file
+ int i;
+
+ if (!alt_group) {
+- if (init_orc_entry(&orc, &insn->cfi))
++ if (init_orc_entry(&orc, insn->cfi, insn))
+ return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+@@ -183,7 +189,8 @@ int orc_create(struct objtool_file *file
+ struct cfi_state *cfi = alt_group->cfi[i];
+ if (!cfi)
+ continue;
+- if (init_orc_entry(&orc, cfi))
++ /* errors are reported on the original insn */
++ if (init_orc_entry(&orc, cfi, insn))
+ return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:12 +0100
+Subject: objtool: Keep track of retpoline call sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 43d5430ad74ef5156353af7aec352426ec7a8e57 upstream.
+
+Provide infrastructure for architectures to rewrite/augment compiler
+generated retpoline calls. Similar to what we do for static_call()s,
+keep track of the instructions that are retpoline calls.
+
+Use the same list_head, since a retpoline call cannot also be a
+static_call.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.130805730@infradead.org
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h | 2 ++
+ tools/objtool/check.c | 34 +++++++++++++++++++++++++++++-----
+ tools/objtool/check.h | 2 +-
+ tools/objtool/objtool.c | 1 +
+ tools/objtool/objtool.h | 1 +
+ 5 files changed, 34 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -88,4 +88,6 @@ int arch_decode_hint_reg(struct instruct
+
+ bool arch_is_retpoline(struct symbol *sym);
+
++int arch_rewrite_retpolines(struct objtool_file *file);
++
+ #endif /* _ARCH_H */
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -451,7 +451,7 @@ static int create_static_call_sections(s
+ return 0;
+
+ idx = 0;
+- list_for_each_entry(insn, &file->static_call_list, static_call_node)
++ list_for_each_entry(insn, &file->static_call_list, call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+@@ -460,7 +460,7 @@ static int create_static_call_sections(s
+ return -1;
+
+ idx = 0;
+- list_for_each_entry(insn, &file->static_call_list, static_call_node) {
++ list_for_each_entry(insn, &file->static_call_list, call_node) {
+
+ site = (struct static_call_site *)sec->data->d_buf + idx;
+ memset(site, 0, sizeof(struct static_call_site));
+@@ -786,13 +786,16 @@ static int add_jump_destinations(struct
+ else
+ insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+
++ list_add_tail(&insn->call_node,
++ &file->retpoline_call_list);
++
+ insn->retpoline_safe = true;
+ continue;
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
+ insn->call_dest = reloc->sym;
+ if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->static_call_node,
++ list_add_tail(&insn->call_node,
+ &file->static_call_list);
+ }
+ continue;
+@@ -854,7 +857,7 @@ static int add_jump_destinations(struct
+ /* internal sibling call (without reloc) */
+ insn->call_dest = insn->jump_dest->func;
+ if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->static_call_node,
++ list_add_tail(&insn->call_node,
+ &file->static_call_list);
+ }
+ }
+@@ -938,6 +941,9 @@ static int add_call_destinations(struct
+ insn->type = INSN_CALL_DYNAMIC;
+ insn->retpoline_safe = true;
+
++ list_add_tail(&insn->call_node,
++ &file->retpoline_call_list);
++
+ remove_insn_ops(insn);
+ continue;
+
+@@ -945,7 +951,7 @@ static int add_call_destinations(struct
+ insn->call_dest = reloc->sym;
+
+ if (insn->call_dest && insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->static_call_node,
++ list_add_tail(&insn->call_node,
+ &file->static_call_list);
+ }
+
+@@ -1655,6 +1661,11 @@ static void mark_rodata(struct objtool_f
+ file->rodata = found;
+ }
+
++__weak int arch_rewrite_retpolines(struct objtool_file *file)
++{
++ return 0;
++}
++
+ static int decode_sections(struct objtool_file *file)
+ {
+ int ret;
+@@ -1683,6 +1694,10 @@ static int decode_sections(struct objtoo
+ if (ret)
+ return ret;
+
++ /*
++ * Must be before add_special_section_alts() as that depends on
++ * jump_dest being set.
++ */
+ ret = add_jump_destinations(file);
+ if (ret)
+ return ret;
+@@ -1719,6 +1734,15 @@ static int decode_sections(struct objtoo
+ if (ret)
+ return ret;
+
++ /*
++ * Must be after add_special_section_alts(), since this will emit
++ * alternatives. Must be after add_{jump,call}_destination(), since
++ * those create the call insn lists.
++ */
++ ret = arch_rewrite_retpolines(file);
++ if (ret)
++ return ret;
++
+ return 0;
+ }
+
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -39,7 +39,7 @@ struct alt_group {
+ struct instruction {
+ struct list_head list;
+ struct hlist_node hash;
+- struct list_head static_call_node;
++ struct list_head call_node;
+ struct section *sec;
+ unsigned long offset;
+ unsigned int len;
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -61,6 +61,7 @@ struct objtool_file *objtool_open_read(c
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
++ INIT_LIST_HEAD(&file.retpoline_call_list);
+ INIT_LIST_HEAD(&file.static_call_list);
+ file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = no_unreachable;
+--- a/tools/objtool/objtool.h
++++ b/tools/objtool/objtool.h
+@@ -18,6 +18,7 @@ struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 20);
++ struct list_head retpoline_call_list;
+ struct list_head static_call_list;
+ bool ignore_unreachables, c_file, hints, rodata;
+ };
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Joe Lawrence <joe.lawrence@redhat.com>
+Date: Sun, 22 Aug 2021 18:50:36 -0400
+Subject: objtool: Make .altinstructions section entry size consistent
+
+From: Joe Lawrence <joe.lawrence@redhat.com>
+
+commit dc02368164bd0ec603e3f5b3dd8252744a667b8a upstream.
+
+Commit e31694e0a7a7 ("objtool: Don't make .altinstructions writable")
+aligned objtool-created and kernel-created .altinstructions section
+flags, but there remains a minor discrepancy in their use of a section
+entry size: objtool sets one while the kernel build does not.
+
+While sh_entsize of sizeof(struct alt_instr) seems intuitive, this small
+deviation can cause failures with external tooling (kpatch-build).
+
+Fix this by creating new .altinstructions sections with sh_entsize of 0
+and then later updating sec->sh_size as alternatives are added to the
+section. An added benefit is avoiding the data descriptor and buffer
+created by elf_create_section(), but previously unused by
+elf_add_alternative().
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/20210822225037.54620-2-joe.lawrence@redhat.com
+Cc: Andy Lavr <andy.lavr@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -611,7 +611,7 @@ static int elf_add_alternative(struct el
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+- SHF_ALLOC, size, 0);
++ SHF_ALLOC, 0, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 10 Jun 2021 09:04:29 +0200
+Subject: objtool: Only rewrite unconditional retpoline thunk calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2d49b721dc18c113d5221f4cf5a6104eb66cb7f2 upstream.
+
+It turns out that the compilers generate conditional branches to the
+retpoline thunks like:
+
+ 5d5: 0f 85 00 00 00 00 jne 5db <cpuidle_reflect+0x22>
+ 5d7: R_X86_64_PLT32 __x86_indirect_thunk_r11-0x4
+
+while the rewrite can only handle JMP/CALL to the thunks. The result
+is the alternative wrecking the code. Make sure to skip writing the
+alternatives for conditional branches.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Reported-by: Lukasz Majczak <lma@semihalf.com>
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -674,6 +674,10 @@ int arch_rewrite_retpolines(struct objto
+
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+
++ if (insn->type != INSN_JUMP_DYNAMIC &&
++ insn->type != INSN_CALL_DYNAMIC)
++ continue;
++
+ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+ continue;
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 3 Oct 2021 13:45:48 -0700
+Subject: objtool: print out the symbol type when complaining about it
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 7fab1c12bde926c5a8c7d5984c551d0854d7e0b3 upstream.
+
+The objtool warning that the kvm instruction emulation code triggered
+wasn't very useful:
+
+ arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception
+
+in that it helpfully tells you which symbol name it had trouble figuring
+out the relocation for, but it doesn't actually say what the unknown
+symbol type was that triggered it all.
+
+In this case it was because of missing type information (type 0, aka
+STT_NOTYPE), but on the whole it really should just have printed that
+out as part of the message.
+
+Because if this warning triggers, that's very much the first thing you
+want to know - why did reloc2sec_off() return failure for that symbol?
+
+So rather than just saying you can't handle some type of symbol without
+saying what the type _was_, just print out the type number too.
+
+Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types")
+Link: https://lore.kernel.org/lkml/CAHk-=wiZwq-0LknKhXN4M+T8jbxn_2i9mcKpO+OaBSSq_Eh7tg@mail.gmail.com/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -106,8 +106,10 @@ static int get_alt_entry(struct elf *elf
+ return -1;
+ }
+ if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) {
+- WARN_FUNC("don't know how to handle reloc symbol type: %s",
+- sec, offset + entry->orig, orig_reloc->sym->name);
++ WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
++ sec, offset + entry->orig,
++ orig_reloc->sym->type,
++ orig_reloc->sym->name);
+ return -1;
+ }
+
+@@ -128,8 +130,10 @@ static int get_alt_entry(struct elf *elf
+ return 1;
+
+ if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) {
+- WARN_FUNC("don't know how to handle reloc symbol type: %s",
+- sec, offset + entry->new, new_reloc->sym->name);
++ WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
++ sec, offset + entry->new,
++ new_reloc->sym->type,
++ new_reloc->sym->name);
+ return -1;
+ }
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Fri, 24 Jun 2022 12:52:40 +0200
+Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE}
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream.
+
+Commit
+
+ c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints")
+
+removed the save/restore unwind hints because they were no longer
+needed. Now they're going to be needed again so re-add them.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/unwind_hints.h | 12 +++++++++-
+ include/linux/objtool.h | 6 +++--
+ tools/include/linux/objtool.h | 6 +++--
+ tools/objtool/check.c | 40 ++++++++++++++++++++++++++++++++++++
+ tools/objtool/check.h | 1
+ 5 files changed, 59 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -8,11 +8,11 @@
+ #ifdef __ASSEMBLY__
+
+ .macro UNWIND_HINT_EMPTY
+- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
++ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+ .endm
+
+ .macro UNWIND_HINT_ENTRY
+- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1
++ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
+ .endm
+
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+@@ -56,6 +56,14 @@
+ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
+ .endm
+
++.macro UNWIND_HINT_SAVE
++ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
++.endm
++
++.macro UNWIND_HINT_RESTORE
++ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
++.endm
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_X86_UNWIND_HINTS_H */
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -40,6 +40,8 @@ struct unwind_hint {
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC 3
+ #define UNWIND_HINT_TYPE_ENTRY 4
++#define UNWIND_HINT_TYPE_SAVE 5
++#define UNWIND_HINT_TYPE_RESTORE 6
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+@@ -102,7 +104,7 @@ struct unwind_hint {
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+@@ -126,7 +128,7 @@ struct unwind_hint {
+ #define STACK_FRAME_NON_STANDARD(func)
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ #endif
+
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -40,6 +40,8 @@ struct unwind_hint {
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC 3
+ #define UNWIND_HINT_TYPE_ENTRY 4
++#define UNWIND_HINT_TYPE_SAVE 5
++#define UNWIND_HINT_TYPE_RESTORE 6
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+@@ -102,7 +104,7 @@ struct unwind_hint {
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+@@ -126,7 +128,7 @@ struct unwind_hint {
+ #define STACK_FRAME_NON_STANDARD(func)
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ #endif
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1752,6 +1752,17 @@ static int read_unwind_hints(struct objt
+
+ insn->hint = true;
+
++ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
++ insn->hint = false;
++ insn->save = true;
++ continue;
++ }
++
++ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
++ insn->restore = true;
++ continue;
++ }
++
+ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+ struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
+
+@@ -2847,6 +2858,35 @@ static int validate_branch(struct objtoo
+ state.instr += insn->instr;
+
+ if (insn->hint) {
++ if (insn->restore) {
++ struct instruction *save_insn, *i;
++
++ i = insn;
++ save_insn = NULL;
++
++ sym_for_each_insn_continue_reverse(file, func, i) {
++ if (i->save) {
++ save_insn = i;
++ break;
++ }
++ }
++
++ if (!save_insn) {
++ WARN_FUNC("no corresponding CFI save for CFI restore",
++ sec, insn->offset);
++ return 1;
++ }
++
++ if (!save_insn->visited) {
++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
++ sec, insn->offset);
++ return 1;
++ }
++
++ insn->cfi = save_insn->cfi;
++ nr_cfi_reused++;
++ }
++
+ state.cfi = *insn->cfi;
+ } else {
+ /* XXX track if we actually changed state.cfi */
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -47,6 +47,7 @@ struct instruction {
+ unsigned long immediate;
+ bool dead_end, ignore, ignore_alts;
+ bool hint;
++ bool save, restore;
+ bool retpoline_safe;
+ bool entry;
+ s8 instr;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 17 Dec 2020 15:02:42 -0600
+Subject: objtool: Refactor ORC section generation
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit ab4e0744e99b87e1a223e89fc3c9ae44f727c9a6 upstream.
+
+Decouple ORC entries from instructions. This simplifies the
+control/data flow, and is going to make it easier to support alternative
+instructions which change the stack layout.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/Makefile | 4
+ tools/objtool/arch.h | 4
+ tools/objtool/builtin-orc.c | 6
+ tools/objtool/check.h | 3
+ tools/objtool/objtool.h | 3
+ tools/objtool/orc_gen.c | 274 ++++++++++++++++++++++----------------------
+ tools/objtool/weak.c | 7 -
+ 7 files changed, 141 insertions(+), 160 deletions(-)
+
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -46,10 +46,6 @@ ifeq ($(SRCARCH),x86)
+ SUBCMD_ORC := y
+ endif
+
+-ifeq ($(SUBCMD_ORC),y)
+- CFLAGS += -DINSN_USE_ORC
+-endif
+-
+ export SUBCMD_CHECK SUBCMD_ORC
+ export srctree OUTPUT CFLAGS SRCARCH AWK
+ include $(srctree)/tools/build/Makefile.include
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -11,10 +11,6 @@
+ #include "objtool.h"
+ #include "cfi.h"
+
+-#ifdef INSN_USE_ORC
+-#include <asm/orc_types.h>
+-#endif
+-
+ enum insn_type {
+ INSN_JUMP_CONDITIONAL,
+ INSN_JUMP_UNCONDITIONAL,
+--- a/tools/objtool/builtin-orc.c
++++ b/tools/objtool/builtin-orc.c
+@@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv)
+ if (list_empty(&file->insn_list))
+ return 0;
+
+- ret = create_orc(file);
+- if (ret)
+- return ret;
+-
+- ret = create_orc_sections(file);
++ ret = orc_create(file);
+ if (ret)
+ return ret;
+
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -43,9 +43,6 @@ struct instruction {
+ struct symbol *func;
+ struct list_head stack_ops;
+ struct cfi_state cfi;
+-#ifdef INSN_USE_ORC
+- struct orc_entry orc;
+-#endif
+ };
+
+ static inline bool is_static_jump(struct instruction *insn)
+--- a/tools/objtool/objtool.h
++++ b/tools/objtool/objtool.h
+@@ -26,7 +26,6 @@ struct objtool_file *objtool_open_read(c
+
+ int check(struct objtool_file *file);
+ int orc_dump(const char *objname);
+-int create_orc(struct objtool_file *file);
+-int create_orc_sections(struct objtool_file *file);
++int orc_create(struct objtool_file *file);
+
+ #endif /* _OBJTOOL_H */
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -12,89 +12,84 @@
+ #include "check.h"
+ #include "warn.h"
+
+-int create_orc(struct objtool_file *file)
++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
+ {
+- struct instruction *insn;
++ struct instruction *insn = container_of(cfi, struct instruction, cfi);
++ struct cfi_reg *bp = &cfi->regs[CFI_BP];
+
+- for_each_insn(file, insn) {
+- struct orc_entry *orc = &insn->orc;
+- struct cfi_reg *cfa = &insn->cfi.cfa;
+- struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
++ memset(orc, 0, sizeof(*orc));
+
+- if (!insn->sec->text)
+- continue;
+-
+- orc->end = insn->cfi.end;
+-
+- if (cfa->base == CFI_UNDEFINED) {
+- orc->sp_reg = ORC_REG_UNDEFINED;
+- continue;
+- }
+-
+- switch (cfa->base) {
+- case CFI_SP:
+- orc->sp_reg = ORC_REG_SP;
+- break;
+- case CFI_SP_INDIRECT:
+- orc->sp_reg = ORC_REG_SP_INDIRECT;
+- break;
+- case CFI_BP:
+- orc->sp_reg = ORC_REG_BP;
+- break;
+- case CFI_BP_INDIRECT:
+- orc->sp_reg = ORC_REG_BP_INDIRECT;
+- break;
+- case CFI_R10:
+- orc->sp_reg = ORC_REG_R10;
+- break;
+- case CFI_R13:
+- orc->sp_reg = ORC_REG_R13;
+- break;
+- case CFI_DI:
+- orc->sp_reg = ORC_REG_DI;
+- break;
+- case CFI_DX:
+- orc->sp_reg = ORC_REG_DX;
+- break;
+- default:
+- WARN_FUNC("unknown CFA base reg %d",
+- insn->sec, insn->offset, cfa->base);
+- return -1;
+- }
++ orc->end = cfi->end;
+
+- switch(bp->base) {
+- case CFI_UNDEFINED:
+- orc->bp_reg = ORC_REG_UNDEFINED;
+- break;
+- case CFI_CFA:
+- orc->bp_reg = ORC_REG_PREV_SP;
+- break;
+- case CFI_BP:
+- orc->bp_reg = ORC_REG_BP;
+- break;
+- default:
+- WARN_FUNC("unknown BP base reg %d",
+- insn->sec, insn->offset, bp->base);
+- return -1;
+- }
++ if (cfi->cfa.base == CFI_UNDEFINED) {
++ orc->sp_reg = ORC_REG_UNDEFINED;
++ return 0;
++ }
++
++ switch (cfi->cfa.base) {
++ case CFI_SP:
++ orc->sp_reg = ORC_REG_SP;
++ break;
++ case CFI_SP_INDIRECT:
++ orc->sp_reg = ORC_REG_SP_INDIRECT;
++ break;
++ case CFI_BP:
++ orc->sp_reg = ORC_REG_BP;
++ break;
++ case CFI_BP_INDIRECT:
++ orc->sp_reg = ORC_REG_BP_INDIRECT;
++ break;
++ case CFI_R10:
++ orc->sp_reg = ORC_REG_R10;
++ break;
++ case CFI_R13:
++ orc->sp_reg = ORC_REG_R13;
++ break;
++ case CFI_DI:
++ orc->sp_reg = ORC_REG_DI;
++ break;
++ case CFI_DX:
++ orc->sp_reg = ORC_REG_DX;
++ break;
++ default:
++ WARN_FUNC("unknown CFA base reg %d",
++ insn->sec, insn->offset, cfi->cfa.base);
++ return -1;
++ }
+
+- orc->sp_offset = cfa->offset;
+- orc->bp_offset = bp->offset;
+- orc->type = insn->cfi.type;
++ switch (bp->base) {
++ case CFI_UNDEFINED:
++ orc->bp_reg = ORC_REG_UNDEFINED;
++ break;
++ case CFI_CFA:
++ orc->bp_reg = ORC_REG_PREV_SP;
++ break;
++ case CFI_BP:
++ orc->bp_reg = ORC_REG_BP;
++ break;
++ default:
++ WARN_FUNC("unknown BP base reg %d",
++ insn->sec, insn->offset, bp->base);
++ return -1;
+ }
+
++ orc->sp_offset = cfi->cfa.offset;
++ orc->bp_offset = bp->offset;
++ orc->type = cfi->type;
++
+ return 0;
+ }
+
+-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
+- unsigned int idx, struct section *insn_sec,
+- unsigned long insn_off, struct orc_entry *o)
++static int write_orc_entry(struct elf *elf, struct section *orc_sec,
++ struct section *ip_rsec, unsigned int idx,
++ struct section *insn_sec, unsigned long insn_off,
++ struct orc_entry *o)
+ {
+ struct orc_entry *orc;
+ struct reloc *reloc;
+
+ /* populate ORC data */
+- orc = (struct orc_entry *)u_sec->data->d_buf + idx;
++ orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
+ memcpy(orc, o, sizeof(*orc));
+
+ /* populate reloc for ip */
+@@ -114,102 +109,109 @@ static int create_orc_entry(struct elf *
+
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(int);
+- reloc->sec = ip_relocsec;
++ reloc->sec = ip_rsec;
+
+ elf_add_reloc(elf, reloc);
+
+ return 0;
+ }
+
+-int create_orc_sections(struct objtool_file *file)
++struct orc_list_entry {
++ struct list_head list;
++ struct orc_entry orc;
++ struct section *insn_sec;
++ unsigned long insn_off;
++};
++
++static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc,
++ struct section *sec, unsigned long offset)
++{
++ struct orc_list_entry *entry = malloc(sizeof(*entry));
++
++ if (!entry) {
++ WARN("malloc failed");
++ return -1;
++ }
++
++ entry->orc = *orc;
++ entry->insn_sec = sec;
++ entry->insn_off = offset;
++
++ list_add_tail(&entry->list, orc_list);
++ return 0;
++}
++
++int orc_create(struct objtool_file *file)
+ {
+- struct instruction *insn, *prev_insn;
+- struct section *sec, *u_sec, *ip_relocsec;
+- unsigned int idx;
++ struct section *sec, *ip_rsec, *orc_sec;
++ unsigned int nr = 0, idx = 0;
++ struct orc_list_entry *entry;
++ struct list_head orc_list;
+
+- struct orc_entry empty = {
+- .sp_reg = ORC_REG_UNDEFINED,
++ struct orc_entry null = {
++ .sp_reg = ORC_REG_UNDEFINED,
+ .bp_reg = ORC_REG_UNDEFINED,
+ .type = UNWIND_HINT_TYPE_CALL,
+ };
+
+- sec = find_section_by_name(file->elf, ".orc_unwind");
+- if (sec) {
+- WARN("file already has .orc_unwind section, skipping");
+- return -1;
+- }
+-
+- /* count the number of needed orcs */
+- idx = 0;
++ /* Build a deduplicated list of ORC entries: */
++ INIT_LIST_HEAD(&orc_list);
+ for_each_sec(file, sec) {
++ struct orc_entry orc, prev_orc = {0};
++ struct instruction *insn;
++ bool empty = true;
++
+ if (!sec->text)
+ continue;
+
+- prev_insn = NULL;
+ sec_for_each_insn(file, sec, insn) {
+- if (!prev_insn ||
+- memcmp(&insn->orc, &prev_insn->orc,
+- sizeof(struct orc_entry))) {
+- idx++;
+- }
+- prev_insn = insn;
++ if (init_orc_entry(&orc, &insn->cfi))
++ return -1;
++ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++ continue;
++ if (orc_list_add(&orc_list, &orc, sec, insn->offset))
++ return -1;
++ nr++;
++ prev_orc = orc;
++ empty = false;
+ }
+
+- /* section terminator */
+- if (prev_insn)
+- idx++;
++ /* Add a section terminator */
++ if (!empty) {
++ orc_list_add(&orc_list, &null, sec, sec->len);
++ nr++;
++ }
+ }
+- if (!idx)
+- return -1;
++ if (!nr)
++ return 0;
+
++ /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */
++ sec = find_section_by_name(file->elf, ".orc_unwind");
++ if (sec) {
++ WARN("file already has .orc_unwind section, skipping");
++ return -1;
++ }
++ orc_sec = elf_create_section(file->elf, ".orc_unwind", 0,
++ sizeof(struct orc_entry), nr);
++ if (!orc_sec)
++ return -1;
+
+- /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
+- sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
++ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+ if (!sec)
+ return -1;
+-
+- ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+- if (!ip_relocsec)
++ ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
++ if (!ip_rsec)
+ return -1;
+
+- /* create .orc_unwind section */
+- u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
+- sizeof(struct orc_entry), idx);
+-
+- /* populate sections */
+- idx = 0;
+- for_each_sec(file, sec) {
+- if (!sec->text)
+- continue;
+-
+- prev_insn = NULL;
+- sec_for_each_insn(file, sec, insn) {
+- if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
+- sizeof(struct orc_entry))) {
+-
+- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
+- insn->sec, insn->offset,
+- &insn->orc))
+- return -1;
+-
+- idx++;
+- }
+- prev_insn = insn;
+- }
+-
+- /* section terminator */
+- if (prev_insn) {
+- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
+- prev_insn->sec,
+- prev_insn->offset + prev_insn->len,
+- &empty))
+- return -1;
+-
+- idx++;
+- }
++ /* Write ORC entries to sections: */
++ list_for_each_entry(entry, &orc_list, list) {
++ if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
++ entry->insn_sec, entry->insn_off,
++ &entry->orc))
++ return -1;
+ }
+
+- if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
++ if (elf_rebuild_reloc_section(file->elf, ip_rsec))
+ return -1;
+
+ return 0;
+--- a/tools/objtool/weak.c
++++ b/tools/objtool/weak.c
+@@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname
+ UNSUPPORTED("orc");
+ }
+
+-int __weak create_orc(struct objtool_file *file)
+-{
+- UNSUPPORTED("orc");
+-}
+-
+-int __weak create_orc_sections(struct objtool_file *file)
++int __weak orc_create(struct objtool_file *file)
+ {
+ UNSUPPORTED("orc");
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 4 Oct 2021 10:07:50 -0700
+Subject: objtool: Remove reloc symbol type checks in get_alt_entry()
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 4d8b35968bbf9e42b6b202eedb510e2c82ad8b38 upstream.
+
+Converting a special section's relocation reference to a symbol is
+straightforward. No need for objtool to complain that it doesn't know
+how to handle it. Just handle it.
+
+This fixes the following warning:
+
+ arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception
+
+Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types")
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/feadbc3dfb3440d973580fad8d3db873cbfe1694.1633367242.git.jpoimboe@redhat.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: x86@kernel.org
+Cc: Miroslav Benes <mbenes@suse.cz>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c | 36 +++++++-----------------------------
+ 1 file changed, 7 insertions(+), 29 deletions(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -55,22 +55,11 @@ void __weak arch_handle_alternative(unsi
+ {
+ }
+
+-static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off)
++static void reloc_to_sec_off(struct reloc *reloc, struct section **sec,
++ unsigned long *off)
+ {
+- switch (reloc->sym->type) {
+- case STT_FUNC:
+- *sec = reloc->sym->sec;
+- *off = reloc->sym->offset + reloc->addend;
+- return true;
+-
+- case STT_SECTION:
+- *sec = reloc->sym->sec;
+- *off = reloc->addend;
+- return true;
+-
+- default:
+- return false;
+- }
++ *sec = reloc->sym->sec;
++ *off = reloc->sym->offset + reloc->addend;
+ }
+
+ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+@@ -105,13 +94,8 @@ static int get_alt_entry(struct elf *elf
+ WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+ return -1;
+ }
+- if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) {
+- WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
+- sec, offset + entry->orig,
+- orig_reloc->sym->type,
+- orig_reloc->sym->name);
+- return -1;
+- }
++
++ reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
+
+ if (!entry->group || alt->new_len) {
+ new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
+@@ -129,13 +113,7 @@ static int get_alt_entry(struct elf *elf
+ if (arch_is_retpoline(new_reloc->sym))
+ return 1;
+
+- if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) {
+- WARN_FUNC("don't know how to handle reloc symbol type %d: %s",
+- sec, offset + entry->new,
+- new_reloc->sym->type,
+- new_reloc->sym->name);
+- return -1;
+- }
++ reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off);
+
+ /* _ASM_EXTABLE_EX hack */
+ if (alt->new_off >= 0x7ffffff0)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:06 +0100
+Subject: objtool: Rework the elf_rebuild_reloc_section() logic
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3a647607b57ad8346e659ddd3b951ac292c83690 upstream.
+
+Instead of manually calling elf_rebuild_reloc_section() on sections
+we've called elf_add_reloc() on, have elf_write() DTRT.
+
+This makes it easier to add random relocations in places without
+carefully tracking when we're done and need to flush what section.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151259.754213408@infradead.org
+[bwh: Backported to 5.10: drop changes in create_mcount_loc_sections()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 3 ---
+ tools/objtool/elf.c | 20 ++++++++++++++------
+ tools/objtool/elf.h | 1 -
+ tools/objtool/orc_gen.c | 3 ---
+ 4 files changed, 14 insertions(+), 13 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -542,9 +542,6 @@ static int create_static_call_sections(s
+ idx++;
+ }
+
+- if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+- return -1;
+-
+ return 0;
+ }
+
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -530,6 +530,8 @@ void elf_add_reloc(struct elf *elf, stru
+
+ list_add_tail(&reloc->list, &sec->reloc_list);
+ elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
++
++ sec->changed = true;
+ }
+
+ static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+@@ -609,7 +611,9 @@ static int read_relocs(struct elf *elf)
+ return -1;
+ }
+
+- elf_add_reloc(elf, reloc);
++ list_add_tail(&reloc->list, &sec->reloc_list);
++ elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
++
+ nr_reloc++;
+ }
+ max_reloc = max(max_reloc, nr_reloc);
+@@ -920,14 +924,11 @@ static int elf_rebuild_rela_reloc_sectio
+ return 0;
+ }
+
+-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
++static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+ {
+ struct reloc *reloc;
+ int nr;
+
+- sec->changed = true;
+- elf->changed = true;
+-
+ nr = 0;
+ list_for_each_entry(reloc, &sec->reloc_list, list)
+ nr++;
+@@ -991,9 +992,15 @@ int elf_write(struct elf *elf)
+ struct section *sec;
+ Elf_Scn *s;
+
+- /* Update section headers for changed sections: */
++ /* Update changed relocation sections and section headers: */
+ list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->changed) {
++ if (sec->base &&
++ elf_rebuild_reloc_section(elf, sec)) {
++ WARN("elf_rebuild_reloc_section");
++ return -1;
++ }
++
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+@@ -1005,6 +1012,7 @@ int elf_write(struct elf *elf)
+ }
+
+ sec->changed = false;
++ elf->changed = true;
+ }
+ }
+
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(c
+ struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+ void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+ struct reloc *reloc);
+-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
+
+ #define for_each_sec(file, sec) \
+ list_for_each_entry(sec, &file->elf->sections, list)
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -251,8 +251,5 @@ int orc_create(struct objtool_file *file
+ return -1;
+ }
+
+- if (elf_rebuild_reloc_section(file->elf, ip_rsec))
+- return -1;
+-
+ return 0;
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:14 +0100
+Subject: objtool: Skip magical retpoline .altinstr_replacement
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 50e7b4a1a1b264fc7df0698f2defb93cadf19a7b upstream.
+
+When the .altinstr_replacement is a retpoline, skip the alternative.
+We already special case retpolines anyway.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.259429287@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -104,6 +104,14 @@ static int get_alt_entry(struct elf *elf
+ return -1;
+ }
+
++ /*
++ * Skip retpoline .altinstr_replacement... we already rewrite the
++ * instructions for retpolines anyway, see arch_is_retpoline()
++ * usage in add_{call,jump}_destinations().
++ */
++ if (arch_is_retpoline(new_reloc->sym))
++ return 1;
++
+ alt->new_sec = new_reloc->sym->sec;
+ alt->new_off = (unsigned int)new_reloc->addend;
+
+@@ -152,7 +160,9 @@ int special_get_alts(struct elf *elf, st
+ memset(alt, 0, sizeof(*alt));
+
+ ret = get_alt_entry(elf, entry, sec, idx, alt);
+- if (ret)
++ if (ret > 0)
++ continue;
++ if (ret < 0)
+ return ret;
+
+ list_add_tail(&alt->list, alts);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 1 Jul 2022 09:00:45 -0300
+Subject: objtool: skip non-text sections when adding return-thunk sites
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+The .discard.text section is added in order to reserve BRK, with a
+temporary function just so it can give it a size. This adds a relocation to
+the return thunk, which objtool will add to the .return_sites section.
+Linking will then fail as there are references to the .discard.text
+section.
+
+Do not add instructions from non-text sections to the list of return thunk
+calls, avoiding the reference to .discard.text.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1090,7 +1090,9 @@ static void add_return_call(struct objto
+ insn->type = INSN_RETURN;
+ insn->retpoline_safe = true;
+
+- list_add_tail(&insn->call_node, &file->return_thunk_list);
++ /* Skip the non-text sections, specially .discard ones */
++ if (insn->sec->text)
++ list_add_tail(&insn->call_node, &file->return_thunk_list);
+ }
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 24 Feb 2021 10:29:14 -0600
+Subject: objtool: Support asm jump tables
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 99033461e685b48549ec77608b4bda75ddf772ce upstream.
+
+Objtool detection of asm jump tables would normally just work, except
+for the fact that asm retpolines use alternatives. Objtool thinks the
+alternative code path (a jump to the retpoline) is a sibling call.
+
+Don't treat alternative indirect branches as sibling calls when the
+original instruction has a jump table.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Ard Biesheuvel <ardb@kernel.org>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Tested-by: Sami Tolvanen <samitolvanen@google.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Link: https://lore.kernel.org/r/460cf4dc675d64e1124146562cabd2c05aa322e8.1614182415.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -107,6 +107,18 @@ static struct instruction *prev_insn_sam
+ for (insn = next_insn_same_sec(file, insn); insn; \
+ insn = next_insn_same_sec(file, insn))
+
++static bool is_jump_table_jump(struct instruction *insn)
++{
++ struct alt_group *alt_group = insn->alt_group;
++
++ if (insn->jump_table)
++ return true;
++
++ /* Retpoline alternative for a jump table? */
++ return alt_group && alt_group->orig_group &&
++ alt_group->orig_group->first_insn->jump_table;
++}
++
+ static bool is_sibling_call(struct instruction *insn)
+ {
+ /*
+@@ -119,7 +131,7 @@ static bool is_sibling_call(struct instr
+
+ /* An indirect jump is either a sibling call or a jump to a table. */
+ if (insn->type == INSN_JUMP_DYNAMIC)
+- return list_empty(&insn->alts);
++ return !is_jump_table_jump(insn);
+
+ /* add_jump_destinations() sets insn->call_dest for sibling calls. */
+ return (is_static_jump(insn) && insn->call_dest);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:20 -0600
+Subject: objtool: Support retpoline jump detection for vmlinux.o
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 31a7424bc58063a8e0466c3c10f31a52ec2be4f6 upstream.
+
+Objtool converts direct retpoline jumps to type INSN_JUMP_DYNAMIC, since
+that's what they are semantically.
+
+That conversion doesn't work in vmlinux.o validation because the
+indirect thunk function is present in the object, so the intra-object
+jump check succeeds before the retpoline jump check gets a chance.
+
+Rearrange the checks: check for a retpoline jump before checking for an
+intra-object jump.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/4302893513770dde68ddc22a9d6a2a04aca491dd.1611263461.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -795,10 +795,6 @@ static int add_jump_destinations(struct
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- } else if (reloc->sym->sec->idx) {
+- dest_sec = reloc->sym->sec;
+- dest_off = reloc->sym->sym.st_value +
+- arch_dest_reloc_offset(reloc->addend);
+ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+ !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+ /*
+@@ -812,6 +808,10 @@ static int add_jump_destinations(struct
+
+ insn->retpoline_safe = true;
+ continue;
++ } else if (reloc->sym->sec->idx) {
++ dest_sec = reloc->sym->sec;
++ dest_off = reloc->sym->sym.st_value +
++ arch_dest_reloc_offset(reloc->addend);
+ } else {
+ /* external sibling call */
+ insn->call_dest = reloc->sym;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 18 Dec 2020 14:26:21 -0600
+Subject: objtool: Support stack layout changes in alternatives
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit c9c324dc22aab1687da37001b321b6dfa93a0699 upstream.
+
+The ORC unwinder showed a warning [1] which revealed the stack layout
+didn't match what was expected. The problem was that paravirt patching
+had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
+the stack layout between the PUSHF and the POP, so unwinding from an
+interrupt which occurred between those two instructions would fail.
+
+Part of the agreed upon solution was to rework the custom paravirt
+patching code to use alternatives instead, since objtool already knows
+how to read alternatives (and converging runtime patching infrastructure
+is always a good thing anyway). But the main problem still remains,
+which is that runtime patching can change the stack layout.
+
+Making stack layout changes in alternatives was disallowed with commit
+7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
+is going to be doing it, it needs to be supported.
+
+One way to do so would be to modify the ORC table when the code gets
+patched. But ORC is simple -- a good thing! -- and it's best to leave
+it alone.
+
+Instead, support stack layout changes by "flattening" all possible stack
+states (CFI) from parallel alternative code streams into a single set of
+linear states. The only necessary limitation is that CFI conflicts are
+disallowed at all possible instruction boundaries.
+
+For example, this scenario is allowed:
+
+ Alt1 Alt2 Alt3
+
+ 0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
+ 0x01 POP %RAX
+ 0x02 NOP
+ ...
+ 0x05 NOP
+ ...
+ 0x07 <insn>
+
+The unwind information for offset-0x00 is identical for all 3
+alternatives. Similarly offset-0x05 and higher also are identical (and
+the same as 0x00). However offset-0x01 has deviating CFI, but that is
+only relevant for Alt3, neither of the other alternative instruction
+streams will ever hit that offset.
+
+This scenario is NOT allowed:
+
+ Alt1 Alt2
+
+ 0x00 CALL *pv_ops.save_fl PUSHF
+ 0x01 NOP6
+ ...
+ 0x07 NOP POP %RAX
+
+The problem here is that offset-0x7, which is an instruction boundary in
+both possible instruction patch streams, has two conflicting stack
+layouts.
+
+[ The above examples were stolen from Peter Zijlstra. ]
+
+The new flattened CFI array is used both for the detection of conflicts
+(like the second example above) and the generation of linear ORC
+entries.
+
+BTW, another benefit of these changes is that, thanks to some related
+cleanups (new fake nops and alt_group struct) objtool can finally be rid
+of fake jumps, which were a constant source of headaches.
+
+[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
+
+Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/Documentation/stack-validation.txt | 14 -
+ tools/objtool/check.c | 196 +++++++++++------------
+ tools/objtool/check.h | 6
+ tools/objtool/orc_gen.c | 56 +++++-
+ 4 files changed, 160 insertions(+), 112 deletions(-)
+
+--- a/tools/objtool/Documentation/stack-validation.txt
++++ b/tools/objtool/Documentation/stack-validation.txt
+@@ -315,13 +315,15 @@ they mean, and suggestions for how to fi
+ function tracing inserts additional calls, which is not obvious from the
+ sources).
+
+-10. file.o: warning: func()+0x5c: alternative modifies stack
++10. file.o: warning: func()+0x5c: stack layout conflict in alternatives
+
+- This means that an alternative includes instructions that modify the
+- stack. The problem is that there is only one ORC unwind table, this means
+- that the ORC unwind entries must be valid for each of the alternatives.
+- The easiest way to enforce this is to ensure alternatives do not contain
+- any ORC entries, which in turn implies the above constraint.
++ This means that in the use of the alternative() or ALTERNATIVE()
++ macro, the code paths have conflicting modifications to the stack.
++ The problem is that there is only one ORC unwind table, which means
++ that the ORC unwind entries must be consistent for all possible
++ instruction boundaries regardless of which code has been patched.
++ This limitation can be overcome by massaging the alternatives with
++ NOPs to shift the stack changes around so they no longer conflict.
+
+ 11. file.o: warning: unannotated intra-function call
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -19,8 +19,6 @@
+ #include <linux/kernel.h>
+ #include <linux/static_call_types.h>
+
+-#define FAKE_JUMP_OFFSET -1
+-
+ struct alternative {
+ struct list_head list;
+ struct instruction *insn;
+@@ -789,9 +787,6 @@ static int add_jump_destinations(struct
+ if (!is_static_jump(insn))
+ continue;
+
+- if (insn->offset == FAKE_JUMP_OFFSET)
+- continue;
+-
+ reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (!reloc) {
+@@ -991,28 +986,15 @@ static int add_call_destinations(struct
+ }
+
+ /*
+- * The .alternatives section requires some extra special care, over and above
+- * what other special sections require:
+- *
+- * 1. Because alternatives are patched in-place, we need to insert a fake jump
+- * instruction at the end so that validate_branch() skips all the original
+- * replaced instructions when validating the new instruction path.
+- *
+- * 2. An added wrinkle is that the new instruction length might be zero. In
+- * that case the old instructions are replaced with noops. We simulate that
+- * by creating a fake jump as the only new instruction.
+- *
+- * 3. In some cases, the alternative section includes an instruction which
+- * conditionally jumps to the _end_ of the entry. We have to modify these
+- * jumps' destinations to point back to .text rather than the end of the
+- * entry in .altinstr_replacement.
++ * The .alternatives section requires some extra special care over and above
++ * other special sections because alternatives are patched in place.
+ */
+ static int handle_group_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+ {
+- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
++ struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+ struct alt_group *orig_alt_group, *new_alt_group;
+ unsigned long dest_off;
+
+@@ -1022,6 +1004,13 @@ static int handle_group_alt(struct objto
+ WARN("malloc failed");
+ return -1;
+ }
++ orig_alt_group->cfi = calloc(special_alt->orig_len,
++ sizeof(struct cfi_state *));
++ if (!orig_alt_group->cfi) {
++ WARN("calloc failed");
++ return -1;
++ }
++
+ last_orig_insn = NULL;
+ insn = orig_insn;
+ sec_for_each_insn_from(file, insn) {
+@@ -1035,42 +1024,45 @@ static int handle_group_alt(struct objto
+ orig_alt_group->first_insn = orig_insn;
+ orig_alt_group->last_insn = last_orig_insn;
+
+- if (next_insn_same_sec(file, last_orig_insn)) {
+- fake_jump = malloc(sizeof(*fake_jump));
+- if (!fake_jump) {
+- WARN("malloc failed");
+- return -1;
+- }
+- memset(fake_jump, 0, sizeof(*fake_jump));
+- INIT_LIST_HEAD(&fake_jump->alts);
+- INIT_LIST_HEAD(&fake_jump->stack_ops);
+- init_cfi_state(&fake_jump->cfi);
+-
+- fake_jump->sec = special_alt->new_sec;
+- fake_jump->offset = FAKE_JUMP_OFFSET;
+- fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+- fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+- fake_jump->func = orig_insn->func;
++
++ new_alt_group = malloc(sizeof(*new_alt_group));
++ if (!new_alt_group) {
++ WARN("malloc failed");
++ return -1;
+ }
+
+- if (!special_alt->new_len) {
+- if (!fake_jump) {
+- WARN("%s: empty alternative at end of section",
+- special_alt->orig_sec->name);
++ if (special_alt->new_len < special_alt->orig_len) {
++ /*
++ * Insert a fake nop at the end to make the replacement
++ * alt_group the same size as the original. This is needed to
++ * allow propagate_alt_cfi() to do its magic. When the last
++ * instruction affects the stack, the instruction after it (the
++ * nop) will propagate the new state to the shared CFI array.
++ */
++ nop = malloc(sizeof(*nop));
++ if (!nop) {
++ WARN("malloc failed");
+ return -1;
+ }
+-
+- *new_insn = fake_jump;
+- return 0;
++ memset(nop, 0, sizeof(*nop));
++ INIT_LIST_HEAD(&nop->alts);
++ INIT_LIST_HEAD(&nop->stack_ops);
++ init_cfi_state(&nop->cfi);
++
++ nop->sec = special_alt->new_sec;
++ nop->offset = special_alt->new_off + special_alt->new_len;
++ nop->len = special_alt->orig_len - special_alt->new_len;
++ nop->type = INSN_NOP;
++ nop->func = orig_insn->func;
++ nop->alt_group = new_alt_group;
++ nop->ignore = orig_insn->ignore_alts;
+ }
+
+- new_alt_group = malloc(sizeof(*new_alt_group));
+- if (!new_alt_group) {
+- WARN("malloc failed");
+- return -1;
++ if (!special_alt->new_len) {
++ *new_insn = nop;
++ goto end;
+ }
+
+- last_new_insn = NULL;
+ insn = *new_insn;
+ sec_for_each_insn_from(file, insn) {
+ struct reloc *alt_reloc;
+@@ -1109,14 +1101,8 @@ static int handle_group_alt(struct objto
+ continue;
+
+ dest_off = arch_jump_destination(insn);
+- if (dest_off == special_alt->new_off + special_alt->new_len) {
+- if (!fake_jump) {
+- WARN("%s: alternative jump to end of section",
+- special_alt->orig_sec->name);
+- return -1;
+- }
+- insn->jump_dest = fake_jump;
+- }
++ if (dest_off == special_alt->new_off + special_alt->new_len)
++ insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find alternative jump destination",
+@@ -1131,13 +1117,13 @@ static int handle_group_alt(struct objto
+ return -1;
+ }
+
++ if (nop)
++ list_add(&nop->list, &last_new_insn->list);
++end:
+ new_alt_group->orig_group = orig_alt_group;
+ new_alt_group->first_insn = *new_insn;
+- new_alt_group->last_insn = last_new_insn;
+-
+- if (fake_jump)
+- list_add(&fake_jump->list, &last_new_insn->list);
+-
++ new_alt_group->last_insn = nop ? : last_new_insn;
++ new_alt_group->cfi = orig_alt_group->cfi;
+ return 0;
+ }
+
+@@ -2237,22 +2223,47 @@ static int update_cfi_state(struct instr
+ return 0;
+ }
+
+-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
++/*
++ * The stack layouts of alternatives instructions can sometimes diverge when
++ * they have stack modifications. That's fine as long as the potential stack
++ * layouts don't conflict at any given potential instruction boundary.
++ *
++ * Flatten the CFIs of the different alternative code streams (both original
++ * and replacement) into a single shared CFI array which can be used to detect
++ * conflicts and nicely feed a linear array of ORC entries to the unwinder.
++ */
++static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
+ {
+- struct stack_op *op;
++ struct cfi_state **alt_cfi;
++ int group_off;
+
+- list_for_each_entry(op, &insn->stack_ops, list) {
+- struct cfi_state old_cfi = state->cfi;
+- int res;
++ if (!insn->alt_group)
++ return 0;
+
+- res = update_cfi_state(insn, &state->cfi, op);
+- if (res)
+- return res;
++ alt_cfi = insn->alt_group->cfi;
++ group_off = insn->offset - insn->alt_group->first_insn->offset;
+
+- if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
+- WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
++ if (!alt_cfi[group_off]) {
++ alt_cfi[group_off] = &insn->cfi;
++ } else {
++ if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
++ WARN_FUNC("stack layout conflict in alternatives",
++ insn->sec, insn->offset);
+ return -1;
+ }
++ }
++
++ return 0;
++}
++
++static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
++{
++ struct stack_op *op;
++
++ list_for_each_entry(op, &insn->stack_ops, list) {
++
++ if (update_cfi_state(insn, &state->cfi, op))
++ return 1;
+
+ if (op->dest.type == OP_DEST_PUSHF) {
+ if (!state->uaccess_stack) {
+@@ -2442,28 +2453,20 @@ static int validate_return(struct symbol
+ return 0;
+ }
+
+-/*
+- * Alternatives should not contain any ORC entries, this in turn means they
+- * should not contain any CFI ops, which implies all instructions should have
+- * the same same CFI state.
+- *
+- * It is possible to constuct alternatives that have unreachable holes that go
+- * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
+- * states which then results in ORC entries, which we just said we didn't want.
+- *
+- * Avoid them by copying the CFI entry of the first instruction into the whole
+- * alternative.
+- */
+-static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
++static struct instruction *next_insn_to_validate(struct objtool_file *file,
++ struct instruction *insn)
+ {
+- struct instruction *first_insn = insn;
+ struct alt_group *alt_group = insn->alt_group;
+
+- sec_for_each_insn_continue(file, insn) {
+- if (insn->alt_group != alt_group)
+- break;
+- insn->cfi = first_insn->cfi;
+- }
++ /*
++ * Simulate the fact that alternatives are patched in-place. When the
++ * end of a replacement alt_group is reached, redirect objtool flow to
++ * the end of the original alt_group.
++ */
++ if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
++ return next_insn_same_sec(file, alt_group->orig_group->last_insn);
++
++ return next_insn_same_sec(file, insn);
+ }
+
+ /*
+@@ -2484,7 +2487,7 @@ static int validate_branch(struct objtoo
+ sec = insn->sec;
+
+ while (1) {
+- next_insn = next_insn_same_sec(file, insn);
++ next_insn = next_insn_to_validate(file, insn);
+
+ if (file->c_file && func && insn->func && func != insn->func->pfunc) {
+ WARN("%s() falls through to next function %s()",
+@@ -2517,6 +2520,9 @@ static int validate_branch(struct objtoo
+
+ insn->visited |= visited;
+
++ if (propagate_alt_cfi(file, insn))
++ return 1;
++
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+ bool skip_orig = false;
+
+@@ -2532,9 +2538,6 @@ static int validate_branch(struct objtoo
+ }
+ }
+
+- if (insn->alt_group)
+- fill_alternative_cfi(file, insn);
+-
+ if (skip_orig)
+ return 0;
+ }
+@@ -2767,9 +2770,6 @@ static bool ignore_unreachable_insn(stru
+ !strcmp(insn->sec->name, ".altinstr_aux"))
+ return true;
+
+- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
+- return true;
+-
+ if (!insn->func)
+ return false;
+
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -28,6 +28,12 @@ struct alt_group {
+
+ /* First and last instructions in the group */
+ struct instruction *first_insn, *last_insn;
++
++ /*
++ * Byte-offset-addressed len-sized array of pointers to CFI structs.
++ * This is shared with the other alt_groups in the same alternative.
++ */
++ struct cfi_state **cfi;
+ };
+
+ struct instruction {
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -141,6 +141,13 @@ static int orc_list_add(struct list_head
+ return 0;
+ }
+
++static unsigned long alt_group_len(struct alt_group *alt_group)
++{
++ return alt_group->last_insn->offset +
++ alt_group->last_insn->len -
++ alt_group->first_insn->offset;
++}
++
+ int orc_create(struct objtool_file *file)
+ {
+ struct section *sec, *ip_rsec, *orc_sec;
+@@ -165,15 +172,48 @@ int orc_create(struct objtool_file *file
+ continue;
+
+ sec_for_each_insn(file, sec, insn) {
+- if (init_orc_entry(&orc, &insn->cfi))
+- return -1;
+- if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++ struct alt_group *alt_group = insn->alt_group;
++ int i;
++
++ if (!alt_group) {
++ if (init_orc_entry(&orc, &insn->cfi))
++ return -1;
++ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++ continue;
++ if (orc_list_add(&orc_list, &orc, sec,
++ insn->offset))
++ return -1;
++ nr++;
++ prev_orc = orc;
++ empty = false;
+ continue;
+- if (orc_list_add(&orc_list, &orc, sec, insn->offset))
+- return -1;
+- nr++;
+- prev_orc = orc;
+- empty = false;
++ }
++
++ /*
++ * Alternatives can have different stack layout
++ * possibilities (but they shouldn't conflict).
++ * Instead of traversing the instructions, use the
++ * alt_group's flattened byte-offset-addressed CFI
++ * array.
++ */
++ for (i = 0; i < alt_group_len(alt_group); i++) {
++ struct cfi_state *cfi = alt_group->cfi[i];
++ if (!cfi)
++ continue;
++ if (init_orc_entry(&orc, cfi))
++ return -1;
++ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
++ continue;
++ if (orc_list_add(&orc_list, &orc, insn->sec,
++ insn->offset + i))
++ return -1;
++ nr++;
++ prev_orc = orc;
++ empty = false;
++ }
++
++ /* Skip to the end of the alt_group */
++ insn = alt_group->last_insn;
+ }
+
+ /* Add a section terminator */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 30 Sep 2021 12:43:10 +0200
+Subject: objtool: Teach get_alt_entry() about more relocation types
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 24ff652573754fe4c03213ebd26b17e86842feb3 upstream.
+
+Occasionally objtool encounters symbol (as opposed to section)
+relocations in .altinstructions. Typically they are the alternatives
+written by elf_add_alternative() as encountered on a noinstr
+validation run on vmlinux after having already ran objtool on the
+individual .o files.
+
+Basically this is the counterpart of commit 44f6a7c0755d ("objtool:
+Fix seg fault with Clang non-section symbols"), because when these new
+assemblers (binutils now also does this) strip the section symbols,
+elf_add_reloc_to_insn() is forced to emit symbol based relocations.
+
+As such, teach get_alt_entry() about different relocation types.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Reported-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Link: https://lore.kernel.org/r/YVWUvknIEVNkPvnP@hirez.programming.kicks-ass.net
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/special.c | 32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -55,6 +55,24 @@ void __weak arch_handle_alternative(unsi
+ {
+ }
+
++static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off)
++{
++ switch (reloc->sym->type) {
++ case STT_FUNC:
++ *sec = reloc->sym->sec;
++ *off = reloc->sym->offset + reloc->addend;
++ return true;
++
++ case STT_SECTION:
++ *sec = reloc->sym->sec;
++ *off = reloc->addend;
++ return true;
++
++ default:
++ return false;
++ }
++}
++
+ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+ struct section *sec, int idx,
+ struct special_alt *alt)
+@@ -87,15 +105,12 @@ static int get_alt_entry(struct elf *elf
+ WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+ return -1;
+ }
+- if (orig_reloc->sym->type != STT_SECTION) {
+- WARN_FUNC("don't know how to handle non-section reloc symbol %s",
++ if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) {
++ WARN_FUNC("don't know how to handle reloc symbol type: %s",
+ sec, offset + entry->orig, orig_reloc->sym->name);
+ return -1;
+ }
+
+- alt->orig_sec = orig_reloc->sym->sec;
+- alt->orig_off = orig_reloc->addend;
+-
+ if (!entry->group || alt->new_len) {
+ new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
+ if (!new_reloc) {
+@@ -112,8 +127,11 @@ static int get_alt_entry(struct elf *elf
+ if (arch_is_retpoline(new_reloc->sym))
+ return 1;
+
+- alt->new_sec = new_reloc->sym->sec;
+- alt->new_off = (unsigned int)new_reloc->addend;
++ if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) {
++ WARN_FUNC("don't know how to handle reloc symbol type: %s",
++ sec, offset + entry->new, new_reloc->sym->name);
++ return -1;
++ }
+
+ /* _ASM_EXTABLE_EX hack */
+ if (alt->new_off >= 0x7ffffff0)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:47 +0200
+Subject: objtool: Treat .text.__x86.* as noinstr
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream.
+
+Needed because zen_untrain_ret() will be called from noinstr code.
+
+Also makes sense since the thunks MUST NOT contain instrumentation nor
+be poked with dynamic instrumentation.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -366,7 +366,8 @@ static int decode_instructions(struct ob
+ sec->text = true;
+
+ if (!strcmp(sec->name, ".noinstr.text") ||
+- !strcmp(sec->name, ".entry.text"))
++ !strcmp(sec->name, ".entry.text") ||
++ !strncmp(sec->name, ".text.__x86.", 12))
+ sec->noinstr = true;
+
+ for (offset = 0; offset < sec->len; offset += insn->len) {
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:59 +0200
+Subject: objtool: Update Retpoline validation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream.
+
+Update retpoline validation with the new CONFIG_RETPOLINE requirement of
+not having bare naked RET instructions.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict fixup at arch/x86/xen/xen-head.S]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 6 ++++++
+ arch/x86/mm/mem_encrypt_boot.S | 2 ++
+ arch/x86/xen/xen-head.S | 1 +
+ tools/objtool/check.c | 19 +++++++++++++------
+ 4 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -76,6 +76,12 @@
+ .endm
+
+ /*
++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
++ * vs RETBleed validation.
++ */
++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
++
++/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute)
+ pop %rbp
+
+ /* Offset to __x86_return_thunk would be wrong here */
++ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+ SYM_FUNC_END(sme_encrypt_execute)
+@@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy)
+ pop %r15
+
+ /* Offset to __x86_return_thunk would be wrong here */
++ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+ .L__enc_copy_end:
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -70,6 +70,7 @@ SYM_CODE_START(hypercall_page)
+ .rept (PAGE_SIZE / 32)
+ UNWIND_HINT_FUNC
+ .skip 31, 0x90
++ ANNOTATE_UNRET_SAFE
+ RET
+ .endr
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1799,8 +1799,9 @@ static int read_retpoline_hints(struct o
+ }
+
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+- insn->type != INSN_CALL_DYNAMIC) {
+- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
++ insn->type != INSN_CALL_DYNAMIC &&
++ insn->type != INSN_RETURN) {
++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret",
+ insn->sec, insn->offset);
+ return -1;
+ }
+@@ -3051,7 +3052,8 @@ static int validate_retpoline(struct obj
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+- insn->type != INSN_CALL_DYNAMIC)
++ insn->type != INSN_CALL_DYNAMIC &&
++ insn->type != INSN_RETURN)
+ continue;
+
+ if (insn->retpoline_safe)
+@@ -3066,9 +3068,14 @@ static int validate_retpoline(struct obj
+ if (!strcmp(insn->sec->name, ".init.text") && !module)
+ continue;
+
+- WARN_FUNC("indirect %s found in RETPOLINE build",
+- insn->sec, insn->offset,
+- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++ if (insn->type == INSN_RETURN) {
++ WARN_FUNC("'naked' return found in RETPOLINE build",
++ insn->sec, insn->offset);
++ } else {
++ WARN_FUNC("indirect %s found in RETPOLINE build",
++ insn->sec, insn->offset,
++ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++ }
+
+ warnings++;
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 21 Jun 2021 16:13:55 +0200
+Subject: objtool/x86: Ignore __x86_indirect_alt_* symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 31197d3a0f1caeb60fb01f6755e28347e4f44037 upstream.
+
+Because the __x86_indirect_alt* symbols are just that, objtool will
+try and validate them as regular symbols, instead of the alternative
+replacements that they are.
+
+This goes sideways for FRAME_POINTER=y builds; which generate a fair
+amount of warnings.
+
+Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/YNCgxwLBiK9wclYJ@hirez.programming.kicks-ass.net
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_al
+ 2: .skip 5-(2b-1b), 0x90
+ SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
++STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
++
+ SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+ 1: jmp *%\reg
+ 2: .skip 5-(2b-1b), 0x90
+ SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
++STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
++
+ .endm
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:36 +0200
+Subject: objtool,x86: Replace alternatives with .retpoline_sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 134ab5bd1883312d7a4b3033b05c6b5a1bb8889b upstream.
+
+Instead of writing complete alternatives, simply provide a list of all
+the retpoline thunk calls. Then the kernel is free to do with them as
+it pleases. Simpler code all-round.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.850007165@infradead.org
+[cascardo: fixed conflict because of missing
+ 8b946cc38e063f0f7bb67789478c38f6d7d457c9]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: deleted functions had slightly different code]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/vmlinux.lds.S | 14 ++++
+ tools/objtool/arch/x86/decode.c | 120 ------------------------------------
+ tools/objtool/check.c | 132 ++++++++++++++++++++++++++++------------
+ tools/objtool/elf.c | 83 -------------------------
+ tools/objtool/elf.h | 1
+ tools/objtool/special.c | 8 --
+ 6 files changed, 107 insertions(+), 251 deletions(-)
+
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -272,6 +272,20 @@ SECTIONS
+ __parainstructions_end = .;
+ }
+
++#ifdef CONFIG_RETPOLINE
++ /*
++ * List of instructions that call/jmp/jcc to retpoline thunks
++ * __x86_indirect_thunk_*(). These instructions can be patched along
++ * with alternatives, after which the section can be freed.
++ */
++ . = ALIGN(8);
++ .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
++ __retpoline_sites = .;
++ *(.retpoline_sites)
++ __retpoline_sites_end = .;
++ }
++#endif
++
+ /*
+ * struct alt_inst entries. From the header (alternative.h):
+ * "Alternative instructions for different CPU types or capabilities"
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -606,126 +606,6 @@ const char *arch_ret_insn(int len)
+ return ret[len-1];
+ }
+
+-/* asm/alternative.h ? */
+-
+-#define ALTINSTR_FLAG_INV (1 << 15)
+-#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+-
+-struct alt_instr {
+- s32 instr_offset; /* original instruction */
+- s32 repl_offset; /* offset to replacement instruction */
+- u16 cpuid; /* cpuid bit set for replacement */
+- u8 instrlen; /* length of original instruction */
+- u8 replacementlen; /* length of new instruction */
+-} __packed;
+-
+-static int elf_add_alternative(struct elf *elf,
+- struct instruction *orig, struct symbol *sym,
+- int cpuid, u8 orig_len, u8 repl_len)
+-{
+- const int size = sizeof(struct alt_instr);
+- struct alt_instr *alt;
+- struct section *sec;
+- Elf_Scn *s;
+-
+- sec = find_section_by_name(elf, ".altinstructions");
+- if (!sec) {
+- sec = elf_create_section(elf, ".altinstructions",
+- SHF_ALLOC, 0, 0);
+-
+- if (!sec) {
+- WARN_ELF("elf_create_section");
+- return -1;
+- }
+- }
+-
+- s = elf_getscn(elf->elf, sec->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return -1;
+- }
+-
+- sec->data = elf_newdata(s);
+- if (!sec->data) {
+- WARN_ELF("elf_newdata");
+- return -1;
+- }
+-
+- sec->data->d_size = size;
+- sec->data->d_align = 1;
+-
+- alt = sec->data->d_buf = malloc(size);
+- if (!sec->data->d_buf) {
+- perror("malloc");
+- return -1;
+- }
+- memset(sec->data->d_buf, 0, size);
+-
+- if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+- R_X86_64_PC32, orig->sec, orig->offset)) {
+- WARN("elf_create_reloc: alt_instr::instr_offset");
+- return -1;
+- }
+-
+- if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+- R_X86_64_PC32, sym, 0)) {
+- WARN("elf_create_reloc: alt_instr::repl_offset");
+- return -1;
+- }
+-
+- alt->cpuid = cpuid;
+- alt->instrlen = orig_len;
+- alt->replacementlen = repl_len;
+-
+- sec->sh.sh_size += size;
+- sec->changed = true;
+-
+- return 0;
+-}
+-
+-#define X86_FEATURE_RETPOLINE ( 7*32+12)
+-
+-int arch_rewrite_retpolines(struct objtool_file *file)
+-{
+- struct instruction *insn;
+- struct reloc *reloc;
+- struct symbol *sym;
+- char name[32] = "";
+-
+- list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+-
+- if (insn->type != INSN_JUMP_DYNAMIC &&
+- insn->type != INSN_CALL_DYNAMIC)
+- continue;
+-
+- if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+- continue;
+-
+- reloc = insn->reloc;
+-
+- sprintf(name, "__x86_indirect_alt_%s_%s",
+- insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+- reloc->sym->name + 21);
+-
+- sym = find_symbol_by_name(file->elf, name);
+- if (!sym) {
+- sym = elf_create_undef_symbol(file->elf, name);
+- if (!sym) {
+- WARN("elf_create_undef_symbol");
+- return -1;
+- }
+- }
+-
+- if (elf_add_alternative(file->elf, insn, sym,
+- ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+- WARN("elf_add_alternative");
+- return -1;
+- }
+- }
+-
+- return 0;
+-}
+-
+ int arch_decode_hint_reg(u8 sp_reg, int *base)
+ {
+ switch (sp_reg) {
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -606,6 +606,52 @@ static int create_static_call_sections(s
+ return 0;
+ }
+
++static int create_retpoline_sites_sections(struct objtool_file *file)
++{
++ struct instruction *insn;
++ struct section *sec;
++ int idx;
++
++ sec = find_section_by_name(file->elf, ".retpoline_sites");
++ if (sec) {
++ WARN("file already has .retpoline_sites, skipping");
++ return 0;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->retpoline_call_list, call_node)
++ idx++;
++
++ if (!idx)
++ return 0;
++
++ sec = elf_create_section(file->elf, ".retpoline_sites", 0,
++ sizeof(int), idx);
++ if (!sec) {
++ WARN("elf_create_section: .retpoline_sites");
++ return -1;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
++
++ int *site = (int *)sec->data->d_buf + idx;
++ *site = 0;
++
++ if (elf_add_reloc_to_insn(file->elf, sec,
++ idx * sizeof(int),
++ R_X86_64_PC32,
++ insn->sec, insn->offset)) {
++ WARN("elf_add_reloc_to_insn: .retpoline_sites");
++ return -1;
++ }
++
++ idx++;
++ }
++
++ return 0;
++}
++
+ /*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+@@ -893,6 +939,11 @@ static void annotate_call_site(struct ob
+ return;
+ }
+
++ if (sym->retpoline_thunk) {
++ list_add_tail(&insn->call_node, &file->retpoline_call_list);
++ return;
++ }
++
+ /*
+ * Many compilers cannot disable KCOV with a function attribute
+ * so they need a little help, NOP out any KCOV calls from noinstr
+@@ -933,6 +984,39 @@ static void add_call_dest(struct objtool
+ annotate_call_site(file, insn, sibling);
+ }
+
++static void add_retpoline_call(struct objtool_file *file, struct instruction *insn)
++{
++ /*
++ * Retpoline calls/jumps are really dynamic calls/jumps in disguise,
++ * so convert them accordingly.
++ */
++ switch (insn->type) {
++ case INSN_CALL:
++ insn->type = INSN_CALL_DYNAMIC;
++ break;
++ case INSN_JUMP_UNCONDITIONAL:
++ insn->type = INSN_JUMP_DYNAMIC;
++ break;
++ case INSN_JUMP_CONDITIONAL:
++ insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
++ break;
++ default:
++ return;
++ }
++
++ insn->retpoline_safe = true;
++
++ /*
++ * Whatever stack impact regular CALLs have, should be undone
++ * by the RETURN of the called function.
++ *
++ * Annotated intra-function calls retain the stack_ops but
++ * are converted to JUMP, see read_intra_function_calls().
++ */
++ remove_insn_ops(insn);
++
++ annotate_call_site(file, insn, false);
++}
+ /*
+ * Find the destination instructions for all jumps.
+ */
+@@ -955,19 +1039,7 @@ static int add_jump_destinations(struct
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+ } else if (reloc->sym->retpoline_thunk) {
+- /*
+- * Retpoline jumps are really dynamic jumps in
+- * disguise, so convert them accordingly.
+- */
+- if (insn->type == INSN_JUMP_UNCONDITIONAL)
+- insn->type = INSN_JUMP_DYNAMIC;
+- else
+- insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+-
+- list_add_tail(&insn->call_node,
+- &file->retpoline_call_list);
+-
+- insn->retpoline_safe = true;
++ add_retpoline_call(file, insn);
+ continue;
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
+@@ -1096,18 +1168,7 @@ static int add_call_destinations(struct
+ add_call_dest(file, insn, dest, false);
+
+ } else if (reloc->sym->retpoline_thunk) {
+- /*
+- * Retpoline calls are really dynamic calls in
+- * disguise, so convert them accordingly.
+- */
+- insn->type = INSN_CALL_DYNAMIC;
+- insn->retpoline_safe = true;
+-
+- list_add_tail(&insn->call_node,
+- &file->retpoline_call_list);
+-
+- remove_insn_ops(insn);
+- continue;
++ add_retpoline_call(file, insn);
+
+ } else
+ add_call_dest(file, insn, reloc->sym, false);
+@@ -1806,11 +1867,6 @@ static void mark_rodata(struct objtool_f
+ file->rodata = found;
+ }
+
+-__weak int arch_rewrite_retpolines(struct objtool_file *file)
+-{
+- return 0;
+-}
+-
+ static int decode_sections(struct objtool_file *file)
+ {
+ int ret;
+@@ -1879,15 +1935,6 @@ static int decode_sections(struct objtoo
+ if (ret)
+ return ret;
+
+- /*
+- * Must be after add_special_section_alts(), since this will emit
+- * alternatives. Must be after add_{jump,call}_destination(), since
+- * those create the call insn lists.
+- */
+- ret = arch_rewrite_retpolines(file);
+- if (ret)
+- return ret;
+-
+ return 0;
+ }
+
+@@ -3159,6 +3206,13 @@ int check(struct objtool_file *file)
+ goto out;
+ warnings += ret;
+
++ if (retpoline) {
++ ret = create_retpoline_sites_sections(file);
++ if (ret < 0)
++ goto out;
++ warnings += ret;
++ }
++
+ if (stats) {
+ printf("nr_insns_visited: %ld\n", nr_insns_visited);
+ printf("nr_cfi: %ld\n", nr_cfi);
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -766,89 +766,6 @@ static int elf_add_string(struct elf *el
+ return len;
+ }
+
+-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+-{
+- struct section *symtab, *symtab_shndx;
+- struct symbol *sym;
+- Elf_Data *data;
+- Elf_Scn *s;
+-
+- sym = malloc(sizeof(*sym));
+- if (!sym) {
+- perror("malloc");
+- return NULL;
+- }
+- memset(sym, 0, sizeof(*sym));
+-
+- sym->name = strdup(name);
+-
+- sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+- if (sym->sym.st_name == -1)
+- return NULL;
+-
+- sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+- // st_other 0
+- // st_shndx 0
+- // st_value 0
+- // st_size 0
+-
+- symtab = find_section_by_name(elf, ".symtab");
+- if (!symtab) {
+- WARN("can't find .symtab");
+- return NULL;
+- }
+-
+- s = elf_getscn(elf->elf, symtab->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return NULL;
+- }
+-
+- data = elf_newdata(s);
+- if (!data) {
+- WARN_ELF("elf_newdata");
+- return NULL;
+- }
+-
+- data->d_buf = &sym->sym;
+- data->d_size = sizeof(sym->sym);
+- data->d_align = 1;
+-
+- sym->idx = symtab->len / sizeof(sym->sym);
+-
+- symtab->len += data->d_size;
+- symtab->changed = true;
+-
+- symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+- if (symtab_shndx) {
+- s = elf_getscn(elf->elf, symtab_shndx->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return NULL;
+- }
+-
+- data = elf_newdata(s);
+- if (!data) {
+- WARN_ELF("elf_newdata");
+- return NULL;
+- }
+-
+- data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+- data->d_size = sizeof(Elf32_Word);
+- data->d_align = 4;
+- data->d_type = ELF_T_WORD;
+-
+- symtab_shndx->len += 4;
+- symtab_shndx->changed = true;
+- }
+-
+- sym->sec = find_section_by_index(elf, 0);
+-
+- elf_add_symbol(elf, sym);
+-
+- return sym;
+-}
+-
+ struct section *elf_create_section(struct elf *elf, const char *name,
+ unsigned int sh_flags, size_t entsize, int nr)
+ {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -136,7 +136,6 @@ int elf_write_insn(struct elf *elf, stru
+ unsigned long offset, unsigned int len,
+ const char *insn);
+ int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
+ int elf_write(struct elf *elf);
+ void elf_close(struct elf *elf);
+
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -105,14 +105,6 @@ static int get_alt_entry(struct elf *elf
+ return -1;
+ }
+
+- /*
+- * Skip retpoline .altinstr_replacement... we already rewrite the
+- * instructions for retpolines anyway, see arch_is_retpoline()
+- * usage in add_{call,jump}_destinations().
+- */
+- if (arch_is_retpoline(new_reloc->sym))
+- return 1;
+-
+ reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off);
+
+ /* _ASM_EXTABLE_EX hack */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:15 +0100
+Subject: objtool/x86: Rewrite retpoline thunk calls
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9bc0bb50727c8ac69fbb33fb937431cf3518ff37 upstream.
+
+When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
+indirect call, have objtool rewrite it to:
+
+ ALTERNATIVE "call __x86_indirect_thunk_\reg",
+ "call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)
+
+Additionally, in order to not emit endless identical
+.altinst_replacement chunks, use a global symbol for them, see
+__x86_indirect_alt_*.
+
+This also avoids objtool from having to do code generation.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Miroslav Benes <mbenes@suse.cz>
+Link: https://lkml.kernel.org/r/20210326151300.320177914@infradead.org
+[bwh: Backported to 5.10: include "arch_elf.h" instead of "arch/elf.h"]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h | 12 ++-
+ arch/x86/lib/retpoline.S | 41 +++++++++++
+ tools/objtool/arch/x86/decode.c | 117 ++++++++++++++++++++++++++++++++++
+ 3 files changed, 167 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
+
+ #ifdef CONFIG_RETPOLINE
+
+-#define DECL_INDIRECT_THUNK(reg) \
++#undef GEN
++#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
++#include <asm/GEN-for-each-reg.h>
++
++#undef GEN
++#define GEN(reg) \
++ extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
++#include <asm/GEN-for-each-reg.h>
+
+ #undef GEN
+-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
++#define GEN(reg) \
++ extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
+ #include <asm/GEN-for-each-reg.h>
+
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -10,6 +10,8 @@
+ #include <asm/unwind_hints.h>
+ #include <asm/frame.h>
+
++ .section .text.__x86.indirect_thunk
++
+ .macro RETPOLINE reg
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call .Ldo_rop_\@
+@@ -25,9 +27,9 @@
+ .endm
+
+ .macro THUNK reg
+- .section .text.__x86.indirect_thunk
+
+ .align 32
++
+ SYM_FUNC_START(__x86_indirect_thunk_\reg)
+
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ .endm
+
+ /*
++ * This generates .altinstr_replacement symbols for use by objtool. They,
++ * however, must not actually live in .altinstr_replacement since that will be
++ * discarded after init, but module alternatives will also reference these
++ * symbols.
++ *
++ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
++ */
++.macro ALT_THUNK reg
++
++ .align 1
++
++SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
++ ANNOTATE_RETPOLINE_SAFE
++1: call *%\reg
++2: .skip 5-(2b-1b), 0x90
++SYM_FUNC_END(__x86_indirect_alt_call_\reg)
++
++SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
++ ANNOTATE_RETPOLINE_SAFE
++1: jmp *%\reg
++2: .skip 5-(2b-1b), 0x90
++SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
++
++.endm
++
++/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+
++#undef GEN
++#define GEN(reg) ALT_THUNK reg
++#include <asm/GEN-for-each-reg.h>
++
++#undef GEN
++#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
++#include <asm/GEN-for-each-reg.h>
++
++#undef GEN
++#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
++#include <asm/GEN-for-each-reg.h>
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -16,6 +16,7 @@
+ #include "../../arch.h"
+ #include "../../warn.h"
+ #include <asm/orc_types.h>
++#include "arch_elf.h"
+
+ static unsigned char op_to_cfi_reg[][2] = {
+ {CFI_AX, CFI_R8},
+@@ -585,6 +586,122 @@ const char *arch_nop_insn(int len)
+ return nops[len-1];
+ }
+
++/* asm/alternative.h ? */
++
++#define ALTINSTR_FLAG_INV (1 << 15)
++#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
++
++struct alt_instr {
++ s32 instr_offset; /* original instruction */
++ s32 repl_offset; /* offset to replacement instruction */
++ u16 cpuid; /* cpuid bit set for replacement */
++ u8 instrlen; /* length of original instruction */
++ u8 replacementlen; /* length of new instruction */
++} __packed;
++
++static int elf_add_alternative(struct elf *elf,
++ struct instruction *orig, struct symbol *sym,
++ int cpuid, u8 orig_len, u8 repl_len)
++{
++ const int size = sizeof(struct alt_instr);
++ struct alt_instr *alt;
++ struct section *sec;
++ Elf_Scn *s;
++
++ sec = find_section_by_name(elf, ".altinstructions");
++ if (!sec) {
++ sec = elf_create_section(elf, ".altinstructions",
++ SHF_WRITE, size, 0);
++
++ if (!sec) {
++ WARN_ELF("elf_create_section");
++ return -1;
++ }
++ }
++
++ s = elf_getscn(elf->elf, sec->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return -1;
++ }
++
++ sec->data = elf_newdata(s);
++ if (!sec->data) {
++ WARN_ELF("elf_newdata");
++ return -1;
++ }
++
++ sec->data->d_size = size;
++ sec->data->d_align = 1;
++
++ alt = sec->data->d_buf = malloc(size);
++ if (!sec->data->d_buf) {
++ perror("malloc");
++ return -1;
++ }
++ memset(sec->data->d_buf, 0, size);
++
++ if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
++ R_X86_64_PC32, orig->sec, orig->offset)) {
++ WARN("elf_create_reloc: alt_instr::instr_offset");
++ return -1;
++ }
++
++ if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
++ R_X86_64_PC32, sym, 0)) {
++ WARN("elf_create_reloc: alt_instr::repl_offset");
++ return -1;
++ }
++
++ alt->cpuid = cpuid;
++ alt->instrlen = orig_len;
++ alt->replacementlen = repl_len;
++
++ sec->sh.sh_size += size;
++ sec->changed = true;
++
++ return 0;
++}
++
++#define X86_FEATURE_RETPOLINE ( 7*32+12)
++
++int arch_rewrite_retpolines(struct objtool_file *file)
++{
++ struct instruction *insn;
++ struct reloc *reloc;
++ struct symbol *sym;
++ char name[32] = "";
++
++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
++
++ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
++ continue;
++
++ reloc = insn->reloc;
++
++ sprintf(name, "__x86_indirect_alt_%s_%s",
++ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
++ reloc->sym->name + 21);
++
++ sym = find_symbol_by_name(file->elf, name);
++ if (!sym) {
++ sym = elf_create_undef_symbol(file->elf, name);
++ if (!sym) {
++ WARN("elf_create_undef_symbol");
++ return -1;
++ }
++ }
++
++ if (elf_add_alternative(file->elf, insn, sym,
++ ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
++ WARN("elf_add_alternative");
++ return -1;
++ }
++ }
++
++ return 0;
++}
++
+ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
+ {
+ struct cfi_reg *cfa = &insn->cfi.cfa;
--- /dev/null
+kvm-vmx-use-test-reg-reg-instead-of-cmp-0-reg-in-vmenter.s.patch
+kvm-nvmx-use-__vmx_vcpu_run-in-nested_vmx_check_vmentry_hw.patch
+objtool-refactor-orc-section-generation.patch
+objtool-add-alt_group-struct.patch
+objtool-support-stack-layout-changes-in-alternatives.patch
+objtool-support-retpoline-jump-detection-for-vmlinux.o.patch
+objtool-assume-only-elf-functions-do-sibling-calls.patch
+objtool-combine-unwind_hint_ret_offset-and-unwind_hint_func.patch
+x86-xen-support-objtool-validation-in-xen-asm.s.patch
+x86-xen-support-objtool-vmlinux.o-validation-in-xen-head.s.patch
+x86-alternative-merge-include-files.patch
+x86-alternative-support-not-feature.patch
+x86-alternative-support-alternative_ternary.patch
+x86-alternative-use-alternative_ternary-in-_static_cpu_has.patch
+x86-insn-rename-insn_decode-to-insn_decode_from_regs.patch
+x86-insn-add-a-__ignore_sync_check__-marker.patch
+x86-insn-add-an-insn_decode-api.patch
+x86-insn-eval-handle-return-values-from-the-decoder.patch
+x86-alternative-use-insn_decode.patch
+x86-add-insn_decode_kernel.patch
+x86-alternatives-optimize-optimize_nops.patch
+x86-retpoline-simplify-retpolines.patch
+objtool-correctly-handle-retpoline-thunk-calls.patch
+objtool-handle-per-arch-retpoline-naming.patch
+objtool-rework-the-elf_rebuild_reloc_section-logic.patch
+objtool-add-elf_create_reloc-helper.patch
+objtool-create-reloc-sections-implicitly.patch
+objtool-extract-elf_strtab_concat.patch
+objtool-extract-elf_symbol_add.patch
+objtool-add-elf_create_undef_symbol.patch
+objtool-keep-track-of-retpoline-call-sites.patch
+objtool-cache-instruction-relocs.patch
+objtool-skip-magical-retpoline-.altinstr_replacement.patch
+objtool-x86-rewrite-retpoline-thunk-calls.patch
+objtool-support-asm-jump-tables.patch
+x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch
+objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch
+objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch
+objtool-x86-ignore-__x86_indirect_alt_-symbols.patch
+objtool-don-t-make-.altinstructions-writable.patch
+objtool-teach-get_alt_entry-about-more-relocation-types.patch
+objtool-print-out-the-symbol-type-when-complaining-about-it.patch
+objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch
+objtool-make-.altinstructions-section-entry-size-consistent.patch
+objtool-introduce-cfi-hash.patch
+objtool-handle-__sanitize_cov-tail-calls.patch
+objtool-classify-symbols.patch
+objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch
+objtool-x86-replace-alternatives-with-.retpoline_sites.patch
+x86-retpoline-remove-unused-replacement-symbols.patch
+x86-asm-fix-register-order.patch
+x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch
+x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch
+x86-retpoline-create-a-retpoline-thunk-array.patch
+x86-alternative-implement-.retpoline_sites-support.patch
+x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch
+x86-alternative-try-inline-spectre_v2-retpoline-amd.patch
+x86-alternative-add-debug-prints-to-apply_retpolines.patch
+bpf-x86-simplify-computing-label-offsets.patch
+bpf-x86-respect-x86_feature_retpoline.patch
+x86-lib-atomic64_386_32-rename-things.patch
+x86-prepare-asm-files-for-straight-line-speculation.patch
+x86-prepare-inline-asm-for-straight-line-speculation.patch
+x86-alternative-relax-text_poke_bp-constraint.patch
+objtool-add-straight-line-speculation-validation.patch
+x86-add-straight-line-speculation-mitigation.patch
+tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch
+kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch
+objtool-default-ignore-int3-for-unreachable.patch
+crypto-x86-poly1305-fixup-sls.patch
+objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch
+objtool-fix-code-relocs-vs-weak-symbols.patch
+objtool-fix-type-of-reloc-addend.patch
+objtool-fix-symbol-creation.patch
+x86-entry-remove-skip_r11rcx.patch
+objtool-fix-objtool-regression-on-x32-systems.patch
+x86-realmode-build-with-d__disable_exports.patch
+x86-kvm-vmx-make-noinstr-clean.patch
+x86-cpufeatures-move-retpoline-flags-to-word-11.patch
+x86-retpoline-cleanup-some-ifdefery.patch
+x86-retpoline-swizzle-retpoline-thunk.patch
+makefile-set-retpoline-cflags-based-on-config_cc_is_-clang-gcc.patch
+x86-retpoline-use-mfunction-return.patch
+x86-undo-return-thunk-damage.patch
+x86-objtool-create-.return_sites.patch
+objtool-skip-non-text-sections-when-adding-return-thunk-sites.patch
+x86-static_call-use-alternative-ret-encoding.patch
+x86-ftrace-use-alternative-ret-encoding.patch
+x86-bpf-use-alternative-ret-encoding.patch
+x86-kvm-fix-setcc-emulation-for-return-thunks.patch
+x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
+x86-sev-avoid-using-__x86_return_thunk.patch
+x86-use-return-thunk-in-asm-code.patch
+objtool-treat-.text.__x86.-as-noinstr.patch
+x86-add-magic-amd-return-thunk.patch
+x86-bugs-report-amd-retbleed-vulnerability.patch
+x86-bugs-add-amd-retbleed-boot-parameter.patch
+x86-bugs-enable-stibp-for-jmp2ret.patch
+x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
+x86-entry-add-kernel-ibrs-implementation.patch
+x86-bugs-optimize-spec_ctrl-msr-writes.patch
+x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
+x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
+x86-bugs-report-intel-retbleed-vulnerability.patch
+intel_idle-disable-ibrs-during-long-idle.patch
+objtool-update-retpoline-validation.patch
+x86-xen-rename-sys-entry-points.patch
+x86-bugs-add-retbleed-ibpb.patch
+x86-bugs-do-ibpb-fallback-check-only-once.patch
+objtool-add-entry-unret-validation.patch
+x86-cpu-amd-add-spectral-chicken.patch
+x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
+x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
+x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
+x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
+x86-speculation-remove-x86_spec_ctrl_mask.patch
+objtool-re-add-unwind_hint_-save_restore.patch
+kvm-vmx-flatten-__vmx_vcpu_run.patch
+kvm-vmx-convert-launched-argument-to-flags.patch
+kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
+kvm-vmx-fix-ibrs-handling-after-vmexit.patch
+x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
+x86-common-stamp-out-the-stepping-madness.patch
+x86-cpu-amd-enumerate-btc_no.patch
+x86-retbleed-add-fine-grained-kconfig-knobs.patch
+x86-bugs-add-cannon-lake-to-retbleed-affected-cpu-list.patch
+x86-bugs-do-not-enable-ibpb-on-entry-when-ibpb-is-not-supported.patch
+x86-kexec-disable-ret-on-kexec.patch
+x86-speculation-disable-rrsba-behavior.patch
+x86-static_call-serialize-__static_call_fixup-properly.patch
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+Date: Sun, 9 May 2021 10:19:37 -0300
+Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy'
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+commit 35cb8c713a496e8c114eed5e2a5a30b359876df2 upstream.
+
+To bring in the change made in this cset:
+
+ f94909ceb1ed4bfd ("x86: Prepare asm files for straight-line-speculation")
+
+It silences these perf tools build warnings, no change in the tools:
+
+ Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
+ diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
+ Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
+ diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S
+
+The code generated was checked before and after using 'objdump -d /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o',
+no changes.
+
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/arch/x86/lib/memcpy_64.S | 12 ++++++------
+ tools/arch/x86/lib/memset_64.S | 6 +++---
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/tools/arch/x86/lib/memcpy_64.S
++++ b/tools/arch/x86/lib/memcpy_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
+ rep movsq
+ movl %edx, %ecx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy)
+ SYM_FUNC_END_ALIAS(__memcpy)
+ EXPORT_SYMBOL(memcpy)
+@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy_erms)
+
+ SYM_FUNC_START_LOCAL(memcpy_orig)
+@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq %r9, 1*8(%rdi)
+ movq %r10, -2*8(%rdi, %rdx)
+ movq %r11, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_16bytes:
+ cmpl $8, %edx
+@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq -1*8(%rsi, %rdx), %r9
+ movq %r8, 0*8(%rdi)
+ movq %r9, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_8bytes:
+ cmpl $4, %edx
+@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movl -4(%rsi, %rdx), %r8d
+ movl %ecx, (%rdi)
+ movl %r8d, -4(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_3bytes:
+ subl $1, %edx
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movb %cl, (%rdi)
+
+ .Lend:
+- retq
++ RET
+ SYM_FUNC_END(memcpy_orig)
+
+ .popsection
+--- a/tools/arch/x86/lib/memset_64.S
++++ b/tools/arch/x86/lib/memset_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
+ movl %edx,%ecx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_END_ALIAS(memset)
+ EXPORT_SYMBOL(memset)
+@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
+ movq %rdx,%rcx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(memset_erms)
+
+ SYM_FUNC_START_LOCAL(memset_orig)
+@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
+
+ .Lende:
+ movq %r10,%rax
+- ret
++ RET
+
+ .Lbad_alignment:
+ cmpq $7,%rdx
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 11 Jul 2022 00:43:31 +0200
+Subject: x86: Add insn_decode_kernel()
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+This was done by commit 52fa82c21f64e900a72437269a5cc9e0034b424e
+upstream, but this backport avoids changing all callers of the
+old decoder API.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/insn.h | 2 ++
+ arch/x86/kernel/alternative.c | 2 +-
+ tools/arch/x86/include/asm/insn.h | 2 ++
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/insn.h
++++ b/arch/x86/include/asm/insn.h
+@@ -105,6 +105,8 @@ enum insn_mode {
+
+ extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
++#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
++
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+ {
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -1290,7 +1290,7 @@ static void text_poke_loc_init(struct te
+ if (!emulate)
+ emulate = opcode;
+
+- ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN);
++ ret = insn_decode_kernel(&insn, emulate);
+
+ BUG_ON(ret < 0);
+ BUG_ON(len != insn.length);
+--- a/tools/arch/x86/include/asm/insn.h
++++ b/tools/arch/x86/include/asm/insn.h
+@@ -105,6 +105,8 @@ enum insn_mode {
+
+ extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
++#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
++
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+ {
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:48 +0200
+Subject: x86: Add magic AMD return-thunk
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream.
+
+Note: needs to be in a section distinct from Retpolines such that the
+Retpoline RET substitution cannot possibly use immediate jumps.
+
+ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
+little tricky but works due to the fact that zen_untrain_ret() doesn't
+have any stack ops and as such will emit a single ORC entry at the
+start (+0x3f).
+
+Meanwhile, unwinding an IP, including the __x86_return_thunk() one
+(+0x40) will search for the largest ORC entry smaller or equal to the
+IP, these will find the one ORC entry (+0x3f) and all works.
+
+ [ Alexandre: SVM part. ]
+ [ bp: Build fix, massages. ]
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflicts at arch/x86/entry/entry_64_compat.S]
+[cascardo: there is no ANNOTATE_NOENDBR]
+[cascardo: objtool commit 34c861e806478ac2ea4032721defbf1d6967df08 missing]
+[cascardo: conflict fixup]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: SEV-ES is not supported, so drop the change
+ in arch/x86/kvm/svm/vmenter.S]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 6 ++
+ arch/x86/entry/entry_64_compat.S | 4 +
+ arch/x86/include/asm/cpufeatures.h | 1
+ arch/x86/include/asm/disabled-features.h | 3 -
+ arch/x86/include/asm/nospec-branch.h | 17 ++++++++
+ arch/x86/kernel/vmlinux.lds.S | 2
+ arch/x86/kvm/svm/vmenter.S | 9 ++++
+ arch/x86/lib/retpoline.S | 63 +++++++++++++++++++++++++++++--
+ tools/objtool/check.c | 20 ++++++++-
+ 9 files changed, 117 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -102,6 +102,7 @@ SYM_CODE_START(entry_SYSCALL_64)
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
++ UNTRAIN_RET
+
+ /* Construct struct pt_regs on stack */
+ pushq $__USER_DS /* pt_regs->ss */
+@@ -675,6 +676,7 @@ native_irq_return_ldt:
+ pushq %rdi /* Stash user RDI */
+ swapgs /* to kernel GS */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
++ UNTRAIN_RET
+
+ movq PER_CPU_VAR(espfix_waddr), %rdi
+ movq %rax, (0*8)(%rdi) /* user RAX */
+@@ -910,6 +912,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ * be retrieved from a kernel internal table.
+ */
+ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
++ UNTRAIN_RET
+
+ /*
+ * Handling GSBASE depends on the availability of FSGSBASE.
+@@ -1022,6 +1025,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ FENCE_SWAPGS_USER_ENTRY
+ /* We have user CR3. Change to kernel CR3. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++ UNTRAIN_RET
+
+ .Lerror_entry_from_usermode_after_swapgs:
+ /* Put us onto the real thread stack. */
+@@ -1077,6 +1081,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++ UNTRAIN_RET
+
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+@@ -1171,6 +1176,7 @@ SYM_CODE_START(asm_exc_nmi)
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT_IRET_REGS base=%rdx offset=8
++ UNTRAIN_RET
+ pushq 5*8(%rdx) /* pt_regs->ss */
+ pushq 4*8(%rdx) /* pt_regs->rsp */
+ pushq 3*8(%rdx) /* pt_regs->flags */
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -14,6 +14,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/nospec-branch.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
+@@ -71,6 +72,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
+ pushq $__USER32_CS /* pt_regs->cs */
+ pushq $0 /* pt_regs->ip = 0 (placeholder) */
+ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
++ UNTRAIN_RET
+
+ /*
+ * User tracing code (ptrace or signal handlers) might assume that
+@@ -211,6 +213,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
++ UNTRAIN_RET
+
+ /* Construct struct pt_regs on stack */
+ pushq $__USER32_DS /* pt_regs->ss */
+@@ -377,6 +380,7 @@ SYM_CODE_START(entry_INT80_compat)
+ pushq (%rdi) /* pt_regs->di */
+ .Lint80_keep_stack:
+
++ UNTRAIN_RET
+ pushq %rsi /* pt_regs->si */
+ xorl %esi, %esi /* nospec si */
+ pushq %rdx /* pt_regs->dx */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -297,6 +297,7 @@
+ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -61,7 +61,8 @@
+ #else
+ # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
+- (1 << (X86_FEATURE_RETHUNK & 31)))
++ (1 << (X86_FEATURE_RETHUNK & 31)) | \
++ (1 << (X86_FEATURE_UNRET & 31)))
+ #endif
+
+ /* Force disable because it's broken beyond repair */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -112,6 +112,22 @@
+ #endif
+ .endm
+
++/*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
++ * return thunk isn't mapped into the userspace tables (then again, AMD
++ * typically has NO_MELTDOWN).
++ *
++ * Doesn't clobber any registers but does require a stable stack.
++ *
++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
++ * where we have a stack but before any RET instruction.
++ */
++.macro UNTRAIN_RET
++#ifdef CONFIG_RETPOLINE
++ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
++#endif
++.endm
++
+ #else /* __ASSEMBLY__ */
+
+ #define ANNOTATE_RETPOLINE_SAFE \
+@@ -121,6 +137,7 @@
+ ".popsection\n\t"
+
+ extern void __x86_return_thunk(void);
++extern void zen_untrain_ret(void);
+
+ #ifdef CONFIG_RETPOLINE
+
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -142,7 +142,7 @@ SECTIONS
+
+ #ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+- *(.text.__x86.indirect_thunk)
++ *(.text.__x86.*)
+ __indirect_thunk_end = .;
+ #endif
+ } :text =0xcccc
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -129,6 +129,15 @@ SYM_FUNC_START(__svm_vcpu_run)
+ #endif
+
+ /*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
++ * untrained as soon as we exit the VM and are back to the
++ * kernel. This should be done before re-enabling interrupts
++ * because interrupt handlers won't sanitize 'ret' if the return is
++ * from the kernel.
++ */
++ UNTRAIN_RET
++
++ /*
+ * Clear all general purpose registers except RSP and RAX to prevent
+ * speculative use of the guest's values, even those that are reloaded
+ * via the stack. In theory, an L1 cache miss when restoring registers
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -71,10 +71,67 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ */
+-SYM_CODE_START(__x86_return_thunk)
+- UNWIND_HINT_EMPTY
++ .section .text.__x86.return_thunk
++
++/*
++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
++ * alignment within the BTB.
++ * 2) The instruction at zen_untrain_ret must contain, and not
++ * end with, the 0xc3 byte of the RET.
++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
++ * from re-poisioning the BTB prediction.
++ */
++ .align 64
++ .skip 63, 0xcc
++SYM_FUNC_START_NOALIGN(zen_untrain_ret);
++
++ /*
++ * As executed from zen_untrain_ret, this is:
++ *
++ * TEST $0xcc, %bl
++ * LFENCE
++ * JMP __x86_return_thunk
++ *
++ * Executing the TEST instruction has a side effect of evicting any BTB
++ * prediction (potentially attacker controlled) attached to the RET, as
++ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
++ */
++ .byte 0xf6
++
++ /*
++ * As executed from __x86_return_thunk, this is a plain RET.
++ *
++ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
++ *
++ * We subsequently jump backwards and architecturally execute the RET.
++ * This creates a correct BTB prediction (type=ret), but in the
++ * meantime we suffer Straight Line Speculation (because the type was
++ * no branch) which is halted by the INT3.
++ *
++ * With SMT enabled and STIBP active, a sibling thread cannot poison
++ * RET's prediction to a type of its choice, but can evict the
++ * prediction due to competitive sharing. If the prediction is
++ * evicted, __x86_return_thunk will suffer Straight Line Speculation
++ * which will be contained safely by the INT3.
++ */
++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+ SYM_CODE_END(__x86_return_thunk)
+
+-__EXPORT_THUNK(__x86_return_thunk)
++ /*
++ * Ensure the TEST decoding / BTB invalidation is complete.
++ */
++ lfence
++
++ /*
++ * Jump back and execute the RET in the middle of the TEST instruction.
++ * INT3 is for SLS protection.
++ */
++ jmp __x86_return_thunk
++ int3
++SYM_FUNC_END(zen_untrain_ret)
++__EXPORT_THUNK(zen_untrain_ret)
++
++EXPORT_SYMBOL(__x86_return_thunk)
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1082,7 +1082,7 @@ static void add_retpoline_call(struct ob
+ annotate_call_site(file, insn, false);
+ }
+
+-static void add_return_call(struct objtool_file *file, struct instruction *insn)
++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+ {
+ /*
+ * Return thunk tail calls are really just returns in disguise,
+@@ -1092,7 +1092,7 @@ static void add_return_call(struct objto
+ insn->retpoline_safe = true;
+
+ /* Skip the non-text sections, specially .discard ones */
+- if (insn->sec->text)
++ if (add && insn->sec->text)
+ list_add_tail(&insn->call_node, &file->return_thunk_list);
+ }
+
+@@ -1121,7 +1121,7 @@ static int add_jump_destinations(struct
+ add_retpoline_call(file, insn);
+ continue;
+ } else if (reloc->sym->return_thunk) {
+- add_return_call(file, insn);
++ add_return_call(file, insn, true);
+ continue;
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
+@@ -1138,6 +1138,7 @@ static int add_jump_destinations(struct
+
+ insn->jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!insn->jump_dest) {
++ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+ * This is a special case where an alt instruction
+@@ -1147,6 +1148,19 @@ static int add_jump_destinations(struct
+ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ continue;
+
++ /*
++ * This is a special case for zen_untrain_ret().
++ * It jumps to __x86_return_thunk(), but objtool
++ * can't find the thunk's starting RET
++ * instruction, because the RET is also in the
++ * middle of another instruction. Objtool only
++ * knows about the outer instruction.
++ */
++ if (sym && sym->return_thunk) {
++ add_return_call(file, insn, false);
++ continue;
++ }
++
+ WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+ insn->sec, insn->offset, dest_sec->name,
+ dest_off);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:44 +0100
+Subject: x86: Add straight-line-speculation mitigation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e463a09af2f0677b9485a7e8e4e70b396b2ffb6f upstream.
+
+Make use of an upcoming GCC feature to mitigate
+straight-line-speculation for x86:
+
+ https://gcc.gnu.org/g:53a643f8568067d7700a9f2facc8ba39974973d3
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952
+ https://bugs.llvm.org/show_bug.cgi?id=52323
+
+It's built tested on x86_64-allyesconfig using GCC-12 and GCC-11.
+
+Maintenance overhead of this should be fairly low due to objtool
+validation.
+
+Size overhead of all these additional int3 instructions comes to:
+
+ text data bss dec hex filename
+ 22267751 6933356 2011368 31212475 1dc43bb defconfig-build/vmlinux
+ 22804126 6933356 1470696 31208178 1dc32f2 defconfig-build/vmlinux.sls
+
+Or roughly 2.4% additional text.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134908.140103474@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10:
+ - In scripts/Makefile.build, add the objtool option with an ifdef
+ block, same as for other options
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig | 12 ++++++++++++
+ arch/x86/Makefile | 6 +++++-
+ arch/x86/include/asm/linkage.h | 10 ++++++++++
+ arch/x86/include/asm/static_call.h | 2 +-
+ arch/x86/kernel/ftrace.c | 2 +-
+ arch/x86/kernel/static_call.c | 5 +++--
+ arch/x86/lib/memmove_64.S | 2 +-
+ arch/x86/lib/retpoline.S | 2 +-
+ scripts/Makefile.build | 3 +++
+ scripts/link-vmlinux.sh | 3 +++
+ 10 files changed, 40 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -462,6 +462,18 @@ config RETPOLINE
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
++config CC_HAS_SLS
++ def_bool $(cc-option,-mharden-sls=all)
++
++config SLS
++ bool "Mitigate Straight-Line-Speculation"
++ depends on CC_HAS_SLS && X86_64
++ default n
++ help
++ Compile the kernel with straight-line-speculation options to guard
++ against straight line speculation. The kernel image might be slightly
++ larger.
++
+ config X86_CPU_RESCTRL
+ bool "x86 CPU resource control support"
+ depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -196,7 +196,11 @@ ifdef CONFIG_RETPOLINE
+ endif
+ endif
+
+-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
++ifdef CONFIG_SLS
++ KBUILD_CFLAGS += -mharden-sls=all
++endif
++
++KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+
+ ifdef CONFIG_X86_NEED_RELOCS
+ LDFLAGS_vmlinux := --emit-relocs --discard-none
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,9 +18,19 @@
+ #define __ALIGN_STR __stringify(__ALIGN)
+ #endif
+
++#ifdef CONFIG_SLS
++#define RET ret; int3
++#else
++#define RET ret
++#endif
++
+ #else /* __ASSEMBLY__ */
+
++#ifdef CONFIG_SLS
++#define ASM_RET "ret; int3\n\t"
++#else
+ #define ASM_RET "ret\n\t"
++#endif
+
+ #endif /* __ASSEMBLY__ */
+
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -35,7 +35,7 @@
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+- __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+
+
+ #define ARCH_ADD_TRAMP_KEY(name) \
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -308,7 +308,7 @@ union ftrace_op_code_union {
+ } __attribute__((packed));
+ };
+
+-#define RET_SIZE 1
++#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
+
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -11,6 +11,8 @@ enum insn_type {
+ RET = 3, /* tramp / site cond-tail-call */
+ };
+
++static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
++
+ static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+ {
+ int size = CALL_INSN_SIZE;
+@@ -30,8 +32,7 @@ static void __ref __static_call_transfor
+ break;
+
+ case RET:
+- code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+- size = RET_INSN_SIZE;
++ code = &retinsn;
+ break;
+ }
+
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+ /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+ ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+- ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS
++ ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
+
+ /*
+ * movsq instruction have many startup latency
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\re
+
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
+
+ .endm
+
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -230,6 +230,9 @@ endif
+ ifdef CONFIG_X86_SMAP
+ objtool_args += --uaccess
+ endif
++ifdef CONFIG_SLS
++ objtool_args += --sls
++endif
+
+ # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
+ # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -77,6 +77,9 @@ objtool_link()
+ if [ -n "${CONFIG_X86_SMAP}" ]; then
+ objtoolopt="${objtoolopt} --uaccess"
+ fi
++ if [ -n "${CONFIG_SLS}" ]; then
++ objtoolopt="${objtoolopt} --sls"
++ fi
+ info OBJTOOL ${1}
+ tools/objtool/objtool ${objtoolopt} ${1}
+ fi
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:45 +0200
+Subject: x86/alternative: Add debug prints to apply_retpolines()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d4b5a5c993009ffeb5febe3b701da3faab6adb96 upstream.
+
+Make sure we can see the text changes when booting with
+'debug-alternative'.
+
+Example output:
+
+ [ ] SMP alternatives: retpoline at: __traceiter_initcall_level+0x1f/0x30 (ffffffff8100066f) len: 5 to: __x86_indirect_thunk_rax+0x0/0x20
+ [ ] SMP alternatives: ffffffff82603e58: [2:5) optimized NOPs: ff d0 0f 1f 00
+ [ ] SMP alternatives: ffffffff8100066f: orig: e8 cc 30 00 01
+ [ ] SMP alternatives: ffffffff8100066f: repl: ff d0 0f 1f 00
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.422273830@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -647,9 +647,15 @@ void __init_or_module noinline apply_ret
+ continue;
+ }
+
++ DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
++ addr, addr, insn.length,
++ addr + insn.length + insn.immediate.value);
++
+ len = patch_retpoline(addr, &insn, bytes);
+ if (len == insn.length) {
+ optimize_nops(bytes, len);
++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ text_poke_early(addr, bytes, len);
+ }
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:43 +0200
+Subject: x86/alternative: Handle Jcc __x86_indirect_thunk_\reg
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2f0cbb2a8e5bbf101e9de118fc0eb168111a5e1e upstream.
+
+Handle the rare cases where the compiler (clang) does an indirect
+conditional tail-call using:
+
+ Jcc __x86_indirect_thunk_\reg
+
+For the !RETPOLINE case this can be rewritten to fit the original (6
+byte) instruction like:
+
+ Jncc.d8 1f
+ JMP *%\reg
+ NOP
+1:
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.296470217@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 40 ++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 36 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -548,7 +548,8 @@ static int emit_indirect(int op, int reg
+ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
+ {
+ retpoline_thunk_t *target;
+- int reg, i = 0;
++ int reg, ret, i = 0;
++ u8 op, cc;
+
+ target = addr + insn->length + insn->immediate.value;
+ reg = target - __x86_indirect_thunk_array;
+@@ -562,9 +563,36 @@ static int patch_retpoline(void *addr, s
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ return -1;
+
+- i = emit_indirect(insn->opcode.bytes[0], reg, bytes);
+- if (i < 0)
+- return i;
++ op = insn->opcode.bytes[0];
++
++ /*
++ * Convert:
++ *
++ * Jcc.d32 __x86_indirect_thunk_\reg
++ *
++ * into:
++ *
++ * Jncc.d8 1f
++ * JMP *%\reg
++ * NOP
++ * 1:
++ */
++ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
++ if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
++ cc = insn->opcode.bytes[1] & 0xf;
++ cc ^= 1; /* invert condition */
++
++ bytes[i++] = 0x70 + cc; /* Jcc.d8 */
++ bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
++
++ /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
++ op = JMP32_INSN_OPCODE;
++ }
++
++ ret = emit_indirect(op, reg, bytes + i);
++ if (ret < 0)
++ return ret;
++ i += ret;
+
+ for (; i < insn->length;)
+ bytes[i++] = 0x90;
+@@ -598,6 +626,10 @@ void __init_or_module noinline apply_ret
+ case JMP32_INSN_OPCODE:
+ break;
+
++ case 0x0f: /* escape */
++ if (op2 >= 0x80 && op2 <= 0x8f)
++ break;
++ fallthrough;
+ default:
+ WARN_ON_ONCE(1);
+ continue;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:42 +0200
+Subject: x86/alternative: Implement .retpoline_sites support
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7508500900814d14e2e085cdc4e28142721abbdf upstream.
+
+Rewrite retpoline thunk call sites to be indirect calls for
+spectre_v2=off. This ensures spectre_v2=off is as near to a
+RETPOLINE=n build as possible.
+
+This is the replacement for objtool writing alternative entries to
+ensure the same and achieves feature-parity with the previous
+approach.
+
+One noteworthy feature is that it relies on the thunks to be in
+machine order to compute the register index.
+
+Specifically, this does not yet address the Jcc __x86_indirect_thunk_*
+calls generated by clang, a future patch will add this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org
+[cascardo: small conflict fixup at arch/x86/kernel/module.c]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - Use hex literal instead of BYTES_NOP1
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/kernel/um_arch.c | 4 +
+ arch/x86/include/asm/alternative.h | 1
+ arch/x86/kernel/alternative.c | 141 +++++++++++++++++++++++++++++++++++--
+ arch/x86/kernel/module.c | 9 ++
+ 4 files changed, 150 insertions(+), 5 deletions(-)
+
+--- a/arch/um/kernel/um_arch.c
++++ b/arch/um/kernel/um_arch.c
+@@ -358,6 +358,10 @@ void __init check_bugs(void)
+ os_check_bugs();
+ }
+
++void apply_retpolines(s32 *start, s32 *end)
++{
++}
++
+ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+ {
+ }
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -75,6 +75,7 @@ extern int alternatives_patched;
+
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
++extern void apply_retpolines(s32 *start, s32 *end);
+
+ struct module;
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -28,6 +28,7 @@
+ #include <asm/insn.h>
+ #include <asm/io.h>
+ #include <asm/fixmap.h>
++#include <asm/asm-prototypes.h>
+
+ int __read_mostly alternatives_patched;
+
+@@ -268,6 +269,7 @@ static void __init_or_module add_nops(vo
+ }
+ }
+
++extern s32 __retpoline_sites[], __retpoline_sites_end[];
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ extern s32 __smp_locks[], __smp_locks_end[];
+ void text_poke_early(void *addr, const void *opcode, size_t len);
+@@ -376,7 +378,7 @@ static __always_inline int optimize_nops
+ * "noinline" to cause control flow change and thus invalidate I$ and
+ * cause refetch after modification.
+ */
+-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
++static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+ {
+ struct insn insn;
+ int i = 0;
+@@ -394,11 +396,11 @@ static void __init_or_module noinline op
+ * optimized.
+ */
+ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+- i += optimize_nops_range(instr, a->instrlen, i);
++ i += optimize_nops_range(instr, len, i);
+ else
+ i += insn.length;
+
+- if (i >= a->instrlen)
++ if (i >= len)
+ return;
+ }
+ }
+@@ -486,10 +488,135 @@ void __init_or_module noinline apply_alt
+ text_poke_early(instr, insn_buff, insn_buff_sz);
+
+ next:
+- optimize_nops(a, instr);
++ optimize_nops(instr, a->instrlen);
+ }
+ }
+
++#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
++
++/*
++ * CALL/JMP *%\reg
++ */
++static int emit_indirect(int op, int reg, u8 *bytes)
++{
++ int i = 0;
++ u8 modrm;
++
++ switch (op) {
++ case CALL_INSN_OPCODE:
++ modrm = 0x10; /* Reg = 2; CALL r/m */
++ break;
++
++ case JMP32_INSN_OPCODE:
++ modrm = 0x20; /* Reg = 4; JMP r/m */
++ break;
++
++ default:
++ WARN_ON_ONCE(1);
++ return -1;
++ }
++
++ if (reg >= 8) {
++ bytes[i++] = 0x41; /* REX.B prefix */
++ reg -= 8;
++ }
++
++ modrm |= 0xc0; /* Mod = 3 */
++ modrm += reg;
++
++ bytes[i++] = 0xff; /* opcode */
++ bytes[i++] = modrm;
++
++ return i;
++}
++
++/*
++ * Rewrite the compiler generated retpoline thunk calls.
++ *
++ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
++ * indirect instructions, avoiding the extra indirection.
++ *
++ * For example, convert:
++ *
++ * CALL __x86_indirect_thunk_\reg
++ *
++ * into:
++ *
++ * CALL *%\reg
++ *
++ */
++static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
++{
++ retpoline_thunk_t *target;
++ int reg, i = 0;
++
++ target = addr + insn->length + insn->immediate.value;
++ reg = target - __x86_indirect_thunk_array;
++
++ if (WARN_ON_ONCE(reg & ~0xf))
++ return -1;
++
++ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
++ BUG_ON(reg == 4);
++
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
++ return -1;
++
++ i = emit_indirect(insn->opcode.bytes[0], reg, bytes);
++ if (i < 0)
++ return i;
++
++ for (; i < insn->length;)
++ bytes[i++] = 0x90;
++
++ return i;
++}
++
++/*
++ * Generated by 'objtool --retpoline'.
++ */
++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
++{
++ s32 *s;
++
++ for (s = start; s < end; s++) {
++ void *addr = (void *)s + *s;
++ struct insn insn;
++ int len, ret;
++ u8 bytes[16];
++ u8 op1, op2;
++
++ ret = insn_decode_kernel(&insn, addr);
++ if (WARN_ON_ONCE(ret < 0))
++ continue;
++
++ op1 = insn.opcode.bytes[0];
++ op2 = insn.opcode.bytes[1];
++
++ switch (op1) {
++ case CALL_INSN_OPCODE:
++ case JMP32_INSN_OPCODE:
++ break;
++
++ default:
++ WARN_ON_ONCE(1);
++ continue;
++ }
++
++ len = patch_retpoline(addr, &insn, bytes);
++ if (len == insn.length) {
++ optimize_nops(bytes, len);
++ text_poke_early(addr, bytes, len);
++ }
++ }
++}
++
++#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
++
++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
++
++#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
++
+ #ifdef CONFIG_SMP
+ static void alternatives_smp_lock(const s32 *start, const s32 *end,
+ u8 *text, u8 *text_end)
+@@ -774,6 +901,12 @@ void __init alternative_instructions(voi
+ * patching.
+ */
+
++ /*
++ * Rewrite the retpolines, must be done before alternatives since
++ * those can rewrite the retpoline thunks.
++ */
++ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
++
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+ #ifdef CONFIG_SMP
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
+ struct module *me)
+ {
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+- *para = NULL, *orc = NULL, *orc_ip = NULL;
++ *para = NULL, *orc = NULL, *orc_ip = NULL,
++ *retpolines = NULL;
+ char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
+ orc = s;
+ if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
+ orc_ip = s;
++ if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
++ retpolines = s;
+ }
+
++ if (retpolines) {
++ void *rseg = (void *)retpolines->sh_addr;
++ apply_retpolines(rseg, rseg + retpolines->sh_size);
++ }
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:06 +0100
+Subject: x86/alternative: Merge include files
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 5e21a3ecad1500e35b46701e7f3f232e15d78e69 upstream.
+
+Merge arch/x86/include/asm/alternative-asm.h into
+arch/x86/include/asm/alternative.h in order to make it easier to use
+common definitions later.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210311142319.4723-2-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S | 2
+ arch/x86/entry/vdso/vdso32/system_call.S | 2
+ arch/x86/include/asm/alternative-asm.h | 114 -------------------------------
+ arch/x86/include/asm/alternative.h | 112 +++++++++++++++++++++++++++++-
+ arch/x86/include/asm/nospec-branch.h | 1
+ arch/x86/include/asm/smap.h | 5 -
+ arch/x86/lib/atomic64_386_32.S | 2
+ arch/x86/lib/atomic64_cx8_32.S | 2
+ arch/x86/lib/copy_page_64.S | 2
+ arch/x86/lib/copy_user_64.S | 2
+ arch/x86/lib/memcpy_64.S | 2
+ arch/x86/lib/memmove_64.S | 2
+ arch/x86/lib/memset_64.S | 2
+ arch/x86/lib/retpoline.S | 2
+ 14 files changed, 120 insertions(+), 132 deletions(-)
+ delete mode 100644 arch/x86/include/asm/alternative-asm.h
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -40,7 +40,7 @@
+ #include <asm/processor-flags.h>
+ #include <asm/irq_vectors.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/frame.h>
+--- a/arch/x86/entry/vdso/vdso32/system_call.S
++++ b/arch/x86/entry/vdso/vdso32/system_call.S
+@@ -6,7 +6,7 @@
+ #include <linux/linkage.h>
+ #include <asm/dwarf2.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+
+ .text
+ .globl __kernel_vsyscall
+--- a/arch/x86/include/asm/alternative-asm.h
++++ /dev/null
+@@ -1,114 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
+-#define _ASM_X86_ALTERNATIVE_ASM_H
+-
+-#ifdef __ASSEMBLY__
+-
+-#include <asm/asm.h>
+-
+-#ifdef CONFIG_SMP
+- .macro LOCK_PREFIX
+-672: lock
+- .pushsection .smp_locks,"a"
+- .balign 4
+- .long 672b - .
+- .popsection
+- .endm
+-#else
+- .macro LOCK_PREFIX
+- .endm
+-#endif
+-
+-/*
+- * objtool annotation to ignore the alternatives and only consider the original
+- * instruction(s).
+- */
+-.macro ANNOTATE_IGNORE_ALTERNATIVE
+- .Lannotate_\@:
+- .pushsection .discard.ignore_alts
+- .long .Lannotate_\@ - .
+- .popsection
+-.endm
+-
+-/*
+- * Issue one struct alt_instr descriptor entry (need to put it into
+- * the section .altinstructions, see below). This entry contains
+- * enough information for the alternatives patching code to patch an
+- * instruction. See apply_alternatives().
+- */
+-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+- .long \orig - .
+- .long \alt - .
+- .word \feature
+- .byte \orig_len
+- .byte \alt_len
+- .byte \pad_len
+-.endm
+-
+-/*
+- * Define an alternative between two instructions. If @feature is
+- * present, early code in apply_alternatives() replaces @oldinstr with
+- * @newinstr. ".skip" directive takes care of proper instruction padding
+- * in case @newinstr is longer than @oldinstr.
+- */
+-.macro ALTERNATIVE oldinstr, newinstr, feature
+-140:
+- \oldinstr
+-141:
+- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+-142:
+-
+- .pushsection .altinstructions,"a"
+- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+- .popsection
+-
+- .pushsection .altinstr_replacement,"ax"
+-143:
+- \newinstr
+-144:
+- .popsection
+-.endm
+-
+-#define old_len 141b-140b
+-#define new_len1 144f-143f
+-#define new_len2 145f-144f
+-
+-/*
+- * gas compatible max based on the idea from:
+- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+- *
+- * The additional "-" is needed because gas uses a "true" value of -1.
+- */
+-#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+-
+-
+-/*
+- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+- * @feature2, it replaces @oldinstr with @feature2.
+- */
+-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+-140:
+- \oldinstr
+-141:
+- .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+- (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+-142:
+-
+- .pushsection .altinstructions,"a"
+- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+- .popsection
+-
+- .pushsection .altinstr_replacement,"ax"
+-143:
+- \newinstr1
+-144:
+- \newinstr2
+-145:
+- .popsection
+-.endm
+-
+-#endif /* __ASSEMBLY__ */
+-
+-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -2,13 +2,14 @@
+ #ifndef _ASM_X86_ALTERNATIVE_H
+ #define _ASM_X86_ALTERNATIVE_H
+
+-#ifndef __ASSEMBLY__
+-
+ #include <linux/types.h>
+-#include <linux/stddef.h>
+ #include <linux/stringify.h>
+ #include <asm/asm.h>
+
++#ifndef __ASSEMBLY__
++
++#include <linux/stddef.h>
++
+ /*
+ * Alternative inline assembly for SMP.
+ *
+@@ -271,6 +272,111 @@ static inline int alternatives_text_rese
+ */
+ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+
++#else /* __ASSEMBLY__ */
++
++#ifdef CONFIG_SMP
++ .macro LOCK_PREFIX
++672: lock
++ .pushsection .smp_locks,"a"
++ .balign 4
++ .long 672b - .
++ .popsection
++ .endm
++#else
++ .macro LOCK_PREFIX
++ .endm
++#endif
++
++/*
++ * objtool annotation to ignore the alternatives and only consider the original
++ * instruction(s).
++ */
++.macro ANNOTATE_IGNORE_ALTERNATIVE
++ .Lannotate_\@:
++ .pushsection .discard.ignore_alts
++ .long .Lannotate_\@ - .
++ .popsection
++.endm
++
++/*
++ * Issue one struct alt_instr descriptor entry (need to put it into
++ * the section .altinstructions, see below). This entry contains
++ * enough information for the alternatives patching code to patch an
++ * instruction. See apply_alternatives().
++ */
++.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
++ .long \orig - .
++ .long \alt - .
++ .word \feature
++ .byte \orig_len
++ .byte \alt_len
++ .byte \pad_len
++.endm
++
++/*
++ * Define an alternative between two instructions. If @feature is
++ * present, early code in apply_alternatives() replaces @oldinstr with
++ * @newinstr. ".skip" directive takes care of proper instruction padding
++ * in case @newinstr is longer than @oldinstr.
++ */
++.macro ALTERNATIVE oldinstr, newinstr, feature
++140:
++ \oldinstr
++141:
++ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
++142:
++
++ .pushsection .altinstructions,"a"
++ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
++ .popsection
++
++ .pushsection .altinstr_replacement,"ax"
++143:
++ \newinstr
++144:
++ .popsection
++.endm
++
++#define old_len 141b-140b
++#define new_len1 144f-143f
++#define new_len2 145f-144f
++
++/*
++ * gas compatible max based on the idea from:
++ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
++ *
++ * The additional "-" is needed because gas uses a "true" value of -1.
++ */
++#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
++
++
++/*
++ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
++ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
++ * @feature2, it replaces @oldinstr with @feature2.
++ */
++.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
++140:
++ \oldinstr
++141:
++ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
++ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
++142:
++
++ .pushsection .altinstructions,"a"
++ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
++ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
++ .popsection
++
++ .pushsection .altinstr_replacement,"ax"
++143:
++ \newinstr1
++144:
++ \newinstr2
++145:
++ .popsection
++.endm
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_X86_ALTERNATIVE_H */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -7,7 +7,6 @@
+ #include <linux/objtool.h>
+
+ #include <asm/alternative.h>
+-#include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/msr-index.h>
+ #include <asm/unwind_hints.h>
+--- a/arch/x86/include/asm/smap.h
++++ b/arch/x86/include/asm/smap.h
+@@ -11,6 +11,7 @@
+
+ #include <asm/nops.h>
+ #include <asm/cpufeatures.h>
++#include <asm/alternative.h>
+
+ /* "Raw" instruction opcodes */
+ #define __ASM_CLAC ".byte 0x0f,0x01,0xca"
+@@ -18,8 +19,6 @@
+
+ #ifdef __ASSEMBLY__
+
+-#include <asm/alternative-asm.h>
+-
+ #ifdef CONFIG_X86_SMAP
+
+ #define ASM_CLAC \
+@@ -37,8 +36,6 @@
+
+ #else /* __ASSEMBLY__ */
+
+-#include <asm/alternative.h>
+-
+ #ifdef CONFIG_X86_SMAP
+
+ static __always_inline void clac(void)
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -6,7 +6,7 @@
+ */
+
+ #include <linux/linkage.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+
+ /* if you want SMP support, implement these with real spinlocks */
+ .macro LOCK reg
+--- a/arch/x86/lib/atomic64_cx8_32.S
++++ b/arch/x86/lib/atomic64_cx8_32.S
+@@ -6,7 +6,7 @@
+ */
+
+ #include <linux/linkage.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+
+ .macro read64 reg
+ movl %ebx, %eax
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -3,7 +3,7 @@
+
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+
+ /*
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -11,7 +11,7 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/thread_info.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/export.h>
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -4,7 +4,7 @@
+ #include <linux/linkage.h>
+ #include <asm/errno.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+
+ .pushsection .noinstr.text, "ax"
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -8,7 +8,7 @@
+ */
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+
+ #undef memmove
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -3,7 +3,7 @@
+
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+
+ /*
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -4,7 +4,7 @@
+ #include <linux/linkage.h>
+ #include <asm/dwarf2.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/alternative-asm.h>
++#include <asm/alternative.h>
+ #include <asm/export.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/unwind_hints.h>
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 1 Jun 2021 17:51:22 +0200
+Subject: x86/alternative: Optimize single-byte NOPs at an arbitrary position
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 2b31e8ed96b260ce2c22bd62ecbb9458399e3b62 upstream.
+
+Up until now the assumption was that an alternative patching site would
+have some instructions at the beginning and trailing single-byte NOPs
+(0x90) padding. Therefore, the patching machinery would go and optimize
+those single-byte NOPs into longer ones.
+
+However, this assumption is broken on 32-bit when code like
+hv_do_hypercall() in hyperv_init() would use the retpoline speculation
+killer CALL_NOSPEC. The 32-bit version of that macro would align certain
+insns to 16 bytes, leading to the compiler issuing a one or more
+single-byte NOPs, depending on the holes it needs to fill for alignment.
+
+That would lead to the warning in optimize_nops() to fire:
+
+ ------------[ cut here ]------------
+ Not a NOP at 0xc27fb598
+ WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13
+
+due to that function verifying whether all of the following bytes really
+are single-byte NOPs.
+
+Therefore, carve out the NOP padding into a separate function and call
+it for each NOP range beginning with a single-byte NOP.
+
+Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
+Reported-by: Richard Narron <richard@aaazen.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
+Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 64 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -338,41 +338,69 @@ done:
+ }
+
+ /*
++ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
++ *
++ * @instr: instruction byte stream
++ * @instrlen: length of the above
++ * @off: offset within @instr where the first NOP has been detected
++ *
++ * Return: number of NOPs found (and replaced).
++ */
++static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
++{
++ unsigned long flags;
++ int i = off, nnops;
++
++ while (i < instrlen) {
++ if (instr[i] != 0x90)
++ break;
++
++ i++;
++ }
++
++ nnops = i - off;
++
++ if (nnops <= 1)
++ return nnops;
++
++ local_irq_save(flags);
++ add_nops(instr + off, nnops);
++ local_irq_restore(flags);
++
++ DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
++
++ return nnops;
++}
++
++/*
+ * "noinline" to cause control flow change and thus invalidate I$ and
+ * cause refetch after modification.
+ */
+ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+- unsigned long flags;
+ struct insn insn;
+- int nop, i = 0;
++ int i = 0;
+
+ /*
+- * Jump over the non-NOP insns, the remaining bytes must be single-byte
+- * NOPs, optimize them.
++ * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
++ * ones.
+ */
+ for (;;) {
+ if (insn_decode_kernel(&insn, &instr[i]))
+ return;
+
++ /*
++ * See if this and any potentially following NOPs can be
++ * optimized.
++ */
+ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+- break;
++ i += optimize_nops_range(instr, a->instrlen, i);
++ else
++ i += insn.length;
+
+- if ((i += insn.length) >= a->instrlen)
++ if (i >= a->instrlen)
+ return;
+ }
+-
+- for (nop = i; i < a->instrlen; i++) {
+- if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+- return;
+- }
+-
+- local_irq_save(flags);
+- add_nops(instr + nop, i - nop);
+- local_irq_restore(flags);
+-
+- DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
+- instr, nop, a->instrlen);
+ }
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:43 +0100
+Subject: x86/alternative: Relax text_poke_bp() constraint
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 26c44b776dba4ac692a0bf5a3836feb8a63fea6b upstream.
+
+Currently, text_poke_bp() is very strict to only allow patching a
+single instruction; however with straight-line-speculation it will be
+required to patch: ret; int3, which is two instructions.
+
+As such, relax the constraints a little to allow int3 padding for all
+instructions that do not imply the execution of the next instruction,
+ie: RET, JMP.d8 and JMP.d32.
+
+While there, rename the text_poke_loc::rel32 field to ::disp.
+
+Note: this fills up the text_poke_loc structure which is now a round
+ 16 bytes big.
+
+ [ bp: Put comments ontop instead of on the side. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134908.082342723@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 49 +++++++++++++++++++++++++++++-------------
+ 1 file changed, 34 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -1243,10 +1243,13 @@ void text_poke_sync(void)
+ }
+
+ struct text_poke_loc {
+- s32 rel_addr; /* addr := _stext + rel_addr */
+- s32 rel32;
++ /* addr := _stext + rel_addr */
++ s32 rel_addr;
++ s32 disp;
++ u8 len;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
++ /* see text_poke_bp_batch() */
+ u8 old;
+ };
+
+@@ -1261,7 +1264,8 @@ static struct bp_patching_desc *bp_desc;
+ static __always_inline
+ struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+ {
+- struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */
++ /* rcu_dereference */
++ struct bp_patching_desc *desc = __READ_ONCE(*descp);
+
+ if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
+ return NULL;
+@@ -1295,7 +1299,7 @@ noinstr int poke_int3_handler(struct pt_
+ {
+ struct bp_patching_desc *desc;
+ struct text_poke_loc *tp;
+- int len, ret = 0;
++ int ret = 0;
+ void *ip;
+
+ if (user_mode(regs))
+@@ -1335,8 +1339,7 @@ noinstr int poke_int3_handler(struct pt_
+ goto out_put;
+ }
+
+- len = text_opcode_size(tp->opcode);
+- ip += len;
++ ip += tp->len;
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+@@ -1351,12 +1354,12 @@ noinstr int poke_int3_handler(struct pt_
+ break;
+
+ case CALL_INSN_OPCODE:
+- int3_emulate_call(regs, (long)ip + tp->rel32);
++ int3_emulate_call(regs, (long)ip + tp->disp);
+ break;
+
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+- int3_emulate_jmp(regs, (long)ip + tp->rel32);
++ int3_emulate_jmp(regs, (long)ip + tp->disp);
+ break;
+
+ default:
+@@ -1431,7 +1434,7 @@ static void text_poke_bp_batch(struct te
+ */
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
+ u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
+- int len = text_opcode_size(tp[i].opcode);
++ int len = tp[i].len;
+
+ if (len - INT3_INSN_SIZE > 0) {
+ memcpy(old + INT3_INSN_SIZE,
+@@ -1508,21 +1511,37 @@ static void text_poke_loc_init(struct te
+ const void *opcode, size_t len, const void *emulate)
+ {
+ struct insn insn;
+- int ret;
++ int ret, i;
+
+ memcpy((void *)tp->text, opcode, len);
+ if (!emulate)
+ emulate = opcode;
+
+ ret = insn_decode_kernel(&insn, emulate);
+-
+ BUG_ON(ret < 0);
+- BUG_ON(len != insn.length);
+
+ tp->rel_addr = addr - (void *)_stext;
++ tp->len = len;
+ tp->opcode = insn.opcode.bytes[0];
+
+ switch (tp->opcode) {
++ case RET_INSN_OPCODE:
++ case JMP32_INSN_OPCODE:
++ case JMP8_INSN_OPCODE:
++ /*
++ * Control flow instructions without implied execution of the
++ * next instruction can be padded with INT3.
++ */
++ for (i = insn.length; i < len; i++)
++ BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
++ break;
++
++ default:
++ BUG_ON(len != insn.length);
++ };
++
++
++ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ case RET_INSN_OPCODE:
+ break;
+@@ -1530,7 +1549,7 @@ static void text_poke_loc_init(struct te
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+- tp->rel32 = insn.immediate.value;
++ tp->disp = insn.immediate.value;
+ break;
+
+ default: /* assume NOP */
+@@ -1538,13 +1557,13 @@ static void text_poke_loc_init(struct te
+ case 2: /* NOP2 -- emulate as JMP8+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ tp->opcode = JMP8_INSN_OPCODE;
+- tp->rel32 = 0;
++ tp->disp = 0;
+ break;
+
+ case 5: /* NOP5 -- emulate as JMP32+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ tp->opcode = JMP32_INSN_OPCODE;
+- tp->rel32 = 0;
++ tp->disp = 0;
+ break;
+
+ default: /* unknown instruction */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:11 +0100
+Subject: x86/alternative: Support ALTERNATIVE_TERNARY
+
+From: Juergen Gross <jgross@suse.com>
+
+commit e208b3c4a9748b2c17aa09ba663b5096ccf82dce upstream.
+
+Add ALTERNATIVE_TERNARY support for replacing an initial instruction
+with either of two instructions depending on a feature:
+
+ ALTERNATIVE_TERNARY "default_instr", FEATURE_NR,
+ "feature_on_instr", "feature_off_instr"
+
+which will start with "default_instr" and at patch time will,
+depending on FEATURE_NR being set or not, patch that with either
+"feature_on_instr" or "feature_off_instr".
+
+ [ bp: Add comment ontop. ]
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210311142319.4723-7-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -179,6 +179,11 @@ static inline int alternatives_text_rese
+ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ".popsection\n"
+
++/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
++#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
++ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
++ newinstr_yes, feature)
++
+ #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
+ OLDINSTR_3(oldinsn, 1, 2, 3) \
+ ".pushsection .altinstructions,\"a\"\n" \
+@@ -210,6 +215,9 @@ static inline int alternatives_text_rese
+ #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
++#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
++ asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory")
++
+ /*
+ * Alternative inline assembly with input.
+ *
+@@ -380,6 +388,11 @@ static inline int alternatives_text_rese
+ .popsection
+ .endm
+
++/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
++#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
++ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
++ newinstr_yes, feature
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_X86_ALTERNATIVE_H */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:10 +0100
+Subject: x86/alternative: Support not-feature
+
+From: Juergen Gross <jgross@suse.com>
+
+commit dda7bb76484978316bb412a353789ebc5901de36 upstream.
+
+Add support for alternative patching for the case a feature is not
+present on the current CPU. For users of ALTERNATIVE() and friends, an
+inverted feature is specified by applying the ALT_NOT() macro to it,
+e.g.:
+
+ ALTERNATIVE(old, new, ALT_NOT(feature));
+
+Committer note:
+
+The decision to encode the NOT-bit in the feature bit itself is because
+a future change which would make objtool generate such alternative
+calls, would keep the code in objtool itself fairly simple.
+
+Also, this allows for the alternative macros to support the NOT feature
+without having to change them.
+
+Finally, the u16 cpuid member encoding the X86_FEATURE_ flags is not an
+ABI so if more bits are needed, cpuid itself can be enlarged or a flags
+field can be added to struct alt_instr after having considered the size
+growth in either cases.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210311142319.4723-6-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h | 3 +++
+ arch/x86/kernel/alternative.c | 20 +++++++++++++++-----
+ 2 files changed, 18 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -6,6 +6,9 @@
+ #include <linux/stringify.h>
+ #include <asm/asm.h>
+
++#define ALTINSTR_FLAG_INV (1 << 15)
++#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
++
+ #ifndef __ASSEMBLY__
+
+ #include <linux/stddef.h>
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -388,21 +388,31 @@ void __init_or_module noinline apply_alt
+ */
+ for (a = start; a < end; a++) {
+ int insn_buff_sz = 0;
++ /* Mask away "NOT" flag bit for feature to test. */
++ u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
+
+ instr = (u8 *)&a->instr_offset + a->instr_offset;
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
+ BUG_ON(a->instrlen > sizeof(insn_buff));
+- BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
+- if (!boot_cpu_has(a->cpuid)) {
++ BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
++
++ /*
++ * Patch if either:
++ * - feature is present
++ * - feature not present but ALTINSTR_FLAG_INV is set to mean,
++ * patch if feature is *NOT* present.
++ */
++ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
+ if (a->padlen > 1)
+ optimize_nops(a, instr);
+
+ continue;
+ }
+
+- DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+- a->cpuid >> 5,
+- a->cpuid & 0x1f,
++ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
++ (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
++ feature >> 5,
++ feature & 0x1f,
+ instr, instr, a->instrlen,
+ replacement, a->replacementlen, a->padlen);
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:44 +0200
+Subject: x86/alternative: Try inline spectre_v2=retpoline,amd
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit bbe2df3f6b6da7848398d55b1311d58a16ec21e4 upstream.
+
+Try and replace retpoline thunk calls with:
+
+ LFENCE
+ CALL *%\reg
+
+for spectre_v2=retpoline,amd.
+
+Specifically, the sequence above is 5 bytes for the low 8 registers,
+but 6 bytes for the high 8 registers. This means that unless the
+compilers prefix stuff the call with higher registers this replacement
+will fail.
+
+Luckily GCC strongly favours RAX for the indirect calls and most (95%+
+for defconfig-x86_64) will be converted. OTOH clang strongly favours
+R11 and almost nothing gets converted.
+
+Note: it will also generate a correct replacement for the Jcc.d32
+case, except unless the compilers start to prefix stuff that, it'll
+never fit. Specifically:
+
+ Jncc.d8 1f
+ LFENCE
+ JMP *%\reg
+1:
+
+is 7-8 bytes long, where the original instruction in unpadded form is
+only 6 bytes.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.359986601@infradead.org
+[cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -544,6 +544,7 @@ static int emit_indirect(int op, int reg
+ *
+ * CALL *%\reg
+ *
++ * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ */
+ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
+ {
+@@ -560,7 +561,8 @@ static int patch_retpoline(void *addr, s
+ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
+ BUG_ON(reg == 4);
+
+- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
++ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
+ return -1;
+
+ op = insn->opcode.bytes[0];
+@@ -573,8 +575,9 @@ static int patch_retpoline(void *addr, s
+ * into:
+ *
+ * Jncc.d8 1f
++ * [ LFENCE ]
+ * JMP *%\reg
+- * NOP
++ * [ NOP ]
+ * 1:
+ */
+ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
+@@ -589,6 +592,15 @@ static int patch_retpoline(void *addr, s
+ op = JMP32_INSN_OPCODE;
+ }
+
++ /*
++ * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
++ */
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
++ bytes[i++] = 0x0f;
++ bytes[i++] = 0xae;
++ bytes[i++] = 0xe8; /* LFENCE */
++ }
++
+ ret = emit_indirect(op, reg, bytes + i);
+ if (ret < 0)
+ return ret;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 11 Mar 2021 15:23:12 +0100
+Subject: x86/alternative: Use ALTERNATIVE_TERNARY() in _static_cpu_has()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 2fe2a2c7a97c9bc32acc79154b75e754280f7867 upstream.
+
+_static_cpu_has() contains a completely open coded version of
+ALTERNATIVE_TERNARY(). Replace that with the macro instead.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210311142319.4723-8-jgross@suse.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeature.h | 41 ++++++++------------------------------
+ 1 file changed, 9 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -8,6 +8,7 @@
+
+ #include <asm/asm.h>
+ #include <linux/bitops.h>
++#include <asm/alternative.h>
+
+ enum cpuid_leafs
+ {
+@@ -172,39 +173,15 @@ extern void clear_cpu_cap(struct cpuinfo
+ */
+ static __always_inline bool _static_cpu_has(u16 bit)
+ {
+- asm_volatile_goto("1: jmp 6f\n"
+- "2:\n"
+- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+- "((5f-4f) - (2b-1b)),0x90\n"
+- "3:\n"
+- ".section .altinstructions,\"a\"\n"
+- " .long 1b - .\n" /* src offset */
+- " .long 4f - .\n" /* repl offset */
+- " .word %P[always]\n" /* always replace */
+- " .byte 3b - 1b\n" /* src len */
+- " .byte 5f - 4f\n" /* repl len */
+- " .byte 3b - 2b\n" /* pad len */
+- ".previous\n"
+- ".section .altinstr_replacement,\"ax\"\n"
+- "4: jmp %l[t_no]\n"
+- "5:\n"
+- ".previous\n"
+- ".section .altinstructions,\"a\"\n"
+- " .long 1b - .\n" /* src offset */
+- " .long 0\n" /* no replacement */
+- " .word %P[feature]\n" /* feature bit */
+- " .byte 3b - 1b\n" /* src len */
+- " .byte 0\n" /* repl len */
+- " .byte 0\n" /* pad len */
+- ".previous\n"
+- ".section .altinstr_aux,\"ax\"\n"
+- "6:\n"
+- " testb %[bitnum],%[cap_byte]\n"
+- " jnz %l[t_yes]\n"
+- " jmp %l[t_no]\n"
+- ".previous\n"
++ asm_volatile_goto(
++ ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
++ ".section .altinstr_aux,\"ax\"\n"
++ "6:\n"
++ " testb %[bitnum],%[cap_byte]\n"
++ " jnz %l[t_yes]\n"
++ " jmp %l[t_no]\n"
++ ".previous\n"
+ : : [feature] "i" (bit),
+- [always] "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 6 Nov 2020 19:37:25 +0100
+Subject: x86/alternative: Use insn_decode()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 63c66cde7bbcc79aac14b25861c5b2495eede57b upstream.
+
+No functional changes, just simplification.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210304174237.31945-10-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -1284,15 +1284,15 @@ static void text_poke_loc_init(struct te
+ const void *opcode, size_t len, const void *emulate)
+ {
+ struct insn insn;
++ int ret;
+
+ memcpy((void *)tp->text, opcode, len);
+ if (!emulate)
+ emulate = opcode;
+
+- kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+- insn_get_length(&insn);
++ ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN);
+
+- BUG_ON(!insn_complete(&insn));
++ BUG_ON(ret < 0);
+ BUG_ON(len != insn.length);
+
+ tp->rel_addr = addr - (void *)_stext;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:01 +0100
+Subject: x86/alternatives: Optimize optimize_nops()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 23c1ad538f4f371bdb67d8a112314842d5db7e5a upstream.
+
+Currently, optimize_nops() scans to see if the alternative starts with
+NOPs. However, the emit pattern is:
+
+ 141: \oldinstr
+ 142: .skip (len-(142b-141b)), 0x90
+
+That is, when 'oldinstr' is short, the tail is padded with NOPs. This case
+never gets optimized.
+
+Rewrite optimize_nops() to replace any trailing string of NOPs inside
+the alternative to larger NOPs. Also run it irrespective of patching,
+replacing NOPs in both the original and replaced code.
+
+A direct consequence is that 'padlen' becomes superfluous, so remove it.
+
+ [ bp:
+ - Adjust commit message
+ - remove a stale comment about needing to pad
+ - add a comment in optimize_nops()
+ - exit early if the NOP verif. loop catches a mismatch - function
+ should not add NOPs in that case
+ - fix the "optimized NOPs" offsets output ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/20210326151259.442992235@infradead.org
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h | 17 ++-------
+ arch/x86/kernel/alternative.c | 49 ++++++++++++++++----------
+ tools/objtool/arch/x86/include/arch_special.h | 2 -
+ 3 files changed, 37 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -65,7 +65,6 @@ struct alt_instr {
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+- u8 padlen; /* length of build-time padding */
+ } __packed;
+
+ /*
+@@ -104,7 +103,6 @@ static inline int alternatives_text_rese
+
+ #define alt_end_marker "663"
+ #define alt_slen "662b-661b"
+-#define alt_pad_len alt_end_marker"b-662b"
+ #define alt_total_slen alt_end_marker"b-661b"
+ #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
+
+@@ -151,8 +149,7 @@ static inline int alternatives_text_rese
+ " .long " b_replacement(num)"f - .\n" /* new instruction */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
+ " .byte " alt_total_slen "\n" /* source len */ \
+- " .byte " alt_rlen(num) "\n" /* replacement len */ \
+- " .byte " alt_pad_len "\n" /* pad len */
++ " .byte " alt_rlen(num) "\n" /* replacement len */
+
+ #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+ "# ALT: replacement " #num "\n" \
+@@ -224,9 +221,6 @@ static inline int alternatives_text_rese
+ * Peculiarities:
+ * No memory clobber here.
+ * Argument numbers start with 1.
+- * Best is to use constraints that are fixed size (like (%1) ... "r")
+- * If you use variable sized constraints like "m" or "g" in the
+- * replacement make sure to pad to the worst case length.
+ * Leaving an unused argument 0 to keep API compatibility.
+ */
+ #define alternative_input(oldinstr, newinstr, feature, input...) \
+@@ -315,13 +309,12 @@ static inline int alternatives_text_rese
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
++.macro altinstruction_entry orig alt feature orig_len alt_len
+ .long \orig - .
+ .long \alt - .
+ .word \feature
+ .byte \orig_len
+ .byte \alt_len
+- .byte \pad_len
+ .endm
+
+ /*
+@@ -338,7 +331,7 @@ static inline int alternatives_text_rese
+ 142:
+
+ .pushsection .altinstructions,"a"
+- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
++ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+@@ -375,8 +368,8 @@ static inline int alternatives_text_rese
+ 142:
+
+ .pushsection .altinstructions,"a"
+- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
++ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
++ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -344,19 +344,35 @@ done:
+ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+ unsigned long flags;
+- int i;
++ struct insn insn;
++ int nop, i = 0;
+
+- for (i = 0; i < a->padlen; i++) {
+- if (instr[i] != 0x90)
++ /*
++ * Jump over the non-NOP insns, the remaining bytes must be single-byte
++ * NOPs, optimize them.
++ */
++ for (;;) {
++ if (insn_decode_kernel(&insn, &instr[i]))
++ return;
++
++ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
++ break;
++
++ if ((i += insn.length) >= a->instrlen)
++ return;
++ }
++
++ for (nop = i; i < a->instrlen; i++) {
++ if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+ return;
+ }
+
+ local_irq_save(flags);
+- add_nops(instr + (a->instrlen - a->padlen), a->padlen);
++ add_nops(instr + nop, i - nop);
+ local_irq_restore(flags);
+
+ DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
+- instr, a->instrlen - a->padlen, a->padlen);
++ instr, nop, a->instrlen);
+ }
+
+ /*
+@@ -402,19 +418,15 @@ void __init_or_module noinline apply_alt
+ * - feature not present but ALTINSTR_FLAG_INV is set to mean,
+ * patch if feature is *NOT* present.
+ */
+- if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
+- if (a->padlen > 1)
+- optimize_nops(a, instr);
+-
+- continue;
+- }
++ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
++ goto next;
+
+- DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
++ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
+ (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+ feature >> 5,
+ feature & 0x1f,
+ instr, instr, a->instrlen,
+- replacement, a->replacementlen, a->padlen);
++ replacement, a->replacementlen);
+
+ DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
+ DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+@@ -438,14 +450,15 @@ void __init_or_module noinline apply_alt
+ if (a->replacementlen && is_jmp(replacement[0]))
+ recompute_jump(a, instr, replacement, insn_buff);
+
+- if (a->instrlen > a->replacementlen) {
+- add_nops(insn_buff + a->replacementlen,
+- a->instrlen - a->replacementlen);
+- insn_buff_sz += a->instrlen - a->replacementlen;
+- }
++ for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
++ insn_buff[insn_buff_sz] = 0x90;
++
+ DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
+
+ text_poke_early(instr, insn_buff, insn_buff_sz);
++
++next:
++ optimize_nops(a, instr);
+ }
+ }
+
+--- a/tools/objtool/arch/x86/include/arch_special.h
++++ b/tools/objtool/arch/x86/include/arch_special.h
+@@ -10,7 +10,7 @@
+ #define JUMP_ORIG_OFFSET 0
+ #define JUMP_NEW_OFFSET 4
+
+-#define ALT_ENTRY_SIZE 13
++#define ALT_ENTRY_SIZE 12
+ #define ALT_ORIG_OFFSET 0
+ #define ALT_NEW_OFFSET 4
+ #define ALT_FEATURE_OFFSET 8
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:38 +0200
+Subject: x86/asm: Fix register order
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a92ede2d584a2e070def59c7e47e6b6f6341c55c upstream.
+
+Ensure the register order is correct; this allows for easy translation
+between register number and trampoline and vice-versa.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.978573921@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/GEN-for-each-reg.h | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/GEN-for-each-reg.h
++++ b/arch/x86/include/asm/GEN-for-each-reg.h
+@@ -1,11 +1,16 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are in machine order; things rely on that.
++ */
+ #ifdef CONFIG_64BIT
+ GEN(rax)
+-GEN(rbx)
+ GEN(rcx)
+ GEN(rdx)
++GEN(rbx)
++GEN(rsp)
++GEN(rbp)
+ GEN(rsi)
+ GEN(rdi)
+-GEN(rbp)
+ GEN(r8)
+ GEN(r9)
+ GEN(r10)
+@@ -16,10 +21,11 @@ GEN(r14)
+ GEN(r15)
+ #else
+ GEN(eax)
+-GEN(ebx)
+ GEN(ecx)
+ GEN(edx)
++GEN(ebx)
++GEN(esp)
++GEN(ebp)
+ GEN(esi)
+ GEN(edi)
+-GEN(ebp)
+ #endif
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:39 +0200
+Subject: x86/asm: Fixup odd GEN-for-each-reg.h usage
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b6d3d9944bd7c9e8c06994ead3c9952f673f2a66 upstream.
+
+Currently GEN-for-each-reg.h usage leaves GEN defined, relying on any
+subsequent usage to start with #undef, which is rude.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.041792350@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h | 2 +-
+ arch/x86/lib/retpoline.S | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -19,9 +19,9 @@ extern void cmpxchg8b_emu(void);
+
+ #ifdef CONFIG_RETPOLINE
+
+-#undef GEN
+ #define GEN(reg) \
+ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+ #include <asm/GEN-for-each-reg.h>
++#undef GEN
+
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -55,10 +55,10 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+
+-#undef GEN
+ #define GEN(reg) THUNK reg
+ #include <asm/GEN-for-each-reg.h>
+-
+ #undef GEN
++
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
++#undef GEN
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:41 +0200
+Subject: x86/bpf: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream.
+
+Use the return thunk in eBPF generated code, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: add the necessary cnt variable to emit_return()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -399,6 +399,22 @@ static void emit_indirect_jump(u8 **ppro
+ *pprog = prog;
+ }
+
++static void emit_return(u8 **pprog, u8 *ip)
++{
++ u8 *prog = *pprog;
++ int cnt = 0;
++
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
++ emit_jump(&prog, &__x86_return_thunk, ip);
++ } else {
++ EMIT1(0xC3); /* ret */
++ if (IS_ENABLED(CONFIG_SLS))
++ EMIT1(0xCC); /* int3 */
++ }
++
++ *pprog = prog;
++}
++
+ /*
+ * Generate the following code:
+ *
+@@ -1443,7 +1459,7 @@ emit_jmp:
+ ctx->cleanup_addr = proglen;
+ pop_callee_regs(&prog, callee_regs_used);
+ EMIT1(0xC9); /* leave */
+- EMIT1(0xC3); /* ret */
++ emit_return(&prog, image + addrs[i - 1] + (prog - temp));
+ break;
+
+ default:
+@@ -1884,7 +1900,7 @@ int arch_prepare_bpf_trampoline(struct b
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+- EMIT1(0xC3); /* ret */
++ emit_return(&prog, prog);
+ /* Make sure the trampoline generation logic doesn't overflow */
+ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+ ret = -EFAULT;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+Date: Tue, 14 Jun 2022 23:15:50 +0200
+Subject: x86/bugs: Add AMD retbleed= boot parameter
+
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+
+commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 upstream.
+
+Add the "retbleed=<value>" boot parameter to select a mitigation for
+RETBleed. Possible values are "off", "auto" and "unret"
+(JMP2RET mitigation). The default value is "auto".
+
+Currently, "retbleed=auto" will select the unret mitigation on
+AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on
+Intel).
+
+ [peterz: rebase; add hygon]
+ [jpoimboe: cleanups]
+
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 15 +++
+ arch/x86/Kconfig | 3
+ arch/x86/kernel/cpu/bugs.c | 108 +++++++++++++++++++++++-
+ 3 files changed, 125 insertions(+), 1 deletion(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4656,6 +4656,21 @@
+
+ retain_initrd [RAM] Keep initrd memory after extraction
+
++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
++ Speculative Code Execution with Return Instructions)
++ vulnerability.
++
++ off - unconditionally disable
++ auto - automatically select a migitation
++ unret - force enable untrained return thunks,
++ only effective on AMD Zen {1,2}
++ based systems.
++
++ Selecting 'auto' will choose a mitigation method at run
++ time according to the CPU.
++
++ Not specifying this option is equivalent to retbleed=auto.
++
+ rfkill.default_state=
+ 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
+ etc. communication is blocked by default.
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -465,6 +465,9 @@ config RETPOLINE
+ config CC_HAS_SLS
+ def_bool $(cc-option,-mharden-sls=all)
+
++config CC_HAS_RETURN_THUNK
++ def_bool $(cc-option,-mfunction-return=thunk-extern)
++
+ config SLS
+ bool "Mitigate Straight-Line-Speculation"
+ depends on CC_HAS_SLS && X86_64
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -37,6 +37,7 @@
+ #include "cpu.h"
+
+ static void __init spectre_v1_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+@@ -112,6 +113,12 @@ void __init check_bugs(void)
+
+ /* Select the proper CPU mitigations before patching alternatives: */
+ spectre_v1_select_mitigation();
++ retbleed_select_mitigation();
++ /*
++ * spectre_v2_select_mitigation() relies on the state set by
++ * retbleed_select_mitigation(); specifically the STIBP selection is
++ * forced for UNRET.
++ */
+ spectre_v2_select_mitigation();
+ ssb_select_mitigation();
+ l1tf_select_mitigation();
+@@ -709,6 +716,100 @@ static int __init nospectre_v1_cmdline(c
+ early_param("nospectre_v1", nospectre_v1_cmdline);
+
+ #undef pr_fmt
++#define pr_fmt(fmt) "RETBleed: " fmt
++
++enum retbleed_mitigation {
++ RETBLEED_MITIGATION_NONE,
++ RETBLEED_MITIGATION_UNRET,
++};
++
++enum retbleed_mitigation_cmd {
++ RETBLEED_CMD_OFF,
++ RETBLEED_CMD_AUTO,
++ RETBLEED_CMD_UNRET,
++};
++
++const char * const retbleed_strings[] = {
++ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
++ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
++};
++
++static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
++ RETBLEED_MITIGATION_NONE;
++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
++ RETBLEED_CMD_AUTO;
++
++static int __init retbleed_parse_cmdline(char *str)
++{
++ if (!str)
++ return -EINVAL;
++
++ if (!strcmp(str, "off"))
++ retbleed_cmd = RETBLEED_CMD_OFF;
++ else if (!strcmp(str, "auto"))
++ retbleed_cmd = RETBLEED_CMD_AUTO;
++ else if (!strcmp(str, "unret"))
++ retbleed_cmd = RETBLEED_CMD_UNRET;
++ else
++ pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
++
++ return 0;
++}
++early_param("retbleed", retbleed_parse_cmdline);
++
++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++
++static void __init retbleed_select_mitigation(void)
++{
++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
++ return;
++
++ switch (retbleed_cmd) {
++ case RETBLEED_CMD_OFF:
++ return;
++
++ case RETBLEED_CMD_UNRET:
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ break;
++
++ case RETBLEED_CMD_AUTO:
++ default:
++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED))
++ break;
++
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ break;
++ }
++
++ switch (retbleed_mitigation) {
++ case RETBLEED_MITIGATION_UNRET:
++
++ if (!IS_ENABLED(CONFIG_RETPOLINE) ||
++ !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
++ pr_err(RETBLEED_COMPILER_MSG);
++ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++ break;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
++ setup_force_cpu_cap(X86_FEATURE_UNRET);
++
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++ pr_err(RETBLEED_UNTRAIN_MSG);
++ break;
++
++ default:
++ break;
++ }
++
++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
++}
++
++#undef pr_fmt
+ #define pr_fmt(fmt) "Spectre V2 : " fmt
+
+ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+@@ -1919,7 +2020,12 @@ static ssize_t srbds_show_state(char *bu
+
+ static ssize_t retbleed_show_state(char *buf)
+ {
+- return sprintf(buf, "Vulnerable\n");
++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET &&
++ (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON))
++ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++
++ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 6 Jul 2022 15:01:15 -0700
+Subject: x86/bugs: Add Cannon lake to RETBleed affected CPU list
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream.
+
+Cannon lake is also affected by RETBleed, add it to the list.
+
+Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability")
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1129,6 +1129,7 @@ static const struct x86_cpu_id cpu_vuln_
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:02 +0200
+Subject: x86/bugs: Add retbleed=ibpb
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream.
+
+jmp2ret mitigates the easy-to-attack case at relatively low overhead.
+It mitigates the long speculation windows after a mispredicted RET, but
+it does not mitigate the short speculation window from arbitrary
+instruction boundaries.
+
+On Zen2, there is a chicken bit which needs setting, which mitigates
+"arbitrary instruction boundaries" down to just "basic block boundaries".
+
+But there is no fix for the short speculation window on basic block
+boundaries, other than to flush the entire BTB to evict all attacker
+predictions.
+
+On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP
+or no-SMT):
+
+ 1) Nothing System wide open
+ 2) jmp2ret May stop a script kiddy
+ 3) jmp2ret+chickenbit Raises the bar rather further
+ 4) IBPB Only thing which can count as "safe".
+
+Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit
+on Zen1 according to lmbench.
+
+ [ bp: Fixup feature bit comments, document option, 32-bit build fix. ]
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 3 +
+ arch/x86/entry/Makefile | 2 -
+ arch/x86/entry/entry.S | 22 ++++++++++++
+ arch/x86/include/asm/cpufeatures.h | 2 -
+ arch/x86/include/asm/nospec-branch.h | 8 +++-
+ arch/x86/kernel/cpu/bugs.c | 43 ++++++++++++++++++------
+ 6 files changed, 67 insertions(+), 13 deletions(-)
+ create mode 100644 arch/x86/entry/entry.S
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4666,6 +4666,9 @@
+ disabling SMT if necessary for
+ the full mitigation (only on Zen1
+ and older without STIBP).
++ ibpb - mitigate short speculation windows on
++ basic block boundaries too. Safe, highest
++ perf impact.
+ unret - force enable untrained return thunks,
+ only effective on AMD f15h-f17h
+ based systems.
+--- a/arch/x86/entry/Makefile
++++ b/arch/x86/entry/Makefile
+@@ -21,7 +21,7 @@ CFLAGS_syscall_64.o += $(call cc-option
+ CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,)
+ CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,)
+
+-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
++obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+ obj-y += common.o
+
+ obj-y += vdso/
+--- /dev/null
++++ b/arch/x86/entry/entry.S
+@@ -0,0 +1,22 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Common place for both 32- and 64-bit entry routines.
++ */
++
++#include <linux/linkage.h>
++#include <asm/export.h>
++#include <asm/msr-index.h>
++
++.pushsection .noinstr.text, "ax"
++
++SYM_FUNC_START(entry_ibpb)
++ movl $MSR_IA32_PRED_CMD, %ecx
++ movl $PRED_CMD_IBPB, %eax
++ xorl %edx, %edx
++ wrmsr
++ RET
++SYM_FUNC_END(entry_ibpb)
++/* For KVM */
++EXPORT_SYMBOL_GPL(entry_ibpb);
++
++.popsection
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -292,7 +292,7 @@
+ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+ /* FREE! (11*32+ 8) */
+ /* FREE! (11*32+ 9) */
+-/* FREE! (11*32+10) */
++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+ /* FREE! (11*32+11) */
+ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -123,14 +123,17 @@
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+- * Doesn't clobber any registers but does require a stable stack.
++ * While zen_untrain_ret() doesn't clobber anything but requires stack,
++ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+ .macro UNTRAIN_RET
+ #ifdef CONFIG_RETPOLINE
+- ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
++ ALTERNATIVE_2 "", \
++ "call zen_untrain_ret", X86_FEATURE_UNRET, \
++ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+
+@@ -144,6 +147,7 @@
+
+ extern void __x86_return_thunk(void);
+ extern void zen_untrain_ret(void);
++extern void entry_ibpb(void);
+
+ #ifdef CONFIG_RETPOLINE
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -761,6 +761,7 @@ static enum spectre_v2_mitigation spectr
+ enum retbleed_mitigation {
+ RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_UNRET,
++ RETBLEED_MITIGATION_IBPB,
+ RETBLEED_MITIGATION_IBRS,
+ RETBLEED_MITIGATION_EIBRS,
+ };
+@@ -769,11 +770,13 @@ enum retbleed_mitigation_cmd {
+ RETBLEED_CMD_OFF,
+ RETBLEED_CMD_AUTO,
+ RETBLEED_CMD_UNRET,
++ RETBLEED_CMD_IBPB,
+ };
+
+ const char * const retbleed_strings[] = {
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
+ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
++ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
+ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
+ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+ };
+@@ -803,6 +806,8 @@ static int __init retbleed_parse_cmdline
+ retbleed_cmd = RETBLEED_CMD_AUTO;
+ } else if (!strcmp(str, "unret")) {
+ retbleed_cmd = RETBLEED_CMD_UNRET;
++ } else if (!strcmp(str, "ibpb")) {
++ retbleed_cmd = RETBLEED_CMD_IBPB;
+ } else if (!strcmp(str, "nosmt")) {
+ retbleed_nosmt = true;
+ } else {
+@@ -817,11 +822,13 @@ static int __init retbleed_parse_cmdline
+ early_param("retbleed", retbleed_parse_cmdline);
+
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n"
+ #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+ static void __init retbleed_select_mitigation(void)
+ {
++ bool mitigate_smt = false;
++
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+ return;
+
+@@ -833,11 +840,21 @@ static void __init retbleed_select_mitig
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ break;
+
++ case RETBLEED_CMD_IBPB:
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ break;
++
+ case RETBLEED_CMD_AUTO:
+ default:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+- retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++
++ if (IS_ENABLED(CONFIG_RETPOLINE) &&
++ IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK))
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ else
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ }
+
+ /*
+ * The Intel mitigation (IBRS) was already selected in
+@@ -853,26 +870,34 @@ static void __init retbleed_select_mitig
+ if (!IS_ENABLED(CONFIG_RETPOLINE) ||
+ !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
+ pr_err(RETBLEED_COMPILER_MSG);
+- retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+- break;
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ goto retbleed_force_ibpb;
+ }
+
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+- if (!boot_cpu_has(X86_FEATURE_STIBP) &&
+- (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
+- cpu_smt_disable(false);
+-
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ pr_err(RETBLEED_UNTRAIN_MSG);
++
++ mitigate_smt = true;
++ break;
++
++ case RETBLEED_MITIGATION_IBPB:
++retbleed_force_ibpb:
++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++ mitigate_smt = true;
+ break;
+
+ default:
+ break;
+ }
+
++ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
++ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++ cpu_smt_disable(false);
++
+ /*
+ * Let IBRS trump all on Intel without affecting the effects of the
+ * retbleed= cmdline option.
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 15:07:19 -0700
+Subject: x86/bugs: Do IBPB fallback check only once
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream.
+
+When booting with retbleed=auto, if the kernel wasn't built with
+CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB. Make
+sure a warning is printed in that case. The IBPB fallback check is done
+twice, but it really only needs to be done once.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -847,18 +847,13 @@ static void __init retbleed_select_mitig
+ case RETBLEED_CMD_AUTO:
+ default:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+-
+- if (IS_ENABLED(CONFIG_RETPOLINE) &&
+- IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK))
+- retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+- else
+- retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+- }
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+
+ /*
+- * The Intel mitigation (IBRS) was already selected in
+- * spectre_v2_select_mitigation().
++ * The Intel mitigation (IBRS or eIBRS) was already selected in
++ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
++ * be set accordingly below.
+ */
+
+ break;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Thu, 7 Jul 2022 13:41:52 -0300
+Subject: x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream.
+
+There are some VM configurations which have Skylake model but do not
+support IBPB. In those cases, when using retbleed=ibpb, userspace is going
+to be killed and kernel is going to panic.
+
+If the CPU does not support IBPB, warn and proceed with the auto option. Also,
+do not fallback to IBPB on AMD/Hygon systems if it is not supported.
+
+Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb")
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -821,7 +821,10 @@ static void __init retbleed_select_mitig
+ break;
+
+ case RETBLEED_CMD_IBPB:
+- if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++ if (!boot_cpu_has(X86_FEATURE_IBPB)) {
++ pr_err("WARNING: CPU does not support IBPB.\n");
++ goto do_cmd_auto;
++ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+@@ -836,7 +839,7 @@ do_cmd_auto:
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+- else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY))
++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ }
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 14 Jun 2022 23:15:51 +0200
+Subject: x86/bugs: Enable STIBP for JMP2RET
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream.
+
+For untrained return thunks to be fully effective, STIBP must be enabled
+or SMT disabled.
+
+Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 16 ++++--
+ arch/x86/kernel/cpu/bugs.c | 58 +++++++++++++++++++-----
+ 2 files changed, 57 insertions(+), 17 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4660,11 +4660,17 @@
+ Speculative Code Execution with Return Instructions)
+ vulnerability.
+
+- off - unconditionally disable
+- auto - automatically select a migitation
+- unret - force enable untrained return thunks,
+- only effective on AMD Zen {1,2}
+- based systems.
++ off - no mitigation
++ auto - automatically select a migitation
++ auto,nosmt - automatically select a mitigation,
++ disabling SMT if necessary for
++ the full mitigation (only on Zen1
++ and older without STIBP).
++ unret - force enable untrained return thunks,
++ only effective on AMD f15h-f17h
++ based systems.
++ unret,nosmt - like unret, will disable SMT when STIBP
++ is not available.
+
+ Selecting 'auto' will choose a mitigation method at run
+ time according to the CPU.
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -739,19 +739,34 @@ static enum retbleed_mitigation retbleed
+ static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+ RETBLEED_CMD_AUTO;
+
++static int __ro_after_init retbleed_nosmt = false;
++
+ static int __init retbleed_parse_cmdline(char *str)
+ {
+ if (!str)
+ return -EINVAL;
+
+- if (!strcmp(str, "off"))
+- retbleed_cmd = RETBLEED_CMD_OFF;
+- else if (!strcmp(str, "auto"))
+- retbleed_cmd = RETBLEED_CMD_AUTO;
+- else if (!strcmp(str, "unret"))
+- retbleed_cmd = RETBLEED_CMD_UNRET;
+- else
+- pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
++ while (str) {
++ char *next = strchr(str, ',');
++ if (next) {
++ *next = 0;
++ next++;
++ }
++
++ if (!strcmp(str, "off")) {
++ retbleed_cmd = RETBLEED_CMD_OFF;
++ } else if (!strcmp(str, "auto")) {
++ retbleed_cmd = RETBLEED_CMD_AUTO;
++ } else if (!strcmp(str, "unret")) {
++ retbleed_cmd = RETBLEED_CMD_UNRET;
++ } else if (!strcmp(str, "nosmt")) {
++ retbleed_nosmt = true;
++ } else {
++ pr_err("Ignoring unknown retbleed option (%s).", str);
++ }
++
++ str = next;
++ }
+
+ return 0;
+ }
+@@ -797,6 +812,10 @@ static void __init retbleed_select_mitig
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
++ if (!boot_cpu_has(X86_FEATURE_STIBP) &&
++ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++ cpu_smt_disable(false);
++
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ pr_err(RETBLEED_UNTRAIN_MSG);
+@@ -1043,6 +1062,13 @@ spectre_v2_user_select_mitigation(enum s
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
++ if (mode != SPECTRE_V2_USER_STRICT &&
++ mode != SPECTRE_V2_USER_STRICT_PREFERRED)
++ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n");
++ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
++ }
++
+ spectre_v2_user_stibp = mode;
+
+ set_mode:
+@@ -2020,10 +2046,18 @@ static ssize_t srbds_show_state(char *bu
+
+ static ssize_t retbleed_show_state(char *buf)
+ {
+- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET &&
+- (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON))
+- return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
++
++ return sprintf(buf, "%s; SMT %s\n",
++ retbleed_strings[retbleed_mitigation],
++ !sched_smt_active() ? "disabled" :
++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
++ "enabled with STIBP protection" : "vulnerable");
++ }
+
+ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:52 +0200
+Subject: x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream.
+
+Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can
+differ from x86_spec_ctrl_base. As such, keep a per-CPU value
+reflecting the current task's MSR content.
+
+ [jpoimboe: rename]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 1 +
+ arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++++++-----
+ arch/x86/kernel/process.c | 2 +-
+ 3 files changed, 25 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -254,6 +254,7 @@ static inline void indirect_branch_predi
+
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++extern void write_spec_ctrl_current(u64 val);
+
+ /*
+ * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -48,12 +48,30 @@ static void __init taa_select_mitigation
+ static void __init mmio_select_mitigation(void);
+ static void __init srbds_select_mitigation(void);
+
+-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
++/* The base value of the SPEC_CTRL MSR without task-specific bits set */
+ u64 x86_spec_ctrl_base;
+ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++
++/* The current value of the SPEC_CTRL MSR with task-specific bits set */
++DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
++
+ static DEFINE_MUTEX(spec_ctrl_mutex);
+
+ /*
++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
++ */
++void write_spec_ctrl_current(u64 val)
++{
++ if (this_cpu_read(x86_spec_ctrl_current) == val)
++ return;
++
++ this_cpu_write(x86_spec_ctrl_current, val);
++ wrmsrl(MSR_IA32_SPEC_CTRL, val);
++}
++
++/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+@@ -1235,7 +1253,7 @@ static void __init spectre_v2_select_mit
+ if (spectre_v2_in_eibrs_mode(mode)) {
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base);
+ }
+
+ switch (mode) {
+@@ -1290,7 +1308,7 @@ static void __init spectre_v2_select_mit
+
+ static void update_stibp_msr(void * __unused)
+ {
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base);
+ }
+
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1533,7 +1551,7 @@ static enum ssb_mitigation __init __ssb_
+ x86_amd_ssb_disable();
+ } else {
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base);
+ }
+ }
+
+@@ -1751,7 +1769,7 @@ int arch_prctl_spec_ctrl_get(struct task
+ void x86_spec_ctrl_setup_ap(void)
+ {
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -556,7 +556,7 @@ static __always_inline void __speculatio
+ }
+
+ if (updmsr)
+- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
++ write_spec_ctrl_current(msr);
+ }
+
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:54 +0200
+Subject: x86/bugs: Optimize SPEC_CTRL MSR writes
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream.
+
+When changing SPEC_CTRL for user control, the WRMSR can be delayed
+until return-to-user when KERNEL_IBRS has been enabled.
+
+This avoids an MSR write during context switch.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 2 +-
+ arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------
+ arch/x86/kernel/process.c | 2 +-
+ 3 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -254,7 +254,7 @@ static inline void indirect_branch_predi
+
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
+-extern void write_spec_ctrl_current(u64 val);
++extern void write_spec_ctrl_current(u64 val, bool force);
+
+ /*
+ * With retpoline, we must use IBRS to restrict branch prediction
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -62,13 +62,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
+ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
+ */
+-void write_spec_ctrl_current(u64 val)
++void write_spec_ctrl_current(u64 val, bool force)
+ {
+ if (this_cpu_read(x86_spec_ctrl_current) == val)
+ return;
+
+ this_cpu_write(x86_spec_ctrl_current, val);
+- wrmsrl(MSR_IA32_SPEC_CTRL, val);
++
++ /*
++ * When KERNEL_IBRS this MSR is written on return-to-user, unless
++ * forced the update can be delayed until that time.
++ */
++ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
++ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+ }
+
+ /*
+@@ -1253,7 +1259,7 @@ static void __init spectre_v2_select_mit
+ if (spectre_v2_in_eibrs_mode(mode)) {
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+- write_spec_ctrl_current(x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+
+ switch (mode) {
+@@ -1308,7 +1314,7 @@ static void __init spectre_v2_select_mit
+
+ static void update_stibp_msr(void * __unused)
+ {
+- write_spec_ctrl_current(x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1551,7 +1557,7 @@ static enum ssb_mitigation __init __ssb_
+ x86_amd_ssb_disable();
+ } else {
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+- write_spec_ctrl_current(x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+ }
+
+@@ -1769,7 +1775,7 @@ int arch_prctl_spec_ctrl_get(struct task
+ void x86_spec_ctrl_setup_ap(void)
+ {
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+- write_spec_ctrl_current(x86_spec_ctrl_base);
++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -556,7 +556,7 @@ static __always_inline void __speculatio
+ }
+
+ if (updmsr)
+- write_spec_ctrl_current(msr);
++ write_spec_ctrl_current(msr, false);
+ }
+
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+Date: Tue, 14 Jun 2022 23:15:49 +0200
+Subject: x86/bugs: Report AMD retbleed vulnerability
+
+From: Alexandre Chartre <alexandre.chartre@oracle.com>
+
+commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream.
+
+Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary
+Speculative Code Execution with Return Instructions) attack.
+
+ [peterz: add hygon]
+ [kim: invert parity; fam15h]
+
+Co-developed-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++
+ arch/x86/kernel/cpu/common.c | 19 +++++++++++++++++++
+ drivers/base/cpu.c | 8 ++++++++
+ include/linux/cpu.h | 2 ++
+ 5 files changed, 43 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -426,5 +426,6 @@
+ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1917,6 +1917,11 @@ static ssize_t srbds_show_state(char *bu
+ return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
+ }
+
++static ssize_t retbleed_show_state(char *buf)
++{
++ return sprintf(buf, "Vulnerable\n");
++}
++
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
+ {
+@@ -1962,6 +1967,9 @@ static ssize_t cpu_show_common(struct de
+ case X86_BUG_MMIO_STALE_DATA:
+ return mmio_stale_data_show_state(buf);
+
++ case X86_BUG_RETBLEED:
++ return retbleed_show_state(buf);
++
+ default:
+ break;
+ }
+@@ -2018,4 +2026,9 @@ ssize_t cpu_show_mmio_stale_data(struct
+ {
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
++
++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1092,16 +1092,27 @@ static const __initconst struct x86_cpu_
+ {}
+ };
+
++#define VULNBL(vendor, family, model, blacklist) \
++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
++
+ #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
+ INTEL_FAM6_##model, steppings, \
+ X86_FEATURE_ANY, issues)
+
++#define VULNBL_AMD(family, blacklist) \
++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
++
++#define VULNBL_HYGON(family, blacklist) \
++ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
++
+ #define SRBDS BIT(0)
+ /* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+ #define MMIO BIT(1)
+ /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+ #define MMIO_SBDS BIT(2)
++/* CPU is affected by RETbleed, speculating where you would not expect it */
++#define RETBLEED BIT(3)
+
+ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
+ VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
+@@ -1134,6 +1145,11 @@ static const struct x86_cpu_id cpu_vuln_
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
++
++ VULNBL_AMD(0x15, RETBLEED),
++ VULNBL_AMD(0x16, RETBLEED),
++ VULNBL_AMD(0x17, RETBLEED),
++ VULNBL_HYGON(0x18, RETBLEED),
+ {}
+ };
+
+@@ -1235,6 +1251,9 @@ static void __init cpu_set_bug_bits(stru
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED))
++ setup_force_cpu_bug(X86_BUG_RETBLEED);
++
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ return;
+
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -572,6 +572,12 @@ ssize_t __weak cpu_show_mmio_stale_data(
+ return sysfs_emit(buf, "Not affected\n");
+ }
+
++ssize_t __weak cpu_show_retbleed(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sysfs_emit(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+@@ -582,6 +588,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444
+ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
+ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+@@ -594,6 +601,7 @@ static struct attribute *cpu_root_vulner
+ &dev_attr_itlb_multihit.attr,
+ &dev_attr_srbds.attr,
+ &dev_attr_mmio_stale_data.attr,
++ &dev_attr_retbleed.attr,
+ NULL
+ };
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct dev
+ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
++extern ssize_t cpu_show_retbleed(struct device *dev,
++ struct device_attribute *attr, char *buf);
+
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Jun 2022 13:48:58 +0200
+Subject: x86/bugs: Report Intel retbleed vulnerability
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream.
+
+Skylake suffers from RSB underflow speculation issues; report this
+vulnerability and its mitigation (spectre_v2=ibrs).
+
+ [jpoimboe: cleanups, eibrs]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 1 +
+ arch/x86/kernel/cpu/bugs.c | 39 +++++++++++++++++++++++++++++++++------
+ arch/x86/kernel/cpu/common.c | 24 ++++++++++++------------
+ 3 files changed, 46 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -91,6 +91,7 @@
+ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+ #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
+ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
+ #define ARCH_CAP_SSB_NO BIT(4) /*
+ * Not susceptible to Speculative Store Bypass
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -746,12 +746,17 @@ static int __init nospectre_v1_cmdline(c
+ }
+ early_param("nospectre_v1", nospectre_v1_cmdline);
+
++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
++ SPECTRE_V2_NONE;
++
+ #undef pr_fmt
+ #define pr_fmt(fmt) "RETBleed: " fmt
+
+ enum retbleed_mitigation {
+ RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_UNRET,
++ RETBLEED_MITIGATION_IBRS,
++ RETBLEED_MITIGATION_EIBRS,
+ };
+
+ enum retbleed_mitigation_cmd {
+@@ -763,6 +768,8 @@ enum retbleed_mitigation_cmd {
+ const char * const retbleed_strings[] = {
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
+ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+ };
+
+ static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+@@ -805,6 +812,7 @@ early_param("retbleed", retbleed_parse_c
+
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+ #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+ static void __init retbleed_select_mitigation(void)
+ {
+@@ -821,12 +829,15 @@ static void __init retbleed_select_mitig
+
+ case RETBLEED_CMD_AUTO:
+ default:
+- if (!boot_cpu_has_bug(X86_BUG_RETBLEED))
+- break;
+-
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++
++ /*
++ * The Intel mitigation (IBRS) was already selected in
++ * spectre_v2_select_mitigation().
++ */
++
+ break;
+ }
+
+@@ -856,15 +867,31 @@ static void __init retbleed_select_mitig
+ break;
+ }
+
++ /*
++ * Let IBRS trump all on Intel without affecting the effects of the
++ * retbleed= cmdline option.
++ */
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++ switch (spectre_v2_enabled) {
++ case SPECTRE_V2_IBRS:
++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
++ break;
++ case SPECTRE_V2_EIBRS:
++ case SPECTRE_V2_EIBRS_RETPOLINE:
++ case SPECTRE_V2_EIBRS_LFENCE:
++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
++ break;
++ default:
++ pr_err(RETBLEED_INTEL_MSG);
++ }
++ }
++
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "Spectre V2 : " fmt
+
+-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+- SPECTRE_V2_NONE;
+-
+ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+ static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1124,24 +1124,24 @@ static const struct x86_cpu_id cpu_vuln_
+ VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
+- BIT(7) | BIT(0xB), MMIO),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
++ BIT(7) | BIT(0xB), MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
+- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
+- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
+- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
+- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
+@@ -1251,7 +1251,7 @@ static void __init cpu_set_bug_bits(stru
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
+- if (cpu_matches(cpu_vuln_blacklist, RETBLEED))
++ if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)))
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:56 +0200
+Subject: x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 166115c08a9b0b846b783088808a27d739be6e8d upstream.
+
+retbleed will depend on spectre_v2, while spectre_v2_user depends on
+retbleed. Break this cycle.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -37,8 +37,9 @@
+ #include "cpu.h"
+
+ static void __init spectre_v1_select_mitigation(void);
+-static void __init retbleed_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
++static void __init spectre_v2_user_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
+@@ -137,13 +138,19 @@ void __init check_bugs(void)
+
+ /* Select the proper CPU mitigations before patching alternatives: */
+ spectre_v1_select_mitigation();
++ spectre_v2_select_mitigation();
++ /*
++ * retbleed_select_mitigation() relies on the state set by
++ * spectre_v2_select_mitigation(); specifically it wants to know about
++ * spectre_v2=ibrs.
++ */
+ retbleed_select_mitigation();
+ /*
+- * spectre_v2_select_mitigation() relies on the state set by
++ * spectre_v2_user_select_mitigation() relies on the state set by
+ * retbleed_select_mitigation(); specifically the STIBP selection is
+ * forced for UNRET.
+ */
+- spectre_v2_select_mitigation();
++ spectre_v2_user_select_mitigation();
+ ssb_select_mitigation();
+ l1tf_select_mitigation();
+ md_clear_select_mitigation();
+@@ -969,13 +976,15 @@ static void __init spec_v2_user_print_co
+ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+ }
+
++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
++
+ static enum spectre_v2_user_cmd __init
+-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_parse_user_cmdline(void)
+ {
+ char arg[20];
+ int ret, i;
+
+- switch (v2_cmd) {
++ switch (spectre_v2_cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return SPECTRE_V2_USER_CMD_NONE;
+ case SPECTRE_V2_CMD_FORCE:
+@@ -1010,7 +1019,7 @@ static inline bool spectre_v2_in_ibrs_mo
+ }
+
+ static void __init
+-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_user_select_mitigation(void)
+ {
+ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+ bool smt_possible = IS_ENABLED(CONFIG_SMP);
+@@ -1023,7 +1032,7 @@ spectre_v2_user_select_mitigation(enum s
+ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+ smt_possible = false;
+
+- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
++ cmd = spectre_v2_parse_user_cmdline();
+ switch (cmd) {
+ case SPECTRE_V2_USER_CMD_NONE:
+ goto set_mode;
+@@ -1347,7 +1356,7 @@ static void __init spectre_v2_select_mit
+ }
+
+ /* Set up IBPB and STIBP depending on the general spectre V2 command */
+- spectre_v2_user_select_mitigation(cmd);
++ spectre_v2_cmd = cmd;
+ }
+
+ static void update_stibp_msr(void * __unused)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Jun 2022 14:03:25 +0200
+Subject: x86/common: Stamp out the stepping madness
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream.
+
+The whole MMIO/RETBLEED enumeration went overboard on steppings. Get
+rid of all that and simply use ANY.
+
+If a future stepping of these models would not be affected, it had
+better set the relevant ARCH_CAP_$FOO_NO bit in
+IA32_ARCH_CAPABILITIES.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++---------------------
+ 1 file changed, 16 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1119,32 +1119,27 @@ static const struct x86_cpu_id cpu_vuln_
+ VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
+- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
++ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
+- BIT(7) | BIT(0xB), MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
+- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
+- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
+- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED),
+- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+
+ VULNBL_AMD(0x15, RETBLEED),
+ VULNBL_AMD(0x16, RETBLEED),
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:04 +0200
+Subject: x86/cpu/amd: Add Spectral Chicken
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream.
+
+Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken
+bit for some speculation behaviour. It needs setting.
+
+Note: very belatedly AMD released naming; it's now officially called
+ MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT
+ but shall remain the SPECTRAL CHICKEN.
+
+Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 3 +++
+ arch/x86/kernel/cpu/amd.c | 23 ++++++++++++++++++++++-
+ arch/x86/kernel/cpu/cpu.h | 2 ++
+ arch/x86/kernel/cpu/hygon.c | 6 ++++++
+ 4 files changed, 33 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -508,6 +508,9 @@
+ /* Fam 17h MSRs */
+ #define MSR_F17H_IRPERF 0xc00000e9
+
++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL 0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -914,6 +914,26 @@ static void init_amd_bd(struct cpuinfo_x
+ clear_rdrand_cpuid_bit(c);
+ }
+
++void init_spectral_chicken(struct cpuinfo_x86 *c)
++{
++ u64 value;
++
++ /*
++ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
++ *
++ * This suppresses speculation from the middle of a basic block, i.e. it
++ * suppresses non-branch predictions.
++ *
++ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
++ */
++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
++ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
++ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
++ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
++ }
++ }
++}
++
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+ {
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+@@ -959,7 +979,8 @@ static void init_amd(struct cpuinfo_x86
+ case 0x12: init_amd_ln(c); break;
+ case 0x15: init_amd_bd(c); break;
+ case 0x16: init_amd_jg(c); break;
+- case 0x17: fallthrough;
++ case 0x17: init_spectral_chicken(c);
++ fallthrough;
+ case 0x19: init_amd_zn(c); break;
+ }
+
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -60,6 +60,8 @@ extern void tsx_disable(void);
+ static inline void tsx_init(void) { }
+ #endif /* CONFIG_CPU_SUP_INTEL */
+
++extern void init_spectral_chicken(struct cpuinfo_x86 *c);
++
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/hygon.c
++++ b/arch/x86/kernel/cpu/hygon.c
+@@ -318,6 +318,12 @@ static void init_hygon(struct cpuinfo_x8
+ /* get apicid instead of initial apic id from cpuid */
+ c->apicid = hard_smp_processor_id();
+
++ /*
++ * XXX someone from Hygon needs to confirm this DTRT
++ *
++ init_spectral_chicken(c);
++ */
++
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 24 Jun 2022 14:41:21 +0100
+Subject: x86/cpu/amd: Enumerate BTC_NO
+
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+
+commit 26aae8ccbc1972233afd08fb3f368947c0314265 upstream.
+
+BTC_NO indicates that hardware is not susceptible to Branch Type Confusion.
+
+Zen3 CPUs don't suffer BTC.
+
+Hypervisors are expected to synthesise BTC_NO when it is appropriate
+given the migration pool, to prevent kernels using heuristics.
+
+ [ bp: Massage. ]
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no X86_FEATURE_BRS]
+[cascardo: no X86_FEATURE_CPPC]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/kernel/cpu/amd.c | 21 +++++++++++++++------
+ arch/x86/kernel/cpu/common.c | 6 ++++--
+ 3 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -316,6 +316,7 @@
+ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
+ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -942,12 +942,21 @@ static void init_amd_zn(struct cpuinfo_x
+ node_reclaim_distance = 32;
+ #endif
+
+- /*
+- * Fix erratum 1076: CPB feature bit not being set in CPUID.
+- * Always set it, except when running under a hypervisor.
+- */
+- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
+- set_cpu_cap(c, X86_FEATURE_CPB);
++ /* Fix up CPUID bits, but only if not virtualised. */
++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
++
++ /* Erratum 1076: CPB feature bit not being set in CPUID. */
++ if (!cpu_has(c, X86_FEATURE_CPB))
++ set_cpu_cap(c, X86_FEATURE_CPB);
++
++ /*
++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
++ * Branch Type Confusion, but predate the allocation of the
++ * BTC_NO bit.
++ */
++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
++ set_cpu_cap(c, X86_FEATURE_BTC_NO);
++ }
+ }
+
+ static void init_amd(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1246,8 +1246,10 @@ static void __init cpu_set_bug_bits(stru
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
+- if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)))
+- setup_force_cpu_bug(X86_BUG_RETBLEED);
++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
++ setup_force_cpu_bug(X86_BUG_RETBLEED);
++ }
+
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ return;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:33 +0200
+Subject: x86/cpufeatures: Move RETPOLINE flags to word 11
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a883d624aed463c84c22596006e5a96f5b44db31 upstream.
+
+In order to extend the RETPOLINE features to 4, move them to word 11
+where there is still room. This mostly keeps DISABLE_RETPOLINE
+simple.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: bits 8 and 9 of word 11 are also free here,
+ so comment them accordingly]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -203,8 +203,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
++/* FREE! ( 7*32+12) */
++/* FREE! ( 7*32+13) */
+ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+@@ -290,6 +290,12 @@
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
+ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
++/* FREE! (11*32+ 8) */
++/* FREE! (11*32+ 9) */
++/* FREE! (11*32+10) */
++/* FREE! (11*32+11) */
++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:53 +0200
+Subject: x86/entry: Add kernel IBRS implementation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream.
+
+Implement Kernel IBRS - currently the only known option to mitigate RSB
+underflow speculation issues on Skylake hardware.
+
+Note: since IBRS_ENTER requires fuller context established than
+UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET
+itself implies a RET, it must come after IBRS_ENTER. This means
+IBRS_ENTER needs to also move UNTRAIN_RET.
+
+Note 2: KERNEL_IBRS is sub-optimal for XenPV.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict at arch/x86/entry/entry_64.S, skip_r11rcx]
+[cascardo: conflict at arch/x86/entry/entry_64_compat.S]
+[cascardo: conflict fixups, no ANNOTATE_NOENDBR]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h | 58 +++++++++++++++++++++++++++++++++++++
+ arch/x86/entry/entry_64.S | 44 ++++++++++++++++++++++++----
+ arch/x86/entry/entry_64_compat.S | 17 ++++++++--
+ arch/x86/include/asm/cpufeatures.h | 2 -
+ 4 files changed, 111 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -6,6 +6,8 @@
+ #include <asm/percpu.h>
+ #include <asm/asm-offsets.h>
+ #include <asm/processor-flags.h>
++#include <asm/msr.h>
++#include <asm/nospec-branch.h>
+
+ /*
+
+@@ -309,6 +311,62 @@ For 32-bit we have the following convent
+ #endif
+
+ /*
++ * IBRS kernel mitigation for Spectre_v2.
++ *
++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
++ * the regs it uses (AX, CX, DX). Must be called before the first RET
++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
++ *
++ * The optional argument is used to save/restore the current value,
++ * which is used on the paranoid paths.
++ *
++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
++ */
++.macro IBRS_ENTER save_reg
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++ movl $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++ rdmsr
++ shl $32, %rdx
++ or %rdx, %rax
++ mov %rax, \save_reg
++ test $SPEC_CTRL_IBRS, %eax
++ jz .Ldo_wrmsr_\@
++ lfence
++ jmp .Lend_\@
++.Ldo_wrmsr_\@:
++.endif
++
++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++ movl %edx, %eax
++ shr $32, %rdx
++ wrmsr
++.Lend_\@:
++.endm
++
++/*
++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
++ * regs. Must be called after the last RET.
++ */
++.macro IBRS_EXIT save_reg
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++ movl $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++ mov \save_reg, %rdx
++.else
++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++ andl $(~SPEC_CTRL_IBRS), %edx
++.endif
++
++ movl %edx, %eax
++ shr $32, %rdx
++ wrmsr
++.Lend_\@:
++.endm
++
++/*
+ * Mitigate Spectre v1 for conditional swapgs code paths.
+ *
+ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -102,7 +102,6 @@ SYM_CODE_START(entry_SYSCALL_64)
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+- UNTRAIN_RET
+
+ /* Construct struct pt_regs on stack */
+ pushq $__USER_DS /* pt_regs->ss */
+@@ -118,6 +117,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+ /* IRQs are off. */
+ movq %rax, %rdi
+ movq %rsp, %rsi
++
++ /* clobbers %rax, make sure it is after saving the syscall nr */
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ call do_syscall_64 /* returns with IRQs disabled */
+
+ /*
+@@ -192,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+ * perf profiles. Nothing jumps here.
+ */
+ syscall_return_via_sysret:
++ IBRS_EXIT
+ POP_REGS pop_rdi=0
+
+ /*
+@@ -569,6 +574,7 @@ __irqentry_text_end:
+
+ SYM_CODE_START_LOCAL(common_interrupt_return)
+ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
++ IBRS_EXIT
+ #ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates user mode. */
+ testb $3, CS(%rsp)
+@@ -889,6 +895,9 @@ SYM_CODE_END(xen_failsafe_callback)
+ * 1 -> no SWAPGS on exit
+ *
+ * Y GSBASE value at entry, must be restored in paranoid_exit
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+ */
+ SYM_CODE_START_LOCAL(paranoid_entry)
+ UNWIND_HINT_FUNC
+@@ -912,7 +921,6 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ * be retrieved from a kernel internal table.
+ */
+ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+- UNTRAIN_RET
+
+ /*
+ * Handling GSBASE depends on the availability of FSGSBASE.
+@@ -934,7 +942,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ * is needed here.
+ */
+ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+- RET
++ jmp .Lparanoid_gsbase_done
+
+ .Lparanoid_entry_checkgs:
+ /* EBX = 1 -> kernel GSBASE active, no restore required */
+@@ -953,8 +961,16 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ xorl %ebx, %ebx
+ swapgs
+ .Lparanoid_kernel_gsbase:
+-
+ FENCE_SWAPGS_KERNEL_ENTRY
++.Lparanoid_gsbase_done:
++
++ /*
++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
++ * CR3 above, keep the old value in a callee saved register.
++ */
++ IBRS_ENTER save_reg=%r15
++ UNTRAIN_RET
++
+ RET
+ SYM_CODE_END(paranoid_entry)
+
+@@ -976,9 +992,19 @@ SYM_CODE_END(paranoid_entry)
+ * 1 -> no SWAPGS on exit
+ *
+ * Y User space GSBASE, must be restored unconditionally
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+ */
+ SYM_CODE_START_LOCAL(paranoid_exit)
+ UNWIND_HINT_REGS
++
++ /*
++ * Must restore IBRS state before both CR3 and %GS since we need access
++ * to the per-CPU x86_spec_ctrl_shadow variable.
++ */
++ IBRS_EXIT save_reg=%r15
++
+ /*
+ * The order of operations is important. RESTORE_CR3 requires
+ * kernel GSBASE.
+@@ -1025,9 +1051,11 @@ SYM_CODE_START_LOCAL(error_entry)
+ FENCE_SWAPGS_USER_ENTRY
+ /* We have user CR3. Change to kernel CR3. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++ IBRS_ENTER
+ UNTRAIN_RET
+
+ .Lerror_entry_from_usermode_after_swapgs:
++
+ /* Put us onto the real thread stack. */
+ popq %r12 /* save return addr in %12 */
+ movq %rsp, %rdi /* arg0 = pt_regs pointer */
+@@ -1081,6 +1109,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++ IBRS_ENTER
+ UNTRAIN_RET
+
+ /*
+@@ -1176,7 +1205,6 @@ SYM_CODE_START(asm_exc_nmi)
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT_IRET_REGS base=%rdx offset=8
+- UNTRAIN_RET
+ pushq 5*8(%rdx) /* pt_regs->ss */
+ pushq 4*8(%rdx) /* pt_regs->rsp */
+ pushq 3*8(%rdx) /* pt_regs->flags */
+@@ -1187,6 +1215,9 @@ SYM_CODE_START(asm_exc_nmi)
+ PUSH_AND_CLEAR_REGS rdx=(%rdx)
+ ENCODE_FRAME_POINTER
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ /*
+ * At this point we no longer need to worry about stack damage
+ * due to nesting -- we're on the normal thread stack and we're
+@@ -1409,6 +1440,9 @@ end_repeat_nmi:
+ movq $-1, %rsi
+ call exc_nmi
+
++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
++ IBRS_EXIT save_reg=%r15
++
+ /* Always restore stashed CR3 value (see paranoid_entry) */
+ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -4,7 +4,6 @@
+ *
+ * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+ */
+-#include "calling.h"
+ #include <asm/asm-offsets.h>
+ #include <asm/current.h>
+ #include <asm/errno.h>
+@@ -18,6 +17,8 @@
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
++#include "calling.h"
++
+ .section .entry.text, "ax"
+
+ /*
+@@ -72,7 +73,6 @@ SYM_CODE_START(entry_SYSENTER_compat)
+ pushq $__USER32_CS /* pt_regs->cs */
+ pushq $0 /* pt_regs->ip = 0 (placeholder) */
+ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+- UNTRAIN_RET
+
+ /*
+ * User tracing code (ptrace or signal handlers) might assume that
+@@ -114,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_af
+
+ cld
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ /*
+ * SYSENTER doesn't filter flags, so we need to clear NT and AC
+ * ourselves. To save a few cycles, we can check whether
+@@ -213,7 +216,6 @@ SYM_CODE_START(entry_SYSCALL_compat)
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+- UNTRAIN_RET
+
+ /* Construct struct pt_regs on stack */
+ pushq $__USER32_DS /* pt_regs->ss */
+@@ -255,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
+
+ UNWIND_HINT_REGS
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ movq %rsp, %rdi
+ call do_fast_syscall_32
+ /* XEN PV guests always use IRET path */
+@@ -269,6 +274,8 @@ sysret32_from_system_call:
+ */
+ STACKLEAK_ERASE
+
++ IBRS_EXIT
++
+ movq RBX(%rsp), %rbx /* pt_regs->rbx */
+ movq RBP(%rsp), %rbp /* pt_regs->rbp */
+ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
+@@ -380,7 +387,6 @@ SYM_CODE_START(entry_INT80_compat)
+ pushq (%rdi) /* pt_regs->di */
+ .Lint80_keep_stack:
+
+- UNTRAIN_RET
+ pushq %rsi /* pt_regs->si */
+ xorl %esi, %esi /* nospec si */
+ pushq %rdx /* pt_regs->dx */
+@@ -413,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat)
+
+ cld
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ movq %rsp, %rdi
+ call do_int80_syscall_32
+ jmp swapgs_restore_regs_and_return_to_usermode
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -203,7 +203,7 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-/* FREE! ( 7*32+12) */
++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+ /* FREE! ( 7*32+13) */
+ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 6 May 2022 14:14:35 +0200
+Subject: x86/entry: Remove skip_r11rcx
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream.
+
+Yes, r11 and rcx have been restored previously, but since they're being
+popped anyway (into rsi) might as well pop them into their own regs --
+setting them to the value they already are.
+
+Less magical code.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h | 10 +---------
+ arch/x86/entry/entry_64.S | 3 +--
+ 2 files changed, 2 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -146,27 +146,19 @@ For 32-bit we have the following convent
+
+ .endm
+
+-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
++.macro POP_REGS pop_rdi=1
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+- .if \skip_r11rcx
+- popq %rsi
+- .else
+ popq %r11
+- .endif
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+- .if \skip_r11rcx
+- popq %rsi
+- .else
+ popq %rcx
+- .endif
+ popq %rdx
+ popq %rsi
+ .if \pop_rdi
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h
+ * perf profiles. Nothing jumps here.
+ */
+ syscall_return_via_sysret:
+- /* rcx and r11 are already restored (see code above) */
+- POP_REGS pop_rdi=0 skip_r11rcx=1
++ POP_REGS pop_rdi=0
+
+ /*
+ * Now all regs are restored except RSP and RDI.
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:40 +0200
+Subject: x86/ftrace: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream.
+
+Use the return thunk in ftrace trampolines, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: still copy return from ftrace_stub]
+[cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -308,7 +308,7 @@ union ftrace_op_code_union {
+ } __attribute__((packed));
+ };
+
+-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -367,7 +367,10 @@ create_trampoline(struct ftrace_ops *ops
+
+ /* The trampoline ends with ret(q) */
+ retq = (unsigned long)ftrace_stub;
+- ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
++ else
++ ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 22 Feb 2021 13:34:40 +0100
+Subject: x86/insn: Add a __ignore_sync_check__ marker
+
+From: Borislav Petkov <bp@suse.de>
+
+commit d30c7b820be5c4777fe6c3b0c21f9d0064251e51 upstream.
+
+Add an explicit __ignore_sync_check__ marker which will be used to mark
+lines which are supposed to be ignored by file synchronization check
+scripts, its advantage being that it explicitly denotes such lines in
+the code.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lkml.kernel.org/r/20210304174237.31945-4-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/inat.h | 2 +-
+ arch/x86/include/asm/insn.h | 2 +-
+ arch/x86/lib/inat.c | 2 +-
+ arch/x86/lib/insn.c | 6 +++---
+ tools/arch/x86/include/asm/inat.h | 2 +-
+ tools/arch/x86/include/asm/insn.h | 2 +-
+ tools/arch/x86/lib/inat.c | 2 +-
+ tools/arch/x86/lib/insn.c | 6 +++---
+ tools/objtool/sync-check.sh | 17 +++++++++++++----
+ tools/perf/check-headers.sh | 15 +++++++++++----
+ 10 files changed, 36 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/include/asm/inat.h
++++ b/arch/x86/include/asm/inat.h
+@@ -6,7 +6,7 @@
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+-#include <asm/inat_types.h>
++#include <asm/inat_types.h> /* __ignore_sync_check__ */
+
+ /*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+--- a/arch/x86/include/asm/insn.h
++++ b/arch/x86/include/asm/insn.h
+@@ -8,7 +8,7 @@
+ */
+
+ /* insn_attr_t is defined in inat.h */
+-#include <asm/inat.h>
++#include <asm/inat.h> /* __ignore_sync_check__ */
+
+ struct insn_field {
+ union {
+--- a/arch/x86/lib/inat.c
++++ b/arch/x86/lib/inat.c
+@@ -4,7 +4,7 @@
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+-#include <asm/insn.h>
++#include <asm/insn.h> /* __ignore_sync_check__ */
+
+ /* Attribute tables are generated from opcode map */
+ #include "inat-tables.c"
+--- a/arch/x86/lib/insn.c
++++ b/arch/x86/lib/insn.c
+@@ -10,10 +10,10 @@
+ #else
+ #include <string.h>
+ #endif
+-#include <asm/inat.h>
+-#include <asm/insn.h>
++#include <asm/inat.h> /*__ignore_sync_check__ */
++#include <asm/insn.h> /* __ignore_sync_check__ */
+
+-#include <asm/emulate_prefix.h>
++#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
+
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n) \
+--- a/tools/arch/x86/include/asm/inat.h
++++ b/tools/arch/x86/include/asm/inat.h
+@@ -6,7 +6,7 @@
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+-#include "inat_types.h"
++#include "inat_types.h" /* __ignore_sync_check__ */
+
+ /*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+--- a/tools/arch/x86/include/asm/insn.h
++++ b/tools/arch/x86/include/asm/insn.h
+@@ -8,7 +8,7 @@
+ */
+
+ /* insn_attr_t is defined in inat.h */
+-#include "inat.h"
++#include "inat.h" /* __ignore_sync_check__ */
+
+ struct insn_field {
+ union {
+--- a/tools/arch/x86/lib/inat.c
++++ b/tools/arch/x86/lib/inat.c
+@@ -4,7 +4,7 @@
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+-#include "../include/asm/insn.h"
++#include "../include/asm/insn.h" /* __ignore_sync_check__ */
+
+ /* Attribute tables are generated from opcode map */
+ #include "inat-tables.c"
+--- a/tools/arch/x86/lib/insn.c
++++ b/tools/arch/x86/lib/insn.c
+@@ -10,10 +10,10 @@
+ #else
+ #include <string.h>
+ #endif
+-#include "../include/asm/inat.h"
+-#include "../include/asm/insn.h"
++#include "../include/asm/inat.h" /* __ignore_sync_check__ */
++#include "../include/asm/insn.h" /* __ignore_sync_check__ */
+
+-#include "../include/asm/emulate_prefix.h"
++#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
+
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n) \
+--- a/tools/objtool/sync-check.sh
++++ b/tools/objtool/sync-check.sh
+@@ -16,11 +16,14 @@ arch/x86/include/asm/emulate_prefix.h
+ arch/x86/lib/x86-opcode-map.txt
+ arch/x86/tools/gen-insn-attr-x86.awk
+ include/linux/static_call_types.h
+-arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
+-arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]'
+-arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
+-arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
+ "
++
++SYNC_CHECK_FILES='
++arch/x86/include/asm/inat.h
++arch/x86/include/asm/insn.h
++arch/x86/lib/inat.c
++arch/x86/lib/insn.c
++'
+ fi
+
+ check_2 () {
+@@ -63,3 +66,9 @@ while read -r file_entry; do
+ done <<EOF
+ $FILES
+ EOF
++
++if [ "$SRCARCH" = "x86" ]; then
++ for i in $SYNC_CHECK_FILES; do
++ check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
++ done
++fi
+--- a/tools/perf/check-headers.sh
++++ b/tools/perf/check-headers.sh
+@@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h
+ include/uapi/asm-generic/unistd.h
+ '
+
++SYNC_CHECK_FILES='
++arch/x86/include/asm/inat.h
++arch/x86/include/asm/insn.h
++arch/x86/lib/inat.c
++arch/x86/lib/insn.c
++'
++
+ # These copies are under tools/perf/trace/beauty/ as they are not used to in
+ # building object files only by scripts in tools/perf/trace/beauty/ to generate
+ # tables that then gets included in .c files for things like id->string syscall
+@@ -129,6 +136,10 @@ for i in $FILES; do
+ check $i -B
+ done
+
++for i in $SYNC_CHECK_FILES; do
++ check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
++done
++
+ # diff with extra ignore lines
+ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
+ check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
+@@ -137,10 +148,6 @@ check include/uapi/linux/mman.h '-
+ check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
+ check include/linux/ctype.h '-I "isdigit("'
+ check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
+-check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
+-check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
+-check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
+-check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
+
+ # diff non-symmetric files
+ check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 3 Nov 2020 17:28:30 +0100
+Subject: x86/insn: Add an insn_decode() API
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 93281c4a96572a34504244969b938e035204778d upstream.
+
+Users of the instruction decoder should use this to decode instruction
+bytes. For that, have insn*() helpers return an int value to denote
+success/failure. When there's an error fetching the next insn byte and
+the insn falls short, return -ENODATA to denote that.
+
+While at it, make insn_get_opcode() stricter as to whether what has
+been seen so far is a valid insn.
+
+Copy linux/kconfig.h for the tools-version of the decoder so that it can
+use IS_ENABLED().
+
+Also, cast the INSN_MODE_KERN dummy define value to (enum insn_mode)
+for tools use of the decoder because perf tool builds with -Werror and
+errors out with -Werror=sign-compare otherwise.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lkml.kernel.org/r/20210304174237.31945-5-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/insn.h | 24 ++--
+ arch/x86/lib/insn.c | 216 +++++++++++++++++++++++++++++-------
+ tools/arch/x86/include/asm/insn.h | 24 ++--
+ tools/arch/x86/lib/insn.c | 222 +++++++++++++++++++++++++++++---------
+ tools/include/linux/kconfig.h | 73 ++++++++++++
+ 5 files changed, 452 insertions(+), 107 deletions(-)
+ create mode 100644 tools/include/linux/kconfig.h
+
+--- a/arch/x86/include/asm/insn.h
++++ b/arch/x86/include/asm/insn.h
+@@ -87,13 +87,23 @@ struct insn {
+ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+-extern void insn_get_prefixes(struct insn *insn);
+-extern void insn_get_opcode(struct insn *insn);
+-extern void insn_get_modrm(struct insn *insn);
+-extern void insn_get_sib(struct insn *insn);
+-extern void insn_get_displacement(struct insn *insn);
+-extern void insn_get_immediate(struct insn *insn);
+-extern void insn_get_length(struct insn *insn);
++extern int insn_get_prefixes(struct insn *insn);
++extern int insn_get_opcode(struct insn *insn);
++extern int insn_get_modrm(struct insn *insn);
++extern int insn_get_sib(struct insn *insn);
++extern int insn_get_displacement(struct insn *insn);
++extern int insn_get_immediate(struct insn *insn);
++extern int insn_get_length(struct insn *insn);
++
++enum insn_mode {
++ INSN_MODE_32,
++ INSN_MODE_64,
++ /* Mode is determined by the current kernel build. */
++ INSN_MODE_KERN,
++ INSN_NUM_MODES,
++};
++
++extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+--- a/arch/x86/lib/insn.c
++++ b/arch/x86/lib/insn.c
+@@ -13,6 +13,9 @@
+ #include <asm/inat.h> /*__ignore_sync_check__ */
+ #include <asm/insn.h> /* __ignore_sync_check__ */
+
++#include <linux/errno.h>
++#include <linux/kconfig.h>
++
+ #include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
+
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
++ *
++ * * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_prefixes(struct insn *insn)
++int insn_get_prefixes(struct insn *insn)
+ {
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn
+ int i, nb;
+
+ if (prefixes->got)
+- return;
++ return 0;
+
+ insn_get_emulate_prefix(insn);
+
+@@ -217,8 +224,10 @@ vex_end:
+
+ prefixes->got = 1;
+
++ return 0;
++
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /**
+@@ -230,16 +239,25 @@ err_out:
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
++ *
++ * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_opcode(struct insn *insn)
++int insn_get_opcode(struct insn *insn)
+ {
+ struct insn_field *opcode = &insn->opcode;
++ int pfx_id, ret;
+ insn_byte_t op;
+- int pfx_id;
++
+ if (opcode->got)
+- return;
+- if (!insn->prefixes.got)
+- insn_get_prefixes(insn);
++ return 0;
++
++ if (!insn->prefixes.got) {
++ ret = insn_get_prefixes(insn);
++ if (ret)
++ return ret;
++ }
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn)
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+ (!inat_accept_vex(insn->attr) &&
+- !inat_is_group(insn->attr)))
+- insn->attr = 0; /* This instruction is bad */
+- goto end; /* VEX has only 1 byte for opcode */
++ !inat_is_group(insn->attr))) {
++ /* This instruction is bad */
++ insn->attr = 0;
++ return -EINVAL;
++ }
++ /* VEX has only 1 byte for opcode */
++ goto end;
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn)
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+- if (inat_must_vex(insn->attr))
+- insn->attr = 0; /* This instruction is bad */
++
++ if (inat_must_vex(insn->attr)) {
++ /* This instruction is bad */
++ insn->attr = 0;
++ return -EINVAL;
++ }
+ end:
+ opcode->got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /**
+@@ -283,15 +310,25 @@ err_out:
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
++ *
++ * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_modrm(struct insn *insn)
++int insn_get_modrm(struct insn *insn)
+ {
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
++ int ret;
++
+ if (modrm->got)
+- return;
+- if (!insn->opcode.got)
+- insn_get_opcode(insn);
++ return 0;
++
++ if (!insn->opcode.got) {
++ ret = insn_get_opcode(insn);
++ if (ret)
++ return ret;
++ }
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn)
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+- insn->attr = 0; /* This is bad */
++ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
++ /* Bad insn */
++ insn->attr = 0;
++ return -EINVAL;
++ }
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
++
+ modrm->got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+
+@@ -325,11 +367,16 @@ err_out:
+ int insn_rip_relative(struct insn *insn)
+ {
+ struct insn_field *modrm = &insn->modrm;
++ int ret;
+
+ if (!insn->x86_64)
+ return 0;
+- if (!modrm->got)
+- insn_get_modrm(insn);
++
++ if (!modrm->got) {
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return 0;
++ }
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn)
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+ */
+-void insn_get_sib(struct insn *insn)
++int insn_get_sib(struct insn *insn)
+ {
+ insn_byte_t modrm;
++ int ret;
+
+ if (insn->sib.got)
+- return;
+- if (!insn->modrm.got)
+- insn_get_modrm(insn);
++ return 0;
++
++ if (!insn->modrm.got) {
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return ret;
++ }
++
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn)
+ }
+ insn->sib.got = 1;
+
++ return 0;
++
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+
+@@ -374,15 +433,25 @@ err_out:
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
++ *
++ * * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+ */
+-void insn_get_displacement(struct insn *insn)
++int insn_get_displacement(struct insn *insn)
+ {
+ insn_byte_t mod, rm, base;
++ int ret;
+
+ if (insn->displacement.got)
+- return;
+- if (!insn->sib.got)
+- insn_get_sib(insn);
++ return 0;
++
++ if (!insn->sib.got) {
++ ret = insn_get_sib(insn);
++ if (ret)
++ return ret;
++ }
++
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *
+ }
+ out:
+ insn->displacement.got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /* Decode moffset16/32/64. Return 0 if failed */
+@@ -538,20 +608,30 @@ err_out:
+ }
+
+ /**
+- * insn_get_immediate() - Get the immediates of instruction
++ * insn_get_immediate() - Get the immediate in an instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Basically, most of immediates are sign-expanded. Unsigned-value can be
+- * get by bit masking with ((1 << (nbytes * 8)) - 1)
++ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
++ *
++ * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_immediate(struct insn *insn)
++int insn_get_immediate(struct insn *insn)
+ {
++ int ret;
++
+ if (insn->immediate.got)
+- return;
+- if (!insn->displacement.got)
+- insn_get_displacement(insn);
++ return 0;
++
++ if (!insn->displacement.got) {
++ ret = insn_get_displacement(insn);
++ if (ret)
++ return ret;
++ }
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins
+ }
+ done:
+ insn->immediate.got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /**
+@@ -615,13 +696,58 @@ err_out:
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+- */
+-void insn_get_length(struct insn *insn)
++ *
++ * Returns:
++ * - 0 on success
++ * - < 0 on error
++*/
++int insn_get_length(struct insn *insn)
+ {
++ int ret;
++
+ if (insn->length)
+- return;
+- if (!insn->immediate.got)
+- insn_get_immediate(insn);
++ return 0;
++
++ if (!insn->immediate.got) {
++ ret = insn_get_immediate(insn);
++ if (ret)
++ return ret;
++ }
++
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
++
++ return 0;
++}
++
++/**
++ * insn_decode() - Decode an x86 instruction
++ * @insn: &struct insn to be initialized
++ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
++ * @buf_len: length of the insn buffer at @kaddr
++ * @m: insn mode, see enum insn_mode
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
++ */
++int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
++{
++ int ret;
++
++/* #define INSN_MODE_KERN -1 __ignore_sync_check__ mode is only valid in the kernel */
++
++ if (m == INSN_MODE_KERN)
++ insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
++ else
++ insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
++
++ ret = insn_get_length(insn);
++ if (ret)
++ return ret;
++
++ if (insn_complete(insn))
++ return 0;
++
++ return -EINVAL;
+ }
+--- a/tools/arch/x86/include/asm/insn.h
++++ b/tools/arch/x86/include/asm/insn.h
+@@ -87,13 +87,23 @@ struct insn {
+ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+-extern void insn_get_prefixes(struct insn *insn);
+-extern void insn_get_opcode(struct insn *insn);
+-extern void insn_get_modrm(struct insn *insn);
+-extern void insn_get_sib(struct insn *insn);
+-extern void insn_get_displacement(struct insn *insn);
+-extern void insn_get_immediate(struct insn *insn);
+-extern void insn_get_length(struct insn *insn);
++extern int insn_get_prefixes(struct insn *insn);
++extern int insn_get_opcode(struct insn *insn);
++extern int insn_get_modrm(struct insn *insn);
++extern int insn_get_sib(struct insn *insn);
++extern int insn_get_displacement(struct insn *insn);
++extern int insn_get_immediate(struct insn *insn);
++extern int insn_get_length(struct insn *insn);
++
++enum insn_mode {
++ INSN_MODE_32,
++ INSN_MODE_64,
++ /* Mode is determined by the current kernel build. */
++ INSN_MODE_KERN,
++ INSN_NUM_MODES,
++};
++
++extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+ /* Attribute will be determined after getting ModRM (for opcode groups) */
+ static inline void insn_get_attribute(struct insn *insn)
+--- a/tools/arch/x86/lib/insn.c
++++ b/tools/arch/x86/lib/insn.c
+@@ -10,10 +10,13 @@
+ #else
+ #include <string.h>
+ #endif
+-#include "../include/asm/inat.h" /* __ignore_sync_check__ */
+-#include "../include/asm/insn.h" /* __ignore_sync_check__ */
++#include <asm/inat.h> /* __ignore_sync_check__ */
++#include <asm/insn.h> /* __ignore_sync_check__ */
+
+-#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
++#include <linux/errno.h>
++#include <linux/kconfig.h>
++
++#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
+
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n) \
+@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
++ *
++ * * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_prefixes(struct insn *insn)
++int insn_get_prefixes(struct insn *insn)
+ {
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn
+ int i, nb;
+
+ if (prefixes->got)
+- return;
++ return 0;
+
+ insn_get_emulate_prefix(insn);
+
+@@ -217,8 +224,10 @@ vex_end:
+
+ prefixes->got = 1;
+
++ return 0;
++
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /**
+@@ -230,16 +239,25 @@ err_out:
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
++ *
++ * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_opcode(struct insn *insn)
++int insn_get_opcode(struct insn *insn)
+ {
+ struct insn_field *opcode = &insn->opcode;
++ int pfx_id, ret;
+ insn_byte_t op;
+- int pfx_id;
++
+ if (opcode->got)
+- return;
+- if (!insn->prefixes.got)
+- insn_get_prefixes(insn);
++ return 0;
++
++ if (!insn->prefixes.got) {
++ ret = insn_get_prefixes(insn);
++ if (ret)
++ return ret;
++ }
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn)
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+ (!inat_accept_vex(insn->attr) &&
+- !inat_is_group(insn->attr)))
+- insn->attr = 0; /* This instruction is bad */
+- goto end; /* VEX has only 1 byte for opcode */
++ !inat_is_group(insn->attr))) {
++ /* This instruction is bad */
++ insn->attr = 0;
++ return -EINVAL;
++ }
++ /* VEX has only 1 byte for opcode */
++ goto end;
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn)
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+- if (inat_must_vex(insn->attr))
+- insn->attr = 0; /* This instruction is bad */
++
++ if (inat_must_vex(insn->attr)) {
++ /* This instruction is bad */
++ insn->attr = 0;
++ return -EINVAL;
++ }
+ end:
+ opcode->got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /**
+@@ -283,15 +310,25 @@ err_out:
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
++ *
++ * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_modrm(struct insn *insn)
++int insn_get_modrm(struct insn *insn)
+ {
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
++ int ret;
++
+ if (modrm->got)
+- return;
+- if (!insn->opcode.got)
+- insn_get_opcode(insn);
++ return 0;
++
++ if (!insn->opcode.got) {
++ ret = insn_get_opcode(insn);
++ if (ret)
++ return ret;
++ }
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn)
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+- insn->attr = 0; /* This is bad */
++ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
++ /* Bad insn */
++ insn->attr = 0;
++ return -EINVAL;
++ }
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
++
+ modrm->got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+
+@@ -325,11 +367,16 @@ err_out:
+ int insn_rip_relative(struct insn *insn)
+ {
+ struct insn_field *modrm = &insn->modrm;
++ int ret;
+
+ if (!insn->x86_64)
+ return 0;
+- if (!modrm->got)
+- insn_get_modrm(insn);
++
++ if (!modrm->got) {
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return 0;
++ }
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn)
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+ */
+-void insn_get_sib(struct insn *insn)
++int insn_get_sib(struct insn *insn)
+ {
+ insn_byte_t modrm;
++ int ret;
+
+ if (insn->sib.got)
+- return;
+- if (!insn->modrm.got)
+- insn_get_modrm(insn);
++ return 0;
++
++ if (!insn->modrm.got) {
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return ret;
++ }
++
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn)
+ }
+ insn->sib.got = 1;
+
++ return 0;
++
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+
+@@ -374,15 +433,25 @@ err_out:
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
++ *
++ * * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
+ */
+-void insn_get_displacement(struct insn *insn)
++int insn_get_displacement(struct insn *insn)
+ {
+ insn_byte_t mod, rm, base;
++ int ret;
+
+ if (insn->displacement.got)
+- return;
+- if (!insn->sib.got)
+- insn_get_sib(insn);
++ return 0;
++
++ if (!insn->sib.got) {
++ ret = insn_get_sib(insn);
++ if (ret)
++ return ret;
++ }
++
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *
+ }
+ out:
+ insn->displacement.got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /* Decode moffset16/32/64. Return 0 if failed */
+@@ -538,20 +608,30 @@ err_out:
+ }
+
+ /**
+- * insn_get_immediate() - Get the immediates of instruction
++ * insn_get_immediate() - Get the immediate in an instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Basically, most of immediates are sign-expanded. Unsigned-value can be
+- * get by bit masking with ((1 << (nbytes * 8)) - 1)
++ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
++ *
++ * Returns:
++ * 0: on success
++ * < 0: on error
+ */
+-void insn_get_immediate(struct insn *insn)
++int insn_get_immediate(struct insn *insn)
+ {
++ int ret;
++
+ if (insn->immediate.got)
+- return;
+- if (!insn->displacement.got)
+- insn_get_displacement(insn);
++ return 0;
++
++ if (!insn->displacement.got) {
++ ret = insn_get_displacement(insn);
++ if (ret)
++ return ret;
++ }
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins
+ }
+ done:
+ insn->immediate.got = 1;
++ return 0;
+
+ err_out:
+- return;
++ return -ENODATA;
+ }
+
+ /**
+@@ -615,13 +696,58 @@ err_out:
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+- */
+-void insn_get_length(struct insn *insn)
++ *
++ * Returns:
++ * - 0 on success
++ * - < 0 on error
++*/
++int insn_get_length(struct insn *insn)
+ {
++ int ret;
++
+ if (insn->length)
+- return;
+- if (!insn->immediate.got)
+- insn_get_immediate(insn);
++ return 0;
++
++ if (!insn->immediate.got) {
++ ret = insn_get_immediate(insn);
++ if (ret)
++ return ret;
++ }
++
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
++
++ return 0;
++}
++
++/**
++ * insn_decode() - Decode an x86 instruction
++ * @insn: &struct insn to be initialized
++ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
++ * @buf_len: length of the insn buffer at @kaddr
++ * @m: insn mode, see enum insn_mode
++ *
++ * Returns:
++ * 0: if decoding succeeded
++ * < 0: otherwise.
++ */
++int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
++{
++ int ret;
++
++#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */
++
++ if (m == INSN_MODE_KERN)
++ insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
++ else
++ insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
++
++ ret = insn_get_length(insn);
++ if (ret)
++ return ret;
++
++ if (insn_complete(insn))
++ return 0;
++
++ return -EINVAL;
+ }
+--- /dev/null
++++ b/tools/include/linux/kconfig.h
+@@ -0,0 +1,73 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _TOOLS_LINUX_KCONFIG_H
++#define _TOOLS_LINUX_KCONFIG_H
++
++/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */
++
++#ifdef CONFIG_CPU_BIG_ENDIAN
++#define __BIG_ENDIAN 4321
++#else
++#define __LITTLE_ENDIAN 1234
++#endif
++
++#define __ARG_PLACEHOLDER_1 0,
++#define __take_second_arg(__ignored, val, ...) val
++
++/*
++ * The use of "&&" / "||" is limited in certain expressions.
++ * The following enable to calculate "and" / "or" with macro expansion only.
++ */
++#define __and(x, y) ___and(x, y)
++#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y)
++#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0)
++
++#define __or(x, y) ___or(x, y)
++#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y)
++#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y)
++
++/*
++ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
++ * these only work with boolean and tristate options.
++ */
++
++/*
++ * Getting something that works in C and CPP for an arg that may or may
++ * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1"
++ * we match on the placeholder define, insert the "0," for arg1 and generate
++ * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one).
++ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
++ * the last step cherry picks the 2nd arg, we get a zero.
++ */
++#define __is_defined(x) ___is_defined(x)
++#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
++#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
++
++/*
++ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
++ * otherwise. For boolean options, this is equivalent to
++ * IS_ENABLED(CONFIG_FOO).
++ */
++#define IS_BUILTIN(option) __is_defined(option)
++
++/*
++ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
++ * otherwise.
++ */
++#define IS_MODULE(option) __is_defined(option##_MODULE)
++
++/*
++ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
++ * code can call a function defined in code compiled based on CONFIG_FOO.
++ * This is similar to IS_ENABLED(), but returns false when invoked from
++ * built-in code when CONFIG_FOO is set to 'm'.
++ */
++#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
++ __and(IS_MODULE(option), __is_defined(MODULE)))
++
++/*
++ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
++ * 0 otherwise.
++ */
++#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
++
++#endif /* _TOOLS_LINUX_KCONFIG_H */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Thu, 19 Nov 2020 19:20:18 +0100
+Subject: x86/insn-eval: Handle return values from the decoder
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 6e8c83d2a3afbfd5ee019ec720b75a42df515caa upstream.
+
+Now that the different instruction-inspecting functions return a value,
+test that and return early from callers if error has been encountered.
+
+While at it, do not call insn_get_modrm() when calling
+insn_get_displacement() because latter will make sure to call
+insn_get_modrm() if ModRM hasn't been parsed yet.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210304174237.31945-6-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/insn-eval.c | 34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/lib/insn-eval.c
++++ b/arch/x86/lib/insn-eval.c
+@@ -928,10 +928,11 @@ static int get_seg_base_limit(struct ins
+ static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs,
+ int *regoff, long *eff_addr)
+ {
+- insn_get_modrm(insn);
++ int ret;
+
+- if (!insn->modrm.nbytes)
+- return -EINVAL;
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return ret;
+
+ if (X86_MODRM_MOD(insn->modrm.value) != 3)
+ return -EINVAL;
+@@ -977,14 +978,14 @@ static int get_eff_addr_modrm(struct ins
+ int *regoff, long *eff_addr)
+ {
+ long tmp;
++ int ret;
+
+ if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
+ return -EINVAL;
+
+- insn_get_modrm(insn);
+-
+- if (!insn->modrm.nbytes)
+- return -EINVAL;
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return ret;
+
+ if (X86_MODRM_MOD(insn->modrm.value) > 2)
+ return -EINVAL;
+@@ -1106,18 +1107,21 @@ static int get_eff_addr_modrm_16(struct
+ * @base_offset will have a register, as an offset from the base of pt_regs,
+ * that can be used to resolve the associated segment.
+ *
+- * -EINVAL on error.
++ * Negative value on error.
+ */
+ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
+ int *base_offset, long *eff_addr)
+ {
+ long base, indx;
+ int indx_offset;
++ int ret;
+
+ if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
+ return -EINVAL;
+
+- insn_get_modrm(insn);
++ ret = insn_get_modrm(insn);
++ if (ret)
++ return ret;
+
+ if (!insn->modrm.nbytes)
+ return -EINVAL;
+@@ -1125,7 +1129,9 @@ static int get_eff_addr_sib(struct insn
+ if (X86_MODRM_MOD(insn->modrm.value) > 2)
+ return -EINVAL;
+
+- insn_get_sib(insn);
++ ret = insn_get_sib(insn);
++ if (ret)
++ return ret;
+
+ if (!insn->sib.nbytes)
+ return -EINVAL;
+@@ -1194,8 +1200,8 @@ static void __user *get_addr_ref_16(stru
+ short eff_addr;
+ long tmp;
+
+- insn_get_modrm(insn);
+- insn_get_displacement(insn);
++ if (insn_get_displacement(insn))
++ goto out;
+
+ if (insn->addr_bytes != 2)
+ goto out;
+@@ -1529,7 +1535,9 @@ bool insn_decode_from_regs(struct insn *
+ insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
+ insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
+
+- insn_get_length(insn);
++ if (insn_get_length(insn))
++ return false;
++
+ if (buf_size < insn->length)
+ return false;
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 2 Nov 2020 18:47:34 +0100
+Subject: x86/insn: Rename insn_decode() to insn_decode_from_regs()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 9e761296c52dcdb1aaa151b65bd39accb05740d9 upstream.
+
+Rename insn_decode() to insn_decode_from_regs() to denote that it
+receives regs as param and uses registers from there during decoding.
+Free the former name for a more generic version of the function.
+
+No functional changes.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20210304174237.31945-2-bp@alien8.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/insn-eval.h | 4 ++--
+ arch/x86/kernel/sev-es.c | 2 +-
+ arch/x86/kernel/umip.c | 2 +-
+ arch/x86/lib/insn-eval.c | 6 +++---
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/insn-eval.h
++++ b/arch/x86/include/asm/insn-eval.h
+@@ -26,7 +26,7 @@ int insn_fetch_from_user(struct pt_regs
+ unsigned char buf[MAX_INSN_SIZE]);
+ int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
+-bool insn_decode(struct insn *insn, struct pt_regs *regs,
+- unsigned char buf[MAX_INSN_SIZE], int buf_size);
++bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
++ unsigned char buf[MAX_INSN_SIZE], int buf_size);
+
+ #endif /* _ASM_X86_INSN_EVAL_H */
+--- a/arch/x86/kernel/sev-es.c
++++ b/arch/x86/kernel/sev-es.c
+@@ -236,7 +236,7 @@ static enum es_result vc_decode_insn(str
+ return ES_EXCEPTION;
+ }
+
+- if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
++ if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+ return ES_DECODE_FAILED;
+ } else {
+ res = vc_fetch_insn_kernel(ctxt, buffer);
+--- a/arch/x86/kernel/umip.c
++++ b/arch/x86/kernel/umip.c
+@@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs
+ if (!nr_copied)
+ return false;
+
+- if (!insn_decode(&insn, regs, buf, nr_copied))
++ if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
+ return false;
+
+ umip_inst = identify_insn(&insn);
+--- a/arch/x86/lib/insn-eval.c
++++ b/arch/x86/lib/insn-eval.c
+@@ -1492,7 +1492,7 @@ int insn_fetch_from_user_inatomic(struct
+ }
+
+ /**
+- * insn_decode() - Decode an instruction
++ * insn_decode_from_regs() - Decode an instruction
+ * @insn: Structure to store decoded instruction
+ * @regs: Structure with register values as seen when entering kernel mode
+ * @buf: Buffer containing the instruction bytes
+@@ -1505,8 +1505,8 @@ int insn_fetch_from_user_inatomic(struct
+ *
+ * True if instruction was decoded, False otherwise.
+ */
+-bool insn_decode(struct insn *insn, struct pt_regs *regs,
+- unsigned char buf[MAX_INSN_SIZE], int buf_size)
++bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
++ unsigned char buf[MAX_INSN_SIZE], int buf_size)
+ {
+ int seg_defs;
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Fri, 8 Jul 2022 19:10:11 +0200
+Subject: x86/kexec: Disable RET on kexec
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream.
+
+All the invocations unroll to __x86_return_thunk and this file
+must be PIC independent.
+
+This fixes kexec on 64-bit AMD boxes.
+
+ [ bp: Fix 32-bit build. ]
+
+Reported-by: Edward Tran <edward.tran@oracle.com>
+Reported-by: Awais Tanveer <awais.tanveer@oracle.com>
+Suggested-by: Ankur Arora <ankur.a.arora@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/relocate_kernel_32.S | 25 +++++++++++++++++++------
+ arch/x86/kernel/relocate_kernel_64.S | 23 +++++++++++++++++------
+ 2 files changed, 36 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kernel/relocate_kernel_32.S
++++ b/arch/x86/kernel/relocate_kernel_32.S
+@@ -7,10 +7,12 @@
+ #include <linux/linkage.h>
+ #include <asm/page_types.h>
+ #include <asm/kexec.h>
++#include <asm/nospec-branch.h>
+ #include <asm/processor-flags.h>
+
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+ */
+
+ #define PTR(x) (x << 2)
+@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+ movl %edi, %eax
+ addl $(identity_mapped - relocate_kernel), %eax
+ pushl %eax
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(relocate_kernel)
+
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %ebp, %ebp
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ 1:
+ popl %edx
+ movl CP_PA_SWAP_PAGE(%edi), %esp
+ addl $PAGE_SIZE, %esp
+ 2:
++ ANNOTATE_RETPOLINE_SAFE
+ call *%edx
+
+ /* get the re-entry point of the peer system */
+@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ movl %edi, %eax
+ addl $(virtual_mapped - relocate_kernel), %eax
+ pushl %eax
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(identity_mapped)
+
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+ popl %edi
+ popl %esi
+ popl %ebx
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(virtual_mapped)
+
+ /* Do the copies */
+@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ popl %edi
+ popl %ebx
+ popl %ebp
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(swap_pages)
+
+ .globl kexec_control_code_size
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -13,7 +13,8 @@
+ #include <asm/unwind_hints.h>
+
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+ */
+
+ #define PTR(x) (x << 3)
+@@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+ /* jump to identity mapped page */
+ addq $(identity_mapped - relocate_kernel), %r8
+ pushq %r8
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(relocate_kernel)
+
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ xorl %r14d, %r14d
+ xorl %r15d, %r15d
+
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+
+ 1:
+ popq %rdx
+@@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ call swap_pages
+ movq $virtual_mapped, %rax
+ pushq %rax
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(identity_mapped)
+
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+ popq %r12
+ popq %rbp
+ popq %rbx
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(virtual_mapped)
+
+ /* Do the copies */
+@@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ lea PAGE_SIZE(%rax), %rsi
+ jmp 0b
+ 3:
+- RET
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(swap_pages)
+
+ .globl kexec_control_code_size
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:42 +0200
+Subject: x86/kvm: Fix SETcc emulation for return thunks
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream.
+
+Prepare the SETcc fastop stuff for when RET can be larger still.
+
+The tricky bit here is that the expressions should not only be
+constant C expressions, but also absolute GAS expressions. This means
+no ?: and 'true' is ~0.
+
+Also ensure em_setcc() has the same alignment as the actual FOP_SETCC()
+ops, this ensures there cannot be an alignment hole between em_setcc()
+and the first op.
+
+Additionally, add a .skip directive to the FOP_SETCC() macro to fill
+any remaining space with INT3 traps; however the primary purpose of
+this directive is to generate AS warnings when the remaining space
+goes negative. Which is a very good indication the alignment magic
+went side-ways.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: ignore ENDBR when computing SETCC_LENGTH]
+[cascardo: conflict fixup]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c | 26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -322,13 +322,15 @@ static int fastop(struct x86_emulate_ctx
+ #define FOP_RET(name) \
+ __FOP_RET(#name)
+
+-#define FOP_START(op) \
++#define __FOP_START(op, align) \
+ extern void em_##op(struct fastop *fake); \
+ asm(".pushsection .text, \"ax\" \n\t" \
+ ".global em_" #op " \n\t" \
+- ".align " __stringify(FASTOP_SIZE) " \n\t" \
++ ".align " __stringify(align) " \n\t" \
+ "em_" #op ":\n\t"
+
++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
++
+ #define FOP_END \
+ ".popsection")
+
+@@ -432,15 +434,14 @@ static int fastop(struct x86_emulate_ctx
+ /*
+ * Depending on .config the SETcc functions look like:
+ *
+- * SETcc %al [3 bytes]
+- * RET [1 byte]
+- * INT3 [1 byte; CONFIG_SLS]
+- *
+- * Which gives possible sizes 4 or 5. When rounded up to the
+- * next power-of-two alignment they become 4 or 8.
++ * SETcc %al [3 bytes]
++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE]
++ * INT3 [1 byte; CONFIG_SLS]
+ */
+-#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS))
+-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS))
++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
++ IS_ENABLED(CONFIG_SLS))
++#define SETCC_LENGTH (3 + RET_LENGTH)
++#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
+ static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+
+ #define FOP_SETCC(op) \
+@@ -448,14 +449,15 @@ static_assert(SETCC_LENGTH <= SETCC_ALIG
+ ".type " #op ", @function \n\t" \
+ #op ": \n\t" \
+ #op " %al \n\t" \
+- __FOP_RET(#op)
++ __FOP_RET(#op) \
++ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
+
+ asm(".pushsection .fixup, \"ax\"\n"
+ ".global kvm_fastop_exception \n"
+ "kvm_fastop_exception: xor %esi, %esi; " ASM_RET
+ ".popsection");
+
+-FOP_START(setcc)
++__FOP_START(setcc, SETCC_ALIGN)
+ FOP_SETCC(seto)
+ FOP_SETCC(setno)
+ FOP_SETCC(setc)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:32 +0200
+Subject: x86/kvm/vmx: Make noinstr clean
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream.
+
+The recent mmio_stale_data fixes broke the noinstr constraints:
+
+ vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section
+ vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section
+
+make it all happy again.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 6 +++---
+ arch/x86/kvm/x86.c | 4 ++--
+ include/linux/kvm_host.h | 2 +-
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -380,9 +380,9 @@ static __always_inline void vmx_disable_
+ if (!vmx->disable_fb_clear)
+ return;
+
+- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
+ msr |= FB_CLEAR_DIS;
+- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ /* Cache the MSR value to avoid reading it later */
+ vmx->msr_ia32_mcu_opt_ctrl = msr;
+ }
+@@ -393,7 +393,7 @@ static __always_inline void vmx_enable_f
+ return;
+
+ vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ }
+
+ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -11171,9 +11171,9 @@ void kvm_arch_end_assignment(struct kvm
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+
+-bool kvm_arch_has_assigned_device(struct kvm *kvm)
++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+- return atomic_read(&kvm->arch.assigned_device_count);
++ return arch_atomic_read(&kvm->arch.assigned_device_count);
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -988,7 +988,7 @@ static inline void kvm_arch_end_assignme
+ {
+ }
+
+-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+ return false;
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:39 +0100
+Subject: x86/lib/atomic64_386_32: Rename things
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 22da5a07c75e1104caf6a42f189c97b83d070073 upstream.
+
+Principally, in order to get rid of #define RET in this code to make
+place for a new RET, but also to clarify the code, rename a bunch of
+things:
+
+ s/UNLOCK/IRQ_RESTORE/
+ s/LOCK/IRQ_SAVE/
+ s/BEGIN/BEGIN_IRQ_SAVE/
+ s/\<RET\>/RET_IRQ_RESTORE/
+ s/RET_ENDP/\tRET_IRQ_RESTORE\rENDP/
+
+which then leaves RET unused so it can be removed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134907.841623970@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/atomic64_386_32.S | 84 ++++++++++++++++++++++-------------------
+ 1 file changed, 46 insertions(+), 38 deletions(-)
+
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -9,81 +9,83 @@
+ #include <asm/alternative.h>
+
+ /* if you want SMP support, implement these with real spinlocks */
+-.macro LOCK reg
++.macro IRQ_SAVE reg
+ pushfl
+ cli
+ .endm
+
+-.macro UNLOCK reg
++.macro IRQ_RESTORE reg
+ popfl
+ .endm
+
+-#define BEGIN(op) \
++#define BEGIN_IRQ_SAVE(op) \
+ .macro endp; \
+ SYM_FUNC_END(atomic64_##op##_386); \
+ .purgem endp; \
+ .endm; \
+ SYM_FUNC_START(atomic64_##op##_386); \
+- LOCK v;
++ IRQ_SAVE v;
+
+ #define ENDP endp
+
+-#define RET \
+- UNLOCK v; \
++#define RET_IRQ_RESTORE \
++ IRQ_RESTORE v; \
+ ret
+
+-#define RET_ENDP \
+- RET; \
+- ENDP
+-
+ #define v %ecx
+-BEGIN(read)
++BEGIN_IRQ_SAVE(read)
+ movl (v), %eax
+ movl 4(v), %edx
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(set)
++BEGIN_IRQ_SAVE(set)
+ movl %ebx, (v)
+ movl %ecx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(xchg)
++BEGIN_IRQ_SAVE(xchg)
+ movl (v), %eax
+ movl 4(v), %edx
+ movl %ebx, (v)
+ movl %ecx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(add)
++BEGIN_IRQ_SAVE(add)
+ addl %eax, (v)
+ adcl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(add_return)
++BEGIN_IRQ_SAVE(add_return)
+ addl (v), %eax
+ adcl 4(v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(sub)
++BEGIN_IRQ_SAVE(sub)
+ subl %eax, (v)
+ sbbl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(sub_return)
++BEGIN_IRQ_SAVE(sub_return)
+ negl %edx
+ negl %eax
+ sbbl $0, %edx
+@@ -91,47 +93,52 @@ BEGIN(sub_return)
+ adcl 4(v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(inc)
++BEGIN_IRQ_SAVE(inc)
+ addl $1, (v)
+ adcl $0, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(inc_return)
++BEGIN_IRQ_SAVE(inc_return)
+ movl (v), %eax
+ movl 4(v), %edx
+ addl $1, %eax
+ adcl $0, %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(dec)
++BEGIN_IRQ_SAVE(dec)
+ subl $1, (v)
+ sbbl $0, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(dec_return)
++BEGIN_IRQ_SAVE(dec_return)
+ movl (v), %eax
+ movl 4(v), %edx
+ subl $1, %eax
+ sbbl $0, %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(add_unless)
++BEGIN_IRQ_SAVE(add_unless)
+ addl %eax, %ecx
+ adcl %edx, %edi
+ addl (v), %eax
+@@ -143,7 +150,7 @@ BEGIN(add_unless)
+ movl %edx, 4(v)
+ movl $1, %eax
+ 2:
+- RET
++ RET_IRQ_RESTORE
+ 3:
+ cmpl %edx, %edi
+ jne 1b
+@@ -153,7 +160,7 @@ ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(inc_not_zero)
++BEGIN_IRQ_SAVE(inc_not_zero)
+ movl (v), %eax
+ movl 4(v), %edx
+ testl %eax, %eax
+@@ -165,7 +172,7 @@ BEGIN(inc_not_zero)
+ movl %edx, 4(v)
+ movl $1, %eax
+ 2:
+- RET
++ RET_IRQ_RESTORE
+ 3:
+ testl %edx, %edx
+ jne 1b
+@@ -174,7 +181,7 @@ ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(dec_if_positive)
++BEGIN_IRQ_SAVE(dec_if_positive)
+ movl (v), %eax
+ movl 4(v), %edx
+ subl $1, %eax
+@@ -183,5 +190,6 @@ BEGIN(dec_if_positive)
+ movl %eax, (v)
+ movl %edx, 4(v)
+ 1:
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:38 +0200
+Subject: x86,objtool: Create .return_sites
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream.
+
+Find all the return-thunk sites and record them in a .return_sites
+section such that the kernel can undo this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict fixup because of functions added to support IBT]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch.h | 1
+ tools/objtool/arch/x86/decode.c | 5 ++
+ tools/objtool/check.c | 75 ++++++++++++++++++++++++++++++++++++++++
+ tools/objtool/elf.h | 1
+ tools/objtool/objtool.c | 1
+ tools/objtool/objtool.h | 1
+ 6 files changed, 84 insertions(+)
+
+--- a/tools/objtool/arch.h
++++ b/tools/objtool/arch.h
+@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
+ int arch_decode_hint_reg(u8 sp_reg, int *base);
+
+ bool arch_is_retpoline(struct symbol *sym);
++bool arch_is_rethunk(struct symbol *sym);
+
+ int arch_rewrite_retpolines(struct objtool_file *file);
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -649,3 +649,8 @@ bool arch_is_retpoline(struct symbol *sy
+ {
+ return !strncmp(sym->name, "__x86_indirect_", 15);
+ }
++
++bool arch_is_rethunk(struct symbol *sym)
++{
++ return !strcmp(sym->name, "__x86_return_thunk");
++}
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -653,6 +653,52 @@ static int create_retpoline_sites_sectio
+ return 0;
+ }
+
++static int create_return_sites_sections(struct objtool_file *file)
++{
++ struct instruction *insn;
++ struct section *sec;
++ int idx;
++
++ sec = find_section_by_name(file->elf, ".return_sites");
++ if (sec) {
++ WARN("file already has .return_sites, skipping");
++ return 0;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->return_thunk_list, call_node)
++ idx++;
++
++ if (!idx)
++ return 0;
++
++ sec = elf_create_section(file->elf, ".return_sites", 0,
++ sizeof(int), idx);
++ if (!sec) {
++ WARN("elf_create_section: .return_sites");
++ return -1;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
++
++ int *site = (int *)sec->data->d_buf + idx;
++ *site = 0;
++
++ if (elf_add_reloc_to_insn(file->elf, sec,
++ idx * sizeof(int),
++ R_X86_64_PC32,
++ insn->sec, insn->offset)) {
++ WARN("elf_add_reloc_to_insn: .return_sites");
++ return -1;
++ }
++
++ idx++;
++ }
++
++ return 0;
++}
++
+ /*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+@@ -888,6 +934,11 @@ __weak bool arch_is_retpoline(struct sym
+ return false;
+ }
+
++__weak bool arch_is_rethunk(struct symbol *sym)
++{
++ return false;
++}
++
+ #define NEGATIVE_RELOC ((void *)-1L)
+
+ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+@@ -1029,6 +1080,19 @@ static void add_retpoline_call(struct ob
+
+ annotate_call_site(file, insn, false);
+ }
++
++static void add_return_call(struct objtool_file *file, struct instruction *insn)
++{
++ /*
++ * Return thunk tail calls are really just returns in disguise,
++ * so convert them accordingly.
++ */
++ insn->type = INSN_RETURN;
++ insn->retpoline_safe = true;
++
++ list_add_tail(&insn->call_node, &file->return_thunk_list);
++}
++
+ /*
+ * Find the destination instructions for all jumps.
+ */
+@@ -1053,6 +1117,9 @@ static int add_jump_destinations(struct
+ } else if (reloc->sym->retpoline_thunk) {
+ add_retpoline_call(file, insn);
+ continue;
++ } else if (reloc->sym->return_thunk) {
++ add_return_call(file, insn);
++ continue;
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
+ add_call_dest(file, insn, reloc->sym, true);
+@@ -1842,6 +1909,9 @@ static int classify_symbols(struct objto
+ if (arch_is_retpoline(func))
+ func->retpoline_thunk = true;
+
++ if (arch_is_rethunk(func))
++ func->return_thunk = true;
++
+ if (!strcmp(func->name, "__fentry__"))
+ func->fentry = true;
+
+@@ -3235,6 +3305,11 @@ int check(struct objtool_file *file)
+ if (ret < 0)
+ goto out;
+ warnings += ret;
++
++ ret = create_return_sites_sections(file);
++ if (ret < 0)
++ goto out;
++ warnings += ret;
+ }
+
+ if (stats) {
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -58,6 +58,7 @@ struct symbol {
+ u8 uaccess_safe : 1;
+ u8 static_call_tramp : 1;
+ u8 retpoline_thunk : 1;
++ u8 return_thunk : 1;
+ u8 fentry : 1;
+ u8 kcov : 1;
+ };
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(c
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.retpoline_call_list);
++ INIT_LIST_HEAD(&file.return_thunk_list);
+ INIT_LIST_HEAD(&file.static_call_list);
+ file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = no_unreachable;
+--- a/tools/objtool/objtool.h
++++ b/tools/objtool/objtool.h
+@@ -19,6 +19,7 @@ struct objtool_file {
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head retpoline_call_list;
++ struct list_head return_thunk_list;
+ struct list_head static_call_list;
+ bool ignore_unreachables, c_file, hints, rodata;
+ };
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:40 +0100
+Subject: x86: Prepare asm files for straight-line-speculation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f94909ceb1ed4bfdb2ada72f93236305e6d6951f upstream.
+
+Replace all ret/retq instructions with RET in preparation of making
+RET a macro. Since AS is case insensitive it's a big no-op without
+RET defined.
+
+ find arch/x86/ -name \*.S | while read file
+ do
+ sed -i 's/\<ret[q]*\>/RET/' $file
+ done
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134907.905503893@infradead.org
+[bwh: Backported to 5.10: ran the above command]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/efi_thunk_64.S | 2 -
+ arch/x86/boot/compressed/head_64.S | 4 +-
+ arch/x86/boot/compressed/mem_encrypt.S | 4 +-
+ arch/x86/crypto/aegis128-aesni-asm.S | 48 ++++++++++++------------
+ arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 2 -
+ arch/x86/crypto/aesni-intel_asm.S | 52 +++++++++++++--------------
+ arch/x86/crypto/aesni-intel_avx-x86_64.S | 40 ++++++++++----------
+ arch/x86/crypto/blake2s-core.S | 4 +-
+ arch/x86/crypto/blowfish-x86_64-asm_64.S | 12 +++---
+ arch/x86/crypto/camellia-aesni-avx-asm_64.S | 18 ++++-----
+ arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 18 ++++-----
+ arch/x86/crypto/camellia-x86_64-asm_64.S | 12 +++---
+ arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 12 +++---
+ arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 16 ++++----
+ arch/x86/crypto/chacha-avx2-x86_64.S | 6 +--
+ arch/x86/crypto/chacha-avx512vl-x86_64.S | 6 +--
+ arch/x86/crypto/chacha-ssse3-x86_64.S | 8 ++--
+ arch/x86/crypto/crc32-pclmul_asm.S | 2 -
+ arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 -
+ arch/x86/crypto/crct10dif-pcl-asm_64.S | 2 -
+ arch/x86/crypto/des3_ede-asm_64.S | 4 +-
+ arch/x86/crypto/ghash-clmulni-intel_asm.S | 6 +--
+ arch/x86/crypto/nh-avx2-x86_64.S | 2 -
+ arch/x86/crypto/nh-sse2-x86_64.S | 2 -
+ arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 16 ++++----
+ arch/x86/crypto/serpent-avx2-asm_64.S | 16 ++++----
+ arch/x86/crypto/serpent-sse2-i586-asm_32.S | 6 +--
+ arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 6 +--
+ arch/x86/crypto/sha1_avx2_x86_64_asm.S | 2 -
+ arch/x86/crypto/sha1_ni_asm.S | 2 -
+ arch/x86/crypto/sha1_ssse3_asm.S | 2 -
+ arch/x86/crypto/sha256-avx-asm.S | 2 -
+ arch/x86/crypto/sha256-avx2-asm.S | 2 -
+ arch/x86/crypto/sha256-ssse3-asm.S | 2 -
+ arch/x86/crypto/sha256_ni_asm.S | 2 -
+ arch/x86/crypto/sha512-avx-asm.S | 2 -
+ arch/x86/crypto/sha512-avx2-asm.S | 2 -
+ arch/x86/crypto/sha512-ssse3-asm.S | 2 -
+ arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 16 ++++----
+ arch/x86/crypto/twofish-i586-asm_32.S | 4 +-
+ arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 6 +--
+ arch/x86/crypto/twofish-x86_64-asm_64.S | 4 +-
+ arch/x86/entry/entry_32.S | 2 -
+ arch/x86/entry/entry_64.S | 12 +++---
+ arch/x86/entry/thunk_32.S | 2 -
+ arch/x86/entry/thunk_64.S | 2 -
+ arch/x86/entry/vdso/vdso32/system_call.S | 2 -
+ arch/x86/entry/vsyscall/vsyscall_emu_64.S | 6 +--
+ arch/x86/kernel/acpi/wakeup_32.S | 6 +--
+ arch/x86/kernel/ftrace_32.S | 6 +--
+ arch/x86/kernel/ftrace_64.S | 10 ++---
+ arch/x86/kernel/head_32.S | 2 -
+ arch/x86/kernel/irqflags.S | 4 +-
+ arch/x86/kernel/relocate_kernel_32.S | 10 ++---
+ arch/x86/kernel/relocate_kernel_64.S | 10 ++---
+ arch/x86/kernel/sev_verify_cbit.S | 2 -
+ arch/x86/kernel/verify_cpu.S | 4 +-
+ arch/x86/kvm/svm/vmenter.S | 2 -
+ arch/x86/kvm/vmx/vmenter.S | 14 +++----
+ arch/x86/lib/atomic64_386_32.S | 2 -
+ arch/x86/lib/atomic64_cx8_32.S | 16 ++++----
+ arch/x86/lib/checksum_32.S | 8 ++--
+ arch/x86/lib/clear_page_64.S | 6 +--
+ arch/x86/lib/cmpxchg16b_emu.S | 4 +-
+ arch/x86/lib/cmpxchg8b_emu.S | 4 +-
+ arch/x86/lib/copy_mc_64.S | 6 +--
+ arch/x86/lib/copy_page_64.S | 4 +-
+ arch/x86/lib/copy_user_64.S | 12 +++---
+ arch/x86/lib/csum-copy_64.S | 2 -
+ arch/x86/lib/getuser.S | 22 +++++------
+ arch/x86/lib/hweight.S | 6 +--
+ arch/x86/lib/iomap_copy_64.S | 2 -
+ arch/x86/lib/memcpy_64.S | 12 +++---
+ arch/x86/lib/memmove_64.S | 4 +-
+ arch/x86/lib/memset_64.S | 6 +--
+ arch/x86/lib/msr-reg.S | 4 +-
+ arch/x86/lib/putuser.S | 6 +--
+ arch/x86/lib/retpoline.S | 2 -
+ arch/x86/math-emu/div_Xsig.S | 2 -
+ arch/x86/math-emu/div_small.S | 2 -
+ arch/x86/math-emu/mul_Xsig.S | 6 +--
+ arch/x86/math-emu/polynom_Xsig.S | 2 -
+ arch/x86/math-emu/reg_norm.S | 6 +--
+ arch/x86/math-emu/reg_round.S | 2 -
+ arch/x86/math-emu/reg_u_add.S | 2 -
+ arch/x86/math-emu/reg_u_div.S | 2 -
+ arch/x86/math-emu/reg_u_mul.S | 2 -
+ arch/x86/math-emu/reg_u_sub.S | 2 -
+ arch/x86/math-emu/round_Xsig.S | 4 +-
+ arch/x86/math-emu/shr_Xsig.S | 8 ++--
+ arch/x86/math-emu/wm_shrx.S | 16 ++++----
+ arch/x86/mm/mem_encrypt_boot.S | 4 +-
+ arch/x86/platform/efi/efi_stub_32.S | 2 -
+ arch/x86/platform/efi/efi_stub_64.S | 2 -
+ arch/x86/platform/efi/efi_thunk_64.S | 2 -
+ arch/x86/platform/olpc/xo1-wakeup.S | 6 +--
+ arch/x86/power/hibernate_asm_32.S | 4 +-
+ arch/x86/power/hibernate_asm_64.S | 4 +-
+ arch/x86/um/checksum_32.S | 4 +-
+ arch/x86/um/setjmp_32.S | 2 -
+ arch/x86/um/setjmp_64.S | 2 -
+ arch/x86/xen/xen-asm.S | 14 +++----
+ arch/x86/xen/xen-head.S | 2 -
+ 103 files changed, 353 insertions(+), 353 deletions(-)
+
+--- a/arch/x86/boot/compressed/efi_thunk_64.S
++++ b/arch/x86/boot/compressed/efi_thunk_64.S
+@@ -89,7 +89,7 @@ SYM_FUNC_START(__efi64_thunk)
+
+ pop %rbx
+ pop %rbp
+- ret
++ RET
+ SYM_FUNC_END(__efi64_thunk)
+
+ .code32
+--- a/arch/x86/boot/compressed/head_64.S
++++ b/arch/x86/boot/compressed/head_64.S
+@@ -786,7 +786,7 @@ SYM_FUNC_START(efi32_pe_entry)
+ 2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(efi32_pe_entry)
+
+ .section ".rodata"
+@@ -868,7 +868,7 @@ SYM_FUNC_START(startup32_check_sev_cbit)
+ popl %ebx
+ popl %eax
+ #endif
+- ret
++ RET
+ SYM_FUNC_END(startup32_check_sev_cbit)
+
+ /*
+--- a/arch/x86/boot/compressed/mem_encrypt.S
++++ b/arch/x86/boot/compressed/mem_encrypt.S
+@@ -58,7 +58,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
+
+ #endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+- ret
++ RET
+ SYM_FUNC_END(get_sev_encryption_bit)
+
+ .code64
+@@ -99,7 +99,7 @@ SYM_FUNC_START(set_sev_encryption_mask)
+ #endif
+
+ xor %rax, %rax
+- ret
++ RET
+ SYM_FUNC_END(set_sev_encryption_mask)
+
+ .data
+--- a/arch/x86/crypto/aegis128-aesni-asm.S
++++ b/arch/x86/crypto/aegis128-aesni-asm.S
+@@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial)
+ pxor T0, MSG
+
+ .Lld_partial_8:
+- ret
++ RET
+ SYM_FUNC_END(__load_partial)
+
+ /*
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial)
+ mov %r10b, (%r9)
+
+ .Lst_partial_1:
+- ret
++ RET
+ SYM_FUNC_END(__store_partial)
+
+ /*
+@@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ini
+ movdqu STATE4, 0x40(STATEP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_init)
+
+ /*
+@@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_1:
+ movdqu STATE4, 0x00(STATEP)
+@@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_2:
+ movdqu STATE3, 0x00(STATEP)
+@@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_3:
+ movdqu STATE2, 0x00(STATEP)
+@@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_4:
+ movdqu STATE1, 0x00(STATEP)
+@@ -373,11 +373,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_ad)
+
+ .macro encrypt_block a s0 s1 s2 s3 s4 i
+@@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_1:
+ movdqu STATE3, 0x00(STATEP)
+@@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_2:
+ movdqu STATE2, 0x00(STATEP)
+@@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_3:
+ movdqu STATE1, 0x00(STATEP)
+@@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_4:
+ movdqu STATE0, 0x00(STATEP)
+@@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_enc)
+
+ /*
+@@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc
+ movdqu STATE3, 0x40(STATEP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
+
+ .macro decrypt_block a s0 s1 s2 s3 s4 i
+@@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_1:
+ movdqu STATE3, 0x00(STATEP)
+@@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_2:
+ movdqu STATE2, 0x00(STATEP)
+@@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_3:
+ movdqu STATE1, 0x00(STATEP)
+@@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_4:
+ movdqu STATE0, 0x00(STATEP)
+@@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_dec)
+
+ /*
+@@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec
+ movdqu STATE3, 0x40(STATEP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
+
+ /*
+@@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_fin
+ movdqu MSG, (%rsi)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_final)
+--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
++++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+@@ -525,7 +525,7 @@ ddq_add_8:
+ /* return updated IV */
+ vpshufb xbyteswap, xcounter, xcounter
+ vmovdqu xcounter, (p_iv)
+- ret
++ RET
+ .endm
+
+ /*
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -1598,7 +1598,7 @@ SYM_FUNC_START(aesni_gcm_dec)
+ GCM_ENC_DEC dec
+ GCM_COMPLETE arg10, arg11
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec)
+
+
+@@ -1687,7 +1687,7 @@ SYM_FUNC_START(aesni_gcm_enc)
+
+ GCM_COMPLETE arg10, arg11
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc)
+
+ /*****************************************************************************
+@@ -1705,7 +1705,7 @@ SYM_FUNC_START(aesni_gcm_init)
+ FUNC_SAVE
+ GCM_INIT %arg3, %arg4,%arg5, %arg6
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_init)
+
+ /*****************************************************************************
+@@ -1720,7 +1720,7 @@ SYM_FUNC_START(aesni_gcm_enc_update)
+ FUNC_SAVE
+ GCM_ENC_DEC enc
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc_update)
+
+ /*****************************************************************************
+@@ -1735,7 +1735,7 @@ SYM_FUNC_START(aesni_gcm_dec_update)
+ FUNC_SAVE
+ GCM_ENC_DEC dec
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec_update)
+
+ /*****************************************************************************
+@@ -1750,7 +1750,7 @@ SYM_FUNC_START(aesni_gcm_finalize)
+ FUNC_SAVE
+ GCM_COMPLETE %arg3 %arg4
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_finalize)
+
+ #endif
+@@ -1766,7 +1766,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a
+ pxor %xmm1, %xmm0
+ movaps %xmm0, (TKEYP)
+ add $0x10, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_256a)
+ SYM_FUNC_END_ALIAS(_key_expansion_128)
+
+@@ -1791,7 +1791,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a
+ shufps $0b01001110, %xmm2, %xmm1
+ movaps %xmm1, 0x10(TKEYP)
+ add $0x20, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_192a)
+
+ SYM_FUNC_START_LOCAL(_key_expansion_192b)
+@@ -1810,7 +1810,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b
+
+ movaps %xmm0, (TKEYP)
+ add $0x10, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_192b)
+
+ SYM_FUNC_START_LOCAL(_key_expansion_256b)
+@@ -1822,7 +1822,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b
+ pxor %xmm1, %xmm2
+ movaps %xmm2, (TKEYP)
+ add $0x10, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_256b)
+
+ /*
+@@ -1937,7 +1937,7 @@ SYM_FUNC_START(aesni_set_key)
+ popl KEYP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_set_key)
+
+ /*
+@@ -1961,7 +1961,7 @@ SYM_FUNC_START(aesni_enc)
+ popl KEYP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_enc)
+
+ /*
+@@ -2018,7 +2018,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
+ aesenc KEY, STATE
+ movaps 0x70(TKEYP), KEY
+ aesenclast KEY, STATE
+- ret
++ RET
+ SYM_FUNC_END(_aesni_enc1)
+
+ /*
+@@ -2126,7 +2126,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
+ aesenclast KEY, STATE2
+ aesenclast KEY, STATE3
+ aesenclast KEY, STATE4
+- ret
++ RET
+ SYM_FUNC_END(_aesni_enc4)
+
+ /*
+@@ -2151,7 +2151,7 @@ SYM_FUNC_START(aesni_dec)
+ popl KEYP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_dec)
+
+ /*
+@@ -2208,7 +2208,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
+ aesdec KEY, STATE
+ movaps 0x70(TKEYP), KEY
+ aesdeclast KEY, STATE
+- ret
++ RET
+ SYM_FUNC_END(_aesni_dec1)
+
+ /*
+@@ -2316,7 +2316,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
+ aesdeclast KEY, STATE2
+ aesdeclast KEY, STATE3
+ aesdeclast KEY, STATE4
+- ret
++ RET
+ SYM_FUNC_END(_aesni_dec4)
+
+ /*
+@@ -2376,7 +2376,7 @@ SYM_FUNC_START(aesni_ecb_enc)
+ popl LEN
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_ecb_enc)
+
+ /*
+@@ -2437,7 +2437,7 @@ SYM_FUNC_START(aesni_ecb_dec)
+ popl LEN
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_ecb_dec)
+
+ /*
+@@ -2481,7 +2481,7 @@ SYM_FUNC_START(aesni_cbc_enc)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_cbc_enc)
+
+ /*
+@@ -2574,7 +2574,7 @@ SYM_FUNC_START(aesni_cbc_dec)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_cbc_dec)
+
+ #ifdef __x86_64__
+@@ -2602,7 +2602,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init)
+ mov $1, TCTR_LOW
+ movq TCTR_LOW, INC
+ movq CTR, TCTR_LOW
+- ret
++ RET
+ SYM_FUNC_END(_aesni_inc_init)
+
+ /*
+@@ -2630,7 +2630,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
+ .Linc_low:
+ movaps CTR, IV
+ pshufb BSWAP_MASK, IV
+- ret
++ RET
+ SYM_FUNC_END(_aesni_inc)
+
+ /*
+@@ -2693,7 +2693,7 @@ SYM_FUNC_START(aesni_ctr_enc)
+ movups IV, (IVP)
+ .Lctr_enc_just_ret:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_ctr_enc)
+
+ /*
+@@ -2778,7 +2778,7 @@ SYM_FUNC_START(aesni_xts_encrypt)
+ movups IV, (IVP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_xts_encrypt)
+
+ /*
+@@ -2846,7 +2846,7 @@ SYM_FUNC_START(aesni_xts_decrypt)
+ movups IV, (IVP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_xts_decrypt)
+
+ #endif
+--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
+@@ -1777,7 +1777,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen2)
+ FUNC_SAVE
+ INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_init_avx_gen2)
+
+ ###############################################################################
+@@ -1798,15 +1798,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_enc_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_enc_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
+
+ ###############################################################################
+@@ -1827,15 +1827,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_dec_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_dec_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
+
+ ###############################################################################
+@@ -1856,15 +1856,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_ge
+ # must be 192
+ GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_finalize:
+ GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_finalize:
+ GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
+
+ ###############################################################################
+@@ -2745,7 +2745,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen4)
+ FUNC_SAVE
+ INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_init_avx_gen4)
+
+ ###############################################################################
+@@ -2766,15 +2766,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_enc_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_enc_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
+
+ ###############################################################################
+@@ -2795,15 +2795,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_dec_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_dec_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
+
+ ###############################################################################
+@@ -2824,13 +2824,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_ge
+ # must be 192
+ GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_finalize4:
+ GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_finalize4:
+ GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
+--- a/arch/x86/crypto/blake2s-core.S
++++ b/arch/x86/crypto/blake2s-core.S
+@@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3)
+ movdqu %xmm1,0x10(%rdi)
+ movdqu %xmm14,0x20(%rdi)
+ .Lendofloop:
+- ret
++ RET
+ SYM_FUNC_END(blake2s_compress_ssse3)
+
+ #ifdef CONFIG_AS_AVX512
+@@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512)
+ vmovdqu %xmm1,0x10(%rdi)
+ vmovdqu %xmm4,0x20(%rdi)
+ vzeroupper
+- retq
++ RET
+ SYM_FUNC_END(blake2s_compress_avx512)
+ #endif /* CONFIG_AS_AVX512 */
+--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
++++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
+@@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk)
+ jnz .L__enc_xor;
+
+ write_block();
+- ret;
++ RET;
+ .L__enc_xor:
+ xor_block();
+- ret;
++ RET;
+ SYM_FUNC_END(__blowfish_enc_blk)
+
+ SYM_FUNC_START(blowfish_dec_blk)
+@@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk)
+
+ movq %r11, %r12;
+
+- ret;
++ RET;
+ SYM_FUNC_END(blowfish_dec_blk)
+
+ /**********************************************************************
+@@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way)
+
+ popq %rbx;
+ popq %r12;
+- ret;
++ RET;
+
+ .L__enc_xor4:
+ xor_block4();
+
+ popq %rbx;
+ popq %r12;
+- ret;
++ RET;
+ SYM_FUNC_END(__blowfish_enc_blk_4way)
+
+ SYM_FUNC_START(blowfish_dec_blk_4way)
+@@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way)
+ popq %rbx;
+ popq %r12;
+
+- ret;
++ RET;
+ SYM_FUNC_END(blowfish_dec_blk_4way)
+--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+@@ -193,7 +193,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_
+ roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
+ %rcx, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+
+ .align 8
+@@ -201,7 +201,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_
+ roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
+ %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
+ %rax, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+
+ /*
+@@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk1
+ %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Lenc_max32:
+@@ -874,7 +874,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk1
+ %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Ldec_max32:
+@@ -915,7 +915,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_enc_16way)
+
+ SYM_FUNC_START(camellia_ecb_dec_16way)
+@@ -945,7 +945,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_dec_16way)
+
+ SYM_FUNC_START(camellia_cbc_dec_16way)
+@@ -996,7 +996,7 @@ SYM_FUNC_START(camellia_cbc_dec_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_cbc_dec_16way)
+
+ #define inc_le128(x, minus_one, tmp) \
+@@ -1109,7 +1109,7 @@ SYM_FUNC_START(camellia_ctr_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ctr_16way)
+
+ #define gf128mul_x_ble(iv, mask, tmp) \
+@@ -1253,7 +1253,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_xts_crypt_16way)
+
+ SYM_FUNC_START(camellia_xts_enc_16way)
+--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+@@ -227,7 +227,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_
+ roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
+ %rcx, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+
+ .align 8
+@@ -235,7 +235,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_
+ roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
+ %rax, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+
+ /*
+@@ -825,7 +825,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk3
+ %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Lenc_max32:
+@@ -912,7 +912,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk3
+ %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Ldec_max32:
+@@ -957,7 +957,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_enc_32way)
+
+ SYM_FUNC_START(camellia_ecb_dec_32way)
+@@ -991,7 +991,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_dec_32way)
+
+ SYM_FUNC_START(camellia_cbc_dec_32way)
+@@ -1059,7 +1059,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_cbc_dec_32way)
+
+ #define inc_le128(x, minus_one, tmp) \
+@@ -1199,7 +1199,7 @@ SYM_FUNC_START(camellia_ctr_32way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ctr_32way)
+
+ #define gf128mul_x_ble(iv, mask, tmp) \
+@@ -1366,7 +1366,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_xts_crypt_32way)
+
+ SYM_FUNC_START(camellia_xts_enc_32way)
+--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
++++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
+@@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk)
+ enc_outunpack(mov, RT1);
+
+ movq RR12, %r12;
+- ret;
++ RET;
+
+ .L__enc_xor:
+ enc_outunpack(xor, RT1);
+
+ movq RR12, %r12;
+- ret;
++ RET;
+ SYM_FUNC_END(__camellia_enc_blk)
+
+ SYM_FUNC_START(camellia_dec_blk)
+@@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk)
+ dec_outunpack();
+
+ movq RR12, %r12;
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_dec_blk)
+
+ /**********************************************************************
+@@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way)
+
+ movq RR12, %r12;
+ popq %rbx;
+- ret;
++ RET;
+
+ .L__enc2_xor:
+ enc_outunpack2(xor, RT2);
+
+ movq RR12, %r12;
+ popq %rbx;
+- ret;
++ RET;
+ SYM_FUNC_END(__camellia_enc_blk_2way)
+
+ SYM_FUNC_START(camellia_dec_blk_2way)
+@@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way)
+
+ movq RR12, %r12;
+ movq RXOR, %rbx;
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_dec_blk_2way)
+--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+@@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
+ outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+ outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__cast5_enc_blk16)
+
+ .align 16
+@@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
+ outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+ outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
+
+- ret;
++ RET;
+
+ .L__skip_dec:
+ vpsrldq $4, RKR, RKR;
+@@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_ecb_enc_16way)
+
+ SYM_FUNC_START(cast5_ecb_dec_16way)
+@@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_ecb_dec_16way)
+
+ SYM_FUNC_START(cast5_cbc_dec_16way)
+@@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_cbc_dec_16way)
+
+ SYM_FUNC_START(cast5_ctr_16way)
+@@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_ctr_16way)
+--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+@@ -291,7 +291,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
+ outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__cast6_enc_blk8)
+
+ .align 8
+@@ -338,7 +338,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
+ outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__cast6_dec_blk8)
+
+ SYM_FUNC_START(cast6_ecb_enc_8way)
+@@ -361,7 +361,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_ecb_enc_8way)
+
+ SYM_FUNC_START(cast6_ecb_dec_8way)
+@@ -384,7 +384,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_ecb_dec_8way)
+
+ SYM_FUNC_START(cast6_cbc_dec_8way)
+@@ -410,7 +410,7 @@ SYM_FUNC_START(cast6_cbc_dec_8way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_cbc_dec_8way)
+
+ SYM_FUNC_START(cast6_ctr_8way)
+@@ -438,7 +438,7 @@ SYM_FUNC_START(cast6_ctr_8way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_ctr_8way)
+
+ SYM_FUNC_START(cast6_xts_enc_8way)
+@@ -465,7 +465,7 @@ SYM_FUNC_START(cast6_xts_enc_8way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_xts_enc_8way)
+
+ SYM_FUNC_START(cast6_xts_dec_8way)
+@@ -492,5 +492,5 @@ SYM_FUNC_START(cast6_xts_dec_8way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_xts_dec_8way)
+--- a/arch/x86/crypto/chacha-avx2-x86_64.S
++++ b/arch/x86/crypto/chacha-avx2-x86_64.S
+@@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2)
+
+ .Ldone2:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart2:
+ # xor remaining bytes from partial register into output
+@@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2)
+
+ .Ldone4:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart4:
+ # xor remaining bytes from partial register into output
+@@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2)
+ .Ldone8:
+ vzeroupper
+ lea -8(%r10),%rsp
+- ret
++ RET
+
+ .Lxorpart8:
+ # xor remaining bytes from partial register into output
+--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
++++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
+@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512v
+
+ .Ldone2:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart2:
+ # xor remaining bytes from partial register into output
+@@ -432,7 +432,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512v
+
+ .Ldone4:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart4:
+ # xor remaining bytes from partial register into output
+@@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512v
+
+ .Ldone8:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart8:
+ # xor remaining bytes from partial register into output
+--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
++++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
+@@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute)
+ sub $2,%r8d
+ jnz .Ldoubleround
+
+- ret
++ RET
+ SYM_FUNC_END(chacha_permute)
+
+ SYM_FUNC_START(chacha_block_xor_ssse3)
+@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
+
+ .Ldone:
+ FRAME_END
+- ret
++ RET
+
+ .Lxorpart:
+ # xor remaining bytes from partial register into output
+@@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3)
+ movdqu %xmm3,0x10(%rsi)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(hchacha_block_ssse3)
+
+ SYM_FUNC_START(chacha_4block_xor_ssse3)
+@@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3)
+
+ .Ldone4:
+ lea -8(%r10),%rsp
+- ret
++ RET
+
+ .Lxorpart4:
+ # xor remaining bytes from partial register into output
+--- a/arch/x86/crypto/crc32-pclmul_asm.S
++++ b/arch/x86/crypto/crc32-pclmul_asm.S
+@@ -236,5 +236,5 @@ fold_64:
+ pxor %xmm2, %xmm1
+ pextrd $0x01, %xmm1, %eax
+
+- ret
++ RET
+ SYM_FUNC_END(crc32_pclmul_le_16)
+--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
++++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+@@ -309,7 +309,7 @@ do_return:
+ popq %rsi
+ popq %rdi
+ popq %rbx
+- ret
++ RET
+ SYM_FUNC_END(crc_pcl)
+
+ .section .rodata, "a", @progbits
+--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
++++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
+@@ -257,7 +257,7 @@ SYM_FUNC_START(crc_t10dif_pcl)
+ # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
+
+ pextrw $0, %xmm0, %eax
+- ret
++ RET
+
+ .align 16
+ .Lless_than_256_bytes:
+--- a/arch/x86/crypto/des3_ede-asm_64.S
++++ b/arch/x86/crypto/des3_ede-asm_64.S
+@@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk
+ popq %r12;
+ popq %rbx;
+
+- ret;
++ RET;
+ SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
+
+ /***********************************************************************
+@@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk
+ popq %r12;
+ popq %rbx;
+
+- ret;
++ RET;
+ SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
+
+ .section .rodata, "a", @progbits
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_bl
+ psrlq $1, T2
+ pxor T2, T1
+ pxor T1, DATA
+- ret
++ RET
+ SYM_FUNC_END(__clmul_gf128mul_ble)
+
+ /* void clmul_ghash_mul(char *dst, const u128 *shash) */
+@@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul)
+ pshufb BSWAP, DATA
+ movups DATA, (%rdi)
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(clmul_ghash_mul)
+
+ /*
+@@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update)
+ movups DATA, (%rdi)
+ .Lupdate_just_ret:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(clmul_ghash_update)
+--- a/arch/x86/crypto/nh-avx2-x86_64.S
++++ b/arch/x86/crypto/nh-avx2-x86_64.S
+@@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2)
+ vpaddq T1, T0, T0
+ vpaddq T4, T0, T0
+ vmovdqu T0, (HASH)
+- ret
++ RET
+ SYM_FUNC_END(nh_avx2)
+--- a/arch/x86/crypto/nh-sse2-x86_64.S
++++ b/arch/x86/crypto/nh-sse2-x86_64.S
+@@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2)
+ paddq PASS2_SUMS, T1
+ movdqu T0, 0x00(HASH)
+ movdqu T1, 0x10(HASH)
+- ret
++ RET
+ SYM_FUNC_END(nh_sse2)
+--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+@@ -605,7 +605,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_
+ write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk8_avx)
+
+ .align 8
+@@ -659,7 +659,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_
+ write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_dec_blk8_avx)
+
+ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+@@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+ store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_enc_8way_avx)
+
+ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+@@ -695,7 +695,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+ store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_dec_8way_avx)
+
+ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+@@ -713,7 +713,7 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+ store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_cbc_dec_8way_avx)
+
+ SYM_FUNC_START(serpent_ctr_8way_avx)
+@@ -733,7 +733,7 @@ SYM_FUNC_START(serpent_ctr_8way_avx)
+ store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ctr_8way_avx)
+
+ SYM_FUNC_START(serpent_xts_enc_8way_avx)
+@@ -755,7 +755,7 @@ SYM_FUNC_START(serpent_xts_enc_8way_avx)
+ store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_xts_enc_8way_avx)
+
+ SYM_FUNC_START(serpent_xts_dec_8way_avx)
+@@ -777,5 +777,5 @@ SYM_FUNC_START(serpent_xts_dec_8way_avx)
+ store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_xts_dec_8way_avx)
+--- a/arch/x86/crypto/serpent-avx2-asm_64.S
++++ b/arch/x86/crypto/serpent-avx2-asm_64.S
+@@ -611,7 +611,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16
+ write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk16)
+
+ .align 8
+@@ -665,7 +665,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16
+ write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_dec_blk16)
+
+ SYM_FUNC_START(serpent_ecb_enc_16way)
+@@ -687,7 +687,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_enc_16way)
+
+ SYM_FUNC_START(serpent_ecb_dec_16way)
+@@ -709,7 +709,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_dec_16way)
+
+ SYM_FUNC_START(serpent_cbc_dec_16way)
+@@ -732,7 +732,7 @@ SYM_FUNC_START(serpent_cbc_dec_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_cbc_dec_16way)
+
+ SYM_FUNC_START(serpent_ctr_16way)
+@@ -757,7 +757,7 @@ SYM_FUNC_START(serpent_ctr_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ctr_16way)
+
+ SYM_FUNC_START(serpent_xts_enc_16way)
+@@ -783,7 +783,7 @@ SYM_FUNC_START(serpent_xts_enc_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_xts_enc_16way)
+
+ SYM_FUNC_START(serpent_xts_dec_16way)
+@@ -809,5 +809,5 @@ SYM_FUNC_START(serpent_xts_dec_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_xts_dec_16way)
+--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
++++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+@@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way)
+
+ write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
+
+- ret;
++ RET;
+
+ .L__enc_xor4:
+ xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk_4way)
+
+ SYM_FUNC_START(serpent_dec_blk_4way)
+@@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way)
+ movl arg_dst(%esp), %eax;
+ write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
+
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_dec_blk_4way)
+--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
++++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+@@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way)
+ write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+
+ .L__enc_xor8:
+ xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk_8way)
+
+ SYM_FUNC_START(serpent_dec_blk_8way)
+@@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way)
+ write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_dec_blk_8way)
+--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
++++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+@@ -674,7 +674,7 @@ _loop3:
+ pop %r12
+ pop %rbx
+
+- ret
++ RET
+
+ SYM_FUNC_END(\name)
+ .endm
+--- a/arch/x86/crypto/sha1_ni_asm.S
++++ b/arch/x86/crypto/sha1_ni_asm.S
+@@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform)
+ .Ldone_hash:
+ mov RSPSAVE, %rsp
+
+- ret
++ RET
+ SYM_FUNC_END(sha1_ni_transform)
+
+ .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+--- a/arch/x86/crypto/sha1_ssse3_asm.S
++++ b/arch/x86/crypto/sha1_ssse3_asm.S
+@@ -99,7 +99,7 @@
+ pop %rbp
+ pop %r12
+ pop %rbx
+- ret
++ RET
+
+ SYM_FUNC_END(\name)
+ .endm
+--- a/arch/x86/crypto/sha256-avx-asm.S
++++ b/arch/x86/crypto/sha256-avx-asm.S
+@@ -458,7 +458,7 @@ done_hash:
+ popq %r13
+ popq %r12
+ popq %rbx
+- ret
++ RET
+ SYM_FUNC_END(sha256_transform_avx)
+
+ .section .rodata.cst256.K256, "aM", @progbits, 256
+--- a/arch/x86/crypto/sha256-avx2-asm.S
++++ b/arch/x86/crypto/sha256-avx2-asm.S
+@@ -711,7 +711,7 @@ done_hash:
+ popq %r13
+ popq %r12
+ popq %rbx
+- ret
++ RET
+ SYM_FUNC_END(sha256_transform_rorx)
+
+ .section .rodata.cst512.K256, "aM", @progbits, 512
+--- a/arch/x86/crypto/sha256-ssse3-asm.S
++++ b/arch/x86/crypto/sha256-ssse3-asm.S
+@@ -472,7 +472,7 @@ done_hash:
+ popq %r12
+ popq %rbx
+
+- ret
++ RET
+ SYM_FUNC_END(sha256_transform_ssse3)
+
+ .section .rodata.cst256.K256, "aM", @progbits, 256
+--- a/arch/x86/crypto/sha256_ni_asm.S
++++ b/arch/x86/crypto/sha256_ni_asm.S
+@@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform)
+
+ .Ldone_hash:
+
+- ret
++ RET
+ SYM_FUNC_END(sha256_ni_transform)
+
+ .section .rodata.cst256.K256, "aM", @progbits, 256
+--- a/arch/x86/crypto/sha512-avx-asm.S
++++ b/arch/x86/crypto/sha512-avx-asm.S
+@@ -364,7 +364,7 @@ updateblock:
+ mov frame_RSPSAVE(%rsp), %rsp
+
+ nowork:
+- ret
++ RET
+ SYM_FUNC_END(sha512_transform_avx)
+
+ ########################################################################
+--- a/arch/x86/crypto/sha512-avx2-asm.S
++++ b/arch/x86/crypto/sha512-avx2-asm.S
+@@ -681,7 +681,7 @@ done_hash:
+
+ # Restore Stack Pointer
+ mov frame_RSPSAVE(%rsp), %rsp
+- ret
++ RET
+ SYM_FUNC_END(sha512_transform_rorx)
+
+ ########################################################################
+--- a/arch/x86/crypto/sha512-ssse3-asm.S
++++ b/arch/x86/crypto/sha512-ssse3-asm.S
+@@ -366,7 +366,7 @@ updateblock:
+ mov frame_RSPSAVE(%rsp), %rsp
+
+ nowork:
+- ret
++ RET
+ SYM_FUNC_END(sha512_transform_ssse3)
+
+ ########################################################################
+--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+@@ -272,7 +272,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
+ outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__twofish_enc_blk8)
+
+ .align 8
+@@ -312,7 +312,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
+ outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__twofish_dec_blk8)
+
+ SYM_FUNC_START(twofish_ecb_enc_8way)
+@@ -332,7 +332,7 @@ SYM_FUNC_START(twofish_ecb_enc_8way)
+ store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_ecb_enc_8way)
+
+ SYM_FUNC_START(twofish_ecb_dec_8way)
+@@ -352,7 +352,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way)
+ store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_ecb_dec_8way)
+
+ SYM_FUNC_START(twofish_cbc_dec_8way)
+@@ -377,7 +377,7 @@ SYM_FUNC_START(twofish_cbc_dec_8way)
+ popq %r12;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_cbc_dec_8way)
+
+ SYM_FUNC_START(twofish_ctr_8way)
+@@ -404,7 +404,7 @@ SYM_FUNC_START(twofish_ctr_8way)
+ popq %r12;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_ctr_8way)
+
+ SYM_FUNC_START(twofish_xts_enc_8way)
+@@ -428,7 +428,7 @@ SYM_FUNC_START(twofish_xts_enc_8way)
+ store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_xts_enc_8way)
+
+ SYM_FUNC_START(twofish_xts_dec_8way)
+@@ -452,5 +452,5 @@ SYM_FUNC_START(twofish_xts_dec_8way)
+ store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_xts_dec_8way)
+--- a/arch/x86/crypto/twofish-i586-asm_32.S
++++ b/arch/x86/crypto/twofish-i586-asm_32.S
+@@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk)
+ pop %ebx
+ pop %ebp
+ mov $1, %eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_enc_blk)
+
+ SYM_FUNC_START(twofish_dec_blk)
+@@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk)
+ pop %ebx
+ pop %ebp
+ mov $1, %eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_dec_blk)
+--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
++++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+@@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
+ popq %rbx;
+ popq %r12;
+ popq %r13;
+- ret;
++ RET;
+
+ .L__enc_xor3:
+ outunpack_enc3(xor);
+@@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
+ popq %rbx;
+ popq %r12;
+ popq %r13;
+- ret;
++ RET;
+ SYM_FUNC_END(__twofish_enc_blk_3way)
+
+ SYM_FUNC_START(twofish_dec_blk_3way)
+@@ -301,5 +301,5 @@ SYM_FUNC_START(twofish_dec_blk_3way)
+ popq %rbx;
+ popq %r12;
+ popq %r13;
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_dec_blk_3way)
+--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
++++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
+@@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk)
+
+ popq R1
+ movl $1,%eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_enc_blk)
+
+ SYM_FUNC_START(twofish_dec_blk)
+@@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk)
+
+ popq R1
+ movl $1,%eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_dec_blk)
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -821,7 +821,7 @@ SYM_FUNC_START(schedule_tail_wrapper)
+ popl %eax
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(schedule_tail_wrapper)
+ .popsection
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -740,7 +740,7 @@ SYM_FUNC_START(asm_load_gs_index)
+ 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
+ swapgs
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(asm_load_gs_index)
+ EXPORT_SYMBOL(asm_load_gs_index)
+
+@@ -799,7 +799,7 @@ SYM_INNER_LABEL(asm_call_irq_on_stack, S
+
+ /* Restore the previous stack pointer from RBP. */
+ leaveq
+- ret
++ RET
+ SYM_FUNC_END(asm_call_on_stack)
+
+ #ifdef CONFIG_XEN_PV
+@@ -932,7 +932,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ * is needed here.
+ */
+ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+- ret
++ RET
+
+ .Lparanoid_entry_checkgs:
+ /* EBX = 1 -> kernel GSBASE active, no restore required */
+@@ -953,7 +953,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ .Lparanoid_kernel_gsbase:
+
+ FENCE_SWAPGS_KERNEL_ENTRY
+- ret
++ RET
+ SYM_CODE_END(paranoid_entry)
+
+ /*
+@@ -1032,7 +1032,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ movq %rax, %rsp /* switch stack */
+ ENCODE_FRAME_POINTER
+ pushq %r12
+- ret
++ RET
+
+ /*
+ * There are two places in the kernel that can potentially fault with
+@@ -1063,7 +1063,7 @@ SYM_CODE_START_LOCAL(error_entry)
+ */
+ .Lerror_entry_done_lfence:
+ FENCE_SWAPGS_KERNEL_ENTRY
+- ret
++ RET
+
+ .Lbstep_iret:
+ /* Fix truncated RIP */
+--- a/arch/x86/entry/thunk_32.S
++++ b/arch/x86/entry/thunk_32.S
+@@ -24,7 +24,7 @@ SYM_CODE_START_NOALIGN(\name)
+ popl %edx
+ popl %ecx
+ popl %eax
+- ret
++ RET
+ _ASM_NOKPROBE(\name)
+ SYM_CODE_END(\name)
+ .endm
+--- a/arch/x86/entry/thunk_64.S
++++ b/arch/x86/entry/thunk_64.S
+@@ -55,7 +55,7 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_res
+ popq %rsi
+ popq %rdi
+ popq %rbp
+- ret
++ RET
+ _ASM_NOKPROBE(__thunk_restore)
+ SYM_CODE_END(__thunk_restore)
+ #endif
+--- a/arch/x86/entry/vdso/vdso32/system_call.S
++++ b/arch/x86/entry/vdso/vdso32/system_call.S
+@@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L
+ popl %ecx
+ CFI_RESTORE ecx
+ CFI_ADJUST_CFA_OFFSET -4
+- ret
++ RET
+ CFI_ENDPROC
+
+ .size __kernel_vsyscall,.-__kernel_vsyscall
+--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+@@ -19,17 +19,17 @@ __vsyscall_page:
+
+ mov $__NR_gettimeofday, %rax
+ syscall
+- ret
++ RET
+
+ .balign 1024, 0xcc
+ mov $__NR_time, %rax
+ syscall
+- ret
++ RET
+
+ .balign 1024, 0xcc
+ mov $__NR_getcpu, %rax
+ syscall
+- ret
++ RET
+
+ .balign 4096, 0xcc
+
+--- a/arch/x86/kernel/acpi/wakeup_32.S
++++ b/arch/x86/kernel/acpi/wakeup_32.S
+@@ -60,7 +60,7 @@ save_registers:
+ popl saved_context_eflags
+
+ movl $ret_point, saved_eip
+- ret
++ RET
+
+
+ restore_registers:
+@@ -70,7 +70,7 @@ restore_registers:
+ movl saved_context_edi, %edi
+ pushl saved_context_eflags
+ popfl
+- ret
++ RET
+
+ SYM_CODE_START(do_suspend_lowlevel)
+ call save_processor_state
+@@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel)
+ ret_point:
+ call restore_registers
+ call restore_processor_state
+- ret
++ RET
+ SYM_CODE_END(do_suspend_lowlevel)
+
+ .data
+--- a/arch/x86/kernel/ftrace_32.S
++++ b/arch/x86/kernel/ftrace_32.S
+@@ -19,7 +19,7 @@
+ #endif
+
+ SYM_FUNC_START(__fentry__)
+- ret
++ RET
+ SYM_FUNC_END(__fentry__)
+ EXPORT_SYMBOL(__fentry__)
+
+@@ -84,7 +84,7 @@ ftrace_graph_call:
+
+ /* This is weak to keep gas from relaxing the jumps */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+- ret
++ RET
+ SYM_CODE_END(ftrace_caller)
+
+ SYM_CODE_START(ftrace_regs_caller)
+@@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller)
+ popl %edx
+ popl %ecx
+ popl %eax
+- ret
++ RET
+ SYM_CODE_END(ftrace_graph_caller)
+
+ .globl return_to_handler
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -132,7 +132,7 @@
+ #ifdef CONFIG_DYNAMIC_FTRACE
+
+ SYM_FUNC_START(__fentry__)
+- retq
++ RET
+ SYM_FUNC_END(__fentry__)
+ EXPORT_SYMBOL(__fentry__)
+
+@@ -170,10 +170,10 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L
+
+ /*
+ * This is weak to keep gas from relaxing the jumps.
+- * It is also used to copy the retq for trampolines.
++ * It is also used to copy the RET for trampolines.
+ */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+- retq
++ RET
+ SYM_FUNC_END(ftrace_epilogue)
+
+ SYM_FUNC_START(ftrace_regs_caller)
+@@ -287,7 +287,7 @@ fgraph_trace:
+ #endif
+
+ SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
+- retq
++ RET
+
+ trace:
+ /* save_mcount_regs fills in first two parameters */
+@@ -319,7 +319,7 @@ SYM_FUNC_START(ftrace_graph_caller)
+
+ restore_mcount_regs
+
+- retq
++ RET
+ SYM_FUNC_END(ftrace_graph_caller)
+
+ SYM_CODE_START(return_to_handler)
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -354,7 +354,7 @@ setup_once:
+ #endif
+
+ andl $0,setup_once_ref /* Once is enough, thanks */
+- ret
++ RET
+
+ SYM_FUNC_START(early_idt_handler_array)
+ # 36(%esp) %eflags
+--- a/arch/x86/kernel/irqflags.S
++++ b/arch/x86/kernel/irqflags.S
+@@ -10,7 +10,7 @@
+ SYM_FUNC_START(native_save_fl)
+ pushf
+ pop %_ASM_AX
+- ret
++ RET
+ SYM_FUNC_END(native_save_fl)
+ EXPORT_SYMBOL(native_save_fl)
+
+@@ -21,6 +21,6 @@ EXPORT_SYMBOL(native_save_fl)
+ SYM_FUNC_START(native_restore_fl)
+ push %_ASM_ARG1
+ popf
+- ret
++ RET
+ SYM_FUNC_END(native_restore_fl)
+ EXPORT_SYMBOL(native_restore_fl)
+--- a/arch/x86/kernel/relocate_kernel_32.S
++++ b/arch/x86/kernel/relocate_kernel_32.S
+@@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+ movl %edi, %eax
+ addl $(identity_mapped - relocate_kernel), %eax
+ pushl %eax
+- ret
++ RET
+ SYM_CODE_END(relocate_kernel)
+
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -159,7 +159,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %ebp, %ebp
+- ret
++ RET
+ 1:
+ popl %edx
+ movl CP_PA_SWAP_PAGE(%edi), %esp
+@@ -190,7 +190,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ movl %edi, %eax
+ addl $(virtual_mapped - relocate_kernel), %eax
+ pushl %eax
+- ret
++ RET
+ SYM_CODE_END(identity_mapped)
+
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -208,7 +208,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+ popl %edi
+ popl %esi
+ popl %ebx
+- ret
++ RET
+ SYM_CODE_END(virtual_mapped)
+
+ /* Do the copies */
+@@ -271,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ popl %edi
+ popl %ebx
+ popl %ebp
+- ret
++ RET
+ SYM_CODE_END(swap_pages)
+
+ .globl kexec_control_code_size
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -104,7 +104,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+ /* jump to identity mapped page */
+ addq $(identity_mapped - relocate_kernel), %r8
+ pushq %r8
+- ret
++ RET
+ SYM_CODE_END(relocate_kernel)
+
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ xorl %r14d, %r14d
+ xorl %r15d, %r15d
+
+- ret
++ RET
+
+ 1:
+ popq %rdx
+@@ -210,7 +210,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_ma
+ call swap_pages
+ movq $virtual_mapped, %rax
+ pushq %rax
+- ret
++ RET
+ SYM_CODE_END(identity_mapped)
+
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+ popq %r12
+ popq %rbp
+ popq %rbx
+- ret
++ RET
+ SYM_CODE_END(virtual_mapped)
+
+ /* Do the copies */
+@@ -288,7 +288,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ lea PAGE_SIZE(%rax), %rsi
+ jmp 0b
+ 3:
+- ret
++ RET
+ SYM_CODE_END(swap_pages)
+
+ .globl kexec_control_code_size
+--- a/arch/x86/kernel/sev_verify_cbit.S
++++ b/arch/x86/kernel/sev_verify_cbit.S
+@@ -85,5 +85,5 @@ SYM_FUNC_START(sev_verify_cbit)
+ #endif
+ /* Return page-table pointer */
+ movq %rdi, %rax
+- ret
++ RET
+ SYM_FUNC_END(sev_verify_cbit)
+--- a/arch/x86/kernel/verify_cpu.S
++++ b/arch/x86/kernel/verify_cpu.S
+@@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu)
+ .Lverify_cpu_no_longmode:
+ popf # Restore caller passed flags
+ movl $1,%eax
+- ret
++ RET
+ .Lverify_cpu_sse_ok:
+ popf # Restore caller passed flags
+ xorl %eax, %eax
+- ret
++ RET
+ SYM_FUNC_END(verify_cpu)
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -166,5 +166,5 @@ SYM_FUNC_START(__svm_vcpu_run)
+ pop %edi
+ #endif
+ pop %_ASM_BP
+- ret
++ RET
+ SYM_FUNC_END(__svm_vcpu_run)
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -49,14 +49,14 @@ SYM_FUNC_START_LOCAL(vmx_vmenter)
+ je 2f
+
+ 1: vmresume
+- ret
++ RET
+
+ 2: vmlaunch
+- ret
++ RET
+
+ 3: cmpb $0, kvm_rebooting
+ je 4f
+- ret
++ RET
+ 4: ud2
+
+ _ASM_EXTABLE(1b, 3b)
+@@ -89,7 +89,7 @@ SYM_FUNC_START(vmx_vmexit)
+ pop %_ASM_AX
+ .Lvmexit_skip_rsb:
+ #endif
+- ret
++ RET
+ SYM_FUNC_END(vmx_vmexit)
+
+ /**
+@@ -228,7 +228,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ pop %edi
+ #endif
+ pop %_ASM_BP
+- ret
++ RET
+
+ /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
+ 2: mov $1, %eax
+@@ -293,7 +293,7 @@ SYM_FUNC_START(vmread_error_trampoline)
+ pop %_ASM_AX
+ pop %_ASM_BP
+
+- ret
++ RET
+ SYM_FUNC_END(vmread_error_trampoline)
+
+ SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
+@@ -326,5 +326,5 @@ SYM_FUNC_START(vmx_do_interrupt_nmi_irqo
+ */
+ mov %_ASM_BP, %_ASM_SP
+ pop %_ASM_BP
+- ret
++ RET
+ SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -30,7 +30,7 @@ SYM_FUNC_START(atomic64_##op##_386); \
+
+ #define RET_IRQ_RESTORE \
+ IRQ_RESTORE v; \
+- ret
++ RET
+
+ #define v %ecx
+ BEGIN_IRQ_SAVE(read)
+--- a/arch/x86/lib/atomic64_cx8_32.S
++++ b/arch/x86/lib/atomic64_cx8_32.S
+@@ -18,7 +18,7 @@
+
+ SYM_FUNC_START(atomic64_read_cx8)
+ read64 %ecx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_read_cx8)
+
+ SYM_FUNC_START(atomic64_set_cx8)
+@@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8)
+ cmpxchg8b (%esi)
+ jne 1b
+
+- ret
++ RET
+ SYM_FUNC_END(atomic64_set_cx8)
+
+ SYM_FUNC_START(atomic64_xchg_cx8)
+@@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8)
+ cmpxchg8b (%esi)
+ jne 1b
+
+- ret
++ RET
+ SYM_FUNC_END(atomic64_xchg_cx8)
+
+ .macro addsub_return func ins insc
+@@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_
+ popl %esi
+ popl %ebx
+ popl %ebp
+- ret
++ RET
+ SYM_FUNC_END(atomic64_\func\()_return_cx8)
+ .endm
+
+@@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_\func\()_return_cx8)
+ .endm
+
+@@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_dec_if_positive_cx8)
+
+ SYM_FUNC_START(atomic64_add_unless_cx8)
+@@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8)
+ addl $8, %esp
+ popl %ebx
+ popl %ebp
+- ret
++ RET
+ 4:
+ cmpl %edx, 4(%esp)
+ jne 2b
+@@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8
+ movl $1, %eax
+ 3:
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_inc_not_zero_cx8)
+--- a/arch/x86/lib/checksum_32.S
++++ b/arch/x86/lib/checksum_32.S
+@@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial)
+ 8:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+ SYM_FUNC_END(csum_partial)
+
+ #else
+@@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial)
+ 90:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+ SYM_FUNC_END(csum_partial)
+
+ #endif
+@@ -371,7 +371,7 @@ EXC( movb %cl, (%edi) )
+ popl %esi
+ popl %edi
+ popl %ecx # equivalent to addl $4,%esp
+- ret
++ RET
+ SYM_FUNC_END(csum_partial_copy_generic)
+
+ #else
+@@ -447,7 +447,7 @@ EXC( movb %dl, (%edi) )
+ popl %esi
+ popl %edi
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(csum_partial_copy_generic)
+
+ #undef ROUND
+--- a/arch/x86/lib/clear_page_64.S
++++ b/arch/x86/lib/clear_page_64.S
+@@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep)
+ movl $4096/8,%ecx
+ xorl %eax,%eax
+ rep stosq
+- ret
++ RET
+ SYM_FUNC_END(clear_page_rep)
+ EXPORT_SYMBOL_GPL(clear_page_rep)
+
+@@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig)
+ leaq 64(%rdi),%rdi
+ jnz .Lloop
+ nop
+- ret
++ RET
+ SYM_FUNC_END(clear_page_orig)
+ EXPORT_SYMBOL_GPL(clear_page_orig)
+
+@@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms)
+ movl $4096,%ecx
+ xorl %eax,%eax
+ rep stosb
+- ret
++ RET
+ SYM_FUNC_END(clear_page_erms)
+ EXPORT_SYMBOL_GPL(clear_page_erms)
+--- a/arch/x86/lib/cmpxchg16b_emu.S
++++ b/arch/x86/lib/cmpxchg16b_emu.S
+@@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
+
+ popfq
+ mov $1, %al
+- ret
++ RET
+
+ .Lnot_same:
+ popfq
+ xor %al,%al
+- ret
++ RET
+
+ SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+--- a/arch/x86/lib/cmpxchg8b_emu.S
++++ b/arch/x86/lib/cmpxchg8b_emu.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
+ movl %ecx, 4(%esi)
+
+ popfl
+- ret
++ RET
+
+ .Lnot_same:
+ movl (%esi), %eax
+@@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
+ movl 4(%esi), %edx
+
+ popfl
+- ret
++ RET
+
+ SYM_FUNC_END(cmpxchg8b_emu)
+ EXPORT_SYMBOL(cmpxchg8b_emu)
+--- a/arch/x86/lib/copy_mc_64.S
++++ b/arch/x86/lib/copy_mc_64.S
+@@ -86,7 +86,7 @@ SYM_FUNC_START(copy_mc_fragile)
+ .L_done_memcpy_trap:
+ xorl %eax, %eax
+ .L_done:
+- ret
++ RET
+ SYM_FUNC_END(copy_mc_fragile)
+ EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+@@ -142,7 +142,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_str
+ rep movsb
+ /* Copy successful. Return zero */
+ xorl %eax, %eax
+- ret
++ RET
+ SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+ .section .fixup, "ax"
+@@ -155,7 +155,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_strin
+ * user-copy routines.
+ */
+ movq %rcx, %rax
+- ret
++ RET
+
+ .previous
+
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page)
+ ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+ movl $4096/8, %ecx
+ rep movsq
+- ret
++ RET
+ SYM_FUNC_END(copy_page)
+ EXPORT_SYMBOL(copy_page)
+
+@@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs)
+ movq (%rsp), %rbx
+ movq 1*8(%rsp), %r12
+ addq $2*8, %rsp
+- ret
++ RET
+ SYM_FUNC_END(copy_page_regs)
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -105,7 +105,7 @@ SYM_FUNC_START(copy_user_generic_unrolle
+ jnz 21b
+ 23: xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ .section .fixup,"ax"
+ 30: shll $6,%ecx
+@@ -173,7 +173,7 @@ SYM_FUNC_START(copy_user_generic_string)
+ movsb
+ xorl %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ .section .fixup,"ax"
+ 11: leal (%rdx,%rcx,8),%ecx
+@@ -207,7 +207,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_s
+ movsb
+ xorl %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ .section .fixup,"ax"
+ 12: movl %ecx,%edx /* ecx is zerorest also */
+@@ -239,7 +239,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_
+ 1: rep movsb
+ 2: mov %ecx,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ /*
+ * Return zero to pretend that this copy succeeded. This
+@@ -250,7 +250,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_
+ */
+ 3: xorl %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ _ASM_EXTABLE_CPY(1b, 2b)
+ SYM_CODE_END(.Lcopy_user_handle_tail)
+@@ -361,7 +361,7 @@ SYM_FUNC_START(__copy_user_nocache)
+ xorl %eax,%eax
+ ASM_CLAC
+ sfence
+- ret
++ RET
+
+ .section .fixup,"ax"
+ .L_fixup_4x8b_copy:
+--- a/arch/x86/lib/csum-copy_64.S
++++ b/arch/x86/lib/csum-copy_64.S
+@@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic
+ movq 3*8(%rsp), %r13
+ movq 4*8(%rsp), %r15
+ addq $5*8, %rsp
+- ret
++ RET
+ .Lshort:
+ movl %ecx, %r10d
+ jmp .L1
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1)
+ 1: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_1)
+ EXPORT_SYMBOL(__get_user_1)
+
+@@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2)
+ 2: movzwl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_2)
+ EXPORT_SYMBOL(__get_user_2)
+
+@@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4)
+ 3: movl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_4)
+ EXPORT_SYMBOL(__get_user_4)
+
+@@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8)
+ 4: movq (%_ASM_AX),%rdx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ #else
+ LOAD_TASK_SIZE_MINUS_N(7)
+ cmp %_ASM_DX,%_ASM_AX
+@@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8)
+ 5: movl 4(%_ASM_AX),%ecx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ #endif
+ SYM_FUNC_END(__get_user_8)
+ EXPORT_SYMBOL(__get_user_8)
+@@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1)
+ 6: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_1)
+ EXPORT_SYMBOL(__get_user_nocheck_1)
+
+@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2)
+ 7: movzwl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_2)
+ EXPORT_SYMBOL(__get_user_nocheck_2)
+
+@@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4)
+ 8: movl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_4)
+ EXPORT_SYMBOL(__get_user_nocheck_4)
+
+@@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8)
+ #endif
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_8)
+ EXPORT_SYMBOL(__get_user_nocheck_8)
+
+@@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac
+ bad_get_user:
+ xor %edx,%edx
+ mov $(-EFAULT),%_ASM_AX
+- ret
++ RET
+ SYM_CODE_END(.Lbad_get_user_clac)
+
+ #ifdef CONFIG_X86_32
+@@ -179,7 +179,7 @@ bad_get_user_8:
+ xor %edx,%edx
+ xor %ecx,%ecx
+ mov $(-EFAULT),%_ASM_AX
+- ret
++ RET
+ SYM_CODE_END(.Lbad_get_user_8_clac)
+ #endif
+
+--- a/arch/x86/lib/hweight.S
++++ b/arch/x86/lib/hweight.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32)
+ imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
+ shrl $24, %eax # w = w_tmp >> 24
+ __ASM_SIZE(pop,) %__ASM_REG(dx)
+- ret
++ RET
+ SYM_FUNC_END(__sw_hweight32)
+ EXPORT_SYMBOL(__sw_hweight32)
+
+@@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64)
+
+ popq %rdx
+ popq %rdi
+- ret
++ RET
+ #else /* CONFIG_X86_32 */
+ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+ pushl %ecx
+@@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64)
+ addl %ecx, %eax # result
+
+ popl %ecx
+- ret
++ RET
+ #endif
+ SYM_FUNC_END(__sw_hweight64)
+ EXPORT_SYMBOL(__sw_hweight64)
+--- a/arch/x86/lib/iomap_copy_64.S
++++ b/arch/x86/lib/iomap_copy_64.S
+@@ -11,5 +11,5 @@
+ SYM_FUNC_START(__iowrite32_copy)
+ movl %edx,%ecx
+ rep movsd
+- ret
++ RET
+ SYM_FUNC_END(__iowrite32_copy)
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
+ rep movsq
+ movl %edx, %ecx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy)
+ SYM_FUNC_END_ALIAS(__memcpy)
+ EXPORT_SYMBOL(memcpy)
+@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy_erms)
+
+ SYM_FUNC_START_LOCAL(memcpy_orig)
+@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq %r9, 1*8(%rdi)
+ movq %r10, -2*8(%rdi, %rdx)
+ movq %r11, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_16bytes:
+ cmpl $8, %edx
+@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq -1*8(%rsi, %rdx), %r9
+ movq %r8, 0*8(%rdi)
+ movq %r9, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_8bytes:
+ cmpl $4, %edx
+@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movl -4(%rsi, %rdx), %r8d
+ movl %ecx, (%rdi)
+ movl %r8d, -4(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_3bytes:
+ subl $1, %edx
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movb %cl, (%rdi)
+
+ .Lend:
+- retq
++ RET
+ SYM_FUNC_END(memcpy_orig)
+
+ .popsection
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+ /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+ ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+- ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
++ ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; RET", X86_FEATURE_ERMS
+
+ /*
+ * movsq instruction have many startup latency
+@@ -205,7 +205,7 @@ SYM_FUNC_START(__memmove)
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+ 13:
+- retq
++ RET
+ SYM_FUNC_END(__memmove)
+ SYM_FUNC_END_ALIAS(memmove)
+ EXPORT_SYMBOL(__memmove)
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
+ movl %edx,%ecx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_END_ALIAS(memset)
+ EXPORT_SYMBOL(memset)
+@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
+ movq %rdx,%rcx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(memset_erms)
+
+ SYM_FUNC_START_LOCAL(memset_orig)
+@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
+
+ .Lende:
+ movq %r10,%rax
+- ret
++ RET
+
+ .Lbad_alignment:
+ cmpq $7,%rdx
+--- a/arch/x86/lib/msr-reg.S
++++ b/arch/x86/lib/msr-reg.S
+@@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs)
+ movl %edi, 28(%r10)
+ popq %r12
+ popq %rbx
+- ret
++ RET
+ 3:
+ movl $-EIO, %r11d
+ jmp 2b
+@@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs)
+ popl %esi
+ popl %ebp
+ popl %ebx
+- ret
++ RET
+ 3:
+ movl $-EIO, 4(%esp)
+ jmp 2b
+--- a/arch/x86/lib/putuser.S
++++ b/arch/x86/lib/putuser.S
+@@ -52,7 +52,7 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SY
+ 1: movb %al,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__put_user_1)
+ EXPORT_SYMBOL(__put_user_1)
+ EXPORT_SYMBOL(__put_user_nocheck_1)
+@@ -66,7 +66,7 @@ SYM_INNER_LABEL(__put_user_nocheck_2, SY
+ 2: movw %ax,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__put_user_2)
+ EXPORT_SYMBOL(__put_user_2)
+ EXPORT_SYMBOL(__put_user_nocheck_2)
+@@ -80,7 +80,7 @@ SYM_INNER_LABEL(__put_user_nocheck_4, SY
+ 3: movl %eax,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__put_user_4)
+ EXPORT_SYMBOL(__put_user_4)
+ EXPORT_SYMBOL(__put_user_nocheck_4)
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -23,7 +23,7 @@
+ .Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_FUNC
+- ret
++ RET
+ .endm
+
+ .macro THUNK reg
+--- a/arch/x86/math-emu/div_Xsig.S
++++ b/arch/x86/math-emu/div_Xsig.S
+@@ -341,7 +341,7 @@ L_exit:
+ popl %esi
+
+ leave
+- ret
++ RET
+
+
+ #ifdef PARANOID
+--- a/arch/x86/math-emu/div_small.S
++++ b/arch/x86/math-emu/div_small.S
+@@ -44,5 +44,5 @@ SYM_FUNC_START(FPU_div_small)
+ popl %esi
+
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_div_small)
+--- a/arch/x86/math-emu/mul_Xsig.S
++++ b/arch/x86/math-emu/mul_Xsig.S
+@@ -62,7 +62,7 @@ SYM_FUNC_START(mul32_Xsig)
+
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(mul32_Xsig)
+
+
+@@ -115,7 +115,7 @@ SYM_FUNC_START(mul64_Xsig)
+
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(mul64_Xsig)
+
+
+@@ -175,5 +175,5 @@ SYM_FUNC_START(mul_Xsig_Xsig)
+
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(mul_Xsig_Xsig)
+--- a/arch/x86/math-emu/polynom_Xsig.S
++++ b/arch/x86/math-emu/polynom_Xsig.S
+@@ -133,5 +133,5 @@ L_accum_done:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(polynomial_Xsig)
+--- a/arch/x86/math-emu/reg_norm.S
++++ b/arch/x86/math-emu/reg_norm.S
+@@ -72,7 +72,7 @@ L_exit_valid:
+ L_exit:
+ popl %ebx
+ leave
+- ret
++ RET
+
+
+ L_zero:
+@@ -138,7 +138,7 @@ L_exit_nuo_valid:
+
+ popl %ebx
+ leave
+- ret
++ RET
+
+ L_exit_nuo_zero:
+ movl TAG_Zero,%eax
+@@ -146,5 +146,5 @@ L_exit_nuo_zero:
+
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_normalize_nuo)
+--- a/arch/x86/math-emu/reg_round.S
++++ b/arch/x86/math-emu/reg_round.S
+@@ -437,7 +437,7 @@ fpu_Arith_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+
+
+ /*
+--- a/arch/x86/math-emu/reg_u_add.S
++++ b/arch/x86/math-emu/reg_u_add.S
+@@ -164,6 +164,6 @@ L_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ #endif /* PARANOID */
+ SYM_FUNC_END(FPU_u_add)
+--- a/arch/x86/math-emu/reg_u_div.S
++++ b/arch/x86/math-emu/reg_u_div.S
+@@ -468,7 +468,7 @@ L_exit:
+ popl %esi
+
+ leave
+- ret
++ RET
+ #endif /* PARANOID */
+
+ SYM_FUNC_END(FPU_u_div)
+--- a/arch/x86/math-emu/reg_u_mul.S
++++ b/arch/x86/math-emu/reg_u_mul.S
+@@ -144,7 +144,7 @@ L_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ #endif /* PARANOID */
+
+ SYM_FUNC_END(FPU_u_mul)
+--- a/arch/x86/math-emu/reg_u_sub.S
++++ b/arch/x86/math-emu/reg_u_sub.S
+@@ -270,5 +270,5 @@ L_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_u_sub)
+--- a/arch/x86/math-emu/round_Xsig.S
++++ b/arch/x86/math-emu/round_Xsig.S
+@@ -78,7 +78,7 @@ L_exit:
+ popl %esi
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(round_Xsig)
+
+
+@@ -138,5 +138,5 @@ L_n_exit:
+ popl %esi
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(norm_Xsig)
+--- a/arch/x86/math-emu/shr_Xsig.S
++++ b/arch/x86/math-emu/shr_Xsig.S
+@@ -45,7 +45,7 @@ SYM_FUNC_START(shr_Xsig)
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_31:
+ cmpl $64,%ecx
+@@ -61,7 +61,7 @@ L_more_than_31:
+ movl $0,8(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_63:
+ cmpl $96,%ecx
+@@ -76,7 +76,7 @@ L_more_than_63:
+ movl %edx,8(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_95:
+ xorl %eax,%eax
+@@ -85,5 +85,5 @@ L_more_than_95:
+ movl %eax,8(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(shr_Xsig)
+--- a/arch/x86/math-emu/wm_shrx.S
++++ b/arch/x86/math-emu/wm_shrx.S
+@@ -55,7 +55,7 @@ SYM_FUNC_START(FPU_shrx)
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_31:
+ cmpl $64,%ecx
+@@ -70,7 +70,7 @@ L_more_than_31:
+ movl $0,4(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_63:
+ cmpl $96,%ecx
+@@ -84,7 +84,7 @@ L_more_than_63:
+ movl %edx,4(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_95:
+ xorl %eax,%eax
+@@ -92,7 +92,7 @@ L_more_than_95:
+ movl %eax,4(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_shrx)
+
+
+@@ -146,7 +146,7 @@ SYM_FUNC_START(FPU_shrxs)
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ /* Shift by [0..31] bits */
+ Ls_less_than_32:
+@@ -163,7 +163,7 @@ Ls_less_than_32:
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ /* Shift by [64..95] bits */
+ Ls_more_than_63:
+@@ -189,7 +189,7 @@ Ls_more_than_63:
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ Ls_more_than_95:
+ /* Shift by [96..inf) bits */
+@@ -203,5 +203,5 @@ Ls_more_than_95:
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_shrxs)
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -65,7 +65,7 @@ SYM_FUNC_START(sme_encrypt_execute)
+ movq %rbp, %rsp /* Restore original stack pointer */
+ pop %rbp
+
+- ret
++ RET
+ SYM_FUNC_END(sme_encrypt_execute)
+
+ SYM_FUNC_START(__enc_copy)
+@@ -151,6 +151,6 @@ SYM_FUNC_START(__enc_copy)
+ pop %r12
+ pop %r15
+
+- ret
++ RET
+ .L__enc_copy_end:
+ SYM_FUNC_END(__enc_copy)
+--- a/arch/x86/platform/efi/efi_stub_32.S
++++ b/arch/x86/platform/efi/efi_stub_32.S
+@@ -56,5 +56,5 @@ SYM_FUNC_START(efi_call_svam)
+
+ movl 16(%esp), %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(efi_call_svam)
+--- a/arch/x86/platform/efi/efi_stub_64.S
++++ b/arch/x86/platform/efi/efi_stub_64.S
+@@ -23,5 +23,5 @@ SYM_FUNC_START(__efi_call)
+ mov %rsi, %rcx
+ CALL_NOSPEC rdi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(__efi_call)
+--- a/arch/x86/platform/efi/efi_thunk_64.S
++++ b/arch/x86/platform/efi/efi_thunk_64.S
+@@ -63,7 +63,7 @@ SYM_CODE_START(__efi64_thunk)
+ 1: movq 24(%rsp), %rsp
+ pop %rbx
+ pop %rbp
+- retq
++ RET
+
+ .code32
+ 2: pushl $__KERNEL_CS
+--- a/arch/x86/platform/olpc/xo1-wakeup.S
++++ b/arch/x86/platform/olpc/xo1-wakeup.S
+@@ -77,7 +77,7 @@ save_registers:
+ pushfl
+ popl saved_context_eflags
+
+- ret
++ RET
+
+ restore_registers:
+ movl saved_context_ebp, %ebp
+@@ -88,7 +88,7 @@ restore_registers:
+ pushl saved_context_eflags
+ popfl
+
+- ret
++ RET
+
+ SYM_CODE_START(do_olpc_suspend_lowlevel)
+ call save_processor_state
+@@ -109,7 +109,7 @@ ret_point:
+
+ call restore_registers
+ call restore_processor_state
+- ret
++ RET
+ SYM_CODE_END(do_olpc_suspend_lowlevel)
+
+ .data
+--- a/arch/x86/power/hibernate_asm_32.S
++++ b/arch/x86/power/hibernate_asm_32.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
+ FRAME_BEGIN
+ call swsusp_save
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(swsusp_arch_suspend)
+
+ SYM_CODE_START(restore_image)
+@@ -108,5 +108,5 @@ SYM_FUNC_START(restore_registers)
+ /* tell the hibernation core that we've just restored the memory */
+ movl %eax, in_suspend
+
+- ret
++ RET
+ SYM_FUNC_END(restore_registers)
+--- a/arch/x86/power/hibernate_asm_64.S
++++ b/arch/x86/power/hibernate_asm_64.S
+@@ -49,7 +49,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
+ FRAME_BEGIN
+ call swsusp_save
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(swsusp_arch_suspend)
+
+ SYM_CODE_START(restore_image)
+@@ -143,5 +143,5 @@ SYM_FUNC_START(restore_registers)
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
+- ret
++ RET
+ SYM_FUNC_END(restore_registers)
+--- a/arch/x86/um/checksum_32.S
++++ b/arch/x86/um/checksum_32.S
+@@ -110,7 +110,7 @@ csum_partial:
+ 7:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+
+ #else
+
+@@ -208,7 +208,7 @@ csum_partial:
+ 80:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+
+ #endif
+ EXPORT_SYMBOL(csum_partial)
+--- a/arch/x86/um/setjmp_32.S
++++ b/arch/x86/um/setjmp_32.S
+@@ -34,7 +34,7 @@ kernel_setjmp:
+ movl %esi,12(%edx)
+ movl %edi,16(%edx)
+ movl %ecx,20(%edx) # Return address
+- ret
++ RET
+
+ .size kernel_setjmp,.-kernel_setjmp
+
+--- a/arch/x86/um/setjmp_64.S
++++ b/arch/x86/um/setjmp_64.S
+@@ -33,7 +33,7 @@ kernel_setjmp:
+ movq %r14,40(%rdi)
+ movq %r15,48(%rdi)
+ movq %rsi,56(%rdi) # Return address
+- ret
++ RET
+
+ .size kernel_setjmp,.-kernel_setjmp
+
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -45,7 +45,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
+ call check_events
+ 1:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_irq_enable_direct)
+
+
+@@ -55,7 +55,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
+ */
+ SYM_FUNC_START(xen_irq_disable_direct)
+ movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+- ret
++ RET
+ SYM_FUNC_END(xen_irq_disable_direct)
+
+ /*
+@@ -71,7 +71,7 @@ SYM_FUNC_START(xen_save_fl_direct)
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ setz %ah
+ addb %ah, %ah
+- ret
++ RET
+ SYM_FUNC_END(xen_save_fl_direct)
+
+
+@@ -98,7 +98,7 @@ SYM_FUNC_START(xen_restore_fl_direct)
+ call check_events
+ 1:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_restore_fl_direct)
+
+
+@@ -128,7 +128,7 @@ SYM_FUNC_START(check_events)
+ pop %rcx
+ pop %rax
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(check_events)
+
+ SYM_FUNC_START(xen_read_cr2)
+@@ -136,14 +136,14 @@ SYM_FUNC_START(xen_read_cr2)
+ _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+ _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_read_cr2);
+
+ SYM_FUNC_START(xen_read_cr2_direct)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_read_cr2_direct);
+
+ .macro xen_pv_trap name
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -70,7 +70,7 @@ SYM_CODE_START(hypercall_page)
+ .rept (PAGE_SIZE / 32)
+ UNWIND_HINT_FUNC
+ .skip 31, 0x90
+- ret
++ RET
+ .endr
+
+ #define HYPERCALL(n) \
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sat, 4 Dec 2021 14:43:41 +0100
+Subject: x86: Prepare inline-asm for straight-line-speculation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b17c2baa305cccbd16bafa289fd743cc2db77966 upstream.
+
+Replace all ret/retq instructions with ASM_RET in preparation of
+making it more than a single instruction.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lore.kernel.org/r/20211204134907.964635458@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/linkage.h | 4 ++++
+ arch/x86/include/asm/paravirt.h | 2 +-
+ arch/x86/include/asm/qspinlock_paravirt.h | 4 ++--
+ arch/x86/kernel/alternative.c | 2 +-
+ arch/x86/kernel/kprobes/core.c | 2 +-
+ arch/x86/kernel/paravirt.c | 2 +-
+ arch/x86/kvm/emulate.c | 4 ++--
+ arch/x86/lib/error-inject.c | 3 ++-
+ samples/ftrace/ftrace-direct-modify.c | 4 ++--
+ samples/ftrace/ftrace-direct-too.c | 2 +-
+ samples/ftrace/ftrace-direct.c | 2 +-
+ 11 files changed, 18 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,6 +18,10 @@
+ #define __ALIGN_STR __stringify(__ALIGN)
+ #endif
+
++#else /* __ASSEMBLY__ */
++
++#define ASM_RET "ret\n\t"
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_X86_LINKAGE_H */
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -630,7 +630,7 @@ bool __raw_callee_save___native_vcpu_is_
+ "call " #func ";" \
+ PV_RESTORE_ALL_CALLER_REGS \
+ FRAME_END \
+- "ret;" \
++ ASM_RET \
+ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
+ ".popsection")
+
+--- a/arch/x86/include/asm/qspinlock_paravirt.h
++++ b/arch/x86/include/asm/qspinlock_paravirt.h
+@@ -48,7 +48,7 @@ asm (".pushsection .text;"
+ "jne .slowpath;"
+ "pop %rdx;"
+ FRAME_END
+- "ret;"
++ ASM_RET
+ ".slowpath: "
+ "push %rsi;"
+ "movzbl %al,%esi;"
+@@ -56,7 +56,7 @@ asm (".pushsection .text;"
+ "pop %rsi;"
+ "pop %rdx;"
+ FRAME_END
+- "ret;"
++ ASM_RET
+ ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
+ ".popsection");
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -869,7 +869,7 @@ asm (
+ " .type int3_magic, @function\n"
+ "int3_magic:\n"
+ " movl $1, (%" _ASM_ARG1 ")\n"
+-" ret\n"
++ ASM_RET
+ " .size int3_magic, .-int3_magic\n"
+ " .popsection\n"
+ );
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -768,7 +768,7 @@ asm(
+ RESTORE_REGS_STRING
+ " popfl\n"
+ #endif
+- " ret\n"
++ ASM_RET
+ ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
+ );
+ NOKPROBE_SYMBOL(kretprobe_trampoline);
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -40,7 +40,7 @@ extern void _paravirt_nop(void);
+ asm (".pushsection .entry.text, \"ax\"\n"
+ ".global _paravirt_nop\n"
+ "_paravirt_nop:\n\t"
+- "ret\n\t"
++ ASM_RET
+ ".size _paravirt_nop, . - _paravirt_nop\n\t"
+ ".type _paravirt_nop, @function\n\t"
+ ".popsection");
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -316,7 +316,7 @@ static int fastop(struct x86_emulate_ctx
+ __FOP_FUNC(#name)
+
+ #define __FOP_RET(name) \
+- "ret \n\t" \
++ ASM_RET \
+ ".size " name ", .-" name "\n\t"
+
+ #define FOP_RET(name) \
+@@ -437,7 +437,7 @@ static int fastop(struct x86_emulate_ctx
+
+ asm(".pushsection .fixup, \"ax\"\n"
+ ".global kvm_fastop_exception \n"
+- "kvm_fastop_exception: xor %esi, %esi; ret\n"
++ "kvm_fastop_exception: xor %esi, %esi; " ASM_RET
+ ".popsection");
+
+ FOP_START(setcc)
+--- a/arch/x86/lib/error-inject.c
++++ b/arch/x86/lib/error-inject.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+
++#include <linux/linkage.h>
+ #include <linux/error-injection.h>
+ #include <linux/kprobes.h>
+
+@@ -10,7 +11,7 @@ asm(
+ ".type just_return_func, @function\n"
+ ".globl just_return_func\n"
+ "just_return_func:\n"
+- " ret\n"
++ ASM_RET
+ ".size just_return_func, .-just_return_func\n"
+ );
+
+--- a/samples/ftrace/ftrace-direct-modify.c
++++ b/samples/ftrace/ftrace-direct-modify.c
+@@ -31,7 +31,7 @@ asm (
+ " call my_direct_func1\n"
+ " leave\n"
+ " .size my_tramp1, .-my_tramp1\n"
+-" ret\n"
++ ASM_RET
+ " .type my_tramp2, @function\n"
+ " .globl my_tramp2\n"
+ " my_tramp2:"
+@@ -39,7 +39,7 @@ asm (
+ " movq %rsp, %rbp\n"
+ " call my_direct_func2\n"
+ " leave\n"
+-" ret\n"
++ ASM_RET
+ " .size my_tramp2, .-my_tramp2\n"
+ " .popsection\n"
+ );
+--- a/samples/ftrace/ftrace-direct-too.c
++++ b/samples/ftrace/ftrace-direct-too.c
+@@ -31,7 +31,7 @@ asm (
+ " popq %rsi\n"
+ " popq %rdi\n"
+ " leave\n"
+-" ret\n"
++ ASM_RET
+ " .size my_tramp, .-my_tramp\n"
+ " .popsection\n"
+ );
+--- a/samples/ftrace/ftrace-direct.c
++++ b/samples/ftrace/ftrace-direct.c
+@@ -24,7 +24,7 @@ asm (
+ " call my_direct_func\n"
+ " popq %rdi\n"
+ " leave\n"
+-" ret\n"
++ ASM_RET
+ " .size my_tramp, .-my_tramp\n"
+ " .popsection\n"
+ );
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Fri, 1 Jul 2022 11:21:20 -0300
+Subject: x86/realmode: build with -D__DISABLE_EXPORTS
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+Commit 156ff4a544ae ("x86/ibt: Base IBT bits") added this option when
+building realmode in order to disable IBT there. This is also needed in
+order to disable return thunks.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -31,7 +31,7 @@ endif
+ CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
+ M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
+
+-REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \
++REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \
+ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 27 Jun 2022 22:21:17 +0000
+Subject: x86/retbleed: Add fine grained Kconfig knobs
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream.
+
+Do fine-grained Kconfig for all the various retbleed parts.
+
+NOTE: if your compiler doesn't support return thunks this will
+silently 'upgrade' your mitigation to IBPB, you might not like this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: there is no CONFIG_OBJTOOL]
+[cascardo: objtool calling and option parsing has changed]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10:
+ - In scripts/Makefile.build, add the objtool option with an ifdef
+ block, same as for other options
+ - Adjust filename, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile | 8 +-
+ arch/x86/Kconfig | 106 +++++++++++++++++++++++--------
+ arch/x86/entry/calling.h | 4 +
+ arch/x86/include/asm/disabled-features.h | 18 ++++-
+ arch/x86/include/asm/linkage.h | 4 -
+ arch/x86/include/asm/nospec-branch.h | 10 ++
+ arch/x86/include/asm/static_call.h | 2
+ arch/x86/kernel/alternative.c | 5 +
+ arch/x86/kernel/cpu/amd.c | 2
+ arch/x86/kernel/cpu/bugs.c | 42 +++++++-----
+ arch/x86/kernel/static_call.c | 2
+ arch/x86/kvm/emulate.c | 4 -
+ arch/x86/lib/retpoline.S | 4 +
+ scripts/Makefile.build | 3
+ scripts/link-vmlinux.sh | 2
+ security/Kconfig | 11 ---
+ tools/objtool/builtin-check.c | 3
+ tools/objtool/builtin.h | 2
+ tools/objtool/check.c | 9 ++
+ 19 files changed, 172 insertions(+), 69 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -672,14 +672,18 @@ endif
+
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
+-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern)
+ endif
++
++ifdef CONFIG_RETHUNK
++RETHUNK_CFLAGS := -mfunction-return=thunk-extern
++RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
++endif
++
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -453,30 +453,6 @@ config GOLDFISH
+ def_bool y
+ depends on X86_GOLDFISH
+
+-config RETPOLINE
+- bool "Avoid speculative indirect branches in kernel"
+- default y
+- help
+- Compile kernel with the retpoline compiler options to guard against
+- kernel-to-user data leaks by avoiding speculative indirect
+- branches. Requires a compiler with -mindirect-branch=thunk-extern
+- support for full protection. The kernel may run slower.
+-
+-config CC_HAS_SLS
+- def_bool $(cc-option,-mharden-sls=all)
+-
+-config CC_HAS_RETURN_THUNK
+- def_bool $(cc-option,-mfunction-return=thunk-extern)
+-
+-config SLS
+- bool "Mitigate Straight-Line-Speculation"
+- depends on CC_HAS_SLS && X86_64
+- default n
+- help
+- Compile the kernel with straight-line-speculation options to guard
+- against straight line speculation. The kernel image might be slightly
+- larger.
+-
+ config X86_CPU_RESCTRL
+ bool "x86 CPU resource control support"
+ depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
+@@ -2430,6 +2406,88 @@ source "kernel/livepatch/Kconfig"
+
+ endmenu
+
++config CC_HAS_SLS
++ def_bool $(cc-option,-mharden-sls=all)
++
++config CC_HAS_RETURN_THUNK
++ def_bool $(cc-option,-mfunction-return=thunk-extern)
++
++menuconfig SPECULATION_MITIGATIONS
++ bool "Mitigations for speculative execution vulnerabilities"
++ default y
++ help
++ Say Y here to enable options which enable mitigations for
++ speculative execution hardware vulnerabilities.
++
++ If you say N, all mitigations will be disabled. You really
++ should know what you are doing to say so.
++
++if SPECULATION_MITIGATIONS
++
++config PAGE_TABLE_ISOLATION
++ bool "Remove the kernel mapping in user mode"
++ default y
++ depends on (X86_64 || X86_PAE)
++ help
++ This feature reduces the number of hardware side channels by
++ ensuring that the majority of kernel addresses are not mapped
++ into userspace.
++
++ See Documentation/x86/pti.rst for more details.
++
++config RETPOLINE
++ bool "Avoid speculative indirect branches in kernel"
++ default y
++ help
++ Compile kernel with the retpoline compiler options to guard against
++ kernel-to-user data leaks by avoiding speculative indirect
++ branches. Requires a compiler with -mindirect-branch=thunk-extern
++ support for full protection. The kernel may run slower.
++
++config RETHUNK
++ bool "Enable return-thunks"
++ depends on RETPOLINE && CC_HAS_RETURN_THUNK
++ default y
++ help
++ Compile the kernel with the return-thunks compiler option to guard
++ against kernel-to-user data leaks by avoiding return speculation.
++ Requires a compiler with -mfunction-return=thunk-extern
++ support for full protection. The kernel may run slower.
++
++config CPU_UNRET_ENTRY
++ bool "Enable UNRET on kernel entry"
++ depends on CPU_SUP_AMD && RETHUNK
++ default y
++ help
++ Compile the kernel with support for the retbleed=unret mitigation.
++
++config CPU_IBPB_ENTRY
++ bool "Enable IBPB on kernel entry"
++ depends on CPU_SUP_AMD
++ default y
++ help
++ Compile the kernel with support for the retbleed=ibpb mitigation.
++
++config CPU_IBRS_ENTRY
++ bool "Enable IBRS on kernel entry"
++ depends on CPU_SUP_INTEL
++ default y
++ help
++ Compile the kernel with support for the spectre_v2=ibrs mitigation.
++ This mitigates both spectre_v2 and retbleed at great cost to
++ performance.
++
++config SLS
++ bool "Mitigate Straight-Line-Speculation"
++ depends on CC_HAS_SLS && X86_64
++ default n
++ help
++ Compile the kernel with straight-line-speculation options to guard
++ against straight line speculation. The kernel image might be slightly
++ larger.
++
++endif
++
+ config ARCH_HAS_ADD_PAGES
+ def_bool y
+ depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -323,6 +323,7 @@ For 32-bit we have the following convent
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+ */
+ .macro IBRS_ENTER save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+@@ -343,6 +344,7 @@ For 32-bit we have the following convent
+ shr $32, %rdx
+ wrmsr
+ .Lend_\@:
++#endif
+ .endm
+
+ /*
+@@ -350,6 +352,7 @@ For 32-bit we have the following convent
+ * regs. Must be called after the last RET.
+ */
+ .macro IBRS_EXIT save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+@@ -364,6 +367,7 @@ For 32-bit we have the following convent
+ shr $32, %rdx
+ wrmsr
+ .Lend_\@:
++#endif
+ .endm
+
+ /*
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -60,9 +60,19 @@
+ # define DISABLE_RETPOLINE 0
+ #else
+ # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
+- (1 << (X86_FEATURE_RETHUNK & 31)) | \
+- (1 << (X86_FEATURE_UNRET & 31)))
++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
++#ifdef CONFIG_RETHUNK
++# define DISABLE_RETHUNK 0
++#else
++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
++#endif
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
++# define DISABLE_UNRET 0
++#else
++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+ #endif
+
+ /* Force disable because it's broken beyond repair */
+@@ -82,7 +92,7 @@
+ #define DISABLED_MASK8 0
+ #define DISABLED_MASK9 (DISABLE_SMAP)
+ #define DISABLED_MASK10 0
+-#define DISABLED_MASK11 (DISABLE_RETPOLINE)
++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+ #define DISABLED_MASK12 0
+ #define DISABLED_MASK13 0
+ #define DISABLED_MASK14 0
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,7 +18,7 @@
+ #define __ALIGN_STR __stringify(__ALIGN)
+ #endif
+
+-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define RET jmp __x86_return_thunk
+ #else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+@@ -30,7 +30,7 @@
+
+ #else /* __ASSEMBLY__ */
+
+-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define ASM_RET "jmp __x86_return_thunk\n\t"
+ #else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -127,6 +127,12 @@
+ .Lskip_rsb_\@:
+ .endm
+
++#ifdef CONFIG_CPU_UNRET_ENTRY
++#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
++#else
++#define CALL_ZEN_UNTRAIN_RET ""
++#endif
++
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+@@ -139,10 +145,10 @@
+ * where we have a stack but before any RET instruction.
+ */
+ .macro UNTRAIN_RET
+-#ifdef CONFIG_RETPOLINE
++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+ ANNOTATE_UNRET_END
+ ALTERNATIVE_2 "", \
+- "call zen_untrain_ret", X86_FEATURE_UNRET, \
++ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -44,7 +44,7 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
+-#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_RETHUNK
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+ #else
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -662,6 +662,7 @@ void __init_or_module noinline apply_ret
+ }
+ }
+
++#ifdef CONFIG_RETHUNK
+ /*
+ * Rewrite the compiler generated return thunk tail-calls.
+ *
+@@ -723,6 +724,10 @@ void __init_or_module noinline apply_ret
+ }
+ }
+ }
++#else
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
++#endif /* CONFIG_RETHUNK */
++
+ #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+
+ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -916,6 +916,7 @@ static void init_amd_bd(struct cpuinfo_x
+
+ void init_spectral_chicken(struct cpuinfo_x86 *c)
+ {
++#ifdef CONFIG_CPU_UNRET_ENTRY
+ u64 value;
+
+ /*
+@@ -932,6 +933,7 @@ void init_spectral_chicken(struct cpuinf
+ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+ }
+ }
++#endif
+ }
+
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -798,7 +798,6 @@ static int __init retbleed_parse_cmdline
+ early_param("retbleed", retbleed_parse_cmdline);
+
+ #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n"
+ #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+ static void __init retbleed_select_mitigation(void)
+@@ -813,18 +812,33 @@ static void __init retbleed_select_mitig
+ return;
+
+ case RETBLEED_CMD_UNRET:
+- retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
++ goto do_cmd_auto;
++ }
+ break;
+
+ case RETBLEED_CMD_IBPB:
+- retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
++ goto do_cmd_auto;
++ }
+ break;
+
++do_cmd_auto:
+ case RETBLEED_CMD_AUTO:
+ default:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+- retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY))
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ }
+
+ /*
+ * The Intel mitigation (IBRS or eIBRS) was already selected in
+@@ -837,14 +851,6 @@ static void __init retbleed_select_mitig
+
+ switch (retbleed_mitigation) {
+ case RETBLEED_MITIGATION_UNRET:
+-
+- if (!IS_ENABLED(CONFIG_RETPOLINE) ||
+- !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) {
+- pr_err(RETBLEED_COMPILER_MSG);
+- retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+- goto retbleed_force_ibpb;
+- }
+-
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+@@ -856,7 +862,6 @@ static void __init retbleed_select_mitig
+ break;
+
+ case RETBLEED_MITIGATION_IBPB:
+-retbleed_force_ibpb:
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ mitigate_smt = true;
+ break;
+@@ -1227,6 +1232,12 @@ static enum spectre_v2_mitigation_cmd __
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
++ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+ mitigation_options[i].option);
+@@ -1284,7 +1295,8 @@ static void __init spectre_v2_select_mit
+ break;
+ }
+
+- if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
++ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ retbleed_cmd != RETBLEED_CMD_OFF &&
+ boot_cpu_has(X86_FEATURE_IBRS) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -108,7 +108,7 @@ void arch_static_call_transform(void *si
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
+
+-#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_RETHUNK
+ /*
+ * This is called by apply_returns() to fix up static call trampolines,
+ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -435,10 +435,10 @@ static int fastop(struct x86_emulate_ctx
+ * Depending on .config the SETcc functions look like:
+ *
+ * SETcc %al [3 bytes]
+- * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE]
++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
+ * INT3 [1 byte; CONFIG_SLS]
+ */
+-#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \
+ IS_ENABLED(CONFIG_SLS))
+ #define SETCC_LENGTH (3 + RET_LENGTH)
+ #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -71,6 +71,8 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ */
++#ifdef CONFIG_RETHUNK
++
+ .section .text.__x86.return_thunk
+
+ /*
+@@ -135,3 +137,5 @@ SYM_FUNC_END(zen_untrain_ret)
+ __EXPORT_THUNK(zen_untrain_ret)
+
+ EXPORT_SYMBOL(__x86_return_thunk)
++
++#endif /* CONFIG_RETHUNK */
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -227,6 +227,9 @@ endif
+ ifdef CONFIG_RETPOLINE
+ objtool_args += --retpoline
+ endif
++ifdef CONFIG_RETHUNK
++ objtool_args += --rethunk
++endif
+ ifdef CONFIG_X86_SMAP
+ objtool_args += --uaccess
+ endif
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -65,7 +65,7 @@ objtool_link()
+
+ if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+ objtoolopt="check"
+- if [ -n "${CONFIG_RETPOLINE}" ]; then
++ if [ -n "${CONFIG_CPU_UNRET_ENTRY}" ]; then
+ objtoolopt="${objtoolopt} --unret"
+ fi
+ if [ -z "${CONFIG_FRAME_POINTER}" ]; then
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -54,17 +54,6 @@ config SECURITY_NETWORK
+ implement socket and networking access controls.
+ If you are unsure how to answer this question, answer N.
+
+-config PAGE_TABLE_ISOLATION
+- bool "Remove the kernel mapping in user mode"
+- default y
+- depends on (X86_64 || X86_PAE) && !UML
+- help
+- This feature reduces the number of hardware side channels by
+- ensuring that the majority of kernel addresses are not mapped
+- into userspace.
+-
+- See Documentation/x86/pti.rst for more details.
+-
+ config SECURITY_INFINIBAND
+ bool "Infiniband Security Hooks"
+ depends on SECURITY && INFINIBAND
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -19,7 +19,7 @@
+ #include "objtool.h"
+
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+- validate_dup, vmlinux, sls, unret;
++ validate_dup, vmlinux, sls, unret, rethunk;
+
+ static const char * const check_usage[] = {
+ "objtool check [<options>] file.o",
+@@ -30,6 +30,7 @@ const struct option check_options[] = {
+ OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+ OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++ OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"),
+ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+ OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -9,7 +9,7 @@
+
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+- validate_dup, vmlinux, sls, unret;
++ validate_dup, vmlinux, sls, unret, rethunk;
+
+ extern int cmd_check(int argc, const char **argv);
+ extern int cmd_orc(int argc, const char **argv);
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -3262,8 +3262,11 @@ static int validate_retpoline(struct obj
+ continue;
+
+ if (insn->type == INSN_RETURN) {
+- WARN_FUNC("'naked' return found in RETPOLINE build",
+- insn->sec, insn->offset);
++ if (rethunk) {
++ WARN_FUNC("'naked' return found in RETHUNK build",
++ insn->sec, insn->offset);
++ } else
++ continue;
+ } else {
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+@@ -3533,7 +3536,9 @@ int check(struct objtool_file *file)
+ if (ret < 0)
+ goto out;
+ warnings += ret;
++ }
+
++ if (rethunk) {
+ ret = create_return_sites_sections(file);
+ if (ret < 0)
+ goto out;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:34 +0200
+Subject: x86/retpoline: Cleanup some #ifdefery
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream.
+
+On its own not much of a cleanup but it prepares for more/similar
+code.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: conflict fixup because of DISABLE_ENQCMD]
+[cascardo: no changes at nospec-branch.h and bpf_jit_comp.c]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/disabled-features.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -56,6 +56,13 @@
+ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+ #endif
+
++#ifdef CONFIG_RETPOLINE
++# define DISABLE_RETPOLINE 0
++#else
++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
+ /* Force disable because it's broken beyond repair */
+ #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+
+@@ -73,7 +80,7 @@
+ #define DISABLED_MASK8 0
+ #define DISABLED_MASK9 (DISABLE_SMAP)
+ #define DISABLED_MASK10 0
+-#define DISABLED_MASK11 0
++#define DISABLED_MASK11 (DISABLE_RETPOLINE)
+ #define DISABLED_MASK12 0
+ #define DISABLED_MASK13 0
+ #define DISABLED_MASK14 0
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:41 +0200
+Subject: x86/retpoline: Create a retpoline thunk array
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1a6f74429c42a3854980359a758e222005712aee upstream.
+
+Stick all the retpolines in a single symbol and have the individual
+thunks as inner labels, this should guarantee thunk order and layout.
+
+Previously there were 16 (or rather 15 without rsp) separate symbols and
+a toolchain might reasonably expect it could displace them however it
+liked, with disregard for their relative position.
+
+However, now they're part of a larger symbol. Any change to their
+relative position would disrupt this larger _array symbol and thus not
+be sound.
+
+This is the same reasoning used for data symbols. On their own there
+is no guarantee about their relative position wrt to one another, but
+we're still able to do arrays because an array as a whole is a single
+larger symbol.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.169659320@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 8 +++++++-
+ arch/x86/lib/retpoline.S | 14 +++++++++-----
+ 2 files changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -12,6 +12,8 @@
+ #include <asm/msr-index.h>
+ #include <asm/unwind_hints.h>
+
++#define RETPOLINE_THUNK_SIZE 32
++
+ /*
+ * Fill the CPU return stack buffer.
+ *
+@@ -120,11 +122,15 @@
+
+ #ifdef CONFIG_RETPOLINE
+
++typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
++
+ #define GEN(reg) \
+- extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
++ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
+
++extern retpoline_thunk_t __x86_indirect_thunk_array[];
++
+ #ifdef CONFIG_X86_64
+
+ /*
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -28,16 +28,14 @@
+
+ .macro THUNK reg
+
+- .align 32
+-
+-SYM_FUNC_START(__x86_indirect_thunk_\reg)
++ .align RETPOLINE_THUNK_SIZE
++SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
++ UNWIND_HINT_EMPTY
+
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+
+-SYM_FUNC_END(__x86_indirect_thunk_\reg)
+-
+ .endm
+
+ /*
+@@ -55,10 +53,16 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+
++ .align RETPOLINE_THUNK_SIZE
++SYM_CODE_START(__x86_indirect_thunk_array)
++
+ #define GEN(reg) THUNK reg
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
+
++ .align RETPOLINE_THUNK_SIZE
++SYM_CODE_END(__x86_indirect_thunk_array)
++
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:40 +0200
+Subject: x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 6fda8a38865607db739be3e567a2387376222dbd upstream.
+
+Because it makes no sense to split the retpoline gunk over multiple
+headers.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120310.106290934@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h | 8 --------
+ arch/x86/include/asm/nospec-branch.h | 7 +++++++
+ arch/x86/net/bpf_jit_comp.c | 1 -
+ 3 files changed, 7 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -17,11 +17,3 @@
+ extern void cmpxchg8b_emu(void);
+ #endif
+
+-#ifdef CONFIG_RETPOLINE
+-
+-#define GEN(reg) \
+- extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-#undef GEN
+-
+-#endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -5,6 +5,7 @@
+
+ #include <linux/static_key.h>
+ #include <linux/objtool.h>
++#include <linux/linkage.h>
+
+ #include <asm/alternative.h>
+ #include <asm/cpufeatures.h>
+@@ -118,6 +119,12 @@
+ ".popsection\n\t"
+
+ #ifdef CONFIG_RETPOLINE
++
++#define GEN(reg) \
++ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
++#include <asm/GEN-for-each-reg.h>
++#undef GEN
++
+ #ifdef CONFIG_X86_64
+
+ /*
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -15,7 +15,6 @@
+ #include <asm/set_memory.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/text-patching.h>
+-#include <asm/asm-prototypes.h>
+
+ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+ {
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 26 Oct 2021 14:01:37 +0200
+Subject: x86/retpoline: Remove unused replacement symbols
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4fe79e710d9574a14993f8b4e16b7252da72d5e8 upstream.
+
+Now that objtool no longer creates alternatives, these replacement
+symbols are no longer needed, remove them.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Tested-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/r/20211026120309.915051744@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h | 10 --------
+ arch/x86/lib/retpoline.S | 42 ----------------------------------
+ 2 files changed, 52 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -24,14 +24,4 @@ extern void cmpxchg8b_emu(void);
+ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+ #include <asm/GEN-for-each-reg.h>
+
+-#undef GEN
+-#define GEN(reg) \
+- extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) \
+- extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -41,36 +41,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
+ .endm
+
+ /*
+- * This generates .altinstr_replacement symbols for use by objtool. They,
+- * however, must not actually live in .altinstr_replacement since that will be
+- * discarded after init, but module alternatives will also reference these
+- * symbols.
+- *
+- * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+- */
+-.macro ALT_THUNK reg
+-
+- .align 1
+-
+-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+- ANNOTATE_RETPOLINE_SAFE
+-1: call *%\reg
+-2: .skip 5-(2b-1b), 0x90
+-SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+-
+-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
+-
+-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+- ANNOTATE_RETPOLINE_SAFE
+-1: jmp *%\reg
+-2: .skip 5-(2b-1b), 0x90
+-SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+-
+-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
+-
+-.endm
+-
+-/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+@@ -92,15 +62,3 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_
+ #undef GEN
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) ALT_THUNK reg
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+-#include <asm/GEN-for-each-reg.h>
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 26 Mar 2021 16:12:02 +0100
+Subject: x86/retpoline: Simplify retpolines
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 119251855f9adf9421cb5eb409933092141ab2c7 upstream.
+
+Due to:
+
+ c9c324dc22aa ("objtool: Support stack layout changes in alternatives")
+
+it is now possible to simplify the retpolines.
+
+Currently our retpolines consist of 2 symbols:
+
+ - __x86_indirect_thunk_\reg: the compiler target
+ - __x86_retpoline_\reg: the actual retpoline.
+
+Both are consecutive in code and aligned such that for any one register
+they both live in the same cacheline:
+
+ 0000000000000000 <__x86_indirect_thunk_rax>:
+ 0: ff e0 jmpq *%rax
+ 2: 90 nop
+ 3: 90 nop
+ 4: 90 nop
+
+ 0000000000000005 <__x86_retpoline_rax>:
+ 5: e8 07 00 00 00 callq 11 <__x86_retpoline_rax+0xc>
+ a: f3 90 pause
+ c: 0f ae e8 lfence
+ f: eb f9 jmp a <__x86_retpoline_rax+0x5>
+ 11: 48 89 04 24 mov %rax,(%rsp)
+ 15: c3 retq
+ 16: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1)
+
+The thunk is an alternative_2, where one option is a JMP to the
+retpoline. This was done so that objtool didn't need to deal with
+alternatives with stack ops. But that problem has been solved, so now
+it is possible to fold the entire retpoline into the alternative to
+simplify and consolidate unused bytes:
+
+ 0000000000000000 <__x86_indirect_thunk_rax>:
+ 0: ff e0 jmpq *%rax
+ 2: 90 nop
+ 3: 90 nop
+ 4: 90 nop
+ 5: 90 nop
+ 6: 90 nop
+ 7: 90 nop
+ 8: 90 nop
+ 9: 90 nop
+ a: 90 nop
+ b: 90 nop
+ c: 90 nop
+ d: 90 nop
+ e: 90 nop
+ f: 90 nop
+ 10: 90 nop
+ 11: 66 66 2e 0f 1f 84 00 00 00 00 00 data16 nopw %cs:0x0(%rax,%rax,1)
+ 1c: 0f 1f 40 00 nopl 0x0(%rax)
+
+Notice that since the longest alternative sequence is now:
+
+ 0: e8 07 00 00 00 callq c <.altinstr_replacement+0xc>
+ 5: f3 90 pause
+ 7: 0f ae e8 lfence
+ a: eb f9 jmp 5 <.altinstr_replacement+0x5>
+ c: 48 89 04 24 mov %rax,(%rsp)
+ 10: c3 retq
+
+17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW, if
+we can shrink the retpoline by 1 byte we can pack it more densely).
+
+ [ bp: Massage commit message. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/20210326151259.506071949@infradead.org
+[bwh: Backported to 5.10:
+ - Use X86_FEATRURE_RETPOLINE_LFENCE flag instead of
+ X86_FEATURE_RETPOLINE_AMD, since the later renaming of this flag
+ has already been applied
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h | 7 -------
+ arch/x86/include/asm/nospec-branch.h | 6 +++---
+ arch/x86/lib/retpoline.S | 34 +++++++++++++++++-----------------
+ tools/objtool/check.c | 3 +--
+ 4 files changed, 21 insertions(+), 29 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
+ #define DECL_INDIRECT_THUNK(reg) \
+ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+
+-#define DECL_RETPOLINE(reg) \
+- extern asmlinkage void __x86_retpoline_ ## reg (void);
+-
+ #undef GEN
+ #define GEN(reg) DECL_INDIRECT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+
+-#undef GEN
+-#define GEN(reg) DECL_RETPOLINE(reg)
+-#include <asm/GEN-for-each-reg.h>
+-
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -80,7 +80,7 @@
+ .macro JMP_NOSPEC reg:req
+ #ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+- __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
++ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ #else
+ jmp *%\reg
+@@ -90,7 +90,7 @@
+ .macro CALL_NOSPEC reg:req
+ #ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+- __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
++ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ #else
+ call *%\reg
+@@ -128,7 +128,7 @@
+ ALTERNATIVE_2( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+- "call __x86_retpoline_%V[thunk_target]\n", \
++ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE, \
+ "lfence;\n" \
+ ANNOTATE_RETPOLINE_SAFE \
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -10,27 +10,31 @@
+ #include <asm/unwind_hints.h>
+ #include <asm/frame.h>
+
+-.macro THUNK reg
+- .section .text.__x86.indirect_thunk
+-
+- .align 32
+-SYM_FUNC_START(__x86_indirect_thunk_\reg)
+- JMP_NOSPEC \reg
+-SYM_FUNC_END(__x86_indirect_thunk_\reg)
+-
+-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
++.macro RETPOLINE reg
+ ANNOTATE_INTRA_FUNCTION_CALL
+- call .Ldo_rop_\@
++ call .Ldo_rop_\@
+ .Lspec_trap_\@:
+ UNWIND_HINT_EMPTY
+ pause
+ lfence
+- jmp .Lspec_trap_\@
++ jmp .Lspec_trap_\@
+ .Ldo_rop_\@:
+- mov %\reg, (%_ASM_SP)
++ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_FUNC
+ ret
+-SYM_FUNC_END(__x86_retpoline_\reg)
++.endm
++
++.macro THUNK reg
++ .section .text.__x86.indirect_thunk
++
++ .align 32
++SYM_FUNC_START(__x86_indirect_thunk_\reg)
++
++ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
++ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
++
++SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+ .endm
+
+@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg)
+
+ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+-#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)
+
+ #undef GEN
+ #define GEN(reg) THUNK reg
+@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+
+-#undef GEN
+-#define GEN(reg) EXPORT_RETPOLINE(reg)
+-#include <asm/GEN-for-each-reg.h>
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -800,8 +800,7 @@ static int add_jump_destinations(struct
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+- !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
++ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+ /*
+ * Retpoline jumps are really dynamic jumps in
+ * disguise, so convert them accordingly.
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:35 +0200
+Subject: x86/retpoline: Swizzle retpoline thunk
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 00e1533325fd1fb5459229fe37f235462649f668 upstream.
+
+Put the actual retpoline thunk as the original code so that it can
+become more complicated. Specifically, it allows RET to be a JMP,
+which can't be .altinstr_replacement since that doesn't do relocations
+(except for the very first instruction).
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -32,9 +32,9 @@
+ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_EMPTY
+
+- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
++ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
++ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
+
+ .endm
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:36 +0200
+Subject: x86/retpoline: Use -mfunction-return
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream.
+
+Utilize -mfunction-return=thunk-extern when available to have the
+compiler replace RET instructions with direct JMPs to the symbol
+__x86_return_thunk. This does not affect assembler (.S) sources, only C
+sources.
+
+-mfunction-return=thunk-extern has been available since gcc 7.3 and
+clang 15.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: RETPOLINE_CFLAGS is at Makefile]
+[cascardo: remove ANNOTATE_NOENDBR from __x86_return_thunk]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile | 2 ++
+ arch/x86/include/asm/nospec-branch.h | 2 ++
+ arch/x86/lib/retpoline.S | 12 ++++++++++++
+ 3 files changed, 16 insertions(+)
+
+--- a/Makefile
++++ b/Makefile
+@@ -672,11 +672,13 @@ endif
+
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern)
+ endif
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -120,6 +120,8 @@
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
++extern void __x86_return_thunk(void);
++
+ #ifdef CONFIG_RETPOLINE
+
+ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -66,3 +66,15 @@ SYM_CODE_END(__x86_indirect_thunk_array)
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
++
++/*
++ * This function name is magical and is used by -mfunction-return=thunk-extern
++ * for the compiler to generate JMPs to it.
++ */
++SYM_CODE_START(__x86_return_thunk)
++ UNWIND_HINT_EMPTY
++ ret
++ int3
++SYM_CODE_END(__x86_return_thunk)
++
++__EXPORT_THUNK(__x86_return_thunk)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 14 Jun 2022 23:15:44 +0200
+Subject: x86/sev: Avoid using __x86_return_thunk
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream.
+
+Specifically, it's because __enc_copy() encrypts the kernel after
+being relocated outside the kernel in sme_encrypt_execute(), and the
+RET macro's jmp offset isn't amended prior to execution.
+
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/mem_encrypt_boot.S | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute)
+ movq %rbp, %rsp /* Restore original stack pointer */
+ pop %rbp
+
+- RET
++ /* Offset to __x86_return_thunk would be wrong here */
++ ret
++ int3
+ SYM_FUNC_END(sme_encrypt_execute)
+
+ SYM_FUNC_START(__enc_copy)
+@@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy)
+ pop %r12
+ pop %r15
+
+- RET
++ /* Offset to __x86_return_thunk would be wrong here */
++ ret
++ int3
+ .L__enc_copy_end:
+ SYM_FUNC_END(__enc_copy)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 14 Jun 2022 23:15:55 +0200
+Subject: x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream.
+
+Extend spectre_v2= boot option with Kernel IBRS.
+
+ [jpoimboe: no STIBP with IBRS]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 1
+ arch/x86/include/asm/nospec-branch.h | 1
+ arch/x86/kernel/cpu/bugs.c | 66 ++++++++++++++++++------
+ 3 files changed, 54 insertions(+), 14 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5026,6 +5026,7 @@
+ eibrs - enhanced IBRS
+ eibrs,retpoline - enhanced IBRS + Retpolines
+ eibrs,lfence - enhanced IBRS + LFENCE
++ ibrs - use IBRS to protect kernel
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -212,6 +212,7 @@ enum spectre_v2_mitigation {
+ SPECTRE_V2_EIBRS,
+ SPECTRE_V2_EIBRS_RETPOLINE,
+ SPECTRE_V2_EIBRS_LFENCE,
++ SPECTRE_V2_IBRS,
+ };
+
+ /* The indirect branch speculation control variants */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -928,6 +928,7 @@ enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_EIBRS,
+ SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+ SPECTRE_V2_CMD_EIBRS_LFENCE,
++ SPECTRE_V2_CMD_IBRS,
+ };
+
+ enum spectre_v2_user_cmd {
+@@ -1000,11 +1001,12 @@ spectre_v2_parse_user_cmdline(enum spect
+ return SPECTRE_V2_USER_CMD_AUTO;
+ }
+
+-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+ {
+- return (mode == SPECTRE_V2_EIBRS ||
+- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+- mode == SPECTRE_V2_EIBRS_LFENCE);
++ return mode == SPECTRE_V2_IBRS ||
++ mode == SPECTRE_V2_EIBRS ||
++ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++ mode == SPECTRE_V2_EIBRS_LFENCE;
+ }
+
+ static void __init
+@@ -1069,12 +1071,12 @@ spectre_v2_user_select_mitigation(enum s
+ }
+
+ /*
+- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
+- * required.
++ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
++ * STIBP is not required.
+ */
+ if (!boot_cpu_has(X86_FEATURE_STIBP) ||
+ !smt_possible ||
+- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
++ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+ return;
+
+ /*
+@@ -1106,6 +1108,7 @@ static const char * const spectre_v2_str
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
+ };
+
+ static const struct {
+@@ -1123,6 +1126,7 @@ static const struct {
+ { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
+ { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
++ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
+ };
+
+ static void __init spec_v2_print_cond(const char *reason, bool secure)
+@@ -1185,6 +1189,24 @@ static enum spectre_v2_mitigation_cmd __
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
++ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
+ spec_v2_print_cond(mitigation_options[i].option,
+ mitigation_options[i].secure);
+ return cmd;
+@@ -1224,6 +1246,14 @@ static void __init spectre_v2_select_mit
+ break;
+ }
+
++ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++ retbleed_cmd != RETBLEED_CMD_OFF &&
++ boot_cpu_has(X86_FEATURE_IBRS) &&
++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++ mode = SPECTRE_V2_IBRS;
++ break;
++ }
++
+ mode = spectre_v2_select_retpoline();
+ break;
+
+@@ -1240,6 +1270,10 @@ static void __init spectre_v2_select_mit
+ mode = spectre_v2_select_retpoline();
+ break;
+
++ case SPECTRE_V2_CMD_IBRS:
++ mode = SPECTRE_V2_IBRS;
++ break;
++
+ case SPECTRE_V2_CMD_EIBRS:
+ mode = SPECTRE_V2_EIBRS;
+ break;
+@@ -1256,7 +1290,7 @@ static void __init spectre_v2_select_mit
+ if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+
+- if (spectre_v2_in_eibrs_mode(mode)) {
++ if (spectre_v2_in_ibrs_mode(mode)) {
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+@@ -1267,6 +1301,10 @@ static void __init spectre_v2_select_mit
+ case SPECTRE_V2_EIBRS:
+ break;
+
++ case SPECTRE_V2_IBRS:
++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
++ break;
++
+ case SPECTRE_V2_LFENCE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+@@ -1293,17 +1331,17 @@ static void __init spectre_v2_select_mit
+ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
+ /*
+- * Retpoline means the kernel is safe because it has no indirect
+- * branches. Enhanced IBRS protects firmware too, so, enable restricted
+- * speculation around firmware calls only when Enhanced IBRS isn't
+- * supported.
++ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
++ * and Enhanced IBRS protect firmware too, so enable IBRS around
++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
++ * enabled.
+ *
+ * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
+ * the user might select retpoline on the kernel command line and if
+ * the CPU supports Enhanced IBRS, kernel might un-intentionally not
+ * enable IBRS around firmware calls.
+ */
+- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
++ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
+@@ -2012,7 +2050,7 @@ static ssize_t mmio_stale_data_show_stat
+
+ static char *stibp_state(void)
+ {
+- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
++ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+ return "";
+
+ switch (spectre_v2_user_stibp) {
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Fri, 8 Jul 2022 13:36:09 -0700
+Subject: x86/speculation: Disable RRSBA behavior
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream.
+
+Some Intel processors may use alternate predictors for RETs on
+RSB-underflow. This condition may be vulnerable to Branch History
+Injection (BHI) and intramode-BTI.
+
+Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines,
+eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against
+such attacks. However, on RSB-underflow, RET target prediction may
+fallback to alternate predictors. As a result, RET's predicted target
+may get influenced by branch history.
+
+A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback
+behavior when in kernel mode. When set, RETs will not take predictions
+from alternate predictors, hence mitigating RETs as well. Support for
+this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2).
+
+For spectre v2 mitigation, when a user selects a mitigation that
+protects indirect CALLs and JMPs against BHI and intramode-BTI, set
+RRSBA_DIS_S also to protect RETs for RSB-underflow case.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[bwh: Backported to 5.15: adjust context in scattered.c]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/include/asm/msr-index.h | 9 +++++++++
+ arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/scattered.c | 1 +
+ tools/arch/x86/include/asm/msr-index.h | 9 +++++++++
+ 5 files changed, 46 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -293,7 +293,7 @@
+ /* FREE! (11*32+ 8) */
+ /* FREE! (11*32+ 9) */
+ #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+-/* FREE! (11*32+11) */
++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
+@@ -139,6 +141,13 @@
+ * bit available to control VERW
+ * behavior.
+ */
++#define ARCH_CAP_RRSBA BIT(19) /*
++ * Indicates RET may use predictors
++ * other than the RSB. With eIBRS
++ * enabled predictions in kernel mode
++ * are restricted to targets in
++ * kernel.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1274,6 +1274,22 @@ static enum spectre_v2_mitigation __init
+ return SPECTRE_V2_RETPOLINE;
+ }
+
++/* Disable in-kernel use of non-RSB RET predictors */
++static void __init spec_ctrl_disable_kernel_rrsba(void)
++{
++ u64 ia32_cap;
++
++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
++ return;
++
++ ia32_cap = x86_read_arch_cap_msr();
++
++ if (ia32_cap & ARCH_CAP_RRSBA) {
++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
++ }
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1368,6 +1384,16 @@ static void __init spectre_v2_select_mit
+ break;
+ }
+
++ /*
++ * Disable alternate RSB predictions in kernel when indirect CALLs and
++ * JMPs gets protection against BHI and Intramode-BTI, but RET
++ * prediction from a non-RSB predictor is still a risk.
++ */
++ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
++ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++ mode == SPECTRE_V2_RETPOLINE)
++ spec_ctrl_disable_kernel_rrsba();
++
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -26,6 +26,7 @@ struct cpuid_bit {
+ static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+--- a/tools/arch/x86/include/asm/msr-index.h
++++ b/tools/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
+@@ -138,6 +140,13 @@
+ * bit available to control VERW
+ * behavior.
+ */
++#define ARCH_CAP_RRSBA BIT(19) /*
++ * Indicates RET may use predictors
++ * other than the RSB. With eIBRS
++ * enabled predictions in kernel mode
++ * are restricted to targets in
++ * kernel.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:15 +0200
+Subject: x86/speculation: Fill RSB on vmexit for IBRS
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream.
+
+Prevent RSB underflow/poisoning attacks with RSB. While at it, add a
+bunch of comments to attempt to document the current state of tribal
+knowledge about RSB attacks and what exactly is being mitigated.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 2 -
+ arch/x86/kernel/cpu/bugs.c | 63 ++++++++++++++++++++++++++++++++++---
+ arch/x86/kvm/vmx/vmenter.S | 6 +--
+ 3 files changed, 62 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -204,7 +204,7 @@
+ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+ #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+-/* FREE! ( 7*32+13) */
++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
+ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1357,17 +1357,70 @@ static void __init spectre_v2_select_mit
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+- * If spectre v2 protection has been enabled, unconditionally fill
+- * RSB during a context switch; this protects against two independent
+- * issues:
++ * If Spectre v2 protection has been enabled, fill the RSB during a
++ * context switch. In general there are two types of RSB attacks
++ * across context switches, for which the CALLs/RETs may be unbalanced.
+ *
+- * - RSB underflow (and switch to BTB) on Skylake+
+- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
++ * 1) RSB underflow
++ *
++ * Some Intel parts have "bottomless RSB". When the RSB is empty,
++ * speculated return targets may come from the branch predictor,
++ * which could have a user-poisoned BTB or BHB entry.
++ *
++ * AMD has it even worse: *all* returns are speculated from the BTB,
++ * regardless of the state of the RSB.
++ *
++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
++ * scenario is mitigated by the IBRS branch prediction isolation
++ * properties, so the RSB buffer filling wouldn't be necessary to
++ * protect against this type of attack.
++ *
++ * The "user -> user" attack scenario is mitigated by RSB filling.
++ *
++ * 2) Poisoned RSB entry
++ *
++ * If the 'next' in-kernel return stack is shorter than 'prev',
++ * 'next' could be tricked into speculating with a user-poisoned RSB
++ * entry.
++ *
++ * The "user -> kernel" attack scenario is mitigated by SMEP and
++ * eIBRS.
++ *
++ * The "user -> user" scenario, also known as SpectreBHB, requires
++ * RSB clearing.
++ *
++ * So to mitigate all cases, unconditionally fill RSB on context
++ * switches.
++ *
++ * FIXME: Is this pointless for retbleed-affected AMD?
+ */
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
+ /*
++ * Similar to context switches, there are two types of RSB attacks
++ * after vmexit:
++ *
++ * 1) RSB underflow
++ *
++ * 2) Poisoned RSB entry
++ *
++ * When retpoline is enabled, both are mitigated by filling/clearing
++ * the RSB.
++ *
++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++ * prediction isolation protections, RSB still needs to be cleared
++ * because of #2. Note that SMEP provides no protection here, unlike
++ * user-space-poisoned RSB entries.
++ *
++ * eIBRS, on the other hand, has RSB-poisoning protections, so it
++ * doesn't need RSB clearing after vmexit.
++ */
++ if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
++ boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++
++ /*
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+ * and Enhanced IBRS protect firmware too, so enable IBRS around
+ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -193,15 +193,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL
+ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+ * the first unbalanced RET after vmexit!
+ *
+- * For retpoline, RSB filling is needed to prevent poisoned RSB entries
+- * and (in some cases) RSB underflow.
++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
++ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+ * need the RSB filling sequence. But it does need to be enabled
+ * before the first unbalanced RET.
+ */
+
+- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+
+ pop %_ASM_ARG2 /* @flags */
+ pop %_ASM_ARG1 /* @vmx */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:06 +0200
+Subject: x86/speculation: Fix firmware entry SPEC_CTRL handling
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream.
+
+The firmware entry code may accidentally clear STIBP or SSBD. Fix that.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -286,18 +286,16 @@ extern u64 spec_ctrl_current(void);
+ */
+ #define firmware_restrict_branch_speculation_start() \
+ do { \
+- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
+- \
+ preempt_disable(); \
+- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
++ spec_ctrl_current() | SPEC_CTRL_IBRS, \
+ X86_FEATURE_USE_IBRS_FW); \
+ } while (0)
+
+ #define firmware_restrict_branch_speculation_end() \
+ do { \
+- u64 val = x86_spec_ctrl_base; \
+- \
+- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
++ spec_ctrl_current(), \
+ X86_FEATURE_USE_IBRS_FW); \
+ preempt_enable(); \
+ } while (0)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:05 +0200
+Subject: x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream.
+
+If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants
+to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be
+silently disabled.
+
+There's nothing retpoline-specific about RSB buffer filling. Remove the
+CONFIG_RETPOLINE guards around it.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S | 2 --
+ arch/x86/entry/entry_64.S | 2 --
+ arch/x86/include/asm/nospec-branch.h | 2 --
+ 3 files changed, 6 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -782,7 +782,6 @@ SYM_CODE_START(__switch_to_asm)
+ movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+ #endif
+
+-#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+@@ -791,7 +790,6 @@ SYM_CODE_START(__switch_to_asm)
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+
+ /* Restore flags or the incoming task to restore AC state. */
+ popfl
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -249,7 +249,6 @@ SYM_FUNC_START(__switch_to_asm)
+ movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+ #endif
+
+-#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+@@ -258,7 +257,6 @@ SYM_FUNC_START(__switch_to_asm)
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+
+ /* restore callee-saved registers */
+ popq %r15
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -122,11 +122,9 @@
+ * monstrosity above, manually.
+ */
+ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+-#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+ __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+ .Lskip_rsb_\@:
+-#endif
+ .endm
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:07 +0200
+Subject: x86/speculation: Fix SPEC_CTRL write on SMT state change
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream.
+
+If the SMT state changes, SSBD might get accidentally disabled. Fix
+that.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1414,7 +1414,8 @@ static void __init spectre_v2_select_mit
+
+ static void update_stibp_msr(void * __unused)
+ {
+- write_spec_ctrl_current(x86_spec_ctrl_base, true);
++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
++ write_spec_ctrl_current(val, true);
+ }
+
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Fri, 17 Jun 2022 12:12:48 -0700
+Subject: x86/speculation: Remove x86_spec_ctrl_mask
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream.
+
+This mask has been made redundant by kvm_spec_ctrl_test_value(). And it
+doesn't even work when MSR interception is disabled, as the guest can
+just write to SPEC_CTRL directly.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 31 +------------------------------
+ 1 file changed, 1 insertion(+), 30 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -85,12 +85,6 @@ u64 spec_ctrl_current(void)
+ EXPORT_SYMBOL_GPL(spec_ctrl_current);
+
+ /*
+- * The vendor and possibly platform specific bits which can be modified in
+- * x86_spec_ctrl_base.
+- */
+-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+-
+-/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+@@ -138,10 +132,6 @@ void __init check_bugs(void)
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+- /* Allow STIBP in MSR_SPEC_CTRL if supported */
+- if (boot_cpu_has(X86_FEATURE_STIBP))
+- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+-
+ /* Select the proper CPU mitigations before patching alternatives: */
+ spectre_v1_select_mitigation();
+ spectre_v2_select_mitigation();
+@@ -199,19 +189,10 @@ void __init check_bugs(void)
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+- u64 msrval, guestval, hostval = spec_ctrl_current();
++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+ struct thread_info *ti = current_thread_info();
+
+- /* Is MSR_SPEC_CTRL implemented ? */
+ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+- /*
+- * Restrict guest_spec_ctrl to supported values. Clear the
+- * modifiable bits in the host base value and or the
+- * modifiable bits from the guest value.
+- */
+- guestval = hostval & ~x86_spec_ctrl_mask;
+- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+-
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -1622,16 +1603,6 @@ static enum ssb_mitigation __init __ssb_
+ }
+
+ /*
+- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+- * bit in the mask to allow guests to use the mitigation even in the
+- * case where the host does not enable it.
+- */
+- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+- }
+-
+- /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Tue, 14 Jun 2022 23:16:08 +0200
+Subject: x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream.
+
+There's no need to recalculate the host value for every entry/exit.
+Just use the cached value in spec_ctrl_current().
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -199,7 +199,7 @@ void __init check_bugs(void)
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
++ u64 msrval, guestval, hostval = spec_ctrl_current();
+ struct thread_info *ti = current_thread_info();
+
+ /* Is MSR_SPEC_CTRL implemented ? */
+@@ -212,15 +212,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl,
+ guestval = hostval & ~x86_spec_ctrl_mask;
+ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+- /* SSBD controlled in MSR_SPEC_CTRL */
+- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+- static_cpu_has(X86_FEATURE_AMD_SSBD))
+- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+-
+- /* Conditional STIBP enabled? */
+- if (static_branch_unlikely(&switch_to_cond_stibp))
+- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+-
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -1353,7 +1344,6 @@ static void __init spectre_v2_select_mit
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+
+ if (spectre_v2_in_ibrs_mode(mode)) {
+- /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
--- /dev/null
+From c27c753ea6fd1237f4f96abf8b623d7bab505513 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2022 14:01:06 +0200
+Subject: x86/static_call: Serialize __static_call_fixup() properly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream.
+
+__static_call_fixup() invokes __static_call_transform() without holding
+text_mutex, which causes lockdep to complain in text_poke_bp().
+
+Adding the proper locking cures that, but as this is either used during
+early boot or during module finalizing, it's not required to use
+text_poke_bp(). Add an argument to __static_call_transform() which tells
+it to use text_poke_early() for it.
+
+Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/static_call.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -20,7 +20,8 @@ static const u8 tramp_ud[] = { 0x0f, 0xb
+
+ static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
+
+-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
++static void __ref __static_call_transform(void *insn, enum insn_type type,
++ void *func, bool modinit)
+ {
+ int size = CALL_INSN_SIZE;
+ const void *code;
+@@ -49,7 +50,7 @@ static void __ref __static_call_transfor
+ if (memcmp(insn, code, size) == 0)
+ return;
+
+- if (unlikely(system_state == SYSTEM_BOOTING))
++ if (system_state == SYSTEM_BOOTING || modinit)
+ return text_poke_early(insn, code, size);
+
+ text_poke_bp(insn, code, size, NULL);
+@@ -96,12 +97,12 @@ void arch_static_call_transform(void *si
+
+ if (tramp) {
+ __static_call_validate(tramp, true);
+- __static_call_transform(tramp, __sc_insn(!func, true), func);
++ __static_call_transform(tramp, __sc_insn(!func, true), func, false);
+ }
+
+ if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
+ __static_call_validate(site, tail);
+- __static_call_transform(site, __sc_insn(!func, tail), func);
++ __static_call_transform(site, __sc_insn(!func, tail), func, false);
+ }
+
+ mutex_unlock(&text_mutex);
+@@ -127,8 +128,10 @@ bool __static_call_fixup(void *tramp, u8
+ return false;
+ }
+
++ mutex_lock(&text_mutex);
+ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
+- __static_call_transform(tramp, RET, NULL);
++ __static_call_transform(tramp, RET, NULL, true);
++ mutex_unlock(&text_mutex);
+
+ return true;
+ }
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:39 +0200
+Subject: x86,static_call: Use alternative RET encoding
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream.
+
+In addition to teaching static_call about the new way to spell 'RET',
+there is an added complication in that static_call() is allowed to
+rewrite text before it is known which particular spelling is required.
+
+In order to deal with this; have a static_call specific fixup in the
+apply_return() 'alternative' patching routine that will rewrite the
+static_call trampoline to match the definite sequence.
+
+This in turn creates the problem of uniquely identifying static call
+trampolines. Currently trampolines are 8 bytes, the first 5 being the
+jmp.d32/ret sequence and the final 3 a byte sequence that spells out
+'SCT'.
+
+This sequence is used in __static_call_validate() to ensure it is
+patching a trampoline and not a random other jmp.d32. That is,
+false-positives shouldn't be plenty, but aren't a big concern.
+
+OTOH the new __static_call_fixup() must not have false-positives, and
+'SCT' decodes to the somewhat weird but semi plausible sequence:
+
+ push %rbx
+ rex.XB push %r12
+
+Additionally, there are SLS concerns with immediate jumps. Combined it
+seems like a good moment to change the signature to a single 3 byte
+trap instruction that is unique to this usage and will not ever get
+generated by accident.
+
+As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes
+to:
+
+ ud1 %esp, %ecx
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: skip validation as introduced by 2105a92748e8 ("static_call,x86: Robustify trampoline patching")]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/static_call.h | 17 ++++++++++++++++
+ arch/x86/kernel/alternative.c | 12 +++++++----
+ arch/x86/kernel/static_call.c | 38 ++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 62 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -21,6 +21,16 @@
+ * relative displacement across sections.
+ */
+
++/*
++ * The trampoline is 8 bytes and of the general form:
++ *
++ * jmp.d32 \func
++ * ud1 %esp, %ecx
++ *
++ * That trailing #UD provides both a speculation stop and serves as a unique
++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
++ * and __static_call_fixup().
++ */
+ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+@@ -34,8 +44,13 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
++#ifdef CONFIG_RETPOLINE
++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
++#else
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
++#endif
+
+
+ #define ARCH_ADD_TRAMP_KEY(name) \
+@@ -44,4 +59,6 @@
+ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
+ ".popsection \n")
+
++extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
++
+ #endif /* _ASM_STATIC_CALL_H */
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -693,18 +693,22 @@ void __init_or_module noinline apply_ret
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+- void *addr = (void *)s + *s;
++ void *dest = NULL, *addr = (void *)s + *s;
+ struct insn insn;
+ int len, ret;
+ u8 bytes[16];
+- u8 op1;
++ u8 op;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+- op1 = insn.opcode.bytes[0];
+- if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE))
++ op = insn.opcode.bytes[0];
++ if (op == JMP32_INSN_OPCODE)
++ dest = addr + insn.length + insn.immediate.value;
++
++ if (__static_call_fixup(addr, op, dest) ||
++ WARN_ON_ONCE(dest != &__x86_return_thunk))
+ continue;
+
+ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -11,6 +11,13 @@ enum insn_type {
+ RET = 3, /* tramp / site cond-tail-call */
+ };
+
++/*
++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
++ * that there is no false-positive trampoline identification while also being a
++ * speculation stop.
++ */
++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
++
+ static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
+
+ static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+@@ -32,7 +39,10 @@ static void __ref __static_call_transfor
+ break;
+
+ case RET:
+- code = &retinsn;
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
++ else
++ code = &retinsn;
+ break;
+ }
+
+@@ -97,3 +107,29 @@ void arch_static_call_transform(void *si
+ mutex_unlock(&text_mutex);
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
++
++#ifdef CONFIG_RETPOLINE
++/*
++ * This is called by apply_returns() to fix up static call trampolines,
++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
++ * having a return trampoline.
++ *
++ * The problem is that static_call() is available before determining
++ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
++ *
++ * This means that __static_call_transform() above can have overwritten the
++ * return trampoline and we now need to fix things up to be consistent.
++ */
++bool __static_call_fixup(void *tramp, u8 op, void *dest)
++{
++ if (memcmp(tramp+5, tramp_ud, 3)) {
++ /* Not a trampoline site, not our problem. */
++ return false;
++ }
++
++ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
++ __static_call_transform(tramp, RET, NULL);
++
++ return true;
++}
++#endif
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:37 +0200
+Subject: x86: Undo return-thunk damage
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream.
+
+Introduce X86_FEATURE_RETHUNK for those afflicted with needing this.
+
+ [ bp: Do only INT3 padding - simpler. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL]
+[cascardo: no IBT support]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/alternative.h | 1
+ arch/x86/include/asm/cpufeatures.h | 1
+ arch/x86/include/asm/disabled-features.h | 3 +
+ arch/x86/kernel/alternative.c | 60 +++++++++++++++++++++++++++++++
+ arch/x86/kernel/module.c | 8 +++-
+ arch/x86/kernel/vmlinux.lds.S | 7 +++
+ 6 files changed, 78 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -76,6 +76,7 @@ extern int alternatives_patched;
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+ extern void apply_retpolines(s32 *start, s32 *end);
++extern void apply_returns(s32 *start, s32 *end);
+
+ struct module;
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -296,6 +296,7 @@
+ /* FREE! (11*32+11) */
+ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -60,7 +60,8 @@
+ # define DISABLE_RETPOLINE 0
+ #else
+ # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
++ (1 << (X86_FEATURE_RETHUNK & 31)))
+ #endif
+
+ /* Force disable because it's broken beyond repair */
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -270,6 +270,7 @@ static void __init_or_module add_nops(vo
+ }
+
+ extern s32 __retpoline_sites[], __retpoline_sites_end[];
++extern s32 __return_sites[], __return_sites_end[];
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ extern s32 __smp_locks[], __smp_locks_end[];
+ void text_poke_early(void *addr, const void *opcode, size_t len);
+@@ -661,9 +662,67 @@ void __init_or_module noinline apply_ret
+ }
+ }
+
++/*
++ * Rewrite the compiler generated return thunk tail-calls.
++ *
++ * For example, convert:
++ *
++ * JMP __x86_return_thunk
++ *
++ * into:
++ *
++ * RET
++ */
++static int patch_return(void *addr, struct insn *insn, u8 *bytes)
++{
++ int i = 0;
++
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ return -1;
++
++ bytes[i++] = RET_INSN_OPCODE;
++
++ for (; i < insn->length;)
++ bytes[i++] = INT3_INSN_OPCODE;
++
++ return i;
++}
++
++void __init_or_module noinline apply_returns(s32 *start, s32 *end)
++{
++ s32 *s;
++
++ for (s = start; s < end; s++) {
++ void *addr = (void *)s + *s;
++ struct insn insn;
++ int len, ret;
++ u8 bytes[16];
++ u8 op1;
++
++ ret = insn_decode_kernel(&insn, addr);
++ if (WARN_ON_ONCE(ret < 0))
++ continue;
++
++ op1 = insn.opcode.bytes[0];
++ if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE))
++ continue;
++
++ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
++ addr, addr, insn.length,
++ addr + insn.length + insn.immediate.value);
++
++ len = patch_return(addr, &insn, bytes);
++ if (len == insn.length) {
++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
++ text_poke_early(addr, bytes, len);
++ }
++ }
++}
+ #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+
+ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+
+ #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+
+@@ -956,6 +1015,7 @@ void __init alternative_instructions(voi
+ * those can rewrite the retpoline thunks.
+ */
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
++ apply_returns(__return_sites, __return_sites_end);
+
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -252,7 +252,7 @@ int module_finalize(const Elf_Ehdr *hdr,
+ {
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+ *para = NULL, *orc = NULL, *orc_ip = NULL,
+- *retpolines = NULL;
++ *retpolines = NULL, *returns = NULL;
+ char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -270,12 +270,18 @@ int module_finalize(const Elf_Ehdr *hdr,
+ orc_ip = s;
+ if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
+ retpolines = s;
++ if (!strcmp(".return_sites", secstrings + s->sh_name))
++ returns = s;
+ }
+
+ if (retpolines) {
+ void *rseg = (void *)retpolines->sh_addr;
+ apply_retpolines(rseg, rseg + retpolines->sh_size);
+ }
++ if (returns) {
++ void *rseg = (void *)returns->sh_addr;
++ apply_returns(rseg, rseg + returns->sh_size);
++ }
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -284,6 +284,13 @@ SECTIONS
+ *(.retpoline_sites)
+ __retpoline_sites_end = .;
+ }
++
++ . = ALIGN(8);
++ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
++ __return_sites = .;
++ *(.return_sites)
++ __return_sites_end = .;
++ }
+ #endif
+
+ /*
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:45 +0200
+Subject: x86: Use return-thunk in asm code
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream.
+
+Use the return thunk in asm code. If the thunk isn't needed, it will
+get patched into a RET instruction during boot by apply_returns().
+
+Since alternatives can't handle relocations outside of the first
+instruction, putting a 'jmp __x86_return_thunk' in one is not valid,
+therefore carve out the memmove ERMS path into a separate label and jump
+to it.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: no RANDSTRUCT_CFLAGS]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[bwh: Backported to 5.10: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vdso/Makefile | 1 +
+ arch/x86/include/asm/linkage.h | 8 ++++++++
+ arch/x86/lib/memmove_64.S | 7 ++++++-
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/vdso/Makefile
++++ b/arch/x86/entry/vdso/Makefile
+@@ -91,6 +91,7 @@ endif
+ endif
+
+ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
+
+ #
+ # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,19 +18,27 @@
+ #define __ALIGN_STR __stringify(__ALIGN)
+ #endif
+
++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define RET jmp __x86_return_thunk
++#else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+ #define RET ret; int3
+ #else
+ #define RET ret
+ #endif
++#endif /* CONFIG_RETPOLINE */
+
+ #else /* __ASSEMBLY__ */
+
++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define ASM_RET "jmp __x86_return_thunk\n\t"
++#else /* CONFIG_RETPOLINE */
+ #ifdef CONFIG_SLS
+ #define ASM_RET "ret; int3\n\t"
+ #else
+ #define ASM_RET "ret\n\t"
+ #endif
++#endif /* CONFIG_RETPOLINE */
+
+ #endif /* __ASSEMBLY__ */
+
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+ /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+ ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
++ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
+
+ /*
+ * movsq instruction have many startup latency
+@@ -206,6 +206,11 @@ SYM_FUNC_START(__memmove)
+ movb %r11b, (%rdi)
+ 13:
+ RET
++
++.Lmemmove_erms:
++ movq %rdx, %rcx
++ rep movsb
++ RET
+ SYM_FUNC_END(__memmove)
+ SYM_FUNC_END_ALIAS(memmove)
+ EXPORT_SYMBOL(__memmove)
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:15:43 +0200
+Subject: x86/vsyscall_emu/64: Don't use RET in vsyscall emulation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 15583e514eb16744b80be85dea0774ece153177d upstream.
+
+This is userspace code and doesn't play by the normal kernel rules.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+@@ -19,17 +19,20 @@ __vsyscall_page:
+
+ mov $__NR_gettimeofday, %rax
+ syscall
+- RET
++ ret
++ int3
+
+ .balign 1024, 0xcc
+ mov $__NR_time, %rax
+ syscall
+- RET
++ ret
++ int3
+
+ .balign 1024, 0xcc
+ mov $__NR_getcpu, %rax
+ syscall
+- RET
++ ret
++ int3
+
+ .balign 4096, 0xcc
+
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 14 Jun 2022 23:16:00 +0200
+Subject: x86/xen: Rename SYS* entry points
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream.
+
+Native SYS{CALL,ENTER} entry points are called
+entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are
+named consistently.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/setup.c | 6 +++---
+ arch/x86/xen/xen-asm.S | 20 ++++++++++----------
+ arch/x86/xen/xen-ops.h | 6 +++---
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -922,7 +922,7 @@ void xen_enable_sysenter(void)
+ if (!boot_cpu_has(sysenter_feature))
+ return;
+
+- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
++ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
+ if(ret != 0)
+ setup_clear_cpu_cap(sysenter_feature);
+ }
+@@ -931,7 +931,7 @@ void xen_enable_syscall(void)
+ {
+ int ret;
+
+- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
++ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
+ if (ret != 0) {
+ printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
+ /* Pretty fatal; 64-bit userspace has no other
+@@ -940,7 +940,7 @@ void xen_enable_syscall(void)
+
+ if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
+ ret = register_callback(CALLBACKTYPE_syscall32,
+- xen_syscall32_target);
++ xen_entry_SYSCALL_compat);
+ if (ret != 0)
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+ }
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -276,7 +276,7 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+ */
+
+ /* Normal 64-bit system call target */
+-SYM_CODE_START(xen_syscall_target)
++SYM_CODE_START(xen_entry_SYSCALL_64)
+ UNWIND_HINT_EMPTY
+ popq %rcx
+ popq %r11
+@@ -290,12 +290,12 @@ SYM_CODE_START(xen_syscall_target)
+ movq $__USER_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_64_after_hwframe
+-SYM_CODE_END(xen_syscall_target)
++SYM_CODE_END(xen_entry_SYSCALL_64)
+
+ #ifdef CONFIG_IA32_EMULATION
+
+ /* 32-bit compat syscall target */
+-SYM_CODE_START(xen_syscall32_target)
++SYM_CODE_START(xen_entry_SYSCALL_compat)
+ UNWIND_HINT_EMPTY
+ popq %rcx
+ popq %r11
+@@ -309,10 +309,10 @@ SYM_CODE_START(xen_syscall32_target)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_compat_after_hwframe
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+
+ /* 32-bit compat sysenter target */
+-SYM_CODE_START(xen_sysenter_target)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_EMPTY
+ /*
+ * NB: Xen is polite and clears TF from EFLAGS for us. This means
+@@ -330,18 +330,18 @@ SYM_CODE_START(xen_sysenter_target)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSENTER_compat_after_hwframe
+-SYM_CODE_END(xen_sysenter_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
+
+ #else /* !CONFIG_IA32_EMULATION */
+
+-SYM_CODE_START(xen_syscall32_target)
+-SYM_CODE_START(xen_sysenter_target)
++SYM_CODE_START(xen_entry_SYSCALL_compat)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_EMPTY
+ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
+ mov $-ENOSYS, %rax
+ pushq $0
+ jmp hypercall_iret
+-SYM_CODE_END(xen_sysenter_target)
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+
+ #endif /* CONFIG_IA32_EMULATION */
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -10,10 +10,10 @@
+ /* These are code, but not functions. Defined in entry.S */
+ extern const char xen_failsafe_callback[];
+
+-void xen_sysenter_target(void);
++void xen_entry_SYSENTER_compat(void);
+ #ifdef CONFIG_X86_64
+-void xen_syscall_target(void);
+-void xen_syscall32_target(void);
++void xen_entry_SYSCALL_64(void);
++void xen_entry_SYSCALL_compat(void);
+ #endif
+
+ extern void *xen_initial_gdt;
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:28 -0600
+Subject: x86/xen: Support objtool validation in xen-asm.S
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit cde07a4e4434ddfb9b1616ac971edf6d66329804 upstream.
+
+The OBJECT_FILES_NON_STANDARD annotation is used to tell objtool to
+ignore a file. File-level ignores won't work when validating vmlinux.o.
+
+Tweak the ELF metadata and unwind hints to allow objtool to follow the
+code.
+
+Cc: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/8b042a09c69e8645f3b133ef6653ba28f896807d.1611263462.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/Makefile | 1 -
+ arch/x86/xen/xen-asm.S | 29 +++++++++++++++++++----------
+ 2 files changed, 19 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/xen/Makefile
++++ b/arch/x86/xen/Makefile
+@@ -1,5 +1,4 @@
+ # SPDX-License-Identifier: GPL-2.0
+-OBJECT_FILES_NON_STANDARD_xen-asm.o := y
+
+ ifdef CONFIG_FUNCTION_TRACER
+ # Do not profile debug and lowlevel utilities
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -14,6 +14,7 @@
+ #include <asm/thread_info.h>
+ #include <asm/asm.h>
+ #include <asm/frame.h>
++#include <asm/unwind_hints.h>
+
+ #include <xen/interface/xen.h>
+
+@@ -147,6 +148,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
+
+ .macro xen_pv_trap name
+ SYM_CODE_START(xen_\name)
++ UNWIND_HINT_EMPTY
+ pop %rcx
+ pop %r11
+ jmp \name
+@@ -186,6 +188,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callb
+ SYM_CODE_START(xen_early_idt_handler_array)
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
++ UNWIND_HINT_EMPTY
+ pop %rcx
+ pop %r11
+ jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+@@ -212,11 +215,13 @@ hypercall_iret = hypercall_page + __HYPE
+ * rsp->rax }
+ */
+ SYM_CODE_START(xen_iret)
++ UNWIND_HINT_EMPTY
+ pushq $0
+ jmp hypercall_iret
+ SYM_CODE_END(xen_iret)
+
+ SYM_CODE_START(xen_sysret64)
++ UNWIND_HINT_EMPTY
+ /*
+ * We're already on the usermode stack at this point, but
+ * still with the kernel gs, so we can easily switch back.
+@@ -271,7 +276,8 @@ SYM_CODE_END(xenpv_restore_regs_and_retu
+ */
+
+ /* Normal 64-bit system call target */
+-SYM_FUNC_START(xen_syscall_target)
++SYM_CODE_START(xen_syscall_target)
++ UNWIND_HINT_EMPTY
+ popq %rcx
+ popq %r11
+
+@@ -284,12 +290,13 @@ SYM_FUNC_START(xen_syscall_target)
+ movq $__USER_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_64_after_hwframe
+-SYM_FUNC_END(xen_syscall_target)
++SYM_CODE_END(xen_syscall_target)
+
+ #ifdef CONFIG_IA32_EMULATION
+
+ /* 32-bit compat syscall target */
+-SYM_FUNC_START(xen_syscall32_target)
++SYM_CODE_START(xen_syscall32_target)
++ UNWIND_HINT_EMPTY
+ popq %rcx
+ popq %r11
+
+@@ -302,10 +309,11 @@ SYM_FUNC_START(xen_syscall32_target)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_compat_after_hwframe
+-SYM_FUNC_END(xen_syscall32_target)
++SYM_CODE_END(xen_syscall32_target)
+
+ /* 32-bit compat sysenter target */
+-SYM_FUNC_START(xen_sysenter_target)
++SYM_CODE_START(xen_sysenter_target)
++ UNWIND_HINT_EMPTY
+ /*
+ * NB: Xen is polite and clears TF from EFLAGS for us. This means
+ * that we don't need to guard against single step exceptions here.
+@@ -322,17 +330,18 @@ SYM_FUNC_START(xen_sysenter_target)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSENTER_compat_after_hwframe
+-SYM_FUNC_END(xen_sysenter_target)
++SYM_CODE_END(xen_sysenter_target)
+
+ #else /* !CONFIG_IA32_EMULATION */
+
+-SYM_FUNC_START_ALIAS(xen_syscall32_target)
+-SYM_FUNC_START(xen_sysenter_target)
++SYM_CODE_START(xen_syscall32_target)
++SYM_CODE_START(xen_sysenter_target)
++ UNWIND_HINT_EMPTY
+ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
+ mov $-ENOSYS, %rax
+ pushq $0
+ jmp hypercall_iret
+-SYM_FUNC_END(xen_sysenter_target)
+-SYM_FUNC_END_ALIAS(xen_syscall32_target)
++SYM_CODE_END(xen_sysenter_target)
++SYM_CODE_END(xen_syscall32_target)
+
+ #endif /* CONFIG_IA32_EMULATION */
--- /dev/null
+From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 21 Jan 2021 15:29:29 -0600
+Subject: x86/xen: Support objtool vmlinux.o validation in xen-head.S
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit f4b4bc10b0b85ec66f1a9bf5dddf475e6695b6d2 upstream.
+
+The Xen hypercall page is filled with zeros, causing objtool to fall
+through all the empty hypercall functions until it reaches a real
+function, resulting in a stack state mismatch.
+
+The build-time contents of the hypercall page don't matter because the
+page gets rewritten by the hypervisor. Make it more palatable to
+objtool by making each hypervisor function a true empty function, with
+nops and a return.
+
+Cc: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/0883bde1d7a1fb3b6a4c952bc0200e873752f609.1611263462.git.jpoimboe@redhat.com
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/xen/xen-head.S | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -68,8 +68,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle)
+ .balign PAGE_SIZE
+ SYM_CODE_START(hypercall_page)
+ .rept (PAGE_SIZE / 32)
+- UNWIND_HINT_EMPTY
+- .skip 32
++ UNWIND_HINT_FUNC
++ .skip 31, 0x90
++ ret
+ .endr
+
+ #define HYPERCALL(n) \