From: Josh Poimboeuf Date: Fri, 6 Jun 2025 11:04:25 +0000 (+0200) Subject: x86/fred: Play nice with invoking asm_fred_entry_from_kvm() on non-FRED hardware X-Git-Tag: v6.18-rc1~7^2~10 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=deed19b9b28724bd32e85063c60718c0a6803906;p=thirdparty%2Flinux.git x86/fred: Play nice with invoking asm_fred_entry_from_kvm() on non-FRED hardware Modify asm_fred_entry_from_kvm() to allow it to be invoked by KVM even when FRED isn't fully enabled, e.g. when running with CONFIG_X86_FRED=y on non-FRED hardware. This will allow forcing KVM to always use the FRED entry points for 64-bit kernels, which in turn will eliminate a rather gross non-CFI indirect call that KVM uses to trampoline IRQs by doing IDT lookups. The point of asm_fred_entry_from_kvm() is to bridge between C (vmx:handle_external_interrupt_irqoff()) and more C (__fred_entry_from_kvm()) while changing the calling context to appear like an interrupt (pt_regs). Making the whole thing bound by C ABI. All that remains for non-FRED hardware is to restore RSP (to undo the redzone and alignment). However the trivial change would result in code like: push %rbp mov %rsp, %rbp sub $REDZONE, %rsp and $MASK, %rsp PUSH_AND_CLEAR_REGS push %rbp POP_REGS pop %rbp <-- *objtool fail* mov %rbp, %rsp pop %rbp ret And this will confuse objtool something wicked -- it gets confused by the extra pop %rbp, not realizing the push and pop preserve the value. Rather than trying to each objtool about this, recognise that since the code is bound by C ABI on both ends and interrupts are not allowed to change pt_regs (only exceptions are) it is sufficient to PUSH_REGS in order to create pt_regs, but there is no reason to POP_REGS -- provided the callee-saved registers are preserved. So avoid clearing callee-saved regs and skip POP_REGS. [Original patch by Sean; much of this version by Josh; Changelog, comments and final form by Peterz] Originally-by: Sean Christopherson Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Sean Christopherson Link: https://lkml.kernel.org/r/20250714103441.245417052@infradead.org --- diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 94519688b0071..77e2d920a6407 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -99,7 +99,7 @@ For 32-bit we have the following conventions - kernel is built with .endif .endm -.macro CLEAR_REGS clear_bp=1 +.macro CLEAR_REGS clear_callee=1 /* * Sanitize registers of values that a speculation attack might * otherwise want to exploit. The lower registers are likely clobbered @@ -113,20 +113,19 @@ For 32-bit we have the following conventions - kernel is built with xorl %r9d, %r9d /* nospec r9 */ xorl %r10d, %r10d /* nospec r10 */ xorl %r11d, %r11d /* nospec r11 */ + .if \clear_callee xorl %ebx, %ebx /* nospec rbx */ - .if \clear_bp xorl %ebp, %ebp /* nospec rbp */ - .endif xorl %r12d, %r12d /* nospec r12 */ xorl %r13d, %r13d /* nospec r13 */ xorl %r14d, %r14d /* nospec r14 */ xorl %r15d, %r15d /* nospec r15 */ - + .endif .endm -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1 +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1 PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint - CLEAR_REGS clear_bp=\clear_bp + CLEAR_REGS clear_callee=\clear_callee .endm .macro POP_REGS pop_rdi=1 diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index 29c5c32c16c36..0d00ec8ee79a2 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -112,18 +112,37 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) push %rax /* Return RIP */ push $0 /* Error code, 0 for IRQ/NMI */ - PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0 + PUSH_AND_CLEAR_REGS clear_callee=0 unwind_hint=0 + movq %rsp, %rdi /* %rdi -> pt_regs */ + /* + * At this point: {rdi, rsi, rdx, rcx, r8, r9}, {r10, r11}, {rax, rdx} + * are clobbered, which corresponds to: arguments, extra caller-saved + * and return. All registers a C function is allowed to clobber. + * + * Notably, the callee-saved registers: {rbx, r12, r13, r14, r15} + * are untouched, with the exception of rbp, which carries the stack + * frame and will be restored before exit. + * + * Further calling another C function will not alter this state. + */ call __fred_entry_from_kvm /* Call the C entry point */ - POP_REGS - ERETS -1: + /* - * Objtool doesn't understand what ERETS does, this hint tells it that - * yes, we'll reach here and with what stack state. A save/restore pair - * isn't strictly needed, but it's the simplest form. + * When FRED, use ERETS to potentially clear NMIs, otherwise simply + * restore the stack pointer. + */ + ALTERNATIVE "nop; nop; mov %rbp, %rsp", \ + __stringify(add $C_PTREGS_SIZE, %rsp; ERETS), \ + X86_FEATURE_FRED + +1: /* + * Objtool doesn't understand ERETS, and the cfi register state is + * different from initial_func_cfi due to PUSH_REGS. Tell it the state + * is similar to where UNWIND_HINT_SAVE is. */ UNWIND_HINT_RESTORE + pop %rbp RET diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 6259b474073bc..32ba599a51f88 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -102,6 +102,7 @@ static void __used common(void) BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax); /* TLB state for the entry code */ OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);