From: Charlie Jenkins Date: Thu, 20 Mar 2025 17:29:24 +0000 (-0700) Subject: entry: Inline syscall_exit_to_user_mode() X-Git-Tag: v6.16-rc1~190^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e43b8bb56e537bfc8d9076793091e7679020fc9c;p=thirdparty%2Flinux.git entry: Inline syscall_exit_to_user_mode() Similar to commit 221a164035fd ("entry: Move syscall_enter_from_user_mode() to header file"), move syscall_exit_to_user_mode() to the header file as well. Testing was done with the byte-unixbench syscall benchmark (which calls getpid) and QEMU. On riscv I measured a 7.09246% improvement, on x86 a 2.98843% improvement, on loongarch a 6.07954% improvement, and on s390 a 11.1328% improvement. The Intel bot also reported "kernel test robot noticed a 1.9% improvement of stress-ng.seek.ops_per_sec". Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-4-63e187e26041@rivosinc.com Link: https://lore.kernel.org/linux-riscv/202502051555.85ae6844-lkp@intel.com/ --- diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c970..f94f3fdf15fc0 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -14,6 +14,7 @@ #include #include +#include /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -366,6 +367,15 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on(CALLER_ADDR0); } +/** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + /** * syscall_exit_to_user_mode_work - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void) * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * exit_to_user_mode(). This function is preferred unless there is a * compelling architectural reason to use the separate functions. */ -void syscall_exit_to_user_mode(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} /** * irqentry_enter_from_user_mode - Establish state before invoking the irq handler diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 20154572ede94..a8dd1f27417cf 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -146,7 +146,7 @@ static inline bool report_single_step(unsigned long work) return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; } -static void syscall_exit_work(struct pt_regs *regs, unsigned long work) +void syscall_exit_work(struct pt_regs *regs, unsigned long work) { bool step; @@ -173,53 +173,6 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work) ptrace_report_syscall_exit(regs, step); } -/* - * Syscall specific exit to user mode preparation. Runs with interrupts - * enabled. - */ -static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) -{ - unsigned long work = READ_ONCE(current_thread_info()->syscall_work); - unsigned long nr = syscall_get_nr(current, regs); - - CT_WARN_ON(ct_state() != CT_STATE_KERNEL); - - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { - if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) - local_irq_enable(); - } - - rseq_syscall(regs); - - /* - * Do one-time syscall specific work. If these work items are - * enabled, we want to run them exactly once per syscall exit with - * interrupts enabled. - */ - if (unlikely(work & SYSCALL_WORK_EXIT)) - syscall_exit_work(regs, work); -} - -static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs) -{ - syscall_exit_to_user_mode_prepare(regs); - local_irq_disable_exit_to_user(); - exit_to_user_mode_prepare(regs); -} - -void syscall_exit_to_user_mode_work(struct pt_regs *regs) -{ - __syscall_exit_to_user_mode_work(regs); -} - -__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs) -{ - instrumentation_begin(); - __syscall_exit_to_user_mode_work(regs); - instrumentation_end(); - exit_to_user_mode(); -} - noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs) { enter_from_user_mode(regs);