From: Jinjie Ruan Date: Wed, 28 Jan 2026 03:19:33 +0000 (+0800) Subject: entry: Inline syscall_exit_work() and syscall_trace_enter() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=31c9387d0d84bc1d643a0c30155b6d92d05c92fc;p=thirdparty%2Flinux.git entry: Inline syscall_exit_work() and syscall_trace_enter() After switching ARM64 to the generic entry code, a syscall_exit_work() appeared as a profiling hotspot because it is not inlined. Inlining both syscall_trace_enter() and syscall_exit_work() provides a performance gain when any of the work items is enabled. With audit enabled this results in a ~4% performance gain for perf bench basic syscall on a kunpeng920 system: | Metric | Baseline | Inlined | Change | | ---------- | ----------- | ----------- | ------ | | Total time | 2.353 [sec] | 2.264 [sec] | ↓3.8% | | usecs/op | 0.235374 | 0.226472 | ↓3.8% | | ops/sec | 4,248,588 | 4,415,554 | ↑3.9% | Small gains can be observed on x86 as well, though the generated code optimizes for the work case, which is counterproductive for high performance scenarios where such entry/exit work is usually avoided. Avoid this by marking the work check in syscall_enter_from_user_mode_work() unlikely, which is what the corresponding check in the exit path does already. [ tglx: Massage changelog and add the unlikely() ] Signed-off-by: Jinjie Ruan Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260128031934.3906955-14-ruanjinjie@huawei.com --- diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index bea207e32c58f..e67e3afe39ade 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -2,6 +2,7 @@ #ifndef __LINUX_ENTRYCOMMON_H #define __LINUX_ENTRYCOMMON_H +#include #include #include #include @@ -63,7 +64,58 @@ static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs } #endif -long syscall_trace_enter(struct pt_regs *regs, unsigned long work); +bool syscall_user_dispatch(struct pt_regs *regs); +long trace_syscall_enter(struct pt_regs *regs, long syscall); +void trace_syscall_exit(struct pt_regs *regs, long ret); + +static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) +{ + if (unlikely(audit_context())) { + unsigned long args[6]; + + syscall_get_arguments(current, regs, args); + audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); + } +} + +static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work) +{ + long syscall, ret = 0; + + /* + * Handle Syscall User Dispatch. This must comes first, since + * the ABI here can be something that doesn't make sense for + * other syscall_work features. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (syscall_user_dispatch(regs)) + return -1L; + } + + /* Handle ptrace */ + if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { + ret = arch_ptrace_report_syscall_entry(regs); + if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) + return -1L; + } + + /* Do seccomp after ptrace, to catch any tracer changes. */ + if (work & SYSCALL_WORK_SECCOMP) { + ret = __secure_computing(); + if (ret == -1L) + return ret; + } + + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) + syscall = trace_syscall_enter(regs, syscall); + + syscall_enter_audit(regs, syscall); + + return ret ? : syscall; +} /** * syscall_enter_from_user_mode_work - Check and handle work before invoking @@ -130,6 +182,19 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l return ret; } +/* + * If SYSCALL_EMU is set, then the only reason to report is when + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall + * instruction has been already reported in syscall_enter_from_user_mode(). + */ +static __always_inline bool report_single_step(unsigned long work) +{ + if (work & SYSCALL_WORK_SYSCALL_EMU) + return false; + + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; +} + /** * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit() * @@ -155,7 +220,32 @@ static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs * * Do one-time syscall specific work. */ -void syscall_exit_work(struct pt_regs *regs, unsigned long work); +static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work) +{ + bool step; + + /* + * If the syscall was rolled back due to syscall user dispatching, + * then the tracers below are not invoked for the same reason as + * the entry side was not invoked in syscall_trace_enter(): The ABI + * of these syscalls is unknown. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (unlikely(current->syscall_dispatch.on_dispatch)) { + current->syscall_dispatch.on_dispatch = false; + return; + } + } + + audit_syscall_exit(regs); + + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) + trace_syscall_exit(regs, syscall_get_return_value(current, regs)); + + step = report_single_step(work); + if (step || work & SYSCALL_WORK_SYSCALL_TRACE) + arch_ptrace_report_syscall_exit(regs, step); +} /** * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode diff --git a/kernel/entry/common.h b/kernel/entry/common.h deleted file mode 100644 index f6e6d02f07feb..0000000000000 --- a/kernel/entry/common.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _COMMON_H -#define _COMMON_H - -bool syscall_user_dispatch(struct pt_regs *regs); - -#endif diff --git a/kernel/entry/syscall-common.c b/kernel/entry/syscall-common.c index bb5f61f5629d4..cd4967a9c53ee 100644 --- a/kernel/entry/syscall-common.c +++ b/kernel/entry/syscall-common.c @@ -1,103 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include -#include "common.h" #define CREATE_TRACE_POINTS #include -static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) -{ - if (unlikely(audit_context())) { - unsigned long args[6]; - - syscall_get_arguments(current, regs, args); - audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); - } -} +/* Out of line to prevent tracepoint code duplication */ -long syscall_trace_enter(struct pt_regs *regs, unsigned long work) +long trace_syscall_enter(struct pt_regs *regs, long syscall) { - long syscall, ret = 0; - + trace_sys_enter(regs, syscall); /* - * Handle Syscall User Dispatch. This must comes first, since - * the ABI here can be something that doesn't make sense for - * other syscall_work features. + * Probes or BPF hooks in the tracepoint may have changed the + * system call number. Reread it. */ - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { - if (syscall_user_dispatch(regs)) - return -1L; - } - - /* Handle ptrace */ - if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { - ret = arch_ptrace_report_syscall_entry(regs); - if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) - return -1L; - } - - /* Do seccomp after ptrace, to catch any tracer changes. */ - if (work & SYSCALL_WORK_SECCOMP) { - ret = __secure_computing(); - if (ret == -1L) - return ret; - } - - /* Either of the above might have changed the syscall number */ - syscall = syscall_get_nr(current, regs); - - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { - trace_sys_enter(regs, syscall); - /* - * Probes or BPF hooks in the tracepoint may have changed the - * system call number as well. - */ - syscall = syscall_get_nr(current, regs); - } - - syscall_enter_audit(regs, syscall); - - return ret ? : syscall; + return syscall_get_nr(current, regs); } -/* - * If SYSCALL_EMU is set, then the only reason to report is when - * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall - * instruction has been already reported in syscall_enter_from_user_mode(). - */ -static inline bool report_single_step(unsigned long work) +void trace_syscall_exit(struct pt_regs *regs, long ret) { - if (work & SYSCALL_WORK_SYSCALL_EMU) - return false; - - return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; -} - -void syscall_exit_work(struct pt_regs *regs, unsigned long work) -{ - bool step; - - /* - * If the syscall was rolled back due to syscall user dispatching, - * then the tracers below are not invoked for the same reason as - * the entry side was not invoked in syscall_trace_enter(): The ABI - * of these syscalls is unknown. - */ - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { - if (unlikely(current->syscall_dispatch.on_dispatch)) { - current->syscall_dispatch.on_dispatch = false; - return; - } - } - - audit_syscall_exit(regs); - - if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, syscall_get_return_value(current, regs)); - - step = report_single_step(work); - if (step || work & SYSCALL_WORK_SYSCALL_TRACE) - arch_ptrace_report_syscall_exit(regs, step); + trace_sys_exit(regs, ret); } diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index a9055eccb27e1..d89dffcc2d642 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -2,6 +2,8 @@ /* * Copyright (C) 2020 Collabora Ltd. */ + +#include #include #include #include @@ -15,8 +17,6 @@ #include -#include "common.h" - static void trigger_sigsys(struct pt_regs *regs) { struct kernel_siginfo info;