From: Arusekk Date: Fri, 11 Jul 2025 16:48:29 +0000 (+0200) Subject: linux-user: Add syscall dispatch support X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c8e5aed246914ff6438839350e414a3bea5a8041;p=thirdparty%2Fqemu.git linux-user: Add syscall dispatch support This commit adds support for the `prctl(PR_SET_SYSCALL_USER_DISPATCH)` function in the Linux userspace emulator. It is implemented as a fully host-independent function, by forcing a SIGSYS early during syscall handling, if the PC is outside the allowed range. Since disabled SUD is indistinguishable from enabled SUD with always-allowed region length == ~0, this encoding is used instead of introducing a new flag. Tested on [uglendix][1], will probably also apply to software like tiny-wine, rpcsx, limbo, lazypoline, vicar, sysfail and endokernel, to name a few. [1]: https://sr.ht/~arusekk/uglendix Signed-off-by: Arusekk Message-ID: <20250711225226.14652-1-floss@arusekk.pl> [rth: Split out is_vdso_sigreturn region matching and other minor tweaks.] Signed-off-by: Richard Henderson --- diff --git a/linux-user/main.c b/linux-user/main.c index 4ddfc9a619c..db751c07576 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -228,6 +228,8 @@ void init_task_state(TaskState *ts) ts->start_boottime += bt.tv_nsec * (uint64_t) ticks_per_sec / NANOSECONDS_PER_SECOND; } + + ts->sys_dispatch_len = -1; } CPUArchState *cpu_copy(CPUArchState *env) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index e4dca0c20f0..cabb7bd6a8b 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -155,6 +155,11 @@ struct TaskState { /* This thread's sigaltstack, if it has one */ struct target_sigaltstack sigaltstack_used; + /* This thread's SYSCALL_USER_DISPATCH state, len=~0 means disabled */ + vaddr sys_dispatch; + vaddr sys_dispatch_selector; + abi_ulong sys_dispatch_len; + /* Start time of task after system boot in clock ticks */ uint64_t start_boottime; }; diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h index 0b04868727e..8a44714251a 100644 --- a/linux-user/signal-common.h +++ b/linux-user/signal-common.h @@ -28,6 +28,11 @@ extern abi_ulong default_rt_sigreturn; extern abi_ulong vdso_sigreturn_region_start; extern abi_ulong vdso_sigreturn_region_end; +static inline bool is_vdso_sigreturn(abi_ulong pc) +{ + return pc >= vdso_sigreturn_region_start && pc < vdso_sigreturn_region_end; +} + void setup_sigtramp(abi_ulong tramp_page); int on_sig_stack(unsigned long sp); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 91360a072c7..9098cdb9faa 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6344,6 +6344,10 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr) #endif #ifndef PR_SET_SYSCALL_USER_DISPATCH # define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifndef PR_SME_SET_VL # define PR_SME_SET_VL 63 @@ -6398,6 +6402,36 @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2) #define do_prctl_sme_set_vl do_prctl_inval1 #endif +static abi_long do_prctl_syscall_user_dispatch(CPUArchState *env, + abi_ulong arg2, abi_ulong arg3, + abi_ulong arg4, abi_ulong arg5) +{ + CPUState *cpu = env_cpu(env); + TaskState *ts = get_task_state(cpu); + + switch (arg2) { + case PR_SYS_DISPATCH_OFF: + if (arg3 || arg4 || arg5) { + return -TARGET_EINVAL; + } + ts->sys_dispatch_len = -1; + return 0; + case PR_SYS_DISPATCH_ON: + if (arg3 && arg3 + arg4 <= arg3) { + return -TARGET_EINVAL; + } + if (arg5 && !access_ok(cpu, VERIFY_READ, arg5, 1)) { + return -TARGET_EFAULT; + } + ts->sys_dispatch = arg3; + ts->sys_dispatch_len = arg4; + ts->sys_dispatch_selector = arg5; + return 0; + default: + return -TARGET_EINVAL; + } +} + static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5) { @@ -6473,6 +6507,9 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, case PR_SET_UNALIGN: return do_prctl_set_unalign(env, arg2); + case PR_SET_SYSCALL_USER_DISPATCH: + return do_prctl_syscall_user_dispatch(env, arg2, arg3, arg4, arg5); + case PR_CAP_AMBIENT: case PR_CAPBSET_READ: case PR_CAPBSET_DROP: @@ -6527,7 +6564,6 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, case PR_SET_MM: case PR_GET_SECCOMP: case PR_SET_SECCOMP: - case PR_SET_SYSCALL_USER_DISPATCH: case PR_GET_THP_DISABLE: case PR_SET_THP_DISABLE: case PR_GET_TSC: @@ -13897,12 +13933,46 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, return ret; } +static bool sys_dispatch(CPUState *cpu, TaskState *ts) +{ + abi_ptr pc; + + if (likely(ts->sys_dispatch_len == -1)) { + return false; + } + + pc = cpu->cc->get_pc(cpu); + if (likely(pc - ts->sys_dispatch < ts->sys_dispatch_len)) { + return false; + } + if (unlikely(is_vdso_sigreturn(pc))) { + return false; + } + if (likely(ts->sys_dispatch_selector)) { + uint8_t sb; + if (get_user_u8(sb, ts->sys_dispatch_selector)) { + force_sig(TARGET_SIGSEGV); + return true; + } + if (likely(sb == SYSCALL_DISPATCH_FILTER_ALLOW)) { + return false; + } + if (unlikely(sb != SYSCALL_DISPATCH_FILTER_BLOCK)) { + force_sig(TARGET_SIGSYS); + return true; + } + } + force_sig_fault(TARGET_SIGSYS, TARGET_SYS_USER_DISPATCH, pc); + return true; +} + abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6, abi_long arg7, abi_long arg8) { CPUState *cpu = env_cpu(cpu_env); + TaskState *ts = get_task_state(cpu); abi_long ret; #ifdef DEBUG_ERESTARTSYS @@ -13919,6 +13989,10 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1, } #endif + if (sys_dispatch(cpu, ts)) { + return -QEMU_ESIGRETURN; + } + record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index df26a2d28f2..cd9ff709b87 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -689,6 +689,12 @@ typedef struct target_siginfo { #define TARGET_TRAP_HWBKPT (4) /* hardware breakpoint/watchpoint */ #define TARGET_TRAP_UNK (5) /* undiagnosed trap */ +/* + * SIGSYS si_codes + */ +#define TARGET_SYS_SECCOMP (1) /* seccomp triggered */ +#define TARGET_SYS_USER_DISPATCH (2) /* syscall user dispatch triggered */ + /* * SIGEMT si_codes */