]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
uprobes/x86: Add uprobe syscall to speed up uprobe
authorJiri Olsa <jolsa@kernel.org>
Sun, 20 Jul 2025 11:21:19 +0000 (13:21 +0200)
committerPeter Zijlstra <peterz@infradead.org>
Thu, 21 Aug 2025 18:09:20 +0000 (20:09 +0200)
Adding a new uprobe syscall that calls the uprobe handlers for a given
'breakpoint' address.

The idea is that the 'breakpoint' address calls the user space
trampoline which executes the uprobe syscall.

The syscall handler reads the return address of the initial call
to retrieve the original 'breakpoint' address. With this address
we find the related uprobe object and call its consumers.

Adding the arch_uprobe_trampoline_mapping function that provides
uprobe trampoline mapping. This mapping is backed with one global
page initialized at __init time and shared by all the mapping
instances.

We do not allow the uprobe syscall to be executed unless the caller
is running from a uprobe trampoline mapping.

The uprobe syscall ensures the consumer (bpf program) sees register
values in the state before the trampoline was called.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20250720112133.244369-10-jolsa@kernel.org
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/kernel/uprobes.c
include/linux/syscalls.h
include/linux/uprobes.h
kernel/events/uprobes.c
kernel/sys_ni.c

index 92cf0fe2291eb99b536e4de4201aec16b3472094..ced2a1deecd7ce082b786a8e7a7b094f6b74904c 100644 (file)
 333    common  io_pgetevents           sys_io_pgetevents
 334    common  rseq                    sys_rseq
 335    common  uretprobe               sys_uretprobe
+336    common  uprobe                  sys_uprobe
 # don't use numbers 387 through 423, add new calls after the last
 # 'common' entry
 424    common  pidfd_send_signal       sys_pidfd_send_signal
index 6c4dcbdd0c3c022d7cc1ef4ef0889117e79e46df..d18e1ae599016421670b98c9212dd4b63fc1b1dc 100644 (file)
@@ -752,6 +752,145 @@ void arch_uprobe_clear_state(struct mm_struct *mm)
        hlist_for_each_entry_safe(tramp, n, &state->head_tramps, node)
                destroy_uprobe_trampoline(tramp);
 }
+/*
+ * Return true when @ip falls inside a VMA of current->mm that is the
+ * special mapping described by tramp_mapping, false otherwise (including
+ * when no VMA covers @ip).
+ *
+ * This helper takes no lock itself; the caller visible in this file,
+ * in_uprobe_trampoline(), invokes it either inside a speculative section
+ * or with mmap_read_lock() held.
+ */
+static bool __in_uprobe_trampoline(unsigned long ip)
+{
+       struct vm_area_struct *vma = vma_lookup(current->mm, ip);
+
+       return vma && vma_is_special_mapping(vma, &tramp_mapping);
+}
+/*
+ * Check whether @ip lies in a uprobe trampoline mapping of current->mm.
+ *
+ * A lockless lookup is attempted first, under rcu_read_lock() and
+ * bracketed by mmap_lock_speculate_try_begin()/mmap_lock_speculate_retry().
+ * If the speculative section cannot be started, or the retry check asks
+ * for the lookup to be redone, the lookup is repeated with
+ * mmap_read_lock() held.
+ *
+ * 'found' is assigned on every path that reaches the return: either the
+ * speculative lookup ran, or 'retry' is still true and the locked lookup
+ * runs.
+ */
+static bool in_uprobe_trampoline(unsigned long ip)
+{
+       struct mm_struct *mm = current->mm;
+       bool found, retry = true;       /* retry stays true if speculation cannot begin */
+       unsigned int seq;
+
+       rcu_read_lock();
+       if (mmap_lock_speculate_try_begin(mm, &seq)) {
+               found = __in_uprobe_trampoline(ip);
+               retry = mmap_lock_speculate_retry(mm, seq);
+       }
+       rcu_read_unlock();
+
+       if (retry) {
+               /* slow path: redo the lookup under the mmap read lock */
+               mmap_read_lock(mm);
+               found = __in_uprobe_trampoline(ip);
+               mmap_read_unlock(mm);
+       }
+       return found;
+}
+
+/*
+ * See uprobe syscall trampoline; the call to the trampoline will push
+ * the return address on the stack, the trampoline itself then pushes
+ * cx, r11 and ax.
+ *
+ * The fields are laid out in the order the values sit in memory starting
+ * at the user stack pointer on syscall entry, so the whole frame can be
+ * read and written back with a single copy at regs->sp (see the uprobe
+ * syscall handler).
+ */
+struct uprobe_syscall_args {
+       unsigned long ax;       /* pushed last by the trampoline */
+       unsigned long r11;
+       unsigned long cx;       /* pushed first by the trampoline */
+       unsigned long retaddr;  /* pushed by the call into the trampoline */
+};
+/*
+ * uprobe syscall: run the uprobe consumers for the probe whose call into
+ * the uprobe trampoline led here.
+ *
+ * The saved frame (ax, r11, cx, return address) is read from the user
+ * stack at regs->sp. The probe address is taken as the saved return
+ * address minus 5 (NOTE(review): 5 is presumably the length of the call
+ * instruction placed at the probe site - confirm against the code that
+ * installs it). pt_regs is temporarily rewritten so consumers see
+ * ax/r11/cx/ip/sp as they were before the trampoline was called, then
+ * handle_syscall_uprobe() is invoked. Afterwards any changes made by the
+ * consumers are written back into the user stack frame, so that the
+ * trampoline's pops and ret pick them up.
+ *
+ * Returns 0 on the normal path, or regs->ax when a consumer changed sp.
+ * When the caller is not in a uprobe trampoline, or the user stack frame
+ * cannot be read or written, SIGILL is forced and -1 is returned.
+ */
+SYSCALL_DEFINE0(uprobe)
+{
+       struct pt_regs *regs = task_pt_regs(current);
+       struct uprobe_syscall_args args;
+       unsigned long ip, sp;
+       int err;
+
+       /* Allow execution only from uprobe trampolines. */
+       if (!in_uprobe_trampoline(regs->ip))
+               goto sigill;
+
+       err = copy_from_user(&args, (void __user *)regs->sp, sizeof(args));
+       if (err)
+               goto sigill;
+
+       ip = regs->ip;  /* ip inside the trampoline, restored before the normal return */
+
+       /*
+        * expose the "right" values of ax/r11/cx/ip/sp to uprobe_consumer/s, plus:
+        * - adjust ip to the probe address, call saved next instruction address
+        * - adjust sp to the probe's stack frame (check trampoline code)
+        */
+       regs->ax  = args.ax;
+       regs->r11 = args.r11;
+       regs->cx  = args.cx;
+       regs->ip  = args.retaddr - 5;
+       regs->sp += sizeof(args);
+       regs->orig_ax = -1;
+
+       sp = regs->sp;  /* remembered to detect a consumer changing sp */
+
+       handle_syscall_uprobe(regs, regs->ip);
+
+       /*
+        * One of the uprobe consumers has changed sp; the saved frame can
+        * no longer be written back, so there is nothing we can do, just
+        * return via iret.
+        */
+       if (regs->sp != sp) {
+               /* skip the trampoline call */
+               if (args.retaddr - 5 == regs->ip)
+                       regs->ip += 5;
+               return regs->ax;
+       }
+
+       /* point sp back at the saved frame on the user stack */
+       regs->sp -= sizeof(args);
+
+       /* for the case uprobe_consumer has changed ax/r11/cx */
+       args.ax  = regs->ax;
+       args.r11 = regs->r11;
+       args.cx  = regs->cx;
+
+       /* keep return address unless we are instructed otherwise */
+       if (args.retaddr - 5 != regs->ip)
+               args.retaddr = regs->ip;
+
+       regs->ip = ip;
+
+       err = copy_to_user((void __user *)regs->sp, &args, sizeof(args));
+       if (err)
+               goto sigill;
+
+       /* ensure sysret, see do_syscall_64() */
+       regs->r11 = regs->flags;
+       regs->cx  = regs->ip;
+       return 0;
+
+sigill:
+       force_sig(SIGILL);
+       return -1;
+}
+/*
+ * User space uprobe trampoline code, emitted into .rodata and padded to
+ * PAGE_SIZE alignment on both sides so that it occupies its own page.
+ *
+ * It saves rcx, r11 and rax on the stack (the syscall instruction
+ * overwrites rcx and r11, and rax is needed for the syscall number),
+ * issues the uprobe syscall, then pops the three registers and returns.
+ * The pushed values, together with the return address already on the
+ * stack, form the frame described by struct uprobe_syscall_args.
+ */
+asm (
+       ".pushsection .rodata\n"
+       ".balign " __stringify(PAGE_SIZE) "\n"
+       "uprobe_trampoline_entry:\n"
+       "push %rcx\n"
+       "push %r11\n"
+       "push %rax\n"
+       "movq $" __stringify(__NR_uprobe) ", %rax\n"
+       "syscall\n"
+       "pop %rax\n"
+       "pop %r11\n"
+       "pop %rcx\n"
+       "ret\n"
+       ".balign " __stringify(PAGE_SIZE) "\n"
+       ".popsection\n"
+);
+/* Start of the trampoline code; the label is defined in the asm block of this file. */
+extern u8 uprobe_trampoline_entry[];
+/*
+ * Record the page that holds the trampoline code as the first entry of
+ * tramp_mapping_pages. Per the commit description this is the single
+ * global page backing every uprobe trampoline mapping instance.
+ * Registered as a late initcall; always returns 0.
+ */
+static int __init arch_uprobes_init(void)
+{
+       tramp_mapping_pages[0] = virt_to_page(uprobe_trampoline_entry);
+       return 0;
+}
+
+late_initcall(arch_uprobes_init);
+
 #else /* 32-bit: */
 /*
  * No RIP-relative addressing on 32-bit
index 77f45e5d44139da36a5dacbf9db7b65261d13398..66c06fcdfe19e27b99eb9a187c22e022e260802f 100644 (file)
@@ -1005,6 +1005,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
 
 asmlinkage long sys_uretprobe(void);
 
+asmlinkage long sys_uprobe(void);
+
 /* pciconfig: alpha, arm, arm64, ia64, sparc */
 asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
                                unsigned long off, unsigned long len,
index b40d33aae0168bf5a04b3665ec39966ebc558658..b6b077cc7d0f2a1645e0b6b1263c881d8b31fb76 100644 (file)
@@ -239,6 +239,7 @@ extern unsigned long uprobe_get_trampoline_vaddr(void);
 extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len);
 extern void arch_uprobe_clear_state(struct mm_struct *mm);
 extern void arch_uprobe_init_state(struct mm_struct *mm);
+extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
index 2cd7a4c6f303bd106f3e4831427122425d0bc5d4..eb07e602b6c9ad35ca909e3e2b80c60ae29a7972 100644 (file)
@@ -2771,6 +2771,23 @@ out:
        rcu_read_unlock_trace();
 }
 
+void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr)
+{
+       struct uprobe *uprobe;
+       int is_swbp;    /* required by find_active_uprobe_rcu(); not read here */
+       /*
+        * Run the handler chain of the uprobe registered at @bp_vaddr, with
+        * @regs passed to the consumers. Called from the x86 uprobe syscall
+        * with @regs already adjusted to the probe site.
+        *
+        * Returns without calling any handler when no active uprobe is
+        * found at @bp_vaddr, when get_utask() fails, or when
+        * arch_uprobe_ignore() reports that this hit should be ignored.
+        *
+        * The rcu_tasks_trace guard below is scope based, so it covers the
+        * lookup and the handler chain and ends on every return path.
+        */
+       guard(rcu_tasks_trace)();
+
+       uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
+       if (!uprobe)
+               return;
+       if (!get_utask())
+               return;
+       if (arch_uprobe_ignore(&uprobe->arch, regs))
+               return;
+       handler_chain(uprobe, regs);
+}
+
 /*
  * Perform required fix-ups and disable singlestep.
  * Allow pending signals to take effect.
index c00a86931f8c6cb30d35a9d56cbcc5994add90e1..bf5d05c635ffd525afcb42fd780a0ab198e1eebe 100644 (file)
@@ -392,3 +392,4 @@ COND_SYSCALL(setuid16);
 COND_SYSCALL(rseq);
 
 COND_SYSCALL(uretprobe);
+COND_SYSCALL(uprobe);