git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
tracing: Replace syscall RCU pointer assignment with READ/WRITE_ONCE()
author: Steven Rostedt <rostedt@goodmis.org>
Tue, 23 Sep 2025 13:04:58 +0000 (09:04 -0400)
committer: Steven Rostedt (Google) <rostedt@goodmis.org>
Tue, 23 Sep 2025 13:29:29 +0000 (09:29 -0400)
The syscall events are pseudo events that hook to the raw syscalls. The
ftrace_syscall_enter/exit() callback is called by the raw_syscall
enter/exit tracepoints respectively whenever any of the syscall events are
enabled.

The trace_array has an array of syscall "files" that correspond to the
system calls based on their __NR_SYSCALL number. The array is read, and if
the entry is a pointer to a trace_event_file, that syscall event is
considered enabled; if it is NULL, that syscall event is considered disabled.

Currently it uses rcu_dereference_sched() to get this pointer and
rcu_assign_pointer() or RCU_INIT_POINTER() to write to it. This is unnecessary
as the file pointer will not go away outside the synchronization of the
tracepoint logic itself, and this code adds no extra RCU synchronization
that depends on them.

Replace these functions with a simple READ_ONCE() and WRITE_ONCE() which
is all they need. This will also allow this code to not depend on
preemption being disabled as system call tracepoints are now allowed to
fault.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Takaya Saeki <takayas@google.com>
Cc: Tom Zanussi <zanussi@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Douglas Raillard <douglas.raillard@arm.com>
Link: https://lore.kernel.org/20250923130713.594320290@kernel.org
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
kernel/trace/trace.h
kernel/trace/trace_syscalls.c

index 5f4bed5842f92803c399e105344e53290ad19868..85eabb454bee9fc7565f20dd495ceebe8a0887f9 100644 (file)
@@ -380,8 +380,8 @@ struct trace_array {
 #ifdef CONFIG_FTRACE_SYSCALLS
        int                     sys_refcount_enter;
        int                     sys_refcount_exit;
-       struct trace_event_file __rcu *enter_syscall_files[NR_syscalls];
-       struct trace_event_file __rcu *exit_syscall_files[NR_syscalls];
+       struct trace_event_file *enter_syscall_files[NR_syscalls];
+       struct trace_event_file *exit_syscall_files[NR_syscalls];
 #endif
        int                     stop_count;
        int                     clock_id;
index 46aab0ab9350a204b47c8eec4f575af627a58a40..3a0b65f891300275a030358fb727223210a64db7 100644 (file)
@@ -310,8 +310,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
        if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
                return;
 
-       /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
-       trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
+       trace_file = READ_ONCE(tr->enter_syscall_files[syscall_nr]);
        if (!trace_file)
                return;
 
@@ -356,8 +355,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
        if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
                return;
 
-       /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
-       trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
+       trace_file = READ_ONCE(tr->exit_syscall_files[syscall_nr]);
        if (!trace_file)
                return;
 
@@ -393,7 +391,7 @@ static int reg_event_syscall_enter(struct trace_event_file *file,
        if (!tr->sys_refcount_enter)
                ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
        if (!ret) {
-               rcu_assign_pointer(tr->enter_syscall_files[num], file);
+               WRITE_ONCE(tr->enter_syscall_files[num], file);
                tr->sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
@@ -411,7 +409,7 @@ static void unreg_event_syscall_enter(struct trace_event_file *file,
                return;
        mutex_lock(&syscall_trace_lock);
        tr->sys_refcount_enter--;
-       RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
+       WRITE_ONCE(tr->enter_syscall_files[num], NULL);
        if (!tr->sys_refcount_enter)
                unregister_trace_sys_enter(ftrace_syscall_enter, tr);
        mutex_unlock(&syscall_trace_lock);
@@ -431,7 +429,7 @@ static int reg_event_syscall_exit(struct trace_event_file *file,
        if (!tr->sys_refcount_exit)
                ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
        if (!ret) {
-               rcu_assign_pointer(tr->exit_syscall_files[num], file);
+               WRITE_ONCE(tr->exit_syscall_files[num], file);
                tr->sys_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
@@ -449,7 +447,7 @@ static void unreg_event_syscall_exit(struct trace_event_file *file,
                return;
        mutex_lock(&syscall_trace_lock);
        tr->sys_refcount_exit--;
-       RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
+       WRITE_ONCE(tr->exit_syscall_files[num], NULL);
        if (!tr->sys_refcount_exit)
                unregister_trace_sys_exit(ftrace_syscall_exit, tr);
        mutex_unlock(&syscall_trace_lock);