From: Thomas Gleixner Date: Tue, 2 Jun 2026 09:09:25 +0000 (+0200) Subject: futex: Move futex task related data into a struct X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c1ffc9c6e4f8a13dd68e97920c9a24d095c6e41a;p=thirdparty%2Flinux.git futex: Move futex task related data into a struct Having all these members in task_struct along with the required #ifdeffery is annoying, does not allow efficient initializing of the data with memset() and makes extending it tedious. Move it into a data structure and fix up all usage sites. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mathieu Desnoyers Reviewed-by: André Almeida Link: https://patch.msgid.link/20260602090535.308220888@kernel.org --- diff --git a/Documentation/locking/robust-futexes.rst b/Documentation/locking/robust-futexes.rst index 6361fb01c9c1e..1423f53ea2f46 100644 --- a/Documentation/locking/robust-futexes.rst +++ b/Documentation/locking/robust-futexes.rst @@ -94,7 +94,7 @@ time, the kernel checks this user-space list: are there any robust futex locks to be cleaned up? In the common case, at do_exit() time, there is no list registered, so -the cost of robust futexes is just a simple current->robust_list != NULL +the cost of robust futexes is just a current->futex.robust_list != NULL comparison. If the thread has registered a list, then normally the list is empty. If the thread/process crashed or terminated in some incorrect way then the list might be non-empty: in this case the kernel carefully @@ -178,9 +178,9 @@ one to query the registered list pointer:: size_t __user *len_ptr); List registration is very fast: the pointer is simply stored in -current->robust_list. [Note that in the future, if robust futexes become -widespread, we could extend sys_clone() to register a robust-list head -for new threads, without the need of another syscall.] +current->futex.robust_list. [Note that in the future, if robust futexes +become widespread, we could extend sys_clone() to register a robust-list +head for new threads, without the need of another syscall.] So there is virtually zero overhead for tasks not using robust futexes, and even for robust futex users, there is only one extra syscall per diff --git a/include/linux/futex.h b/include/linux/futex.h index 9e9750f049805..563e8dd671798 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -64,14 +64,10 @@ enum { static inline void futex_init_task(struct task_struct *tsk) { - tsk->robust_list = NULL; -#ifdef CONFIG_COMPAT - tsk->compat_robust_list = NULL; -#endif - INIT_LIST_HEAD(&tsk->pi_state_list); - tsk->pi_state_cache = NULL; - tsk->futex_state = FUTEX_STATE_OK; - mutex_init(&tsk->futex_exit_mutex); + memset(&tsk->futex, 0, sizeof(tsk->futex)); + INIT_LIST_HEAD(&tsk->futex.pi_state_list); + tsk->futex.state = FUTEX_STATE_OK; + mutex_init(&tsk->futex.exit_mutex); } void futex_exit_recursive(struct task_struct *tsk); diff --git a/include/linux/futex_types.h b/include/linux/futex_types.h new file mode 100644 index 0000000000000..9c6c0dc4148d2 --- /dev/null +++ b/include/linux/futex_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FUTEX_TYPES_H +#define _LINUX_FUTEX_TYPES_H + +#ifdef CONFIG_FUTEX +#include +#include + +struct compat_robust_list_head; +struct futex_pi_state; +struct robust_list_head; + +/** + * struct futex_sched_data - Futex related per task data + * @robust_list: User space registered robust list pointer + * @compat_robust_list: User space registered robust list pointer for compat tasks + * @pi_state_list: List head for Priority Inheritance (PI) state management + * @pi_state_cache: Pointer to cache one PI state object per task + * @exit_mutex: Mutex for serializing exit + * @state: Futex handling state to handle exit races correctly + */ +struct futex_sched_data { + struct robust_list_head __user *robust_list; +#ifdef CONFIG_COMPAT + struct compat_robust_list_head __user *compat_robust_list; +#endif + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; + struct mutex exit_mutex; + unsigned int state; +}; +#else +struct futex_sched_data { }; +#endif /* !CONFIG_FUTEX */ + +#endif /* _LINUX_FUTEX_TYPES_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 368c7b4d7cb51..c88fc10e9c381 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -64,7 +65,6 @@ struct bpf_net_context; struct capture_control; struct cfs_rq; struct fs_struct; -struct futex_pi_state; struct io_context; struct io_uring_task; struct mempolicy; @@ -76,7 +76,6 @@ struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; -struct robust_list_head; struct root_domain; struct rq; struct sched_attr; @@ -1331,16 +1330,9 @@ struct task_struct { u32 closid; u32 rmid; #endif -#ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; -#ifdef CONFIG_COMPAT - struct compat_robust_list_head __user *compat_robust_list; -#endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; - struct mutex futex_exit_mutex; - unsigned int futex_state; -#endif + + struct futex_sched_data futex; + #ifdef CONFIG_PERF_EVENTS u8 perf_recursion[PERF_NR_CONTEXTS]; struct perf_event_context *perf_event_ctxp; diff --git a/kernel/exit.c b/kernel/exit.c index 25e9cb6de7e79..1b4e55b60bc2f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -988,8 +988,8 @@ void __noreturn do_exit(long code) proc_exit_connector(tsk); mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX - if (unlikely(current->pi_state_cache)) - kfree(current->pi_state_cache); + if (unlikely(current->futex.pi_state_cache)) + kfree(current->futex.pi_state_cache); #endif /* * Make sure we are holding no locks: diff --git a/kernel/futex/core.c b/kernel/futex/core.c index ff2a4fb2993f0..e7d33d2771eca 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -32,18 +32,19 @@ * "But they come in a choice of three flavours!" */ #include -#include -#include #include -#include +#include #include -#include +#include #include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include #include "futex.h" #include "../locking/rtmutex_common.h" @@ -829,7 +830,7 @@ void wait_for_owner_exiting(int ret, struct task_struct *exiting) if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) return; - mutex_lock(&exiting->futex_exit_mutex); + mutex_lock(&exiting->futex.exit_mutex); /* * No point in doing state checking here. If the waiter got here * while the task was in exec()->exec_futex_release() then it can @@ -838,7 +839,7 @@ void wait_for_owner_exiting(int ret, struct task_struct *exiting) * already. Highly unlikely and not a problem. Just one more round * through the futex maze. */ - mutex_unlock(&exiting->futex_exit_mutex); + mutex_unlock(&exiting->futex.exit_mutex); put_task_struct(exiting); } @@ -1047,7 +1048,7 @@ retry: * * In both cases the following conditions are met: * - * 1) task->robust_list->list_op_pending != NULL + * 1) task->futex.robust_list->list_op_pending != NULL * @pending_op == true * 2) The owner part of user space futex value == 0 * 3) Regular futex: @pi == false @@ -1152,7 +1153,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry, */ static void exit_robust_list(struct task_struct *curr) { - struct robust_list_head __user *head = curr->robust_list; + struct robust_list_head __user *head = curr->futex.robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1246,7 +1247,7 @@ compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **ent */ static void compat_exit_robust_list(struct task_struct *curr) { - struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct compat_robust_list_head __user *head = curr->futex.compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1322,7 +1323,7 @@ static void compat_exit_robust_list(struct task_struct *curr) */ static void exit_pi_state_list(struct task_struct *curr) { - struct list_head *next, *head = &curr->pi_state_list; + struct list_head *next, *head = &curr->futex.pi_state_list; struct futex_pi_state *pi_state; union futex_key key = FUTEX_KEY_INIT; @@ -1406,19 +1407,19 @@ static inline void exit_pi_state_list(struct task_struct *curr) { } static void futex_cleanup(struct task_struct *tsk) { - if (unlikely(tsk->robust_list)) { + if (unlikely(tsk->futex.robust_list)) { exit_robust_list(tsk); - tsk->robust_list = NULL; + tsk->futex.robust_list = NULL; } #ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { + if (unlikely(tsk->futex.compat_robust_list)) { compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; + tsk->futex.compat_robust_list = NULL; } #endif - if (unlikely(!list_empty(&tsk->pi_state_list))) + if (unlikely(!list_empty(&tsk->futex.pi_state_list))) exit_pi_state_list(tsk); } @@ -1442,23 +1443,23 @@ static void futex_cleanup(struct task_struct *tsk) void futex_exit_recursive(struct task_struct *tsk) { /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ - if (tsk->futex_state == FUTEX_STATE_EXITING) { - __assume_ctx_lock(&tsk->futex_exit_mutex); - mutex_unlock(&tsk->futex_exit_mutex); + if (tsk->futex.state == FUTEX_STATE_EXITING) { + __assume_ctx_lock(&tsk->futex.exit_mutex); + mutex_unlock(&tsk->futex.exit_mutex); } - tsk->futex_state = FUTEX_STATE_DEAD; + tsk->futex.state = FUTEX_STATE_DEAD; } static void futex_cleanup_begin(struct task_struct *tsk) - __acquires(&tsk->futex_exit_mutex) + __acquires(&tsk->futex.exit_mutex) { /* * Prevent various race issues against a concurrent incoming waiter * including live locks by forcing the waiter to block on - * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in + * tsk->futex.exit_mutex when it observes FUTEX_STATE_EXITING in * attach_to_pi_owner(). */ - mutex_lock(&tsk->futex_exit_mutex); + mutex_lock(&tsk->futex.exit_mutex); /* * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. @@ -1472,23 +1473,23 @@ static void futex_cleanup_begin(struct task_struct *tsk) * be observed in exit_pi_state_list(). */ raw_spin_lock_irq(&tsk->pi_lock); - tsk->futex_state = FUTEX_STATE_EXITING; + tsk->futex.state = FUTEX_STATE_EXITING; raw_spin_unlock_irq(&tsk->pi_lock); } static void futex_cleanup_end(struct task_struct *tsk, int state) - __releases(&tsk->futex_exit_mutex) + __releases(&tsk->futex.exit_mutex) { /* * Lockless store. The only side effect is that an observer might * take another loop until it becomes visible. */ - tsk->futex_state = state; + tsk->futex.state = state; /* * Drop the exit protection. This unblocks waiters which observed * FUTEX_STATE_EXITING to reevaluate the state. */ - mutex_unlock(&tsk->futex_exit_mutex); + mutex_unlock(&tsk->futex.exit_mutex); } void futex_exec_release(struct task_struct *tsk) diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 643199fdbe626..e037a97fe2770 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -14,7 +14,7 @@ int refill_pi_state_cache(void) { struct futex_pi_state *pi_state; - if (likely(current->pi_state_cache)) + if (likely(current->futex.pi_state_cache)) return 0; pi_state = kzalloc_obj(*pi_state); @@ -28,17 +28,17 @@ int refill_pi_state_cache(void) refcount_set(&pi_state->refcount, 1); pi_state->key = FUTEX_KEY_INIT; - current->pi_state_cache = pi_state; + current->futex.pi_state_cache = pi_state; return 0; } static struct futex_pi_state *alloc_pi_state(void) { - struct futex_pi_state *pi_state = current->pi_state_cache; + struct futex_pi_state *pi_state = current->futex.pi_state_cache; WARN_ON(!pi_state); - current->pi_state_cache = NULL; + current->futex.pi_state_cache = NULL; return pi_state; } @@ -60,7 +60,7 @@ static void pi_state_update_owner(struct futex_pi_state *pi_state, if (new_owner) { raw_spin_lock(&new_owner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &new_owner->pi_state_list); + list_add(&pi_state->list, &new_owner->futex.pi_state_list); pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); } @@ -96,7 +96,7 @@ void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } - if (current->pi_state_cache) { + if (current->futex.pi_state_cache) { kfree(pi_state); } else { /* @@ -106,7 +106,7 @@ void put_pi_state(struct futex_pi_state *pi_state) */ pi_state->owner = NULL; refcount_set(&pi_state->refcount, 1); - current->pi_state_cache = pi_state; + current->futex.pi_state_cache = pi_state; } } @@ -179,7 +179,7 @@ void put_pi_state(struct futex_pi_state *pi_state) * * p->pi_lock: * - * p->pi_state_list -> pi_state->list, relation + * p->futex.pi_state_list -> pi_state->list, relation * pi_mutex->owner -> pi_state->owner, relation * * pi_state->refcount: @@ -327,7 +327,7 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the * caller that the alleged owner is busy. */ - if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) + if (tsk && tsk->futex.state != FUTEX_STATE_DEAD) return -EBUSY; /* @@ -346,8 +346,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * *uaddr = 0xC0000000; tsk = get_task(PID); * } if (!tsk->flags & PF_EXITING) { * ... attach(); - * tsk->futex_state = } else { - * FUTEX_STATE_DEAD; if (tsk->futex_state != + * tsk->futex.state = } else { + * FUTEX_STATE_DEAD; if (tsk->futex.state != * FUTEX_STATE_DEAD) * return -EAGAIN; * return -ESRCH; <--- FAIL @@ -396,7 +396,7 @@ static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, pi_state->key = *key; WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &p->pi_state_list); + list_add(&pi_state->list, &p->futex.pi_state_list); /* * Assignment without holding pi_state->pi_mutex.wait_lock is safe * because there is no concurrency as the object is not published yet. @@ -440,7 +440,7 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, * in futex_exit_release(), we do this protected by p->pi_lock: */ raw_spin_lock_irq(&p->pi_lock); - if (unlikely(p->futex_state != FUTEX_STATE_OK)) { + if (unlikely(p->futex.state != FUTEX_STATE_OK)) { /* * The task is on the way out. When the futex state is * FUTEX_STATE_DEAD, we know that the task has finished diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 77ad9691f6a61..8944ff4930d7c 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -25,17 +25,13 @@ * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ -SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, - size_t, len) +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { - /* - * The kernel knows only one size for now: - */ + /* The kernel knows only one size for now. */ if (unlikely(len != sizeof(*head))) return -EINVAL; - current->robust_list = head; - + current->futex.robust_list = head; return 0; } @@ -43,9 +39,9 @@ static inline void __user *futex_task_robust_list(struct task_struct *p, bool co { #ifdef CONFIG_COMPAT if (compat) - return p->compat_robust_list; + return p->futex.compat_robust_list; #endif - return p->robust_list; + return p->futex.robust_list; } static void __user *futex_get_robust_list_common(int pid, bool compat) @@ -475,15 +471,13 @@ SYSCALL_DEFINE4(futex_requeue, } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, - compat_size_t, len) +COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, + compat_size_t, len) { if (unlikely(len != sizeof(*head))) return -EINVAL; - current->compat_robust_list = head; - + current->futex.compat_robust_list = head; return 0; } @@ -523,4 +517,3 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */ -