From: Greg Kroah-Hartman Date: Wed, 27 Nov 2019 12:33:44 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v4.4.204~29 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3872de64808c9e42fc954d0804d9c1f97e190f61;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: futex-prevent-robust-futex-exit-race.patch y2038-futex-move-compat-implementation-into-futex.c.patch --- diff --git a/queue-4.19/futex-prevent-exit-livelock.patch b/queue-4.19/futex-prevent-exit-livelock.patch deleted file mode 100644 index 45d6ad3f00d..00000000000 --- a/queue-4.19/futex-prevent-exit-livelock.patch +++ /dev/null @@ -1,343 +0,0 @@ -From 3ef240eaff36b8119ac9e2ea17cbf41179c930ba Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner -Date: Wed, 6 Nov 2019 22:55:46 +0100 -Subject: futex: Prevent exit livelock - -From: Thomas Gleixner - -commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream. - -Oleg provided the following test case: - -int main(void) -{ - struct sched_param sp = {}; - - sp.sched_priority = 2; - assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); - - int lock = vfork(); - if (!lock) { - sp.sched_priority = 1; - assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); - _exit(0); - } - - syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0); - return 0; -} - -This creates an unkillable RT process spinning in futex_lock_pi() on a UP -machine or if the process is affine to a single CPU. The reason is: - - parent child - - set FIFO prio 2 - - vfork() -> set FIFO prio 1 - implies wait_for_child() sched_setscheduler(...) - exit() - do_exit() - .... - mm_release() - tsk->futex_state = FUTEX_STATE_EXITING; - exit_futex(); (NOOP in this case) - complete() --> wakes parent - sys_futex() - loop infinite because - tsk->futex_state == FUTEX_STATE_EXITING - -The same problem can happen just by regular preemption as well: - - task holds futex - ... - do_exit() - tsk->futex_state = FUTEX_STATE_EXITING; - - --> preemption (unrelated wakeup of some other higher prio task, e.g. timer) - - switch_to(other_task) - - return to user - sys_futex() - loop infinite as above - -Just for the fun of it the futex exit cleanup could trigger the wakeup -itself before the task sets its futex state to DEAD. - -To cure this, the handling of the exiting owner is changed so: - - - A refcount is held on the task - - - The task pointer is stored in a caller visible location - - - The caller drops all locks (hash bucket, mmap_sem) and blocks - on task::futex_exit_mutex. When the mutex is acquired then - the exiting task has completed the cleanup and the state - is consistent and can be reevaluated. - -This is not a pretty solution, but there is no choice other than returning -an error code to user space, which would break the state consistency -guarantee and open another can of problems including regressions. - -For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538 -are required as well, but for anything older than 5.3.y the backports are -going to be provided when this hits mainline as the other dependencies for -those kernels are definitely not stable material. 
- -Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems") -Reported-by: Oleg Nesterov -Signed-off-by: Thomas Gleixner -Reviewed-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Cc: Stable Team -Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de -Signed-off-by: Greg Kroah-Hartman - ---- - kernel/futex.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 91 insertions(+), 15 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1148,6 +1148,36 @@ out_error: - return ret; - } - -+/** -+ * wait_for_owner_exiting - Block until the owner has exited -+ * @exiting: Pointer to the exiting task -+ * -+ * Caller must hold a refcount on @exiting. -+ */ -+static void wait_for_owner_exiting(int ret, struct task_struct *exiting) -+{ -+ if (ret != -EBUSY) { -+ WARN_ON_ONCE(exiting); -+ return; -+ } -+ -+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) -+ return; -+ -+ mutex_lock(&exiting->futex_exit_mutex); -+ /* -+ * No point in doing state checking here. If the waiter got here -+ * while the task was in exec()->exec_futex_release() then it can -+ * have any FUTEX_STATE_* value when the waiter has acquired the -+ * mutex. OK, if running, EXITING or DEAD if it reached exit() -+ * already. Highly unlikely and not a problem. Just one more round -+ * through the futex maze. -+ */ -+ mutex_unlock(&exiting->futex_exit_mutex); -+ -+ put_task_struct(exiting); -+} -+ - static int handle_exit_race(u32 __user *uaddr, u32 uval, - struct task_struct *tsk) - { -@@ -1207,7 +1237,8 @@ static int handle_exit_race(u32 __user * - * it after doing proper sanity checks. - */ - static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, -- struct futex_pi_state **ps) -+ struct futex_pi_state **ps, -+ struct task_struct **exiting) - { - pid_t pid = uval & FUTEX_TID_MASK; - struct futex_pi_state *pi_state; -@@ -1247,7 +1278,19 @@ static int attach_to_pi_owner(u32 __user - int ret = handle_exit_race(uaddr, uval, p); - - raw_spin_unlock_irq(&p->pi_lock); -- put_task_struct(p); -+ /* -+ * If the owner task is between FUTEX_STATE_EXITING and -+ * FUTEX_STATE_DEAD then store the task pointer and keep -+ * the reference on the task struct. The calling code will -+ * drop all locks, wait for the task to reach -+ * FUTEX_STATE_DEAD and then drop the refcount. This is -+ * required to prevent a live lock when the current task -+ * preempted the exiting task between the two states. -+ */ -+ if (ret == -EBUSY) -+ *exiting = p; -+ else -+ put_task_struct(p); - return ret; - } - -@@ -1286,7 +1329,8 @@ static int attach_to_pi_owner(u32 __user - - static int lookup_pi_state(u32 __user *uaddr, u32 uval, - struct futex_hash_bucket *hb, -- union futex_key *key, struct futex_pi_state **ps) -+ union futex_key *key, struct futex_pi_state **ps, -+ struct task_struct **exiting) - { - struct futex_q *top_waiter = futex_top_waiter(hb, key); - -@@ -1301,7 +1345,7 @@ static int lookup_pi_state(u32 __user *u - * We are the first waiter - try to look up the owner based on - * @uval and attach to it. - */ -- return attach_to_pi_owner(uaddr, uval, key, ps); -+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting); - } - - static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) -@@ -1329,6 +1373,8 @@ static int lock_pi_update_atomic(u32 __u - * lookup - * @task: the task to perform the atomic lock work for. This will - * be "current" except in the case of requeue pi. 
-+ * @exiting: Pointer to store the task pointer of the owner task -+ * which is in the middle of exiting - * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) - * - * Return: -@@ -1337,11 +1383,17 @@ static int lock_pi_update_atomic(u32 __u - * - <0 - error - * - * The hb->lock and futex_key refs shall be held by the caller. -+ * -+ * @exiting is only set when the return value is -EBUSY. If so, this holds -+ * a refcount on the exiting task on return and the caller needs to drop it -+ * after waiting for the exit to complete. - */ - static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, - union futex_key *key, - struct futex_pi_state **ps, -- struct task_struct *task, int set_waiters) -+ struct task_struct *task, -+ struct task_struct **exiting, -+ int set_waiters) - { - u32 uval, newval, vpid = task_pid_vnr(task); - struct futex_q *top_waiter; -@@ -1411,7 +1463,7 @@ static int futex_lock_pi_atomic(u32 __us - * attach to the owner. If that fails, no harm done, we only - * set the FUTEX_WAITERS bit in the user space variable. - */ -- return attach_to_pi_owner(uaddr, newval, key, ps); -+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting); - } - - /** -@@ -1830,6 +1882,8 @@ void requeue_pi_wake_futex(struct futex_ - * @key1: the from futex key - * @key2: the to futex key - * @ps: address to store the pi_state pointer -+ * @exiting: Pointer to store the task pointer of the owner task -+ * which is in the middle of exiting - * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) - * - * Try and get the lock on behalf of the top waiter if we can do it atomically. -@@ -1837,16 +1891,20 @@ void requeue_pi_wake_futex(struct futex_ - * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. - * hb1 and hb2 must be held by the caller. - * -+ * @exiting is only set when the return value is -EBUSY. If so, this holds -+ * a refcount on the exiting task on return and the caller needs to drop it -+ * after waiting for the exit to complete. -+ * - * Return: - * - 0 - failed to acquire the lock atomically; - * - >0 - acquired the lock, return value is vpid of the top_waiter - * - <0 - error - */ --static int futex_proxy_trylock_atomic(u32 __user *pifutex, -- struct futex_hash_bucket *hb1, -- struct futex_hash_bucket *hb2, -- union futex_key *key1, union futex_key *key2, -- struct futex_pi_state **ps, int set_waiters) -+static int -+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, -+ struct futex_hash_bucket *hb2, union futex_key *key1, -+ union futex_key *key2, struct futex_pi_state **ps, -+ struct task_struct **exiting, int set_waiters) - { - struct futex_q *top_waiter = NULL; - u32 curval; -@@ -1883,7 +1941,7 @@ static int futex_proxy_trylock_atomic(u3 - */ - vpid = task_pid_vnr(top_waiter->task); - ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, -- set_waiters); -+ exiting, set_waiters); - if (ret == 1) { - requeue_pi_wake_futex(top_waiter, key2, hb2); - return vpid; -@@ -2012,6 +2070,8 @@ retry_private: - } - - if (requeue_pi && (task_count - nr_wake < nr_requeue)) { -+ struct task_struct *exiting = NULL; -+ - /* - * Attempt to acquire uaddr2 and wake the top waiter. If we - * intend to requeue waiters, force setting the FUTEX_WAITERS -@@ -2019,7 +2079,8 @@ retry_private: - * faults rather in the requeue loop below. 
- */ - ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, -- &key2, &pi_state, nr_requeue); -+ &key2, &pi_state, -+ &exiting, nr_requeue); - - /* - * At this point the top_waiter has either taken uaddr2 or is -@@ -2046,7 +2107,8 @@ retry_private: - * If that call succeeds then we have pi_state and an - * initial refcount on it. - */ -- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); -+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2, -+ &pi_state, &exiting); - } - - switch (ret) { -@@ -2075,6 +2137,12 @@ retry_private: - hb_waiters_dec(hb2); - put_futex_key(&key2); - put_futex_key(&key1); -+ /* -+ * Handle the case where the owner is in the middle of -+ * exiting. Wait for the exit to complete otherwise -+ * this task might loop forever, aka. live lock. -+ */ -+ wait_for_owner_exiting(ret, exiting); - cond_resched(); - goto retry; - default: -@@ -2790,6 +2858,7 @@ static int futex_lock_pi(u32 __user *uad - { - struct hrtimer_sleeper timeout, *to = NULL; - struct futex_pi_state *pi_state = NULL; -+ struct task_struct *exiting = NULL; - struct rt_mutex_waiter rt_waiter; - struct futex_hash_bucket *hb; - struct futex_q q = futex_q_init; -@@ -2817,7 +2886,8 @@ retry: - retry_private: - hb = queue_lock(&q); - -- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); -+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, -+ &exiting, 0); - if (unlikely(ret)) { - /* - * Atomic work succeeded and we got the lock, -@@ -2839,6 +2909,12 @@ retry_private: - */ - queue_unlock(hb); - put_futex_key(&q.key); -+ /* -+ * Handle the case where the owner is in the middle of -+ * exiting. Wait for the exit to complete otherwise -+ * this task might loop forever, aka. live lock. -+ */ -+ wait_for_owner_exiting(ret, exiting); - cond_resched(); - goto retry; - default: diff --git a/queue-4.19/futex-prevent-robust-futex-exit-race.patch b/queue-4.19/futex-prevent-robust-futex-exit-race.patch new file mode 100644 index 00000000000..c551328d2df --- /dev/null +++ b/queue-4.19/futex-prevent-robust-futex-exit-race.patch @@ -0,0 +1,261 @@ +From ca16d5bee59807bf04deaab0a8eccecd5061528c Mon Sep 17 00:00:00 2001 +From: Yang Tao +Date: Wed, 6 Nov 2019 22:55:35 +0100 +Subject: futex: Prevent robust futex exit race + +From: Yang Tao + +commit ca16d5bee59807bf04deaab0a8eccecd5061528c upstream. + +Robust futexes utilize the robust_list mechanism to allow the kernel to +release futexes which are held when a task exits. The exit can be voluntary +or caused by a signal or fault. This prevents that waiters block forever. + +The futex operations in user space store a pointer to the futex they are +either locking or unlocking in the op_pending member of the per task robust +list. + +After a lock operation has succeeded the futex is queued in the robust list +linked list and the op_pending pointer is cleared. + +After an unlock operation has succeeded the futex is removed from the +robust list linked list and the op_pending pointer is cleared. + +The robust list exit code checks for the pending operation and any futex +which is queued in the linked list. It carefully checks whether the futex +value is the TID of the exiting task. If so, it sets the OWNER_DIED bit and +tries to wake up a potential waiter. + +This is race free for the lock operation but unlock has two race scenarios +where waiters might not be woken up. These issues can be observed with +regular robust pthread mutexes. PI aware pthread mutexes are not affected. 
+ +(1) Unlocking task is killed after unlocking the futex value in user space + before being able to wake a waiter. + + pthread_mutex_unlock() + | + V + atomic_exchange_rel (&mutex->__data.__lock, 0) + <------------------------killed + lll_futex_wake () | + | + |(__lock = 0) + |(enter kernel) + | + V + do_exit() + exit_mm() + mm_release() + exit_robust_list() + handle_futex_death() + | + |(__lock = 0) + |(uval = 0) + | + V + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + + The sanity check which ensures that the user space futex is owned by + the exiting task prevents the wakeup of waiters which in consequence + block infinitely. + +(2) Waiting task is killed after a wakeup and before it can acquire the + futex in user space. + + OWNER WAITER + futex_wait() + pthread_mutex_unlock() | + | | + |(__lock = 0) | + | | + V | + futex_wake() ------------> wakeup() + | + |(return to userspace) + |(__lock = 0) + | + V + oldval = mutex->__data.__lock + <-----------------killed + atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, | + id | assume_other_futex_waiters, 0) | + | + | + (enter kernel)| + | + V + do_exit() + | + | + V + handle_futex_death() + | + |(__lock = 0) + |(uval = 0) + | + V + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + + The sanity check which ensures that the user space futex is owned + by the exiting task prevents the wakeup of waiters, which seems to + be correct as the exiting task does not own the futex value, but + the consequence is that other waiters wont be woken up and block + infinitely. + +In both scenarios the following conditions are true: + + - task->robust_list->list_op_pending != NULL + - user space futex value == 0 + - Regular futex (not PI) + +If these conditions are met then it is reasonably safe to wake up a +potential waiter in order to prevent the above problems. + +As this might be a false positive it can cause spurious wakeups, but the +waiter side has to handle other types of unrelated wakeups, e.g. signals +gracefully anyway. So such a spurious wakeup will not affect the +correctness of these operations. + +This workaround must not touch the user space futex value and cannot set +the OWNER_DIED bit because the lock value is 0, i.e. uncontended. Setting +OWNER_DIED in this case would result in inconsistent state and subsequently +in malfunction of the owner died handling in user space. + +The rest of the user space state is still consistent as no other task can +observe the list_op_pending entry in the exiting tasks robust list. + +The eventually woken up waiter will observe the uncontended lock value and +take it over. + +[ tglx: Massaged changelog and comment. Made the return explicit and not + depend on the subsequent check and added constants to hand into + handle_futex_death() instead of plain numbers. Fixed a few coding + style issues. 
] + +Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core") +Signed-off-by: Yang Tao +Signed-off-by: Yi Wang +Signed-off-by: Thomas Gleixner +Reviewed-by: Ingo Molnar +Acked-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1573010582-35297-1-git-send-email-wang.yi59@zte.com.cn +Link: https://lkml.kernel.org/r/20191106224555.943191378@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/futex.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 51 insertions(+), 7 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -3457,11 +3457,16 @@ err_unlock: + return ret; + } + ++/* Constants for the pending_op argument of handle_futex_death */ ++#define HANDLE_DEATH_PENDING true ++#define HANDLE_DEATH_LIST false ++ + /* + * Process a futex-list entry, check whether it's owned by the + * dying task, and do notification if so: + */ +-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) ++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, ++ bool pi, bool pending_op) + { + u32 uval, uninitialized_var(nval), mval; + int err; +@@ -3474,6 +3479,42 @@ retry: + if (get_user(uval, uaddr)) + return -1; + ++ /* ++ * Special case for regular (non PI) futexes. The unlock path in ++ * user space has two race scenarios: ++ * ++ * 1. The unlock path releases the user space futex value and ++ * before it can execute the futex() syscall to wake up ++ * waiters it is killed. ++ * ++ * 2. A woken up waiter is killed before it can acquire the ++ * futex in user space. ++ * ++ * In both cases the TID validation below prevents a wakeup of ++ * potential waiters which can cause these waiters to block ++ * forever. ++ * ++ * In both cases the following conditions are met: ++ * ++ * 1) task->robust_list->list_op_pending != NULL ++ * @pending_op == true ++ * 2) User space futex value == 0 ++ * 3) Regular futex: @pi == false ++ * ++ * If these conditions are met, it is safe to attempt waking up a ++ * potential waiter without touching the user space futex value and ++ * trying to set the OWNER_DIED bit. The user space futex value is ++ * uncontended and the rest of the user space mutex state is ++ * consistent, so a woken waiter will just take over the ++ * uncontended futex. Setting the OWNER_DIED bit would create ++ * inconsistent state and malfunction of the user space owner died ++ * handling. 
++ */ ++ if (pending_op && !pi && !uval) { ++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); ++ return 0; ++ } ++ + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + +@@ -3593,10 +3634,11 @@ void exit_robust_list(struct task_struct + * A pending lock might already be on the list, so + * don't process it twice: + */ +- if (entry != pending) ++ if (entry != pending) { + if (handle_futex_death((void __user *)entry + futex_offset, +- curr, pi)) ++ curr, pi, HANDLE_DEATH_LIST)) + return; ++ } + if (rc) + return; + entry = next_entry; +@@ -3610,9 +3652,10 @@ void exit_robust_list(struct task_struct + cond_resched(); + } + +- if (pending) ++ if (pending) { + handle_futex_death((void __user *)pending + futex_offset, +- curr, pip); ++ curr, pip, HANDLE_DEATH_PENDING); ++ } + } + + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, +@@ -3789,7 +3832,8 @@ void compat_exit_robust_list(struct task + if (entry != pending) { + void __user *uaddr = futex_uaddr(entry, futex_offset); + +- if (handle_futex_death(uaddr, curr, pi)) ++ if (handle_futex_death(uaddr, curr, pi, ++ HANDLE_DEATH_LIST)) + return; + } + if (rc) +@@ -3808,7 +3852,7 @@ void compat_exit_robust_list(struct task + if (pending) { + void __user *uaddr = futex_uaddr(pending, futex_offset); + +- handle_futex_death(uaddr, curr, pip); ++ handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); + } + } + diff --git a/queue-4.19/series b/queue-4.19/series index 35122dc811a..f6ed71565b9 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -275,7 +275,8 @@ selftests-x86-mov_ss_trap-fix-the-sysenter-test.patch selftests-x86-sigreturn-32-invalidate-ds-and-es-when-abusing-the-kernel.patch x86-pti-32-calculate-the-various-pti-cpu_entry_area-sizes-correctly-make-the-cpu_entry_area_pages-assert-precise.patch x86-entry-32-fix-fixup_espfix_stack-with-user-cr3.patch -futex-prevent-exit-livelock.patch +y2038-futex-move-compat-implementation-into-futex.c.patch +futex-prevent-robust-futex-exit-race.patch alsa-usb-audio-fix-null-dereference-at-parsing-badd.patch nfc-port100-handle-command-failure-cleanly.patch net-sysfs-fix-reference-count-leak-in-rx-netdev_queue_add_kobject.patch diff --git a/queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch b/queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch new file mode 100644 index 00000000000..a2c847f2250 --- /dev/null +++ b/queue-4.19/y2038-futex-move-compat-implementation-into-futex.c.patch @@ -0,0 +1,501 @@ +From 04e7712f4460585e5eed5b853fd8b82a9943958f Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Tue, 17 Apr 2018 16:31:07 +0200 +Subject: y2038: futex: Move compat implementation into futex.c + +From: Arnd Bergmann + +commit 04e7712f4460585e5eed5b853fd8b82a9943958f upstream. + +We are going to share the compat_sys_futex() handler between 64-bit +architectures and 32-bit architectures that need to deal with both 32-bit +and 64-bit time_t, and this is easier if both entry points are in the +same file. + +In fact, most other system call handlers do the same thing these days, so +let's follow the trend here and merge all of futex_compat.c into futex.c. + +In the process, a few minor changes have to be done to make sure everything +still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become +local symbol, and the compat version of the fetch_robust_entry() function +gets renamed to compat_fetch_robust_entry() to avoid a symbol clash. + +This is intended as a purely cosmetic patch, no behavior should +change. 
+ +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/futex.h | 8 - + kernel/Makefile | 3 + kernel/futex.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++- + kernel/futex_compat.c | 202 -------------------------------------------------- + 4 files changed, 192 insertions(+), 216 deletions(-) + +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -9,9 +9,6 @@ struct inode; + struct mm_struct; + struct task_struct; + +-extern int +-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); +- + /* + * Futexes are matched on equal values of this key. + * The key type depends on whether it's a shared or private mapping. +@@ -55,11 +52,6 @@ extern void exit_robust_list(struct task + + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); +-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +-#define futex_cmpxchg_enabled 1 +-#else +-extern int futex_cmpxchg_enabled; +-#endif + #else + static inline void exit_robust_list(struct task_struct *curr) + { +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -50,9 +50,6 @@ obj-$(CONFIG_PROFILING) += profile.o + obj-$(CONFIG_STACKTRACE) += stacktrace.o + obj-y += time/ + obj-$(CONFIG_FUTEX) += futex.o +-ifeq ($(CONFIG_COMPAT),y) +-obj-$(CONFIG_FUTEX) += futex_compat.o +-endif + obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o + obj-$(CONFIG_SMP) += smp.o + ifneq ($(CONFIG_SMP),y) +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -44,6 +44,7 @@ + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ ++#include + #include + #include + #include +@@ -173,8 +174,10 @@ + * double_lock_hb() and double_unlock_hb(), respectively. + */ + +-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG +-int __read_mostly futex_cmpxchg_enabled; ++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG ++#define futex_cmpxchg_enabled 1 ++#else ++static int __read_mostly futex_cmpxchg_enabled; + #endif + + /* +@@ -3458,7 +3461,7 @@ err_unlock: + * Process a futex-list entry, check whether it's owned by the + * dying task, and do notification if so: + */ +-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) ++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) + { + u32 uval, uninitialized_var(nval), mval; + int err; +@@ -3707,6 +3710,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + ++#ifdef CONFIG_COMPAT ++/* ++ * Fetch a robust-list pointer. Bit 0 signals PI futexes: ++ */ ++static inline int ++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, ++ compat_uptr_t __user *head, unsigned int *pi) ++{ ++ if (get_user(*uentry, head)) ++ return -EFAULT; ++ ++ *entry = compat_ptr((*uentry) & ~1); ++ *pi = (unsigned int)(*uentry) & 1; ++ ++ return 0; ++} ++ ++static void __user *futex_uaddr(struct robust_list __user *entry, ++ compat_long_t futex_offset) ++{ ++ compat_uptr_t base = ptr_to_compat(entry); ++ void __user *uaddr = compat_ptr(base + futex_offset); ++ ++ return uaddr; ++} ++ ++/* ++ * Walk curr->robust_list (very carefully, it's a userspace list!) ++ * and mark any locks found there dead, and notify any waiters. ++ * ++ * We silently return on any sign of list-walking problem. 
++ */ ++void compat_exit_robust_list(struct task_struct *curr) ++{ ++ struct compat_robust_list_head __user *head = curr->compat_robust_list; ++ struct robust_list __user *entry, *next_entry, *pending; ++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; ++ unsigned int uninitialized_var(next_pi); ++ compat_uptr_t uentry, next_uentry, upending; ++ compat_long_t futex_offset; ++ int rc; ++ ++ if (!futex_cmpxchg_enabled) ++ return; ++ ++ /* ++ * Fetch the list head (which was registered earlier, via ++ * sys_set_robust_list()): ++ */ ++ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) ++ return; ++ /* ++ * Fetch the relative futex offset: ++ */ ++ if (get_user(futex_offset, &head->futex_offset)) ++ return; ++ /* ++ * Fetch any possibly pending lock-add first, and handle it ++ * if it exists: ++ */ ++ if (compat_fetch_robust_entry(&upending, &pending, ++ &head->list_op_pending, &pip)) ++ return; ++ ++ next_entry = NULL; /* avoid warning with gcc */ ++ while (entry != (struct robust_list __user *) &head->list) { ++ /* ++ * Fetch the next entry in the list before calling ++ * handle_futex_death: ++ */ ++ rc = compat_fetch_robust_entry(&next_uentry, &next_entry, ++ (compat_uptr_t __user *)&entry->next, &next_pi); ++ /* ++ * A pending lock might already be on the list, so ++ * dont process it twice: ++ */ ++ if (entry != pending) { ++ void __user *uaddr = futex_uaddr(entry, futex_offset); ++ ++ if (handle_futex_death(uaddr, curr, pi)) ++ return; ++ } ++ if (rc) ++ return; ++ uentry = next_uentry; ++ entry = next_entry; ++ pi = next_pi; ++ /* ++ * Avoid excessively long or circular lists: ++ */ ++ if (!--limit) ++ break; ++ ++ cond_resched(); ++ } ++ if (pending) { ++ void __user *uaddr = futex_uaddr(pending, futex_offset); ++ ++ handle_futex_death(uaddr, curr, pip); ++ } ++} ++ ++COMPAT_SYSCALL_DEFINE2(set_robust_list, ++ struct compat_robust_list_head __user *, head, ++ compat_size_t, len) ++{ ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; ++ ++ if (unlikely(len != sizeof(*head))) ++ return -EINVAL; ++ ++ current->compat_robust_list = head; ++ ++ return 0; ++} ++ ++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, ++ compat_uptr_t __user *, head_ptr, ++ compat_size_t __user *, len_ptr) ++{ ++ struct compat_robust_list_head __user *head; ++ unsigned long ret; ++ struct task_struct *p; ++ ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; ++ ++ rcu_read_lock(); ++ ++ ret = -ESRCH; ++ if (!pid) ++ p = current; ++ else { ++ p = find_task_by_vpid(pid); ++ if (!p) ++ goto err_unlock; ++ } ++ ++ ret = -EPERM; ++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) ++ goto err_unlock; ++ ++ head = p->compat_robust_list; ++ rcu_read_unlock(); ++ ++ if (put_user(sizeof(*head), len_ptr)) ++ return -EFAULT; ++ return put_user(ptr_to_compat(head), head_ptr); ++ ++err_unlock: ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, ++ struct old_timespec32 __user *, utime, u32 __user *, uaddr2, ++ u32, val3) ++{ ++ struct timespec ts; ++ ktime_t t, *tp = NULL; ++ int val2 = 0; ++ int cmd = op & FUTEX_CMD_MASK; ++ ++ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || ++ cmd == FUTEX_WAIT_BITSET || ++ cmd == FUTEX_WAIT_REQUEUE_PI)) { ++ if (compat_get_timespec(&ts, utime)) ++ return -EFAULT; ++ if (!timespec_valid(&ts)) ++ return -EINVAL; ++ ++ t = timespec_to_ktime(ts); ++ if (cmd == FUTEX_WAIT) ++ t = ktime_add_safe(ktime_get(), t); ++ tp = &t; ++ } ++ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || ++ cmd 
== FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) ++ val2 = (int) (unsigned long) utime; ++ ++ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); ++} ++#endif /* CONFIG_COMPAT */ ++ + static void __init futex_detect_cmpxchg(void) + { + #ifndef CONFIG_HAVE_FUTEX_CMPXCHG +--- a/kernel/futex_compat.c ++++ /dev/null +@@ -1,202 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* +- * linux/kernel/futex_compat.c +- * +- * Futex compatibililty routines. +- * +- * Copyright 2006, Red Hat, Inc., Ingo Molnar +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +- +-/* +- * Fetch a robust-list pointer. Bit 0 signals PI futexes: +- */ +-static inline int +-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, +- compat_uptr_t __user *head, unsigned int *pi) +-{ +- if (get_user(*uentry, head)) +- return -EFAULT; +- +- *entry = compat_ptr((*uentry) & ~1); +- *pi = (unsigned int)(*uentry) & 1; +- +- return 0; +-} +- +-static void __user *futex_uaddr(struct robust_list __user *entry, +- compat_long_t futex_offset) +-{ +- compat_uptr_t base = ptr_to_compat(entry); +- void __user *uaddr = compat_ptr(base + futex_offset); +- +- return uaddr; +-} +- +-/* +- * Walk curr->robust_list (very carefully, it's a userspace list!) +- * and mark any locks found there dead, and notify any waiters. +- * +- * We silently return on any sign of list-walking problem. +- */ +-void compat_exit_robust_list(struct task_struct *curr) +-{ +- struct compat_robust_list_head __user *head = curr->compat_robust_list; +- struct robust_list __user *entry, *next_entry, *pending; +- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +- unsigned int uninitialized_var(next_pi); +- compat_uptr_t uentry, next_uentry, upending; +- compat_long_t futex_offset; +- int rc; +- +- if (!futex_cmpxchg_enabled) +- return; +- +- /* +- * Fetch the list head (which was registered earlier, via +- * sys_set_robust_list()): +- */ +- if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) +- return; +- /* +- * Fetch the relative futex offset: +- */ +- if (get_user(futex_offset, &head->futex_offset)) +- return; +- /* +- * Fetch any possibly pending lock-add first, and handle it +- * if it exists: +- */ +- if (fetch_robust_entry(&upending, &pending, +- &head->list_op_pending, &pip)) +- return; +- +- next_entry = NULL; /* avoid warning with gcc */ +- while (entry != (struct robust_list __user *) &head->list) { +- /* +- * Fetch the next entry in the list before calling +- * handle_futex_death: +- */ +- rc = fetch_robust_entry(&next_uentry, &next_entry, +- (compat_uptr_t __user *)&entry->next, &next_pi); +- /* +- * A pending lock might already be on the list, so +- * dont process it twice: +- */ +- if (entry != pending) { +- void __user *uaddr = futex_uaddr(entry, futex_offset); +- +- if (handle_futex_death(uaddr, curr, pi)) +- return; +- } +- if (rc) +- return; +- uentry = next_uentry; +- entry = next_entry; +- pi = next_pi; +- /* +- * Avoid excessively long or circular lists: +- */ +- if (!--limit) +- break; +- +- cond_resched(); +- } +- if (pending) { +- void __user *uaddr = futex_uaddr(pending, futex_offset); +- +- handle_futex_death(uaddr, curr, pip); +- } +-} +- +-COMPAT_SYSCALL_DEFINE2(set_robust_list, +- struct compat_robust_list_head __user *, head, +- compat_size_t, len) +-{ +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- if (unlikely(len != sizeof(*head))) +- return -EINVAL; +- +- current->compat_robust_list = head; +- +- return 0; +-} +- 
+-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, +- compat_uptr_t __user *, head_ptr, +- compat_size_t __user *, len_ptr) +-{ +- struct compat_robust_list_head __user *head; +- unsigned long ret; +- struct task_struct *p; +- +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- rcu_read_lock(); +- +- ret = -ESRCH; +- if (!pid) +- p = current; +- else { +- p = find_task_by_vpid(pid); +- if (!p) +- goto err_unlock; +- } +- +- ret = -EPERM; +- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) +- goto err_unlock; +- +- head = p->compat_robust_list; +- rcu_read_unlock(); +- +- if (put_user(sizeof(*head), len_ptr)) +- return -EFAULT; +- return put_user(ptr_to_compat(head), head_ptr); +- +-err_unlock: +- rcu_read_unlock(); +- +- return ret; +-} +- +-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, +- struct compat_timespec __user *, utime, u32 __user *, uaddr2, +- u32, val3) +-{ +- struct timespec ts; +- ktime_t t, *tp = NULL; +- int val2 = 0; +- int cmd = op & FUTEX_CMD_MASK; +- +- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || +- cmd == FUTEX_WAIT_BITSET || +- cmd == FUTEX_WAIT_REQUEUE_PI)) { +- if (compat_get_timespec(&ts, utime)) +- return -EFAULT; +- if (!timespec_valid(&ts)) +- return -EINVAL; +- +- t = timespec_to_ktime(ts); +- if (cmd == FUTEX_WAIT) +- t = ktime_add_safe(ktime_get(), t); +- tp = &t; +- } +- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || +- cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) +- val2 = (int) (unsigned long) utime; +- +- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); +-}
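
The changelog of futex-prevent-robust-futex-exit-race.patch above describes the user-space side of the robust-futex protocol: before the atomic lock or unlock operation the futex is published in robust_list_head::list_op_pending, after a successful lock it is queued in the robust list, and after unlock it is removed, so that exit_robust_list()/handle_futex_death() can release it if the task dies in between. The sketch below is illustrative only. It is not part of the queued patches and not glibc's implementation; the helper names (robust_init, robust_lock, robust_unlock) and the single-mutex list handling are assumptions made for brevity. It marks the window in which race (1) from that changelog occurs.

/*
 * Illustrative sketch only, NOT part of the queued patches and NOT
 * glibc's implementation.  It shows, in simplified form, the user-space
 * robust-futex protocol described in the changelog above: publish the
 * futex in list_op_pending before the atomic operation, enqueue/dequeue
 * it in the robust list, and clear the pending pointer afterwards.
 */
#include <linux/futex.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

struct robust_mutex {
	struct robust_list list;	/* link walked by exit_robust_list() */
	_Atomic uint32_t lock;		/* futex word: 0 = free, else owner TID */
};

static struct robust_list_head rhead;

static void robust_init(void)
{
	rhead.list.next = &rhead.list;			/* empty list */
	rhead.futex_offset = offsetof(struct robust_mutex, lock) -
			     offsetof(struct robust_mutex, list);
	rhead.list_op_pending = NULL;
	/* Register the list so the kernel can clean up on exit. */
	syscall(SYS_set_robust_list, &rhead, sizeof(rhead));
}

static void robust_lock(struct robust_mutex *m)
{
	uint32_t tid = (uint32_t)syscall(SYS_gettid);
	uint32_t expected;

	rhead.list_op_pending = &m->list;	/* "about to lock this one" */

	for (;;) {
		expected = 0;
		if (atomic_compare_exchange_strong(&m->lock, &expected, tid))
			break;
		/* Contended: sleep in the kernel until woken (possibly spuriously). */
		syscall(SYS_futex, &m->lock, FUTEX_WAIT, expected, NULL, NULL, 0);
	}

	/* Lock acquired: enqueue it, then clear the pending pointer. */
	m->list.next = rhead.list.next;
	rhead.list.next = &m->list;
	rhead.list_op_pending = NULL;
}

static void robust_unlock(struct robust_mutex *m)
{
	rhead.list_op_pending = &m->list;	/* "about to unlock this one" */
	rhead.list.next = m->list.next;		/* dequeue (single-mutex sketch) */

	atomic_store(&m->lock, 0);		/* release the user-space lock word */
	/*
	 * Race (1) from the changelog lives here: if the task is killed
	 * after the store above but before the FUTEX_WAKE below, only
	 * list_op_pending still points at the futex and its value is 0,
	 * which the old TID check in handle_futex_death() rejected, so a
	 * waiter was never woken.
	 */
	syscall(SYS_futex, &m->lock, FUTEX_WAKE, 1, NULL, NULL, 0);
	rhead.list_op_pending = NULL;
}

int main(void)
{
	static struct robust_mutex m;

	robust_init();
	robust_lock(&m);
	/* critical section */
	robust_unlock(&m);
	return 0;
}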