From: Greg Kroah-Hartman Date: Fri, 12 Mar 2021 10:14:21 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.4.262~122 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=939d04df2c8ebed19665e31e6d04f1397b8d562b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: cifs-return-proper-error-code-in-statfs-2.patch floppy-fix-lock_fdc-signal-handling.patch futex-change-locking-rules.patch futex-cure-exit-race.patch futex-fix-dead-code-in-attach_to_pi_owner.patch revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch --- diff --git a/queue-4.4/cifs-return-proper-error-code-in-statfs-2.patch b/queue-4.4/cifs-return-proper-error-code-in-statfs-2.patch new file mode 100644 index 00000000000..d3a915e7573 --- /dev/null +++ b/queue-4.4/cifs-return-proper-error-code-in-statfs-2.patch @@ -0,0 +1,35 @@ +From 14302ee3301b3a77b331cc14efb95bf7184c73cc Mon Sep 17 00:00:00 2001 +From: Paulo Alcantara +Date: Mon, 8 Mar 2021 12:00:49 -0300 +Subject: cifs: return proper error code in statfs(2) + +From: Paulo Alcantara + +commit 14302ee3301b3a77b331cc14efb95bf7184c73cc upstream. + +In cifs_statfs(), if server->ops->queryfs is not NULL, then we should +use its return value rather than always returning 0. Instead, use rc +variable as it is properly set to 0 in case there is no +server->ops->queryfs. 
+ +Signed-off-by: Paulo Alcantara (SUSE) +Reviewed-by: Aurelien Aptel +Reviewed-by: Ronnie Sahlberg +CC: +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/cifsfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/cifs/cifsfs.c ++++ b/fs/cifs/cifsfs.c +@@ -204,7 +204,7 @@ cifs_statfs(struct dentry *dentry, struc + rc = server->ops->queryfs(xid, tcon, buf); + + free_xid(xid); +- return 0; ++ return rc; + } + + static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) diff --git a/queue-4.4/floppy-fix-lock_fdc-signal-handling.patch b/queue-4.4/floppy-fix-lock_fdc-signal-handling.patch new file mode 100644 index 00000000000..31db29e7793 --- /dev/null +++ b/queue-4.4/floppy-fix-lock_fdc-signal-handling.patch @@ -0,0 +1,186 @@ +From a0c80efe5956ccce9fe7ae5c78542578c07bc20a Mon Sep 17 00:00:00 2001 +From: Jiri Kosina +Date: Mon, 1 Feb 2016 11:19:17 +0100 +Subject: floppy: fix lock_fdc() signal handling + +From: Jiri Kosina + +commit a0c80efe5956ccce9fe7ae5c78542578c07bc20a upstream. + +floppy_revalidate() doesn't perform any error handling on lock_fdc() +result. lock_fdc() might actually be interrupted by a signal (it waits for +fdc becoming non-busy interruptibly). In such case, floppy_revalidate() +proceeds as if it had claimed the lock, but in fact it doesn't. + +In case of multiple threads trying to open("/dev/fdX"), this leads to +serious corruptions all over the place, because all of a sudden there is +no critical section protection (that'd otherwise be guaranteed by locked +fd) whatsoever. + +While at this, fix the fact that the 'interruptible' parameter to +lock_fdc() doesn't make any sense whatsoever, because we always wait +interruptibly anyway. + +Most of the lock_fdc() callsites do properly handle error (and propagate +EINTR), but floppy_revalidate() and floppy_check_events() don't. Fix this. + +Spotted by 'syzkaller' tool. 
+ +Reported-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Signed-off-by: Jiri Kosina +Cc: Wade Mealing +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/floppy.c | 35 +++++++++++++++++++---------------- + 1 file changed, 19 insertions(+), 16 deletions(-) + +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -870,7 +870,7 @@ static void set_fdc(int drive) + } + + /* locks the driver */ +-static int lock_fdc(int drive, bool interruptible) ++static int lock_fdc(int drive) + { + if (WARN(atomic_read(&usage_count) == 0, + "Trying to lock fdc while usage count=0\n")) +@@ -2180,7 +2180,7 @@ static int do_format(int drive, struct f + { + int ret; + +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + + set_floppy(drive); +@@ -2967,7 +2967,7 @@ static int user_reset_fdc(int drive, int + { + int ret; + +- if (lock_fdc(drive, interruptible)) ++ if (lock_fdc(drive)) + return -EINTR; + + if (arg == FD_RESET_ALWAYS) +@@ -3254,7 +3254,7 @@ static int set_geometry(unsigned int cmd + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + mutex_lock(&open_lock); +- if (lock_fdc(drive, true)) { ++ if (lock_fdc(drive)) { + mutex_unlock(&open_lock); + return -EINTR; + } +@@ -3274,7 +3274,7 @@ static int set_geometry(unsigned int cmd + } else { + int oldStretch; + +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + if (cmd != FDDEFPRM) { + /* notice a disk change immediately, else +@@ -3360,7 +3360,7 @@ static int get_floppy_geometry(int drive + if (type) + *g = &floppy_type[type]; + else { +- if (lock_fdc(drive, false)) ++ if (lock_fdc(drive)) + return -EINTR; + if (poll_drive(false, 0) == -EINTR) + return -EINTR; +@@ -3462,7 +3462,7 @@ static int fd_locked_ioctl(struct block_ + if (UDRS->fd_ref != 1) + /* somebody else has this drive open */ + return -EBUSY; +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + + /* do the actual eject. 
Fails on +@@ -3474,7 +3474,7 @@ static int fd_locked_ioctl(struct block_ + process_fd_request(); + return ret; + case FDCLRPRM: +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + current_type[drive] = NULL; + floppy_sizes[drive] = MAX_DISK_SIZE << 1; +@@ -3499,7 +3499,7 @@ static int fd_locked_ioctl(struct block_ + UDP->flags &= ~FTD_MSG; + return 0; + case FDFMTBEG: +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR) + return -EINTR; +@@ -3516,7 +3516,7 @@ static int fd_locked_ioctl(struct block_ + return do_format(drive, &inparam.f); + case FDFMTEND: + case FDFLUSH: +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + return invalidate_drive(bdev); + case FDSETEMSGTRESH: +@@ -3542,7 +3542,7 @@ static int fd_locked_ioctl(struct block_ + outparam = UDP; + break; + case FDPOLLDRVSTAT: +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR) + return -EINTR; +@@ -3565,7 +3565,7 @@ static int fd_locked_ioctl(struct block_ + case FDRAWCMD: + if (type) + return -EINVAL; +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + set_floppy(drive); + i = raw_cmd_ioctl(cmd, (void __user *)param); +@@ -3574,7 +3574,7 @@ static int fd_locked_ioctl(struct block_ + process_fd_request(); + return i; + case FDTWADDLE: +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + return -EINTR; + twaddle(); + process_fd_request(); +@@ -3801,7 +3801,7 @@ static int compat_getdrvstat(int drive, + mutex_lock(&floppy_mutex); + + if (poll) { +- if (lock_fdc(drive, true)) ++ if (lock_fdc(drive)) + goto Eintr; + if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR) + goto Eintr; +@@ -4109,7 +4109,8 @@ static unsigned int floppy_check_events( + return DISK_EVENT_MEDIA_CHANGE; + + if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { +- lock_fdc(drive, false); ++ if (lock_fdc(drive)) ++ return 
-EINTR; + poll_drive(false, 0); + process_fd_request(); + } +@@ -4208,7 +4209,9 @@ static int floppy_revalidate(struct gend + "VFS: revalidate called on non-open device.\n")) + return -EFAULT; + +- lock_fdc(drive, false); ++ res = lock_fdc(drive); ++ if (res) ++ return res; + cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || + test_bit(FD_VERIFY_BIT, &UDRS->flags)); + if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) { diff --git a/queue-4.4/futex-change-locking-rules.patch b/queue-4.4/futex-change-locking-rules.patch new file mode 100644 index 00000000000..a507fbef2a1 --- /dev/null +++ b/queue-4.4/futex-change-locking-rules.patch @@ -0,0 +1,327 @@ +From foo@baz Fri Mar 12 11:10:32 AM CET 2021 +From: Zheng Yejian +Date: Thu, 11 Mar 2021 11:25:58 +0800 +Subject: futex: Change locking rules +To: , , , +Cc: , , , , +Message-ID: <20210311032600.2326035-2-zhengyejian1@huawei.com> + +From: Peter Zijlstra + +commit 734009e96d1983ad739e5b656e03430b3660c913 upstream. + +This patch comes directly from an origin patch (commit +dc3f2ff11740159080f2e8e359ae0ab57c8e74b6) in v4.9. + +Currently futex-pi relies on hb->lock to serialize everything. But hb->lock +creates another set of problems, especially priority inversions on RT where +hb->lock becomes a rt_mutex itself. + +The rt_mutex::wait_lock is the most obvious protection for keeping the +futex user space value and the kernel internal pi_state in sync. + +Rework and document the locking so rt_mutex::wait_lock is held across all +operations which modify the user space value and the pi state. + +This allows to invoke rt_mutex_unlock() (including deboost) without holding +hb->lock as a next step. + +Nothing yet relies on the new locking rules. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: dvhart@infradead.org +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170322104151.751993333@infradead.org +Signed-off-by: Thomas Gleixner +[Lee: Back-ported in support of a previous futex back-port attempt] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Zheng Yejian +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 112 insertions(+), 26 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1016,6 +1016,39 @@ static void exit_pi_state_list(struct ta + * [10] There is no transient state which leaves owner and user space + * TID out of sync. Except one error case where the kernel is denied + * write access to the user address, see fixup_pi_state_owner(). ++ * ++ * ++ * Serialization and lifetime rules: ++ * ++ * hb->lock: ++ * ++ * hb -> futex_q, relation ++ * futex_q -> pi_state, relation ++ * ++ * (cannot be raw because hb can contain arbitrary amount ++ * of futex_q's) ++ * ++ * pi_mutex->wait_lock: ++ * ++ * {uval, pi_state} ++ * ++ * (and pi_mutex 'obviously') ++ * ++ * p->pi_lock: ++ * ++ * p->pi_state_list -> pi_state->list, relation ++ * ++ * pi_state->refcount: ++ * ++ * pi_state lifetime ++ * ++ * ++ * Lock order: ++ * ++ * hb->lock ++ * pi_mutex->wait_lock ++ * p->pi_lock ++ * + */ + + /* +@@ -1023,10 +1056,12 @@ static void exit_pi_state_list(struct ta + * the pi_state against the user space value. If correct, attach to + * it. 
+ */ +-static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, ++static int attach_to_pi_state(u32 __user *uaddr, u32 uval, ++ struct futex_pi_state *pi_state, + struct futex_pi_state **ps) + { + pid_t pid = uval & FUTEX_TID_MASK; ++ int ret, uval2; + + /* + * Userspace might have messed up non-PI and PI futexes [3] +@@ -1034,9 +1069,34 @@ static int attach_to_pi_state(u32 uval, + if (unlikely(!pi_state)) + return -EINVAL; + ++ /* ++ * We get here with hb->lock held, and having found a ++ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q ++ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), ++ * which in turn means that futex_lock_pi() still has a reference on ++ * our pi_state. ++ */ + WARN_ON(!atomic_read(&pi_state->refcount)); + + /* ++ * Now that we have a pi_state, we can acquire wait_lock ++ * and do the state validation. ++ */ ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ ++ /* ++ * Since {uval, pi_state} is serialized by wait_lock, and our current ++ * uval was read without holding it, it can have changed. Verify it ++ * still is what we expect it to be, otherwise retry the entire ++ * operation. ++ */ ++ if (get_futex_value_locked(&uval2, uaddr)) ++ goto out_efault; ++ ++ if (uval != uval2) ++ goto out_eagain; ++ ++ /* + * Handle the owner died case: + */ + if (uval & FUTEX_OWNER_DIED) { +@@ -1051,11 +1111,11 @@ static int attach_to_pi_state(u32 uval, + * is not 0. Inconsistent state. [5] + */ + if (pid) +- return -EINVAL; ++ goto out_einval; + /* + * Take a ref on the state and return success. [4] + */ +- goto out_state; ++ goto out_attach; + } + + /* +@@ -1067,14 +1127,14 @@ static int attach_to_pi_state(u32 uval, + * Take a ref on the state and return success. [6] + */ + if (!pid) +- goto out_state; ++ goto out_attach; + } else { + /* + * If the owner died bit is not set, then the pi_state + * must have an owner. 
[7] + */ + if (!pi_state->owner) +- return -EINVAL; ++ goto out_einval; + } + + /* +@@ -1083,11 +1143,29 @@ static int attach_to_pi_state(u32 uval, + * user space TID. [9/10] + */ + if (pid != task_pid_vnr(pi_state->owner)) +- return -EINVAL; +-out_state: ++ goto out_einval; ++ ++out_attach: + atomic_inc(&pi_state->refcount); ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + *ps = pi_state; + return 0; ++ ++out_einval: ++ ret = -EINVAL; ++ goto out_error; ++ ++out_eagain: ++ ret = -EAGAIN; ++ goto out_error; ++ ++out_efault: ++ ret = -EFAULT; ++ goto out_error; ++ ++out_error: ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ return ret; + } + + /** +@@ -1180,6 +1258,9 @@ static int attach_to_pi_owner(u32 uval, + + /* + * No existing pi state. First waiter. [2] ++ * ++ * This creates pi_state, we have hb->lock held, this means nothing can ++ * observe this state, wait_lock is irrelevant. + */ + pi_state = alloc_pi_state(); + +@@ -1204,7 +1285,8 @@ static int attach_to_pi_owner(u32 uval, + return 0; + } + +-static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ++static int lookup_pi_state(u32 __user *uaddr, u32 uval, ++ struct futex_hash_bucket *hb, + union futex_key *key, struct futex_pi_state **ps, + struct task_struct **exiting) + { +@@ -1215,7 +1297,7 @@ static int lookup_pi_state(u32 uval, str + * attach to the pi_state when the validation succeeds. + */ + if (match) +- return attach_to_pi_state(uval, match->pi_state, ps); ++ return attach_to_pi_state(uaddr, uval, match->pi_state, ps); + + /* + * We are the first waiter - try to look up the owner based on +@@ -1234,7 +1316,7 @@ static int lock_pi_update_atomic(u32 __u + if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) + return -EFAULT; + +- /*If user space value changed, let the caller retry */ ++ /* If user space value changed, let the caller retry */ + return curval != uval ? 
-EAGAIN : 0; + } + +@@ -1298,7 +1380,7 @@ static int futex_lock_pi_atomic(u32 __us + */ + match = futex_top_waiter(hb, key); + if (match) +- return attach_to_pi_state(uval, match->pi_state, ps); ++ return attach_to_pi_state(uaddr, uval, match->pi_state, ps); + + /* + * No waiter and user TID is 0. We are here because the +@@ -1438,6 +1520,7 @@ static int wake_futex_pi(u32 __user *uad + + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { + ret = -EFAULT; ++ + } else if (curval != uval) { + /* + * If a unconditional UNLOCK_PI operation (user space did not +@@ -1971,7 +2054,7 @@ retry_private: + * rereading and handing potential crap to + * lookup_pi_state. + */ +- ret = lookup_pi_state(ret, hb2, &key2, ++ ret = lookup_pi_state(uaddr2, ret, hb2, &key2, + &pi_state, &exiting); + } + +@@ -2249,7 +2332,6 @@ static int __fixup_pi_state_owner(u32 __ + int err = 0; + + oldowner = pi_state->owner; +- + /* + * We are here because either: + * +@@ -2268,11 +2350,10 @@ static int __fixup_pi_state_owner(u32 __ + * because we can fault here. Imagine swapped out pages or a fork + * that marked all the anonymous memory readonly for cow. + * +- * Modifying pi_state _before_ the user space value would +- * leave the pi_state in an inconsistent state when we fault +- * here, because we need to drop the hash bucket lock to +- * handle the fault. This might be observed in the PID check +- * in lookup_pi_state. ++ * Modifying pi_state _before_ the user space value would leave the ++ * pi_state in an inconsistent state when we fault here, because we ++ * need to drop the locks to handle the fault. This might be observed ++ * in the PID check in lookup_pi_state. + */ + retry: + if (!argowner) { +@@ -2333,21 +2414,26 @@ retry: + return argowner == current; + + /* +- * To handle the page fault we need to drop the hash bucket +- * lock here. 
That gives the other task (either the highest priority +- * waiter itself or the task which stole the rtmutex) the +- * chance to try the fixup of the pi_state. So once we are +- * back from handling the fault we need to check the pi_state +- * after reacquiring the hash bucket lock and before trying to +- * do another fixup. When the fixup has been done already we +- * simply return. ++ * To handle the page fault we need to drop the locks here. That gives ++ * the other task (either the highest priority waiter itself or the ++ * task which stole the rtmutex) the chance to try the fixup of the ++ * pi_state. So once we are back from handling the fault we need to ++ * check the pi_state after reacquiring the locks and before trying to ++ * do another fixup. When the fixup has been done already we simply ++ * return. ++ * ++ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely ++ * drop hb->lock since the caller owns the hb -> futex_q relation. ++ * Dropping the pi_mutex->wait_lock requires the state revalidate. + */ + handle_fault: ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + spin_unlock(q->lock_ptr); + + err = fault_in_user_writeable(uaddr); + + spin_lock(q->lock_ptr); ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + + /* + * Check if someone else fixed it for us: diff --git a/queue-4.4/futex-cure-exit-race.patch b/queue-4.4/futex-cure-exit-race.patch new file mode 100644 index 00000000000..e06e15fb002 --- /dev/null +++ b/queue-4.4/futex-cure-exit-race.patch @@ -0,0 +1,192 @@ +From foo@baz Fri Mar 12 11:10:32 AM CET 2021 +From: Zheng Yejian +Date: Thu, 11 Mar 2021 11:25:59 +0800 +Subject: futex: Cure exit race +To: , , , +Cc: , , , , +Message-ID: <20210311032600.2326035-3-zhengyejian1@huawei.com> + +From: Thomas Gleixner + +commit da791a667536bf8322042e38ca85d55a78d3c273 upstream. + +This patch comes directly from an origin patch (commit +9c3f3986036760c48a92f04b36774aa9f63673f80) in v4.9. 
+ +Stefan reported, that the glibc tst-robustpi4 test case fails +occasionally. That case creates the following race between +sys_exit() and sys_futex_lock_pi(): + + CPU0 CPU1 + + sys_exit() sys_futex() + do_exit() futex_lock_pi() + exit_signals(tsk) No waiters: + tsk->flags |= PF_EXITING; *uaddr == 0x00000PID + mm_release(tsk) Set waiter bit + exit_robust_list(tsk) { *uaddr = 0x80000PID; + Set owner died attach_to_pi_owner() { + *uaddr = 0xC0000000; tsk = get_task(PID); + } if (!tsk->flags & PF_EXITING) { + ... attach(); + tsk->flags |= PF_EXITPIDONE; } else { + if (!(tsk->flags & PF_EXITPIDONE)) + return -EAGAIN; + return -ESRCH; <--- FAIL + } + +ESRCH is returned all the way to user space, which triggers the glibc test +case assert. Returning ESRCH unconditionally is wrong here because the user +space value has been changed by the exiting task to 0xC0000000, i.e. the +FUTEX_OWNER_DIED bit is set and the futex PID value has been cleared. This +is a valid state and the kernel has to handle it, i.e. taking the futex. + +Cure it by rereading the user space value when PF_EXITING and PF_EXITPIDONE +is set in the task which 'owns' the futex. If the value has changed, let +the kernel retry the operation, which includes all regular sanity checks +and correctly handles the FUTEX_OWNER_DIED case. + +If it hasn't changed, then return ESRCH as there is no way to distinguish +this case from malfunctioning user space. This happens when the exiting +task did not have a robust list, the robust list was corrupted or the user +space value in the futex was simply bogus. 
+ +Reported-by: Stefan Liebler +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +Cc: Heiko Carstens +Cc: Darren Hart +Cc: Ingo Molnar +Cc: Sasha Levin +Cc: stable@vger.kernel.org +Link: https://bugzilla.kernel.org/show_bug.cgi?id=200467 +Link: https://lkml.kernel.org/r/20181210152311.986181245@linutronix.de +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +[Lee: Required to satisfy functional dependency from futex back-port. + Re-add the missing handle_exit_race() parts from: + 3d4775df0a89 ("futex: Replace PF_EXITPIDONE with a state")] +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Zheng Yejian +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 65 insertions(+), 6 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1198,11 +1198,67 @@ static void wait_for_owner_exiting(int r + put_task_struct(exiting); + } + ++static int handle_exit_race(u32 __user *uaddr, u32 uval, ++ struct task_struct *tsk) ++{ ++ u32 uval2; ++ ++ /* ++ * If the futex exit state is not yet FUTEX_STATE_DEAD, wait ++ * for it to finish. ++ */ ++ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) ++ return -EAGAIN; ++ ++ /* ++ * Reread the user space value to handle the following situation: ++ * ++ * CPU0 CPU1 ++ * ++ * sys_exit() sys_futex() ++ * do_exit() futex_lock_pi() ++ * futex_lock_pi_atomic() ++ * exit_signals(tsk) No waiters: ++ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID ++ * mm_release(tsk) Set waiter bit ++ * exit_robust_list(tsk) { *uaddr = 0x80000PID; ++ * Set owner died attach_to_pi_owner() { ++ * *uaddr = 0xC0000000; tsk = get_task(PID); ++ * } if (!tsk->flags & PF_EXITING) { ++ * ... 
attach(); ++ * tsk->futex_state = } else { ++ * FUTEX_STATE_DEAD; if (tsk->futex_state != ++ * FUTEX_STATE_DEAD) ++ * return -EAGAIN; ++ * return -ESRCH; <--- FAIL ++ * } ++ * ++ * Returning ESRCH unconditionally is wrong here because the ++ * user space value has been changed by the exiting task. ++ * ++ * The same logic applies to the case where the exiting task is ++ * already gone. ++ */ ++ if (get_futex_value_locked(&uval2, uaddr)) ++ return -EFAULT; ++ ++ /* If the user space value has changed, try again. */ ++ if (uval2 != uval) ++ return -EAGAIN; ++ ++ /* ++ * The exiting task did not have a robust list, the robust list was ++ * corrupted or the user space value in *uaddr is simply bogus. ++ * Give up and tell user space. ++ */ ++ return -ESRCH; ++} ++ + /* + * Lookup the task for the TID provided from user space and attach to + * it after doing proper sanity checks. + */ +-static int attach_to_pi_owner(u32 uval, union futex_key *key, ++static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, + struct futex_pi_state **ps, + struct task_struct **exiting) + { +@@ -1213,12 +1269,15 @@ static int attach_to_pi_owner(u32 uval, + /* + * We are the first waiter - try to look up the real owner and attach + * the new pi_state to it, but bail out when TID = 0 [1] ++ * ++ * The !pid check is paranoid. None of the call sites should end up ++ * with pid == 0, but better safe than sorry. Let the caller retry + */ + if (!pid) +- return -ESRCH; ++ return -EAGAIN; + p = futex_find_get_task(pid); + if (!p) +- return -ESRCH; ++ return handle_exit_race(uaddr, uval, NULL); + + if (unlikely(p->flags & PF_KTHREAD)) { + put_task_struct(p); +@@ -1237,7 +1296,7 @@ static int attach_to_pi_owner(u32 uval, + * FUTEX_STATE_DEAD, we know that the task has finished + * the cleanup: + */ +- int ret = (p->futex_state = FUTEX_STATE_DEAD) ? 
-ESRCH : -EAGAIN; ++ int ret = handle_exit_race(uaddr, uval, p); + + raw_spin_unlock_irq(&p->pi_lock); + /* +@@ -1303,7 +1362,7 @@ static int lookup_pi_state(u32 __user *u + * We are the first waiter - try to look up the owner based on + * @uval and attach to it. + */ +- return attach_to_pi_owner(uval, key, ps, exiting); ++ return attach_to_pi_owner(uaddr, uval, key, ps, exiting); + } + + static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) +@@ -1419,7 +1478,7 @@ static int futex_lock_pi_atomic(u32 __us + * attach to the owner. If that fails, no harm done, we only + * set the FUTEX_WAITERS bit in the user space variable. + */ +- return attach_to_pi_owner(uval, key, ps, exiting); ++ return attach_to_pi_owner(uaddr, newval, key, ps, exiting); + } + + /** diff --git a/queue-4.4/futex-fix-dead-code-in-attach_to_pi_owner.patch b/queue-4.4/futex-fix-dead-code-in-attach_to_pi_owner.patch new file mode 100644 index 00000000000..d471b981601 --- /dev/null +++ b/queue-4.4/futex-fix-dead-code-in-attach_to_pi_owner.patch @@ -0,0 +1,70 @@ +From foo@baz Fri Mar 12 11:10:32 AM CET 2021 +From: Zheng Yejian +Date: Thu, 11 Mar 2021 11:26:00 +0800 +Subject: futex: fix dead code in attach_to_pi_owner() +To: , , , +Cc: , , , , +Message-ID: <20210311032600.2326035-4-zhengyejian1@huawei.com> + +From: Thomas Gleixner + +This patch comes directly from an origin patch (commit +91509e84949fc97e7424521c32a9e227746e0b85) in v4.9. +And it is part of a full patch which was originally back-ported +to v4.14 as commit e6e00df182908f34360c3c9f2d13cc719362e9c0 + +The handle_exit_race() function is defined in commit 9c3f39860367 + ("futex: Cure exit race"), which never returns -EBUSY. This results +in a small piece of dead code in the attach_to_pi_owner() function: + + int ret = handle_exit_race(uaddr, uval, p); /* Never return -EBUSY */ + ... 
+ if (ret == -EBUSY) + *exiting = p; /* dead code */ + +The return value -EBUSY is added to handle_exit_race() in upstream +commit ac31c7ff8624409 ("futex: Provide distinct return value when +owner is exiting"). This commit was incorporated into v4.9.255, before +the function handle_exit_race() was introduced, without modifying +handle_exit_race(). + +To fix dead code, extract the change of handle_exit_race() from +commit ac31c7ff8624409 ("futex: Provide distinct return value when owner + is exiting"), re-incorporated. + +Lee writes: + +This commit takes the remaining functional snippet of: + + ac31c7ff8624409 ("futex: Provide distinct return value when owner is exiting") + +... and is the correct fix for this issue. + +Fixes: 9c3f39860367 ("futex: Cure exit race") +Cc: stable@vger.kernel.org # v4.9.258 +Signed-off-by: Xiaoming Ni +Reviewed-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Zheng Yejian +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1204,11 +1204,11 @@ static int handle_exit_race(u32 __user * + u32 uval2; + + /* +- * If the futex exit state is not yet FUTEX_STATE_DEAD, wait +- * for it to finish. ++ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the ++ * caller that the alleged owner is busy. 
+ */ + if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) +- return -EAGAIN; ++ return -EBUSY; + + /* + * Reread the user space value to handle the following situation: diff --git a/queue-4.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch b/queue-4.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch new file mode 100644 index 00000000000..f27697d9ea1 --- /dev/null +++ b/queue-4.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch @@ -0,0 +1,58 @@ +From 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Wed, 10 Mar 2021 10:18:04 -0800 +Subject: Revert "mm, slub: consider rest of partial list if acquire_slab() fails" + +From: Linus Torvalds + +commit 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 upstream. + +This reverts commit 8ff60eb052eeba95cfb3efe16b08c9199f8121cf. + +The kernel test robot reports a huge performance regression due to the +commit, and the reason seems fairly straightforward: when there is +contention on the page list (which is what causes acquire_slab() to +fail), we do _not_ want to just loop and try again, because that will +transfer the contention to the 'n->list_lock' spinlock we hold, and +just make things even worse. + +This is admittedly likely a problem only on big machines - the kernel +test robot report comes from a 96-thread dual socket Intel Xeon Gold +6252 setup, but the regression there really is quite noticeable: + + -47.9% regression of stress-ng.rawpkt.ops_per_sec + +and the commit that was marked as being fixed (7ced37197196: "slub: +Acquire_slab() avoid loop") actually did the loop exit early very +intentionally (the hint being that "avoid loop" part of that commit +message), exactly to avoid this issue. 
+ +The correct thing to do may be to pick some kind of reasonable middle +ground: instead of breaking out of the loop on the very first sign of +contention, or trying over and over and over again, the right thing may +be to re-try _once_, and then give up on the second failure (or pick +your favorite value for "once"..). + +Reported-by: kernel test robot +Link: https://lore.kernel.org/lkml/20210301080404.GF12822@xsang-OptiPlex-9020/ +Cc: Jann Horn +Cc: David Rientjes +Cc: Joonsoo Kim +Acked-by: Christoph Lameter +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/slub.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1682,7 +1682,7 @@ static void *get_partial_node(struct kme + + t = acquire_slab(s, n, page, object == NULL, &objects); + if (!t) +- continue; /* cmpxchg raced */ ++ break; + + available += objects; + if (!object) { diff --git a/queue-4.4/series b/queue-4.4/series index 9920d87d60a..d335c3e6ed7 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -5,3 +5,9 @@ can-skb-can_skb_set_owner-fix-ref-counting-if-socket-was-closed-before-setting-s can-flexcan-assert-frz-bit-in-flexcan_chip_freeze.patch can-flexcan-enable-rx-fifo-after-frz-halt-valid.patch netfilter-x_tables-gpf-inside-xt_find_revision.patch +cifs-return-proper-error-code-in-statfs-2.patch +floppy-fix-lock_fdc-signal-handling.patch +revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch +futex-change-locking-rules.patch +futex-cure-exit-race.patch +futex-fix-dead-code-in-attach_to_pi_owner.patch