From: Sasha Levin
Date: Sun, 5 Jul 2020 13:48:00 +0000 (-0400)
Subject: Fixes for 5.7
X-Git-Tag: v4.4.230~28^2~2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=05e4ca48613bb46e1c2c5504f116c5359ad80f0d;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 5.7

Signed-off-by: Sasha Levin
---

diff --git a/queue-5.7/io_uring-use-signal-based-task_work-running.patch b/queue-5.7/io_uring-use-signal-based-task_work-running.patch
new file mode 100644
index 00000000000..08e92590145
--- /dev/null
+++ b/queue-5.7/io_uring-use-signal-based-task_work-running.patch
@@ -0,0 +1,104 @@
+From a1206c898847aace52976bc5abecabef5175b96b Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 30 Jun 2020 12:39:05 -0600
+Subject: io_uring: use signal based task_work running
+
+From: Jens Axboe
+
+[ Upstream commit ce593a6c480a22acba08795be313c0c6d49dd35d ]
+
+Since 5.7, we've been using task_work to trigger async running of
+requests in the context of the original task. This generally works
+great, but there's a case where, if the task is currently blocked
+in the kernel waiting on a condition to become true, it won't process
+task_work. Even though the task is woken, it just checks whatever
+condition it's waiting on, and goes back to sleep if it's still false.
+
+This is a problem if that very condition only becomes true when that
+task_work is run. An example of that is a task that has registered an
+eventfd with io_uring and is now blocked waiting on an eventfd read.
+That read could depend on a completion event, and that completion
+event won't get triggered until task_work has been run.
+
+Use the TWA_SIGNAL notification for task_work, so that we ensure that
+the task always runs the work when queued.
+
+Cc: stable@vger.kernel.org # v5.7
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 32 ++++++++++++++++++++++++--------
+ 1 file changed, 24 insertions(+), 8 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 71d281f68ed83..51362a619fd50 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -4136,6 +4136,21 @@ struct io_poll_table {
+ 	int error;
+ };
+ 
++static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
++				int notify)
++{
++	struct task_struct *tsk = req->task;
++	int ret;
++
++	if (req->ctx->flags & IORING_SETUP_SQPOLL)
++		notify = 0;
++
++	ret = task_work_add(tsk, cb, notify);
++	if (!ret)
++		wake_up_process(tsk);
++	return ret;
++}
++
+ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+ 			   __poll_t mask, task_work_func_t func)
+ {
+@@ -4159,13 +4174,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+ 	 * of executing it. We can't safely execute it anyway, as we may not
+ 	 * have the needed state needed for it anyway.
+	 */
+-	ret = task_work_add(tsk, &req->task_work, true);
++	ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
+ 	if (unlikely(ret)) {
+ 		WRITE_ONCE(poll->canceled, true);
+ 		tsk = io_wq_get_task(req->ctx->io_wq);
+-		task_work_add(tsk, &req->task_work, true);
++		task_work_add(tsk, &req->task_work, 0);
++		wake_up_process(tsk);
+ 	}
+-	wake_up_process(tsk);
+ 	return 1;
+ }
+ 
+@@ -6260,19 +6275,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+ 	do {
+ 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+ 						TASK_INTERRUPTIBLE);
++		/* make sure we run task_work before checking for signals */
+ 		if (current->task_works)
+ 			task_work_run();
+-		if (io_should_wake(&iowq, false))
+-			break;
+-		schedule();
+ 		if (signal_pending(current)) {
+-			ret = -EINTR;
++			ret = -ERESTARTSYS;
+ 			break;
+ 		}
++		if (io_should_wake(&iowq, false))
++			break;
++		schedule();
+ 	} while (1);
+ 	finish_wait(&ctx->wait, &iowq.wq);
+ 
+-	restore_saved_sigmask_unless(ret == -EINTR);
++	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
+ 
+ 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+ }
+--
+2.25.1
+
diff --git a/queue-5.7/series b/queue-5.7/series
index a6590c89d59..7bf9ae49f7e 100644
--- a/queue-5.7/series
+++ b/queue-5.7/series
@@ -43,3 +43,5 @@ tpm_tis-remove-the-hid-ifx0102.patch
 selftests-tpm-use-bin-sh-instead-of-bin-bash.patch
 tpm-fix-tis-locality-timeout-problems.patch
 crypto-af_alg-fix-use-after-free-in-af_alg_accept-due-to-bh_lock_sock.patch
+task_work-teach-task_work_add-to-do-signal_wake_up.patch
+io_uring-use-signal-based-task_work-running.patch
diff --git a/queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch b/queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch
new file mode 100644
index 00000000000..234e723ac88
--- /dev/null
+++ b/queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch
@@ -0,0 +1,140 @@
+From 8cc977dba986a5ffab5b5b9d827c1b69ea746c33 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 30 Jun 2020 17:32:54 +0200
+Subject: task_work: teach task_work_add() to do signal_wake_up()
+
+From: Oleg Nesterov
+
+[ Upstream commit e91b48162332480f5840902268108bb7fb7a44c7 ]
+
+So that the target task will exit the wait_event_interruptible-like
+loop and call task_work_run() as soon as possible.
+
+The patch turns "bool notify" into a 0/TWA_RESUME/TWA_SIGNAL enum;
+the new TWA_SIGNAL flag implies signal_wake_up(). However, it needs
+to avoid the race with recalc_sigpending(), so the patch also adds
+the new JOBCTL_TASK_WORK bit included in JOBCTL_PENDING_MASK.
+
+TODO: once this patch is merged we need to change all current users
+of task_work_add(notify = true) to use TWA_RESUME.
+
+Cc: stable@vger.kernel.org # v5.7
+Acked-by: Peter Zijlstra (Intel)
+Signed-off-by: Oleg Nesterov
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ include/linux/sched/jobctl.h |  4 +++-
+ include/linux/task_work.h    |  5 ++++-
+ kernel/signal.c              | 10 +++++++---
+ kernel/task_work.c           | 16 ++++++++++++++--
+ 4 files changed, 28 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
+index fa067de9f1a94..d2b4204ba4d34 100644
+--- a/include/linux/sched/jobctl.h
++++ b/include/linux/sched/jobctl.h
+@@ -19,6 +19,7 @@ struct task_struct;
+ #define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
+ #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
+ #define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
++#define JOBCTL_TASK_WORK_BIT	24	/* set by TWA_SIGNAL */
+ 
+ #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
+ #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
+@@ -28,9 +29,10 @@ struct task_struct;
+ #define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
+ #define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
+ #define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
++#define JOBCTL_TASK_WORK	(1UL << JOBCTL_TASK_WORK_BIT)
+ 
+ #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
+-#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
++#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK)
+ 
+ extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
+ extern void task_clear_jobctl_trapping(struct task_struct *task);
+diff --git a/include/linux/task_work.h b/include/linux/task_work.h
+index bd9a6a91c097e..0fb93aafa4785 100644
+--- a/include/linux/task_work.h
++++ b/include/linux/task_work.h
+@@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
+ 	twork->func = func;
+ }
+ 
+-int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
++#define TWA_RESUME	1
++#define TWA_SIGNAL	2
++int task_work_add(struct task_struct *task, struct callback_head *twork, int);
++
+ struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
+ void task_work_run(void);
+ 
+diff --git a/kernel/signal.c b/kernel/signal.c
+index 284fc1600063b..d5feb34b5e158 100644
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
+ 	struct signal_struct *signal = current->signal;
+ 	int signr;
+ 
+-	if (unlikely(current->task_works))
+-		task_work_run();
+-
+ 	if (unlikely(uprobe_deny_signal()))
+ 		return false;
+ 
+@@ -2544,6 +2541,13 @@
+ 
+ relock:
+ 	spin_lock_irq(&sighand->siglock);
++	current->jobctl &= ~JOBCTL_TASK_WORK;
++	if (unlikely(current->task_works)) {
++		spin_unlock_irq(&sighand->siglock);
++		task_work_run();
++		goto relock;
++	}
++
+ 	/*
+ 	 * Every stopped thread goes here after wakeup. Check to see if
+ 	 * we should notify the parent, prepare_signal(SIGCONT) encodes
+diff --git a/kernel/task_work.c b/kernel/task_work.c
+index 825f28259a19a..5c0848ca1287d 100644
+--- a/kernel/task_work.c
++++ b/kernel/task_work.c
+@@ -25,9 +25,10 @@ static struct callback_head work_exited;	/* all we need is ->next == NULL */
+  * 0 if succeeds or -ESRCH.
+  */
+ int
+-task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
++task_work_add(struct task_struct *task, struct callback_head *work, int notify)
+ {
+ 	struct callback_head *head;
++	unsigned long flags;
+ 
+ 	do {
+ 		head = READ_ONCE(task->task_works);
+@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
+ 		work->next = head;
+ 	} while (cmpxchg(&task->task_works, head, work) != head);
+ 
+-	if (notify)
++	switch (notify) {
++	case TWA_RESUME:
+ 		set_notify_resume(task);
++		break;
++	case TWA_SIGNAL:
++		if (lock_task_sighand(task, &flags)) {
++			task->jobctl |= JOBCTL_TASK_WORK;
++			signal_wake_up(task, 0);
++			unlock_task_sighand(task, &flags);
++		}
++		break;
++	}
++
+ 	return 0;
+ }
+ 
+--
+2.25.1
+
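
A note on the mechanism the two patches implement, for readers who want to
experiment with the pattern outside the kernel: a task blocked in an
interruptible wait only re-checks the condition it is waiting on when it is
woken, so queued task_work needs both a wake-up the wait loop cannot ignore
(TWA_SIGNAL / signal_wake_up()) and a wait loop that drains pending work
before deciding to sleep again (the reordered io_cqring_wait() above). The
userspace sketch below mirrors that structure with pthreads. It is an
analogy, not kernel code; every name in it (worker_ctx fields, work_add,
work_run, completion, demo.c) is invented for the illustration. The condition
variable stands in for signal_wake_up(), and the lock-free push mirrors the
cmpxchg() loop in task_work_add().

/*
 * Userspace analogue of TWA_SIGNAL-style task_work (a sketch, not kernel
 * code).  A worker sleeps on a condition; queued work must both wake it
 * and be run before it re-checks the condition, because, as in the eventfd
 * example above, the work itself may be what makes the condition true.
 * Build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct work_node {
	void (*func)(void);
	struct work_node *next;
};

static struct {
	_Atomic(struct work_node *) works;  /* analogue of task->task_works */
	pthread_mutex_t lock;
	pthread_cond_t wake;                /* stands in for signal_wake_up() */
	int should_wake;                    /* the condition the worker waits on */
} ctx = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.wake = PTHREAD_COND_INITIALIZER,
};

/* Push like the cmpxchg() loop in task_work_add(), then wake the worker.
 * Without the wake-up the worker could sleep forever: the bug fixed above. */
static void work_add(struct work_node *work)
{
	struct work_node *head;

	do {
		head = atomic_load(&ctx.works);
		work->next = head;
	} while (!atomic_compare_exchange_weak(&ctx.works, &head, work));

	pthread_mutex_lock(&ctx.lock);
	pthread_cond_signal(&ctx.wake);
	pthread_mutex_unlock(&ctx.lock);
}

/* Detach the whole list and run it, like task_work_run(). */
static void work_run(void)
{
	struct work_node *node = atomic_exchange(&ctx.works, NULL);

	while (node) {
		struct work_node *next = node->next;
		node->func();
		node = next;
	}
}

/* The queued work is what satisfies the wait condition, like a completion
 * that a blocked eventfd read depends on. */
static void completion(void)
{
	pthread_mutex_lock(&ctx.lock);
	ctx.should_wake = 1;
	pthread_mutex_unlock(&ctx.lock);
	printf("completion posted from queued work\n");
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ctx.lock);
	for (;;) {
		/* Run pending work BEFORE testing the condition, mirroring
		 * the reordered io_cqring_wait() loop. */
		if (atomic_load(&ctx.works)) {
			pthread_mutex_unlock(&ctx.lock);
			work_run();
			pthread_mutex_lock(&ctx.lock);
			continue;
		}
		if (ctx.should_wake)
			break;
		pthread_cond_wait(&ctx.wake, &ctx.lock);
	}
	pthread_mutex_unlock(&ctx.lock);
	printf("worker: condition true, done\n");
	return NULL;
}

int main(void)
{
	pthread_t thr;
	struct work_node work = { .func = completion, .next = NULL };

	pthread_create(&thr, NULL, worker, NULL);
	work_add(&work);
	pthread_join(thr, NULL);
	return 0;
}

The ordering in worker() is the whole point: pending work runs before the
predicate is re-tested, so a completion generated by the work itself cannot
be missed, which is the same reason the io_cqring_wait() hunk moves
task_work_run() ahead of the signal_pending() and io_should_wake() checks.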