Fixes for 5.7
author     Sasha Levin <sashal@kernel.org>
           Sun, 5 Jul 2020 13:48:00 +0000 (09:48 -0400)
committer  Sasha Levin <sashal@kernel.org>
           Sun, 5 Jul 2020 13:48:00 +0000 (09:48 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.7/io_uring-use-signal-based-task_work-running.patch [new file with mode: 0644]
queue-5.7/series
queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch [new file with mode: 0644]

diff --git a/queue-5.7/io_uring-use-signal-based-task_work-running.patch b/queue-5.7/io_uring-use-signal-based-task_work-running.patch
new file mode 100644
index 0000000..08e9259
--- /dev/null
+++ b/queue-5.7/io_uring-use-signal-based-task_work-running.patch
@@ -0,0 +1,104 @@
+From a1206c898847aace52976bc5abecabef5175b96b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Jun 2020 12:39:05 -0600
+Subject: io_uring: use signal based task_work running
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit ce593a6c480a22acba08795be313c0c6d49dd35d ]
+
+Since 5.7, we've been using task_work to trigger async running of
+requests in the context of the original task. This generally works
+great, but there's a case where if the task is currently blocked
+in the kernel waiting on a condition to become true, it won't process
+task_work. Even though the task is woken, it just checks whatever
+condition it's waiting on, and goes back to sleep if it's still false.
+
+This is a problem if that very condition only becomes true when that
+task_work is run. An example of that is the task registering an eventfd
+with io_uring, and it's now blocked waiting on an eventfd read. That
+read could depend on a completion event, and that completion event
+won't get triggered until task_work has been run.
+
+Use the TWA_SIGNAL notification for task_work, so that we ensure that
+the task always runs the work when queued.
+
+Cc: stable@vger.kernel.org # v5.7
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 32 ++++++++++++++++++++++++--------
+ 1 file changed, 24 insertions(+), 8 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 71d281f68ed83..51362a619fd50 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -4136,6 +4136,21 @@ struct io_poll_table {
+       int error;
+ };
+
++static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
++                              int notify)
++{
++      struct task_struct *tsk = req->task;
++      int ret;
++
++      if (req->ctx->flags & IORING_SETUP_SQPOLL)
++              notify = 0;
++
++      ret = task_work_add(tsk, cb, notify);
++      if (!ret)
++              wake_up_process(tsk);
++      return ret;
++}
++
+ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+                          __poll_t mask, task_work_func_t func)
+ {
+@@ -4159,13 +4174,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+        * of executing it. We can't safely execute it anyway, as we may not
+        * have the needed state needed for it anyway.
+        */
+-      ret = task_work_add(tsk, &req->task_work, true);
++      ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
+       if (unlikely(ret)) {
+               WRITE_ONCE(poll->canceled, true);
+               tsk = io_wq_get_task(req->ctx->io_wq);
+-              task_work_add(tsk, &req->task_work, true);
++              task_work_add(tsk, &req->task_work, 0);
++              wake_up_process(tsk);
+       }
+-      wake_up_process(tsk);
+       return 1;
+ }
+
+@@ -6260,19 +6275,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+       do {
+               prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+                                               TASK_INTERRUPTIBLE);
++              /* make sure we run task_work before checking for signals */
+               if (current->task_works)
+                       task_work_run();
+-              if (io_should_wake(&iowq, false))
+-                      break;
+-              schedule();
+               if (signal_pending(current)) {
+-                      ret = -EINTR;
++                      ret = -ERESTARTSYS;
+                       break;
+               }
++              if (io_should_wake(&iowq, false))
++                      break;
++              schedule();
+       } while (1);
+       finish_wait(&ctx->wait, &iowq.wq);
+
+-      restore_saved_sigmask_unless(ret == -EINTR);
++      restore_saved_sigmask_unless(ret == -ERESTARTSYS);
+
+       return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+ }
+-- 
+2.25.1
+
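
The deadlock described above can be observed from userspace. Below is a
minimal sketch of one shape the hang can take (an illustration, not part of
the queued patch). It assumes liburing on a 5.7 kernel, built with
gcc repro.c -o repro -luring -lpthread; the file name repro.c and the helper
poke() are invented for the example. Completions on the ring signal a
registered eventfd, a read of an empty pipe parks on the poll-driven retry
path, and the task then blocks in read(2) on the eventfd, so the CQE it
waits for can only be posted by task_work running in its own context. On an
unpatched 5.7 kernel the final read() never returns; with these two patches
applied it completes.

/* repro.c -- illustrative only */
#include <liburing.h>
#include <sys/eventfd.h>
#include <pthread.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

static int pipefd[2];

/* Helper thread: make the pipe readable once main() has gone to sleep. */
static void *poke(void *arg)
{
	sleep(1);
	if (write(pipefd[1], "x", 1) < 0)
		perror("write");
	return arg;
}

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	pthread_t thr;
	uint64_t v;
	char c;
	int efd;

	if (io_uring_queue_init(8, &ring, 0) < 0 || pipe(pipefd) < 0)
		return 1;
	efd = eventfd(0, 0);
	io_uring_register_eventfd(&ring, efd);	/* CQEs now signal efd */

	/* Read of an empty pipe: io_uring arms its poll handler and will
	 * retry via task_work on *this* task once the pipe is readable. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, pipefd[0], &c, 1, 0);
	io_uring_submit(&ring);

	pthread_create(&thr, NULL, poke, NULL);

	/* Block in eventfd_read(). The CQE that would signal efd is only
	 * posted by task_work; before TWA_SIGNAL the wakeup did not make
	 * this in-kernel wait loop run it, so the read hung forever. */
	if (read(efd, &v, sizeof(v)) == sizeof(v))
		printf("eventfd fired: %llu completion(s)\n",
		       (unsigned long long)v);

	pthread_join(thr, NULL);
	io_uring_queue_exit(&ring);
	return 0;
}
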
diff --git a/queue-5.7/series b/queue-5.7/series
index a6590c89d598bdee50212471a17378e8c0862c50..7bf9ae49f7e8591d879c43368bd5cbd2c56ea914 100644
--- a/queue-5.7/series
+++ b/queue-5.7/series
@@ -43,3 +43,5 @@ tpm_tis-remove-the-hid-ifx0102.patch
 selftests-tpm-use-bin-sh-instead-of-bin-bash.patch
 tpm-fix-tis-locality-timeout-problems.patch
 crypto-af_alg-fix-use-after-free-in-af_alg_accept-due-to-bh_lock_sock.patch
+task_work-teach-task_work_add-to-do-signal_wake_up.patch
+io_uring-use-signal-based-task_work-running.patch
diff --git a/queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch b/queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch
new file mode 100644
index 0000000..234e723
--- /dev/null
+++ b/queue-5.7/task_work-teach-task_work_add-to-do-signal_wake_up.patch
@@ -0,0 +1,140 @@
+From 8cc977dba986a5ffab5b5b9d827c1b69ea746c33 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Jun 2020 17:32:54 +0200
+Subject: task_work: teach task_work_add() to do signal_wake_up()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit e91b48162332480f5840902268108bb7fb7a44c7 ]
+
+So that the target task will exit the wait_event_interruptible-like
+loop and call task_work_run() asap.
+
+The patch turns "bool notify" into a 0,TWA_RESUME,TWA_SIGNAL enum; the
+new TWA_SIGNAL flag implies signal_wake_up().  However, it needs to
+avoid the race with recalc_sigpending(), so the patch also adds the
+new JOBCTL_TASK_WORK bit included in JOBCTL_PENDING_MASK.
+
+TODO: once this patch is merged we need to change all current users
+of task_work_add(notify = true) to use TWA_RESUME.
+
+Cc: stable@vger.kernel.org # v5.7
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched/jobctl.h |  4 +++-
+ include/linux/task_work.h    |  5 ++++-
+ kernel/signal.c              | 10 +++++++---
+ kernel/task_work.c           | 16 ++++++++++++++--
+ 4 files changed, 28 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
+index fa067de9f1a94..d2b4204ba4d34 100644
+--- a/include/linux/sched/jobctl.h
++++ b/include/linux/sched/jobctl.h
+@@ -19,6 +19,7 @@ struct task_struct;
+ #define JOBCTL_TRAPPING_BIT   21      /* switching to TRACED */
+ #define JOBCTL_LISTENING_BIT  22      /* ptracer is listening for events */
+ #define JOBCTL_TRAP_FREEZE_BIT        23      /* trap for cgroup freezer */
++#define JOBCTL_TASK_WORK_BIT  24      /* set by TWA_SIGNAL */
+
+ #define JOBCTL_STOP_DEQUEUED  (1UL << JOBCTL_STOP_DEQUEUED_BIT)
+ #define JOBCTL_STOP_PENDING   (1UL << JOBCTL_STOP_PENDING_BIT)
+@@ -28,9 +29,10 @@ struct task_struct;
+ #define JOBCTL_TRAPPING               (1UL << JOBCTL_TRAPPING_BIT)
+ #define JOBCTL_LISTENING      (1UL << JOBCTL_LISTENING_BIT)
+ #define JOBCTL_TRAP_FREEZE    (1UL << JOBCTL_TRAP_FREEZE_BIT)
++#define JOBCTL_TASK_WORK      (1UL << JOBCTL_TASK_WORK_BIT)
+
+ #define JOBCTL_TRAP_MASK      (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
+-#define JOBCTL_PENDING_MASK   (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
++#define JOBCTL_PENDING_MASK   (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK)
+
+ extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
+ extern void task_clear_jobctl_trapping(struct task_struct *task);
+diff --git a/include/linux/task_work.h b/include/linux/task_work.h
+index bd9a6a91c097e..0fb93aafa4785 100644
+--- a/include/linux/task_work.h
++++ b/include/linux/task_work.h
+@@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
+       twork->func = func;
+ }
+
+-int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
++#define TWA_RESUME    1
++#define TWA_SIGNAL    2
++int task_work_add(struct task_struct *task, struct callback_head *twork, int);
++
+ struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
+ void task_work_run(void);
+
+diff --git a/kernel/signal.c b/kernel/signal.c
+index 284fc1600063b..d5feb34b5e158 100644
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
+       struct signal_struct *signal = current->signal;
+       int signr;
+
+-      if (unlikely(current->task_works))
+-              task_work_run();
+-
+       if (unlikely(uprobe_deny_signal()))
+               return false;
+
+@@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig)
+
+ relock:
+       spin_lock_irq(&sighand->siglock);
++      current->jobctl &= ~JOBCTL_TASK_WORK;
++      if (unlikely(current->task_works)) {
++              spin_unlock_irq(&sighand->siglock);
++              task_work_run();
++              goto relock;
++      }
++
+       /*
+        * Every stopped thread goes here after wakeup. Check to see if
+        * we should notify the parent, prepare_signal(SIGCONT) encodes
+diff --git a/kernel/task_work.c b/kernel/task_work.c
+index 825f28259a19a..5c0848ca1287d 100644
+--- a/kernel/task_work.c
++++ b/kernel/task_work.c
+@@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
+  * 0 if succeeds or -ESRCH.
+  */
+ int
+-task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
++task_work_add(struct task_struct *task, struct callback_head *work, int notify)
+ {
+       struct callback_head *head;
++      unsigned long flags;
+
+       do {
+               head = READ_ONCE(task->task_works);
+@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
+               work->next = head;
+       } while (cmpxchg(&task->task_works, head, work) != head);
+
+-      if (notify)
++      switch (notify) {
++      case TWA_RESUME:
+               set_notify_resume(task);
++              break;
++      case TWA_SIGNAL:
++              if (lock_task_sighand(task, &flags)) {
++                      task->jobctl |= JOBCTL_TASK_WORK;
++                      signal_wake_up(task, 0);
++                      unlock_task_sighand(task, &flags);
++              }
++              break;
++      }
++
+       return 0;
+ }
+
+-- 
+2.25.1
+
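
For reference, the calling convention this patch introduces looks like the
sketch below, as a caller such as io_uring would use it. This is a
kernel-context illustration only, not part of the patch: the names my_cb_fn
and queue_on_task are invented, and only task_work_add()'s arguments and
return value come from the change above.

#include <linux/sched.h>
#include <linux/task_work.h>

/* Runs later in the context of the task it was queued on. */
static void my_cb_fn(struct callback_head *cb)
{
}

static int queue_on_task(struct task_struct *tsk, struct callback_head *cb)
{
	init_task_work(cb, my_cb_fn);

	/*
	 * notify selects the wakeup:
	 *   0          - queue only, no wakeup (the io_uring patch above
	 *                passes this for SQPOLL rings)
	 *   TWA_RESUME - the old "notify = true": run on return to usermode
	 *   TWA_SIGNAL - additionally kick interruptible in-kernel waits
	 *                via signal_wake_up(), which io_uring relies on
	 */
	return task_work_add(tsk, cb, TWA_SIGNAL);	/* 0 or -ESRCH */
}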