Fixes for 5.10

author Sasha Levin <sashal@kernel.org>
Wed, 13 Jan 2021 17:32:17 +0000 (12:32 -0500)
committer Sasha Levin <sashal@kernel.org>
Wed, 13 Jan 2021 17:32:17 +0000 (12:32 -0500)
diff --git a/queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch b/queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch

new file mode 100644 (file)

index 0000000..8b3016c
--- /dev/null
+++ b/queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch
@@ -0,0 +1,44 @@
+From 3a5d5f02360671f2af08c015b3b0108254c256b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 21:17:25 +0000
+Subject: io_uring: limit {io|sq}poll submit locking scope
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 89448c47b8452b67c146dc6cad6f737e004c5caf upstream
+
+We don't need to take uring_lock for SQPOLL|IOPOLL to do
+io_cqring_overflow_flush() when cq_overflow_list is empty, remove it
+from the hot path.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 3974b4f124b6a..5ba312ab99786 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -9024,10 +9024,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
+        */
+       ret = 0;
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+-              io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
+-              if (!list_empty_careful(&ctx->cq_overflow_list))
++              if (!list_empty_careful(&ctx->cq_overflow_list)) {
++                      bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL;
++
++                      io_ring_submit_lock(ctx, needs_lock);
+                       io_cqring_overflow_flush(ctx, false, NULL, NULL);
+-              io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
++                      io_ring_submit_unlock(ctx, needs_lock);
++              }
+               if (flags & IORING_ENTER_SQ_WAKEUP)
+                       wake_up(&ctx->sq_data->wait);
+               if (flags & IORING_ENTER_SQ_WAIT)
+-- 
+2.27.0
+
diff --git a/queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch b/queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch

new file mode 100644 (file)

index 0000000..d5904f2
--- /dev/null
+++ b/queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch
@@ -0,0 +1,219 @@
+From ee7c027ca613e6e5e6fc1aa32679280e6f08415a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 21:17:26 +0000
+Subject: io_uring: patch up IOPOLL overflow_flush sync
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 6c503150ae33ee19036255cfda0998463613352c upstream
+
+IOPOLL skips completion locking but keeps it under uring_lock, thus
+io_cqring_overflow_flush() and so io_cqring_events() need additional
+locking with uring_lock in some cases for IOPOLL.
+
+Remove __io_cqring_overflow_flush() from io_cqring_events(), introduce a
+wrapper around flush doing needed synchronisation and call it by hand.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 78 +++++++++++++++++++++++++++------------------------
+ 1 file changed, 41 insertions(+), 37 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 5ba312ab99786..492492a010a2f 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1625,9 +1625,9 @@ static bool io_match_files(struct io_kiocb *req,
+ }
+ 
+ /* Returns true if there are no backlogged entries after the flush */
+-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+-                                   struct task_struct *tsk,
+-                                   struct files_struct *files)
++static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
++                                     struct task_struct *tsk,
++                                     struct files_struct *files)
+ {
+       struct io_rings *rings = ctx->rings;
+       struct io_kiocb *req, *tmp;
+@@ -1681,6 +1681,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+       return cqe != NULL;
+ }
+ 
++static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
++                                   struct task_struct *tsk,
++                                   struct files_struct *files)
++{
++      if (test_bit(0, &ctx->cq_check_overflow)) {
++              /* iopoll syncs against uring_lock, not completion_lock */
++              if (ctx->flags & IORING_SETUP_IOPOLL)
++                      mutex_lock(&ctx->uring_lock);
++              __io_cqring_overflow_flush(ctx, force, tsk, files);
++              if (ctx->flags & IORING_SETUP_IOPOLL)
++                      mutex_unlock(&ctx->uring_lock);
++      }
++}
++
+ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+@@ -2235,22 +2249,10 @@ static void io_double_put_req(struct io_kiocb *req)
+               io_free_req(req);
+ }
+ 
+-static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
++static unsigned io_cqring_events(struct io_ring_ctx *ctx)
+ {
+       struct io_rings *rings = ctx->rings;
+ 
+-      if (test_bit(0, &ctx->cq_check_overflow)) {
+-              /*
+-               * noflush == true is from the waitqueue handler, just ensure
+-               * we wake up the task, and the next invocation will flush the
+-               * entries. We cannot safely to it from here.
+-               */
+-              if (noflush)
+-                      return -1U;
+-
+-              io_cqring_overflow_flush(ctx, false, NULL, NULL);
+-      }
+-
+       /* See comment at the top of this file */
+       smp_rmb();
+       return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
+@@ -2475,7 +2477,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
+                * If we do, we can potentially be spinning for commands that
+                * already triggered a CQE (eg in error).
+                */
+-              if (io_cqring_events(ctx, false))
++              if (test_bit(0, &ctx->cq_check_overflow))
++                      __io_cqring_overflow_flush(ctx, false, NULL, NULL);
++              if (io_cqring_events(ctx))
+                       break;
+ 
+               /*
+@@ -6578,7 +6582,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
+ 
+       /* if we have a backlog and couldn't flush it all, return BUSY */
+       if (test_bit(0, &ctx->sq_check_overflow)) {
+-              if (!io_cqring_overflow_flush(ctx, false, NULL, NULL))
++              if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
+                       return -EBUSY;
+       }
+ 
+@@ -6867,7 +6871,7 @@ struct io_wait_queue {
+       unsigned nr_timeouts;
+ };
+ 
+-static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
++static inline bool io_should_wake(struct io_wait_queue *iowq)
+ {
+       struct io_ring_ctx *ctx = iowq->ctx;
+ 
+@@ -6876,7 +6880,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
+        * started waiting. For timeouts, we always want to return to userspace,
+        * regardless of event count.
+        */
+-      return io_cqring_events(ctx, noflush) >= iowq->to_wait ||
++      return io_cqring_events(ctx) >= iowq->to_wait ||
+                       atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+ }
+ 
+@@ -6886,11 +6890,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+       struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+                                                       wq);
+ 
+-      /* use noflush == true, as we can't safely rely on locking context */
+-      if (!io_should_wake(iowq, true))
+-              return -1;
+-
+-      return autoremove_wake_function(curr, mode, wake_flags, key);
++      /*
++       * Cannot safely flush overflowed CQEs from here, ensure we wake up
++       * the task, and the next invocation will do it.
++       */
++      if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow))
++              return autoremove_wake_function(curr, mode, wake_flags, key);
++      return -1;
+ }
+ 
+ static int io_run_task_work_sig(void)
+@@ -6929,7 +6935,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+       int ret = 0;
+ 
+       do {
+-              if (io_cqring_events(ctx, false) >= min_events)
++              io_cqring_overflow_flush(ctx, false, NULL, NULL);
++              if (io_cqring_events(ctx) >= min_events)
+                       return 0;
+               if (!io_run_task_work())
+                       break;
+@@ -6951,6 +6958,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+       iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+       trace_io_uring_cqring_wait(ctx, min_events);
+       do {
++              io_cqring_overflow_flush(ctx, false, NULL, NULL);
+               prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+                                               TASK_INTERRUPTIBLE);
+               /* make sure we run task_work before checking for signals */
+@@ -6959,8 +6967,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+                       continue;
+               else if (ret < 0)
+                       break;
+-              if (io_should_wake(&iowq, false))
++              if (io_should_wake(&iowq))
+                       break;
++              if (test_bit(0, &ctx->cq_check_overflow))
++                      continue;
+               schedule();
+       } while (1);
+       finish_wait(&ctx->wait, &iowq.wq);
+@@ -8385,7 +8395,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
+       smp_rmb();
+       if (!io_sqring_full(ctx))
+               mask |= EPOLLOUT | EPOLLWRNORM;
+-      if (io_cqring_events(ctx, false))
++      io_cqring_overflow_flush(ctx, false, NULL, NULL);
++      if (io_cqring_events(ctx))
+               mask |= EPOLLIN | EPOLLRDNORM;
+ 
+       return mask;
+@@ -8443,7 +8454,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+       /* if force is set, the ring is going away. always drop after that */
+       ctx->cq_overflow_flushed = 1;
+       if (ctx->rings)
+-              io_cqring_overflow_flush(ctx, true, NULL, NULL);
++              __io_cqring_overflow_flush(ctx, true, NULL, NULL);
+       mutex_unlock(&ctx->uring_lock);
+ 
+       io_kill_timeouts(ctx, NULL);
+@@ -8716,9 +8727,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
+       }
+ 
+       io_cancel_defer_files(ctx, task, files);
+-      io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
+       io_cqring_overflow_flush(ctx, true, task, files);
+-      io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
+ 
+       while (__io_uring_cancel_task_requests(ctx, task, files)) {
+               io_run_task_work();
+@@ -9024,13 +9033,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
+        */
+       ret = 0;
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+-              if (!list_empty_careful(&ctx->cq_overflow_list)) {
+-                      bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL;
++              io_cqring_overflow_flush(ctx, false, NULL, NULL);
+ 
+-                      io_ring_submit_lock(ctx, needs_lock);
+-                      io_cqring_overflow_flush(ctx, false, NULL, NULL);
+-                      io_ring_submit_unlock(ctx, needs_lock);
+-              }
+               if (flags & IORING_ENTER_SQ_WAKEUP)
+                       wake_up(&ctx->sq_data->wait);
+               if (flags & IORING_ENTER_SQ_WAIT)
+-- 
+2.27.0
+
diff --git a/queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch b/queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch

new file mode 100644 (file)

index 0000000..611dc8d
--- /dev/null
+++ b/queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch
@@ -0,0 +1,48 @@
+From c376c506b4513e214ce71f8d5dfd113f428eb126 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 21:17:24 +0000
+Subject: io_uring: synchronise IOPOLL on task_submit fail
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 81b6d05ccad4f3d8a9dfb091fb46ad6978ee40e4 upstream
+
+io_req_task_submit() might be called for IOPOLL, do the fail path under
+uring_lock to comply with IOPOLL synchronisation based solely on it.
+
+Cc: stable@vger.kernel.org # 5.5+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 1f798c5c4213e..3974b4f124b6a 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2047,14 +2047,15 @@ static void io_req_task_cancel(struct callback_head *cb)
+ static void __io_req_task_submit(struct io_kiocb *req)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
++      bool fail;
+ 
+-      if (!__io_sq_thread_acquire_mm(ctx)) {
+-              mutex_lock(&ctx->uring_lock);
++      fail = __io_sq_thread_acquire_mm(ctx);
++      mutex_lock(&ctx->uring_lock);
++      if (!fail)
+               __io_queue_sqe(req, NULL);
+-              mutex_unlock(&ctx->uring_lock);
+-      } else {
++      else
+               __io_req_task_cancel(req, -EFAULT);
+-      }
++      mutex_unlock(&ctx->uring_lock);
+ }
+ 
+ static void io_req_task_submit(struct callback_head *cb)
+-- 
+2.27.0
+
diff --git a/queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch b/queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch

new file mode 100644 (file)

index 0000000..3e564c4
--- /dev/null
+++ b/queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch
@@ -0,0 +1,53 @@
+From 4e6441b56402324283571d96bd9a55ef97b74b4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jan 2021 06:40:20 +0000
+Subject: powerpc/32s: Fix RTAS machine check with VMAP stack
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit 98bf2d3f4970179c702ef64db658e0553bc6ef3a ]
+
+When we have VMAP stack, exception prolog 1 sets r1, not r11.
+
+When it is not an RTAS machine check, don't trash r1 because it is
+needed by prolog 1.
+
+Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU")
+Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs")
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+[mpe: Squash in fixup for RTAS machine check from Christophe]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.1608621048.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/kernel/head_book3s_32.S | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
+index a0dda2a1f2df0..d66da35f2e8d3 100644
+--- a/arch/powerpc/kernel/head_book3s_32.S
++++ b/arch/powerpc/kernel/head_book3s_32.S
+@@ -262,10 +262,19 @@ __secondary_hold_acknowledge:
+ MachineCheck:
+       EXCEPTION_PROLOG_0
+ #ifdef CONFIG_PPC_CHRP
++#ifdef CONFIG_VMAP_STACK
++      mr      r11, r1
++      mfspr   r1, SPRN_SPRG_THREAD
++      lwz     r1, RTAS_SP(r1)
++      cmpwi   cr1, r1, 0
++      bne     cr1, 7f
++      mr      r1, r11
++#else
+       mfspr   r11, SPRN_SPRG_THREAD
+       lwz     r11, RTAS_SP(r11)
+       cmpwi   cr1, r11, 0
+       bne     cr1, 7f
++#endif
+ #endif /* CONFIG_PPC_CHRP */
+       EXCEPTION_PROLOG_1 for_rtas=1
+ 7:    EXCEPTION_PROLOG_2
+-- 
+2.27.0
+
diff --git a/queue-5.10/series b/queue-5.10/series

new file mode 100644 (file)

index 0000000..790da92
--- /dev/null
+++ b/queue-5.10/series
@@ -0,0 +1,4 @@
+powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch
+io_uring-synchronise-iopoll-on-task_submit-fail.patch
+io_uring-limit-io-sq-poll-submit-locking-scope.patch
+io_uring-patch-up-iopoll-overflow_flush-sync.patch
author	Sasha Levin <sashal@kernel.org>
	Wed, 13 Jan 2021 17:32:17 +0000 (12:32 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Wed, 13 Jan 2021 17:32:17 +0000 (12:32 -0500)
queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch	[new file with mode: 0644]	patch | blob
queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch	[new file with mode: 0644]	patch | blob
queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch	[new file with mode: 0644]	patch | blob
queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch	[new file with mode: 0644]	patch | blob
queue-5.10/series	[new file with mode: 0644]	patch | blob