From: Sasha Levin Date: Wed, 13 Jan 2021 17:32:17 +0000 (-0500) Subject: Fixes for 5.10 X-Git-Tag: v4.4.252~38 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1e838eb9ed249bc0cd8b53d5a490986b393b5be7;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch b/queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch new file mode 100644 index 00000000000..8b3016c07eb --- /dev/null +++ b/queue-5.10/io_uring-limit-io-sq-poll-submit-locking-scope.patch @@ -0,0 +1,44 @@ +From 3a5d5f02360671f2af08c015b3b0108254c256b9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jan 2021 21:17:25 +0000 +Subject: io_uring: limit {io|sq}poll submit locking scope + +From: Pavel Begunkov + +commit 89448c47b8452b67c146dc6cad6f737e004c5caf upstream + +We don't need to take uring_lock for SQPOLL|IOPOLL to do +io_cqring_overflow_flush() when cq_overflow_list is empty, remove it +from the hot path. + +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + fs/io_uring.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 3974b4f124b6a..5ba312ab99786 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -9024,10 +9024,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, + */ + ret = 0; + if (ctx->flags & IORING_SETUP_SQPOLL) { +- io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); +- if (!list_empty_careful(&ctx->cq_overflow_list)) ++ if (!list_empty_careful(&ctx->cq_overflow_list)) { ++ bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; ++ ++ io_ring_submit_lock(ctx, needs_lock); + io_cqring_overflow_flush(ctx, false, NULL, NULL); +- io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); ++ io_ring_submit_unlock(ctx, needs_lock); ++ } + if (flags & IORING_ENTER_SQ_WAKEUP) + wake_up(&ctx->sq_data->wait); + if (flags & IORING_ENTER_SQ_WAIT) +-- +2.27.0 + diff --git a/queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch b/queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch new file mode 100644 index 00000000000..d5904f2bb19 --- /dev/null +++ b/queue-5.10/io_uring-patch-up-iopoll-overflow_flush-sync.patch @@ -0,0 +1,219 @@ +From ee7c027ca613e6e5e6fc1aa32679280e6f08415a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jan 2021 21:17:26 +0000 +Subject: io_uring: patch up IOPOLL overflow_flush sync + +From: Pavel Begunkov + +commit 6c503150ae33ee19036255cfda0998463613352c upstream + +IOPOLL skips completion locking but keeps it under uring_lock, thus +io_cqring_overflow_flush() and so io_cqring_events() need additional +locking with uring_lock in some cases for IOPOLL. + +Remove __io_cqring_overflow_flush() from io_cqring_events(), introduce a +wrapper around flush doing needed synchronisation and call it by hand. 
+ +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + fs/io_uring.c | 78 +++++++++++++++++++++++++++------------------------ + 1 file changed, 41 insertions(+), 37 deletions(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 5ba312ab99786..492492a010a2f 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -1625,9 +1625,9 @@ static bool io_match_files(struct io_kiocb *req, + } + + /* Returns true if there are no backlogged entries after the flush */ +-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, +- struct task_struct *tsk, +- struct files_struct *files) ++static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, ++ struct task_struct *tsk, ++ struct files_struct *files) + { + struct io_rings *rings = ctx->rings; + struct io_kiocb *req, *tmp; +@@ -1681,6 +1681,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + return cqe != NULL; + } + ++static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, ++ struct task_struct *tsk, ++ struct files_struct *files) ++{ ++ if (test_bit(0, &ctx->cq_check_overflow)) { ++ /* iopoll syncs against uring_lock, not completion_lock */ ++ if (ctx->flags & IORING_SETUP_IOPOLL) ++ mutex_lock(&ctx->uring_lock); ++ __io_cqring_overflow_flush(ctx, force, tsk, files); ++ if (ctx->flags & IORING_SETUP_IOPOLL) ++ mutex_unlock(&ctx->uring_lock); ++ } ++} ++ + static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) + { + struct io_ring_ctx *ctx = req->ctx; +@@ -2235,22 +2249,10 @@ static void io_double_put_req(struct io_kiocb *req) + io_free_req(req); + } + +-static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) ++static unsigned io_cqring_events(struct io_ring_ctx *ctx) + { + struct io_rings *rings = ctx->rings; + +- if (test_bit(0, &ctx->cq_check_overflow)) { +- /* +- * noflush == true is from the waitqueue handler, just ensure +- * we wake up the task, and the next invocation will flush the +- * entries. We cannot safely to it from here. +- */ +- if (noflush) +- return -1U; +- +- io_cqring_overflow_flush(ctx, false, NULL, NULL); +- } +- + /* See comment at the top of this file */ + smp_rmb(); + return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); +@@ -2475,7 +2477,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) + * If we do, we can potentially be spinning for commands that + * already triggered a CQE (eg in error). + */ +- if (io_cqring_events(ctx, false)) ++ if (test_bit(0, &ctx->cq_check_overflow)) ++ __io_cqring_overflow_flush(ctx, false, NULL, NULL); ++ if (io_cqring_events(ctx)) + break; + + /* +@@ -6578,7 +6582,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) + + /* if we have a backlog and couldn't flush it all, return BUSY */ + if (test_bit(0, &ctx->sq_check_overflow)) { +- if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) ++ if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL)) + return -EBUSY; + } + +@@ -6867,7 +6871,7 @@ struct io_wait_queue { + unsigned nr_timeouts; + }; + +-static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) ++static inline bool io_should_wake(struct io_wait_queue *iowq) + { + struct io_ring_ctx *ctx = iowq->ctx; + +@@ -6876,7 +6880,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) + * started waiting. For timeouts, we always want to return to userspace, + * regardless of event count. 
+ */ +- return io_cqring_events(ctx, noflush) >= iowq->to_wait || ++ return io_cqring_events(ctx) >= iowq->to_wait || + atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; + } + +@@ -6886,11 +6890,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, + wq); + +- /* use noflush == true, as we can't safely rely on locking context */ +- if (!io_should_wake(iowq, true)) +- return -1; +- +- return autoremove_wake_function(curr, mode, wake_flags, key); ++ /* ++ * Cannot safely flush overflowed CQEs from here, ensure we wake up ++ * the task, and the next invocation will do it. ++ */ ++ if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow)) ++ return autoremove_wake_function(curr, mode, wake_flags, key); ++ return -1; + } + + static int io_run_task_work_sig(void) +@@ -6929,7 +6935,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, + int ret = 0; + + do { +- if (io_cqring_events(ctx, false) >= min_events) ++ io_cqring_overflow_flush(ctx, false, NULL, NULL); ++ if (io_cqring_events(ctx) >= min_events) + return 0; + if (!io_run_task_work()) + break; +@@ -6951,6 +6958,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); + trace_io_uring_cqring_wait(ctx, min_events); + do { ++ io_cqring_overflow_flush(ctx, false, NULL, NULL); + prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, + TASK_INTERRUPTIBLE); + /* make sure we run task_work before checking for signals */ +@@ -6959,8 +6967,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, + continue; + else if (ret < 0) + break; +- if (io_should_wake(&iowq, false)) ++ if (io_should_wake(&iowq)) + break; ++ if (test_bit(0, &ctx->cq_check_overflow)) ++ continue; + schedule(); + } while (1); + finish_wait(&ctx->wait, &iowq.wq); +@@ -8385,7 +8395,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) + smp_rmb(); + if (!io_sqring_full(ctx)) + mask |= EPOLLOUT | EPOLLWRNORM; +- if (io_cqring_events(ctx, false)) ++ io_cqring_overflow_flush(ctx, false, NULL, NULL); ++ if (io_cqring_events(ctx)) + mask |= EPOLLIN | EPOLLRDNORM; + + return mask; +@@ -8443,7 +8454,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) + /* if force is set, the ring is going away. 
always drop after that */ + ctx->cq_overflow_flushed = 1; + if (ctx->rings) +- io_cqring_overflow_flush(ctx, true, NULL, NULL); ++ __io_cqring_overflow_flush(ctx, true, NULL, NULL); + mutex_unlock(&ctx->uring_lock); + + io_kill_timeouts(ctx, NULL); +@@ -8716,9 +8727,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, + } + + io_cancel_defer_files(ctx, task, files); +- io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); + io_cqring_overflow_flush(ctx, true, task, files); +- io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); + + while (__io_uring_cancel_task_requests(ctx, task, files)) { + io_run_task_work(); +@@ -9024,13 +9033,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, + */ + ret = 0; + if (ctx->flags & IORING_SETUP_SQPOLL) { +- if (!list_empty_careful(&ctx->cq_overflow_list)) { +- bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; ++ io_cqring_overflow_flush(ctx, false, NULL, NULL); + +- io_ring_submit_lock(ctx, needs_lock); +- io_cqring_overflow_flush(ctx, false, NULL, NULL); +- io_ring_submit_unlock(ctx, needs_lock); +- } + if (flags & IORING_ENTER_SQ_WAKEUP) + wake_up(&ctx->sq_data->wait); + if (flags & IORING_ENTER_SQ_WAIT) +-- +2.27.0 + diff --git a/queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch b/queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch new file mode 100644 index 00000000000..611dc8da50f --- /dev/null +++ b/queue-5.10/io_uring-synchronise-iopoll-on-task_submit-fail.patch @@ -0,0 +1,48 @@ +From c376c506b4513e214ce71f8d5dfd113f428eb126 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jan 2021 21:17:24 +0000 +Subject: io_uring: synchronise IOPOLL on task_submit fail + +From: Pavel Begunkov + +commit 81b6d05ccad4f3d8a9dfb091fb46ad6978ee40e4 upstream + +io_req_task_submit() might be called for IOPOLL, do the fail path under +uring_lock to comply with IOPOLL synchronisation based solely on it. + +Cc: stable@vger.kernel.org # 5.5+ +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + fs/io_uring.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 1f798c5c4213e..3974b4f124b6a 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2047,14 +2047,15 @@ static void io_req_task_cancel(struct callback_head *cb) + static void __io_req_task_submit(struct io_kiocb *req) + { + struct io_ring_ctx *ctx = req->ctx; ++ bool fail; + +- if (!__io_sq_thread_acquire_mm(ctx)) { +- mutex_lock(&ctx->uring_lock); ++ fail = __io_sq_thread_acquire_mm(ctx); ++ mutex_lock(&ctx->uring_lock); ++ if (!fail) + __io_queue_sqe(req, NULL); +- mutex_unlock(&ctx->uring_lock); +- } else { ++ else + __io_req_task_cancel(req, -EFAULT); +- } ++ mutex_unlock(&ctx->uring_lock); + } + + static void io_req_task_submit(struct callback_head *cb) +-- +2.27.0 + diff --git a/queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch b/queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch new file mode 100644 index 00000000000..3e564c4784f --- /dev/null +++ b/queue-5.10/powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch @@ -0,0 +1,53 @@ +From 4e6441b56402324283571d96bd9a55ef97b74b4d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Jan 2021 06:40:20 +0000 +Subject: powerpc/32s: Fix RTAS machine check with VMAP stack + +From: Christophe Leroy + +[ Upstream commit 98bf2d3f4970179c702ef64db658e0553bc6ef3a ] + +When we have VMAP stack, exception prolog 1 sets r1, not r11. 
+ +When it is not an RTAS machine check, don't trash r1 because it is +needed by prolog 1. + +Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU") +Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs") +Cc: stable@vger.kernel.org # v5.10+ +Signed-off-by: Christophe Leroy +[mpe: Squash in fixup for RTAS machine check from Christophe] +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.1608621048.git.christophe.leroy@csgroup.eu +Signed-off-by: Sasha Levin +--- + arch/powerpc/kernel/head_book3s_32.S | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S +index a0dda2a1f2df0..d66da35f2e8d3 100644 +--- a/arch/powerpc/kernel/head_book3s_32.S ++++ b/arch/powerpc/kernel/head_book3s_32.S +@@ -262,10 +262,19 @@ __secondary_hold_acknowledge: + MachineCheck: + EXCEPTION_PROLOG_0 + #ifdef CONFIG_PPC_CHRP ++#ifdef CONFIG_VMAP_STACK ++ mr r11, r1 ++ mfspr r1, SPRN_SPRG_THREAD ++ lwz r1, RTAS_SP(r1) ++ cmpwi cr1, r1, 0 ++ bne cr1, 7f ++ mr r1, r11 ++#else + mfspr r11, SPRN_SPRG_THREAD + lwz r11, RTAS_SP(r11) + cmpwi cr1, r11, 0 + bne cr1, 7f ++#endif + #endif /* CONFIG_PPC_CHRP */ + EXCEPTION_PROLOG_1 for_rtas=1 + 7: EXCEPTION_PROLOG_2 +-- +2.27.0 + diff --git a/queue-5.10/series b/queue-5.10/series new file mode 100644 index 00000000000..790da9248e4 --- /dev/null +++ b/queue-5.10/series @@ -0,0 +1,4 @@ +powerpc-32s-fix-rtas-machine-check-with-vmap-stack.patch +io_uring-synchronise-iopoll-on-task_submit-fail.patch +io_uring-limit-io-sq-poll-submit-locking-scope.patch +io_uring-patch-up-iopoll-overflow_flush-sync.patch
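
The three io_uring backports above all tighten the same locking rule: IOPOLL
completions are serialised by uring_lock rather than completion_lock, so any
path that may flush the CQ overflow list has to take uring_lock when
IORING_SETUP_IOPOLL is set, while the hot path skips the lock entirely as long
as no overflow is pending. A minimal userspace sketch of that pattern follows;
the names (ring_ctx, flush_overflow, big_lock) are hypothetical and a pthread
mutex merely stands in for uring_lock - it illustrates the shape of the fix,
not the kernel code itself.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct ring_ctx {
	bool iopoll;              /* completions synced on big_lock, like IOPOLL */
	bool overflow_pending;    /* analogue of ctx->cq_check_overflow          */
	pthread_mutex_t big_lock; /* analogue of ctx->uring_lock                 */
};

/* Does the actual flush; caller holds big_lock when iopoll is set. */
static void flush_overflow_locked(struct ring_ctx *ctx)
{
	ctx->overflow_pending = false;
	printf("overflow flushed\n");
}

/*
 * Wrapper in the spirit of io_cqring_overflow_flush() after the patches:
 * bail out lock-free on the hot path, and take the lock only when IOPOLL
 * synchronisation actually requires it.
 */
static void flush_overflow(struct ring_ctx *ctx)
{
	if (!ctx->overflow_pending)
		return;			/* hot path: no locking at all */

	if (ctx->iopoll)
		pthread_mutex_lock(&ctx->big_lock);
	flush_overflow_locked(ctx);
	if (ctx->iopoll)
		pthread_mutex_unlock(&ctx->big_lock);
}

int main(void)
{
	struct ring_ctx ctx = { .iopoll = true, .overflow_pending = true };

	pthread_mutex_init(&ctx.big_lock, NULL);
	flush_overflow(&ctx);	/* pending + iopoll: takes and drops the lock */
	flush_overflow(&ctx);	/* nothing pending: returns without locking   */
	pthread_mutex_destroy(&ctx.big_lock);
	return 0;
}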