From 4f306938e2275d96b56b66313c15313259fa6389 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Mon, 26 Aug 2019 18:31:40 -0400
Subject: [PATCH] io_uring fixes for 5.2

Signed-off-by: Sasha Levin
---
 ...eed_resched-check-in-inner-poll-loop.patch |  54 +++++++++
 ...nter-poll-loop-if-we-have-cqes-pendi.patch |  68 ++++++++++++
 ...ng-fix-potential-hang-with-polled-io.patch | 105 ++++++++++++++++++
 queue-5.2/series                              |   3 +
 4 files changed, 230 insertions(+)
 create mode 100644 queue-5.2/io_uring-add-need_resched-check-in-inner-poll-loop.patch
 create mode 100644 queue-5.2/io_uring-don-t-enter-poll-loop-if-we-have-cqes-pendi.patch
 create mode 100644 queue-5.2/io_uring-fix-potential-hang-with-polled-io.patch

diff --git a/queue-5.2/io_uring-add-need_resched-check-in-inner-poll-loop.patch b/queue-5.2/io_uring-add-need_resched-check-in-inner-poll-loop.patch
new file mode 100644
index 00000000000..656d5961aec
--- /dev/null
+++ b/queue-5.2/io_uring-add-need_resched-check-in-inner-poll-loop.patch
@@ -0,0 +1,54 @@
+From cdb89025a898ca7f0899b5c8f43ca32fa5dad86b Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Wed, 21 Aug 2019 22:19:11 -0600
+Subject: io_uring: add need_resched() check in inner poll loop
+
+[ Upstream commit 08f5439f1df25a6cf6cf4c72cf6c13025599ce67 ]
+
+The outer poll loop checks for whether we need to reschedule, and
+returns to userspace if we do. However, it's possible to get stuck
+in the inner loop as well, if the CPU we are running on needs to
+reschedule to finish the IO work.
+
+Add the need_resched() check in the inner loop as well. This fixes
+a potential hang if the kernel is configured with
+CONFIG_PREEMPT_VOLUNTARY=y.
+
+Reported-by: Sagi Grimberg
+Reviewed-by: Sagi Grimberg
+Tested-by: Sagi Grimberg
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 83e3cede11220..03cd8f5bba850 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -716,7 +716,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
+ static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
+ 				long min)
+ {
+-	while (!list_empty(&ctx->poll_list)) {
++	while (!list_empty(&ctx->poll_list) && !need_resched()) {
+ 		int ret;
+ 
+ 		ret = io_do_iopoll(ctx, nr_events, min);
+@@ -743,6 +743,12 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
+ 		unsigned int nr_events = 0;
+ 
+ 		io_iopoll_getevents(ctx, &nr_events, 1);
++
++		/*
++		 * Ensure we allow local-to-the-cpu processing to take place,
++		 * in this case we need to ensure that we reap all events.
++		 */
++		cond_resched();
+ 	}
+ 	mutex_unlock(&ctx->uring_lock);
+ }
+-- 
+2.20.1
+
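[ Editor's note: the sketch below is not part of the patch above. It is a
  minimal userspace C analogue of the busy-poll starvation the
  need_resched() check addresses: under cooperative preemption (e.g.
  CONFIG_PREEMPT_VOLUNTARY=y), a poll loop that never yields can starve
  the very thread that must complete the IO being polled for.
  sched_yield() stands in for the kernel's need_resched()/cond_resched()
  pair, and the names poll_once/work_pending are hypothetical. ]

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool work_pending = true;	/* cleared by another thread */

static bool poll_once(void)
{
	/* Stand-in for io_do_iopoll(): reap any completions found. */
	return !atomic_load(&work_pending);
}

static void poll_until_done(void)
{
	unsigned spins = 0;

	while (!poll_once()) {
		/*
		 * Analogue of the need_resched() check in the inner loop:
		 * periodically let the scheduler run whoever is supposed
		 * to finish the IO, instead of spinning forever.
		 */
		if (!(++spins & 1023))
			sched_yield();
	}
}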
diff --git a/queue-5.2/io_uring-don-t-enter-poll-loop-if-we-have-cqes-pendi.patch b/queue-5.2/io_uring-don-t-enter-poll-loop-if-we-have-cqes-pendi.patch
new file mode 100644
index 00000000000..78c561bb121
--- /dev/null
+++ b/queue-5.2/io_uring-don-t-enter-poll-loop-if-we-have-cqes-pendi.patch
@@ -0,0 +1,68 @@
+From 7abdcd80befc266e70f2363af05240103ca386b2 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Tue, 20 Aug 2019 11:03:11 -0600
+Subject: io_uring: don't enter poll loop if we have CQEs pending
+
+[ Upstream commit a3a0e43fd77013819e4b6f55e37e0efe8e35d805 ]
+
+We need to check if we have CQEs pending before starting a poll loop,
+as those could be the events we will be spinning for (and hence we'll
+find none). This can happen if a CQE triggers an error, or if it is
+found by eg an IRQ before we get a chance to find it through polling.
+
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 5bb01d84f38d3..83e3cede11220 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -618,6 +618,13 @@ static void io_put_req(struct io_kiocb *req)
+ 		io_free_req(req);
+ }
+ 
++static unsigned io_cqring_events(struct io_cq_ring *ring)
++{
++	/* See comment at the top of this file */
++	smp_rmb();
++	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
++}
++
+ /*
+  * Find and free completed poll iocbs
+  */
+@@ -756,6 +763,14 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ 	do {
+ 		int tmin = 0;
+ 
++		/*
++		 * Don't enter poll loop if we already have events pending.
++		 * If we do, we can potentially be spinning for commands that
++		 * already triggered a CQE (eg in error).
++		 */
++		if (io_cqring_events(ctx->cq_ring))
++			break;
++
+ 		/*
+ 		 * If a submit got punted to a workqueue, we can have the
+ 		 * application entering polling for a command before it gets
+@@ -2232,13 +2247,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
+ 	return submit;
+ }
+ 
+-static unsigned io_cqring_events(struct io_cq_ring *ring)
+-{
+-	/* See comment at the top of this file */
+-	smp_rmb();
+-	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
+-}
+-
+ /*
+  * Wait until events become available, if we don't already have some. The
+  * application must reap them itself, as they reside on the shared cq ring.
+-- 
+2.20.1
+
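[ Editor's note: the sketch below is not part of the patch above. It
  restates the idea behind io_cqring_events() in portable C11: head and
  tail are free-running counters shared with a producer, and unsigned
  subtraction gives the number of pending events even across counter
  wrap-around. The acquire load plays the role of the kernel's
  smp_rmb()/READ_ONCE() pairing against the shared CQ ring; the struct
  layout here is hypothetical, not the io_uring uAPI. Skipping the poll
  loop when this returns nonzero is the early break the patch adds. ]

#include <stdatomic.h>

struct cq_ring {
	_Atomic unsigned head;	/* advanced by the consumer (application) */
	_Atomic unsigned tail;	/* advanced by the producer (kernel side) */
};

static unsigned cq_events_pending(struct cq_ring *ring)
{
	/* Pairs with the producer's release store of tail. */
	unsigned tail = atomic_load_explicit(&ring->tail, memory_order_acquire);
	unsigned head = atomic_load_explicit(&ring->head, memory_order_relaxed);

	/* Unsigned wrap-around keeps tail - head correct. */
	return tail - head;
}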
diff --git a/queue-5.2/io_uring-fix-potential-hang-with-polled-io.patch b/queue-5.2/io_uring-fix-potential-hang-with-polled-io.patch
new file mode 100644
index 00000000000..dbab3d9dd43
--- /dev/null
+++ b/queue-5.2/io_uring-fix-potential-hang-with-polled-io.patch
@@ -0,0 +1,105 @@
+From 394c0118eae16ded999c104233779d66b9d6459c Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 19 Aug 2019 12:15:59 -0600
+Subject: io_uring: fix potential hang with polled IO
+
+[ Upstream commit 500f9fbadef86466a435726192f4ca4df7d94236 ]
+
+If a request issue ends up being punted to async context to avoid
+blocking, we can get into a situation where the original application
+enters the poll loop for that very request before it has been issued.
+This should not be an issue, except that the polling will hold the
+io_uring uring_ctx mutex for the duration of the poll. When the async
+worker has actually issued the request, it needs to acquire this mutex
+to add the request to the poll issued list. Since the application
+polling is already holding this mutex, the workqueue sleeps on the
+mutex forever, and the application thus never gets a chance to poll for
+the very request it was interested in.
+
+Fix this by ensuring that the polling drops the uring_ctx occasionally
+if it's not making any progress.
+
+Reported-by: Jeffrey M. Birnbaum
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 36 +++++++++++++++++++++++++-----------
+ 1 file changed, 25 insertions(+), 11 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 61018559e8fe6..5bb01d84f38d3 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -743,11 +743,34 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
+ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ 			   long min)
+ {
+-	int ret = 0;
++	int iters, ret = 0;
++
++	/*
++	 * We disallow the app entering submit/complete with polling, but we
++	 * still need to lock the ring to prevent racing with polled issue
++	 * that got punted to a workqueue.
++	 */
++	mutex_lock(&ctx->uring_lock);
+ 
++	iters = 0;
+ 	do {
+ 		int tmin = 0;
+ 
++		/*
++		 * If a submit got punted to a workqueue, we can have the
++		 * application entering polling for a command before it gets
++		 * issued. That app will hold the uring_lock for the duration
++		 * of the poll right here, so we need to take a breather every
++		 * now and then to ensure that the issue has a chance to add
++		 * the poll to the issued list. Otherwise we can spin here
++		 * forever, while the workqueue is stuck trying to acquire the
++		 * very same mutex.
++		 */
++		if (!(++iters & 7)) {
++			mutex_unlock(&ctx->uring_lock);
++			mutex_lock(&ctx->uring_lock);
++		}
++
+ 		if (*nr_events < min)
+ 			tmin = min - *nr_events;
+ 
+@@ -757,6 +780,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ 		ret = 0;
+ 	} while (min && !*nr_events && !need_resched());
+ 
++	mutex_unlock(&ctx->uring_lock);
+ 	return ret;
+ }
+ 
+@@ -2073,15 +2097,7 @@ static int io_sq_thread(void *data)
+ 		unsigned nr_events = 0;
+ 
+ 		if (ctx->flags & IORING_SETUP_IOPOLL) {
+-			/*
+-			 * We disallow the app entering submit/complete
+-			 * with polling, but we still need to lock the
+-			 * ring to prevent racing with polled issue
+-			 * that got punted to a workqueue.
+-			 */
+-			mutex_lock(&ctx->uring_lock);
+ 			io_iopoll_check(ctx, &nr_events, 0);
+-			mutex_unlock(&ctx->uring_lock);
+ 		} else {
+ 			/*
+ 			 * Normal IO, just pretend everything completed.
+@@ -2978,9 +2994,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
+ 		min_complete = min(min_complete, ctx->cq_entries);
+ 
+ 		if (ctx->flags & IORING_SETUP_IOPOLL) {
+-			mutex_lock(&ctx->uring_lock);
+ 			ret = io_iopoll_check(ctx, &nr_events, min_complete);
+-			mutex_unlock(&ctx->uring_lock);
+ 		} else {
+ 			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+ 		}
+-- 
+2.20.1
+
diff --git a/queue-5.2/series b/queue-5.2/series
index 53092549e48..34f8e1de3e2 100644
--- a/queue-5.2/series
+++ b/queue-5.2/series
@@ -154,3 +154,6 @@ mm-kasan-fix-false-positive-invalid-free-reports-with-config_kasan_sw_tags-y.pat
 xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
 ib-hfi1-drop-stale-tid-rdma-packets.patch
 dm-zoned-fix-potential-null-dereference-in-dmz_do_re.patch
+io_uring-fix-potential-hang-with-polled-io.patch
+io_uring-don-t-enter-poll-loop-if-we-have-cqes-pendi.patch
+io_uring-add-need_resched-check-in-inner-poll-loop.patch
-- 
2.47.3
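[ Editor's note: the sketch below is not part of the series. It restates
  the "breather" pattern from the polled-IO hang fix in pthread terms: a
  poller that holds a mutex across a spin loop must periodically drop and
  re-take it, or a worker that needs the same mutex to complete the
  awaited request can block forever. As in the kernel patch, this relies
  on the waiter winning the lock race often enough, not on fair handoff.
  All names below are hypothetical. ]

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t ring_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool request_completed;	/* set by the async worker */

static bool poll_for_completion(void)
{
	/* Stand-in for io_do_iopoll(): check for the awaited completion. */
	return atomic_load(&request_completed);
}

static void poll_loop(void)
{
	unsigned iters = 0;

	pthread_mutex_lock(&ring_lock);
	while (!poll_for_completion()) {
		/*
		 * Take a breather every 8 iterations, mirroring the
		 * !(++iters & 7) check in io_iopoll_check(): drop the lock
		 * so the worker can acquire it and post the completion.
		 */
		if (!(++iters & 7)) {
			pthread_mutex_unlock(&ring_lock);
			pthread_mutex_lock(&ring_lock);
		}
	}
	pthread_mutex_unlock(&ring_lock);
}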