--- /dev/null
+From f5ff6a51c7051323ef3cb201c352cb4eb1088175 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2019 12:31:30 +0300
+Subject: io_uring: Fix broken links with offloading
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ Upstream commit fb5ccc98782f654778cb8d96ba8a998304f9a51f ]
+
+io_sq_thread() processes sqes in batches of 8 without considering
+links. As a result, links will be randomly subdivided.
+
+The easiest way to fix it is to call io_get_sqring() inside
+io_submit_sqes(), as io_ring_submit() does.
+
+Downsides:
+1. This removes the optimisation of not grabbing mm_struct for fixed files
+2. It submits all sqes in one go, without finer-grained scheduling
+interleaved with cq processing.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 58 +++++++++++++++++++++++++++------------------------
+ 1 file changed, 31 insertions(+), 27 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 79f9c9f7b298e..518042cc6628b 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -687,6 +687,14 @@ static unsigned io_cqring_events(struct io_cq_ring *ring)
+ return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
+ }
+
++static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
++{
++ struct io_sq_ring *ring = ctx->sq_ring;
++
++ /* make sure SQ entry isn't read before tail */
++ return smp_load_acquire(&ring->r.tail) - ctx->cached_sq_head;
++}
++
+ /*
+ * Find and free completed poll iocbs
+ */
+@@ -2268,8 +2276,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
+ return false;
+ }
+
+-static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+- unsigned int nr, bool has_user, bool mm_fault)
++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
++ bool has_user, bool mm_fault)
+ {
+ struct io_submit_state state, *statep = NULL;
+ struct io_kiocb *link = NULL;
+@@ -2282,6 +2290,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+ }
+
+ for (i = 0; i < nr; i++) {
++ struct sqe_submit s;
++
++ if (!io_get_sqring(ctx, &s))
++ break;
++
+ /*
+ * If previous wasn't linked and we have a linked command,
+ * that's the end of the chain. Submit the previous link.
+@@ -2290,16 +2303,16 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+ io_queue_sqe(ctx, link, &link->submit);
+ link = NULL;
+ }
+- prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
++ prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
+
+ if (unlikely(mm_fault)) {
+- io_cqring_add_event(ctx, sqes[i].sqe->user_data,
++ io_cqring_add_event(ctx, s.sqe->user_data,
+ -EFAULT);
+ } else {
+- sqes[i].has_user = has_user;
+- sqes[i].needs_lock = true;
+- sqes[i].needs_fixed_file = true;
+- io_submit_sqe(ctx, &sqes[i], statep, &link);
++ s.has_user = has_user;
++ s.needs_lock = true;
++ s.needs_fixed_file = true;
++ io_submit_sqe(ctx, &s, statep, &link);
+ submitted++;
+ }
+ }
+@@ -2314,7 +2327,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+
+ static int io_sq_thread(void *data)
+ {
+- struct sqe_submit sqes[IO_IOPOLL_BATCH];
+ struct io_ring_ctx *ctx = data;
+ struct mm_struct *cur_mm = NULL;
+ mm_segment_t old_fs;
+@@ -2329,8 +2341,8 @@ static int io_sq_thread(void *data)
+
+ timeout = inflight = 0;
+ while (!kthread_should_park()) {
+- bool all_fixed, mm_fault = false;
+- int i;
++ bool mm_fault = false;
++ unsigned int to_submit;
+
+ if (inflight) {
+ unsigned nr_events = 0;
+@@ -2363,7 +2375,8 @@ static int io_sq_thread(void *data)
+ timeout = jiffies + ctx->sq_thread_idle;
+ }
+
+- if (!io_get_sqring(ctx, &sqes[0])) {
++ to_submit = io_sqring_entries(ctx);
++ if (!to_submit) {
+ /*
+ * We're polling. If we're within the defined idle
+ * period, then let us spin without work before going
+@@ -2394,7 +2407,8 @@ static int io_sq_thread(void *data)
+ /* make sure to read SQ tail after writing flags */
+ smp_mb();
+
+- if (!io_get_sqring(ctx, &sqes[0])) {
++ to_submit = io_sqring_entries(ctx);
++ if (!to_submit) {
+ if (kthread_should_park()) {
+ finish_wait(&ctx->sqo_wait, &wait);
+ break;
+@@ -2412,19 +2426,8 @@ static int io_sq_thread(void *data)
+ ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
+ }
+
+- i = 0;
+- all_fixed = true;
+- do {
+- if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
+- all_fixed = false;
+-
+- i++;
+- if (i == ARRAY_SIZE(sqes))
+- break;
+- } while (io_get_sqring(ctx, &sqes[i]));
+-
+ /* Unless all new commands are FIXED regions, grab mm */
+- if (!all_fixed && !cur_mm) {
++ if (!cur_mm) {
+ mm_fault = !mmget_not_zero(ctx->sqo_mm);
+ if (!mm_fault) {
+ use_mm(ctx->sqo_mm);
+@@ -2432,8 +2435,9 @@ static int io_sq_thread(void *data)
+ }
+ }
+
+- inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
+- mm_fault);
++ to_submit = min(to_submit, ctx->sq_entries);
++ inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
++ mm_fault);
+
+ /* Commit SQ ring head once we've consumed all SQEs */
+ io_commit_sqring(ctx);
+--
+2.20.1
+
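The io_sqring_entries() helper introduced above counts ready entries by
subtracting the kernel's private cached_sq_head from an acquire load of the
shared SQ tail, so every SQE slot userspace filled before its release store
of the tail is guaranteed to be visible. What follows is a minimal,
standalone userspace sketch of that producer/consumer pattern, not the
kernel code: the demo_* names are invented, and C11 atomics stand in for
smp_load_acquire()/smp_store_release().

    /* Illustration only: acquire-load the shared tail, keep a private head. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct demo_sq_ring {
            _Atomic unsigned int tail;      /* bumped by the producer */
            unsigned int entries[8];        /* slots, filled before the bump */
    };

    struct demo_ctx {
            struct demo_sq_ring ring;
            unsigned int cached_head;       /* consumer-private, free-running */
    };

    /* Free-running unsigned indices make the subtraction wrap-safe. */
    static unsigned int demo_sqring_entries(struct demo_ctx *ctx)
    {
            /* acquire: slot writes made before the tail bump are visible */
            unsigned int tail = atomic_load_explicit(&ctx->ring.tail,
                                                     memory_order_acquire);
            return tail - ctx->cached_head;
    }

    static void demo_produce(struct demo_ctx *ctx, unsigned int value)
    {
            unsigned int tail = atomic_load_explicit(&ctx->ring.tail,
                                                     memory_order_relaxed);
            ctx->ring.entries[tail & 7] = value;    /* fill the slot first */
            /* then publish it with a release store of the new tail */
            atomic_store_explicit(&ctx->ring.tail, tail + 1,
                                  memory_order_release);
    }

    int main(void)
    {
            struct demo_ctx ctx = { 0 };

            demo_produce(&ctx, 42);
            demo_produce(&ctx, 43);
            printf("ready to submit: %u entries\n", demo_sqring_entries(&ctx));
            return 0;
    }
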
--- /dev/null
+From 6de12a9f07aa0ea5a6bc5e2b78f000e2328d6d85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2019 12:31:31 +0300
+Subject: io_uring: Fix race for sqes with userspace
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ Upstream commit 935d1e45908afb8853c497f2c2bbbb685dec51dc ]
+
+io_ring_submit() finishes by
+1. calling io_commit_sqring(), which releases the sqes back to userspace
+2. then calling io_queue_link_head(), which accesses the already
+released head's sqe
+
+Reorder them.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 518042cc6628b..d447f43d64a24 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2488,13 +2488,14 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
+ submit++;
+ io_submit_sqe(ctx, &s, statep, &link);
+ }
+- io_commit_sqring(ctx);
+
+ if (link)
+ io_queue_sqe(ctx, link, &link->submit);
+ if (statep)
+ io_submit_state_end(statep);
+
++ io_commit_sqring(ctx);
++
+ return submit;
+ }
+
+--
+2.20.1
+
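The fix works because io_commit_sqring() is what publishes the new SQ head
and thereby hands the consumed slots back to userspace; any read of an sqe
after that point, such as queueing the deferred link head, races with the
application reusing the slot. Below is a small userspace sketch of that
ordering rule only, with invented demo_* names and a C11 release store
standing in for the kernel's head update.

    /* Illustration only: finish using the entries before releasing the head. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct demo_ring {
            _Atomic unsigned int head;      /* consumer publishes progress */
            unsigned int slots[8];
    };

    static void demo_consume_two(struct demo_ring *ring, unsigned int head)
    {
            /* 1. Use the entries while this side still owns the slots. */
            unsigned int a = ring->slots[head & 7];
            unsigned int b = ring->slots[(head + 1) & 7];

            printf("consumed %u and %u\n", a, b);

            /*
             * 2. Only now hand the slots back: after this release store the
             *    producer may overwrite them.  Doing the store before the
             *    reads above is exactly the race this patch closes.
             */
            atomic_store_explicit(&ring->head, head + 2, memory_order_release);
    }

    int main(void)
    {
            struct demo_ring ring = { .slots = { 7, 9 } };

            demo_consume_two(&ring, 0);
            return 0;
    }
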
--- /dev/null
+From 26b632e6df767c1726f5396a0920372415a215e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2019 10:04:25 -0600
+Subject: io_uring: use cached copies of sq->dropped and cq->overflow
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 498ccd9eda49117c34e0041563d0da6ac40e52b8 ]
+
+We currently use the ring values directly, but that can lead to issues
+if the application is malicious and changes these values behind our
+back. Create in-kernel cached versions of them, and just overwrite the
+user-visible side when we update them. This is similar to how we treat
+the sq/cq ring tail/head updates.
+
+Reported-by: Pavel Begunkov <asml.silence@gmail.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index d447f43d64a24..3c8906494a8e1 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -221,6 +221,7 @@ struct io_ring_ctx {
+ unsigned sq_entries;
+ unsigned sq_mask;
+ unsigned sq_thread_idle;
++ unsigned cached_sq_dropped;
+ struct io_uring_sqe *sq_sqes;
+
+ struct list_head defer_list;
+@@ -237,6 +238,7 @@ struct io_ring_ctx {
+ /* CQ ring */
+ struct io_cq_ring *cq_ring;
+ unsigned cached_cq_tail;
++ atomic_t cached_cq_overflow;
+ unsigned cq_entries;
+ unsigned cq_mask;
+ struct wait_queue_head cq_wait;
+@@ -431,7 +433,8 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
+ if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+ return false;
+
+- return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
++ return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped
++ + atomic_read(&ctx->cached_cq_overflow);
+ }
+
+ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+@@ -511,9 +514,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+ WRITE_ONCE(cqe->res, res);
+ WRITE_ONCE(cqe->flags, 0);
+ } else {
+- unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
+-
+- WRITE_ONCE(ctx->cq_ring->overflow, overflow + 1);
++ WRITE_ONCE(ctx->cq_ring->overflow,
++ atomic_inc_return(&ctx->cached_cq_overflow));
+ }
+ }
+
+@@ -2272,7 +2274,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
+
+ /* drop invalid entries */
+ ctx->cached_sq_head++;
+- ring->dropped++;
++ ctx->cached_sq_dropped++;
++ WRITE_ONCE(ring->dropped, ctx->cached_sq_dropped);
+ return false;
+ }
+
+--
+2.20.1
+
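The pattern here is to stop trusting counters that live in memory the
application can scribble on: the kernel keeps private cached_sq_dropped and
cached_cq_overflow copies, increments those, and just overwrites the
user-visible counters with the cached values instead of doing a
read-modify-write on shared memory. A standalone sketch of that "private
copy, write-only mirror" idea follows; the demo_* names are invented and
C11 atomics take the place of WRITE_ONCE()/atomic_inc_return().

    /* Illustration only: decisions use the private copy, never the shared one. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct demo_shared {                    /* mapped by the untrusted side */
            _Atomic unsigned int dropped;
            _Atomic unsigned int overflow;
    };

    struct demo_private {                   /* trusted side only */
            unsigned int cached_dropped;
            _Atomic unsigned int cached_overflow;
    };

    static void demo_account_drop(struct demo_private *priv,
                                  struct demo_shared *shared)
    {
            /* bump the trusted copy, then overwrite (never read) shared */
            priv->cached_dropped++;
            atomic_store_explicit(&shared->dropped, priv->cached_dropped,
                                  memory_order_relaxed);
    }

    static void demo_account_overflow(struct demo_private *priv,
                                      struct demo_shared *shared)
    {
            unsigned int v = atomic_fetch_add(&priv->cached_overflow, 1) + 1;

            atomic_store_explicit(&shared->overflow, v, memory_order_relaxed);
    }

    int main(void)
    {
            struct demo_shared shared = { 0 };
            struct demo_private priv = { 0 };

            demo_account_drop(&priv, &shared);
            demo_account_overflow(&priv, &shared);

            /* even if the other side rewrites shared.*, these stay correct */
            printf("trusted dropped=%u overflow=%u\n", priv.cached_dropped,
                   atomic_load(&priv.cached_overflow));
            return 0;
    }
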