From: Sasha Levin
Date: Sun, 27 Oct 2019 08:48:06 +0000 (-0400)
Subject: fixes for 5.3
X-Git-Tag: v4.4.198~27^2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b1428778215af85be296714ca66fed4eae4bed13;p=thirdparty%2Fkernel%2Fstable-queue.git

fixes for 5.3

Signed-off-by: Sasha Levin
---

diff --git a/queue-5.3/io_uring-fix-broken-links-with-offloading.patch b/queue-5.3/io_uring-fix-broken-links-with-offloading.patch
new file mode 100644
index 00000000000..bc6e093e620
--- /dev/null
+++ b/queue-5.3/io_uring-fix-broken-links-with-offloading.patch
@@ -0,0 +1,167 @@
+From f5ff6a51c7051323ef3cb201c352cb4eb1088175 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 25 Oct 2019 12:31:30 +0300
+Subject: io_uring: Fix broken links with offloading
+
+From: Pavel Begunkov
+
+[ Upstream commit fb5ccc98782f654778cb8d96ba8a998304f9a51f ]
+
+io_sq_thread() processes sqes by 8 without considering links. As a
+result, links will be randomly subdivided.
+
+The easiest way to fix it is to call io_get_sqring() inside
+io_submit_sqes(), as io_ring_submit() does.
+
+Downsides:
+1. This removes the optimisation of not grabbing mm_struct for fixed files.
+2. It submits all sqes in one go, without finer-grained scheduling
+with cq processing.
+
+Signed-off-by: Pavel Begunkov
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 58 +++++++++++++++++++++++++++------------------------
+ 1 file changed, 31 insertions(+), 27 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 79f9c9f7b298e..518042cc6628b 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -687,6 +687,14 @@ static unsigned io_cqring_events(struct io_cq_ring *ring)
+ 	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
+ }
+ 
++static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
++{
++	struct io_rings *rings = ctx->rings;
++
++	/* make sure SQ entry isn't read before tail */
++	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
++}
++
+ /*
+  * Find and free completed poll iocbs
+  */
+@@ -2268,8 +2276,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
+ 	return false;
+ }
+ 
+-static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+-			  unsigned int nr, bool has_user, bool mm_fault)
++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
++			  bool has_user, bool mm_fault)
+ {
+ 	struct io_submit_state state, *statep = NULL;
+ 	struct io_kiocb *link = NULL;
+@@ -2282,6 +2290,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+ 	}
+ 
+ 	for (i = 0; i < nr; i++) {
++		struct sqe_submit s;
++
++		if (!io_get_sqring(ctx, &s))
++			break;
++
+ 		/*
+ 		 * If previous wasn't linked and we have a linked command,
+ 		 * that's the end of the chain. Submit the previous link.
+@@ -2290,16 +2303,16 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+ 			io_queue_sqe(ctx, link, &link->submit);
+ 			link = NULL;
+ 		}
+-		prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
++		prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
+ 
+ 		if (unlikely(mm_fault)) {
+-			io_cqring_add_event(ctx, sqes[i].sqe->user_data,
++			io_cqring_add_event(ctx, s.sqe->user_data,
+ 						-EFAULT);
+ 		} else {
+-			sqes[i].has_user = has_user;
+-			sqes[i].needs_lock = true;
+-			sqes[i].needs_fixed_file = true;
+-			io_submit_sqe(ctx, &sqes[i], statep, &link);
++			s.has_user = has_user;
++			s.needs_lock = true;
++			s.needs_fixed_file = true;
++			io_submit_sqe(ctx, &s, statep, &link);
+ 			submitted++;
+ 		}
+ 	}
+@@ -2314,7 +2327,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+ 
+ static int io_sq_thread(void *data)
+ {
+-	struct sqe_submit sqes[IO_IOPOLL_BATCH];
+ 	struct io_ring_ctx *ctx = data;
+ 	struct mm_struct *cur_mm = NULL;
+ 	mm_segment_t old_fs;
+@@ -2329,8 +2341,8 @@ static int io_sq_thread(void *data)
+ 
+ 	timeout = inflight = 0;
+ 	while (!kthread_should_park()) {
+-		bool all_fixed, mm_fault = false;
+-		int i;
++		bool mm_fault = false;
++		unsigned int to_submit;
+ 
+ 		if (inflight) {
+ 			unsigned nr_events = 0;
+@@ -2363,7 +2375,8 @@ static int io_sq_thread(void *data)
+ 			timeout = jiffies + ctx->sq_thread_idle;
+ 		}
+ 
+-		if (!io_get_sqring(ctx, &sqes[0])) {
++		to_submit = io_sqring_entries(ctx);
++		if (!to_submit) {
+ 			/*
+ 			 * We're polling. If we're within the defined idle
+ 			 * period, then let us spin without work before going
+@@ -2394,7 +2407,8 @@ static int io_sq_thread(void *data)
+ 			/* make sure to read SQ tail after writing flags */
+ 			smp_mb();
+ 
+-			if (!io_get_sqring(ctx, &sqes[0])) {
++			to_submit = io_sqring_entries(ctx);
++			if (!to_submit) {
+ 				if (kthread_should_park()) {
+ 					finish_wait(&ctx->sqo_wait, &wait);
+ 					break;
+@@ -2412,19 +2426,8 @@ static int io_sq_thread(void *data)
+ 			ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
+ 		}
+ 
+-		i = 0;
+-		all_fixed = true;
+-		do {
+-			if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
+-				all_fixed = false;
+-
+-			i++;
+-			if (i == ARRAY_SIZE(sqes))
+-				break;
+-		} while (io_get_sqring(ctx, &sqes[i]));
+-
+ 		/* Unless all new commands are FIXED regions, grab mm */
+-		if (!all_fixed && !cur_mm) {
++		if (!cur_mm) {
+ 			mm_fault = !mmget_not_zero(ctx->sqo_mm);
+ 			if (!mm_fault) {
+ 				use_mm(ctx->sqo_mm);
+@@ -2432,8 +2435,9 @@ static int io_sq_thread(void *data)
+ 			}
+ 		}
+ 
+-		inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
+-					   mm_fault);
++		to_submit = min(to_submit, ctx->sq_entries);
++		inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
++					   mm_fault);
+ 
+ 		/* Commit SQ ring head once we've consumed all SQEs */
+ 		io_commit_sqring(ctx);
+-- 
+2.20.1
+
diff --git a/queue-5.3/io_uring-fix-race-for-sqes-with-userspace.patch b/queue-5.3/io_uring-fix-race-for-sqes-with-userspace.patch
new file mode 100644
index 00000000000..398ac51f4f1
--- /dev/null
+++ b/queue-5.3/io_uring-fix-race-for-sqes-with-userspace.patch
@@ -0,0 +1,45 @@
+From 6de12a9f07aa0ea5a6bc5e2b78f000e2328d6d85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 25 Oct 2019 12:31:31 +0300
+Subject: io_uring: Fix race for sqes with userspace
+
+From: Pavel Begunkov
+
+[ Upstream commit 935d1e45908afb8853c497f2c2bbbb685dec51dc ]
+
+io_ring_submit() finalises with
+1. io_commit_sqring(), which releases sqes to userspace
+2. then a call to io_queue_link_head(), accessing the released head's sqe
+
+Reorder them.
+
+Signed-off-by: Pavel Begunkov
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 518042cc6628b..d447f43d64a24 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2488,13 +2488,14 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
+ 		submit++;
+ 		io_submit_sqe(ctx, &s, statep, &link);
+ 	}
+-	io_commit_sqring(ctx);
+ 
+ 	if (link)
+ 		io_queue_sqe(ctx, link, &link->submit);
+ 	if (statep)
+ 		io_submit_state_end(statep);
+ 
++	io_commit_sqring(ctx);
++
+ 	return submit;
+ }
+ 
+-- 
+2.20.1
+
diff --git a/queue-5.3/io_uring-used-cached-copies-of-sq-dropped-and-cq-ove.patch b/queue-5.3/io_uring-used-cached-copies-of-sq-dropped-and-cq-ove.patch
new file mode 100644
index 00000000000..79ac82fb83a
--- /dev/null
+++ b/queue-5.3/io_uring-used-cached-copies-of-sq-dropped-and-cq-ove.patch
@@ -0,0 +1,78 @@
+From 26b632e6df767c1726f5396a0920372415a215e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 25 Oct 2019 10:04:25 -0600
+Subject: io_uring: used cached copies of sq->dropped and cq->overflow
+
+From: Jens Axboe
+
+[ Upstream commit 498ccd9eda49117c34e0041563d0da6ac40e52b8 ]
+
+We currently use the ring values directly, but that can lead to issues
+if the application is malicious and changes these values on our behalf.
+Create in-kernel cached versions of them, and just overwrite the user
+side when we update them. This is similar to how we treat the sq/cq
+ring tail/head updates.
+
+Reported-by: Pavel Begunkov
+Reviewed-by: Pavel Begunkov
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ fs/io_uring.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index d447f43d64a24..3c8906494a8e1 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -221,6 +221,7 @@ struct io_ring_ctx {
+ 		unsigned		sq_entries;
+ 		unsigned		sq_mask;
+ 		unsigned		sq_thread_idle;
++		unsigned		cached_sq_dropped;
+ 		struct io_uring_sqe	*sq_sqes;
+ 
+ 		struct list_head	defer_list;
+@@ -237,6 +238,7 @@ struct io_ring_ctx {
+ 	/* CQ ring */
+ 	struct io_cq_ring	*cq_ring;
+ 	unsigned		cached_cq_tail;
++	atomic_t		cached_cq_overflow;
+ 	unsigned		cq_entries;
+ 	unsigned		cq_mask;
+ 	struct wait_queue_head	cq_wait;
+@@ -431,7 +433,8 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
+ 	if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+ 		return false;
+ 
+-	return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
++	return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped
++			+ atomic_read(&ctx->cached_cq_overflow);
+ }
+ 
+ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+@@ -511,9 +514,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+ 		WRITE_ONCE(cqe->res, res);
+ 		WRITE_ONCE(cqe->flags, 0);
+ 	} else {
+-		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
+-
+-		WRITE_ONCE(ctx->cq_ring->overflow, overflow + 1);
++		WRITE_ONCE(ctx->cq_ring->overflow,
++				atomic_inc_return(&ctx->cached_cq_overflow));
+ 	}
+ }
+ 
+@@ -2272,7 +2274,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
+ 
+ 	/* drop invalid entries */
+ 	ctx->cached_sq_head++;
+-	ring->dropped++;
++	ctx->cached_sq_dropped++;
++	WRITE_ONCE(ring->dropped, ctx->cached_sq_dropped);
+ 	return false;
+ }
+ 
+-- 
+2.20.1
+
diff --git a/queue-5.3/series b/queue-5.3/series
index 343aab80cf3..fdfe0e6a353 100644
--- a/queue-5.3/series
+++ b/queue-5.3/series
@@ -104,3 +104,6 @@ usb-ldusb-fix-memleak-on-disconnect.patch
 usb-usblp-fix-use-after-free-on-disconnect.patch
 usb-ldusb-fix-read-info-leaks.patch
 binder-don-t-modify-vma-bounds-in-mmap-handler.patch
+io_uring-fix-broken-links-with-offloading.patch
+io_uring-fix-race-for-sqes-with-userspace.patch
+io_uring-used-cached-copies-of-sq-dropped-and-cq-ove.patch
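
The recurring idea in the patches above is that the kernel side trusts only its own cached indices and counters: it observes the SQ tail that userspace publishes via an acquire load (io_sqring_entries()), and it updates shared-ring fields such as dropped and overflow only from kernel-private copies. Below is a minimal standalone sketch of that acquire/release pairing in C11; it is illustrative only, and the ring, consumer, and function names here are invented rather than taken from io_uring.

#include <stdatomic.h>
#include <stdint.h>

#define RING_ENTRIES 8u                     /* power of two; illustrative size */

struct ring {
	_Atomic uint32_t tail;              /* producer publishes new entries here */
	uint32_t entries[RING_ENTRIES];     /* payload slots */
};

struct consumer {
	uint32_t cached_head;               /* private copy, never re-read from shared memory */
};

/* Producer: fill the slot first, then publish it with a release store. */
static void ring_push(struct ring *r, uint32_t value)
{
	uint32_t tail = atomic_load_explicit(&r->tail, memory_order_relaxed);

	r->entries[tail & (RING_ENTRIES - 1)] = value;
	atomic_store_explicit(&r->tail, tail + 1, memory_order_release);
}

/* Consumer: acquire-load the tail so slot contents are visible before use. */
static uint32_t ring_pending(struct ring *r, struct consumer *c)
{
	return atomic_load_explicit(&r->tail, memory_order_acquire) - c->cached_head;
}

The release store orders the slot write before the tail update, and the acquire load pairs with it so the consumer never reads a slot whose contents are not yet visible; keeping cached_head (and, in the third patch, the dropped and overflow counters) kernel-private is what stops a malicious application from steering kernel logic by rewriting the shared ring fields.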