--- /dev/null
+From c0e48f9dea9129aa11bec3ed13803bcc26e96e49 Mon Sep 17 00:00:00 2001
+From: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
+Date: Thu, 18 Jul 2019 20:44:00 +0800
+Subject: io_uring: add a memory barrier before atomic_read
+
+From: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
+
+commit c0e48f9dea9129aa11bec3ed13803bcc26e96e49 upstream.
+
+There is a hang issue when using fio to do some basic testing. The
+issue can be easily reproduced with the script below:
+
+ while true
+ do
+ fio --ioengine=io_uring -rw=write -bs=4k -numjobs=1 \
+ -size=1G -iodepth=64 -name=uring --filename=/dev/zero
+ done
+
+After several minutes (or more), fio blocks in
+io_uring_enter->io_cqring_wait, waiting for previously committed sqes
+to complete, and can't return to userspace until we send fio a SIGTERM.
+After receiving SIGTERM, fio hangs in io_ring_ctx_wait_and_kill with a
+backtrace like this:
+
+ [54133.243816] Call Trace:
+ [54133.243842] __schedule+0x3a0/0x790
+ [54133.243868] schedule+0x38/0xa0
+ [54133.243880] schedule_timeout+0x218/0x3b0
+ [54133.243891] ? sched_clock+0x9/0x10
+ [54133.243903] ? wait_for_completion+0xa3/0x130
+ [54133.243916] ? _raw_spin_unlock_irq+0x2c/0x40
+ [54133.243930] ? trace_hardirqs_on+0x3f/0xe0
+ [54133.243951] wait_for_completion+0xab/0x130
+ [54133.243962] ? wake_up_q+0x70/0x70
+ [54133.243984] io_ring_ctx_wait_and_kill+0xa0/0x1d0
+ [54133.243998] io_uring_release+0x20/0x30
+ [54133.244008] __fput+0xcf/0x270
+ [54133.244029] ____fput+0xe/0x10
+ [54133.244040] task_work_run+0x7f/0xa0
+ [54133.244056] do_exit+0x305/0xc40
+ [54133.244067] ? get_signal+0x13b/0xbd0
+ [54133.244088] do_group_exit+0x50/0xd0
+ [54133.244103] get_signal+0x18d/0xbd0
+ [54133.244112] ? _raw_spin_unlock_irqrestore+0x36/0x60
+ [54133.244142] do_signal+0x34/0x720
+ [54133.244171] ? exit_to_usermode_loop+0x7e/0x130
+ [54133.244190] exit_to_usermode_loop+0xc0/0x130
+ [54133.244209] do_syscall_64+0x16b/0x1d0
+ [54133.244221] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+The reason is that a req was added to ctx->pending_async at the very
+last moment, but it never got a chance to be processed. How could this
+happen?
+
+ fio#cpu0 wq#cpu1
+
+ io_add_to_prev_work io_sq_wq_submit_work
+
+ atomic_read() <<< 1
+
+ atomic_dec_return() << 1->0
+ list_empty(); <<< true;
+
+ list_add_tail()
+ atomic_read() << 0 or 1?
+
+As atomic_ops.rst states, atomic_read() does not guarantee that a
+runtime modification made by another thread is visible yet, so we must
+take care of that with a proper implicit or explicit memory barrier.
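+
+Purely as an illustration (a hedged user-space C11 analogue with made-up
+names, not kernel code), the pattern above is the classic store-buffering
+case: each side publishes a store and then reads the other side's state,
+and without a full barrier between the two, both sides can observe stale
+values.  The seq_cst fences below play the role that smp_mb() and the
+fully ordered atomic_dec_return() play in the kernel:
+
+  #include <stdatomic.h>
+  #include <pthread.h>
+  #include <stdio.h>
+
+  static atomic_int list_has_req;   /* stands in for list_add_tail() */
+  static atomic_int worker_cnt = 1; /* stands in for async_list->cnt */
+
+  static void *submitter(void *arg) /* ~ io_add_to_prev_work() */
+  {
+          atomic_store_explicit(&list_has_req, 1, memory_order_relaxed);
+          atomic_thread_fence(memory_order_seq_cst);  /* the added smp_mb() */
+          if (atomic_load_explicit(&worker_cnt, memory_order_relaxed) == 0)
+                  puts("worker gone, handle the req ourselves");
+          return NULL;
+  }
+
+  static void *worker(void *arg)    /* ~ io_sq_wq_submit_work() */
+  {
+          int left = atomic_fetch_sub_explicit(&worker_cnt, 1,
+                                               memory_order_relaxed) - 1;
+          atomic_thread_fence(memory_order_seq_cst);  /* ~ atomic_dec_return() */
+          if (left == 0 &&
+              atomic_load_explicit(&list_has_req, memory_order_relaxed) == 0)
+                  puts("list empty, worker exits");
+          return NULL;
+  }
+
+  int main(void)
+  {
+          pthread_t a, b;
+
+          pthread_create(&a, NULL, submitter, NULL);
+          pthread_create(&b, NULL, worker, NULL);
+          pthread_join(a, NULL);
+          pthread_join(b, NULL);
+          return 0;
+  }
+
+With both fences in place, at least one of the two threads is guaranteed
+to see the other's update, which is exactly the property the hung test
+case above was missing.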
+
+This issue was detected with the help of Jackie <liuyun01@kylinos.cn>.
+
+Fixes: 31b515106428 ("io_uring: allow workqueue item to handle multiple buffered requests")
+Signed-off-by: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1769,6 +1769,10 @@ static bool io_add_to_prev_work(struct a
+ ret = true;
+ spin_lock(&list->lock);
+ list_add_tail(&req->list, &list->list);
++ /*
++ * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
++ */
++ smp_mb();
+ if (!atomic_read(&list->cnt)) {
+ list_del_init(&req->list);
+ ret = false;
--- /dev/null
+From bd11b3a391e3df6fa958facbe4b3f9f4cca9bd49 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sat, 20 Jul 2019 08:37:31 -0600
+Subject: io_uring: don't use iov_iter_advance() for fixed buffers
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit bd11b3a391e3df6fa958facbe4b3f9f4cca9bd49 upstream.
+
+Hrvoje reports that when a large fixed buffer is registered and IO is
+being done to the latter pages of said buffer, the IO submission time
+is much worse:
+
+reading to the start of the buffer: 11238 ns
+reading to the end of the buffer: 1039879 ns
+
+In fact, it's worse by two orders of magnitude. The reason is how
+io_uring sets up the iov_iter: we point the iter at the first bvec and
+then use iov_iter_advance() to fast-forward to the offset we need
+within that buffer.
+
+However, that is abysmally slow, as it entails iterating over the bvecs
+we set up as part of buffer registration. There's really no need to use
+this generic helper, as we know it's a BVEC-type iterator and we also
+know that each bvec is PAGE_SIZE in size, apart from possibly the first
+and last. Hence we can just use a shift on the offset to find the right
+index, and then adjust the iov_iter appropriately.
+After this fix, the timings are:
+
+reading to the start of the buffer: 10135 ns
+reading to the end of the buffer: 1377 ns
+
+Or about a 755x improvement for the tail page.
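+
+As a hedged, user-space sketch of the index arithmetic only (hypothetical
+sizes, not the kernel structures): with 4K pages, a 1024-byte first bvec
+and a byte offset of 10240 into the buffer, the target is bvec[3] at
+offset 1024 within it:
+
+  #include <assert.h>
+  #include <stddef.h>
+
+  #define PAGE_SHIFT 12
+  #define PAGE_SIZE  (1UL << PAGE_SHIFT)
+  #define PAGE_MASK  (~(PAGE_SIZE - 1))
+
+  int main(void)
+  {
+          size_t first_bv_len = 1024;    /* bvec[0] need not be page sized */
+          size_t offset = 10240;         /* byte offset into the fixed buffer */
+          size_t seg_skip, in_seg;
+
+          assert(offset > first_bv_len); /* else plain iov_iter_advance() */
+          offset -= first_bv_len;        /* skip the (possibly short) first bvec */
+          seg_skip = 1 + (offset >> PAGE_SHIFT); /* index of the target bvec */
+          in_seg = offset & ~PAGE_MASK;          /* offset inside that bvec */
+
+          assert(seg_skip == 3 && in_seg == 1024);
+          return 0;
+  }
+
+The shift replaces the per-segment walk that iov_iter_advance() would
+otherwise do, which is where the two orders of magnitude come from.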
+
+Reported-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
+Tested-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/io_uring.c | 39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1001,8 +1001,43 @@ static int io_import_fixed(struct io_rin
+ */
+ offset = buf_addr - imu->ubuf;
+ iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
+- if (offset)
+- iov_iter_advance(iter, offset);
++
++ if (offset) {
++ /*
++ * Don't use iov_iter_advance() here, as it's really slow for
++ * using the latter parts of a big fixed buffer - it iterates
++ * over each segment manually. We can cheat a bit here, because
++ * we know that:
++ *
++ * 1) it's a BVEC iter, we set it up
++ * 2) all bvecs are PAGE_SIZE in size, except potentially the
++ * first and last bvec
++ *
++ * So just find our index, and adjust the iterator afterwards.
++ * If the offset is within the first bvec (or the whole first
++ * bvec, just use iov_iter_advance(). This makes it easier
++	 * bvec), just use iov_iter_advance(). This makes it easier
++ * be PAGE_SIZE aligned.
++ */
++ const struct bio_vec *bvec = imu->bvec;
++
++ if (offset <= bvec->bv_len) {
++ iov_iter_advance(iter, offset);
++ } else {
++ unsigned long seg_skip;
++
++ /* skip first vec */
++ offset -= bvec->bv_len;
++ seg_skip = 1 + (offset >> PAGE_SHIFT);
++
++ iter->bvec = bvec + seg_skip;
++ iter->nr_segs -= seg_skip;
++ iter->count -= (seg_skip << PAGE_SHIFT);
++ iter->iov_offset = offset & ~PAGE_MASK;
++ if (iter->iov_offset)
++ iter->count -= iter->iov_offset;
++ }
++ }
+
+ /* don't drop a reference to these pages */
+ iter->type |= ITER_BVEC_FLAG_NO_REF;
--- /dev/null
+From 36703247d5f52a679df9da51192b6950fe81689f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 25 Jul 2019 10:20:18 -0600
+Subject: io_uring: ensure ->list is initialized for poll commands
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 36703247d5f52a679df9da51192b6950fe81689f upstream.
+
+Daniel reports that when testing an http server that uses io_uring to
+poll for incoming connections, it sometimes hard crashes. This is due
+to an uninitialized list member in the io_uring request. Normally this
+doesn't trigger, and none of the test cases caught it.
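+
+Illustration only (a minimal user-space analogue with a hand-rolled list,
+not the kernel's <linux/list.h>): an uninitialized list_head carries
+garbage next/prev pointers, so any later deletion walks wild memory,
+while an initialized node points at itself and is always safe to unlink:
+
+  #include <stdio.h>
+
+  struct list_head { struct list_head *next, *prev; };
+
+  static void INIT_LIST_HEAD(struct list_head *l) { l->next = l->prev = l; }
+
+  static void list_del_init(struct list_head *e)
+  {
+          e->next->prev = e->prev;  /* dereferences garbage if e is uninitialized */
+          e->prev->next = e->next;
+          INIT_LIST_HEAD(e);
+  }
+
+  int main(void)
+  {
+          struct list_head req_list;   /* think of the member inside io_kiocb */
+
+          INIT_LIST_HEAD(&req_list);   /* the initialization this patch adds */
+          list_del_init(&req_list);    /* harmless: the node points at itself */
+          printf("still self-linked: %d\n", req_list.next == &req_list);
+          return 0;
+  }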
+
+Reported-by: Daniel Kozak <kozzi11@gmail.com>
+Tested-by: Daniel Kozak <kozzi11@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1489,6 +1489,8 @@ static int io_poll_add(struct io_kiocb *
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, io_poll_wake);
+
++ INIT_LIST_HEAD(&req->list);
++
+ mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
+
+ spin_lock_irq(&ctx->completion_lock);
--- /dev/null
+From f7b76ac9d17e16e44feebb6d2749fec92bfd6dd4 Mon Sep 17 00:00:00 2001
+From: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
+Date: Tue, 16 Jul 2019 23:26:14 +0800
+Subject: io_uring: fix counter inc/dec mismatch in async_list
+
+From: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
+
+commit f7b76ac9d17e16e44feebb6d2749fec92bfd6dd4 upstream.
+
+We could queue a work item for each req on the defer and link lists
+without increasing async_list->cnt, so we shouldn't decrease it on
+workqueue exit either if we didn't process the req from the async list.
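+
+A hedged sketch of the accounting rule, with hypothetical names (the real
+code keys off REQ_F_IO_DRAINED/REQ_F_LINK_DONE rather than a dedicated
+flag): only a request that actually bumped the counter may drop it again
+when the worker finishes with it:
+
+  #include <assert.h>
+  #include <stdatomic.h>
+  #include <stdbool.h>
+
+  struct req { bool counted; };        /* was async_list->cnt incremented? */
+
+  static atomic_int async_cnt;
+
+  static void queue_via_async_list(struct req *r)
+  {
+          atomic_fetch_add(&async_cnt, 1);
+          r->counted = true;
+  }
+
+  static void queue_via_defer_or_link(struct req *r)
+  {
+          r->counted = false;          /* async_cnt deliberately untouched */
+  }
+
+  static void worker_finished(struct req *r)
+  {
+          if (r->counted)              /* mirrors the "goto out" skip */
+                  atomic_fetch_sub(&async_cnt, 1);
+  }
+
+  int main(void)
+  {
+          struct req a, b;
+
+          queue_via_async_list(&a);
+          queue_via_defer_or_link(&b);
+          worker_finished(&a);
+          worker_finished(&b);
+          assert(atomic_load(&async_cnt) == 0);   /* balanced again */
+          return 0;
+  }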
+
+Thanks to Jens Axboe <axboe@kernel.dk> for his guidance.
+
+Fixes: 31b515106428 ("io_uring: allow workqueue item to handle multiple buffered requests")
+Signed-off-by: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/io_uring.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -331,6 +331,9 @@ struct io_kiocb {
+ #define REQ_F_SEQ_PREV 8 /* sequential with previous */
+ #define REQ_F_IO_DRAIN 16 /* drain existing IO first */
+ #define REQ_F_IO_DRAINED 32 /* drain done */
++#define REQ_F_LINK 64 /* linked sqes */
++#define REQ_F_LINK_DONE 128 /* linked sqes done */
++#define REQ_F_FAIL_LINK 256 /* fail rest of links */
+ u64 user_data;
+ u32 error; /* iopoll result from callback */
+ u32 sequence;
+@@ -1698,6 +1701,10 @@ restart:
+ /* async context always use a copy of the sqe */
+ kfree(sqe);
+
++ /* req from defer and link list needn't decrease async cnt */
++ if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
++ goto out;
++
+ if (!async_list)
+ break;
+ if (!list_empty(&req_list)) {
+@@ -1745,6 +1752,7 @@ restart:
+ }
+ }
+
++out:
+ if (cur_mm) {
+ set_fs(old_fs);
+ unuse_mm(cur_mm);
structleak-disable-structleak_byref-in-combination-with-kasan_stack.patch
drm-i915-make-the-semaphore-saturation-mask-global.patch
access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch
+io_uring-add-a-memory-barrier-before-atomic_read.patch
+io_uring-ensure-list-is-initialized-for-poll-commands.patch
+io_uring-fix-counter-inc-dec-mismatch-in-async_list.patch
+io_uring-don-t-use-iov_iter_advance-for-fixed-buffers.patch