]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Dec 2020 08:06:42 +0000 (09:06 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Dec 2020 08:06:42 +0000 (09:06 +0100)
added patches:
io_uring-always-let-io_iopoll_complete-complete-polled-io.patch
io_uring-fix-0-iov-read-buffer-select.patch
io_uring-fix-double-io_uring-free.patch
io_uring-fix-ignoring-xa_store-errors.patch
io_uring-fix-io_cqring_events-s-noflush.patch
io_uring-fix-io_wqe-work_list-corruption.patch
io_uring-fix-racy-iopoll-completions.patch
io_uring-hold-uring_lock-while-completing-failed-polled-io-in-io_wq_submit_work.patch
io_uring-make-ctx-cancel-on-exit-targeted-to-actual-ctx.patch
media-gspca-fix-memory-leak-in-probe.patch
vfio-pci-move-dummy_resources_list-init-in-vfio_pci_probe.patch
vfio-pci-nvlink2-do-not-attempt-npu2-setup-on-power8nvl-npu.patch

13 files changed:
queue-5.10/io_uring-always-let-io_iopoll_complete-complete-polled-io.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-0-iov-read-buffer-select.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-double-io_uring-free.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-ignoring-xa_store-errors.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-io_cqring_events-s-noflush.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-io_wqe-work_list-corruption.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-racy-iopoll-completions.patch [new file with mode: 0644]
queue-5.10/io_uring-hold-uring_lock-while-completing-failed-polled-io-in-io_wq_submit_work.patch [new file with mode: 0644]
queue-5.10/io_uring-make-ctx-cancel-on-exit-targeted-to-actual-ctx.patch [new file with mode: 0644]
queue-5.10/media-gspca-fix-memory-leak-in-probe.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/vfio-pci-move-dummy_resources_list-init-in-vfio_pci_probe.patch [new file with mode: 0644]
queue-5.10/vfio-pci-nvlink2-do-not-attempt-npu2-setup-on-power8nvl-npu.patch [new file with mode: 0644]

diff --git a/queue-5.10/io_uring-always-let-io_iopoll_complete-complete-polled-io.patch b/queue-5.10/io_uring-always-let-io_iopoll_complete-complete-polled-io.patch
new file mode 100644 (file)
index 0000000..949f0a2
--- /dev/null
@@ -0,0 +1,115 @@
+From dad1b1242fd5717af18ae4ac9d12b9f65849e13a Mon Sep 17 00:00:00 2001
+From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+Date: Sun, 6 Dec 2020 22:22:42 +0000
+Subject: io_uring: always let io_iopoll_complete() complete polled io
+
+From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+
+commit dad1b1242fd5717af18ae4ac9d12b9f65849e13a upstream.
+
+Abaci Fuzz reported a double-free or invalid-free BUG in io_commit_cqring():
+[   95.504842] BUG: KASAN: double-free or invalid-free in io_commit_cqring+0x3ec/0x8e0
+[   95.505921]
+[   95.506225] CPU: 0 PID: 4037 Comm: io_wqe_worker-0 Tainted: G    B
+W         5.10.0-rc5+ #1
+[   95.507434] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+[   95.508248] Call Trace:
+[   95.508683]  dump_stack+0x107/0x163
+[   95.509323]  ? io_commit_cqring+0x3ec/0x8e0
+[   95.509982]  print_address_description.constprop.0+0x3e/0x60
+[   95.510814]  ? vprintk_func+0x98/0x140
+[   95.511399]  ? io_commit_cqring+0x3ec/0x8e0
+[   95.512036]  ? io_commit_cqring+0x3ec/0x8e0
+[   95.512733]  kasan_report_invalid_free+0x51/0x80
+[   95.513431]  ? io_commit_cqring+0x3ec/0x8e0
+[   95.514047]  __kasan_slab_free+0x141/0x160
+[   95.514699]  kfree+0xd1/0x390
+[   95.515182]  io_commit_cqring+0x3ec/0x8e0
+[   95.515799]  __io_req_complete.part.0+0x64/0x90
+[   95.516483]  io_wq_submit_work+0x1fa/0x260
+[   95.517117]  io_worker_handle_work+0xeac/0x1c00
+[   95.517828]  io_wqe_worker+0xc94/0x11a0
+[   95.518438]  ? io_worker_handle_work+0x1c00/0x1c00
+[   95.519151]  ? __kthread_parkme+0x11d/0x1d0
+[   95.519806]  ? io_worker_handle_work+0x1c00/0x1c00
+[   95.520512]  ? io_worker_handle_work+0x1c00/0x1c00
+[   95.521211]  kthread+0x396/0x470
+[   95.521727]  ? _raw_spin_unlock_irq+0x24/0x30
+[   95.522380]  ? kthread_mod_delayed_work+0x180/0x180
+[   95.523108]  ret_from_fork+0x22/0x30
+[   95.523684]
+[   95.523985] Allocated by task 4035:
+[   95.524543]  kasan_save_stack+0x1b/0x40
+[   95.525136]  __kasan_kmalloc.constprop.0+0xc2/0xd0
+[   95.525882]  kmem_cache_alloc_trace+0x17b/0x310
+[   95.533930]  io_queue_sqe+0x225/0xcb0
+[   95.534505]  io_submit_sqes+0x1768/0x25f0
+[   95.535164]  __x64_sys_io_uring_enter+0x89e/0xd10
+[   95.535900]  do_syscall_64+0x33/0x40
+[   95.536465]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[   95.537199]
+[   95.537505] Freed by task 4035:
+[   95.538003]  kasan_save_stack+0x1b/0x40
+[   95.538599]  kasan_set_track+0x1c/0x30
+[   95.539177]  kasan_set_free_info+0x1b/0x30
+[   95.539798]  __kasan_slab_free+0x112/0x160
+[   95.540427]  kfree+0xd1/0x390
+[   95.540910]  io_commit_cqring+0x3ec/0x8e0
+[   95.541516]  io_iopoll_complete+0x914/0x1390
+[   95.542150]  io_do_iopoll+0x580/0x700
+[   95.542724]  io_iopoll_try_reap_events.part.0+0x108/0x200
+[   95.543512]  io_ring_ctx_wait_and_kill+0x118/0x340
+[   95.544206]  io_uring_release+0x43/0x50
+[   95.544791]  __fput+0x28d/0x940
+[   95.545291]  task_work_run+0xea/0x1b0
+[   95.545873]  do_exit+0xb6a/0x2c60
+[   95.546400]  do_group_exit+0x12a/0x320
+[   95.546967]  __x64_sys_exit_group+0x3f/0x50
+[   95.547605]  do_syscall_64+0x33/0x40
+[   95.548155]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+The reason is that once we got a non EAGAIN error in io_wq_submit_work(),
+we'll complete req by calling io_req_complete(), which will hold completion_lock
+to call io_commit_cqring(), but for polled io, io_iopoll_complete() won't
+hold completion_lock to call io_commit_cqring(), then there maybe concurrent
+access to ctx->defer_list, double free may happen.
+
+To fix this bug, we always let io_iopoll_complete() complete polled io.
+
+Cc: <stable@vger.kernel.org> # 5.5+
+Reported-by: Abaci Fuzz <abaci@linux.alibaba.com>
+Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |   15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6083,8 +6083,19 @@ static struct io_wq_work *io_wq_submit_w
+       }
+       if (ret) {
+-              req_set_fail_links(req);
+-              io_req_complete(req, ret);
++              /*
++               * io_iopoll_complete() does not hold completion_lock to complete
++               * polled io, so here for polled io, just mark it done and still let
++               * io_iopoll_complete() complete it.
++               */
++              if (req->ctx->flags & IORING_SETUP_IOPOLL) {
++                      struct kiocb *kiocb = &req->rw.kiocb;
++
++                      kiocb_done(kiocb, ret, NULL);
++              } else {
++                      req_set_fail_links(req);
++                      io_req_complete(req, ret);
++              }
+       }
+       return io_steal_work(req);
diff --git a/queue-5.10/io_uring-fix-0-iov-read-buffer-select.patch b/queue-5.10/io_uring-fix-0-iov-read-buffer-select.patch
new file mode 100644 (file)
index 0000000..01865da
--- /dev/null
@@ -0,0 +1,34 @@
+From dd20166236953c8cd14f4c668bf972af32f0c6be Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sat, 19 Dec 2020 03:15:43 +0000
+Subject: io_uring: fix 0-iov read buffer select
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit dd20166236953c8cd14f4c668bf972af32f0c6be upstream.
+
+Doing vectored buf-select read with 0 iovec passed is meaningless and
+utterly broken, forbid it.
+
+Cc: <stable@vger.kernel.org> # 5.7+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -3048,9 +3048,7 @@ static ssize_t io_iov_buffer_select(stru
+               iov[0].iov_len = kbuf->len;
+               return 0;
+       }
+-      if (!req->rw.len)
+-              return 0;
+-      else if (req->rw.len > 1)
++      if (req->rw.len != 1)
+               return -EINVAL;
+ #ifdef CONFIG_COMPAT
diff --git a/queue-5.10/io_uring-fix-double-io_uring-free.patch b/queue-5.10/io_uring-fix-double-io_uring-free.patch
new file mode 100644 (file)
index 0000000..432157f
--- /dev/null
@@ -0,0 +1,141 @@
+From 9faadcc8abe4b83d0263216dc3a6321d5bbd616b Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 21 Dec 2020 18:34:05 +0000
+Subject: io_uring: fix double io_uring free
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 9faadcc8abe4b83d0263216dc3a6321d5bbd616b upstream.
+
+Once we created a file for current context during setup, we should not
+call io_ring_ctx_wait_and_kill() directly as it'll be done by fput(file)
+
+Cc: stable@vger.kernel.org # 5.10
+Reported-by: syzbot+c9937dfb2303a5f18640@syzkaller.appspotmail.com
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+[axboe: fix unused 'ret' for !CONFIG_UNIX]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |   71 +++++++++++++++++++++++++++++++---------------------------
+ 1 file changed, 39 insertions(+), 32 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -9195,55 +9195,52 @@ static int io_allocate_scq_urings(struct
+       return 0;
+ }
++static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
++{
++      int ret, fd;
++
++      fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
++      if (fd < 0)
++              return fd;
++
++      ret = io_uring_add_task_file(ctx, file);
++      if (ret) {
++              put_unused_fd(fd);
++              return ret;
++      }
++      fd_install(fd, file);
++      return fd;
++}
++
+ /*
+  * Allocate an anonymous fd, this is what constitutes the application
+  * visible backing of an io_uring instance. The application mmaps this
+  * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
+  * we have to tie this fd to a socket for file garbage collection purposes.
+  */
+-static int io_uring_get_fd(struct io_ring_ctx *ctx)
++static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
+ {
+       struct file *file;
++#if defined(CONFIG_UNIX)
+       int ret;
+-      int fd;
+-#if defined(CONFIG_UNIX)
+       ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
+                               &ctx->ring_sock);
+       if (ret)
+-              return ret;
++              return ERR_PTR(ret);
+ #endif
+-      ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+-      if (ret < 0)
+-              goto err;
+-      fd = ret;
+-
+       file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
+                                       O_RDWR | O_CLOEXEC);
+-      if (IS_ERR(file)) {
+-              put_unused_fd(fd);
+-              ret = PTR_ERR(file);
+-              goto err;
+-      }
+-
+ #if defined(CONFIG_UNIX)
+-      ctx->ring_sock->file = file;
+-#endif
+-      ret = io_uring_add_task_file(ctx, file);
+-      if (ret) {
+-              fput(file);
+-              put_unused_fd(fd);
+-              goto err;
++      if (IS_ERR(file)) {
++              sock_release(ctx->ring_sock);
++              ctx->ring_sock = NULL;
++      } else {
++              ctx->ring_sock->file = file;
+       }
+-      fd_install(fd, file);
+-      return fd;
+-err:
+-#if defined(CONFIG_UNIX)
+-      sock_release(ctx->ring_sock);
+-      ctx->ring_sock = NULL;
+ #endif
+-      return ret;
++      return file;
+ }
+ static int io_uring_create(unsigned entries, struct io_uring_params *p,
+@@ -9251,6 +9248,7 @@ static int io_uring_create(unsigned entr
+ {
+       struct user_struct *user = NULL;
+       struct io_ring_ctx *ctx;
++      struct file *file;
+       bool limit_mem;
+       int ret;
+@@ -9397,13 +9395,22 @@ static int io_uring_create(unsigned entr
+               goto err;
+       }
++      file = io_uring_get_file(ctx);
++      if (IS_ERR(file)) {
++              ret = PTR_ERR(file);
++              goto err;
++      }
++
+       /*
+        * Install ring fd as the very last thing, so we don't risk someone
+        * having closed it before we finish setup
+        */
+-      ret = io_uring_get_fd(ctx);
+-      if (ret < 0)
+-              goto err;
++      ret = io_uring_install_fd(ctx, file);
++      if (ret < 0) {
++              /* fput will clean it up */
++              fput(file);
++              return ret;
++      }
+       trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
+       return ret;
diff --git a/queue-5.10/io_uring-fix-ignoring-xa_store-errors.patch b/queue-5.10/io_uring-fix-ignoring-xa_store-errors.patch
new file mode 100644 (file)
index 0000000..2cdbfa0
--- /dev/null
@@ -0,0 +1,49 @@
+From a528b04ea40690ff40501f50d618a62a02b19620 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 21 Dec 2020 18:34:04 +0000
+Subject: io_uring: fix ignoring xa_store errors
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit a528b04ea40690ff40501f50d618a62a02b19620 upstream.
+
+xa_store() may fail, check the result.
+
+Cc: stable@vger.kernel.org # 5.10
+Fixes: 0f2122045b946 ("io_uring: don't rely on weak ->files references")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -8708,10 +8708,9 @@ static void io_uring_cancel_task_request
+ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
+ {
+       struct io_uring_task *tctx = current->io_uring;
++      int ret;
+       if (unlikely(!tctx)) {
+-              int ret;
+-
+               ret = io_uring_alloc_task_context(current);
+               if (unlikely(ret))
+                       return ret;
+@@ -8722,7 +8721,12 @@ static int io_uring_add_task_file(struct
+               if (!old) {
+                       get_file(file);
+-                      xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
++                      ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
++                                              file, GFP_KERNEL));
++                      if (ret) {
++                              fput(file);
++                              return ret;
++                      }
+               }
+               tctx->last = file;
+       }
diff --git a/queue-5.10/io_uring-fix-io_cqring_events-s-noflush.patch b/queue-5.10/io_uring-fix-io_cqring_events-s-noflush.patch
new file mode 100644 (file)
index 0000000..9abb402
--- /dev/null
@@ -0,0 +1,36 @@
+From 59850d226e4907a6f37c1d2fe5ba97546a8691a4 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 6 Dec 2020 22:22:45 +0000
+Subject: io_uring: fix io_cqring_events()'s noflush
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 59850d226e4907a6f37c1d2fe5ba97546a8691a4 upstream.
+
+Checking !list_empty(&ctx->cq_overflow_list) around noflush in
+io_cqring_events() is racy, because if it fails but a request overflowed
+just after that, io_cqring_overflow_flush() still will be called.
+
+Remove the second check, it shouldn't be a problem for performance,
+because there is cq_check_overflow bit check just above.
+
+Cc: <stable@vger.kernel.org> # 5.5+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2242,7 +2242,7 @@ static unsigned io_cqring_events(struct
+                * we wake up the task, and the next invocation will flush the
+                * entries. We cannot safely to it from here.
+                */
+-              if (noflush && !list_empty(&ctx->cq_overflow_list))
++              if (noflush)
+                       return -1U;
+               io_cqring_overflow_flush(ctx, false, NULL, NULL);
diff --git a/queue-5.10/io_uring-fix-io_wqe-work_list-corruption.patch b/queue-5.10/io_uring-fix-io_wqe-work_list-corruption.patch
new file mode 100644 (file)
index 0000000..d931c7d
--- /dev/null
@@ -0,0 +1,67 @@
+From 0020ef04e48571a88d4f482ad08f71052c5c5a08 Mon Sep 17 00:00:00 2001
+From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+Date: Fri, 18 Dec 2020 15:26:48 +0800
+Subject: io_uring: fix io_wqe->work_list corruption
+
+From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+
+commit 0020ef04e48571a88d4f482ad08f71052c5c5a08 upstream.
+
+For the first time a req punted to io-wq, we'll initialize io_wq_work's
+list to be NULL, then insert req to io_wqe->work_list. If this req is not
+inserted into tail of io_wqe->work_list, this req's io_wq_work list will
+point to another req's io_wq_work. For the split bio case, this req may be
+inserted into io_wqe->work_list repeatedly; once we insert it to the tail of
+io_wqe->work_list for the second time, now io_wq_work->list->next will be
+invalid pointer, which then results in many strange errors: panic, kernel
+soft-lockup, rcu stall, etc.
+
+In my vm, the kernel does not have commit cc29e1bf0d63f7 ("block: disable
+iopoll for split bio"); the below fio job can reproduce this bug steadily:
+[global]
+name=iouring-sqpoll-iopoll-1
+ioengine=io_uring
+iodepth=128
+numjobs=1
+thread
+rw=randread
+direct=1
+registerfiles=1
+hipri=1
+bs=4m
+size=100M
+runtime=120
+time_based
+group_reporting
+randrepeat=0
+
+[device]
+directory=/home/feiman.wxg/mntpoint/  # an ext4 mount point
+
+If we have commit cc29e1bf0d63f7 ("block: disable iopoll for split bio"),
+there will be no split bio case for polled io, but I think we still need
+to fix this list corruption; it should also go to stable branches.
+
+To fix this corruption, if a req is inserted into the tail of io_wqe->work_list,
+initialize req->io_wq_work->list->next to be NULL.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io-wq.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/io-wq.h
++++ b/fs/io-wq.h
+@@ -59,6 +59,7 @@ static inline void wq_list_add_tail(stru
+               list->last->next = node;
+               list->last = node;
+       }
++      node->next = NULL;
+ }
+ static inline void wq_list_cut(struct io_wq_work_list *list,
diff --git a/queue-5.10/io_uring-fix-racy-iopoll-completions.patch b/queue-5.10/io_uring-fix-racy-iopoll-completions.patch
new file mode 100644 (file)
index 0000000..9e0e4ad
--- /dev/null
@@ -0,0 +1,64 @@
+From 31bff9a51b264df6d144931a6a5f1d6cc815ed4b Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 6 Dec 2020 22:22:43 +0000
+Subject: io_uring: fix racy IOPOLL completions
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 31bff9a51b264df6d144931a6a5f1d6cc815ed4b upstream.
+
+IOPOLL allows buffer remove/provide requests, but they don't
+synchronise by the rules of IOPOLL; namely, they have to hold uring_lock.
+
+Cc: <stable@vger.kernel.org> # 5.7+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |   23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -3944,11 +3944,17 @@ static int io_remove_buffers(struct io_k
+       head = idr_find(&ctx->io_buffer_idr, p->bgid);
+       if (head)
+               ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+-
+-      io_ring_submit_lock(ctx, !force_nonblock);
+       if (ret < 0)
+               req_set_fail_links(req);
+-      __io_req_complete(req, ret, 0, cs);
++
++      /* need to hold the lock to complete IOPOLL requests */
++      if (ctx->flags & IORING_SETUP_IOPOLL) {
++              __io_req_complete(req, ret, 0, cs);
++              io_ring_submit_unlock(ctx, !force_nonblock);
++      } else {
++              io_ring_submit_unlock(ctx, !force_nonblock);
++              __io_req_complete(req, ret, 0, cs);
++      }
+       return 0;
+ }
+@@ -4033,10 +4039,17 @@ static int io_provide_buffers(struct io_
+               }
+       }
+ out:
+-      io_ring_submit_unlock(ctx, !force_nonblock);
+       if (ret < 0)
+               req_set_fail_links(req);
+-      __io_req_complete(req, ret, 0, cs);
++
++      /* need to hold the lock to complete IOPOLL requests */
++      if (ctx->flags & IORING_SETUP_IOPOLL) {
++              __io_req_complete(req, ret, 0, cs);
++              io_ring_submit_unlock(ctx, !force_nonblock);
++      } else {
++              io_ring_submit_unlock(ctx, !force_nonblock);
++              __io_req_complete(req, ret, 0, cs);
++      }
+       return 0;
+ }
diff --git a/queue-5.10/io_uring-hold-uring_lock-while-completing-failed-polled-io-in-io_wq_submit_work.patch b/queue-5.10/io_uring-hold-uring_lock-while-completing-failed-polled-io-in-io_wq_submit_work.patch
new file mode 100644 (file)
index 0000000..87f25b7
--- /dev/null
@@ -0,0 +1,74 @@
+From c07e6719511e77c4b289f62bfe96423eb6ea061d Mon Sep 17 00:00:00 2001
+From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+Date: Mon, 14 Dec 2020 23:49:41 +0800
+Subject: io_uring: hold uring_lock while completing failed polled io in io_wq_submit_work()
+
+From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+
+commit c07e6719511e77c4b289f62bfe96423eb6ea061d upstream.
+
+io_iopoll_complete() does not hold completion_lock to complete polled io,
+so in io_wq_submit_work(), we can not call io_req_complete() directly, to
+complete polled io, otherwise there maybe concurrent access to cqring,
+defer_list, etc, which is not safe. Commit dad1b1242fd5 ("io_uring: always
+let io_iopoll_complete() complete polled io") has fixed this issue, but
+Pavel reported that IOPOLL apart from rw can do buf reg/unreg requests(
+IORING_OP_PROVIDE_BUFFERS or IORING_OP_REMOVE_BUFFERS), so the fix is not
+good.
+
+Given that io_iopoll_complete() is always called under uring_lock, so here
+for polled io, we can also get uring_lock to fix this issue.
+
+Fixes: dad1b1242fd5 ("io_uring: always let io_iopoll_complete() complete polled io")
+Cc: <stable@vger.kernel.org> # 5.5+
+Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+[axboe: don't deref 'req' after completing it']
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |   29 +++++++++++++++++++----------
+ 1 file changed, 19 insertions(+), 10 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6081,19 +6081,28 @@ static struct io_wq_work *io_wq_submit_w
+       }
+       if (ret) {
++              struct io_ring_ctx *lock_ctx = NULL;
++
++              if (req->ctx->flags & IORING_SETUP_IOPOLL)
++                      lock_ctx = req->ctx;
++
+               /*
+-               * io_iopoll_complete() does not hold completion_lock to complete
+-               * polled io, so here for polled io, just mark it done and still let
+-               * io_iopoll_complete() complete it.
++               * io_iopoll_complete() does not hold completion_lock to
++               * complete polled io, so here for polled io, we can not call
++               * io_req_complete() directly, otherwise there maybe concurrent
++               * access to cqring, defer_list, etc, which is not safe. Given
++               * that io_iopoll_complete() is always called under uring_lock,
++               * so here for polled io, we also get uring_lock to complete
++               * it.
+                */
+-              if (req->ctx->flags & IORING_SETUP_IOPOLL) {
+-                      struct kiocb *kiocb = &req->rw.kiocb;
++              if (lock_ctx)
++                      mutex_lock(&lock_ctx->uring_lock);
++
++              req_set_fail_links(req);
++              io_req_complete(req, ret);
+-                      kiocb_done(kiocb, ret, NULL);
+-              } else {
+-                      req_set_fail_links(req);
+-                      io_req_complete(req, ret);
+-              }
++              if (lock_ctx)
++                      mutex_unlock(&lock_ctx->uring_lock);
+       }
+       return io_steal_work(req);
diff --git a/queue-5.10/io_uring-make-ctx-cancel-on-exit-targeted-to-actual-ctx.patch b/queue-5.10/io_uring-make-ctx-cancel-on-exit-targeted-to-actual-ctx.patch
new file mode 100644 (file)
index 0000000..192ec39
--- /dev/null
@@ -0,0 +1,47 @@
+From 00c18640c2430c4bafaaeede1f9dd6f7ec0e4b25 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 20 Dec 2020 10:45:02 -0700
+Subject: io_uring: make ctx cancel on exit targeted to actual ctx
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 00c18640c2430c4bafaaeede1f9dd6f7ec0e4b25 upstream.
+
+Before IORING_SETUP_ATTACH_WQ, we could just cancel everything on the
+io-wq when exiting. But that's not the case if they are shared, so
+cancel for the specific ctx instead.
+
+Cc: stable@vger.kernel.org
+Fixes: 24369c2e3bb0 ("io_uring: add io-wq workqueue sharing")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -8401,6 +8401,13 @@ static void io_ring_exit_work(struct wor
+       io_ring_ctx_free(ctx);
+ }
++static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
++{
++      struct io_kiocb *req = container_of(work, struct io_kiocb, work);
++
++      return req->ctx == data;
++}
++
+ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+ {
+       mutex_lock(&ctx->uring_lock);
+@@ -8415,7 +8422,7 @@ static void io_ring_ctx_wait_and_kill(st
+       io_poll_remove_all(ctx, NULL);
+       if (ctx->io_wq)
+-              io_wq_cancel_all(ctx->io_wq);
++              io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
+       /* if we failed setting up the ctx, we might not have any rings */
+       io_iopoll_try_reap_events(ctx);
diff --git a/queue-5.10/media-gspca-fix-memory-leak-in-probe.patch b/queue-5.10/media-gspca-fix-memory-leak-in-probe.patch
new file mode 100644 (file)
index 0000000..89431a6
--- /dev/null
@@ -0,0 +1,40 @@
+From e469d0b09a19496e1972a20974bbf55b728151eb Mon Sep 17 00:00:00 2001
+From: Alan Stern <stern@rowland.harvard.edu>
+Date: Wed, 2 Dec 2020 18:20:04 +0100
+Subject: media: gspca: Fix memory leak in probe
+
+From: Alan Stern <stern@rowland.harvard.edu>
+
+commit e469d0b09a19496e1972a20974bbf55b728151eb upstream.
+
+The gspca driver leaks memory when a probe fails.  gspca_dev_probe2()
+calls v4l2_device_register(), which takes a reference to the
+underlying device node (in this case, a USB interface).  But the
+failure pathway neglects to call v4l2_device_unregister(), the routine
+responsible for dropping this reference.  Consequently the memory for
+the USB interface and its device never gets released.
+
+This patch adds the missing function call.
+
+Reported-and-tested-by: syzbot+44e64397bd81d5e84cba@syzkaller.appspotmail.com
+
+Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
+CC: <stable@vger.kernel.org>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/media/usb/gspca/gspca.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/media/usb/gspca/gspca.c
++++ b/drivers/media/usb/gspca/gspca.c
+@@ -1575,6 +1575,7 @@ out:
+               input_unregister_device(gspca_dev->input_dev);
+ #endif
+       v4l2_ctrl_handler_free(gspca_dev->vdev.ctrl_handler);
++      v4l2_device_unregister(&gspca_dev->v4l2_dev);
+       kfree(gspca_dev->usb_buf);
+       kfree(gspca_dev);
+       return ret;
index 4cce9ca480b7bb26ca5a01a3f5c0c6ea6ab27232..b1a446ed318193c49ffab6b21f51dccd339ed21a 100644 (file)
@@ -507,3 +507,15 @@ io_uring-fix-racy-iopoll-flush-overflow.patch
 io_uring-cancel-reqs-shouldn-t-kill-overflow-list.patch
 smack-handle-io_uring-kernel-thread-privileges.patch
 proc-mountinfo-make-splice-available-again.patch
+io_uring-fix-io_cqring_events-s-noflush.patch
+io_uring-fix-racy-iopoll-completions.patch
+io_uring-always-let-io_iopoll_complete-complete-polled-io.patch
+vfio-pci-move-dummy_resources_list-init-in-vfio_pci_probe.patch
+vfio-pci-nvlink2-do-not-attempt-npu2-setup-on-power8nvl-npu.patch
+media-gspca-fix-memory-leak-in-probe.patch
+io_uring-fix-io_wqe-work_list-corruption.patch
+io_uring-fix-0-iov-read-buffer-select.patch
+io_uring-hold-uring_lock-while-completing-failed-polled-io-in-io_wq_submit_work.patch
+io_uring-fix-ignoring-xa_store-errors.patch
+io_uring-fix-double-io_uring-free.patch
+io_uring-make-ctx-cancel-on-exit-targeted-to-actual-ctx.patch
diff --git a/queue-5.10/vfio-pci-move-dummy_resources_list-init-in-vfio_pci_probe.patch b/queue-5.10/vfio-pci-move-dummy_resources_list-init-in-vfio_pci_probe.patch
new file mode 100644 (file)
index 0000000..0c1e810
--- /dev/null
@@ -0,0 +1,45 @@
+From 16b8fe4caf499ae8e12d2ab1b1324497e36a7b83 Mon Sep 17 00:00:00 2001
+From: Eric Auger <eric.auger@redhat.com>
+Date: Fri, 13 Nov 2020 18:52:02 +0100
+Subject: vfio/pci: Move dummy_resources_list init in vfio_pci_probe()
+
+From: Eric Auger <eric.auger@redhat.com>
+
+commit 16b8fe4caf499ae8e12d2ab1b1324497e36a7b83 upstream.
+
+In case an error occurs in vfio_pci_enable() before the call to
+vfio_pci_probe_mmaps(), vfio_pci_disable() will  try to iterate
+on an uninitialized list and cause a kernel panic.
+
+Lets move to the initialization to vfio_pci_probe() to fix the
+issue.
+
+Signed-off-by: Eric Auger <eric.auger@redhat.com>
+Fixes: 05f0c03fbac1 ("vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive")
+CC: Stable <stable@vger.kernel.org> # v4.7+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vfio/pci/vfio_pci.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci.c
++++ b/drivers/vfio/pci/vfio_pci.c
+@@ -161,8 +161,6 @@ static void vfio_pci_probe_mmaps(struct
+       int i;
+       struct vfio_pci_dummy_resource *dummy_res;
+-      INIT_LIST_HEAD(&vdev->dummy_resources_list);
+-
+       for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+               int bar = i + PCI_STD_RESOURCES;
+@@ -1966,6 +1964,7 @@ static int vfio_pci_probe(struct pci_dev
+       mutex_init(&vdev->igate);
+       spin_lock_init(&vdev->irqlock);
+       mutex_init(&vdev->ioeventfds_lock);
++      INIT_LIST_HEAD(&vdev->dummy_resources_list);
+       INIT_LIST_HEAD(&vdev->ioeventfds_list);
+       mutex_init(&vdev->vma_lock);
+       INIT_LIST_HEAD(&vdev->vma_list);
diff --git a/queue-5.10/vfio-pci-nvlink2-do-not-attempt-npu2-setup-on-power8nvl-npu.patch b/queue-5.10/vfio-pci-nvlink2-do-not-attempt-npu2-setup-on-power8nvl-npu.patch
new file mode 100644 (file)
index 0000000..d2f21a5
--- /dev/null
@@ -0,0 +1,65 @@
+From d22f9a6c92de96304c81792942ae7c306f08ac77 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Sun, 22 Nov 2020 18:39:50 +1100
+Subject: vfio/pci/nvlink2: Do not attempt NPU2 setup on POWER8NVL NPU
+
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+
+commit d22f9a6c92de96304c81792942ae7c306f08ac77 upstream.
+
+We execute certain NPU2 setup code (such as mapping an LPID to a device
+in NPU2) unconditionally if an Nvlink bridge is detected. However this
+cannot succeed on POWER8NVL machines, as the init helpers return an error
+other than ENODEV, which means the device is there and setup failed, so
+vfio_pci_enable() fails and pass through is not possible.
+
+This changes the two NPU2 related init helpers to return -ENODEV if
+there is no "memory-region" device tree property as this is
+the distinction between NPU and NPU2.
+
+Tested on
+- POWER9 pvr=004e1201, Ubuntu 19.04 host, Ubuntu 18.04 vm,
+  NVIDIA GV100 10de:1db1 driver 418.39
+- POWER8 pvr=004c0100, RHEL 7.6 host, Ubuntu 16.10 vm,
+  NVIDIA P100 10de:15f9 driver 396.47
+
+Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver")
+Cc: stable@vger.kernel.org # 5.0
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vfio/pci/vfio_pci_nvlink2.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci_nvlink2.c
++++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
+@@ -231,7 +231,7 @@ int vfio_pci_nvdia_v100_nvlink2_init(str
+               return -EINVAL;
+       if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
+-              return -EINVAL;
++              return -ENODEV;
+       mem_node = of_find_node_by_phandle(mem_phandle);
+       if (!mem_node)
+@@ -393,7 +393,7 @@ int vfio_pci_ibm_npu2_init(struct vfio_p
+       int ret;
+       struct vfio_pci_npu2_data *data;
+       struct device_node *nvlink_dn;
+-      u32 nvlink_index = 0;
++      u32 nvlink_index = 0, mem_phandle = 0;
+       struct pci_dev *npdev = vdev->pdev;
+       struct device_node *npu_node = pci_device_to_OF_node(npdev);
+       struct pci_controller *hose = pci_bus_to_host(npdev->bus);
+@@ -408,6 +408,9 @@ int vfio_pci_ibm_npu2_init(struct vfio_p
+       if (!pnv_pci_get_gpu_dev(vdev->pdev))
+               return -ENODEV;
++      if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
++              return -ENODEV;
++
+       /*
+        * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
+        * so we can allocate one register per link, using nvlink index as