git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.8-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Oct 2020 12:18:17 +0000 (13:18 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Oct 2020 12:18:17 +0000 (13:18 +0100)
added patches:
io-wq-fix-use-after-free-in-io_wq_worker_running.patch
io_uring-allow-timeout-poll-files-killing-to-take-task-into-account.patch
io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch
io_uring-don-t-rely-on-weak-files-references.patch
io_uring-don-t-run-task-work-on-an-exiting-task.patch
io_uring-enable-task-files-specific-overflow-flushing.patch
io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch
io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch
io_uring-move-dropping-of-files-into-separate-helper.patch
io_uring-no-need-to-call-xa_destroy-on-empty-xarray.patch
io_uring-reference-nsproxy-for-file-table-commands.patch
io_uring-return-cancelation-status-from-poll-timeout-files-handlers.patch
io_uring-stash-ctx-task-reference-for-sqpoll.patch
io_uring-unconditionally-grab-req-task.patch
io_wq-make-io_wqe-lock-a-raw_spinlock_t.patch

16 files changed:
queue-5.8/io-wq-fix-use-after-free-in-io_wq_worker_running.patch [new file with mode: 0644]
queue-5.8/io_uring-allow-timeout-poll-files-killing-to-take-task-into-account.patch [new file with mode: 0644]
queue-5.8/io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch [new file with mode: 0644]
queue-5.8/io_uring-don-t-rely-on-weak-files-references.patch [new file with mode: 0644]
queue-5.8/io_uring-don-t-run-task-work-on-an-exiting-task.patch [new file with mode: 0644]
queue-5.8/io_uring-enable-task-files-specific-overflow-flushing.patch [new file with mode: 0644]
queue-5.8/io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch [new file with mode: 0644]
queue-5.8/io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch [new file with mode: 0644]
queue-5.8/io_uring-move-dropping-of-files-into-separate-helper.patch [new file with mode: 0644]
queue-5.8/io_uring-no-need-to-call-xa_destroy-on-empty-xarray.patch [new file with mode: 0644]
queue-5.8/io_uring-reference-nsproxy-for-file-table-commands.patch [new file with mode: 0644]
queue-5.8/io_uring-return-cancelation-status-from-poll-timeout-files-handlers.patch [new file with mode: 0644]
queue-5.8/io_uring-stash-ctx-task-reference-for-sqpoll.patch [new file with mode: 0644]
queue-5.8/io_uring-unconditionally-grab-req-task.patch [new file with mode: 0644]
queue-5.8/io_wq-make-io_wqe-lock-a-raw_spinlock_t.patch [new file with mode: 0644]
queue-5.8/series

diff --git a/queue-5.8/io-wq-fix-use-after-free-in-io_wq_worker_running.patch b/queue-5.8/io-wq-fix-use-after-free-in-io_wq_worker_running.patch
new file mode 100644 (file)
index 0000000..d6b4164
--- /dev/null
@@ -0,0 +1,310 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Hillf Danton <hdanton@sina.com>
+Date: Sat, 26 Sep 2020 21:26:55 +0800
+Subject: io-wq: fix use-after-free in io_wq_worker_running
+
+From: Hillf Danton <hdanton@sina.com>
+
+commit c4068bf898ddaef791049a366828d9b84b467bda upstream.
+
+The smart syzbot has found a reproducer for the following issue:
+
+ ==================================================================
+ BUG: KASAN: use-after-free in instrument_atomic_write include/linux/instrumented.h:71 [inline]
+ BUG: KASAN: use-after-free in atomic_inc include/asm-generic/atomic-instrumented.h:240 [inline]
+ BUG: KASAN: use-after-free in io_wqe_inc_running fs/io-wq.c:301 [inline]
+ BUG: KASAN: use-after-free in io_wq_worker_running+0xde/0x110 fs/io-wq.c:613
+ Write of size 4 at addr ffff8882183db08c by task io_wqe_worker-0/7771
+
+ CPU: 0 PID: 7771 Comm: io_wqe_worker-0 Not tainted 5.9.0-rc4-syzkaller #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ Call Trace:
+  __dump_stack lib/dump_stack.c:77 [inline]
+  dump_stack+0x198/0x1fd lib/dump_stack.c:118
+  print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383
+  __kasan_report mm/kasan/report.c:513 [inline]
+  kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
+  check_memory_region_inline mm/kasan/generic.c:186 [inline]
+  check_memory_region+0x13d/0x180 mm/kasan/generic.c:192
+  instrument_atomic_write include/linux/instrumented.h:71 [inline]
+  atomic_inc include/asm-generic/atomic-instrumented.h:240 [inline]
+  io_wqe_inc_running fs/io-wq.c:301 [inline]
+  io_wq_worker_running+0xde/0x110 fs/io-wq.c:613
+  schedule_timeout+0x148/0x250 kernel/time/timer.c:1879
+  io_wqe_worker+0x517/0x10e0 fs/io-wq.c:580
+  kthread+0x3b5/0x4a0 kernel/kthread.c:292
+  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
+
+ Allocated by task 7768:
+  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
+  kasan_set_track mm/kasan/common.c:56 [inline]
+  __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461
+  kmem_cache_alloc_node_trace+0x17b/0x3f0 mm/slab.c:3594
+  kmalloc_node include/linux/slab.h:572 [inline]
+  kzalloc_node include/linux/slab.h:677 [inline]
+  io_wq_create+0x57b/0xa10 fs/io-wq.c:1064
+  io_init_wq_offload fs/io_uring.c:7432 [inline]
+  io_sq_offload_start fs/io_uring.c:7504 [inline]
+  io_uring_create fs/io_uring.c:8625 [inline]
+  io_uring_setup+0x1836/0x28e0 fs/io_uring.c:8694
+  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
+  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ Freed by task 21:
+  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
+  kasan_set_track+0x1c/0x30 mm/kasan/common.c:56
+  kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355
+  __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422
+  __cache_free mm/slab.c:3418 [inline]
+  kfree+0x10e/0x2b0 mm/slab.c:3756
+  __io_wq_destroy fs/io-wq.c:1138 [inline]
+  io_wq_destroy+0x2af/0x460 fs/io-wq.c:1146
+  io_finish_async fs/io_uring.c:6836 [inline]
+  io_ring_ctx_free fs/io_uring.c:7870 [inline]
+  io_ring_exit_work+0x1e4/0x6d0 fs/io_uring.c:7954
+  process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
+  worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
+  kthread+0x3b5/0x4a0 kernel/kthread.c:292
+  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
+
+ The buggy address belongs to the object at ffff8882183db000
+  which belongs to the cache kmalloc-1k of size 1024
+ The buggy address is located 140 bytes inside of
+  1024-byte region [ffff8882183db000, ffff8882183db400)
+ The buggy address belongs to the page:
+ page:000000009bada22b refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2183db
+ flags: 0x57ffe0000000200(slab)
+ raw: 057ffe0000000200 ffffea0008604c48 ffffea00086a8648 ffff8880aa040700
+ raw: 0000000000000000 ffff8882183db000 0000000100000002 0000000000000000
+ page dumped because: kasan: bad access detected
+
+ Memory state around the buggy address:
+  ffff8882183daf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+  ffff8882183db000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ >ffff8882183db080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+                       ^
+  ffff8882183db100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+  ffff8882183db180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ==================================================================
+
+which is down to the comment below,
+
+       /* all workers gone, wq exit can proceed */
+       if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
+               complete(&wqe->wq->done);
+
+because there might be multiple cases of wqe in a wq and we would wait
+for every worker in every wqe to go home before releasing wq's resources
+on destroying.
+
+To that end, rework wq's refcount by making it independent of the tracking
+of workers because after all they are two different things, and keeping
+it balanced when workers come and go. Note the manager kthread, like
+other workers, now holds a grab to wq during its lifetime.
+
+Finally to help destroy wq, check IO_WQ_BIT_EXIT upon creating worker
+and do nothing for exiting wq.
+
+Cc: stable@vger.kernel.org # v5.5+
+Reported-by: syzbot+45fa0a195b941764e0f0@syzkaller.appspotmail.com
+Reported-by: syzbot+9af99580130003da82b1@syzkaller.appspotmail.com
+Cc: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Hillf Danton <hdanton@sina.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c |  116 ++++++++++++++++++++++++++++++-------------------------------
+ 1 file changed, 58 insertions(+), 58 deletions(-)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -202,7 +202,6 @@ static void io_worker_exit(struct io_wor
+ {
+       struct io_wqe *wqe = worker->wqe;
+       struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+-      unsigned nr_workers;
+       /*
+        * If we're not at zero, someone else is holding a brief reference
+@@ -230,15 +229,11 @@ static void io_worker_exit(struct io_wor
+               raw_spin_lock_irq(&wqe->lock);
+       }
+       acct->nr_workers--;
+-      nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
+-                      wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
+       raw_spin_unlock_irq(&wqe->lock);
+-      /* all workers gone, wq exit can proceed */
+-      if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
+-              complete(&wqe->wq->done);
+-
+       kfree_rcu(worker, rcu);
++      if (refcount_dec_and_test(&wqe->wq->refs))
++              complete(&wqe->wq->done);
+ }
+ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+@@ -644,7 +639,7 @@ void io_wq_worker_sleeping(struct task_s
+ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+ {
+-      struct io_wqe_acct *acct =&wqe->acct[index];
++      struct io_wqe_acct *acct = &wqe->acct[index];
+       struct io_worker *worker;
+       worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
+@@ -677,6 +672,7 @@ static bool create_io_worker(struct io_w
+       if (index == IO_WQ_ACCT_UNBOUND)
+               atomic_inc(&wq->user->processes);
++      refcount_inc(&wq->refs);
+       wake_up_process(worker->task);
+       return true;
+ }
+@@ -692,28 +688,63 @@ static inline bool io_wqe_need_worker(st
+       return acct->nr_workers < acct->max_workers;
+ }
++static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
++{
++      send_sig(SIGINT, worker->task, 1);
++      return false;
++}
++
++/*
++ * Iterate the passed in list and call the specific function for each
++ * worker that isn't exiting
++ */
++static bool io_wq_for_each_worker(struct io_wqe *wqe,
++                                bool (*func)(struct io_worker *, void *),
++                                void *data)
++{
++      struct io_worker *worker;
++      bool ret = false;
++
++      list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
++              if (io_worker_get(worker)) {
++                      /* no task if node is/was offline */
++                      if (worker->task)
++                              ret = func(worker, data);
++                      io_worker_release(worker);
++                      if (ret)
++                              break;
++              }
++      }
++
++      return ret;
++}
++
++static bool io_wq_worker_wake(struct io_worker *worker, void *data)
++{
++      wake_up_process(worker->task);
++      return false;
++}
++
+ /*
+  * Manager thread. Tasked with creating new workers, if we need them.
+  */
+ static int io_wq_manager(void *data)
+ {
+       struct io_wq *wq = data;
+-      int workers_to_create = num_possible_nodes();
+       int node;
+       /* create fixed workers */
+-      refcount_set(&wq->refs, workers_to_create);
++      refcount_set(&wq->refs, 1);
+       for_each_node(node) {
+               if (!node_online(node))
+                       continue;
+-              if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
+-                      goto err;
+-              workers_to_create--;
++              if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
++                      continue;
++              set_bit(IO_WQ_BIT_ERROR, &wq->state);
++              set_bit(IO_WQ_BIT_EXIT, &wq->state);
++              goto out;
+       }
+-      while (workers_to_create--)
+-              refcount_dec(&wq->refs);
+-
+       complete(&wq->done);
+       while (!kthread_should_stop()) {
+@@ -745,12 +776,18 @@ static int io_wq_manager(void *data)
+       if (current->task_works)
+               task_work_run();
+-      return 0;
+-err:
+-      set_bit(IO_WQ_BIT_ERROR, &wq->state);
+-      set_bit(IO_WQ_BIT_EXIT, &wq->state);
+-      if (refcount_sub_and_test(workers_to_create, &wq->refs))
++out:
++      if (refcount_dec_and_test(&wq->refs)) {
+               complete(&wq->done);
++              return 0;
++      }
++      /* if ERROR is set and we get here, we have workers to wake */
++      if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
++              rcu_read_lock();
++              for_each_node(node)
++                      io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
++              rcu_read_unlock();
++      }
+       return 0;
+ }
+@@ -858,37 +895,6 @@ void io_wq_hash_work(struct io_wq_work *
+       work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
+ }
+-static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
+-{
+-      send_sig(SIGINT, worker->task, 1);
+-      return false;
+-}
+-
+-/*
+- * Iterate the passed in list and call the specific function for each
+- * worker that isn't exiting
+- */
+-static bool io_wq_for_each_worker(struct io_wqe *wqe,
+-                                bool (*func)(struct io_worker *, void *),
+-                                void *data)
+-{
+-      struct io_worker *worker;
+-      bool ret = false;
+-
+-      list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
+-              if (io_worker_get(worker)) {
+-                      /* no task if node is/was offline */
+-                      if (worker->task)
+-                              ret = func(worker, data);
+-                      io_worker_release(worker);
+-                      if (ret)
+-                              break;
+-              }
+-      }
+-
+-      return ret;
+-}
+-
+ void io_wq_cancel_all(struct io_wq *wq)
+ {
+       int node;
+@@ -1121,12 +1127,6 @@ bool io_wq_get(struct io_wq *wq, struct
+       return refcount_inc_not_zero(&wq->use_refs);
+ }
+-static bool io_wq_worker_wake(struct io_worker *worker, void *data)
+-{
+-      wake_up_process(worker->task);
+-      return false;
+-}
+-
+ static void __io_wq_destroy(struct io_wq *wq)
+ {
+       int node;
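
For context, the shape of the fix is a plain balanced-refcount lifetime: the manager starts the wq with a single reference, create_io_worker() adds one per worker, and every exit path drops exactly one, so whichever drop comes last completes wq->done and lets destroy proceed. A minimal sketch of that pattern follows; the struct and function names are illustrative stand-ins, not the real fs/io-wq.c code.

    #include <linux/refcount.h>
    #include <linux/completion.h>

    /* Illustrative stand-in for struct io_wq; not the real structure. */
    struct wq_like {
            refcount_t refs;
            struct completion done;
    };

    static void wq_like_start(struct wq_like *wq)
    {
            init_completion(&wq->done);
            refcount_set(&wq->refs, 1);     /* the manager's own reference */
    }

    static void wq_like_worker_created(struct wq_like *wq)
    {
            refcount_inc(&wq->refs);        /* one reference per live worker */
    }

    static void wq_like_put(struct wq_like *wq)
    {
            /* worker exit and manager exit both funnel through here */
            if (refcount_dec_and_test(&wq->refs))
                    complete(&wq->done);    /* last drop wakes the destroyer */
    }

In the patch above, io_wq_manager() and create_io_worker() play the refcount_set()/refcount_inc() roles, while io_worker_exit() and the manager's out: path do the matching refcount_dec_and_test()/complete().
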
diff --git a/queue-5.8/io_uring-allow-timeout-poll-files-killing-to-take-task-into-account.patch b/queue-5.8/io_uring-allow-timeout-poll-files-killing-to-take-task-into-account.patch
new file mode 100644 (file)
index 0000000..824498b
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 22 Sep 2020 08:18:24 -0600
+Subject: io_uring: allow timeout/poll/files killing to take task into account
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit f3606e3a92ddd36299642c78592fc87609abb1f6 upstream.
+
+We currently cancel these when the ring exits, and we cancel all of
+them. This is in preparation for killing only the ones associated
+with a given task.
+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   30 ++++++++++++++++++++++--------
+ 1 file changed, 22 insertions(+), 8 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1141,13 +1141,25 @@ static void io_kill_timeout(struct io_ki
+       }
+ }
+-static void io_kill_timeouts(struct io_ring_ctx *ctx)
++static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
++{
++      struct io_ring_ctx *ctx = req->ctx;
++
++      if (!tsk || req->task == tsk)
++              return true;
++      if ((ctx->flags & IORING_SETUP_SQPOLL) && req->task == ctx->sqo_thread)
++              return true;
++      return false;
++}
++
++static void io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
+ {
+       struct io_kiocb *req, *tmp;
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+-              io_kill_timeout(req);
++              if (io_task_match(req, tsk))
++                      io_kill_timeout(req);
+       spin_unlock_irq(&ctx->completion_lock);
+ }
+@@ -4641,7 +4653,7 @@ static bool io_poll_remove_one(struct io
+       return do_complete;
+ }
+-static void io_poll_remove_all(struct io_ring_ctx *ctx)
++static void io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
+ {
+       struct hlist_node *tmp;
+       struct io_kiocb *req;
+@@ -4652,8 +4664,10 @@ static void io_poll_remove_all(struct io
+               struct hlist_head *list;
+               list = &ctx->cancel_hash[i];
+-              hlist_for_each_entry_safe(req, tmp, list, hash_node)
+-                      posted += io_poll_remove_one(req);
++              hlist_for_each_entry_safe(req, tmp, list, hash_node) {
++                      if (io_task_match(req, tsk))
++                              posted += io_poll_remove_one(req);
++              }
+       }
+       spin_unlock_irq(&ctx->completion_lock);
+@@ -7556,8 +7570,8 @@ static void io_ring_ctx_wait_and_kill(st
+       percpu_ref_kill(&ctx->refs);
+       mutex_unlock(&ctx->uring_lock);
+-      io_kill_timeouts(ctx);
+-      io_poll_remove_all(ctx);
++      io_kill_timeouts(ctx, NULL);
++      io_poll_remove_all(ctx, NULL);
+       if (ctx->io_wq)
+               io_wq_cancel_all(ctx->io_wq);
+@@ -7809,7 +7823,7 @@ static bool io_cancel_task_cb(struct io_
+       struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+       struct task_struct *task = data;
+-      return req->task == task;
++      return io_task_match(req, task);
+ }
+ static int io_uring_flush(struct file *file, void *data)
diff --git a/queue-5.8/io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch b/queue-5.8/io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch
new file mode 100644 (file)
index 0000000..eac61ac
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Fri, 9 Oct 2020 13:49:53 +0100
+Subject: io_uring: Convert advanced XArray uses to the normal API
+
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+
+commit 5e2ed8c4f45093698855b1f45cdf43efbf6dd498 upstream.
+
+There are no bugs here that I've spotted, it's just easier to use the
+normal API and there are no performance advantages to using the more
+verbose advanced API.
+
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   14 ++------------
+ 1 file changed, 2 insertions(+), 12 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -7958,27 +7958,17 @@ static int io_uring_add_task_file(struct
+ static void io_uring_del_task_file(struct file *file)
+ {
+       struct io_uring_task *tctx = current->io_uring;
+-      XA_STATE(xas, &tctx->xa, (unsigned long) file);
+       if (tctx->last == file)
+               tctx->last = NULL;
+-
+-      xas_lock(&xas);
+-      file = xas_store(&xas, NULL);
+-      xas_unlock(&xas);
+-
++      file = xa_erase(&tctx->xa, (unsigned long)file);
+       if (file)
+               fput(file);
+ }
+ static void __io_uring_attempt_task_drop(struct file *file)
+ {
+-      XA_STATE(xas, &current->io_uring->xa, (unsigned long) file);
+-      struct file *old;
+-
+-      rcu_read_lock();
+-      old = xas_load(&xas);
+-      rcu_read_unlock();
++      struct file *old = xa_load(&current->io_uring->xa, (unsigned long)file);
+       if (old == file)
+               io_uring_del_task_file(file);
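
For reference, the normal XArray entry points used above perform their own locking: xa_erase() takes the xa_lock around the removal and xa_load() handles the RCU read-side section internally, which is what the removed XA_STATE/xas_* boilerplate was doing by hand. A small sketch with a hypothetical standalone table, not the io_uring code:

    #include <linux/xarray.h>

    static DEFINE_XARRAY(example_table);

    /* Roughly equivalent to xas_lock(); xas_store(..., NULL); xas_unlock(); */
    static void *example_remove(unsigned long key)
    {
            return xa_erase(&example_table, key);
    }

    /* Roughly equivalent to rcu_read_lock(); xas_load(); rcu_read_unlock(); */
    static void *example_peek(unsigned long key)
    {
            return xa_load(&example_table, key);
    }
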
diff --git a/queue-5.8/io_uring-don-t-rely-on-weak-files-references.patch b/queue-5.8/io_uring-don-t-rely-on-weak-files-references.patch
new file mode 100644 (file)
index 0000000..c0f8a77
--- /dev/null
@@ -0,0 +1,648 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 13 Sep 2020 13:09:39 -0600
+Subject: io_uring: don't rely on weak ->files references
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 0f2122045b946241a9e549c2a76cea54fa58a7ff upstream.
+
+Grab actual references to the files_struct. To avoid circular references
+issues due to this, we add a per-task note that keeps track of what
+io_uring contexts a task has used. When the tasks execs or exits its
+assigned files, we cancel requests based on this tracking.
+
+With that, we can grab proper references to the files table, and no
+longer need to rely on stashing away ring_fd and ring_file to check
+if the ring_fd may have been closed.
+
+Cc: stable@vger.kernel.org # v5.5+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exec.c                |    6 
+ fs/file.c                |    2 
+ fs/io_uring.c            |  301 +++++++++++++++++++++++++++++++++++++++++------
+ include/linux/io_uring.h |   53 ++++++++
+ include/linux/sched.h    |    5 
+ init/init_task.c         |    3 
+ kernel/fork.c            |    6 
+ 7 files changed, 340 insertions(+), 36 deletions(-)
+ create mode 100644 include/linux/io_uring.h
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -62,6 +62,7 @@
+ #include <linux/oom.h>
+ #include <linux/compat.h>
+ #include <linux/vmalloc.h>
++#include <linux/io_uring.h>
+ #include <linux/uaccess.h>
+ #include <asm/mmu_context.h>
+@@ -1847,6 +1848,11 @@ static int __do_execve_file(int fd, stru
+        * further execve() calls fail. */
+       current->flags &= ~PF_NPROC_EXCEEDED;
++      /*
++       * Cancel any io_uring activity across execve
++       */
++      io_uring_task_cancel();
++
+       retval = unshare_files(&displaced);
+       if (retval)
+               goto out_ret;
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -18,6 +18,7 @@
+ #include <linux/bitops.h>
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
++#include <linux/io_uring.h>
+ unsigned int sysctl_nr_open __read_mostly = 1024*1024;
+ unsigned int sysctl_nr_open_min = BITS_PER_LONG;
+@@ -439,6 +440,7 @@ void exit_files(struct task_struct *tsk)
+       struct files_struct * files = tsk->files;
+       if (files) {
++              io_uring_files_cancel(files);
+               task_lock(tsk);
+               tsk->files = NULL;
+               task_unlock(tsk);
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -78,6 +78,7 @@
+ #include <linux/fs_struct.h>
+ #include <linux/splice.h>
+ #include <linux/task_work.h>
++#include <linux/io_uring.h>
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/io_uring.h>
+@@ -283,8 +284,6 @@ struct io_ring_ctx {
+        */
+       struct fixed_file_data  *file_data;
+       unsigned                nr_user_files;
+-      int                     ring_fd;
+-      struct file             *ring_file;
+       /* if used, fixed mapped user buffers */
+       unsigned                nr_user_bufs;
+@@ -1335,7 +1334,12 @@ static void __io_cqring_fill_event(struc
+               WRITE_ONCE(cqe->user_data, req->user_data);
+               WRITE_ONCE(cqe->res, res);
+               WRITE_ONCE(cqe->flags, cflags);
+-      } else if (ctx->cq_overflow_flushed) {
++      } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
++              /*
++               * If we're in ring overflow flush mode, or in task cancel mode,
++               * then we cannot store the request for later flushing, we need
++               * to drop it on the floor.
++               */
+               WRITE_ONCE(ctx->rings->cq_overflow,
+                               atomic_inc_return(&ctx->cached_cq_overflow));
+       } else {
+@@ -1451,17 +1455,22 @@ static void io_req_drop_files(struct io_
+               wake_up(&ctx->inflight_wait);
+       spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+       req->flags &= ~REQ_F_INFLIGHT;
++      put_files_struct(req->work.files);
+       req->work.files = NULL;
+ }
+ static void __io_req_aux_free(struct io_kiocb *req)
+ {
++      struct io_uring_task *tctx = req->task->io_uring;
+       if (req->flags & REQ_F_NEED_CLEANUP)
+               io_cleanup_req(req);
+       kfree(req->io);
+       if (req->file)
+               io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
++      atomic_long_inc(&tctx->req_complete);
++      if (tctx->in_idle)
++              wake_up(&tctx->wait);
+       put_task_struct(req->task);
+       io_req_work_drop_env(req);
+ }
+@@ -3532,8 +3541,7 @@ static int io_close_prep(struct io_kiocb
+               return -EBADF;
+       req->close.fd = READ_ONCE(sqe->fd);
+-      if ((req->file && req->file->f_op == &io_uring_fops) ||
+-          req->close.fd == req->ctx->ring_fd)
++      if ((req->file && req->file->f_op == &io_uring_fops))
+               return -EBADF;
+       req->close.put_file = NULL;
+@@ -5671,32 +5679,18 @@ static int io_req_set_file(struct io_sub
+ static int io_grab_files(struct io_kiocb *req)
+ {
+-      int ret = -EBADF;
+       struct io_ring_ctx *ctx = req->ctx;
+       if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE))
+               return 0;
+-      if (!ctx->ring_file)
+-              return -EBADF;
+-      rcu_read_lock();
++      req->work.files = get_files_struct(current);
++      req->flags |= REQ_F_INFLIGHT;
++
+       spin_lock_irq(&ctx->inflight_lock);
+-      /*
+-       * We use the f_ops->flush() handler to ensure that we can flush
+-       * out work accessing these files if the fd is closed. Check if
+-       * the fd has changed since we started down this path, and disallow
+-       * this operation if it has.
+-       */
+-      if (fcheck(ctx->ring_fd) == ctx->ring_file) {
+-              list_add(&req->inflight_entry, &ctx->inflight_list);
+-              req->flags |= REQ_F_INFLIGHT;
+-              req->work.files = current->files;
+-              ret = 0;
+-      }
++      list_add(&req->inflight_entry, &ctx->inflight_list);
+       spin_unlock_irq(&ctx->inflight_lock);
+-      rcu_read_unlock();
+-
+-      return ret;
++      return 0;
+ }
+ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
+@@ -6067,6 +6061,7 @@ static int io_init_req(struct io_ring_ct
+       refcount_set(&req->refs, 2);
+       req->task = current;
+       get_task_struct(req->task);
++      atomic_long_inc(&req->task->io_uring->req_issue);
+       req->result = 0;
+       if (unlikely(req->opcode >= IORING_OP_LAST))
+@@ -6102,8 +6097,7 @@ static int io_init_req(struct io_ring_ct
+       return io_req_set_file(state, req, READ_ONCE(sqe->fd));
+ }
+-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
+-                        struct file *ring_file, int ring_fd)
++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
+ {
+       struct io_submit_state state, *statep = NULL;
+       struct io_kiocb *link = NULL;
+@@ -6127,9 +6121,6 @@ static int io_submit_sqes(struct io_ring
+               statep = &state;
+       }
+-      ctx->ring_fd = ring_fd;
+-      ctx->ring_file = ring_file;
+-
+       for (i = 0; i < nr; i++) {
+               const struct io_uring_sqe *sqe;
+               struct io_kiocb *req;
+@@ -6290,7 +6281,7 @@ static int io_sq_thread(void *data)
+               mutex_lock(&ctx->uring_lock);
+               if (likely(!percpu_ref_is_dying(&ctx->refs)))
+-                      ret = io_submit_sqes(ctx, to_submit, NULL, -1);
++                      ret = io_submit_sqes(ctx, to_submit);
+               mutex_unlock(&ctx->uring_lock);
+               timeout = jiffies + ctx->sq_thread_idle;
+       }
+@@ -7119,6 +7110,34 @@ out_fput:
+       return ret;
+ }
++static int io_uring_alloc_task_context(struct task_struct *task)
++{
++      struct io_uring_task *tctx;
++
++      tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
++      if (unlikely(!tctx))
++              return -ENOMEM;
++
++      xa_init(&tctx->xa);
++      init_waitqueue_head(&tctx->wait);
++      tctx->last = NULL;
++      tctx->in_idle = 0;
++      atomic_long_set(&tctx->req_issue, 0);
++      atomic_long_set(&tctx->req_complete, 0);
++      task->io_uring = tctx;
++      return 0;
++}
++
++void __io_uring_free(struct task_struct *tsk)
++{
++      struct io_uring_task *tctx = tsk->io_uring;
++
++      WARN_ON_ONCE(!xa_empty(&tctx->xa));
++      xa_destroy(&tctx->xa);
++      kfree(tctx);
++      tsk->io_uring = NULL;
++}
++
+ static int io_sq_offload_start(struct io_ring_ctx *ctx,
+                              struct io_uring_params *p)
+ {
+@@ -7154,6 +7173,9 @@ static int io_sq_offload_start(struct io
+                       ctx->sqo_thread = NULL;
+                       goto err;
+               }
++              ret = io_uring_alloc_task_context(ctx->sqo_thread);
++              if (ret)
++                      goto err;
+               wake_up_process(ctx->sqo_thread);
+       } else if (p->flags & IORING_SETUP_SQ_AFF) {
+               /* Can't have SQ_AFF without SQPOLL */
+@@ -7633,7 +7655,7 @@ static bool io_wq_files_match(struct io_
+ {
+       struct files_struct *files = data;
+-      return work->files == files;
++      return !files || work->files == files;
+ }
+ /*
+@@ -7787,7 +7809,7 @@ static bool io_uring_cancel_files(struct
+               spin_lock_irq(&ctx->inflight_lock);
+               list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
+-                      if (req->work.files != files)
++                      if (files && req->work.files != files)
+                               continue;
+                       /* req is being completed, ignore */
+                       if (!refcount_inc_not_zero(&req->refs))
+@@ -7850,18 +7872,217 @@ static bool io_cancel_task_cb(struct io_
+       return io_task_match(req, task);
+ }
++static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
++                                          struct task_struct *task,
++                                          struct files_struct *files)
++{
++      bool ret;
++
++      ret = io_uring_cancel_files(ctx, files);
++      if (!files) {
++              enum io_wq_cancel cret;
++
++              cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true);
++              if (cret != IO_WQ_CANCEL_NOTFOUND)
++                      ret = true;
++
++              /* SQPOLL thread does its own polling */
++              if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
++                      if (!list_empty_careful(&ctx->poll_list)) {
++                              io_iopoll_reap_events(ctx);
++                              ret = true;
++                      }
++              }
++
++              ret |= io_poll_remove_all(ctx, task);
++              ret |= io_kill_timeouts(ctx, task);
++      }
++
++      return ret;
++}
++
++/*
++ * We need to iteratively cancel requests, in case a request has dependent
++ * hard links. These persist even for failure of cancelations, hence keep
++ * looping until none are found.
++ */
++static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
++                                        struct files_struct *files)
++{
++      struct task_struct *task = current;
++
++      if (ctx->flags & IORING_SETUP_SQPOLL)
++              task = ctx->sqo_thread;
++
++      io_cqring_overflow_flush(ctx, true, task, files);
++
++      while (__io_uring_cancel_task_requests(ctx, task, files)) {
++              io_run_task_work();
++              cond_resched();
++      }
++}
++
++/*
++ * Note that this task has used io_uring. We use it for cancelation purposes.
++ */
++static int io_uring_add_task_file(struct file *file)
++{
++      if (unlikely(!current->io_uring)) {
++              int ret;
++
++              ret = io_uring_alloc_task_context(current);
++              if (unlikely(ret))
++                      return ret;
++      }
++      if (current->io_uring->last != file) {
++              XA_STATE(xas, &current->io_uring->xa, (unsigned long) file);
++              void *old;
++
++              rcu_read_lock();
++              old = xas_load(&xas);
++              if (old != file) {
++                      get_file(file);
++                      xas_lock(&xas);
++                      xas_store(&xas, file);
++                      xas_unlock(&xas);
++              }
++              rcu_read_unlock();
++              current->io_uring->last = file;
++      }
++
++      return 0;
++}
++
++/*
++ * Remove this io_uring_file -> task mapping.
++ */
++static void io_uring_del_task_file(struct file *file)
++{
++      struct io_uring_task *tctx = current->io_uring;
++      XA_STATE(xas, &tctx->xa, (unsigned long) file);
++
++      if (tctx->last == file)
++              tctx->last = NULL;
++
++      xas_lock(&xas);
++      file = xas_store(&xas, NULL);
++      xas_unlock(&xas);
++
++      if (file)
++              fput(file);
++}
++
++static void __io_uring_attempt_task_drop(struct file *file)
++{
++      XA_STATE(xas, &current->io_uring->xa, (unsigned long) file);
++      struct file *old;
++
++      rcu_read_lock();
++      old = xas_load(&xas);
++      rcu_read_unlock();
++
++      if (old == file)
++              io_uring_del_task_file(file);
++}
++
++/*
++ * Drop task note for this file if we're the only ones that hold it after
++ * pending fput()
++ */
++static void io_uring_attempt_task_drop(struct file *file, bool exiting)
++{
++      if (!current->io_uring)
++              return;
++      /*
++       * fput() is pending, will be 2 if the only other ref is our potential
++       * task file note. If the task is exiting, drop regardless of count.
++       */
++      if (!exiting && atomic_long_read(&file->f_count) != 2)
++              return;
++
++      __io_uring_attempt_task_drop(file);
++}
++
++void __io_uring_files_cancel(struct files_struct *files)
++{
++      struct io_uring_task *tctx = current->io_uring;
++      XA_STATE(xas, &tctx->xa, 0);
++
++      /* make sure overflow events are dropped */
++      tctx->in_idle = true;
++
++      do {
++              struct io_ring_ctx *ctx;
++              struct file *file;
++
++              xas_lock(&xas);
++              file = xas_next_entry(&xas, ULONG_MAX);
++              xas_unlock(&xas);
++
++              if (!file)
++                      break;
++
++              ctx = file->private_data;
++
++              io_uring_cancel_task_requests(ctx, files);
++              if (files)
++                      io_uring_del_task_file(file);
++      } while (1);
++}
++
++static inline bool io_uring_task_idle(struct io_uring_task *tctx)
++{
++      return atomic_long_read(&tctx->req_issue) ==
++              atomic_long_read(&tctx->req_complete);
++}
++
++/*
++ * Find any io_uring fd that this task has registered or done IO on, and cancel
++ * requests.
++ */
++void __io_uring_task_cancel(void)
++{
++      struct io_uring_task *tctx = current->io_uring;
++      DEFINE_WAIT(wait);
++      long completions;
++
++      /* make sure overflow events are dropped */
++      tctx->in_idle = true;
++
++      while (!io_uring_task_idle(tctx)) {
++              /* read completions before cancelations */
++              completions = atomic_long_read(&tctx->req_complete);
++              __io_uring_files_cancel(NULL);
++
++              prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
++
++              /*
++               * If we've seen completions, retry. This avoids a race where
++               * a completion comes in before we did prepare_to_wait().
++               */
++              if (completions != atomic_long_read(&tctx->req_complete))
++                      continue;
++              if (io_uring_task_idle(tctx))
++                      break;
++              schedule();
++      }
++
++      finish_wait(&tctx->wait, &wait);
++      tctx->in_idle = false;
++}
++
+ static int io_uring_flush(struct file *file, void *data)
+ {
+       struct io_ring_ctx *ctx = file->private_data;
+-      io_uring_cancel_files(ctx, data);
+-
+       /*
+        * If the task is going away, cancel work it may have pending
+        */
+       if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+-              io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
++              data = NULL;
++      io_uring_cancel_task_requests(ctx, data);
++      io_uring_attempt_task_drop(file, !data);
+       return 0;
+ }
+@@ -7975,8 +8196,11 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned
+                       wake_up(&ctx->sqo_wait);
+               submitted = to_submit;
+       } else if (to_submit) {
++              ret = io_uring_add_task_file(f.file);
++              if (unlikely(ret))
++                      goto out;
+               mutex_lock(&ctx->uring_lock);
+-              submitted = io_submit_sqes(ctx, to_submit, f.file, fd);
++              submitted = io_submit_sqes(ctx, to_submit);
+               mutex_unlock(&ctx->uring_lock);
+               if (submitted != to_submit)
+@@ -8188,6 +8412,7 @@ static int io_uring_get_fd(struct io_rin
+       file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
+                                       O_RDWR | O_CLOEXEC);
+       if (IS_ERR(file)) {
++err_fd:
+               put_unused_fd(ret);
+               ret = PTR_ERR(file);
+               goto err;
+@@ -8196,6 +8421,10 @@ static int io_uring_get_fd(struct io_rin
+ #if defined(CONFIG_UNIX)
+       ctx->ring_sock->file = file;
+ #endif
++      if (unlikely(io_uring_add_task_file(file))) {
++              file = ERR_PTR(-ENOMEM);
++              goto err_fd;
++      }
+       fd_install(ret, file);
+       return ret;
+ err:
+--- /dev/null
++++ b/include/linux/io_uring.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++#ifndef _LINUX_IO_URING_H
++#define _LINUX_IO_URING_H
++
++#include <linux/sched.h>
++#include <linux/xarray.h>
++#include <linux/percpu-refcount.h>
++
++struct io_uring_task {
++      /* submission side */
++      struct xarray           xa;
++      struct wait_queue_head  wait;
++      struct file             *last;
++      atomic_long_t           req_issue;
++
++      /* completion side */
++      bool                    in_idle ____cacheline_aligned_in_smp;
++      atomic_long_t           req_complete;
++};
++
++#if defined(CONFIG_IO_URING)
++void __io_uring_task_cancel(void);
++void __io_uring_files_cancel(struct files_struct *files);
++void __io_uring_free(struct task_struct *tsk);
++
++static inline void io_uring_task_cancel(void)
++{
++      if (current->io_uring && !xa_empty(&current->io_uring->xa))
++              __io_uring_task_cancel();
++}
++static inline void io_uring_files_cancel(struct files_struct *files)
++{
++      if (current->io_uring && !xa_empty(&current->io_uring->xa))
++              __io_uring_files_cancel(files);
++}
++static inline void io_uring_free(struct task_struct *tsk)
++{
++      if (tsk->io_uring)
++              __io_uring_free(tsk);
++}
++#else
++static inline void io_uring_task_cancel(void)
++{
++}
++static inline void io_uring_files_cancel(struct files_struct *files)
++{
++}
++static inline void io_uring_free(struct task_struct *tsk)
++{
++}
++#endif
++
++#endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -61,6 +61,7 @@ struct sighand_struct;
+ struct signal_struct;
+ struct task_delay_info;
+ struct task_group;
++struct io_uring_task;
+ /*
+  * Task state bitmask. NOTE! These bits are also
+@@ -923,6 +924,10 @@ struct task_struct {
+       /* Open file information: */
+       struct files_struct             *files;
++#ifdef CONFIG_IO_URING
++      struct io_uring_task            *io_uring;
++#endif
++
+       /* Namespaces: */
+       struct nsproxy                  *nsproxy;
+--- a/init/init_task.c
++++ b/init/init_task.c
+@@ -113,6 +113,9 @@ struct task_struct init_task
+       .thread         = INIT_THREAD,
+       .fs             = &init_fs,
+       .files          = &init_files,
++#ifdef CONFIG_IO_URING
++      .io_uring       = NULL,
++#endif
+       .signal         = &init_signals,
+       .sighand        = &init_sighand,
+       .nsproxy        = &init_nsproxy,
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -95,6 +95,7 @@
+ #include <linux/stackleak.h>
+ #include <linux/kasan.h>
+ #include <linux/scs.h>
++#include <linux/io_uring.h>
+ #include <asm/pgalloc.h>
+ #include <linux/uaccess.h>
+@@ -745,6 +746,7 @@ void __put_task_struct(struct task_struc
+       WARN_ON(refcount_read(&tsk->usage));
+       WARN_ON(tsk == current);
++      io_uring_free(tsk);
+       cgroup_free(tsk);
+       task_numa_free(tsk, true);
+       security_task_free(tsk);
+@@ -2022,6 +2024,10 @@ static __latent_entropy struct task_stru
+       p->vtime.state = VTIME_INACTIVE;
+ #endif
++#ifdef CONFIG_IO_URING
++      p->io_uring = NULL;
++#endif
++
+ #if defined(SPLIT_RSS_COUNTING)
+       memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+ #endif
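
The pivotal change above is in io_grab_files(): instead of checking that ctx->ring_fd still resolves to the ring file, each inflight request now pins the submitter's file table with get_files_struct() and releases it in io_req_drop_files() via put_files_struct(). A rough sketch of that reference pairing, using hypothetical wrapper names rather than the io_uring structures:

    #include <linux/fdtable.h>
    #include <linux/sched.h>

    /* Hypothetical carrier for deferred work; for illustration only. */
    struct deferred_work {
            struct files_struct *files;
    };

    static void deferred_work_prep(struct deferred_work *work)
    {
            /* pin the submitting task's file table for later use */
            work->files = get_files_struct(current);
    }

    static void deferred_work_done(struct deferred_work *work)
    {
            /* balance the reference once the work can no longer touch files */
            put_files_struct(work->files);
            work->files = NULL;
    }
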
diff --git a/queue-5.8/io_uring-don-t-run-task-work-on-an-exiting-task.patch b/queue-5.8/io_uring-don-t-run-task-work-on-an-exiting-task.patch
new file mode 100644 (file)
index 0000000..e61b127
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 12 Oct 2020 11:53:29 -0600
+Subject: io_uring: don't run task work on an exiting task
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 6200b0ae4ea28a4bfd8eb434e33e6201b7a6a282 upstream.
+
+This isn't safe, and isn't needed either. We are guaranteed that any
+work we queue is on a live task (and will be run), or it goes to
+our backup io-wq threads if the task is exiting.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1762,6 +1762,12 @@ static int io_put_kbuf(struct io_kiocb *
+ static inline bool io_run_task_work(void)
+ {
++      /*
++       * Not safe to run on exiting task, and the task_work handling will
++       * not add work to such a task.
++       */
++      if (unlikely(current->flags & PF_EXITING))
++              return false;
+       if (current->task_works) {
+               __set_current_state(TASK_RUNNING);
+               task_work_run();
+@@ -7791,6 +7797,8 @@ static void io_uring_cancel_files(struct
+                       io_put_req(cancel_req);
+               }
++              /* cancellations _may_ trigger task work */
++              io_run_task_work();
+               schedule();
+               finish_wait(&ctx->inflight_wait, &wait);
+       }
diff --git a/queue-5.8/io_uring-enable-task-files-specific-overflow-flushing.patch b/queue-5.8/io_uring-enable-task-files-specific-overflow-flushing.patch
new file mode 100644 (file)
index 0000000..cad2079
--- /dev/null
@@ -0,0 +1,131 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 28 Sep 2020 13:10:13 -0600
+Subject: io_uring: enable task/files specific overflow flushing
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e6c8aa9ac33bd7c968af7816240fc081401fddcd upstream.
+
+This allows us to selectively flush out pending overflows, depending on
+the task and/or files_struct being passed in.
+
+No intended functional changes in this patch.
+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   41 ++++++++++++++++++++++++++---------------
+ 1 file changed, 26 insertions(+), 15 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1240,12 +1240,24 @@ static void io_cqring_ev_posted(struct i
+               eventfd_signal(ctx->cq_ev_fd, 1);
+ }
++static inline bool io_match_files(struct io_kiocb *req,
++                                     struct files_struct *files)
++{
++      if (!files)
++              return true;
++      if (req->flags & REQ_F_WORK_INITIALIZED)
++              return req->work.files == files;
++      return false;
++}
++
+ /* Returns true if there are no backlogged entries after the flush */
+-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
++static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
++                                   struct task_struct *tsk,
++                                   struct files_struct *files)
+ {
+       struct io_rings *rings = ctx->rings;
++      struct io_kiocb *req, *tmp;
+       struct io_uring_cqe *cqe;
+-      struct io_kiocb *req;
+       unsigned long flags;
+       LIST_HEAD(list);
+@@ -1264,7 +1276,12 @@ static bool io_cqring_overflow_flush(str
+               ctx->cq_overflow_flushed = 1;
+       cqe = NULL;
+-      while (!list_empty(&ctx->cq_overflow_list)) {
++      list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, list) {
++              if (tsk && req->task != tsk)
++                      continue;
++              if (!io_match_files(req, files))
++                      continue;
++
+               cqe = io_get_cqring(ctx);
+               if (!cqe && !force)
+                       break;
+@@ -1734,7 +1751,7 @@ static unsigned io_cqring_events(struct
+               if (noflush && !list_empty(&ctx->cq_overflow_list))
+                       return -1U;
+-              io_cqring_overflow_flush(ctx, false);
++              io_cqring_overflow_flush(ctx, false, NULL, NULL);
+       }
+       /* See comment at the top of this file */
+@@ -6095,7 +6112,7 @@ static int io_submit_sqes(struct io_ring
+       /* if we have a backlog and couldn't flush it all, return BUSY */
+       if (test_bit(0, &ctx->sq_check_overflow)) {
+               if (!list_empty(&ctx->cq_overflow_list) &&
+-                  !io_cqring_overflow_flush(ctx, false))
++                  !io_cqring_overflow_flush(ctx, false, NULL, NULL))
+                       return -EBUSY;
+       }
+@@ -7556,7 +7573,7 @@ static void io_ring_exit_work(struct wor
+       ctx = container_of(work, struct io_ring_ctx, exit_work);
+       if (ctx->rings)
+-              io_cqring_overflow_flush(ctx, true);
++              io_cqring_overflow_flush(ctx, true, NULL, NULL);
+       /*
+        * If we're doing polled IO and end up having requests being
+@@ -7567,7 +7584,7 @@ static void io_ring_exit_work(struct wor
+       while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) {
+               io_iopoll_reap_events(ctx);
+               if (ctx->rings)
+-                      io_cqring_overflow_flush(ctx, true);
++                      io_cqring_overflow_flush(ctx, true, NULL, NULL);
+       }
+       io_ring_ctx_free(ctx);
+ }
+@@ -7587,7 +7604,7 @@ static void io_ring_ctx_wait_and_kill(st
+       io_iopoll_reap_events(ctx);
+       /* if we failed setting up the ctx, we might not have any rings */
+       if (ctx->rings)
+-              io_cqring_overflow_flush(ctx, true);
++              io_cqring_overflow_flush(ctx, true, NULL, NULL);
+       idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
+       /*
+@@ -7637,12 +7654,6 @@ static bool io_match_link(struct io_kioc
+       return false;
+ }
+-static inline bool io_match_files(struct io_kiocb *req,
+-                                     struct files_struct *files)
+-{
+-      return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files;
+-}
+-
+ static bool io_match_link_files(struct io_kiocb *req,
+                               struct files_struct *files)
+ {
+@@ -7959,7 +7970,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned
+       ret = 0;
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+               if (!list_empty_careful(&ctx->cq_overflow_list))
+-                      io_cqring_overflow_flush(ctx, false);
++                      io_cqring_overflow_flush(ctx, false, NULL, NULL);
+               if (flags & IORING_ENTER_SQ_WAKEUP)
+                       wake_up(&ctx->sqo_wait);
+               submitted = to_submit;
diff --git a/queue-5.8/io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch b/queue-5.8/io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch
new file mode 100644 (file)
index 0000000..72a693f
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Fri, 9 Oct 2020 13:49:51 +0100
+Subject: io_uring: Fix use of XArray in __io_uring_files_cancel
+
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+
+commit ce765372bc443573d1d339a2bf4995de385dea3a upstream.
+
+We have to drop the lock during each iteration, so there's no advantage
+to using the advanced API.  Convert this to a standard xa_for_each() loop.
+
+Reported-by: syzbot+27c12725d8ff0bfe1a13@syzkaller.appspotmail.com
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -8008,28 +8008,19 @@ static void io_uring_attempt_task_drop(s
+ void __io_uring_files_cancel(struct files_struct *files)
+ {
+       struct io_uring_task *tctx = current->io_uring;
+-      XA_STATE(xas, &tctx->xa, 0);
++      struct file *file;
++      unsigned long index;
+       /* make sure overflow events are dropped */
+       tctx->in_idle = true;
+-      do {
+-              struct io_ring_ctx *ctx;
+-              struct file *file;
+-
+-              xas_lock(&xas);
+-              file = xas_next_entry(&xas, ULONG_MAX);
+-              xas_unlock(&xas);
+-
+-              if (!file)
+-                      break;
+-
+-              ctx = file->private_data;
++      xa_for_each(&tctx->xa, index, file) {
++              struct io_ring_ctx *ctx = file->private_data;
+               io_uring_cancel_task_requests(ctx, files);
+               if (files)
+                       io_uring_del_task_file(file);
+-      } while (1);
++      }
+ }
+ static inline bool io_uring_task_idle(struct io_uring_task *tctx)
diff --git a/queue-5.8/io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch b/queue-5.8/io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch
new file mode 100644 (file)
index 0000000..e803623
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Fri, 9 Oct 2020 13:49:52 +0100
+Subject: io_uring: Fix XArray usage in io_uring_add_task_file
+
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+
+commit 236434c3438c4da3dfbd6aeeab807577b85e951a upstream.
+
+The xas_store() wasn't paired with an xas_nomem() loop, so if it couldn't
+allocate memory using GFP_NOWAIT, it would leak the reference to the file
+descriptor.  Also the node pointed to by the xas could be freed between
+the call to xas_load() under the rcu_read_lock() and the acquisition of
+the xa_lock.
+
+It's easier to just use the normal xa_load/xa_store interface here.
+
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+[axboe: fix missing assign after alloc, cur_uring -> tctx rename]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   21 +++++++++------------
+ 1 file changed, 9 insertions(+), 12 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -7929,27 +7929,24 @@ static void io_uring_cancel_task_request
+  */
+ static int io_uring_add_task_file(struct file *file)
+ {
+-      if (unlikely(!current->io_uring)) {
++      struct io_uring_task *tctx = current->io_uring;
++
++      if (unlikely(!tctx)) {
+               int ret;
+               ret = io_uring_alloc_task_context(current);
+               if (unlikely(ret))
+                       return ret;
++              tctx = current->io_uring;
+       }
+-      if (current->io_uring->last != file) {
+-              XA_STATE(xas, &current->io_uring->xa, (unsigned long) file);
+-              void *old;
++      if (tctx->last != file) {
++              void *old = xa_load(&tctx->xa, (unsigned long)file);
+-              rcu_read_lock();
+-              old = xas_load(&xas);
+-              if (old != file) {
++              if (!old) {
+                       get_file(file);
+-                      xas_lock(&xas);
+-                      xas_store(&xas, file);
+-                      xas_unlock(&xas);
++                      xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
+               }
+-              rcu_read_unlock();
+-              current->io_uring->last = file;
++              tctx->last = file;
+       }
+       return 0;
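
The leak described above comes from the advanced API's contract: as the changelog says, a store made under the xa_lock can only attempt a GFP_NOWAIT allocation, and the caller is expected to loop with xas_nomem() to retry after dropping the lock; skipping that loop means a failed store silently drops the entry that get_file() was taken for. The plain xa_store() used in the fix handles allocation with the caller-supplied gfp mask itself. Below is a hedged sketch of the same shape, with an xa_err() check that the minimal fix above does not carry; the table is hypothetical, and like the io_uring case it assumes only the current task touches it, so the load/store pair needs no extra locking.

    #include <linux/xarray.h>
    #include <linux/fs.h>
    #include <linux/gfp.h>

    static DEFINE_XARRAY(example_table);

    /* Remember 'file' keyed by its own pointer value; illustration only. */
    static int example_remember(struct file *file)
    {
            unsigned long key = (unsigned long)file;
            void *old;

            if (xa_load(&example_table, key))
                    return 0;               /* already tracked */

            get_file(file);
            old = xa_store(&example_table, key, file, GFP_KERNEL);
            if (xa_is_err(old)) {
                    fput(file);             /* undo the reference on failure */
                    return xa_err(old);
            }
            return 0;
    }
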
diff --git a/queue-5.8/io_uring-move-dropping-of-files-into-separate-helper.patch b/queue-5.8/io_uring-move-dropping-of-files-into-separate-helper.patch
new file mode 100644 (file)
index 0000000..ecd4d40
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 12 Oct 2020 11:03:18 -0600
+Subject: io_uring: move dropping of files into separate helper
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit f573d384456b3025d3f8e58b3eafaeeb0f510784 upstream.
+
+No functional changes in this patch, prep patch for grabbing references
+to the files_struct.
+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   26 ++++++++++++++++----------
+ 1 file changed, 16 insertions(+), 10 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1424,6 +1424,20 @@ static inline void io_put_file(struct io
+               fput(file);
+ }
++static void io_req_drop_files(struct io_kiocb *req)
++{
++      struct io_ring_ctx *ctx = req->ctx;
++      unsigned long flags;
++
++      spin_lock_irqsave(&ctx->inflight_lock, flags);
++      list_del(&req->inflight_entry);
++      if (waitqueue_active(&ctx->inflight_wait))
++              wake_up(&ctx->inflight_wait);
++      spin_unlock_irqrestore(&ctx->inflight_lock, flags);
++      req->flags &= ~REQ_F_INFLIGHT;
++      req->work.files = NULL;
++}
++
+ static void __io_req_aux_free(struct io_kiocb *req)
+ {
+       if (req->flags & REQ_F_NEED_CLEANUP)
+@@ -1440,16 +1454,8 @@ static void __io_free_req(struct io_kioc
+ {
+       __io_req_aux_free(req);
+-      if (req->flags & REQ_F_INFLIGHT) {
+-              struct io_ring_ctx *ctx = req->ctx;
+-              unsigned long flags;
+-
+-              spin_lock_irqsave(&ctx->inflight_lock, flags);
+-              list_del(&req->inflight_entry);
+-              if (waitqueue_active(&ctx->inflight_wait))
+-                      wake_up(&ctx->inflight_wait);
+-              spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+-      }
++      if (req->flags & REQ_F_INFLIGHT)
++              io_req_drop_files(req);
+       percpu_ref_put(&req->ctx->refs);
+       if (likely(!io_is_fallback_req(req)))
diff --git a/queue-5.8/io_uring-no-need-to-call-xa_destroy-on-empty-xarray.patch b/queue-5.8/io_uring-no-need-to-call-xa_destroy-on-empty-xarray.patch
new file mode 100644 (file)
index 0000000..b74efdb
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 8 Oct 2020 07:46:52 -0600
+Subject: io_uring: no need to call xa_destroy() on empty xarray
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit ca6484cd308a671811bf39f3119e81966eb476e3 upstream.
+
+The kernel test robot reports this lockdep issue:
+
+[child1:659] mbind (274) returned ENOSYS, marking as inactive.
+[child1:659] mq_timedsend (279) returned ENOSYS, marking as inactive.
+[main] 10175 iterations. [F:7781 S:2344 HI:2397]
+[   24.610601]
+[   24.610743] ================================
+[   24.611083] WARNING: inconsistent lock state
+[   24.611437] 5.9.0-rc7-00017-g0f2122045b9462 #5 Not tainted
+[   24.611861] --------------------------------
+[   24.612193] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
+[   24.612660] ksoftirqd/0/7 [HC0[0]:SC1[3]:HE0:SE0] takes:
+[   24.613086] f00ed998 (&xa->xa_lock#4){+.?.}-{2:2}, at: xa_destroy+0x43/0xc1
+[   24.613642] {SOFTIRQ-ON-W} state was registered at:
+[   24.614024]   lock_acquire+0x20c/0x29b
+[   24.614341]   _raw_spin_lock+0x21/0x30
+[   24.614636]   io_uring_add_task_file+0xe8/0x13a
+[   24.614987]   io_uring_create+0x535/0x6bd
+[   24.615297]   io_uring_setup+0x11d/0x136
+[   24.615606]   __ia32_sys_io_uring_setup+0xd/0xf
+[   24.615977]   do_int80_syscall_32+0x53/0x6c
+[   24.616306]   restore_all_switch_stack+0x0/0xb1
+[   24.616677] irq event stamp: 939881
+[   24.616968] hardirqs last  enabled at (939880): [<8105592d>] __local_bh_enable_ip+0x13c/0x145
+[   24.617642] hardirqs last disabled at (939881): [<81b6ace3>] _raw_spin_lock_irqsave+0x1b/0x4e
+[   24.618321] softirqs last  enabled at (939738): [<81b6c7c8>] __do_softirq+0x3f0/0x45a
+[   24.618924] softirqs last disabled at (939743): [<81055741>] run_ksoftirqd+0x35/0x61
+[   24.619521]
+[   24.619521] other info that might help us debug this:
+[   24.620028]  Possible unsafe locking scenario:
+[   24.620028]
+[   24.620492]        CPU0
+[   24.620685]        ----
+[   24.620894]   lock(&xa->xa_lock#4);
+[   24.621168]   <Interrupt>
+[   24.621381]     lock(&xa->xa_lock#4);
+[   24.621695]
+[   24.621695]  *** DEADLOCK ***
+[   24.621695]
+[   24.622154] 1 lock held by ksoftirqd/0/7:
+[   24.622468]  #0: 823bfb94 (rcu_callback){....}-{0:0}, at: rcu_process_callbacks+0xc0/0x155
+[   24.623106]
+[   24.623106] stack backtrace:
+[   24.623454] CPU: 0 PID: 7 Comm: ksoftirqd/0 Not tainted 5.9.0-rc7-00017-g0f2122045b9462 #5
+[   24.624090] Call Trace:
+[   24.624284]  ? show_stack+0x40/0x46
+[   24.624551]  dump_stack+0x1b/0x1d
+[   24.624809]  print_usage_bug+0x17a/0x185
+[   24.625142]  mark_lock+0x11d/0x1db
+[   24.625474]  ? print_shortest_lock_dependencies+0x121/0x121
+[   24.625905]  __lock_acquire+0x41e/0x7bf
+[   24.626206]  lock_acquire+0x20c/0x29b
+[   24.626517]  ? xa_destroy+0x43/0xc1
+[   24.626810]  ? lock_acquire+0x20c/0x29b
+[   24.627110]  _raw_spin_lock_irqsave+0x3e/0x4e
+[   24.627450]  ? xa_destroy+0x43/0xc1
+[   24.627725]  xa_destroy+0x43/0xc1
+[   24.627989]  __io_uring_free+0x57/0x71
+[   24.628286]  ? get_pid+0x22/0x22
+[   24.628544]  __put_task_struct+0xf2/0x163
+[   24.628865]  put_task_struct+0x1f/0x2a
+[   24.629161]  delayed_put_task_struct+0xe2/0xe9
+[   24.629509]  rcu_process_callbacks+0x128/0x155
+[   24.629860]  __do_softirq+0x1a3/0x45a
+[   24.630151]  run_ksoftirqd+0x35/0x61
+[   24.630443]  smpboot_thread_fn+0x304/0x31a
+[   24.630763]  kthread+0x124/0x139
+[   24.631016]  ? sort_range+0x18/0x18
+[   24.631290]  ? kthread_create_worker_on_cpu+0x17/0x17
+[   24.631682]  ret_from_fork+0x1c/0x28
+
+which is complaining about xa_destroy() grabbing the xa lock in an
+IRQ-disabling fashion, whereas the io_uring use cases aren't interrupt
+safe. This is really an xarray issue, since it should not assume the
+lock type. But for our use case, since we know the xarray is empty at
+this point, there's no need to actually call xa_destroy(). So just get
+rid of it.
+
+Fixes: 0f2122045b94 ("io_uring: don't rely on weak ->files references")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -7136,7 +7136,6 @@ void __io_uring_free(struct task_struct
+       struct io_uring_task *tctx = tsk->io_uring;
+       WARN_ON_ONCE(!xa_empty(&tctx->xa));
+-      xa_destroy(&tctx->xa);
+       kfree(tctx);
+       tsk->io_uring = NULL;
+ }
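
For readers unfamiliar with this class of lockdep report, the short sketch
below is illustrative only (it is not part of the patch and every identifier
in it is made up): it shows the "inconsistent lock state" pattern flagged
above, where the same spinlock is taken with softirqs enabled in process
context (xa_store() from io_uring_add_task_file()) and again from softirq
context (xa_destroy() run via the delayed task_struct RCU callback). If the
softirq runs on the same CPU while the process-context path still holds the
lock, the CPU deadlocks.

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    /* Process context: takes the lock without masking softirqs. */
    static void demo_process_context_path(void)
    {
            spin_lock(&demo_lock);
            /* An interrupt may arrive here; softirqs run on irq exit. */
            spin_unlock(&demo_lock);
    }

    /* Softirq context (e.g. an RCU callback): takes the same lock. */
    static void demo_softirq_path(void)
    {
            /* Deadlocks if it interrupted the holder above on this CPU. */
            spin_lock(&demo_lock);
            spin_unlock(&demo_lock);
    }

Dropping the xa_destroy() call sidesteps the problem entirely: an empty
xarray has nothing to free, so the lock is never taken from the RCU
callback path at all.
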
diff --git a/queue-5.8/io_uring-reference-nsproxy-for-file-table-commands.patch b/queue-5.8/io_uring-reference-nsproxy-for-file-table-commands.patch
new file mode 100644 (file)
index 0000000..f59e826
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 18 Sep 2020 20:13:06 -0600
+Subject: io_uring: reference ->nsproxy for file table commands
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 9b8284921513fc1ea57d87777283a59b05862f03 upstream.
+
+If we don't get and assign the namespace for the async work, then certain
+paths just don't work properly (like /dev/stdin, /proc/mounts, etc).
+Anything that references the current namespace of the given task should
+be assigned for async work on behalf of that task.
+
+Cc: stable@vger.kernel.org # v5.5+
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c    |    4 ++++
+ fs/io-wq.h    |    1 +
+ fs/io_uring.c |    3 +++
+ 3 files changed, 8 insertions(+)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -60,6 +60,7 @@ struct io_worker {
+       const struct cred *cur_creds;
+       const struct cred *saved_creds;
+       struct files_struct *restore_files;
++      struct nsproxy *restore_nsproxy;
+       struct fs_struct *restore_fs;
+ };
+@@ -153,6 +154,7 @@ static bool __io_worker_unuse(struct io_
+               task_lock(current);
+               current->files = worker->restore_files;
++              current->nsproxy = worker->restore_nsproxy;
+               task_unlock(current);
+       }
+@@ -318,6 +320,7 @@ static void io_worker_start(struct io_wq
+       worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+       worker->restore_files = current->files;
++      worker->restore_nsproxy = current->nsproxy;
+       worker->restore_fs = current->fs;
+       io_wqe_inc_running(wqe, worker);
+ }
+@@ -454,6 +457,7 @@ static void io_impersonate_work(struct i
+       if (work->files && current->files != work->files) {
+               task_lock(current);
+               current->files = work->files;
++              current->nsproxy = work->nsproxy;
+               task_unlock(current);
+       }
+       if (work->fs && current->fs != work->fs)
+--- a/fs/io-wq.h
++++ b/fs/io-wq.h
+@@ -88,6 +88,7 @@ struct io_wq_work {
+       struct files_struct *files;
+       struct mm_struct *mm;
+       const struct cred *creds;
++      struct nsproxy *nsproxy;
+       struct fs_struct *fs;
+       unsigned flags;
+ };
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1456,6 +1456,7 @@ static void io_req_drop_files(struct io_
+       spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+       req->flags &= ~REQ_F_INFLIGHT;
+       put_files_struct(req->work.files);
++      put_nsproxy(req->work.nsproxy);
+       req->work.files = NULL;
+ }
+@@ -5685,6 +5686,8 @@ static int io_grab_files(struct io_kiocb
+               return 0;
+       req->work.files = get_files_struct(current);
++      get_nsproxy(current->nsproxy);
++      req->work.nsproxy = current->nsproxy;
+       req->flags |= REQ_F_INFLIGHT;
+       spin_lock_irq(&ctx->inflight_lock);
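
The pairing the hunks above establish can be summarised in a small sketch
(hypothetical structure and helper names, shown only to make the lifetime
explicit; this is not code from the patch): everything an io-wq worker will
dereference on behalf of the submitting task, here the files table and the
namespace proxy, is pinned when the request is marked in-flight and dropped
again when the request leaves the inflight list.

    #include <linux/fdtable.h>
    #include <linux/nsproxy.h>
    #include <linux/sched.h>

    struct async_borrow {                   /* stand-in for struct io_wq_work */
            struct files_struct *files;
            struct nsproxy *nsproxy;
    };

    /* Called in the submitter's context, cf. io_grab_files(). */
    static void async_borrow_grab(struct async_borrow *b)
    {
            b->files = get_files_struct(current);
            get_nsproxy(current->nsproxy);
            b->nsproxy = current->nsproxy;
    }

    /* Called when the request is dropped, cf. io_req_drop_files(). */
    static void async_borrow_drop(struct async_borrow *b)
    {
            put_files_struct(b->files);
            put_nsproxy(b->nsproxy);
            b->files = NULL;
            b->nsproxy = NULL;
    }
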
diff --git a/queue-5.8/io_uring-return-cancelation-status-from-poll-timeout-files-handlers.patch b/queue-5.8/io_uring-return-cancelation-status-from-poll-timeout-files-handlers.patch
new file mode 100644 (file)
index 0000000..21bfa6f
--- /dev/null
@@ -0,0 +1,96 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sat, 26 Sep 2020 15:05:03 -0600
+Subject: io_uring: return cancelation status from poll/timeout/files handlers
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 76e1b6427fd8246376a97e3227049d49188dfb9c upstream.
+
+Return whether we found and canceled requests or not. This is in
+preparation for using this information, no functional changes in this
+patch.
+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   30 ++++++++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 6 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1143,15 +1143,23 @@ static bool io_task_match(struct io_kioc
+       return false;
+ }
+-static void io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
++/*
++ * Returns true if we found and killed one or more timeouts
++ */
++static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
+ {
+       struct io_kiocb *req, *tmp;
++      int canceled = 0;
+       spin_lock_irq(&ctx->completion_lock);
+-      list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+-              if (io_task_match(req, tsk))
++      list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list) {
++              if (io_task_match(req, tsk)) {
+                       io_kill_timeout(req);
++                      canceled++;
++              }
++      }
+       spin_unlock_irq(&ctx->completion_lock);
++      return canceled != 0;
+ }
+ static void __io_queue_deferred(struct io_ring_ctx *ctx)
+@@ -4650,7 +4658,10 @@ static bool io_poll_remove_one(struct io
+       return do_complete;
+ }
+-static void io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
++/*
++ * Returns true if we found and killed one or more poll requests
++ */
++static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
+ {
+       struct hlist_node *tmp;
+       struct io_kiocb *req;
+@@ -4670,6 +4681,8 @@ static void io_poll_remove_all(struct io
+       if (posted)
+               io_cqring_ev_posted(ctx);
++
++      return posted != 0;
+ }
+ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
+@@ -7744,11 +7757,14 @@ static void io_cancel_defer_files(struct
+       }
+ }
+-static void io_uring_cancel_files(struct io_ring_ctx *ctx,
++/*
++ * Returns true if we found and killed one or more files pinning requests
++ */
++static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
+                                 struct files_struct *files)
+ {
+       if (list_empty_careful(&ctx->inflight_list))
+-              return;
++              return false;
+       io_cancel_defer_files(ctx, files);
+       /* cancel all at once, should be faster than doing it one by one*/
+@@ -7811,6 +7827,8 @@ static void io_uring_cancel_files(struct
+               schedule();
+               finish_wait(&ctx->inflight_wait, &wait);
+       }
++
++      return true;
+ }
+ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
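
The generic shape of the change, with made-up types and names rather than
the io_uring code itself, is simply "kill matching entries under the lock,
count them, and report whether anything was killed" so that a later caller
can decide whether further cancellation or waiting is needed:

    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct demo_entry {
            struct list_head list;
            bool matches;
    };

    /* Returns true if at least one matching entry was found and killed. */
    static bool demo_kill_matching(struct list_head *head, spinlock_t *lock)
    {
            struct demo_entry *e, *tmp;
            int canceled = 0;

            spin_lock_irq(lock);
            list_for_each_entry_safe(e, tmp, head, list) {
                    if (!e->matches)
                            continue;
                    list_del(&e->list);
                    canceled++;
            }
            spin_unlock_irq(lock);

            return canceled != 0;
    }
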
diff --git a/queue-5.8/io_uring-stash-ctx-task-reference-for-sqpoll.patch b/queue-5.8/io_uring-stash-ctx-task-reference-for-sqpoll.patch
new file mode 100644 (file)
index 0000000..a7241e7
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 12 Oct 2020 11:15:07 -0600
+Subject: io_uring: stash ctx task reference for SQPOLL
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 2aede0e417db846793c276c7a1bbf7262c8349b0 upstream.
+
+We can grab a reference to the task instead of stashing away the task's
+files_struct. This is doable without creating a circular reference
+between the ring fd and the task itself.
+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   39 +++++++++++++++++++++++++++++----------
+ 1 file changed, 29 insertions(+), 10 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -264,7 +264,16 @@ struct io_ring_ctx {
+       /* IO offload */
+       struct io_wq            *io_wq;
+       struct task_struct      *sqo_thread;    /* if using sq thread polling */
+-      struct mm_struct        *sqo_mm;
++
++      /*
++       * For SQPOLL usage - we hold a reference to the parent task, so we
++       * have access to the ->files
++       */
++      struct task_struct      *sqo_task;
++
++      /* Only used for accounting purposes */
++      struct mm_struct        *mm_account;
++
+       wait_queue_head_t       sqo_wait;
+       /*
+@@ -4421,9 +4430,10 @@ static int io_sq_thread_acquire_mm(struc
+ {
+       if (io_op_defs[req->opcode].needs_mm && !current->mm) {
+               if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
+-                           !mmget_not_zero(ctx->sqo_mm)))
++                      !ctx->sqo_task->mm ||
++                      !mmget_not_zero(ctx->sqo_task->mm)))
+                       return -EFAULT;
+-              kthread_use_mm(ctx->sqo_mm);
++              kthread_use_mm(ctx->sqo_task->mm);
+       }
+       return 0;
+@@ -7104,9 +7114,6 @@ static int io_sq_offload_start(struct io
+ {
+       int ret;
+-      mmgrab(current->mm);
+-      ctx->sqo_mm = current->mm;
+-
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+               ret = -EPERM;
+               if (!capable(CAP_SYS_ADMIN))
+@@ -7151,8 +7158,6 @@ static int io_sq_offload_start(struct io
+       return 0;
+ err:
+       io_finish_async(ctx);
+-      mmdrop(ctx->sqo_mm);
+-      ctx->sqo_mm = NULL;
+       return ret;
+ }
+@@ -7482,8 +7487,12 @@ static void io_destroy_buffers(struct io
+ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
+ {
+       io_finish_async(ctx);
+-      if (ctx->sqo_mm)
+-              mmdrop(ctx->sqo_mm);
++      if (ctx->sqo_task) {
++              put_task_struct(ctx->sqo_task);
++              ctx->sqo_task = NULL;
++              mmdrop(ctx->mm_account);
++              ctx->mm_account = NULL;
++      }
+       io_iopoll_reap_events(ctx);
+       io_sqe_buffer_unregister(ctx);
+@@ -8256,6 +8265,16 @@ static int io_uring_create(unsigned entr
+       ctx->user = user;
+       ctx->creds = get_current_cred();
++      ctx->sqo_task = get_task_struct(current);
++      /*
++       * This is just grabbed for accounting purposes. When a process exits,
++       * the mm is exited and dropped before the files, hence we need to hang
++       * on to this mm purely for the purposes of being able to unaccount
++       * memory (locked/pinned vm). It's not used for anything else.
++       */
++      mmgrab(current->mm);
++      ctx->mm_account = current->mm;
++
+       ret = io_allocate_scq_urings(ctx, p);
+       if (ret)
+               goto err;
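
A brief sketch of the borrow pattern this enables (the function name below
is hypothetical, not taken from the patch): rather than caching an mm
pointer at ring setup, the SQPOLL path goes through the pinned parent task
and only uses its mm if the task has not already torn down its address
space, holding an mm_users reference for the duration of the access.

    #include <linux/errno.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/sched/mm.h>

    static int demo_borrow_submitter_mm(struct task_struct *submitter)
    {
            struct mm_struct *mm = submitter->mm;

            /* The task is pinned via get_task_struct(), but its mm may be gone. */
            if (!mm || !mmget_not_zero(mm))
                    return -EFAULT;

            kthread_use_mm(mm);
            /* ... issue the queued work on behalf of the submitter ... */
            kthread_unuse_mm(mm);
            mmput(mm);
            return 0;
    }

The separate mm_account grab exists, as the comment in the hunk explains,
only so that locked/pinned memory can still be unaccounted after the mm has
otherwise gone away.
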
diff --git a/queue-5.8/io_uring-unconditionally-grab-req-task.patch b/queue-5.8/io_uring-unconditionally-grab-req-task.patch
new file mode 100644 (file)
index 0000000..348ee93
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 12 Oct 2020 11:25:39 -0600
+Subject: io_uring: unconditionally grab req->task
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e3bc8e9dad7f2f83cc807111d4472164c9210153 upstream.
+
+Sometimes we assign a weak reference to it, sometimes we grab a
+reference to it. Clean this up and make it unconditional, and drop the
+flag related to tracking this state.
+
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   26 +++-----------------------
+ 1 file changed, 3 insertions(+), 23 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -550,7 +550,6 @@ enum {
+       REQ_F_NO_FILE_TABLE_BIT,
+       REQ_F_QUEUE_TIMEOUT_BIT,
+       REQ_F_WORK_INITIALIZED_BIT,
+-      REQ_F_TASK_PINNED_BIT,
+       /* not a real bit, just to check we're not overflowing the space */
+       __REQ_F_LAST_BIT,
+@@ -608,8 +607,6 @@ enum {
+       REQ_F_QUEUE_TIMEOUT     = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
+       /* io_wq_work is initialized */
+       REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
+-      /* req->task is refcounted */
+-      REQ_F_TASK_PINNED       = BIT(REQ_F_TASK_PINNED_BIT),
+ };
+ struct async_poll {
+@@ -924,21 +921,6 @@ struct sock *io_uring_get_socket(struct
+ }
+ EXPORT_SYMBOL(io_uring_get_socket);
+-static void io_get_req_task(struct io_kiocb *req)
+-{
+-      if (req->flags & REQ_F_TASK_PINNED)
+-              return;
+-      get_task_struct(req->task);
+-      req->flags |= REQ_F_TASK_PINNED;
+-}
+-
+-/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
+-static void __io_put_req_task(struct io_kiocb *req)
+-{
+-      if (req->flags & REQ_F_TASK_PINNED)
+-              put_task_struct(req->task);
+-}
+-
+ static void io_file_put_work(struct work_struct *work);
+ /*
+@@ -1455,7 +1437,7 @@ static void __io_req_aux_free(struct io_
+       kfree(req->io);
+       if (req->file)
+               io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
+-      __io_put_req_task(req);
++      put_task_struct(req->task);
+       io_req_work_drop_env(req);
+ }
+@@ -1765,7 +1747,7 @@ static inline bool io_req_multi_free(str
+       if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req))
+               return false;
+-      if (req->file || req->io)
++      if (req->file || req->io || req->task)
+               rb->need_iter++;
+       rb->reqs[rb->to_free++] = req;
+@@ -4584,7 +4566,6 @@ static bool io_arm_poll_handler(struct i
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               memcpy(&apoll->work, &req->work, sizeof(req->work));
+-      io_get_req_task(req);
+       req->apoll = apoll;
+       INIT_HLIST_NODE(&req->hash_node);
+@@ -4774,8 +4755,6 @@ static int io_poll_add_prep(struct io_ki
+       events = READ_ONCE(sqe->poll_events);
+       poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+-
+-      io_get_req_task(req);
+       return 0;
+ }
+@@ -6057,6 +6036,7 @@ static int io_init_req(struct io_ring_ct
+       /* one is dropped after submission, the other at completion */
+       refcount_set(&req->refs, 2);
+       req->task = current;
++      get_task_struct(req->task);
+       req->result = 0;
+       if (unlikely(req->opcode >= IORING_OP_LAST))
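
The simplification boils down to replacing conditional, flag-tracked pinning
with an unconditional get/put pair. A minimal sketch of the resulting
pattern (hypothetical type and function names, not the io_uring structures
themselves):

    #include <linux/sched.h>
    #include <linux/sched/task.h>

    struct demo_req {
            struct task_struct *task;
    };

    static void demo_req_init(struct demo_req *req)
    {
            req->task = current;
            get_task_struct(req->task);     /* always pinned at init ... */
    }

    static void demo_req_free(struct demo_req *req)
    {
            put_task_struct(req->task);     /* ... always dropped at free */
    }

Because the reference is now unconditional, the REQ_F_TASK_PINNED flag and
the io_get_req_task()/__io_put_req_task() helpers can simply be deleted.
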
diff --git a/queue-5.8/io_wq-make-io_wqe-lock-a-raw_spinlock_t.patch b/queue-5.8/io_wq-make-io_wqe-lock-a-raw_spinlock_t.patch
new file mode 100644 (file)
index 0000000..95b29b7
--- /dev/null
@@ -0,0 +1,253 @@
+From foo@baz Thu Oct 29 01:16:54 PM CET 2020
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 1 Sep 2020 10:41:46 +0200
+Subject: io_wq: Make io_wqe::lock a raw_spinlock_t
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 95da84659226d75698a1ab958be0af21d9cc2a9c upstream.
+
+During a context switch the scheduler invokes wq_worker_sleeping() with
+preemption disabled. Disabling preemption is needed because it protects
+access to `worker->sleeping'. As an optimisation it also avoids invoking
+schedule() from within the schedule path as part of a possible wakeup
+(hence the preempt_enable_no_resched() afterwards).
+
+The io-wq hook has been added to the mix in the same preemption-disabled
+section. This breaks on PREEMPT_RT because io_wq_worker_sleeping()
+acquires a spinlock_t, which is a sleeping lock there. Also, within
+schedule() the spinlock_t must be acquired after tsk_is_pi_blocked(),
+otherwise it will block on the sleeping lock again while scheduling out.
+
+While playing with `io_uring-bench' I didn't notice a significant
+latency spike after converting io_wqe::lock to a raw_spinlock_t. The
+latency was more or less the same.
+
+In order to keep the spinlock_t, it would have to be moved after the
+tsk_is_pi_blocked() check, which would introduce a branch instruction
+into the hot path.
+
+The lock is used to maintain the `work_list' and to wake up at most one
+task.
+Should io_wqe_cancel_pending_work() cause latency spikes while searching
+for a specific item, it would need to drop the lock between iterations.
+revert_creds() is also invoked under the lock. According to debugging,
+cred::non_rcu is 0, otherwise it would have to be moved outside of the
+locked section because put_cred_rcu()->free_uid() acquires a sleeping
+lock.
+
+Convert io_wqe::lock to a raw_spinlock_t.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c |   52 ++++++++++++++++++++++++++--------------------------
+ 1 file changed, 26 insertions(+), 26 deletions(-)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -88,7 +88,7 @@ enum {
+  */
+ struct io_wqe {
+       struct {
+-              spinlock_t lock;
++              raw_spinlock_t lock;
+               struct io_wq_work_list work_list;
+               unsigned long hash_map;
+               unsigned flags;
+@@ -149,7 +149,7 @@ static bool __io_worker_unuse(struct io_
+       if (current->files != worker->restore_files) {
+               __acquire(&wqe->lock);
+-              spin_unlock_irq(&wqe->lock);
++              raw_spin_unlock_irq(&wqe->lock);
+               dropped_lock = true;
+               task_lock(current);
+@@ -168,7 +168,7 @@ static bool __io_worker_unuse(struct io_
+       if (worker->mm) {
+               if (!dropped_lock) {
+                       __acquire(&wqe->lock);
+-                      spin_unlock_irq(&wqe->lock);
++                      raw_spin_unlock_irq(&wqe->lock);
+                       dropped_lock = true;
+               }
+               __set_current_state(TASK_RUNNING);
+@@ -222,17 +222,17 @@ static void io_worker_exit(struct io_wor
+       worker->flags = 0;
+       preempt_enable();
+-      spin_lock_irq(&wqe->lock);
++      raw_spin_lock_irq(&wqe->lock);
+       hlist_nulls_del_rcu(&worker->nulls_node);
+       list_del_rcu(&worker->all_list);
+       if (__io_worker_unuse(wqe, worker)) {
+               __release(&wqe->lock);
+-              spin_lock_irq(&wqe->lock);
++              raw_spin_lock_irq(&wqe->lock);
+       }
+       acct->nr_workers--;
+       nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
+                       wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
+-      spin_unlock_irq(&wqe->lock);
++      raw_spin_unlock_irq(&wqe->lock);
+       /* all workers gone, wq exit can proceed */
+       if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
+@@ -508,7 +508,7 @@ get_next:
+               else if (!wq_list_empty(&wqe->work_list))
+                       wqe->flags |= IO_WQE_FLAG_STALLED;
+-              spin_unlock_irq(&wqe->lock);
++              raw_spin_unlock_irq(&wqe->lock);
+               if (!work)
+                       break;
+               io_assign_current_work(worker, work);
+@@ -543,7 +543,7 @@ get_next:
+                               io_wqe_enqueue(wqe, linked);
+                       if (hash != -1U && !next_hashed) {
+-                              spin_lock_irq(&wqe->lock);
++                              raw_spin_lock_irq(&wqe->lock);
+                               wqe->hash_map &= ~BIT_ULL(hash);
+                               wqe->flags &= ~IO_WQE_FLAG_STALLED;
+                               /* dependent work is not hashed */
+@@ -551,11 +551,11 @@ get_next:
+                               /* skip unnecessary unlock-lock wqe->lock */
+                               if (!work)
+                                       goto get_next;
+-                              spin_unlock_irq(&wqe->lock);
++                              raw_spin_unlock_irq(&wqe->lock);
+                       }
+               } while (work);
+-              spin_lock_irq(&wqe->lock);
++              raw_spin_lock_irq(&wqe->lock);
+       } while (1);
+ }
+@@ -570,7 +570,7 @@ static int io_wqe_worker(void *data)
+       while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+               set_current_state(TASK_INTERRUPTIBLE);
+ loop:
+-              spin_lock_irq(&wqe->lock);
++              raw_spin_lock_irq(&wqe->lock);
+               if (io_wqe_run_queue(wqe)) {
+                       __set_current_state(TASK_RUNNING);
+                       io_worker_handle_work(worker);
+@@ -581,7 +581,7 @@ loop:
+                       __release(&wqe->lock);
+                       goto loop;
+               }
+-              spin_unlock_irq(&wqe->lock);
++              raw_spin_unlock_irq(&wqe->lock);
+               if (signal_pending(current))
+                       flush_signals(current);
+               if (schedule_timeout(WORKER_IDLE_TIMEOUT))
+@@ -593,11 +593,11 @@ loop:
+       }
+       if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+-              spin_lock_irq(&wqe->lock);
++              raw_spin_lock_irq(&wqe->lock);
+               if (!wq_list_empty(&wqe->work_list))
+                       io_worker_handle_work(worker);
+               else
+-                      spin_unlock_irq(&wqe->lock);
++                      raw_spin_unlock_irq(&wqe->lock);
+       }
+       io_worker_exit(worker);
+@@ -637,9 +637,9 @@ void io_wq_worker_sleeping(struct task_s
+       worker->flags &= ~IO_WORKER_F_RUNNING;
+-      spin_lock_irq(&wqe->lock);
++      raw_spin_lock_irq(&wqe->lock);
+       io_wqe_dec_running(wqe, worker);
+-      spin_unlock_irq(&wqe->lock);
++      raw_spin_unlock_irq(&wqe->lock);
+ }
+ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+@@ -663,7 +663,7 @@ static bool create_io_worker(struct io_w
+               return false;
+       }
+-      spin_lock_irq(&wqe->lock);
++      raw_spin_lock_irq(&wqe->lock);
+       hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+       list_add_tail_rcu(&worker->all_list, &wqe->all_list);
+       worker->flags |= IO_WORKER_F_FREE;
+@@ -672,7 +672,7 @@ static bool create_io_worker(struct io_w
+       if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+               worker->flags |= IO_WORKER_F_FIXED;
+       acct->nr_workers++;
+-      spin_unlock_irq(&wqe->lock);
++      raw_spin_unlock_irq(&wqe->lock);
+       if (index == IO_WQ_ACCT_UNBOUND)
+               atomic_inc(&wq->user->processes);
+@@ -727,12 +727,12 @@ static int io_wq_manager(void *data)
+                       if (!node_online(node))
+                               continue;
+-                      spin_lock_irq(&wqe->lock);
++                      raw_spin_lock_irq(&wqe->lock);
+                       if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
+                               fork_worker[IO_WQ_ACCT_BOUND] = true;
+                       if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
+                               fork_worker[IO_WQ_ACCT_UNBOUND] = true;
+-                      spin_unlock_irq(&wqe->lock);
++                      raw_spin_unlock_irq(&wqe->lock);
+                       if (fork_worker[IO_WQ_ACCT_BOUND])
+                               create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
+                       if (fork_worker[IO_WQ_ACCT_UNBOUND])
+@@ -829,10 +829,10 @@ static void io_wqe_enqueue(struct io_wqe
+       }
+       work_flags = work->flags;
+-      spin_lock_irqsave(&wqe->lock, flags);
++      raw_spin_lock_irqsave(&wqe->lock, flags);
+       io_wqe_insert_work(wqe, work);
+       wqe->flags &= ~IO_WQE_FLAG_STALLED;
+-      spin_unlock_irqrestore(&wqe->lock, flags);
++      raw_spin_unlock_irqrestore(&wqe->lock, flags);
+       if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
+           !atomic_read(&acct->nr_running))
+@@ -959,13 +959,13 @@ static void io_wqe_cancel_pending_work(s
+       unsigned long flags;
+ retry:
+-      spin_lock_irqsave(&wqe->lock, flags);
++      raw_spin_lock_irqsave(&wqe->lock, flags);
+       wq_list_for_each(node, prev, &wqe->work_list) {
+               work = container_of(node, struct io_wq_work, list);
+               if (!match->fn(work, match->data))
+                       continue;
+               io_wqe_remove_pending(wqe, work, prev);
+-              spin_unlock_irqrestore(&wqe->lock, flags);
++              raw_spin_unlock_irqrestore(&wqe->lock, flags);
+               io_run_cancel(work, wqe);
+               match->nr_pending++;
+               if (!match->cancel_all)
+@@ -974,7 +974,7 @@ retry:
+               /* not safe to continue after unlock */
+               goto retry;
+       }
+-      spin_unlock_irqrestore(&wqe->lock, flags);
++      raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ }
+ static void io_wqe_cancel_running_work(struct io_wqe *wqe,
+@@ -1082,7 +1082,7 @@ struct io_wq *io_wq_create(unsigned boun
+               }
+               atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+               wqe->wq = wq;
+-              spin_lock_init(&wqe->lock);
++              raw_spin_lock_init(&wqe->lock);
+               INIT_WQ_LIST(&wqe->work_list);
+               INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
+               INIT_LIST_HEAD(&wqe->all_list);
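
The distinction the changelog relies on can be shown in a few lines
(illustrative only; the lock and function names below are made up): under
PREEMPT_RT a spinlock_t is backed by an rtmutex and may sleep, so it must
not be taken in a section that runs with preemption disabled, such as the
sched-out hook, whereas a raw_spinlock_t keeps the non-sleeping behaviour
on every configuration.

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_queue_lock);    /* safe in preempt-off paths */
    static DEFINE_SPINLOCK(demo_sleepable_lock);    /* sleeps under PREEMPT_RT */

    /* Runs with preemption disabled, cf. io_wq_worker_sleeping(). */
    static void demo_worker_sleeping_hook(void)
    {
            raw_spin_lock_irq(&demo_queue_lock);
            /* update nr_running accounting, possibly wake another worker */
            raw_spin_unlock_irq(&demo_queue_lock);

            /*
             * spin_lock_irq(&demo_sleepable_lock) here would be a bug on
             * PREEMPT_RT: the underlying rtmutex can sleep, this context
             * cannot.
             */
    }
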
index a9871319fc476f1da2588f42f1510b0f713c0ac1..2cb6815f349e6f8ad4c93c088273f8c10929c02b 100644 (file)
@@ -1 +1,16 @@
 netfilter-nftables_offload-kasan-slab-out-of-bounds-read-in-nft_flow_rule_create.patch
+io_uring-don-t-run-task-work-on-an-exiting-task.patch
+io_uring-allow-timeout-poll-files-killing-to-take-task-into-account.patch
+io_uring-move-dropping-of-files-into-separate-helper.patch
+io_uring-stash-ctx-task-reference-for-sqpoll.patch
+io_uring-unconditionally-grab-req-task.patch
+io_uring-return-cancelation-status-from-poll-timeout-files-handlers.patch
+io_uring-enable-task-files-specific-overflow-flushing.patch
+io_uring-don-t-rely-on-weak-files-references.patch
+io_uring-reference-nsproxy-for-file-table-commands.patch
+io_wq-make-io_wqe-lock-a-raw_spinlock_t.patch
+io-wq-fix-use-after-free-in-io_wq_worker_running.patch
+io_uring-no-need-to-call-xa_destroy-on-empty-xarray.patch
+io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch
+io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch
+io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch