git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.13-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Sep 2021 13:04:14 +0000 (15:04 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Sep 2021 13:04:14 +0000 (15:04 +0200)
added patches:
io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch
io-wq-fix-wakeup-race-when-adding-new-work.patch
io_uring-add-splice_fd_in-checks.patch
io_uring-fail-links-of-cancelled-timeouts.patch
io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch
io_uring-place-fixed-tables-under-memcg-limits.patch

queue-5.13/io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch [new file with mode: 0644]
queue-5.13/io-wq-fix-wakeup-race-when-adding-new-work.patch [new file with mode: 0644]
queue-5.13/io_uring-add-splice_fd_in-checks.patch [new file with mode: 0644]
queue-5.13/io_uring-fail-links-of-cancelled-timeouts.patch [new file with mode: 0644]
queue-5.13/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch [new file with mode: 0644]
queue-5.13/io_uring-place-fixed-tables-under-memcg-limits.patch [new file with mode: 0644]
queue-5.13/series

diff --git a/queue-5.13/io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch b/queue-5.13/io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch
new file mode 100644 (file)
index 0000000..ecdb56c
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Wed Sep 15 03:01:54 PM CEST 2021
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 13 Sep 2021 09:24:07 -0600
+Subject: io-wq: fix race between adding work and activating a free worker
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 94ffb0a282872c2f4b14f757fa1aef2302aeaabb upstream.
+
+The attempt to find and activate a free worker for new work is currently
+combined with creating a new one if we don't find one, but that opens
+io-wq up to a race where the worker that is found and activated can
+put itself to sleep without knowing that it has been selected to perform
+this new work.
+
+Fix this by moving the activation to where we add the new work item; that
+way we can retain it within the wqe->lock scope and eliminate the race
+with the worker itself checking inside the lock, but sleeping outside of
+it.
+
+Cc: stable@vger.kernel.org
+Reported-by: Andres Freund <andres@anarazel.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c |   50 ++++++++++++++++++++++++--------------------------
+ 1 file changed, 24 insertions(+), 26 deletions(-)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -237,9 +237,9 @@ static bool io_wqe_activate_free_worker(
+  * We need a worker. If we find a free one, we're good. If not, and we're
+  * below the max number of workers, create one.
+  */
+-static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
++static void io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
+ {
+-      bool ret;
++      bool do_create = false, first = false;
+       /*
+        * Most likely an attempt to queue unbounded work on an io_wq that
+@@ -248,25 +248,18 @@ static void io_wqe_wake_worker(struct io
+       if (unlikely(!acct->max_workers))
+               pr_warn_once("io-wq is not configured for unbound workers");
+-      rcu_read_lock();
+-      ret = io_wqe_activate_free_worker(wqe);
+-      rcu_read_unlock();
+-
+-      if (!ret) {
+-              bool do_create = false, first = false;
+-
+-              raw_spin_lock_irq(&wqe->lock);
+-              if (acct->nr_workers < acct->max_workers) {
+-                      atomic_inc(&acct->nr_running);
+-                      atomic_inc(&wqe->wq->worker_refs);
+-                      if (!acct->nr_workers)
+-                              first = true;
+-                      acct->nr_workers++;
+-                      do_create = true;
+-              }
+-              raw_spin_unlock_irq(&wqe->lock);
+-              if (do_create)
+-                      create_io_worker(wqe->wq, wqe, acct->index, first);
++      raw_spin_lock_irq(&wqe->lock);
++      if (acct->nr_workers < acct->max_workers) {
++              if (!acct->nr_workers)
++                      first = true;
++              acct->nr_workers++;
++              do_create = true;
++      }
++      raw_spin_unlock_irq(&wqe->lock);
++      if (do_create) {
++              atomic_inc(&acct->nr_running);
++              atomic_inc(&wqe->wq->worker_refs);
++              create_io_worker(wqe->wq, wqe, acct->index, first);
+       }
+ }
+@@ -798,7 +791,8 @@ append:
+ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+ {
+       struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+-      bool do_wake;
++      unsigned work_flags = work->flags;
++      bool do_create;
+       unsigned long flags;
+       /*
+@@ -814,12 +808,16 @@ static void io_wqe_enqueue(struct io_wqe
+       raw_spin_lock_irqsave(&wqe->lock, flags);
+       io_wqe_insert_work(wqe, work);
+       wqe->flags &= ~IO_WQE_FLAG_STALLED;
+-      do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
+-                      !atomic_read(&acct->nr_running);
++
++      rcu_read_lock();
++      do_create = !io_wqe_activate_free_worker(wqe);
++      rcu_read_unlock();
++
+       raw_spin_unlock_irqrestore(&wqe->lock, flags);
+-      if (do_wake)
+-              io_wqe_wake_worker(wqe, acct);
++      if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
++          !atomic_read(&acct->nr_running)))
++              io_wqe_create_worker(wqe, acct);
+ }
+ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
diff --git a/queue-5.13/io-wq-fix-wakeup-race-when-adding-new-work.patch b/queue-5.13/io-wq-fix-wakeup-race-when-adding-new-work.patch
new file mode 100644 (file)
index 0000000..cab2a5b
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Wed Sep 15 03:01:54 PM CEST 2021
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 13 Sep 2021 09:20:44 -0600
+Subject: io-wq: fix wakeup race when adding new work
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 87df7fb922d18e96992aa5e824aa34b2065fef59 upstream.
+
+When new work is added, io_wqe_enqueue() checks if we need to wake or
+create a new worker. But that check is done outside the lock that
+otherwise synchronizes us with a worker going to sleep, so we can end
+up in the following situation:
+
+CPU0                           CPU1
+lock
+insert work
+unlock
+atomic_read(nr_running) != 0
+                               lock
+                               atomic_dec(nr_running)
+no wakeup needed
+
+Hold the wqe lock around the "need to wakeup" check. Then we can also get
+rid of the temporary work_flags variable, as we know the work will remain
+valid as long as we hold the lock.
+
+Cc: stable@vger.kernel.org
+Reported-by: Andres Freund <andres@anarazel.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -798,7 +798,7 @@ append:
+ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+ {
+       struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+-      int work_flags;
++      bool do_wake;
+       unsigned long flags;
+       /*
+@@ -811,14 +811,14 @@ static void io_wqe_enqueue(struct io_wqe
+               return;
+       }
+-      work_flags = work->flags;
+       raw_spin_lock_irqsave(&wqe->lock, flags);
+       io_wqe_insert_work(wqe, work);
+       wqe->flags &= ~IO_WQE_FLAG_STALLED;
++      do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
++                      !atomic_read(&acct->nr_running);
+       raw_spin_unlock_irqrestore(&wqe->lock, flags);
+-      if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
+-          !atomic_read(&acct->nr_running))
++      if (do_wake)
+               io_wqe_wake_worker(wqe, acct);
+ }
diff --git a/queue-5.13/io_uring-add-splice_fd_in-checks.patch b/queue-5.13/io_uring-add-splice_fd_in-checks.patch
new file mode 100644 (file)
index 0000000..2417bb3
--- /dev/null
@@ -0,0 +1,224 @@
+From foo@baz Wed Sep 15 03:01:54 PM CEST 2021
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 13 Sep 2021 09:17:19 -0600
+Subject: io_uring: add ->splice_fd_in checks
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 26578cda3db983b17cabe4e577af26306beb9987 upstream.
+
+->splice_fd_in is used only by splice/tee, but no other request checks
+it for validity. Add the check for most request types, excluding
+reads/writes/sends/recvs; we don't want overhead for them and can leave
+them as-is until the field is actually used.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/f44bc2acd6777d932de3d71a5692235b5b2b7397.1629451684.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   52 ++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 30 insertions(+), 22 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -3474,7 +3474,7 @@ static int io_renameat_prep(struct io_ki
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->buf_index)
++      if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+@@ -3525,7 +3525,8 @@ static int io_unlinkat_prep(struct io_ki
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
++      if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
++          sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+@@ -3571,8 +3572,8 @@ static int io_shutdown_prep(struct io_ki
+ #if defined(CONFIG_NET)
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+-          sqe->buf_index)
++      if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
++                   sqe->buf_index || sqe->splice_fd_in))
+               return -EINVAL;
+       req->shutdown.how = READ_ONCE(sqe->len);
+@@ -3720,7 +3721,8 @@ static int io_fsync_prep(struct io_kiocb
+       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
++      if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
++                   sqe->splice_fd_in))
+               return -EINVAL;
+       req->sync.flags = READ_ONCE(sqe->fsync_flags);
+@@ -3753,7 +3755,8 @@ static int io_fsync(struct io_kiocb *req
+ static int io_fallocate_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+ {
+-      if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
++      if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
++          sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+@@ -3784,7 +3787,7 @@ static int __io_openat_prep(struct io_ki
+       const char __user *fname;
+       int ret;
+-      if (unlikely(sqe->ioprio || sqe->buf_index))
++      if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+@@ -3909,7 +3912,8 @@ static int io_remove_buffers_prep(struct
+       struct io_provide_buf *p = &req->pbuf;
+       u64 tmp;
+-      if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off)
++      if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
++          sqe->splice_fd_in)
+               return -EINVAL;
+       tmp = READ_ONCE(sqe->fd);
+@@ -3980,7 +3984,7 @@ static int io_provide_buffers_prep(struc
+       struct io_provide_buf *p = &req->pbuf;
+       u64 tmp;
+-      if (sqe->ioprio || sqe->rw_flags)
++      if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+               return -EINVAL;
+       tmp = READ_ONCE(sqe->fd);
+@@ -4067,7 +4071,7 @@ static int io_epoll_ctl_prep(struct io_k
+                            const struct io_uring_sqe *sqe)
+ {
+ #if defined(CONFIG_EPOLL)
+-      if (sqe->ioprio || sqe->buf_index)
++      if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+@@ -4113,7 +4117,7 @@ static int io_epoll_ctl(struct io_kiocb
+ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ {
+ #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
+-      if (sqe->ioprio || sqe->buf_index || sqe->off)
++      if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+@@ -4148,7 +4152,7 @@ static int io_madvise(struct io_kiocb *r
+ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ {
+-      if (sqe->ioprio || sqe->buf_index || sqe->addr)
++      if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+@@ -4186,7 +4190,7 @@ static int io_statx_prep(struct io_kiocb
+ {
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->buf_index)
++      if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       if (req->flags & REQ_F_FIXED_FILE)
+               return -EBADF;
+@@ -4222,7 +4226,7 @@ static int io_close_prep(struct io_kiocb
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
+-          sqe->rw_flags || sqe->buf_index)
++          sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       if (req->flags & REQ_F_FIXED_FILE)
+               return -EBADF;
+@@ -4283,7 +4287,8 @@ static int io_sfr_prep(struct io_kiocb *
+       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
++      if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
++                   sqe->splice_fd_in))
+               return -EINVAL;
+       req->sync.off = READ_ONCE(sqe->off);
+@@ -4710,7 +4715,7 @@ static int io_accept_prep(struct io_kioc
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->len || sqe->buf_index)
++      if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+@@ -4758,7 +4763,8 @@ static int io_connect_prep(struct io_kio
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
++      if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
++          sqe->splice_fd_in)
+               return -EINVAL;
+       conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+@@ -5368,7 +5374,7 @@ static int io_poll_update_prep(struct io
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->buf_index)
++      if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       flags = READ_ONCE(sqe->len);
+       if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
+@@ -5603,7 +5609,7 @@ static int io_timeout_remove_prep(struct
+               return -EINVAL;
+       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->buf_index || sqe->len)
++      if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+               return -EINVAL;
+       tr->addr = READ_ONCE(sqe->addr);
+@@ -5662,7 +5668,8 @@ static int io_timeout_prep(struct io_kio
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
++      if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
++          sqe->splice_fd_in)
+               return -EINVAL;
+       if (off && is_timeout_link)
+               return -EINVAL;
+@@ -5811,7 +5818,8 @@ static int io_async_cancel_prep(struct i
+               return -EINVAL;
+       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
++      if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
++          sqe->splice_fd_in)
+               return -EINVAL;
+       req->cancel.addr = READ_ONCE(sqe->addr);
+@@ -5868,7 +5876,7 @@ static int io_rsrc_update_prep(struct io
+ {
+       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+               return -EINVAL;
+-      if (sqe->ioprio || sqe->rw_flags)
++      if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+               return -EINVAL;
+       req->rsrc_update.offset = READ_ONCE(sqe->off);
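For userspace, the practical effect of the checks added above is that SQE fields an opcode does not use must now be zero. A minimal liburing sketch follows (an illustrative demo, not part of the patch; the liburing calls are the public API, while the choice of fsync on a directory fd, the ring size, and the error handling are assumptions) that trips the new -EINVAL path by leaving splice_fd_in non-zero on an fsync request.

/* demo only: non-zero splice_fd_in in an SQE that never uses it now fails */
#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fd = open(".", O_RDONLY);

	if (fd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_fsync(sqe, fd, 0);
	sqe->splice_fd_in = 1;		/* garbage in a field fsync never uses */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	/* prints -22 (-EINVAL) on kernels with this check, 0 on older ones */
	printf("fsync cqe res = %d\n", cqe->res);

	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}

Rejecting stray data in the unused field keeps it free to gain a meaning later without silently changing behaviour for existing binaries.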
diff --git a/queue-5.13/io_uring-fail-links-of-cancelled-timeouts.patch b/queue-5.13/io_uring-fail-links-of-cancelled-timeouts.patch
new file mode 100644 (file)
index 0000000..d103281
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Wed Sep 15 03:01:54 PM CEST 2021
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 13 Sep 2021 09:27:44 -0600
+Subject: io_uring: fail links of cancelled timeouts
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 2ae2eb9dde18979b40629dd413b9adbd6c894cdf upstream.
+
+When we cancel a timeout we should mark it with REQ_F_FAIL, so
+linked requests are cancelled as well, but not queued for further
+execution.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/fff625b44eeced3a5cae79f60e6acf3fbdf8f990.1631192135.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1307,6 +1307,8 @@ static void io_kill_timeout(struct io_ki
+       struct io_timeout_data *io = req->async_data;
+       if (hrtimer_try_to_cancel(&io->timer) != -1) {
++              if (status)
++                      req_set_fail_links(req);
+               atomic_set(&req->ctx->cq_timeouts,
+                       atomic_read(&req->ctx->cq_timeouts) + 1);
+               list_del_init(&req->timeout.list);
diff --git a/queue-5.13/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch b/queue-5.13/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch
new file mode 100644 (file)
index 0000000..00e8294
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Wed Sep 15 03:01:54 PM CEST 2021
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 13 Sep 2021 09:18:44 -0600
+Subject: io_uring: fix io_try_cancel_userdata race for iowq
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit dadebc350da2bef62593b1df007a6e0b90baf42a upstream.
+
+WARNING: CPU: 1 PID: 5870 at fs/io_uring.c:5975 io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975
+CPU: 0 PID: 5870 Comm: iou-wrk-5860 Not tainted 5.14.0-rc6-next-20210820-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975
+Call Trace:
+ io_async_cancel fs/io_uring.c:6014 [inline]
+ io_issue_sqe+0x22d5/0x65a0 fs/io_uring.c:6407
+ io_wq_submit_work+0x1dc/0x300 fs/io_uring.c:6511
+ io_worker_handle_work+0xa45/0x1840 fs/io-wq.c:533
+ io_wqe_worker+0x2cc/0xbb0 fs/io-wq.c:582
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
+
+io_try_cancel_userdata() can be called from io_async_cancel() executing
+in the io-wq context, so the warning fires, which is there to alert
+anyone accessing task->io_uring->io_wq in a racy way. However,
+io_wq_put_and_exit() always first waits for all threads to complete,
+so the only detail left is to zero tctx->io_wq after the context is
+removed.
+
+note: one little assumption is that when IO_WQ_WORK_CANCEL is set, the
+executor won't touch ->io_wq, because io_wq_destroy() might cancel
+leftover pending requests in such a way.
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+b0c9d1588ae92866515f@syzkaller.appspotmail.com
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/dfdd37a80cfa9ffd3e59538929c99cdd55d8699e.1629721757.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6289,6 +6289,7 @@ static void io_wq_submit_work(struct io_
+       if (timeout)
+               io_queue_linked_timeout(timeout);
++      /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
+       if (work->flags & IO_WQ_WORK_CANCEL)
+               ret = -ECANCELED;
+@@ -9098,8 +9099,8 @@ static void io_uring_clean_tctx(struct i
+                * Must be after io_uring_del_task_file() (removes nodes under
+                * uring_lock) to avoid race with io_uring_try_cancel_iowq().
+                */
+-              tctx->io_wq = NULL;
+               io_wq_put_and_exit(wq);
++              tctx->io_wq = NULL;
+       }
+ }
diff --git a/queue-5.13/io_uring-place-fixed-tables-under-memcg-limits.patch b/queue-5.13/io_uring-place-fixed-tables-under-memcg-limits.patch
new file mode 100644 (file)
index 0000000..09befb0
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Wed Sep 15 03:01:54 PM CEST 2021
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 13 Sep 2021 09:13:30 -0600
+Subject: io_uring: place fixed tables under memcg limits
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 0bea96f59ba40e63c0ae93ad6a02417b95f22f4d upstream.
+
+Fixed tables may be large enough to matter, so place all of them, together
+with the allocated tags, under memcg limits.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/b3ac9f5da9821bb59837b5fe25e8ef4be982218c.1629451684.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -7195,11 +7195,11 @@ static struct io_rsrc_data *io_rsrc_data
+ {
+       struct io_rsrc_data *data;
+-      data = kzalloc(sizeof(*data), GFP_KERNEL);
++      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
+       if (!data)
+               return NULL;
+-      data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL);
++      data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL_ACCOUNT);
+       if (!data->tags) {
+               kfree(data);
+               return NULL;
+@@ -7477,7 +7477,7 @@ static bool io_alloc_file_tables(struct
+ {
+       unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
+-      table->files = kcalloc(nr_tables, sizeof(*table->files), GFP_KERNEL);
++      table->files = kcalloc(nr_tables, sizeof(*table->files), GFP_KERNEL_ACCOUNT);
+       if (!table->files)
+               return false;
+@@ -7485,7 +7485,7 @@ static bool io_alloc_file_tables(struct
+               unsigned int this_files = min(nr_files, IORING_MAX_FILES_TABLE);
+               table->files[i] = kcalloc(this_files, sizeof(*table->files[i]),
+-                                      GFP_KERNEL);
++                                      GFP_KERNEL_ACCOUNT);
+               if (!table->files[i])
+                       break;
+               nr_files -= this_files;
diff --git a/queue-5.13/series b/queue-5.13/series
index b0b633726bcc1de9c8b49c6f8f49debe9f55c548..eb6e51452f7b861897677cc1dcbe95f7b2c2adbc 100644 (file)
@@ -48,3 +48,9 @@ s390-qdio-fix-roll-back-after-timeout-on-establish-ccw.patch
 s390-qdio-cancel-the-establish-ccw-after-timeout.patch
 revert-dmaengine-imx-sdma-refine-to-load-context-only-once.patch
 dmaengine-imx-sdma-remove-duplicated-sdma_load_context.patch
+io_uring-place-fixed-tables-under-memcg-limits.patch
+io_uring-add-splice_fd_in-checks.patch
+io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch
+io-wq-fix-wakeup-race-when-adding-new-work.patch
+io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch
+io_uring-fail-links-of-cancelled-timeouts.patch