From: Greg Kroah-Hartman Date: Wed, 15 Sep 2021 13:04:14 +0000 (+0200) Subject: 5.13-stable patches X-Git-Tag: v5.14.5~37 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=88139de72d103a0759d1a13e91867cd5e9792f07;p=thirdparty%2Fkernel%2Fstable-queue.git 5.13-stable patches added patches: io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch io-wq-fix-wakeup-race-when-adding-new-work.patch io_uring-add-splice_fd_in-checks.patch io_uring-fail-links-of-cancelled-timeouts.patch io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch io_uring-place-fixed-tables-under-memcg-limits.patch --- diff --git a/queue-5.13/io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch b/queue-5.13/io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch new file mode 100644 index 00000000000..ecdb56c57d3 --- /dev/null +++ b/queue-5.13/io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch @@ -0,0 +1,111 @@ +From foo@baz Wed Sep 15 03:01:54 PM CEST 2021 +From: Jens Axboe +Date: Mon, 13 Sep 2021 09:24:07 -0600 +Subject: io-wq: fix race between adding work and activating a free worker + +From: Jens Axboe + +commit 94ffb0a282872c2f4b14f757fa1aef2302aeaabb upstream. + +The attempt to find and activate a free worker for new work is currently +combined with creating a new one if we don't find one, but that opens +io-wq up to a race where the worker that is found and activated can +put itself to sleep without knowing that it has been selected to perform +this new work. + +Fix this by moving the activation into where we add the new work item, +then we can retain it within the wqe->lock scope and elimiate the race +with the worker itself checking inside the lock, but sleeping outside of +it. + +Cc: stable@vger.kernel.org +Reported-by: Andres Freund +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io-wq.c | 50 ++++++++++++++++++++++++-------------------------- + 1 file changed, 24 insertions(+), 26 deletions(-) + +--- a/fs/io-wq.c ++++ b/fs/io-wq.c +@@ -237,9 +237,9 @@ static bool io_wqe_activate_free_worker( + * We need a worker. If we find a free one, we're good. If not, and we're + * below the max number of workers, create one. 
+ */ +-static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) ++static void io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) + { +- bool ret; ++ bool do_create = false, first = false; + + /* + * Most likely an attempt to queue unbounded work on an io_wq that +@@ -248,25 +248,18 @@ static void io_wqe_wake_worker(struct io + if (unlikely(!acct->max_workers)) + pr_warn_once("io-wq is not configured for unbound workers"); + +- rcu_read_lock(); +- ret = io_wqe_activate_free_worker(wqe); +- rcu_read_unlock(); +- +- if (!ret) { +- bool do_create = false, first = false; +- +- raw_spin_lock_irq(&wqe->lock); +- if (acct->nr_workers < acct->max_workers) { +- atomic_inc(&acct->nr_running); +- atomic_inc(&wqe->wq->worker_refs); +- if (!acct->nr_workers) +- first = true; +- acct->nr_workers++; +- do_create = true; +- } +- raw_spin_unlock_irq(&wqe->lock); +- if (do_create) +- create_io_worker(wqe->wq, wqe, acct->index, first); ++ raw_spin_lock_irq(&wqe->lock); ++ if (acct->nr_workers < acct->max_workers) { ++ if (!acct->nr_workers) ++ first = true; ++ acct->nr_workers++; ++ do_create = true; ++ } ++ raw_spin_unlock_irq(&wqe->lock); ++ if (do_create) { ++ atomic_inc(&acct->nr_running); ++ atomic_inc(&wqe->wq->worker_refs); ++ create_io_worker(wqe->wq, wqe, acct->index, first); + } + } + +@@ -798,7 +791,8 @@ append: + static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) + { + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); +- bool do_wake; ++ unsigned work_flags = work->flags; ++ bool do_create; + unsigned long flags; + + /* +@@ -814,12 +808,16 @@ static void io_wqe_enqueue(struct io_wqe + raw_spin_lock_irqsave(&wqe->lock, flags); + io_wqe_insert_work(wqe, work); + wqe->flags &= ~IO_WQE_FLAG_STALLED; +- do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) || +- !atomic_read(&acct->nr_running); ++ ++ rcu_read_lock(); ++ do_create = !io_wqe_activate_free_worker(wqe); ++ rcu_read_unlock(); ++ + raw_spin_unlock_irqrestore(&wqe->lock, flags); + +- if (do_wake) +- io_wqe_wake_worker(wqe, acct); ++ if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || ++ !atomic_read(&acct->nr_running))) ++ io_wqe_create_worker(wqe, acct); + } + + void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) diff --git a/queue-5.13/io-wq-fix-wakeup-race-when-adding-new-work.patch b/queue-5.13/io-wq-fix-wakeup-race-when-adding-new-work.patch new file mode 100644 index 00000000000..cab2a5b4430 --- /dev/null +++ b/queue-5.13/io-wq-fix-wakeup-race-when-adding-new-work.patch @@ -0,0 +1,64 @@ +From foo@baz Wed Sep 15 03:01:54 PM CEST 2021 +From: Jens Axboe +Date: Mon, 13 Sep 2021 09:20:44 -0600 +Subject: io-wq: fix wakeup race when adding new work + +From: Jens Axboe + +commit 87df7fb922d18e96992aa5e824aa34b2065fef59 upstream. + +When new work is added, io_wqe_enqueue() checks if we need to wake or +create a new worker. But that check is done outside the lock that +otherwise synchronizes us with a worker going to sleep, so we can end +up in the following situation: + +CPU0 CPU1 +lock +insert work +unlock +atomic_read(nr_running) != 0 + lock + atomic_dec(nr_running) +no wakeup needed + +Hold the wqe lock around the "need to wakeup" check. Then we can also get +rid of the temporary work_flags variable, as we know the work will remain +valid as long as we hold the lock. 
+ +Cc: stable@vger.kernel.org +Reported-by: Andres Freund +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io-wq.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/io-wq.c ++++ b/fs/io-wq.c +@@ -798,7 +798,7 @@ append: + static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) + { + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); +- int work_flags; ++ bool do_wake; + unsigned long flags; + + /* +@@ -811,14 +811,14 @@ static void io_wqe_enqueue(struct io_wqe + return; + } + +- work_flags = work->flags; + raw_spin_lock_irqsave(&wqe->lock, flags); + io_wqe_insert_work(wqe, work); + wqe->flags &= ~IO_WQE_FLAG_STALLED; ++ do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) || ++ !atomic_read(&acct->nr_running); + raw_spin_unlock_irqrestore(&wqe->lock, flags); + +- if ((work_flags & IO_WQ_WORK_CONCURRENT) || +- !atomic_read(&acct->nr_running)) ++ if (do_wake) + io_wqe_wake_worker(wqe, acct); + } + diff --git a/queue-5.13/io_uring-add-splice_fd_in-checks.patch b/queue-5.13/io_uring-add-splice_fd_in-checks.patch new file mode 100644 index 00000000000..2417bb310ab --- /dev/null +++ b/queue-5.13/io_uring-add-splice_fd_in-checks.patch @@ -0,0 +1,224 @@ +From foo@baz Wed Sep 15 03:01:54 PM CEST 2021 +From: Pavel Begunkov +Date: Mon, 13 Sep 2021 09:17:19 -0600 +Subject: io_uring: add ->splice_fd_in checks + +From: Pavel Begunkov + +commit 26578cda3db983b17cabe4e577af26306beb9987 upstream. + +->splice_fd_in is used only by splice/tee, but no other request checks +it for validity. Add the check for most of request types excluding +reads/writes/sends/recvs, we don't want overhead for them and can leave +them be as is until the field is actually used. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/f44bc2acd6777d932de3d71a5692235b5b2b7397.1629451684.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 52 ++++++++++++++++++++++++++++++---------------------- + 1 file changed, 30 insertions(+), 22 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -3474,7 +3474,7 @@ static int io_renameat_prep(struct io_ki + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->buf_index) ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; +@@ -3525,7 +3525,8 @@ static int io_unlinkat_prep(struct io_ki + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) ++ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || ++ sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; +@@ -3571,8 +3572,8 @@ static int io_shutdown_prep(struct io_ki + #if defined(CONFIG_NET) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || +- sqe->buf_index) ++ if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || ++ sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + + req->shutdown.how = READ_ONCE(sqe->len); +@@ -3720,7 +3721,8 @@ static int io_fsync_prep(struct io_kiocb + + if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) ++ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || ++ sqe->splice_fd_in)) + return -EINVAL; + + 
req->sync.flags = READ_ONCE(sqe->fsync_flags); +@@ -3753,7 +3755,8 @@ static int io_fsync(struct io_kiocb *req + static int io_fallocate_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) + { +- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags) ++ if (sqe->ioprio || sqe->buf_index || sqe->rw_flags || ++ sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +@@ -3784,7 +3787,7 @@ static int __io_openat_prep(struct io_ki + const char __user *fname; + int ret; + +- if (unlikely(sqe->ioprio || sqe->buf_index)) ++ if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; +@@ -3909,7 +3912,8 @@ static int io_remove_buffers_prep(struct + struct io_provide_buf *p = &req->pbuf; + u64 tmp; + +- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off) ++ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off || ++ sqe->splice_fd_in) + return -EINVAL; + + tmp = READ_ONCE(sqe->fd); +@@ -3980,7 +3984,7 @@ static int io_provide_buffers_prep(struc + struct io_provide_buf *p = &req->pbuf; + u64 tmp; + +- if (sqe->ioprio || sqe->rw_flags) ++ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + tmp = READ_ONCE(sqe->fd); +@@ -4067,7 +4071,7 @@ static int io_epoll_ctl_prep(struct io_k + const struct io_uring_sqe *sqe) + { + #if defined(CONFIG_EPOLL) +- if (sqe->ioprio || sqe->buf_index) ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +@@ -4113,7 +4117,7 @@ static int io_epoll_ctl(struct io_kiocb + static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { + #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) +- if (sqe->ioprio || sqe->buf_index || sqe->off) ++ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +@@ -4148,7 +4152,7 @@ static int io_madvise(struct io_kiocb *r + + static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { +- if (sqe->ioprio || sqe->buf_index || sqe->addr) ++ if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +@@ -4186,7 +4190,7 @@ static int io_statx_prep(struct io_kiocb + { + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->buf_index) ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (req->flags & REQ_F_FIXED_FILE) + return -EBADF; +@@ -4222,7 +4226,7 @@ static int io_close_prep(struct io_kiocb + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || +- sqe->rw_flags || sqe->buf_index) ++ sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (req->flags & REQ_F_FIXED_FILE) + return -EBADF; +@@ -4283,7 +4287,8 @@ static int io_sfr_prep(struct io_kiocb * + + if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) ++ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || ++ sqe->splice_fd_in)) + return -EINVAL; + + req->sync.off = READ_ONCE(sqe->off); +@@ -4710,7 +4715,7 @@ static int io_accept_prep(struct io_kioc + + if (unlikely(req->ctx->flags & 
IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->len || sqe->buf_index) ++ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + + accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); +@@ -4758,7 +4763,8 @@ static int io_connect_prep(struct io_kio + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) ++ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags || ++ sqe->splice_fd_in) + return -EINVAL; + + conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); +@@ -5368,7 +5374,7 @@ static int io_poll_update_prep(struct io + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->buf_index) ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + flags = READ_ONCE(sqe->len); + if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | +@@ -5603,7 +5609,7 @@ static int io_timeout_remove_prep(struct + return -EINVAL; + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->len) ++ if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in) + return -EINVAL; + + tr->addr = READ_ONCE(sqe->addr); +@@ -5662,7 +5668,8 @@ static int io_timeout_prep(struct io_kio + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->len != 1) ++ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || ++ sqe->splice_fd_in) + return -EINVAL; + if (off && is_timeout_link) + return -EINVAL; +@@ -5811,7 +5818,8 @@ static int io_async_cancel_prep(struct i + return -EINVAL; + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; +- if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags) ++ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags || ++ sqe->splice_fd_in) + return -EINVAL; + + req->cancel.addr = READ_ONCE(sqe->addr); +@@ -5868,7 +5876,7 @@ static int io_rsrc_update_prep(struct io + { + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; +- if (sqe->ioprio || sqe->rw_flags) ++ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + req->rsrc_update.offset = READ_ONCE(sqe->off); diff --git a/queue-5.13/io_uring-fail-links-of-cancelled-timeouts.patch b/queue-5.13/io_uring-fail-links-of-cancelled-timeouts.patch new file mode 100644 index 00000000000..d103281c49b --- /dev/null +++ b/queue-5.13/io_uring-fail-links-of-cancelled-timeouts.patch @@ -0,0 +1,33 @@ +From foo@baz Wed Sep 15 03:01:54 PM CEST 2021 +From: Pavel Begunkov +Date: Mon, 13 Sep 2021 09:27:44 -0600 +Subject: io_uring: fail links of cancelled timeouts + +From: Pavel Begunkov + +commit 2ae2eb9dde18979b40629dd413b9adbd6c894cdf upstream. + +When we cancel a timeout we should mark it with REQ_F_FAIL, so +linked requests are cancelled as well, but not queued for further +execution. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/fff625b44eeced3a5cae79f60e6acf3fbdf8f990.1631192135.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -1307,6 +1307,8 @@ static void io_kill_timeout(struct io_ki + struct io_timeout_data *io = req->async_data; + + if (hrtimer_try_to_cancel(&io->timer) != -1) { ++ if (status) ++ req_set_fail_links(req); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + list_del_init(&req->timeout.list); diff --git a/queue-5.13/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch b/queue-5.13/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch new file mode 100644 index 00000000000..00e8294c366 --- /dev/null +++ b/queue-5.13/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch @@ -0,0 +1,62 @@ +From foo@baz Wed Sep 15 03:01:54 PM CEST 2021 +From: Pavel Begunkov +Date: Mon, 13 Sep 2021 09:18:44 -0600 +Subject: io_uring: fix io_try_cancel_userdata race for iowq + +From: Pavel Begunkov + +commit dadebc350da2bef62593b1df007a6e0b90baf42a upstream. + +WARNING: CPU: 1 PID: 5870 at fs/io_uring.c:5975 io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975 +CPU: 0 PID: 5870 Comm: iou-wrk-5860 Not tainted 5.14.0-rc6-next-20210820-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975 +Call Trace: + io_async_cancel fs/io_uring.c:6014 [inline] + io_issue_sqe+0x22d5/0x65a0 fs/io_uring.c:6407 + io_wq_submit_work+0x1dc/0x300 fs/io_uring.c:6511 + io_worker_handle_work+0xa45/0x1840 fs/io-wq.c:533 + io_wqe_worker+0x2cc/0xbb0 fs/io-wq.c:582 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 + +io_try_cancel_userdata() can be called from io_async_cancel() executing +in the io-wq context, so the warning fires, which is there to alert +anyone accessing task->io_uring->io_wq in a racy way. However, +io_wq_put_and_exit() always first waits for all threads to complete, +so the only detail left is to zero tctx->io_wq after the context is +removed. + +note: one little assumption is that when IO_WQ_WORK_CANCEL, the executor +won't touch ->io_wq, because io_wq_destroy() might cancel left pending +requests in such a way. + +Cc: stable@vger.kernel.org +Reported-by: syzbot+b0c9d1588ae92866515f@syzkaller.appspotmail.com +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/dfdd37a80cfa9ffd3e59538929c99cdd55d8699e.1629721757.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -6289,6 +6289,7 @@ static void io_wq_submit_work(struct io_ + if (timeout) + io_queue_linked_timeout(timeout); + ++ /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ + if (work->flags & IO_WQ_WORK_CANCEL) + ret = -ECANCELED; + +@@ -9098,8 +9099,8 @@ static void io_uring_clean_tctx(struct i + * Must be after io_uring_del_task_file() (removes nodes under + * uring_lock) to avoid race with io_uring_try_cancel_iowq(). 
+ */ +- tctx->io_wq = NULL; + io_wq_put_and_exit(wq); ++ tctx->io_wq = NULL; + } + } + diff --git a/queue-5.13/io_uring-place-fixed-tables-under-memcg-limits.patch b/queue-5.13/io_uring-place-fixed-tables-under-memcg-limits.patch new file mode 100644 index 00000000000..09befb02fb2 --- /dev/null +++ b/queue-5.13/io_uring-place-fixed-tables-under-memcg-limits.patch @@ -0,0 +1,55 @@ +From foo@baz Wed Sep 15 03:01:54 PM CEST 2021 +From: Pavel Begunkov +Date: Mon, 13 Sep 2021 09:13:30 -0600 +Subject: io_uring: place fixed tables under memcg limits + +From: Pavel Begunkov + +commit 0bea96f59ba40e63c0ae93ad6a02417b95f22f4d upstream. + +Fixed tables may be large enough, place all of them together with +allocated tags under memcg limits. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/b3ac9f5da9821bb59837b5fe25e8ef4be982218c.1629451684.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -7195,11 +7195,11 @@ static struct io_rsrc_data *io_rsrc_data + { + struct io_rsrc_data *data; + +- data = kzalloc(sizeof(*data), GFP_KERNEL); ++ data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); + if (!data) + return NULL; + +- data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL); ++ data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL_ACCOUNT); + if (!data->tags) { + kfree(data); + return NULL; +@@ -7477,7 +7477,7 @@ static bool io_alloc_file_tables(struct + { + unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE); + +- table->files = kcalloc(nr_tables, sizeof(*table->files), GFP_KERNEL); ++ table->files = kcalloc(nr_tables, sizeof(*table->files), GFP_KERNEL_ACCOUNT); + if (!table->files) + return false; + +@@ -7485,7 +7485,7 @@ static bool io_alloc_file_tables(struct + unsigned int this_files = min(nr_files, IORING_MAX_FILES_TABLE); + + table->files[i] = kcalloc(this_files, sizeof(*table->files[i]), +- GFP_KERNEL); ++ GFP_KERNEL_ACCOUNT); + if (!table->files[i]) + break; + nr_files -= this_files; diff --git a/queue-5.13/series b/queue-5.13/series index b0b633726bc..eb6e51452f7 100644 --- a/queue-5.13/series +++ b/queue-5.13/series @@ -48,3 +48,9 @@ s390-qdio-fix-roll-back-after-timeout-on-establish-ccw.patch s390-qdio-cancel-the-establish-ccw-after-timeout.patch revert-dmaengine-imx-sdma-refine-to-load-context-only-once.patch dmaengine-imx-sdma-remove-duplicated-sdma_load_context.patch +io_uring-place-fixed-tables-under-memcg-limits.patch +io_uring-add-splice_fd_in-checks.patch +io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch +io-wq-fix-wakeup-race-when-adding-new-work.patch +io-wq-fix-race-between-adding-work-and-activating-a-free-worker.patch +io_uring-fail-links-of-cancelled-timeouts.patch
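
Of the fixes queued above, the ->splice_fd_in validation is the easiest to exercise from userspace: an SQE that previously carried ignored garbage in that field is now rejected at prep time. Below is a minimal sketch using liburing, not part of the queued patches; the queue depth, the temp-file path, and the choice of fsync as the probed opcode are arbitrary illustration choices.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int fd, ret;

        ret = io_uring_queue_init(8, &ring, 0);
        if (ret < 0) {
                fprintf(stderr, "queue_init: %s\n", strerror(-ret));
                return 1;
        }

        fd = open("/tmp/splice_fd_in_check", O_CREAT | O_RDWR, 0600);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* fsync is one of the opcodes whose prep now validates ->splice_fd_in */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_fsync(sqe, fd, 0);
        sqe->splice_fd_in = 1;  /* stray value in a field fsync does not use */

        io_uring_submit(&ring);
        ret = io_uring_wait_cqe(&ring, &cqe);
        if (ret == 0) {
                /* patched kernel: -EINVAL; unpatched 5.13: 0 */
                printf("fsync res = %d\n", cqe->res);
                io_uring_cqe_seen(&ring, cqe);
        }

        close(fd);
        unlink("/tmp/splice_fd_in_check");
        io_uring_queue_exit(&ring);
        return 0;
}

On a 5.13 kernel with io_uring-add-splice_fd_in-checks.patch applied the printed res is -22 (-EINVAL); without it the fsync simply completes with res 0.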