From: Greg Kroah-Hartman
Date: Wed, 28 Dec 2022 13:14:01 +0000 (+0100)
Subject: 6.1-stable patches
X-Git-Tag: v5.15.86~26
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=be22245524cd2c26ede3deabd49bd89df3378f9f;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
	blk-iolatency-fix-memory-leak-on-add_disk-failures.patch
	btrfs-do-not-bug_on-on-enomem-when-dropping-extent-items-for-a-range.patch
	fbdev-fbcon-release-buffer-when-fbcon_do_set_font-failed.patch
	gcov-add-support-for-checksum-field.patch
	io_uring-add-completion-locking-for-iopoll.patch
	io_uring-dont-remove-file-from-msg_ring-reqs.patch
	io_uring-improve-io_double_lock_ctx-fail-handling.patch
	io_uring-net-ensure-compat-import-handlers-clear-free_iov.patch
	io_uring-net-fix-cleanup-after-recycle.patch
	io_uring-net-introduce-ioring_send_zc_report_usage-flag.patch
	io_uring-pass-in-epoll_uring_wake-for-eventfd-signaling-and-wakeups.patch
	io_uring-protect-cq_timeouts-with-timeout_lock.patch
	maple_tree-fix-mas_spanning_rebalance-on-insufficient-data.patch
	mm-gup-disallow-foll_force-foll_write-on-hugetlb-mappings.patch
	ovl-fix-use-inode-directly-in-rcu-walk-mode.patch
	scsi-qla2xxx-fix-crash-when-i-o-abort-times-out.patch
	test_maple_tree-add-test-for-mas_spanning_rebalance-on-insufficient-data.patch
---

diff --git a/queue-6.1/blk-iolatency-fix-memory-leak-on-add_disk-failures.patch b/queue-6.1/blk-iolatency-fix-memory-leak-on-add_disk-failures.patch
new file mode 100644
index 00000000000..ee31a768b65
--- /dev/null
+++ b/queue-6.1/blk-iolatency-fix-memory-leak-on-add_disk-failures.patch
@@ -0,0 +1,60 @@
+From 813e693023ba10da9e75067780f8378465bf27cc Mon Sep 17 00:00:00 2001
+From: Tejun Heo
+Date: Sat, 10 Dec 2022 08:33:10 -1000
+Subject: blk-iolatency: Fix memory leak on add_disk() failures
+
+From: Tejun Heo
+
+commit 813e693023ba10da9e75067780f8378465bf27cc upstream.
+
+When a gendisk is successfully initialized but add_disk() fails, such as when
+a loop device has an invalid number of minor device numbers specified,
+blkcg_init_disk() is called during init and then blkcg_exit_disk() during
+error handling. Unfortunately, iolatency gets initialized in the former but
+doesn't get cleaned up in the latter.
+
+This is because, in non-error cases, the cleanup is performed by
+del_gendisk() calling rq_qos_exit(), the assumption being that rq_qos
+policies, iolatency being one of them, can only be activated once the disk
+is fully registered and visible. That assumption is true for wbt and iocost,
+but not so for iolatency, as it gets initialized before add_disk() is called.
+
+It is desirable to lazy-init rq_qos policies because they are optional
+features and add to hot path overhead once initialized - each IO has to walk
+all the registered rq_qos policies. So, we want to switch iolatency to lazy
+init too. However, that's a bigger change. As a fix for the immediate
+problem, let's just add an extra call to rq_qos_exit() in blkcg_exit_disk().
+This is safe because duplicate calls to rq_qos_exit() become no-ops.
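
Why the duplicate call is harmless is worth spelling out: teardown in the
rq_qos_exit() style is idempotent, since the first call unhooks everything
and leaves nothing for a second pass to find. Below is a minimal userspace
sketch of that pattern; the names are invented for illustration and are not
kernel APIs.

--------------------------------------------------------------------------
 #include <stdio.h>
 #include <stdlib.h>

 /* Illustrative stand-in for a queue's list of rq_qos policies. */
 struct qos_policy {
 	struct qos_policy *next;
 	const char *name;
 };

 struct queue_model {
 	struct qos_policy *qos_head;	/* NULL once torn down */
 };

 /*
  * Idempotent exit: the first call frees every entry and leaves the
  * head NULL, so any later call walks an empty list and does nothing.
  */
 static void qos_exit_model(struct queue_model *q)
 {
 	while (q->qos_head) {
 		struct qos_policy *p = q->qos_head;

 		q->qos_head = p->next;
 		printf("tearing down %s\n", p->name);
 		free(p);
 	}
 }

 int main(void)
 {
 	struct queue_model q = { .qos_head = NULL };
 	struct qos_policy *p = malloc(sizeof(*p));

 	if (!p)
 		return 1;
 	p->next = NULL;
 	p->name = "iolatency";
 	q.qos_head = p;

 	qos_exit_model(&q);	/* error-path cleanup frees everything */
 	qos_exit_model(&q);	/* duplicate call is a no-op */
 	return 0;
 }
--------------------------------------------------------------------------
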
+
+Signed-off-by: Tejun Heo
+Reported-by: darklight2357@icloud.com
+Cc: Josef Bacik
+Cc: Linus Torvalds
+Fixes: d70675121546 ("block: introduce blk-iolatency io controller")
+Cc: stable@vger.kernel.org # v4.19+
+Reviewed-by: Christoph Hellwig
+Link: https://lore.kernel.org/r/Y5TQ5gm3O4HXrXR3@slm.duckdns.org
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ block/blk-cgroup.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -33,6 +33,7 @@
+ #include "blk-cgroup.h"
+ #include "blk-ioprio.h"
+ #include "blk-throttle.h"
++#include "blk-rq-qos.h"
+ 
+ /*
+  * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
+@@ -1275,6 +1276,7 @@ err_unlock:
+ void blkcg_exit_disk(struct gendisk *disk)
+ {
+ 	blkg_destroy_all(disk);
++	rq_qos_exit(disk->queue);
+ 	blk_throtl_exit(disk);
+ }
+ 
diff --git a/queue-6.1/btrfs-do-not-bug_on-on-enomem-when-dropping-extent-items-for-a-range.patch b/queue-6.1/btrfs-do-not-bug_on-on-enomem-when-dropping-extent-items-for-a-range.patch
new file mode 100644
index 00000000000..562999dd520
--- /dev/null
+++ b/queue-6.1/btrfs-do-not-bug_on-on-enomem-when-dropping-extent-items-for-a-range.patch
@@ -0,0 +1,64 @@
+From 162d053e15fe985f754ef495a96eb3db970c43ed Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Mon, 28 Nov 2022 15:07:30 +0000
+Subject: btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
+
+From: Filipe Manana
+
+commit 162d053e15fe985f754ef495a96eb3db970c43ed upstream.
+
+If we get -ENOMEM while dropping file extent items in a given range, at
+btrfs_drop_extents(), due to failure to allocate memory when attempting to
+increment the reference count for an extent or drop the reference count,
+we handle it with a BUG_ON(). This is excessive; instead we can simply
+abort the transaction and return the error to the caller. In fact most
+callers of btrfs_drop_extents(), directly or indirectly, already abort
+the transaction if btrfs_drop_extents() returns any error.
+
+Also, we already have error paths at btrfs_drop_extents() that may return
+-ENOMEM, and in those cases we abort the transaction: anything that
+changes the b+tree may return -ENOMEM due to a failure to allocate a new
+extent buffer when COWing an existing extent buffer, such as a call to
+btrfs_duplicate_item().
+
+So replace the BUG_ON() calls with proper logic to abort the transaction
+and return the error.
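
The control flow the fix adopts - record the error on the transaction and
unwind to the caller instead of crashing - can be modeled in a few lines of
plain C. This is only an illustrative sketch; txn, inc_ref() and
drop_extents() are invented stand-ins, not btrfs symbols.

--------------------------------------------------------------------------
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>

 /* Illustrative transaction: once aborted, the error sticks and is
  * what the caller eventually sees, instead of a hard crash. */
 struct txn {
 	bool aborted;
 	int abort_err;
 };

 static void txn_abort(struct txn *t, int err)
 {
 	if (!t->aborted) {
 		t->aborted = true;
 		t->abort_err = err;
 	}
 }

 /* Stand-in for btrfs_inc_extent_ref(); simulate allocation failure. */
 static int inc_ref(void)
 {
 	return -ENOMEM;
 }

 static int drop_extents(struct txn *t)
 {
 	int ret = inc_ref();

 	if (ret) {			/* was: BUG_ON(ret) */
 		txn_abort(t, ret);	/* record the error ... */
 		return ret;		/* ... and unwind to the caller */
 	}
 	return 0;
 }

 int main(void)
 {
 	struct txn t = { false, 0 };
 	int ret = drop_extents(&t);

 	printf("drop_extents() = %d, aborted = %d (err %d)\n",
 	       ret, t.aborted, t.abort_err);
 	return 0;
 }
--------------------------------------------------------------------------
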
+
+Reported-by: syzbot+0b1fb6b0108c27419f9f@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/00000000000089773e05ee4b9cb4@google.com/
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik
+Signed-off-by: Filipe Manana
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/file.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -696,7 +696,10 @@ next_slot:
+ 						args->start - extent_offset,
+ 						0, false);
+ 			ret = btrfs_inc_extent_ref(trans, &ref);
+-			BUG_ON(ret); /* -ENOMEM */
++			if (ret) {
++				btrfs_abort_transaction(trans, ret);
++				break;
++			}
+ 		}
+ 		key.offset = args->start;
+ 	}
+@@ -783,7 +786,10 @@ delete_extent_item:
+ 						key.offset - extent_offset, 0,
+ 						false);
+ 			ret = btrfs_free_extent(trans, &ref);
+-			BUG_ON(ret); /* -ENOMEM */
++			if (ret) {
++				btrfs_abort_transaction(trans, ret);
++				break;
++			}
+ 			args->bytes_found += extent_end - key.offset;
+ 		}
+ 
diff --git a/queue-6.1/fbdev-fbcon-release-buffer-when-fbcon_do_set_font-failed.patch b/queue-6.1/fbdev-fbcon-release-buffer-when-fbcon_do_set_font-failed.patch
new file mode 100644
index 00000000000..182251e37ef
--- /dev/null
+++ b/queue-6.1/fbdev-fbcon-release-buffer-when-fbcon_do_set_font-failed.patch
@@ -0,0 +1,38 @@
+From 3c3bfb8586f848317ceba5d777e11204ba3e5758 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa
+Date: Tue, 6 Dec 2022 07:10:31 +0900
+Subject: fbdev: fbcon: release buffer when fbcon_do_set_font() failed
+
+From: Tetsuo Handa
+
+commit 3c3bfb8586f848317ceba5d777e11204ba3e5758 upstream.
+
+syzbot is reporting a memory leak at fbcon_do_set_font() [1], because
+commit a5a923038d70 ("fbdev: fbcon: Properly revert changes when
+vc_resize() failed") missed that the buffer might be newly allocated
+by fbcon_set_font().
+
+Link: https://syzkaller.appspot.com/bug?extid=25bdb7b1703639abd498 [1]
+Reported-by: syzbot
+Signed-off-by: Tetsuo Handa
+Tested-by: syzbot
+Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed")
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Helge Deller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/video/fbdev/core/fbcon.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -2450,7 +2450,8 @@ err_out:
+ 
+ 	if (userfont) {
+ 		p->userfont = old_userfont;
+-		REFCOUNT(data)--;
++		if (--REFCOUNT(data) == 0)
++			kfree(data - FONT_EXTRA_WORDS * sizeof(int));
+ 	}
+ 
+ 	vc->vc_font.width = old_width;
diff --git a/queue-6.1/gcov-add-support-for-checksum-field.patch b/queue-6.1/gcov-add-support-for-checksum-field.patch
new file mode 100644
index 00000000000..e5d06496330
--- /dev/null
+++ b/queue-6.1/gcov-add-support-for-checksum-field.patch
@@ -0,0 +1,49 @@
+From e96b95c2b7a63a454b6498e2df67aac14d046d13 Mon Sep 17 00:00:00 2001
+From: Rickard x Andersson
+Date: Tue, 20 Dec 2022 11:23:18 +0100
+Subject: gcov: add support for checksum field
+
+From: Rickard x Andersson
+
+commit e96b95c2b7a63a454b6498e2df67aac14d046d13 upstream.
+
+In GCC version 12.1 a checksum field was added.
+
+This patch fixes a kernel crash occurring during boot when using
+gcov-kernel with GCC version 12.2. The crash occurred on a system running
+on i.MX6SX.
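
The fix works by keeping the kernel's mirror of the compiler's gcov_info
layout in step with the compiler that builds it: the extra field exists
exactly when the compiler is new enough to emit it, so sizes and offsets
stay in agreement. The same idea in a self-contained userspace sketch (the
struct and field names here are invented for illustration):

--------------------------------------------------------------------------
 #include <stddef.h>
 #include <stdio.h>

 /* Mirror of a compiler-version-dependent layout: the field is only
  * present when the compiler building this file would also emit it. */
 struct info_mirror {
 	unsigned int version;
 	unsigned int stamp;
 #if defined(__GNUC__) && (__GNUC__ >= 12)
 	unsigned int checksum;	/* present only for GCC 12.1+ */
 #endif
 	const char *filename;
 };

 int main(void)
 {
 	/* Both values shift when the conditional field appears. */
 	printf("sizeof(struct info_mirror) = %zu\n",
 	       sizeof(struct info_mirror));
 	printf("offsetof(filename) = %zu\n",
 	       offsetof(struct info_mirror, filename));
 	return 0;
 }
--------------------------------------------------------------------------
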
+
+Link: https://lkml.kernel.org/r/20221220102318.3418501-1-rickaran@axis.com
+Fixes: 977ef30a7d88 ("gcov: support GCC 12.1 and newer compilers")
+Signed-off-by: Rickard x Andersson
+Reviewed-by: Peter Oberparleiter
+Tested-by: Peter Oberparleiter
+Reviewed-by: Martin Liska
+Cc: stable@vger.kernel.org
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/gcov/gcc_4_7.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/gcov/gcc_4_7.c
++++ b/kernel/gcov/gcc_4_7.c
+@@ -82,6 +82,7 @@ struct gcov_fn_info {
+  * @version: gcov version magic indicating the gcc version used for compilation
+  * @next: list head for a singly-linked list
+  * @stamp: uniquifying time stamp
++ * @checksum: unique object checksum
+  * @filename: name of the associated gcov data file
+  * @merge: merge functions (null for unused counter type)
+  * @n_functions: number of instrumented functions
+@@ -94,6 +95,10 @@ struct gcov_info {
+ 	unsigned int version;
+ 	struct gcov_info *next;
+ 	unsigned int stamp;
++	/* Since GCC 12.1 a checksum field is added. */
++#if (__GNUC__ >= 12)
++	unsigned int checksum;
++#endif
+ 	const char *filename;
+ 	void (*merge[GCOV_COUNTERS])(gcov_type *, unsigned int);
+ 	unsigned int n_functions;
diff --git a/queue-6.1/io_uring-add-completion-locking-for-iopoll.patch b/queue-6.1/io_uring-add-completion-locking-for-iopoll.patch
new file mode 100644
index 00000000000..c6764c8a459
--- /dev/null
+++ b/queue-6.1/io_uring-add-completion-locking-for-iopoll.patch
@@ -0,0 +1,45 @@
+From 2ccc92f4effcfa1c51c4fcf1e34d769099d3cad4 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Wed, 23 Nov 2022 11:33:36 +0000
+Subject: io_uring: add completion locking for iopoll
+
+From: Pavel Begunkov
+
+commit 2ccc92f4effcfa1c51c4fcf1e34d769099d3cad4 upstream.
+
+There are pieces of code that may allow iopoll to race filling cqes;
+temporarily add spinlocking around posting events.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/84d86b5c117feda075471c5c9e65208e0dccf5d0.1669203009.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/rw.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -1043,6 +1043,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+ 	else if (!pos)
+ 		return 0;
+ 
++	spin_lock(&ctx->completion_lock);
+ 	prev = start;
+ 	wq_list_for_each_resume(pos, prev) {
+ 		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
+@@ -1057,11 +1058,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+ 		req->cqe.flags = io_put_kbuf(req, 0);
+ 		__io_fill_cqe_req(req->ctx, req);
+ 	}
+-
++	io_commit_cqring(ctx);
++	spin_unlock(&ctx->completion_lock);
+ 	if (unlikely(!nr_events))
+ 		return 0;
+ 
+-	io_commit_cqring(ctx);
+ 	io_cqring_ev_posted_iopoll(ctx);
+ 	pos = start ? start->next : ctx->iopoll_list.first;
+ 	wq_list_cut(&ctx->iopoll_list, prev, start);
diff --git a/queue-6.1/io_uring-dont-remove-file-from-msg_ring-reqs.patch b/queue-6.1/io_uring-dont-remove-file-from-msg_ring-reqs.patch
new file mode 100644
index 00000000000..39dbca22b35
--- /dev/null
+++ b/queue-6.1/io_uring-dont-remove-file-from-msg_ring-reqs.patch
@@ -0,0 +1,118 @@
+From ef0ec1ad03119b8b46b035dad42bca7d6da7c2e5 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Wed, 7 Dec 2022 03:53:26 +0000
+Subject: io_uring: don't remove file from msg_ring reqs
+
+From: Pavel Begunkov
+
+commit ef0ec1ad03119b8b46b035dad42bca7d6da7c2e5 upstream.
+
+We should not be messing with req->file outside of core paths.
+Clearing it makes msg_ring non-reentrant: luckily, io_msg_send_fd()
+currently fails the request when io_double_lock_ctx() fails, but it
+clearly was originally intended to do retries instead.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/e5ac9edadb574fe33f6d727cb8f14ce68262a684.1670384893.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 2 +-
+ io_uring/msg_ring.c | 4 ----
+ io_uring/opdef.c | 7 +++++++
+ io_uring/opdef.h | 2 ++
+ 4 files changed, 10 insertions(+), 5 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1757,7 +1757,7 @@ static int io_issue_sqe(struct io_kiocb
+ 		return ret;
+ 
+ 	/* If the op doesn't have a file, we're not polling for it */
+-	if ((req->ctx->flags & IORING_SETUP_IOPOLL) && req->file)
++	if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
+ 		io_iopoll_req_issued(req, issue_flags);
+ 
+ 	return 0;
+--- a/io_uring/msg_ring.c
++++ b/io_uring/msg_ring.c
+@@ -167,9 +167,5 @@ done:
+ 	if (ret < 0)
+ 		req_set_fail(req);
+ 	io_req_set_res(req, ret, 0);
+-	/* put file to avoid an attempt to IOPOLL the req */
+-	if (!(req->flags & REQ_F_FIXED_FILE))
+-		io_put_file(req->file);
+-	req->file = NULL;
+ 	return IOU_OK;
+ }
+--- a/io_uring/opdef.c
++++ b/io_uring/opdef.c
+@@ -63,6 +63,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.audit_skip		= 1,
+ 		.ioprio			= 1,
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= sizeof(struct io_async_rw),
+ 		.name			= "READV",
+ 		.prep			= io_prep_rw,
+@@ -80,6 +81,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.audit_skip		= 1,
+ 		.ioprio			= 1,
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= sizeof(struct io_async_rw),
+ 		.name			= "WRITEV",
+ 		.prep			= io_prep_rw,
+@@ -103,6 +105,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.audit_skip		= 1,
+ 		.ioprio			= 1,
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= sizeof(struct io_async_rw),
+ 		.name			= "READ_FIXED",
+ 		.prep			= io_prep_rw,
+@@ -118,6 +121,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.audit_skip		= 1,
+ 		.ioprio			= 1,
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= sizeof(struct io_async_rw),
+ 		.name			= "WRITE_FIXED",
+ 		.prep			= io_prep_rw,
+@@ -277,6 +281,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.audit_skip		= 1,
+ 		.ioprio			= 1,
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= sizeof(struct io_async_rw),
+ 		.name			= "READ",
+ 		.prep			= io_prep_rw,
+@@ -292,6 +297,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.audit_skip		= 1,
+ 		.ioprio			= 1,
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= sizeof(struct io_async_rw),
+ 		.name			= "WRITE",
+ 		.prep			= io_prep_rw,
+@@ -481,6 +487,7 @@ const struct io_op_def io_op_defs[] = {
+ 		.plug			= 1,
+ 		.name			= "URING_CMD",
+ 		.iopoll			= 1,
++		.iopoll_queue		= 1,
+ 		.async_size		= uring_cmd_pdu_size(1),
+ 		.prep			= io_uring_cmd_prep,
+ 		.issue			= io_uring_cmd,
+--- a/io_uring/opdef.h
++++ b/io_uring/opdef.h
+@@ -25,6 +25,8 @@ struct io_op_def {
+ 	unsigned		ioprio : 1;
+ 	/* supports iopoll */
+ 	unsigned		iopoll : 1;
++	/* have to be put into the iopoll list */
++	unsigned		iopoll_queue : 1;
+ 	/* opcode specific path will handle ->async_data allocation if needed */
+ 	unsigned		manual_alloc : 1;
+ 	/* size of async data needed, if any */
diff --git a/queue-6.1/io_uring-improve-io_double_lock_ctx-fail-handling.patch b/queue-6.1/io_uring-improve-io_double_lock_ctx-fail-handling.patch
new file mode 100644
index 00000000000..94136a0d7d0
--- /dev/null
+++ b/queue-6.1/io_uring-improve-io_double_lock_ctx-fail-handling.patch
@@ -0,0 +1,32 @@
+From 4c979eaefa4356d385b7c7d2877dc04d7fe88969 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Wed, 7 Dec 2022 03:53:27 +0000
+Subject: io_uring: improve io_double_lock_ctx fail handling
+
+From: Pavel Begunkov
+
+commit 4c979eaefa4356d385b7c7d2877dc04d7fe88969 upstream.
+
+msg_ring will fail the request if it can't lock rings; instead, punt it
+to io-wq as was originally intended.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/4697f05afcc37df5c8f89e2fe6d9c7c19f0241f9.1670384893.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/msg_ring.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/io_uring/msg_ring.c
++++ b/io_uring/msg_ring.c
+@@ -164,6 +164,8 @@ int io_msg_ring(struct io_kiocb *req, un
+ 	}
+ 
+ done:
++	if (ret == -EAGAIN)
++		return -EAGAIN;
+ 	if (ret < 0)
+ 		req_set_fail(req);
+ 	io_req_set_res(req, ret, 0);
diff --git a/queue-6.1/io_uring-net-ensure-compat-import-handlers-clear-free_iov.patch b/queue-6.1/io_uring-net-ensure-compat-import-handlers-clear-free_iov.patch
new file mode 100644
index 00000000000..72291a74e67
--- /dev/null
+++ b/queue-6.1/io_uring-net-ensure-compat-import-handlers-clear-free_iov.patch
@@ -0,0 +1,32 @@
+From 990a4de57e44f4f4cfc33c90d2ec5d285b7c8342 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 19 Dec 2022 07:28:26 -0700
+Subject: io_uring/net: ensure compat import handlers clear free_iov
+
+From: Jens Axboe
+
+commit 990a4de57e44f4f4cfc33c90d2ec5d285b7c8342 upstream.
+
+If we're not allocating the vectors because the count is below
+UIO_FASTIOV, we still do need to properly clear ->free_iov to prevent
+an erroneous free of on-stack data.
+
+Reported-by: Jiri Slaby
+Fixes: 4c17a496a7a0 ("io_uring/net: fix cleanup double free free_iov init")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/net.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -479,6 +479,7 @@ static int __io_compat_recvmsg_copy_hdr(
+ 	if (req->flags & REQ_F_BUFFER_SELECT) {
+ 		compat_ssize_t clen;
+ 
++		iomsg->free_iov = NULL;
+ 		if (msg.msg_iovlen == 0) {
+ 			sr->len = 0;
+ 		} else if (msg.msg_iovlen > 1) {
diff --git a/queue-6.1/io_uring-net-fix-cleanup-after-recycle.patch b/queue-6.1/io_uring-net-fix-cleanup-after-recycle.patch
new file mode 100644
index 00000000000..75a5ba101b6
--- /dev/null
+++ b/queue-6.1/io_uring-net-fix-cleanup-after-recycle.patch
@@ -0,0 +1,35 @@
+From 6c3e8955d4bd9811a6e1761eea412a14fb51a2e6 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Mon, 19 Dec 2022 15:11:40 +0000
+Subject: io_uring/net: fix cleanup after recycle
+
+From: Pavel Begunkov
+
+commit 6c3e8955d4bd9811a6e1761eea412a14fb51a2e6 upstream.
+
+Don't access io_async_msghdr after io_netmsg_recycle(), as it may be
+reallocated.
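
The rule the fix enforces - free whatever the request still owns, then
recycle the object, and never touch it afterwards - can be shown with a
tiny userspace model. All names below are invented for illustration; this
is not io_uring code.

--------------------------------------------------------------------------
 #include <stdio.h>
 #include <stdlib.h>

 struct msg_model {
 	char *free_iov;		/* separately owned allocation */
 };

 static struct msg_model *cache_slot;	/* one-slot recycle cache */

 static void cache_put(struct msg_model *m)
 {
 	cache_slot = m;		/* from here on, the cache owns *m */
 }

 static void finish_request(struct msg_model *m)
 {
 	/* Release what this request still owns ... */
 	free(m->free_iov);
 	m->free_iov = NULL;
 	/* ... then recycle; touching *m after this point would race
 	 * with whoever gets the object from the cache next. */
 	cache_put(m);
 }

 int main(void)
 {
 	struct msg_model *m = calloc(1, sizeof(*m));

 	if (!m)
 		return 1;
 	m->free_iov = malloc(64);
 	finish_request(m);
 	printf("object back in cache: %p\n", (void *)cache_slot);
 	free(cache_slot);
 	return 0;
 }
--------------------------------------------------------------------------
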
+ +Cc: stable@vger.kernel.org +Fixes: 9bb66906f23e5 ("io_uring: support multishot in recvmsg") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/9e326f4ad4046ddadf15bf34bf3fa58c6372f6b5.1671461985.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -806,10 +806,10 @@ retry_multishot: + goto retry_multishot; + + if (mshot_finished) { +- io_netmsg_recycle(req, issue_flags); + /* fast path, check for non-NULL to avoid function call */ + if (kmsg->free_iov) + kfree(kmsg->free_iov); ++ io_netmsg_recycle(req, issue_flags); + req->flags &= ~REQ_F_NEED_CLEANUP; + } + diff --git a/queue-6.1/io_uring-net-introduce-ioring_send_zc_report_usage-flag.patch b/queue-6.1/io_uring-net-introduce-ioring_send_zc_report_usage-flag.patch new file mode 100644 index 00000000000..bf7be7c5cab --- /dev/null +++ b/queue-6.1/io_uring-net-introduce-ioring_send_zc_report_usage-flag.patch @@ -0,0 +1,129 @@ +From e307e6698165ca6508ed42c69cb1be76c8eb6a3c Mon Sep 17 00:00:00 2001 +From: Stefan Metzmacher +Date: Thu, 27 Oct 2022 20:34:45 +0200 +Subject: io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag + +From: Stefan Metzmacher + +commit e307e6698165ca6508ed42c69cb1be76c8eb6a3c upstream. + +It might be useful for applications to detect if a zero copy transfer with +SEND[MSG]_ZC was actually possible or not. The application can fallback to +plain SEND[MSG] in order to avoid the overhead of two cqes per request. Or +it can generate a log message that could indicate to an administrator that +no zero copy was possible and could explain degraded performance. + +Cc: stable@vger.kernel.org # 6.1 +Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa +Signed-off-by: Stefan Metzmacher +Reviewed-by: Pavel Begunkov +Link: https://lore.kernel.org/r/8945b01756d902f5d5b0667f20b957ad3f742e5e.1666895626.git.metze@samba.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ + io_uring/net.c | 6 +++++- + io_uring/notif.c | 12 ++++++++++++ + io_uring/notif.h | 3 +++ + 4 files changed, 38 insertions(+), 1 deletion(-) + +--- a/include/uapi/linux/io_uring.h ++++ b/include/uapi/linux/io_uring.h +@@ -296,10 +296,28 @@ enum io_uring_op { + * + * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in + * the buf_index field. ++ * ++ * IORING_SEND_ZC_REPORT_USAGE ++ * If set, SEND[MSG]_ZC should report ++ * the zerocopy usage in cqe.res ++ * for the IORING_CQE_F_NOTIF cqe. ++ * 0 is reported if zerocopy was actually possible. ++ * IORING_NOTIF_USAGE_ZC_COPIED if data was copied ++ * (at least partially). + */ + #define IORING_RECVSEND_POLL_FIRST (1U << 0) + #define IORING_RECV_MULTISHOT (1U << 1) + #define IORING_RECVSEND_FIXED_BUF (1U << 2) ++#define IORING_SEND_ZC_REPORT_USAGE (1U << 3) ++ ++/* ++ * cqe.res for IORING_CQE_F_NOTIF if ++ * IORING_SEND_ZC_REPORT_USAGE was requested ++ * ++ * It should be treated as a flag, all other ++ * bits of cqe.res should be treated as reserved! 
++ */
++#define IORING_NOTIF_USAGE_ZC_COPIED	(1U << 31)
+ 
+ /*
+  * accept flags stored in sqe->ioprio
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -937,7 +937,8 @@ int io_send_zc_prep(struct io_kiocb *req
+ 
+ 	zc->flags = READ_ONCE(sqe->ioprio);
+ 	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
+-			  IORING_RECVSEND_FIXED_BUF))
++			  IORING_RECVSEND_FIXED_BUF |
++			  IORING_SEND_ZC_REPORT_USAGE))
+ 		return -EINVAL;
+ 	notif = zc->notif = io_alloc_notif(ctx);
+ 	if (!notif)
+@@ -955,6 +956,9 @@ int io_send_zc_prep(struct io_kiocb *req
+ 		req->imu = READ_ONCE(ctx->user_bufs[idx]);
+ 		io_req_set_rsrc_node(notif, ctx, 0);
+ 	}
++	if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
++		io_notif_to_data(notif)->zc_report = true;
++	}
+ 
+ 	if (req->opcode == IORING_OP_SEND_ZC) {
+ 		if (READ_ONCE(sqe->__pad3[0]))
+--- a/io_uring/notif.c
++++ b/io_uring/notif.c
+@@ -18,6 +18,10 @@ static void __io_notif_complete_tw(struc
+ 		__io_unaccount_mem(ctx->user, nd->account_pages);
+ 		nd->account_pages = 0;
+ 	}
++
++	if (nd->zc_report && (nd->zc_copied || !nd->zc_used))
++		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
++
+ 	io_req_task_complete(notif, locked);
+ }
+ 
+@@ -28,6 +32,13 @@ static void io_uring_tx_zerocopy_callbac
+ 	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+ 	struct io_kiocb *notif = cmd_to_io_kiocb(nd);
+ 
++	if (nd->zc_report) {
++		if (success && !nd->zc_used && skb)
++			WRITE_ONCE(nd->zc_used, true);
++		else if (!success && !nd->zc_copied)
++			WRITE_ONCE(nd->zc_copied, true);
++	}
++
+ 	if (refcount_dec_and_test(&uarg->refcnt)) {
+ 		notif->io_task_work.func = __io_notif_complete_tw;
+ 		io_req_task_work_add(notif);
+@@ -55,6 +66,7 @@ struct io_kiocb *io_alloc_notif(struct i
+ 	nd->account_pages = 0;
+ 	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+ 	nd->uarg.callback = io_uring_tx_zerocopy_callback;
++	nd->zc_report = nd->zc_used = nd->zc_copied = false;
+ 	refcount_set(&nd->uarg.refcnt, 1);
+ 	return notif;
+ }
+--- a/io_uring/notif.h
++++ b/io_uring/notif.h
+@@ -13,6 +13,9 @@ struct io_notif_data {
+ 	struct file *file;
+ 	struct ubuf_info uarg;
+ 	unsigned long account_pages;
++	bool zc_report;
++	bool zc_used;
++	bool zc_copied;
+ };
+ 
+ void io_notif_flush(struct io_kiocb *notif);
diff --git a/queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signaling-and-wakeups.patch b/queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signaling-and-wakeups.patch
new file mode 100644
index 00000000000..8ef81bd04c5
--- /dev/null
+++ b/queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signaling-and-wakeups.patch
@@ -0,0 +1,96 @@
+From 4464853277d0ccdb9914608dd1332f0fa2f9846f Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Sun, 20 Nov 2022 10:18:45 -0700
+Subject: io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups
+
+From: Jens Axboe
+
+commit 4464853277d0ccdb9914608dd1332f0fa2f9846f upstream.
+
+Pass in EPOLL_URING_WAKE when signaling eventfd or doing poll-related
+wakeups, so that we can check for a circular event dependency between
+eventfd and epoll. If this flag is set when our wakeup handlers are
+called, then we know we have a dependency that needs to terminate
+multishot requests.
+
+eventfd and epoll are the only such possible dependencies.
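
The mechanism is easiest to see in isolation: the waker tags the wakeup
with an origin bit, and the handler uses that bit to decide whether
re-arming would recurse into itself. Below is a minimal userspace sketch of
that idea; the flag value and all names are invented for illustration, and
this is not the epoll or io_uring implementation.

--------------------------------------------------------------------------
 #include <stdio.h>

 /* Stand-in for a wake-origin bit like EPOLL_URING_WAKE; the value
  * here is arbitrary, chosen only for the demonstration. */
 #define WAKE_FROM_RING	(1u << 27)

 static int multishot_armed = 1;

 static void poll_wake(unsigned int key)
 {
 	if (key & WAKE_FROM_RING) {
 		/* The wake came from our own CQE posting: re-arming
 		 * would chase our own tail, so drop to one-shot. */
 		multishot_armed = 0;
 		printf("self-wake: multishot disarmed\n");
 		return;
 	}
 	printf("external wake: multishot stays armed (%d)\n",
 	       multishot_armed);
 }

 int main(void)
 {
 	poll_wake(0);			/* e.g. a plain eventfd write */
 	poll_wake(WAKE_FROM_RING);	/* wake from our own ring */
 	return 0;
 }
--------------------------------------------------------------------------
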
+ +Cc: stable@vger.kernel.org # 6.0 +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 4 ++-- + io_uring/io_uring.h | 15 +++++++++++---- + io_uring/poll.c | 8 ++++++++ + 3 files changed, 21 insertions(+), 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -495,7 +495,7 @@ static void io_eventfd_ops(struct rcu_he + int ops = atomic_xchg(&ev_fd->ops, 0); + + if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT)) +- eventfd_signal(ev_fd->cq_ev_fd, 1); ++ eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE); + + /* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback + * ordering in a race but if references are 0 we know we have to free +@@ -531,7 +531,7 @@ static void io_eventfd_signal(struct io_ + goto out; + + if (likely(eventfd_signal_allowed())) { +- eventfd_signal(ev_fd->cq_ev_fd, 1); ++ eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE); + } else { + atomic_inc(&ev_fd->refs); + if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) +--- a/io_uring/io_uring.h ++++ b/io_uring/io_uring.h +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include "io-wq.h" + #include "slist.h" + #include "filetable.h" +@@ -207,12 +208,18 @@ static inline void io_commit_cqring(stru + static inline void __io_cqring_wake(struct io_ring_ctx *ctx) + { + /* +- * wake_up_all() may seem excessive, but io_wake_function() and +- * io_should_wake() handle the termination of the loop and only +- * wake as many waiters as we need to. ++ * Trigger waitqueue handler on all waiters on our waitqueue. This ++ * won't necessarily wake up all the tasks, io_should_wake() will make ++ * that decision. ++ * ++ * Pass in EPOLLIN|EPOLL_URING_WAKE as the poll wakeup key. The latter ++ * set in the mask so that if we recurse back into our own poll ++ * waitqueue handlers, we know we have a dependency between eventfd or ++ * epoll and should terminate multishot poll at that point. + */ + if (waitqueue_active(&ctx->cq_wait)) +- wake_up_all(&ctx->cq_wait); ++ __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, ++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); + } + + static inline void io_cqring_wake(struct io_ring_ctx *ctx) +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -429,6 +429,14 @@ static int io_poll_wake(struct wait_queu + return 0; + + if (io_poll_get_ownership(req)) { ++ /* ++ * If we trigger a multishot poll off our own wakeup path, ++ * disable multishot as there is a circular dependency between ++ * CQ posting and triggering the event. ++ */ ++ if (mask & EPOLL_URING_WAKE) ++ poll->events |= EPOLLONESHOT; ++ + /* optional, saves extra locking for removal in tw handler */ + if (mask && poll->events & EPOLLONESHOT) { + list_del_init(&poll->wait.entry); diff --git a/queue-6.1/io_uring-protect-cq_timeouts-with-timeout_lock.patch b/queue-6.1/io_uring-protect-cq_timeouts-with-timeout_lock.patch new file mode 100644 index 00000000000..895b00c935c --- /dev/null +++ b/queue-6.1/io_uring-protect-cq_timeouts-with-timeout_lock.patch @@ -0,0 +1,39 @@ +From ea011ee10231f5fa6cbb415007048ca0bb948baf Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Fri, 2 Dec 2022 17:47:22 +0000 +Subject: io_uring: protect cq_timeouts with timeout_lock + +From: Pavel Begunkov + +commit ea011ee10231f5fa6cbb415007048ca0bb948baf upstream. + +Read cq_timeouts in io_flush_timeouts() only after taking the +timeout_lock, as it's protected by it. 
There are many places where we +also grab ->completion_lock, but for instance io_timeout_fn() doesn't +and still modifies cq_timeouts. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/9c79544dd6cf5c4018cb1bab99cf481a93ea46ef.1670002973.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/timeout.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/io_uring/timeout.c ++++ b/io_uring/timeout.c +@@ -72,10 +72,12 @@ static bool io_kill_timeout(struct io_ki + __cold void io_flush_timeouts(struct io_ring_ctx *ctx) + __must_hold(&ctx->completion_lock) + { +- u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); ++ u32 seq; + struct io_timeout *timeout, *tmp; + + spin_lock_irq(&ctx->timeout_lock); ++ seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); ++ + list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { + struct io_kiocb *req = cmd_to_io_kiocb(timeout); + u32 events_needed, events_got; diff --git a/queue-6.1/maple_tree-fix-mas_spanning_rebalance-on-insufficient-data.patch b/queue-6.1/maple_tree-fix-mas_spanning_rebalance-on-insufficient-data.patch new file mode 100644 index 00000000000..acc074aee58 --- /dev/null +++ b/queue-6.1/maple_tree-fix-mas_spanning_rebalance-on-insufficient-data.patch @@ -0,0 +1,56 @@ +From 0abb964aae3da746ea2fd4301599a6fa26da58db Mon Sep 17 00:00:00 2001 +From: Liam Howlett +Date: Mon, 19 Dec 2022 16:20:15 +0000 +Subject: maple_tree: fix mas_spanning_rebalance() on insufficient data + +From: Liam Howlett + +commit 0abb964aae3da746ea2fd4301599a6fa26da58db upstream. + +Mike Rapoport contacted me off-list with a regression in running criu. +Periodic tests fail with an RCU stall during execution. Although rare, it +is possible to hit this with other uses so this patch should be backported +to fix the regression. + +This patchset adds the fix and a test case to the maple tree test +suite. + + +This patch (of 2): + +An insufficient node was causing an out-of-bounds access on the node in +mas_leaf_max_gap(). The cause was the faulty detection of the new node +being a root node when overwriting many entries at the end of the tree. + +Fix the detection of a new root and ensure there is sufficient data prior +to entering the spanning rebalance loop. + +Link: https://lkml.kernel.org/r/20221219161922.2708732-1-Liam.Howlett@oracle.com +Link: https://lkml.kernel.org/r/20221219161922.2708732-2-Liam.Howlett@oracle.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam R. Howlett +Reported-by: Mike Rapoport +Tested-by: Mike Rapoport +Cc: Andrei Vagin +Cc: Mike Rapoport +Cc: Muhammad Usama Anjum +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -2989,7 +2989,9 @@ static int mas_spanning_rebalance(struct + mast->free = &free; + mast->destroy = &destroy; + l_mas.node = r_mas.node = m_mas.node = MAS_NONE; +- if (!(mast->orig_l->min && mast->orig_r->max == ULONG_MAX) && ++ ++ /* Check if this is not root and has sufficient data. 
*/
++	if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
+ 	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
+ 		mast_spanning_rebalance(mast);
+ 
diff --git a/queue-6.1/mm-gup-disallow-foll_force-foll_write-on-hugetlb-mappings.patch b/queue-6.1/mm-gup-disallow-foll_force-foll_write-on-hugetlb-mappings.patch
new file mode 100644
index 00000000000..f71e65d770d
--- /dev/null
+++ b/queue-6.1/mm-gup-disallow-foll_force-foll_write-on-hugetlb-mappings.patch
@@ -0,0 +1,149 @@
+From f347454d034184b4f0a2caf6e14daf7848cea01c Mon Sep 17 00:00:00 2001
+From: David Hildenbrand
+Date: Mon, 31 Oct 2022 16:25:24 +0100
+Subject: mm/gup: disallow FOLL_FORCE|FOLL_WRITE on hugetlb mappings
+
+From: David Hildenbrand
+
+commit f347454d034184b4f0a2caf6e14daf7848cea01c upstream.
+
+hugetlb does not support fake write-faults (write faults without write
+permissions). However, we are currently able to trigger a
+FAULT_FLAG_WRITE fault on a VMA without VM_WRITE.
+
+If we'd ever want to support FOLL_FORCE|FOLL_WRITE, we'd have to teach
+hugetlb to:
+
+(1) Leave the page mapped R/O after the fake write-fault, like
+    maybe_mkwrite() does.
+(2) Allow writing to an exclusive anon page that's mapped R/O when
+    FOLL_FORCE is set, like can_follow_write_pte(). E.g.,
+    __follow_hugetlb_must_fault() needs adjustment.
+
+For now, it's not clear if that added complexity is really required.
+History told us that FOLL_FORCE is dangerous and that we better limit
+its use to a bare minimum.
+
+--------------------------------------------------------------------------
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
+
+ int main(int argc, char **argv)
+ {
+         char *map;
+         int mem_fd;
+
+         map = mmap(NULL, 2 * 1024 * 1024u, PROT_READ,
+                    MAP_PRIVATE|MAP_ANON|MAP_HUGETLB|MAP_HUGE_2MB, -1, 0);
+         if (map == MAP_FAILED) {
+                 fprintf(stderr, "mmap() failed: %d\n", errno);
+                 return 1;
+         }
+
+         mem_fd = open("/proc/self/mem", O_RDWR);
+         if (mem_fd < 0) {
+                 fprintf(stderr, "open(/proc/self/mem) failed: %d\n", errno);
+                 return 1;
+         }
+
+         if (pwrite(mem_fd, "0", 1, (uintptr_t) map) == 1) {
+                 fprintf(stderr, "write() succeeded, which is unexpected\n");
+                 return 1;
+         }
+
+         printf("write() failed as expected: %d\n", errno);
+         return 0;
+ }
+--------------------------------------------------------------------------
+
+Fortunately, we have a sanity check in hugetlb_wp() in place ever since
+commit 1d8d14641fd9 ("mm/hugetlb: support write-faults in shared
+mappings"), which bails out instead of silently mapping a page writable
+in a !PROT_WRITE VMA.
+ +Consequently, above reproducer triggers a warning, similar to the one +reported by szsbot: + +------------[ cut here ]------------ +WARNING: CPU: 1 PID: 3612 at mm/hugetlb.c:5313 hugetlb_wp+0x20a/0x1af0 mm/hugetlb.c:5313 +Modules linked in: +CPU: 1 PID: 3612 Comm: syz-executor250 Not tainted 6.1.0-rc2-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 +RIP: 0010:hugetlb_wp+0x20a/0x1af0 mm/hugetlb.c:5313 +Code: ea 03 80 3c 02 00 0f 85 31 14 00 00 49 8b 5f 20 31 ff 48 89 dd 83 e5 02 48 89 ee e8 70 ab b7 ff 48 85 ed 75 5b e8 76 ae b7 ff <0f> 0b 41 bd 40 00 00 00 e8 69 ae b7 ff 48 b8 00 00 00 00 00 fc ff +RSP: 0018:ffffc90003caf620 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: 0000000008640070 RCX: 0000000000000000 +RDX: ffff88807b963a80 RSI: ffffffff81c4ed2a RDI: 0000000000000007 +RBP: 0000000000000000 R08: 0000000000000007 R09: 0000000000000000 +R10: 0000000000000000 R11: 000000000008c07e R12: ffff888023805800 +R13: 0000000000000000 R14: ffffffff91217f38 R15: ffff88801d4b0360 +FS: 0000555555bba300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fff7a47a1b8 CR3: 000000002378d000 CR4: 00000000003506e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + hugetlb_no_page mm/hugetlb.c:5755 [inline] + hugetlb_fault+0x19cc/0x2060 mm/hugetlb.c:5874 + follow_hugetlb_page+0x3f3/0x1850 mm/hugetlb.c:6301 + __get_user_pages+0x2cb/0xf10 mm/gup.c:1202 + __get_user_pages_locked mm/gup.c:1434 [inline] + __get_user_pages_remote+0x18f/0x830 mm/gup.c:2187 + get_user_pages_remote+0x84/0xc0 mm/gup.c:2260 + __access_remote_vm+0x287/0x6b0 mm/memory.c:5517 + ptrace_access_vm+0x181/0x1d0 kernel/ptrace.c:61 + generic_ptrace_pokedata kernel/ptrace.c:1323 [inline] + ptrace_request+0xb46/0x10c0 kernel/ptrace.c:1046 + arch_ptrace+0x36/0x510 arch/x86/kernel/ptrace.c:828 + __do_sys_ptrace kernel/ptrace.c:1296 [inline] + __se_sys_ptrace kernel/ptrace.c:1269 [inline] + __x64_sys_ptrace+0x178/0x2a0 kernel/ptrace.c:1269 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +[...] + +So let's silence that warning by teaching GUP code that FOLL_FORCE -- so +far -- does not apply to hugetlb. + +Note that FOLL_FORCE for read-access seems to be working as expected. The +assumption is that this has been broken forever, only ever since above +commit, we actually detect the wrong handling and WARN_ON_ONCE(). + +I assume this has been broken at least since 2014, when mm/gup.c came to +life. I failed to come up with a suitable Fixes tag quickly. + +Link: https://lkml.kernel.org/r/20221031152524.173644-1-david@redhat.com +Fixes: 1d8d14641fd9 ("mm/hugetlb: support write-faults in shared mappings") +Signed-off-by: David Hildenbrand +Reported-by: +Cc: Mike Kravetz +Cc: Peter Xu +Cc: John Hubbard +Cc: Jason Gunthorpe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -1065,6 +1065,9 @@ static int check_vma_flags(struct vm_are + if (!(vm_flags & VM_WRITE)) { + if (!(gup_flags & FOLL_FORCE)) + return -EFAULT; ++ /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. 
*/
++		if (is_vm_hugetlb_page(vma))
++			return -EFAULT;
+ 		/*
+ 		 * We used to let the write,force case do COW in a
+ 		 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
diff --git a/queue-6.1/ovl-fix-use-inode-directly-in-rcu-walk-mode.patch b/queue-6.1/ovl-fix-use-inode-directly-in-rcu-walk-mode.patch
new file mode 100644
index 00000000000..48747e93a31
--- /dev/null
+++ b/queue-6.1/ovl-fix-use-inode-directly-in-rcu-walk-mode.patch
@@ -0,0 +1,45 @@
+From 672e4268b2863d7e4978dfed29552b31c2f9bd4e Mon Sep 17 00:00:00 2001
+From: Chen Zhongjin
+Date: Mon, 28 Nov 2022 11:33:05 +0100
+Subject: ovl: fix use inode directly in rcu-walk mode
+
+From: Chen Zhongjin
+
+commit 672e4268b2863d7e4978dfed29552b31c2f9bd4e upstream.
+
+ovl_dentry_revalidate_common() can be called in rcu-walk mode. As the
+documentation says, "in rcu-walk mode, d_parent and d_inode should not
+be used without care".
+
+Check the inode here to protect access under rcu-walk mode.
+
+Fixes: bccece1ead36 ("ovl: allow remote upper")
+Reported-and-tested-by: syzbot+a4055c78774bbf3498bb@syzkaller.appspotmail.com
+Signed-off-by: Chen Zhongjin
+Cc: stable@vger.kernel.org # v5.7
+Signed-off-by: Miklos Szeredi
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/overlayfs/super.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -139,11 +139,16 @@ static int ovl_dentry_revalidate_common(
+ 					  unsigned int flags, bool weak)
+ {
+ 	struct ovl_entry *oe = dentry->d_fsdata;
++	struct inode *inode = d_inode_rcu(dentry);
+ 	struct dentry *upper;
+ 	unsigned int i;
+ 	int ret = 1;
+ 
+-	upper = ovl_dentry_upper(dentry);
++	/* Careful in RCU mode */
++	if (!inode)
++		return -ECHILD;
++
++	upper = ovl_i_dentry_upper(inode);
+ 	if (upper)
+ 		ret = ovl_revalidate_real(upper, flags, weak);
+ 
diff --git a/queue-6.1/scsi-qla2xxx-fix-crash-when-i-o-abort-times-out.patch b/queue-6.1/scsi-qla2xxx-fix-crash-when-i-o-abort-times-out.patch
new file mode 100644
index 00000000000..c25a7c998c6
--- /dev/null
+++ b/queue-6.1/scsi-qla2xxx-fix-crash-when-i-o-abort-times-out.patch
@@ -0,0 +1,94 @@
+From 68ad83188d782b2ecef2e41ac245d27e0710fe8e Mon Sep 17 00:00:00 2001
+From: Arun Easi
+Date: Tue, 29 Nov 2022 01:26:34 -0800
+Subject: scsi: qla2xxx: Fix crash when I/O abort times out
+
+From: Arun Easi
+
+commit 68ad83188d782b2ecef2e41ac245d27e0710fe8e upstream.
+
+While performing CPU hotplug, a crash with the following stack was seen:
+
+Call Trace:
+ qla24xx_process_response_queue+0x42a/0x970 [qla2xxx]
+ qla2x00_start_nvme_mq+0x3a2/0x4b0 [qla2xxx]
+ qla_nvme_post_cmd+0x166/0x240 [qla2xxx]
+ nvme_fc_start_fcp_op.part.0+0x119/0x2e0 [nvme_fc]
+ blk_mq_dispatch_rq_list+0x17b/0x610
+ __blk_mq_sched_dispatch_requests+0xb0/0x140
+ blk_mq_sched_dispatch_requests+0x30/0x60
+ __blk_mq_run_hw_queue+0x35/0x90
+ __blk_mq_delay_run_hw_queue+0x161/0x180
+ blk_execute_rq+0xbe/0x160
+ __nvme_submit_sync_cmd+0x16f/0x220 [nvme_core]
+ nvmf_connect_admin_queue+0x11a/0x170 [nvme_fabrics]
+ nvme_fc_create_association.cold+0x50/0x3dc [nvme_fc]
+ nvme_fc_connect_ctrl_work+0x19/0x30 [nvme_fc]
+ process_one_work+0x1e8/0x3c0
+
+On abort timeout, completion was called without checking if the I/O was
+already completed.
+
+Verify that the I/O and the abort request are indeed outstanding before
+attempting completion.
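
The shape of the fix is a classic find-and-claim pattern: scan the
outstanding table under the lock, remember what was actually found, and
complete only what was claimed. A minimal userspace model of that pattern
follows; the names are invented for illustration and this is not qla2xxx
code (compile with -lpthread).

--------------------------------------------------------------------------
 #include <pthread.h>
 #include <stdio.h>

 #define NCMDS 4

 static pthread_mutex_t qp_lock = PTHREAD_MUTEX_INITIALIZER;
 static void *outstanding[NCMDS];	/* model of outstanding_cmds */

 /* Complete a timed-out command only if it was still in the table
  * when we claimed it under the lock; otherwise it has already been
  * completed and must not be completed a second time. */
 static void abort_timeout(void *cmd)
 {
 	int found = 0;
 	int i;

 	pthread_mutex_lock(&qp_lock);
 	for (i = 0; i < NCMDS; i++) {
 		if (outstanding[i] == cmd) {
 			outstanding[i] = NULL;	/* claim it */
 			found = 1;
 			break;
 		}
 	}
 	pthread_mutex_unlock(&qp_lock);

 	if (found)
 		printf("completing %p after abort timeout\n", cmd);
 	else
 		printf("%p no longer outstanding, skipping\n", cmd);
 }

 int main(void)
 {
 	int io;	/* dummy command object */

 	outstanding[1] = &io;
 	abort_timeout(&io);	/* still outstanding: completed here */
 	abort_timeout(&io);	/* already gone: skipped */
 	return 0;
 }
--------------------------------------------------------------------------
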
+ +Fixes: 71c80b75ce8f ("scsi: qla2xxx: Do command completion on abort timeout") +Reported-by: Marco Patalano +Tested-by: Marco Patalano +Cc: stable@vger.kernel.org +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20221129092634.15347-1-njavali@marvell.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_init.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -110,6 +110,7 @@ static void qla24xx_abort_iocb_timeout(v + struct qla_qpair *qpair = sp->qpair; + u32 handle; + unsigned long flags; ++ int sp_found = 0, cmdsp_found = 0; + + if (sp->cmd_sp) + ql_dbg(ql_dbg_async, sp->vha, 0x507c, +@@ -124,18 +125,21 @@ static void qla24xx_abort_iocb_timeout(v + spin_lock_irqsave(qpair->qp_lock_ptr, flags); + for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) { + if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] == +- sp->cmd_sp)) ++ sp->cmd_sp)) { + qpair->req->outstanding_cmds[handle] = NULL; ++ cmdsp_found = 1; ++ } + + /* removing the abort */ + if (qpair->req->outstanding_cmds[handle] == sp) { + qpair->req->outstanding_cmds[handle] = NULL; ++ sp_found = 1; + break; + } + } + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + +- if (sp->cmd_sp) { ++ if (cmdsp_found && sp->cmd_sp) { + /* + * This done function should take care of + * original command ref: INIT +@@ -143,8 +147,10 @@ static void qla24xx_abort_iocb_timeout(v + sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); + } + +- abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); +- sp->done(sp, QLA_OS_TIMER_EXPIRED); ++ if (sp_found) { ++ abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); ++ sp->done(sp, QLA_OS_TIMER_EXPIRED); ++ } + } + + static void qla24xx_abort_sp_done(srb_t *sp, int res) diff --git a/queue-6.1/series b/queue-6.1/series index 09ee96eb695..29904710064 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -1125,3 +1125,20 @@ iio-addac-ad74413r-fix-integer-promotion-bug-in-ad74413_get_input_current_offset regulator-core-fix-deadlock-on-regulator-enable.patch spi-fsl_spi-don-t-change-speed-while-chipselect-is-active.patch floppy-fix-memory-leak-in-do_floppy_init.patch +gcov-add-support-for-checksum-field.patch +test_maple_tree-add-test-for-mas_spanning_rebalance-on-insufficient-data.patch +maple_tree-fix-mas_spanning_rebalance-on-insufficient-data.patch +fbdev-fbcon-release-buffer-when-fbcon_do_set_font-failed.patch +ovl-fix-use-inode-directly-in-rcu-walk-mode.patch +btrfs-do-not-bug_on-on-enomem-when-dropping-extent-items-for-a-range.patch +mm-gup-disallow-foll_force-foll_write-on-hugetlb-mappings.patch +scsi-qla2xxx-fix-crash-when-i-o-abort-times-out.patch +blk-iolatency-fix-memory-leak-on-add_disk-failures.patch +io_uring-net-introduce-ioring_send_zc_report_usage-flag.patch +io_uring-pass-in-epoll_uring_wake-for-eventfd-signaling-and-wakeups.patch +io_uring-add-completion-locking-for-iopoll.patch +io_uring-dont-remove-file-from-msg_ring-reqs.patch +io_uring-improve-io_double_lock_ctx-fail-handling.patch +io_uring-net-ensure-compat-import-handlers-clear-free_iov.patch +io_uring-net-fix-cleanup-after-recycle.patch +io_uring-protect-cq_timeouts-with-timeout_lock.patch diff --git a/queue-6.1/test_maple_tree-add-test-for-mas_spanning_rebalance-on-insufficient-data.patch b/queue-6.1/test_maple_tree-add-test-for-mas_spanning_rebalance-on-insufficient-data.patch new file mode 100644 index 
00000000000..896ea98f5cf
--- /dev/null
+++ b/queue-6.1/test_maple_tree-add-test-for-mas_spanning_rebalance-on-insufficient-data.patch
@@ -0,0 +1,69 @@
+From c5651b31f51584bd1199b3a552c8211a8523d6e1 Mon Sep 17 00:00:00 2001
+From: Liam Howlett
+Date: Mon, 19 Dec 2022 16:20:15 +0000
+Subject: test_maple_tree: add test for mas_spanning_rebalance() on insufficient data
+
+From: Liam Howlett
+
+commit c5651b31f51584bd1199b3a552c8211a8523d6e1 upstream.
+
+Add a test to the maple tree test suite so that the spanning rebalance
+insufficient-node issue does not go undetected again.
+
+Link: https://lkml.kernel.org/r/20221219161922.2708732-3-Liam.Howlett@oracle.com
+Fixes: 54a611b60590 ("Maple Tree: add new data structure")
+Signed-off-by: Liam R. Howlett
+Cc: Andrei Vagin
+Cc: Mike Rapoport
+Cc: Muhammad Usama Anjum
+Cc: stable@vger.kernel.org
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ lib/test_maple_tree.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
+index f425f169ef08..497fc93ccf9e 100644
+--- a/lib/test_maple_tree.c
++++ b/lib/test_maple_tree.c
+@@ -2498,6 +2498,25 @@ static noinline void check_dup(struct maple_tree *mt)
+ 	}
+ }
+ 
++static noinline void check_bnode_min_spanning(struct maple_tree *mt)
++{
++	int i = 50;
++	MA_STATE(mas, mt, 0, 0);
++
++	mt_set_non_kernel(9999);
++	mas_lock(&mas);
++	do {
++		mas_set_range(&mas, i*10, i*10+9);
++		mas_store(&mas, check_bnode_min_spanning);
++	} while (i--);
++
++	mas_set_range(&mas, 240, 509);
++	mas_store(&mas, NULL);
++	mas_unlock(&mas);
++	mas_destroy(&mas);
++	mt_set_non_kernel(0);
++}
++
+ static DEFINE_MTREE(tree);
+ static int maple_tree_seed(void)
+ {
+@@ -2742,6 +2761,10 @@ static int maple_tree_seed(void)
+ 	check_dup(&tree);
+ 	mtree_destroy(&tree);
+ 
++	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
++	check_bnode_min_spanning(&tree);
++	mtree_destroy(&tree);
++
+ #if defined(BENCH)
+ skip:
+ #endif
+-- 
+2.39.0