From: Greg Kroah-Hartman Date: Sat, 22 Apr 2023 16:39:00 +0000 (+0200) Subject: 6.2-stable patches X-Git-Tag: v4.14.314~58 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ca3e6190a12a1752a5c436e89c17bdcddaf4a475;p=thirdparty%2Fkernel%2Fstable-queue.git 6.2-stable patches added patches: asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch memstick-fix-memory-leak-if-card-device-is-never-registered.patch mptcp-fix-accept-vs-worker-race.patch mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch --- diff --git a/queue-6.2/asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch b/queue-6.2/asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch new file mode 100644 index 00000000000..c90622d11b5 --- /dev/null +++ b/queue-6.2/asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch @@ -0,0 +1,48 @@ +From de6aa72b265b72bca2b1897d5000c8f0147d3157 Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi +Date: Mon, 3 Apr 2023 12:09:09 +0300 +Subject: ASoC: SOF: ipc4-topology: Clarify bind failure caused by missing fw_module + +From: Peter Ujfalusi + +commit de6aa72b265b72bca2b1897d5000c8f0147d3157 upstream. + +The original patch uses a feature in lib/vsprintf.c to handle the invalid +address when trying to print *_fw_module->man4_module_entry.name when the +*rc_fw_module is NULL. +This case is handled by check_pointer_msg() internally and turns the +invalid pointer to '(efault)' for printing but it is hiding useful +information about the circumstances. Change the print to emit the name +of the widget and a note on which side's fw_module is missing. 
+ +Fixes: e3720f92e023 ("ASoC: SOF: avoid a NULL dereference with unsupported widgets") +Reported-by: Dan Carpenter +Link: https://lore.kernel.org/alsa-devel/4826f662-42f0-4a82-ba32-8bf5f8a03256@kili.mountain/ +Signed-off-by: Peter Ujfalusi +Rule: 'Cc: stable@vger.kernel.org' or 'commit upstream.' +Link: https://lore.kernel.org/r/20230403090909.18233-1-peter.ujfalusi@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/sof/ipc4-topology.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/sound/soc/sof/ipc4-topology.c ++++ b/sound/soc/sof/ipc4-topology.c +@@ -1687,10 +1687,12 @@ static int sof_ipc4_route_setup(struct s + int ret; + + if (!src_fw_module || !sink_fw_module) { +- /* The NULL module will print as "(efault)" */ +- dev_err(sdev->dev, "source %s or sink %s widget weren't set up properly\n", +- src_fw_module->man4_module_entry.name, +- sink_fw_module->man4_module_entry.name); ++ dev_err(sdev->dev, ++ "cannot bind %s -> %s, no firmware module for: %s%s\n", ++ src_widget->widget->name, sink_widget->widget->name, ++ src_fw_module ? "" : " source", ++ sink_fw_module ? "" : " sink"); ++ + return -ENODEV; + } + diff --git a/queue-6.2/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch b/queue-6.2/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch new file mode 100644 index 00000000000..0a517f3d600 --- /dev/null +++ b/queue-6.2/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch @@ -0,0 +1,151 @@ +From 659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 Mon Sep 17 00:00:00 2001 +From: Ondrej Mosnacek +Date: Fri, 17 Feb 2023 17:21:54 +0100 +Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id() + +From: Ondrej Mosnacek + +commit 659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 upstream. 
+ +Linux Security Modules (LSMs) that implement the "capable" hook will +usually emit an access denial message to the audit log whenever they +"block" the current task from using the given capability based on their +security policy. + +The occurrence of a denial is used as an indication that the given task +has attempted an operation that requires the given access permission, so +the callers of functions that perform LSM permission checks must take care +to avoid calling them too early (before it is decided if the permission is +actually needed to perform the requested operation). + +The __sys_setres[ug]id() functions violate this convention by first +calling ns_capable_setid() and only then checking if the operation +requires the capability or not. It means that any caller that has the +capability granted by DAC (task's capability set) but not by MAC (LSMs) +will generate a "denied" audit record, even if it is doing an operation for +which the capability is not required. + +Fix this by reordering the checks such that ns_capable_setid() is checked +last and -EPERM is returned immediately if it returns false. + +While there, also do two small optimizations: +* move the capability check before prepare_creds() and +* bail out early in case of a no-op. + +Link: https://lkml.kernel.org/r/20230217162154.837549-1-omosnace@redhat.com +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Ondrej Mosnacek +Cc: Eric W. 
Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sys.c | 69 ++++++++++++++++++++++++++++++++++------------------------- + 1 file changed, 40 insertions(+), 29 deletions(-) + +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -664,6 +664,7 @@ long __sys_setresuid(uid_t ruid, uid_t e + struct cred *new; + int retval; + kuid_t kruid, keuid, ksuid; ++ bool ruid_new, euid_new, suid_new; + + kruid = make_kuid(ns, ruid); + keuid = make_kuid(ns, euid); +@@ -678,25 +679,29 @@ long __sys_setresuid(uid_t ruid, uid_t e + if ((suid != (uid_t) -1) && !uid_valid(ksuid)) + return -EINVAL; + ++ old = current_cred(); ++ ++ /* check for no-op */ ++ if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) && ++ (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) && ++ uid_eq(keuid, old->fsuid))) && ++ (suid == (uid_t) -1 || uid_eq(ksuid, old->suid))) ++ return 0; ++ ++ ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && ++ !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid); ++ euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && ++ !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid); ++ suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && ++ !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid); ++ if ((ruid_new || euid_new || suid_new) && ++ !ns_capable_setid(old->user_ns, CAP_SETUID)) ++ return -EPERM; ++ + new = prepare_creds(); + if (!new) + return -ENOMEM; + +- old = current_cred(); +- +- retval = -EPERM; +- if (!ns_capable_setid(old->user_ns, CAP_SETUID)) { +- if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && +- !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) +- goto error; +- if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && +- !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) +- goto error; +- if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && +- !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) +- goto error; +- } +- + if (ruid != (uid_t) -1) { + new->uid = kruid; + if 
(!uid_eq(kruid, old->uid)) { +@@ -761,6 +766,7 @@ long __sys_setresgid(gid_t rgid, gid_t e + struct cred *new; + int retval; + kgid_t krgid, kegid, ksgid; ++ bool rgid_new, egid_new, sgid_new; + + krgid = make_kgid(ns, rgid); + kegid = make_kgid(ns, egid); +@@ -773,23 +779,28 @@ long __sys_setresgid(gid_t rgid, gid_t e + if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) + return -EINVAL; + ++ old = current_cred(); ++ ++ /* check for no-op */ ++ if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) && ++ (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) && ++ gid_eq(kegid, old->fsgid))) && ++ (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid))) ++ return 0; ++ ++ rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && ++ !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid); ++ egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && ++ !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid); ++ sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && ++ !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid); ++ if ((rgid_new || egid_new || sgid_new) && ++ !ns_capable_setid(old->user_ns, CAP_SETGID)) ++ return -EPERM; ++ + new = prepare_creds(); + if (!new) + return -ENOMEM; +- old = current_cred(); +- +- retval = -EPERM; +- if (!ns_capable_setid(old->user_ns, CAP_SETGID)) { +- if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && +- !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) +- goto error; +- if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && +- !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) +- goto error; +- if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && +- !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) +- goto error; +- } + + if (rgid != (gid_t) -1) + new->gid = krgid; diff --git a/queue-6.2/memstick-fix-memory-leak-if-card-device-is-never-registered.patch b/queue-6.2/memstick-fix-memory-leak-if-card-device-is-never-registered.patch new file mode 100644 index 00000000000..8de753ddfbc --- /dev/null +++ 
b/queue-6.2/memstick-fix-memory-leak-if-card-device-is-never-registered.patch @@ -0,0 +1,61 @@ +From 4b6d621c9d859ff89e68cebf6178652592676013 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Sat, 1 Apr 2023 22:03:27 +0200 +Subject: memstick: fix memory leak if card device is never registered + +From: Greg Kroah-Hartman + +commit 4b6d621c9d859ff89e68cebf6178652592676013 upstream. + +When calling dev_set_name() memory is allocated for the name for the +struct device. Once that structure device is registered, or attempted +to be registered, with the driver core, the driver core will handle +cleaning up that memory when the device is removed from the system. + +Unfortunately for the memstick code, there is an error path that causes +the struct device to never be registered, and so the memory allocated in +dev_set_name will be leaked. Fix that leak by manually freeing it right +before the memory for the device is freed. + +Cc: Maxim Levitsky +Cc: Alex Dubov +Cc: Ulf Hansson +Cc: "Rafael J. 
Wysocki" +Cc: Hans de Goede +Cc: Kay Sievers +Cc: linux-mmc@vger.kernel.org +Fixes: 0252c3b4f018 ("memstick: struct device - replace bus_id with dev_name(), dev_set_name()") +Cc: stable +Co-developed-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +Co-developed-by: Mirsad Goran Todorovac +Signed-off-by: Mirsad Goran Todorovac +Link: https://lore.kernel.org/r/20230401200327.16800-1-gregkh@linuxfoundation.org +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/memstick/core/memstick.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/memstick/core/memstick.c ++++ b/drivers/memstick/core/memstick.c +@@ -410,6 +410,7 @@ static struct memstick_dev *memstick_all + return card; + err_out: + host->card = old_card; ++ kfree_const(card->dev.kobj.name); + kfree(card); + return NULL; + } +@@ -468,8 +469,10 @@ static void memstick_check(struct work_s + put_device(&card->dev); + host->card = NULL; + } +- } else ++ } else { ++ kfree_const(card->dev.kobj.name); + kfree(card); ++ } + } + + out_power_off: diff --git a/queue-6.2/mptcp-fix-accept-vs-worker-race.patch b/queue-6.2/mptcp-fix-accept-vs-worker-race.patch new file mode 100644 index 00000000000..0b67105f3b3 --- /dev/null +++ b/queue-6.2/mptcp-fix-accept-vs-worker-race.patch @@ -0,0 +1,282 @@ +From 63740448a32eb662e05894425b47bcc5814136f4 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 17 Apr 2023 16:00:41 +0200 +Subject: mptcp: fix accept vs worker race + +From: Paolo Abeni + +commit 63740448a32eb662e05894425b47bcc5814136f4 upstream. + +The mptcp worker and mptcp_accept() can race, as reported by Christoph: + +refcount_t: addition on 0; use-after-free. 
+WARNING: CPU: 1 PID: 14351 at lib/refcount.c:25 refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25 +Modules linked in: +CPU: 1 PID: 14351 Comm: syz-executor.2 Not tainted 6.3.0-rc1-gde5e8fd0123c #11 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +RIP: 0010:refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25 +Code: 02 31 ff 89 de e8 1b f0 a7 ff 84 db 0f 85 6e ff ff ff e8 3e f5 a7 ff 48 c7 c7 d8 c7 34 83 c6 05 6d 2d 0f 02 01 e8 cb 3d 90 ff <0f> 0b e9 4f ff ff ff e8 1f f5 a7 ff 0f b6 1d 54 2d 0f 02 31 ff 89 +RSP: 0018:ffffc90000a47bf8 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: ffff88802eae98c0 RSI: ffffffff81097d4f RDI: 0000000000000001 +RBP: ffff88802e712180 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: ffff88802eaea148 R12: ffff88802e712100 +R13: ffff88802e712a88 R14: ffff888005cb93a8 R15: ffff88802e712a88 +FS: 0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f277fd89120 CR3: 0000000035486002 CR4: 0000000000370ee0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + __refcount_add include/linux/refcount.h:199 [inline] + __refcount_inc include/linux/refcount.h:250 [inline] + refcount_inc include/linux/refcount.h:267 [inline] + sock_hold include/net/sock.h:775 [inline] + __mptcp_close+0x4c6/0x4d0 net/mptcp/protocol.c:3051 + mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072 + inet_release+0x56/0xa0 net/ipv4/af_inet.c:429 + __sock_release+0x51/0xf0 net/socket.c:653 + sock_close+0x18/0x20 net/socket.c:1395 + __fput+0x113/0x430 fs/file_table.c:321 + task_work_run+0x96/0x100 kernel/task_work.c:179 + exit_task_work include/linux/task_work.h:38 [inline] + do_exit+0x4fc/0x10c0 kernel/exit.c:869 + do_group_exit+0x51/0xf0 kernel/exit.c:1019 + get_signal+0x12b0/0x1390 kernel/signal.c:2859 
+ arch_do_signal_or_restart+0x25/0x260 arch/x86/kernel/signal.c:306 + exit_to_user_mode_loop kernel/entry/common.c:168 [inline] + exit_to_user_mode_prepare+0x131/0x1a0 kernel/entry/common.c:203 + __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] + syscall_exit_to_user_mode+0x19/0x40 kernel/entry/common.c:296 + do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x72/0xdc +RIP: 0033:0x7fec4b4926a9 +Code: Unable to access opcode bytes at 0x7fec4b49267f. +RSP: 002b:00007fec49f9dd78 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca +RAX: fffffffffffffe00 RBX: 00000000006bc058 RCX: 00007fec4b4926a9 +RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006bc058 +RBP: 00000000006bc050 R08: 00000000007df998 R09: 00000000007df998 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c +R13: fffffffffffffea8 R14: 000000000000000b R15: 000000000001fe40 + + +The root cause is that the worker can force fallback to TCP the first +mptcp subflow, actually deleting the unaccepted msk socket. + +We can explicitly prevent the race delaying the unaccepted msk deletion +at listener shutdown time. In case the closed subflow is later accepted, +just drop the mptcp context and let the user-space deal with the +paired mptcp socket. + +Fixes: b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets") +Cc: stable@vger.kernel.org +Reported-by: Christoph Paasch +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/375 +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Tested-by: Christoph Paasch +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 68 +++++++++++++++++++++++++++++++++------------------ + net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 22 +++++++++------- + 3 files changed, 58 insertions(+), 33 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2316,7 +2316,26 @@ static void __mptcp_close_ssk(struct soc + unsigned int flags) + { + struct mptcp_sock *msk = mptcp_sk(sk); +- bool need_push, dispose_it; ++ bool dispose_it, need_push = false; ++ ++ /* If the first subflow moved to a close state before accept, e.g. due ++ * to an incoming reset, mptcp either: ++ * - if either the subflow or the msk are dead, destroy the context ++ * (the subflow socket is deleted by inet_child_forget) and the msk ++ * - otherwise do nothing at the moment and take action at accept and/or ++ * listener shutdown - user-space must be able to accept() the closed ++ * socket. ++ */ ++ if (msk->in_accept_queue && msk->first == ssk) { ++ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) ++ return; ++ ++ /* ensure later check in mptcp_worker() will dispose the msk */ ++ sock_set_flag(sk, SOCK_DEAD); ++ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); ++ mptcp_subflow_drop_ctx(ssk); ++ goto out_release; ++ } + + dispose_it = !msk->subflow || ssk != msk->subflow->sk; + if (dispose_it) +@@ -2352,18 +2371,6 @@ static void __mptcp_close_ssk(struct soc + if (!inet_csk(ssk)->icsk_ulp_ops) { + WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD)); + kfree_rcu(subflow, rcu); +- } else if (msk->in_accept_queue && msk->first == ssk) { +- /* if the first subflow moved to a close state, e.g. due to +- * incoming reset and we reach here before inet_child_forget() +- * the TCP stack could later try to close it via +- * inet_csk_listen_stop(), or deliver it to the user space via +- * accept(). 
+- * We can't delete the subflow - or risk a double free - nor let +- * the msk survive - or will be leaked in the non accept scenario: +- * fallback and let TCP cope with the subflow cleanup. +- */ +- WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD)); +- mptcp_subflow_drop_ctx(ssk); + } else { + /* otherwise tcp will dispose of the ssk and subflow ctx */ + if (ssk->sk_state == TCP_LISTEN) { +@@ -2378,6 +2385,8 @@ static void __mptcp_close_ssk(struct soc + /* close acquired an extra ref */ + __sock_put(ssk); + } ++ ++out_release: + release_sock(ssk); + + sock_put(ssk); +@@ -2432,21 +2441,14 @@ static void __mptcp_close_subflow(struct + mptcp_close_ssk(sk, ssk, subflow); + } + +- /* if the MPC subflow has been closed before the msk is accepted, +- * msk will never be accept-ed, close it now +- */ +- if (!msk->first && msk->in_accept_queue) { +- sock_set_flag(sk, SOCK_DEAD); +- inet_sk_state_store(sk, TCP_CLOSE); +- } + } + +-static bool mptcp_check_close_timeout(const struct sock *sk) ++static bool mptcp_should_close(const struct sock *sk) + { + s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; + struct mptcp_subflow_context *subflow; + +- if (delta >= TCP_TIMEWAIT_LEN) ++ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) + return true; + + /* if all subflows are in closed status don't bother with additional +@@ -2654,7 +2656,7 @@ static void mptcp_worker(struct work_str + * even if it is orphaned and in FIN_WAIT2 state + */ + if (sock_flag(sk, SOCK_DEAD)) { +- if (mptcp_check_close_timeout(sk)) { ++ if (mptcp_should_close(sk)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } +@@ -2901,6 +2903,14 @@ static void __mptcp_destroy_sock(struct + sock_put(sk); + } + ++void __mptcp_unaccepted_force_close(struct sock *sk) ++{ ++ sock_set_flag(sk, SOCK_DEAD); ++ inet_sk_state_store(sk, TCP_CLOSE); ++ mptcp_do_fastclose(sk); ++ __mptcp_destroy_sock(sk); ++} ++ + static __poll_t mptcp_check_readable(struct mptcp_sock *msk) + { + /* 
Concurrent splices from sk_receive_queue into receive_queue will +@@ -3728,6 +3738,18 @@ static int mptcp_stream_accept(struct so + if (!ssk->sk_socket) + mptcp_sock_graft(ssk, newsock); + } ++ ++ /* Do late cleanup for the first subflow as necessary. Also ++ * deal with bad peers not doing a complete shutdown. ++ */ ++ if (msk->first && ++ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { ++ __mptcp_close_ssk(newsk, msk->first, ++ mptcp_subflow_ctx(msk->first), 0); ++ if (unlikely(list_empty(&msk->conn_list))) ++ inet_sk_state_store(newsk, TCP_CLOSE); ++ } ++ + release_sock(newsk); + } + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -634,6 +634,7 @@ void mptcp_sock_graft(struct sock *sk, s + struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); + bool __mptcp_close(struct sock *sk, long timeout); + void mptcp_cancel_work(struct sock *sk); ++void __mptcp_unaccepted_force_close(struct sock *sk); + void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); + + bool mptcp_addresses_equal(const struct mptcp_addr_info *a, +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -722,9 +722,12 @@ void mptcp_subflow_drop_ctx(struct sock + if (!ctx) + return; + +- subflow_ulp_fallback(ssk, ctx); +- if (ctx->conn) +- sock_put(ctx->conn); ++ list_del(&mptcp_subflow_ctx(ssk)->node); ++ if (inet_csk(ssk)->icsk_ulp_ops) { ++ subflow_ulp_fallback(ssk, ctx); ++ if (ctx->conn) ++ sock_put(ctx->conn); ++ } + + kfree_rcu(ctx, rcu); + } +@@ -1821,6 +1824,7 @@ void mptcp_subflow_queue_clean(struct so + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; + struct request_sock *req; ++ struct sock *sk; + + /* build a list of all unaccepted mptcp sockets */ + spin_lock_bh(&queue->rskq_lock); +@@ -1836,11 +1840,12 @@ void mptcp_subflow_queue_clean(struct so + continue; + + /* skip if already in list */ +- msk = mptcp_sk(subflow->conn); ++ sk = subflow->conn; ++ msk = 
mptcp_sk(sk); + if (msk->dl_next || msk == head) + continue; + +- sock_hold(subflow->conn); ++ sock_hold(sk); + msk->dl_next = head; + head = msk; + } +@@ -1854,16 +1859,13 @@ void mptcp_subflow_queue_clean(struct so + release_sock(listener_ssk); + + for (msk = head; msk; msk = next) { +- struct sock *sk = (struct sock *)msk; ++ sk = (struct sock *)msk; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + next = msk->dl_next; + msk->dl_next = NULL; + +- /* prevent the stack from later re-schedule the worker for +- * this socket +- */ +- inet_sk_state_store(sk, TCP_CLOSE); ++ __mptcp_unaccepted_force_close(sk); + release_sock(sk); + + /* lockdep will report a false positive ABBA deadlock diff --git a/queue-6.2/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch b/queue-6.2/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch new file mode 100644 index 00000000000..118dbfb9f34 --- /dev/null +++ b/queue-6.2/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch @@ -0,0 +1,192 @@ +From 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 17 Apr 2023 16:00:40 +0200 +Subject: mptcp: stops worker on unaccepted sockets at listener close + +From: Paolo Abeni + +commit 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 upstream. + +This is a partial revert of the blamed commit, with a relevant +change: mptcp_subflow_queue_clean() now just change the msk +socket status and stop the worker, so that the UaF issue addressed +by the blamed commit is not re-introduced. 
+ +The above prevents the mptcp worker from running concurrently with +inet_csk_listen_stop(), as such race would trigger a warning, as +reported by Christoph: + +RSP: 002b:00007f784fe09cd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +WARNING: CPU: 0 PID: 25807 at net/ipv4/inet_connection_sock.c:1387 inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387 +RAX: ffffffffffffffda RBX: 00000000006bc050 RCX: 00007f7850afd6a9 +RDX: 0000000000000000 RSI: 0000000020000340 RDI: 0000000000000004 +Modules linked in: +RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c +R13: fffffffffffffea8 R14: 00000000006bc050 R15: 000000000001fe40 + + +CPU: 0 PID: 25807 Comm: syz-executor.7 Not tainted 6.2.0-g778e54711659 #7 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +RIP: 0010:inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387 +RAX: 0000000000000000 RBX: ffff888100dfbd40 RCX: 0000000000000000 +RDX: ffff8881363aab80 RSI: ffffffff81c494f4 RDI: 0000000000000005 +RBP: ffff888126dad080 R08: 0000000000000005 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: ffff888100dfe040 +R13: 0000000000000001 R14: 0000000000000000 R15: ffff888100dfbdd8 +FS: 00007f7850a2c800(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b32d26000 CR3: 000000012fdd8006 CR4: 0000000000770ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + __tcp_close+0x5b2/0x620 net/ipv4/tcp.c:2875 + __mptcp_close_ssk+0x145/0x3d0 net/mptcp/protocol.c:2427 + mptcp_destroy_common+0x8a/0x1c0 net/mptcp/protocol.c:3277 + mptcp_destroy+0x41/0x60 net/mptcp/protocol.c:3304 + __mptcp_destroy_sock+0x56/0x140 net/mptcp/protocol.c:2965 + __mptcp_close+0x38f/0x4a0 net/mptcp/protocol.c:3057 + 
mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072 + inet_release+0x53/0xa0 net/ipv4/af_inet.c:429 + __sock_release+0x4e/0xf0 net/socket.c:651 + sock_close+0x15/0x20 net/socket.c:1393 + __fput+0xff/0x420 fs/file_table.c:321 + task_work_run+0x8b/0xe0 kernel/task_work.c:179 + resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] + exit_to_user_mode_loop kernel/entry/common.c:171 [inline] + exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203 + __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] + syscall_exit_to_user_mode+0x1d/0x40 kernel/entry/common.c:296 + do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x72/0xdc +RIP: 0033:0x7f7850af70dc +RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7850af70dc +RDX: 00007f7850a2c800 RSI: 0000000000000002 RDI: 0000000000000003 +RBP: 00000000006bd980 R08: 0000000000000000 R09: 00000000000018a0 +R10: 00000000316338a4 R11: 0000000000000293 R12: 0000000000211e31 +R13: 00000000006bc05c R14: 00007f785062c000 R15: 0000000000211af0 + +Fixes: 0a3f4f1f9c27 ("mptcp: fix UaF in listener shutdown") +Cc: stable@vger.kernel.org +Reported-by: Christoph Paasch +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/371 +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 6 +++- + net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 78 insertions(+), 1 deletion(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2366,8 +2366,12 @@ static void __mptcp_close_ssk(struct soc + mptcp_subflow_drop_ctx(ssk); + } else { + /* otherwise tcp will dispose of the ssk and subflow ctx */ +- if (ssk->sk_state == TCP_LISTEN) ++ if (ssk->sk_state == TCP_LISTEN) { ++ tcp_set_state(ssk, TCP_CLOSE); ++ mptcp_subflow_queue_clean(sk, ssk); ++ inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); ++ } + + __tcp_close(ssk, 0); + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -629,6 +629,7 @@ void mptcp_close_ssk(struct sock *sk, st + struct mptcp_subflow_context *subflow); + void __mptcp_subflow_send_ack(struct sock *ssk); + void mptcp_subflow_reset(struct sock *ssk); ++void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); + void mptcp_sock_graft(struct sock *sk, struct socket *parent); + struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); + bool __mptcp_close(struct sock *sk, long timeout); +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1816,6 +1816,78 @@ static void subflow_state_change(struct + } + } + ++void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) ++{ ++ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; ++ struct mptcp_sock *msk, *next, *head = NULL; ++ struct request_sock *req; ++ ++ /* build a list of all unaccepted mptcp sockets */ ++ spin_lock_bh(&queue->rskq_lock); ++ for (req = queue->rskq_accept_head; req; req = req->dl_next) { ++ struct mptcp_subflow_context *subflow; ++ struct sock *ssk = req->sk; ++ ++ if (!sk_is_mptcp(ssk)) ++ continue; ++ ++ subflow = mptcp_subflow_ctx(ssk); ++ if (!subflow || !subflow->conn) ++ continue; ++ ++ /* skip 
if already in list */ ++ msk = mptcp_sk(subflow->conn); ++ if (msk->dl_next || msk == head) ++ continue; ++ ++ sock_hold(subflow->conn); ++ msk->dl_next = head; ++ head = msk; ++ } ++ spin_unlock_bh(&queue->rskq_lock); ++ if (!head) ++ return; ++ ++ /* can't acquire the msk socket lock under the subflow one, ++ * or will cause ABBA deadlock ++ */ ++ release_sock(listener_ssk); ++ ++ for (msk = head; msk; msk = next) { ++ struct sock *sk = (struct sock *)msk; ++ ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); ++ next = msk->dl_next; ++ msk->dl_next = NULL; ++ ++ /* prevent the stack from later re-schedule the worker for ++ * this socket ++ */ ++ inet_sk_state_store(sk, TCP_CLOSE); ++ release_sock(sk); ++ ++ /* lockdep will report a false positive ABBA deadlock ++ * between cancel_work_sync and the listener socket. ++ * The involved locks belong to different sockets WRT ++ * the existing AB chain. ++ * Using a per socket key is problematic as key ++ * deregistration requires process context and must be ++ * performed at socket disposal time, in atomic ++ * context. ++ * Just tell lockdep to consider the listener socket ++ * released here. 
++ */ ++ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); ++ mptcp_cancel_work(sk); ++ mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_); ++ ++ sock_put(sk); ++ } ++ ++ /* we are still under the listener msk socket lock */ ++ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); ++} ++ + static int subflow_ulp_init(struct sock *sk) + { + struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/queue-6.2/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch b/queue-6.2/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch new file mode 100644 index 00000000000..17178700840 --- /dev/null +++ b/queue-6.2/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch @@ -0,0 +1,80 @@ +From ef832747a82dfbc22a3702219cc716f449b24e4a Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Tue, 18 Apr 2023 02:35:13 +0900 +Subject: nilfs2: initialize unused bytes in segment summary blocks + +From: Ryusuke Konishi + +commit ef832747a82dfbc22a3702219cc716f449b24e4a upstream. + +Syzbot still reports uninit-value in nilfs_add_checksums_on_logs() for +KMSAN enabled kernels after applying commit 7397031622e0 ("nilfs2: +initialize "struct nilfs_binfo_dat"->bi_pad field"). + +This is because the unused bytes at the end of each block in segment +summaries are not initialized. So this fixes the issue by padding the +unused bytes with null bytes. 
+ +Link: https://lkml.kernel.org/r/20230417173513.12598-1-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Tested-by: Ryusuke Konishi +Reported-by: syzbot+048585f3f4227bb2b49b@syzkaller.appspotmail.com + Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b +Cc: Alexander Potapenko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/segment.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_b + return 0; + } + ++/** ++ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area ++ * @sci: segment constructor object ++ * ++ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of ++ * the current segment summary block. ++ */ ++static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci) ++{ ++ struct nilfs_segsum_pointer *ssp; ++ ++ ssp = sci->sc_blk_cnt > 0 ? 
&sci->sc_binfo_ptr : &sci->sc_finfo_ptr; ++ if (ssp->offset < ssp->bh->b_size) ++ memset(ssp->bh->b_data + ssp->offset, 0, ++ ssp->bh->b_size - ssp->offset); ++} ++ + static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) + { + sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; +@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(st + * The current segment is filled up + * (internal code) + */ ++ nilfs_segctor_zeropad_segsum(sci); + sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); + return nilfs_segctor_reset_segment_buffer(sci); + } +@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block( + goto retry; + } + if (unlikely(required)) { ++ nilfs_segctor_zeropad_segsum(sci); + err = nilfs_segbuf_extend_segsum(segbuf); + if (unlikely(err)) + goto failed; +@@ -1531,6 +1550,7 @@ static int nilfs_segctor_collect(struct + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); + sci->sc_stage = prev_stage; + } ++ nilfs_segctor_zeropad_segsum(sci); + nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); + return 0; + diff --git a/queue-6.2/series b/queue-6.2/series index 34863be5344..3098ee3df3d 100644 --- a/queue-6.2/series +++ b/queue-6.2/series @@ -68,3 +68,11 @@ wifi-ath9k-don-t-mark-channelmap-stack-variable-read-only-in-ath9k_mci_update_wl maple_tree-make-maple-state-reusable-after-mas_empty_area_rev.patch maple_tree-fix-mas_empty_area-search.patch maple_tree-fix-a-potential-memory-leak-oob-access-or-other-unpredictable-bug.patch +asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch +nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch +mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch +mptcp-fix-accept-vs-worker-race.patch +tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch +memstick-fix-memory-leak-if-card-device-is-never-registered.patch +kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch 
+writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch diff --git a/queue-6.2/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch b/queue-6.2/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch new file mode 100644 index 00000000000..89579cc8b4e --- /dev/null +++ b/queue-6.2/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch @@ -0,0 +1,34 @@ +From 9235756885e865070c4be2facda75262dbd85967 Mon Sep 17 00:00:00 2001 +From: Steve Chou +Date: Tue, 11 Apr 2023 11:49:28 +0800 +Subject: tools/mm/page_owner_sort.c: fix TGID output when cull=tg is used + +From: Steve Chou + +commit 9235756885e865070c4be2facda75262dbd85967 upstream. + +When using cull option with 'tg' flag, the fprintf is using pid instead +of tgid. It should use tgid instead. + +Link: https://lkml.kernel.org/r/20230411034929.2071501-1-steve_chou@pesi.com.tw +Fixes: 9c8a0a8e599f4a ("tools/vm/page_owner_sort.c: support for user-defined culling rules") +Signed-off-by: Steve Chou +Cc: Jiajian Ye +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/vm/page_owner_sort.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/vm/page_owner_sort.c ++++ b/tools/vm/page_owner_sort.c +@@ -847,7 +847,7 @@ int main(int argc, char **argv) + if (cull & CULL_PID || filter & FILTER_PID) + fprintf(fout, ", PID %d", list[i].pid); + if (cull & CULL_TGID || filter & FILTER_TGID) +- fprintf(fout, ", TGID %d", list[i].pid); ++ fprintf(fout, ", TGID %d", list[i].tgid); + if (cull & CULL_COMM || filter & FILTER_COMM) + fprintf(fout, ", task_comm_name: %s", list[i].comm); + if (cull & CULL_ALLOCATOR) { diff --git a/queue-6.2/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch b/queue-6.2/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch new file mode 100644 index 00000000000..edbf725bf0b --- /dev/null +++ 
b/queue-6.2/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch @@ -0,0 +1,172 @@ +From 1ba1199ec5747f475538c0d25a32804e5ba1dfde Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Mon, 10 Apr 2023 21:08:26 +0800 +Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs + +From: Baokun Li + +commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde upstream. + +KASAN report null-ptr-deref: +================================================================== +BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0 +Write of size 8 at addr 0000000000000000 by task sync/943 +CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461 +Call Trace: + + dump_stack_lvl+0x7f/0xc0 + print_report+0x2ba/0x340 + kasan_report+0xc4/0x120 + kasan_check_range+0x1b7/0x2e0 + __kasan_check_write+0x24/0x40 + bdi_split_work_to_wbs+0x5c5/0x7b0 + sync_inodes_sb+0x195/0x630 + sync_inodes_one_sb+0x3a/0x50 + iterate_supers+0x106/0x1b0 + ksys_sync+0x98/0x160 +[...] +================================================================== + +The race that causes the above issue is as follows: + + cpu1 cpu2 +-------------------------|------------------------- +inode_switch_wbs + INIT_WORK(&isw->work, inode_switch_wbs_work_fn) + queue_rcu_work(isw_wq, &isw->work) + // queue_work async + inode_switch_wbs_work_fn + wb_put_many(old_wb, nr_switched) + percpu_ref_put_many + ref->data->release(ref) + cgwb_release + queue_work(cgwb_release_wq, &wb->release_work) + // queue_work async + &wb->release_work + cgwb_release_workfn + ksys_sync + iterate_supers + sync_inodes_one_sb + sync_inodes_sb + bdi_split_work_to_wbs + kmalloc(sizeof(*work), GFP_ATOMIC) + // alloc memory failed + percpu_ref_exit + ref->data = NULL + kfree(data) + wb_get(wb) + percpu_ref_get(&wb->refcnt) + percpu_ref_get_many(ref, 1) + atomic_long_add(nr, &ref->data->count) + atomic64_add(i, v) + // trigger null-ptr-deref + +bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all 
+wbs. If the allocation of new work fails, the on-stack fallback will be +used and the reference count of the current wb is increased afterwards. +If cgroup writeback membership switches occur before getting the reference +count and the current wb is released as old_wb, then calling wb_get() or +wb_put() will trigger the null pointer dereference above. + +This issue was introduced in v4.3-rc7 (see fix tag1). Both +sync_inodes_sb() and __writeback_inodes_sb_nr() calls to +bdi_split_work_to_wbs() can trigger this issue. For scenarios called via +sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize +sync(2) against cgroup writeback membership switches") reduced the +possibility of the issue by adding wb_switch_rwsem, but a change in +v5.14-rc1 (see fix tag2) removed the +"inode_io_list_del_locked(inode, old_wb)" from +inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io, +thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(), +and the issue becomes easily reproducible again. + +To solve this problem, percpu_ref_exit() is called under RCU protection to +avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs(). +Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(), +and skip the current wb if wb_tryget() fails because the wb has already +been shut down. 
+ +Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com +Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones") +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Acked-by: Tejun Heo +Cc: Alexander Viro +Cc: Andreas Dilger +Cc: Christian Brauner +Cc: Dennis Zhou +Cc: Hou Tao +Cc: yangerkun +Cc: Zhang Yi +Cc: Jens Axboe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/fs-writeback.c | 17 ++++++++++------- + mm/backing-dev.c | 12 ++++++++++-- + 2 files changed, 20 insertions(+), 9 deletions(-) + +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -976,6 +976,16 @@ restart: + continue; + } + ++ /* ++ * If wb_tryget fails, the wb has been shutdown, skip it. ++ * ++ * Pin @wb so that it stays on @bdi->wb_list. This allows ++ * continuing iteration from @wb after dropping and ++ * regrabbing rcu read lock. ++ */ ++ if (!wb_tryget(wb)) ++ continue; ++ + /* alloc failed, execute synchronously using on-stack fallback */ + work = &fallback_work; + *work = *base_work; +@@ -984,13 +994,6 @@ restart: + work->done = &fallback_work_done; + + wb_queue_work(wb, work); +- +- /* +- * Pin @wb so that it stays on @bdi->wb_list. This allows +- * continuing iteration from @wb after dropping and +- * regrabbing rcu read lock. 
+- */ +- wb_get(wb); + last_wb = wb; + + rcu_read_unlock(); +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -507,6 +507,15 @@ static LIST_HEAD(offline_cgwbs); + static void cleanup_offline_cgwbs_workfn(struct work_struct *work); + static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn); + ++static void cgwb_free_rcu(struct rcu_head *rcu_head) ++{ ++ struct bdi_writeback *wb = container_of(rcu_head, ++ struct bdi_writeback, rcu); ++ ++ percpu_ref_exit(&wb->refcnt); ++ kfree(wb); ++} ++ + static void cgwb_release_workfn(struct work_struct *work) + { + struct bdi_writeback *wb = container_of(work, struct bdi_writeback, +@@ -529,11 +538,10 @@ static void cgwb_release_workfn(struct w + list_del(&wb->offline_node); + spin_unlock_irq(&cgwb_lock); + +- percpu_ref_exit(&wb->refcnt); + wb_exit(wb); + bdi_put(bdi); + WARN_ON_ONCE(!list_empty(&wb->b_attached)); +- kfree_rcu(wb, rcu); ++ call_rcu(&wb->rcu, cgwb_free_rcu); + } + + static void cgwb_release(struct percpu_ref *refcnt)