git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 Apr 2023 16:38:48 +0000 (18:38 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 Apr 2023 16:38:48 +0000 (18:38 +0200)
added patches:
kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
memstick-fix-memory-leak-if-card-device-is-never-registered.patch
nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch

queue-6.1/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch [new file with mode: 0644]
queue-6.1/memstick-fix-memory-leak-if-card-device-is-never-registered.patch [new file with mode: 0644]
queue-6.1/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch [new file with mode: 0644]
queue-6.1/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch [new file with mode: 0644]

diff --git a/queue-6.1/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch b/queue-6.1/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
new file mode 100644 (file)
index 0000000..0a517f3
--- /dev/null
@@ -0,0 +1,151 @@
+From 659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 Mon Sep 17 00:00:00 2001
+From: Ondrej Mosnacek <omosnace@redhat.com>
+Date: Fri, 17 Feb 2023 17:21:54 +0100
+Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
+
+From: Ondrej Mosnacek <omosnace@redhat.com>
+
+commit 659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 upstream.
+
+Linux Security Modules (LSMs) that implement the "capable" hook will
+usually emit an access denial message to the audit log whenever they
+"block" the current task from using the given capability based on their
+security policy.
+
+The occurrence of a denial is used as an indication that the given task
+has attempted an operation that requires the given access permission, so
+the callers of functions that perform LSM permission checks must take care
+to avoid calling them too early (before it is decided if the permission is
+actually needed to perform the requested operation).
+
+The __sys_setres[ug]id() functions violate this convention by first
+calling ns_capable_setid() and only then checking if the operation
+requires the capability or not.  It means that any caller that has the
+capability granted by DAC (task's capability set) but not by MAC (LSMs)
+will generate a "denied" audit record, even if it is doing an operation for
+which the capability is not required.
+
+Fix this by reordering the checks such that ns_capable_setid() is checked
+last and -EPERM is returned immediately if it returns false.
+
+While there, also do two small optimizations:
+* move the capability check before prepare_creds() and
+* bail out early in case of a no-op.
+
+Link: https://lkml.kernel.org/r/20230217162154.837549-1-omosnace@redhat.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sys.c |   69 ++++++++++++++++++++++++++++++++++-------------------------
+ 1 file changed, 40 insertions(+), 29 deletions(-)
+
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -664,6 +664,7 @@ long __sys_setresuid(uid_t ruid, uid_t e
+       struct cred *new;
+       int retval;
+       kuid_t kruid, keuid, ksuid;
++      bool ruid_new, euid_new, suid_new;
+       kruid = make_kuid(ns, ruid);
+       keuid = make_kuid(ns, euid);
+@@ -678,25 +679,29 @@ long __sys_setresuid(uid_t ruid, uid_t e
+       if ((suid != (uid_t) -1) && !uid_valid(ksuid))
+               return -EINVAL;
++      old = current_cred();
++
++      /* check for no-op */
++      if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
++          (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
++                                  uid_eq(keuid, old->fsuid))) &&
++          (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
++              return 0;
++
++      ruid_new = ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
++                 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
++      euid_new = euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
++                 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
++      suid_new = suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
++                 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
++      if ((ruid_new || euid_new || suid_new) &&
++          !ns_capable_setid(old->user_ns, CAP_SETUID))
++              return -EPERM;
++
+       new = prepare_creds();
+       if (!new)
+               return -ENOMEM;
+-      old = current_cred();
+-
+-      retval = -EPERM;
+-      if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
+-              if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
+-                  !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
+-                      goto error;
+-              if (euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
+-                  !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
+-                      goto error;
+-              if (suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
+-                  !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
+-                      goto error;
+-      }
+-
+       if (ruid != (uid_t) -1) {
+               new->uid = kruid;
+               if (!uid_eq(kruid, old->uid)) {
+@@ -761,6 +766,7 @@ long __sys_setresgid(gid_t rgid, gid_t e
+       struct cred *new;
+       int retval;
+       kgid_t krgid, kegid, ksgid;
++      bool rgid_new, egid_new, sgid_new;
+       krgid = make_kgid(ns, rgid);
+       kegid = make_kgid(ns, egid);
+@@ -773,23 +779,28 @@ long __sys_setresgid(gid_t rgid, gid_t e
+       if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
+               return -EINVAL;
++      old = current_cred();
++
++      /* check for no-op */
++      if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
++          (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
++                                  gid_eq(kegid, old->fsgid))) &&
++          (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
++              return 0;
++
++      rgid_new = rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
++                 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
++      egid_new = egid != (gid_t) -1        && !gid_eq(kegid, old->gid) &&
++                 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
++      sgid_new = sgid != (gid_t) -1        && !gid_eq(ksgid, old->gid) &&
++                 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
++      if ((rgid_new || egid_new || sgid_new) &&
++          !ns_capable_setid(old->user_ns, CAP_SETGID))
++              return -EPERM;
++
+       new = prepare_creds();
+       if (!new)
+               return -ENOMEM;
+-      old = current_cred();
+-
+-      retval = -EPERM;
+-      if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
+-              if (rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
+-                  !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
+-                      goto error;
+-              if (egid != (gid_t) -1        && !gid_eq(kegid, old->gid) &&
+-                  !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
+-                      goto error;
+-              if (sgid != (gid_t) -1        && !gid_eq(ksgid, old->gid) &&
+-                  !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
+-                      goto error;
+-      }
+       if (rgid != (gid_t) -1)
+               new->gid = krgid;
diff --git a/queue-6.1/memstick-fix-memory-leak-if-card-device-is-never-registered.patch b/queue-6.1/memstick-fix-memory-leak-if-card-device-is-never-registered.patch
new file mode 100644 (file)
index 0000000..8de753d
--- /dev/null
@@ -0,0 +1,61 @@
+From 4b6d621c9d859ff89e68cebf6178652592676013 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Sat, 1 Apr 2023 22:03:27 +0200
+Subject: memstick: fix memory leak if card device is never registered
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+commit 4b6d621c9d859ff89e68cebf6178652592676013 upstream.
+
+When calling dev_set_name() memory is allocated for the name for the
+struct device.  Once that structure device is registered, or attempted
+to be registered, with the driver core, the driver core will handle
+cleaning up that memory when the device is removed from the system.
+
+Unfortunately for the memstick code, there is an error path that causes
+the struct device to never be registered, and so the memory allocated in
+dev_set_name will be leaked.  Fix that leak by manually freeing it right
+before the memory for the device is freed.
+
+Cc: Maxim Levitsky <maximlevitsky@gmail.com>
+Cc: Alex Dubov <oakad@yahoo.com>
+Cc: Ulf Hansson <ulf.hansson@linaro.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Cc: Hans de Goede <hdegoede@redhat.com>
+Cc: Kay Sievers <kay.sievers@vrfy.org>
+Cc: linux-mmc@vger.kernel.org
+Fixes: 0252c3b4f018 ("memstick: struct device - replace bus_id with dev_name(), dev_set_name()")
+Cc: stable <stable@kernel.org>
+Co-developed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Co-developed-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
+Signed-off-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
+Link: https://lore.kernel.org/r/20230401200327.16800-1-gregkh@linuxfoundation.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/memstick/core/memstick.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/memstick/core/memstick.c
++++ b/drivers/memstick/core/memstick.c
+@@ -410,6 +410,7 @@ static struct memstick_dev *memstick_all
+       return card;
+ err_out:
+       host->card = old_card;
++      kfree_const(card->dev.kobj.name);
+       kfree(card);
+       return NULL;
+ }
+@@ -468,8 +469,10 @@ static void memstick_check(struct work_s
+                               put_device(&card->dev);
+                               host->card = NULL;
+                       }
+-              } else
++              } else {
++                      kfree_const(card->dev.kobj.name);
+                       kfree(card);
++              }
+       }
+ out_power_off:
diff --git a/queue-6.1/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch b/queue-6.1/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
new file mode 100644 (file)
index 0000000..1717870
--- /dev/null
@@ -0,0 +1,80 @@
+From ef832747a82dfbc22a3702219cc716f449b24e4a Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Tue, 18 Apr 2023 02:35:13 +0900
+Subject: nilfs2: initialize unused bytes in segment summary blocks
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit ef832747a82dfbc22a3702219cc716f449b24e4a upstream.
+
+Syzbot still reports uninit-value in nilfs_add_checksums_on_logs() for
+KMSAN enabled kernels after applying commit 7397031622e0 ("nilfs2:
+initialize "struct nilfs_binfo_dat"->bi_pad field").
+
+This is because the unused bytes at the end of each block in segment
+summaries are not initialized.  So this fixes the issue by padding the
+unused bytes with null bytes.
+
+Link: https://lkml.kernel.org/r/20230417173513.12598-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+048585f3f4227bb2b49b@syzkaller.appspotmail.com
+  Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b
+Cc: Alexander Potapenko <glider@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segment.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_b
+       return 0;
+ }
++/**
++ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
++ * @sci: segment constructor object
++ *
++ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
++ * the current segment summary block.
++ */
++static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
++{
++      struct nilfs_segsum_pointer *ssp;
++
++      ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
++      if (ssp->offset < ssp->bh->b_size)
++              memset(ssp->bh->b_data + ssp->offset, 0,
++                     ssp->bh->b_size - ssp->offset);
++}
++
+ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
+ {
+       sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
+@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(st
+                               * The current segment is filled up
+                               * (internal code)
+                               */
++      nilfs_segctor_zeropad_segsum(sci);
+       sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
+       return nilfs_segctor_reset_segment_buffer(sci);
+ }
+@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(
+               goto retry;
+       }
+       if (unlikely(required)) {
++              nilfs_segctor_zeropad_segsum(sci);
+               err = nilfs_segbuf_extend_segsum(segbuf);
+               if (unlikely(err))
+                       goto failed;
+@@ -1531,6 +1550,7 @@ static int nilfs_segctor_collect(struct
+               nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
+               sci->sc_stage = prev_stage;
+       }
++      nilfs_segctor_zeropad_segsum(sci);
+       nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
+       return 0;
index 406d86c75cf59ad6d689edb24e366c1ff5af0664..75d02ee629c1961b0ac0584ae823e88a04636919 100644 (file)
@@ -58,3 +58,8 @@ loongarch-mark-3-symbol-exports-as-non-gpl.patch
 maple_tree-make-maple-state-reusable-after-mas_empty_area_rev.patch
 maple_tree-fix-mas_empty_area-search.patch
 maple_tree-fix-a-potential-memory-leak-oob-access-or-other-unpredictable-bug.patch
+nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
+tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
+memstick-fix-memory-leak-if-card-device-is-never-registered.patch
+kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
+writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch
diff --git a/queue-6.1/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch b/queue-6.1/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
new file mode 100644 (file)
index 0000000..89579cc
--- /dev/null
@@ -0,0 +1,34 @@
+From 9235756885e865070c4be2facda75262dbd85967 Mon Sep 17 00:00:00 2001
+From: Steve Chou <steve_chou@pesi.com.tw>
+Date: Tue, 11 Apr 2023 11:49:28 +0800
+Subject: tools/mm/page_owner_sort.c: fix TGID output when cull=tg is used
+
+From: Steve Chou <steve_chou@pesi.com.tw>
+
+commit 9235756885e865070c4be2facda75262dbd85967 upstream.
+
+When using cull option with 'tg' flag, the fprintf is using pid instead
+of tgid. It should use tgid instead.
+
+Link: https://lkml.kernel.org/r/20230411034929.2071501-1-steve_chou@pesi.com.tw
+Fixes: 9c8a0a8e599f4a ("tools/vm/page_owner_sort.c: support for user-defined culling rules")
+Signed-off-by: Steve Chou <steve_chou@pesi.com.tw>
+Cc: Jiajian Ye <yejiajian2018@email.szu.edu.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/vm/page_owner_sort.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/vm/page_owner_sort.c
++++ b/tools/vm/page_owner_sort.c
+@@ -847,7 +847,7 @@ int main(int argc, char **argv)
+                       if (cull & CULL_PID || filter & FILTER_PID)
+                               fprintf(fout, ", PID %d", list[i].pid);
+                       if (cull & CULL_TGID || filter & FILTER_TGID)
+-                              fprintf(fout, ", TGID %d", list[i].pid);
++                              fprintf(fout, ", TGID %d", list[i].tgid);
+                       if (cull & CULL_COMM || filter & FILTER_COMM)
+                               fprintf(fout, ", task_comm_name: %s", list[i].comm);
+                       if (cull & CULL_ALLOCATOR) {
diff --git a/queue-6.1/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch b/queue-6.1/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch
new file mode 100644 (file)
index 0000000..f20eed8
--- /dev/null
@@ -0,0 +1,172 @@
+From 1ba1199ec5747f475538c0d25a32804e5ba1dfde Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Mon, 10 Apr 2023 21:08:26 +0800
+Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde upstream.
+
+KASAN report null-ptr-deref:
+==================================================================
+BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0
+Write of size 8 at addr 0000000000000000 by task sync/943
+CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x7f/0xc0
+ print_report+0x2ba/0x340
+ kasan_report+0xc4/0x120
+ kasan_check_range+0x1b7/0x2e0
+ __kasan_check_write+0x24/0x40
+ bdi_split_work_to_wbs+0x5c5/0x7b0
+ sync_inodes_sb+0x195/0x630
+ sync_inodes_one_sb+0x3a/0x50
+ iterate_supers+0x106/0x1b0
+ ksys_sync+0x98/0x160
+[...]
+==================================================================
+
+The race that causes the above issue is as follows:
+
+           cpu1                     cpu2
+-------------------------|-------------------------
+inode_switch_wbs
+ INIT_WORK(&isw->work, inode_switch_wbs_work_fn)
+ queue_rcu_work(isw_wq, &isw->work)
+ // queue_work async
+  inode_switch_wbs_work_fn
+   wb_put_many(old_wb, nr_switched)
+    percpu_ref_put_many
+     ref->data->release(ref)
+     cgwb_release
+      queue_work(cgwb_release_wq, &wb->release_work)
+      // queue_work async
+       &wb->release_work
+       cgwb_release_workfn
+                            ksys_sync
+                             iterate_supers
+                              sync_inodes_one_sb
+                               sync_inodes_sb
+                                bdi_split_work_to_wbs
+                                 kmalloc(sizeof(*work), GFP_ATOMIC)
+                                 // alloc memory failed
+        percpu_ref_exit
+         ref->data = NULL
+         kfree(data)
+                                 wb_get(wb)
+                                  percpu_ref_get(&wb->refcnt)
+                                   percpu_ref_get_many(ref, 1)
+                                    atomic_long_add(nr, &ref->data->count)
+                                     atomic64_add(i, v)
+                                     // trigger null-ptr-deref
+
+bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all
+wbs.  If the allocation of new work fails, the on-stack fallback will be
+used and the reference count of the current wb is increased afterwards.
+If cgroup writeback membership switches occur before getting the reference
+count and the current wb is released as old_wb, then calling wb_get() or
+wb_put() will trigger the null pointer dereference above.
+
+This issue was introduced in v4.3-rc7 (see fix tag1).  Both
+sync_inodes_sb() and __writeback_inodes_sb_nr() calls to
+bdi_split_work_to_wbs() can trigger this issue.  For scenarios called via
+sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize
+sync(2) against cgroup writeback membership switches") reduced the
+possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see
+fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from
+inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io,
+thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(),
+and the issue becomes easily reproducible again.
+
+To solve this problem, percpu_ref_exit() is called under RCU protection to
+avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs().
+Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(),
+and skip the current wb if wb_tryget() fails because the wb has already
+been shutdown.
+
+Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com
+Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Tejun Heo <tj@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Dilger <adilger.kernel@dilger.ca>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Dennis Zhou <dennis@kernel.org>
+Cc: Hou Tao <houtao1@huawei.com>
+Cc: yangerkun <yangerkun@huawei.com>
+Cc: Zhang Yi <yi.zhang@huawei.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fs-writeback.c |   17 ++++++++++-------
+ mm/backing-dev.c  |   12 ++++++++++--
+ 2 files changed, 20 insertions(+), 9 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -974,6 +974,16 @@ restart:
+                       continue;
+               }
++              /*
++               * If wb_tryget fails, the wb has been shutdown, skip it.
++               *
++               * Pin @wb so that it stays on @bdi->wb_list.  This allows
++               * continuing iteration from @wb after dropping and
++               * regrabbing rcu read lock.
++               */
++              if (!wb_tryget(wb))
++                      continue;
++
+               /* alloc failed, execute synchronously using on-stack fallback */
+               work = &fallback_work;
+               *work = *base_work;
+@@ -982,13 +992,6 @@ restart:
+               work->done = &fallback_work_done;
+               wb_queue_work(wb, work);
+-
+-              /*
+-               * Pin @wb so that it stays on @bdi->wb_list.  This allows
+-               * continuing iteration from @wb after dropping and
+-               * regrabbing rcu read lock.
+-               */
+-              wb_get(wb);
+               last_wb = wb;
+               rcu_read_unlock();
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -380,6 +380,15 @@ static LIST_HEAD(offline_cgwbs);
+ static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
+ static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
++static void cgwb_free_rcu(struct rcu_head *rcu_head)
++{
++      struct bdi_writeback *wb = container_of(rcu_head,
++                      struct bdi_writeback, rcu);
++
++      percpu_ref_exit(&wb->refcnt);
++      kfree(wb);
++}
++
+ static void cgwb_release_workfn(struct work_struct *work)
+ {
+       struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
+@@ -402,11 +411,10 @@ static void cgwb_release_workfn(struct w
+       list_del(&wb->offline_node);
+       spin_unlock_irq(&cgwb_lock);
+-      percpu_ref_exit(&wb->refcnt);
+       wb_exit(wb);
+       bdi_put(bdi);
+       WARN_ON_ONCE(!list_empty(&wb->b_attached));
+-      kfree_rcu(wb, rcu);
++      call_rcu(&wb->rcu, cgwb_free_rcu);
+ }
+ static void cgwb_release(struct percpu_ref *refcnt)