From 0ee607cc66a494cc13628b059e034be41a52e594 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 May 2023 18:32:15 -0400 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- queue-5.10/series | 1 + ...-fix-null-ptr-deref-write-in-bdi_spl.patch | 175 ++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch diff --git a/queue-5.10/series b/queue-5.10/series index 4ad8f7ae860..af60240caed 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -12,3 +12,4 @@ iio-adc-palmas_gpadc-fix-null-dereference-on-rmmod.patch asoc-intel-bytcr_rt5640-add-quirk-for-the-acer-iconi.patch asm-generic-io.h-suppress-endianness-warnings-for-re.patch wireguard-timers-cast-enum-limits-members-to-int-in-prints.patch +writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch diff --git a/queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch b/queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch new file mode 100644 index 00000000000..bc0b8f57f58 --- /dev/null +++ b/queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch @@ -0,0 +1,175 @@ +From fc14c1ba2539d4e9a868c994ba2f6c2b18c4f77a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Apr 2023 21:08:26 +0800 +Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs + +From: Baokun Li + +[ Upstream commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde ] + +KASAN report null-ptr-deref: +================================================================== +BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0 +Write of size 8 at addr 0000000000000000 by task sync/943 +CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461 +Call Trace: + + dump_stack_lvl+0x7f/0xc0 + print_report+0x2ba/0x340 + kasan_report+0xc4/0x120 + kasan_check_range+0x1b7/0x2e0 + __kasan_check_write+0x24/0x40 + bdi_split_work_to_wbs+0x5c5/0x7b0 + sync_inodes_sb+0x195/0x630 + 
sync_inodes_one_sb+0x3a/0x50 + iterate_supers+0x106/0x1b0 + ksys_sync+0x98/0x160 +[...] +================================================================== + +The race that causes the above issue is as follows: + + cpu1 cpu2 +-------------------------|------------------------- +inode_switch_wbs + INIT_WORK(&isw->work, inode_switch_wbs_work_fn) + queue_rcu_work(isw_wq, &isw->work) + // queue_work async + inode_switch_wbs_work_fn + wb_put_many(old_wb, nr_switched) + percpu_ref_put_many + ref->data->release(ref) + cgwb_release + queue_work(cgwb_release_wq, &wb->release_work) + // queue_work async + &wb->release_work + cgwb_release_workfn + ksys_sync + iterate_supers + sync_inodes_one_sb + sync_inodes_sb + bdi_split_work_to_wbs + kmalloc(sizeof(*work), GFP_ATOMIC) + // alloc memory failed + percpu_ref_exit + ref->data = NULL + kfree(data) + wb_get(wb) + percpu_ref_get(&wb->refcnt) + percpu_ref_get_many(ref, 1) + atomic_long_add(nr, &ref->data->count) + atomic64_add(i, v) + // trigger null-ptr-deref + +bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all +wbs. If the allocation of new work fails, the on-stack fallback will be +used and the reference count of the current wb is increased afterwards. +If cgroup writeback membership switches occur before getting the reference +count and the current wb is released as old_wb, then calling wb_get() or +wb_put() will trigger the null pointer dereference above. + +This issue was introduced in v4.3-rc7 (see fix tag1). Both +sync_inodes_sb() and __writeback_inodes_sb_nr() calls to +bdi_split_work_to_wbs() can trigger this issue. 
For scenarios called via +sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize +sync(2) against cgroup writeback membership switches") reduced the +possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see +fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from +inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io, +thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(), +and the issue becomes easily reproducible again. + +To solve this problem, percpu_ref_exit() is called under RCU protection to +avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs(). +Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(), +and skip the current wb if wb_tryget() fails because the wb has already +been shutdown. + +Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com +Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones") +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Acked-by: Tejun Heo +Cc: Alexander Viro +Cc: Andreas Dilger +Cc: Christian Brauner +Cc: Dennis Zhou +Cc: Hou Tao +Cc: yangerkun +Cc: Zhang Yi +Cc: Jens Axboe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 17 ++++++++++------- + mm/backing-dev.c | 11 ++++++++++- + 2 files changed, 20 insertions(+), 8 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 46c15dd2405c6..6f18459f5e381 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -884,6 +884,16 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, + continue; + } + ++ /* ++ * If wb_tryget fails, the wb has been shutdown, skip it. ++ * ++ * Pin @wb so that it stays on @bdi->wb_list. This allows ++ * continuing iteration from @wb after dropping and ++ * regrabbing rcu read lock. 
++ */ ++ if (!wb_tryget(wb)) ++ continue; ++ + /* alloc failed, execute synchronously using on-stack fallback */ + work = &fallback_work; + *work = *base_work; +@@ -892,13 +902,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, + work->done = &fallback_work_done; + + wb_queue_work(wb, work); +- +- /* +- * Pin @wb so that it stays on @bdi->wb_list. This allows +- * continuing iteration from @wb after dropping and +- * regrabbing rcu read lock. +- */ +- wb_get(wb); + last_wb = wb; + + rcu_read_unlock(); +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index ca770a783a9f9..b28f629c35271 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -378,6 +378,15 @@ static void wb_exit(struct bdi_writeback *wb) + static DEFINE_SPINLOCK(cgwb_lock); + static struct workqueue_struct *cgwb_release_wq; + ++static void cgwb_free_rcu(struct rcu_head *rcu_head) ++{ ++ struct bdi_writeback *wb = container_of(rcu_head, ++ struct bdi_writeback, rcu); ++ ++ percpu_ref_exit(&wb->refcnt); ++ kfree(wb); ++} ++ + static void cgwb_release_workfn(struct work_struct *work) + { + struct bdi_writeback *wb = container_of(work, struct bdi_writeback, +@@ -397,7 +406,7 @@ static void cgwb_release_workfn(struct work_struct *work) + fprop_local_destroy_percpu(&wb->memcg_completions); + percpu_ref_exit(&wb->refcnt); + wb_exit(wb); +- kfree_rcu(wb, rcu); ++ call_rcu(&wb->rcu, cgwb_free_rcu); + } + + static void cgwb_release(struct percpu_ref *refcnt) +-- +2.39.2 + -- 2.47.3