From 0ee607cc66a494cc13628b059e034be41a52e594 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 May 2023 18:32:15 -0400 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- queue-5.10/series | 1 + ...-fix-null-ptr-deref-write-in-bdi_spl.patch | 175 ++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch diff --git a/queue-5.10/series b/queue-5.10/series index 4ad8f7ae860..af60240caed 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -12,3 +12,4 @@ iio-adc-palmas_gpadc-fix-null-dereference-on-rmmod.patch asoc-intel-bytcr_rt5640-add-quirk-for-the-acer-iconi.patch asm-generic-io.h-suppress-endianness-warnings-for-re.patch wireguard-timers-cast-enum-limits-members-to-int-in-prints.patch +writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch diff --git a/queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch b/queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch new file mode 100644 index 00000000000..bc0b8f57f58 --- /dev/null +++ b/queue-5.10/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_spl.patch @@ -0,0 +1,175 @@ +From fc14c1ba2539d4e9a868c994ba2f6c2b18c4f77a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Apr 2023 21:08:26 +0800 +Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs + +From: Baokun Li + +[ Upstream commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde ] + +KASAN report null-ptr-deref: +================================================================== +BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0 +Write of size 8 at addr 0000000000000000 by task sync/943 +CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461 +Call Trace: + + dump_stack_lvl+0x7f/0xc0 + print_report+0x2ba/0x340 + kasan_report+0xc4/0x120 + kasan_check_range+0x1b7/0x2e0 + __kasan_check_write+0x24/0x40 + bdi_split_work_to_wbs+0x5c5/0x7b0 + sync_inodes_sb+0x195/0x630 + 
sync_inodes_one_sb+0x3a/0x50 + iterate_supers+0x106/0x1b0 + ksys_sync+0x98/0x160 +[...] +================================================================== + +The race that causes the above issue is as follows: + + cpu1 cpu2 +-------------------------|------------------------- +inode_switch_wbs + INIT_WORK(&isw->work, inode_switch_wbs_work_fn) + queue_rcu_work(isw_wq, &isw->work) + // queue_work async + inode_switch_wbs_work_fn + wb_put_many(old_wb, nr_switched) + percpu_ref_put_many + ref->data->release(ref) + cgwb_release + queue_work(cgwb_release_wq, &wb->release_work) + // queue_work async + &wb->release_work + cgwb_release_workfn + ksys_sync + iterate_supers + sync_inodes_one_sb + sync_inodes_sb + bdi_split_work_to_wbs + kmalloc(sizeof(*work), GFP_ATOMIC) + // alloc memory failed + percpu_ref_exit + ref->data = NULL + kfree(data) + wb_get(wb) + percpu_ref_get(&wb->refcnt) + percpu_ref_get_many(ref, 1) + atomic_long_add(nr, &ref->data->count) + atomic64_add(i, v) + // trigger null-ptr-deref + +bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all +wbs. If the allocation of new work fails, the on-stack fallback will be +used and the reference count of the current wb is increased afterwards. +If cgroup writeback membership switches occur before getting the reference +count and the current wb is released as old_wb, then calling wb_get() or +wb_put() will trigger the null pointer dereference above. + +This issue was introduced in v4.3-rc7 (see fix tag1). Both +sync_inodes_sb() and __writeback_inodes_sb_nr() calls to +bdi_split_work_to_wbs() can trigger this issue. 
For scenarios called via +sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize +sync(2) against cgroup writeback membership switches") reduced the +possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see +fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from +inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io, +thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(), +and the issue becomes easily reproducible again. + +To solve this problem, percpu_ref_exit() is called under RCU protection to +avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs(). +Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(), +and skip the current wb if wb_tryget() fails because the wb has already +been shutdown. + +Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com +Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones") +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Acked-by: Tejun Heo +Cc: Alexander Viro +Cc: Andreas Dilger +Cc: Christian Brauner +Cc: Dennis Zhou +Cc: Hou Tao +Cc: yangerkun +Cc: Zhang Yi +Cc: Jens Axboe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 17 ++++++++++------- + mm/backing-dev.c | 11 ++++++++++- + 2 files changed, 20 insertions(+), 8 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 46c15dd2405c6..6f18459f5e381 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -884,6 +884,16 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, + continue; + } + ++ /* ++ * If wb_tryget fails, the wb has been shutdown, skip it. ++ * ++ * Pin @wb so that it stays on @bdi->wb_list. This allows ++ * continuing iteration from @wb after dropping and ++ * regrabbing rcu read lock. 
++ */ ++ if (!wb_tryget(wb)) ++ continue; ++ + /* alloc failed, execute synchronously using on-stack fallback */ + work = &fallback_work; + *work = *base_work; +@@ -892,13 +902,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, + work->done = &fallback_work_done; + + wb_queue_work(wb, work); +- +- /* +- * Pin @wb so that it stays on @bdi->wb_list. This allows +- * continuing iteration from @wb after dropping and +- * regrabbing rcu read lock. +- */ +- wb_get(wb); + last_wb = wb; + + rcu_read_unlock(); +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index ca770a783a9f9..b28f629c35271 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -378,6 +378,15 @@ static void wb_exit(struct bdi_writeback *wb) + static DEFINE_SPINLOCK(cgwb_lock); + static struct workqueue_struct *cgwb_release_wq; + ++static void cgwb_free_rcu(struct rcu_head *rcu_head) ++{ ++ struct bdi_writeback *wb = container_of(rcu_head, ++ struct bdi_writeback, rcu); ++ ++ percpu_ref_exit(&wb->refcnt); ++ kfree(wb); ++} ++ + static void cgwb_release_workfn(struct work_struct *work) + { + struct bdi_writeback *wb = container_of(work, struct bdi_writeback, +@@ -397,7 +406,7 @@ static void cgwb_release_workfn(struct work_struct *work) + fprop_local_destroy_percpu(&wb->memcg_completions); + percpu_ref_exit(&wb->refcnt); + wb_exit(wb); +- kfree_rcu(wb, rcu); ++ call_rcu(&wb->rcu, cgwb_free_rcu); + } + + static void cgwb_release(struct percpu_ref *refcnt) +-- +2.39.2 + -- 2.47.3