--- /dev/null
+From 352b837a5541690d4f843819028cf2b8be83d424 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Wed, 30 Nov 2022 13:26:32 -0500
+Subject: dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 352b837a5541690d4f843819028cf2b8be83d424 upstream.
+
+Same ABBA deadlock pattern fixed in commit 4b60f452ec51 ("dm thin: Fix
+ABBA deadlock between shrink_slab and dm_pool_abort_metadata") to
+DM-cache's metadata.
+
+Reported-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Cc: stable@vger.kernel.org
+Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag")
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-metadata.c | 54 +++++++++++++++++++++++++++++++++++------
+ 1 file changed, 47 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-cache-metadata.c
++++ b/drivers/md/dm-cache-metadata.c
+@@ -551,11 +551,13 @@ static int __create_persistent_data_obje
+ return r;
+ }
+
+-static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
++static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd,
++ bool destroy_bm)
+ {
+ dm_sm_destroy(cmd->metadata_sm);
+ dm_tm_destroy(cmd->tm);
+- dm_block_manager_destroy(cmd->bm);
++ if (destroy_bm)
++ dm_block_manager_destroy(cmd->bm);
+ }
+
+ typedef unsigned long (*flags_mutator)(unsigned long);
+@@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_
+ cmd2 = lookup(bdev);
+ if (cmd2) {
+ mutex_unlock(&table_lock);
+- __destroy_persistent_data_objects(cmd);
++ __destroy_persistent_data_objects(cmd, true);
+ kfree(cmd);
+ return cmd2;
+ }
+@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_c
+ mutex_unlock(&table_lock);
+
+ if (!cmd->fail_io)
+- __destroy_persistent_data_objects(cmd);
++ __destroy_persistent_data_objects(cmd, true);
+ kfree(cmd);
+ }
+ }
+@@ -1808,14 +1810,52 @@ int dm_cache_metadata_needs_check(struct
+
+ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
+ {
+- int r;
++ int r = -EINVAL;
++ struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++ /* fail_io is double-checked with cmd->root_lock held below */
++ if (unlikely(cmd->fail_io))
++ return r;
++
++ /*
++ * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++ * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++ * shrinker associated with the block manager's bufio client vs cmd root_lock).
++ * - must take shrinker_rwsem without holding cmd->root_lock
++ */
++ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++ CACHE_MAX_CONCURRENT_LOCKS);
+
+ WRITE_LOCK(cmd);
+- __destroy_persistent_data_objects(cmd);
+- r = __create_persistent_data_objects(cmd, false);
++ if (cmd->fail_io) {
++ WRITE_UNLOCK(cmd);
++ goto out;
++ }
++
++ __destroy_persistent_data_objects(cmd, false);
++ old_bm = cmd->bm;
++ if (IS_ERR(new_bm)) {
++ DMERR("could not create block manager during abort");
++ cmd->bm = NULL;
++ r = PTR_ERR(new_bm);
++ goto out_unlock;
++ }
++
++ cmd->bm = new_bm;
++ r = __open_or_format_metadata(cmd, false);
++ if (r) {
++ cmd->bm = NULL;
++ goto out_unlock;
++ }
++ new_bm = NULL;
++out_unlock:
+ if (r)
+ cmd->fail_io = true;
+ WRITE_UNLOCK(cmd);
++ dm_block_manager_destroy(old_bm);
++out:
++ if (new_bm && !IS_ERR(new_bm))
++ dm_block_manager_destroy(new_bm);
+
+ return r;
+ }
--- /dev/null
+From 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:49 +0800
+Subject: dm cache: Fix UAF in destroy()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa upstream.
+
+Dm_cache also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancelling timer again in destroy().
+
+Cc: stable@vger.kernel.org
+Fixes: c6b4fcbad044e ("dm: add cache target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-target.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -1992,6 +1992,7 @@ static void destroy(struct cache *cache)
+ if (cache->prison)
+ dm_bio_prison_destroy_v2(cache->prison);
+
++ cancel_delayed_work_sync(&cache->waker);
+ if (cache->wq)
+ destroy_workqueue(cache->wq);
+
--- /dev/null
+From 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Wed, 30 Nov 2022 14:02:47 -0500
+Subject: dm cache: set needs_check flag after aborting metadata
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c upstream.
+
+Otherwise the commit that will be aborted will be associated with the
+metadata objects that will be torn down. Must write needs_check flag
+to metadata with a reset block manager.
+
+Found through code-inspection (and compared against dm-thin.c).
+
+Cc: stable@vger.kernel.org
+Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag")
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-target.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -1011,16 +1011,16 @@ static void abort_transaction(struct cac
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return;
+
+- if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+- DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
+- set_cache_mode(cache, CM_FAIL);
+- }
+-
+ DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
+ if (dm_cache_metadata_abort(cache->cmd)) {
+ DMERR("%s: failed to abort metadata transaction", dev_name);
+ set_cache_mode(cache, CM_FAIL);
+ }
++
++ if (dm_cache_metadata_set_needs_check(cache->cmd)) {
++ DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
++ set_cache_mode(cache, CM_FAIL);
++ }
+ }
+
+ static void metadata_operation_failed(struct cache *cache, const char *op, int r)
--- /dev/null
+From e4b5957c6f749a501c464f92792f1c8e26b61a94 Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:48 +0800
+Subject: dm clone: Fix UAF in clone_dtr()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit e4b5957c6f749a501c464f92792f1c8e26b61a94 upstream.
+
+Dm_clone also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancelling timer again in clone_dtr().
+
+Cc: stable@vger.kernel.org
+Fixes: 7431b7835f554 ("dm: add clone target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-clone-target.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-clone-target.c
++++ b/drivers/md/dm-clone-target.c
+@@ -1977,6 +1977,7 @@ static void clone_dtr(struct dm_target *
+
+ mempool_exit(&clone->hydration_pool);
+ dm_kcopyd_client_destroy(clone->kcopyd_client);
++ cancel_delayed_work_sync(&clone->waker);
+ destroy_workqueue(clone->wq);
+ hash_table_exit(clone);
+ dm_clone_metadata_close(clone->cmd);
--- /dev/null
+From f50cb2cbabd6c4a60add93d72451728f86e4791c Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:50 +0800
+Subject: dm integrity: Fix UAF in dm_integrity_dtr()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit f50cb2cbabd6c4a60add93d72451728f86e4791c upstream.
+
+Dm_integrity also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancelling timer again in dm_integrity_dtr().
+
+Cc: stable@vger.kernel.org
+Fixes: 7eada909bfd7a ("dm: add integrity target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -4195,6 +4195,8 @@ static void dm_integrity_dtr(struct dm_t
+ BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
+ BUG_ON(!list_empty(&ic->wait_list));
+
++ if (ic->mode == 'B')
++ cancel_delayed_work_sync(&ic->bitmap_flush_work);
+ if (ic->metadata_wq)
+ destroy_workqueue(ic->metadata_wq);
+ if (ic->wait_wq)
--- /dev/null
+From 8111964f1b8524c4bb56b02cd9c7a37725ea21fd Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Wed, 30 Nov 2022 21:31:34 +0800
+Subject: dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 8111964f1b8524c4bb56b02cd9c7a37725ea21fd upstream.
+
+Following concurrent processes:
+
+ P1(drop cache) P2(kworker)
+drop_caches_sysctl_handler
+ drop_slab
+ shrink_slab
+ down_read(&shrinker_rwsem) - LOCK A
+ do_shrink_slab
+ super_cache_scan
+ prune_icache_sb
+ dispose_list
+ evict
+ ext4_evict_inode
+ ext4_clear_inode
+ ext4_discard_preallocations
+ ext4_mb_load_buddy_gfp
+ ext4_mb_init_cache
+ ext4_read_block_bitmap_nowait
+ ext4_read_bh_nowait
+ submit_bh
+ dm_submit_bio
+ do_worker
+ process_deferred_bios
+ commit
+ metadata_operation_failed
+ dm_pool_abort_metadata
+ down_write(&pmd->root_lock) - LOCK B
+ __destroy_persistent_data_objects
+ dm_block_manager_destroy
+ dm_bufio_client_destroy
+ unregister_shrinker
+ down_write(&shrinker_rwsem)
+ thin_map |
+ dm_thin_find_block ↓
+ down_read(&pmd->root_lock) --> ABBA deadlock
+
+, which triggers hung task:
+
+[ 76.974820] INFO: task kworker/u4:3:63 blocked for more than 15 seconds.
+[ 76.976019] Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
+[ 76.978521] task:kworker/u4:3 state:D stack:0 pid:63 ppid:2
+[ 76.978534] Workqueue: dm-thin do_worker
+[ 76.978552] Call Trace:
+[ 76.978564] __schedule+0x6ba/0x10f0
+[ 76.978582] schedule+0x9d/0x1e0
+[ 76.978588] rwsem_down_write_slowpath+0x587/0xdf0
+[ 76.978600] down_write+0xec/0x110
+[ 76.978607] unregister_shrinker+0x2c/0xf0
+[ 76.978616] dm_bufio_client_destroy+0x116/0x3d0
+[ 76.978625] dm_block_manager_destroy+0x19/0x40
+[ 76.978629] __destroy_persistent_data_objects+0x5e/0x70
+[ 76.978636] dm_pool_abort_metadata+0x8e/0x100
+[ 76.978643] metadata_operation_failed+0x86/0x110
+[ 76.978649] commit+0x6a/0x230
+[ 76.978655] do_worker+0xc6e/0xd90
+[ 76.978702] process_one_work+0x269/0x630
+[ 76.978714] worker_thread+0x266/0x630
+[ 76.978730] kthread+0x151/0x1b0
+[ 76.978772] INFO: task test.sh:2646 blocked for more than 15 seconds.
+[ 76.979756] Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
+[ 76.982111] task:test.sh state:D stack:0 pid:2646 ppid:2459
+[ 76.982128] Call Trace:
+[ 76.982139] __schedule+0x6ba/0x10f0
+[ 76.982155] schedule+0x9d/0x1e0
+[ 76.982159] rwsem_down_read_slowpath+0x4f4/0x910
+[ 76.982173] down_read+0x84/0x170
+[ 76.982177] dm_thin_find_block+0x4c/0xd0
+[ 76.982183] thin_map+0x201/0x3d0
+[ 76.982188] __map_bio+0x5b/0x350
+[ 76.982195] dm_submit_bio+0x2b6/0x930
+[ 76.982202] __submit_bio+0x123/0x2d0
+[ 76.982209] submit_bio_noacct_nocheck+0x101/0x3e0
+[ 76.982222] submit_bio_noacct+0x389/0x770
+[ 76.982227] submit_bio+0x50/0xc0
+[ 76.982232] submit_bh_wbc+0x15e/0x230
+[ 76.982238] submit_bh+0x14/0x20
+[ 76.982241] ext4_read_bh_nowait+0xc5/0x130
+[ 76.982247] ext4_read_block_bitmap_nowait+0x340/0xc60
+[ 76.982254] ext4_mb_init_cache+0x1ce/0xdc0
+[ 76.982259] ext4_mb_load_buddy_gfp+0x987/0xfa0
+[ 76.982263] ext4_discard_preallocations+0x45d/0x830
+[ 76.982274] ext4_clear_inode+0x48/0xf0
+[ 76.982280] ext4_evict_inode+0xcf/0xc70
+[ 76.982285] evict+0x119/0x2b0
+[ 76.982290] dispose_list+0x43/0xa0
+[ 76.982294] prune_icache_sb+0x64/0x90
+[ 76.982298] super_cache_scan+0x155/0x210
+[ 76.982303] do_shrink_slab+0x19e/0x4e0
+[ 76.982310] shrink_slab+0x2bd/0x450
+[ 76.982317] drop_slab+0xcc/0x1a0
+[ 76.982323] drop_caches_sysctl_handler+0xb7/0xe0
+[ 76.982327] proc_sys_call_handler+0x1bc/0x300
+[ 76.982331] proc_sys_write+0x17/0x20
+[ 76.982334] vfs_write+0x3d3/0x570
+[ 76.982342] ksys_write+0x73/0x160
+[ 76.982347] __x64_sys_write+0x1e/0x30
+[ 76.982352] do_syscall_64+0x35/0x80
+[ 76.982357] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Function metadata_operation_failed() is called when operations failed
+on dm pool metadata, dm pool will destroy and recreate metadata. So,
+shrinker will be unregistered and registered, which could down write
+shrinker_rwsem under pmd_write_lock.
+
+Fix it by allocating dm_block_manager before locking pmd->root_lock
+and destroying old dm_block_manager after unlocking pmd->root_lock,
+then old dm_block_manager is replaced with new dm_block_manager under
+pmd->root_lock. So, shrinker register/unregister could be done without
+holding pmd->root_lock.
+
+Fetch a reproducer in [Link].
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216676
+Cc: stable@vger.kernel.org #v5.2+
+Fixes: e49e582965b3 ("dm thin: add read only and fail io modes")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin-metadata.c | 51 +++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 43 insertions(+), 8 deletions(-)
+
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -753,13 +753,15 @@ static int __create_persistent_data_obje
+ return r;
+ }
+
+-static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
++static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
++ bool destroy_bm)
+ {
+ dm_sm_destroy(pmd->data_sm);
+ dm_sm_destroy(pmd->metadata_sm);
+ dm_tm_destroy(pmd->nb_tm);
+ dm_tm_destroy(pmd->tm);
+- dm_block_manager_destroy(pmd->bm);
++ if (destroy_bm)
++ dm_block_manager_destroy(pmd->bm);
+ }
+
+ static int __begin_transaction(struct dm_pool_metadata *pmd)
+@@ -966,7 +968,7 @@ int dm_pool_metadata_close(struct dm_poo
+ }
+ pmd_write_unlock(pmd);
+ if (!pmd->fail_io)
+- __destroy_persistent_data_objects(pmd);
++ __destroy_persistent_data_objects(pmd, true);
+
+ kfree(pmd);
+ return 0;
+@@ -1875,19 +1877,52 @@ static void __set_abort_with_changes_fla
+ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
+ {
+ int r = -EINVAL;
++ struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++ /* fail_io is double-checked with pmd->root_lock held below */
++ if (unlikely(pmd->fail_io))
++ return r;
++
++ /*
++ * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++ * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++ * shrinker associated with the block manager's bufio client vs pmd root_lock).
++ * - must take shrinker_rwsem without holding pmd->root_lock
++ */
++ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++ THIN_MAX_CONCURRENT_LOCKS);
+
+ pmd_write_lock(pmd);
+- if (pmd->fail_io)
++ if (pmd->fail_io) {
++ pmd_write_unlock(pmd);
+ goto out;
++ }
+
+ __set_abort_with_changes_flags(pmd);
+- __destroy_persistent_data_objects(pmd);
+- r = __create_persistent_data_objects(pmd, false);
++ __destroy_persistent_data_objects(pmd, false);
++ old_bm = pmd->bm;
++ if (IS_ERR(new_bm)) {
++ DMERR("could not create block manager during abort");
++ pmd->bm = NULL;
++ r = PTR_ERR(new_bm);
++ goto out_unlock;
++ }
++
++ pmd->bm = new_bm;
++ r = __open_or_format_metadata(pmd, false);
++ if (r) {
++ pmd->bm = NULL;
++ goto out_unlock;
++ }
++ new_bm = NULL;
++out_unlock:
+ if (r)
+ pmd->fail_io = true;
+-
+-out:
+ pmd_write_unlock(pmd);
++ dm_block_manager_destroy(old_bm);
++out:
++ if (new_bm && !IS_ERR(new_bm))
++ dm_block_manager_destroy(new_bm);
+
+ return r;
+ }
--- /dev/null
+From 88430ebcbc0ec637b710b947738839848c20feff Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:47 +0800
+Subject: dm thin: Fix UAF in run_timer_softirq()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 88430ebcbc0ec637b710b947738839848c20feff upstream.
+
+When dm_resume() and dm_destroy() are concurrent, it will
+lead to UAF, as follows:
+
+ BUG: KASAN: use-after-free in __run_timers+0x173/0x710
+ Write of size 8 at addr ffff88816d9490f0 by task swapper/0/0
+<snip>
+ Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x73/0x9f
+ print_report.cold+0x132/0xaa2
+ _raw_spin_lock_irqsave+0xcd/0x160
+ __run_timers+0x173/0x710
+ kasan_report+0xad/0x110
+ __run_timers+0x173/0x710
+ __asan_store8+0x9c/0x140
+ __run_timers+0x173/0x710
+ call_timer_fn+0x310/0x310
+ pvclock_clocksource_read+0xfa/0x250
+ kvm_clock_read+0x2c/0x70
+ kvm_clock_get_cycles+0xd/0x20
+ ktime_get+0x5c/0x110
+ lapic_next_event+0x38/0x50
+ clockevents_program_event+0xf1/0x1e0
+ run_timer_softirq+0x49/0x90
+ __do_softirq+0x16e/0x62c
+ __irq_exit_rcu+0x1fa/0x270
+ irq_exit_rcu+0x12/0x20
+ sysvec_apic_timer_interrupt+0x8e/0xc0
+
+One of the concurrency UAF can be shown as below:
+
+ use free
+do_resume |
+ __find_device_hash_cell |
+ dm_get |
+ atomic_inc(&md->holders) |
+ | dm_destroy
+ | __dm_destroy
+ | if (!dm_suspended_md(md))
+ | atomic_read(&md->holders)
+ | msleep(1)
+ dm_resume |
+ __dm_resume |
+ dm_table_resume_targets |
+ pool_resume |
+ do_waker #add delay work |
+ dm_put |
+ atomic_dec(&md->holders) |
+ | dm_table_destroy
+ | pool_dtr
+ | __pool_dec
+ | __pool_destroy
+ | destroy_workqueue
+ | kfree(pool) # free pool
+ time out
+__do_softirq
+ run_timer_softirq # pool has already been freed
+
+This can be easily reproduced using:
+ 1. create thin-pool
+ 2. dmsetup suspend pool
+ 3. dmsetup resume pool
+ 4. dmsetup remove_all # Concurrent with 3
+
+The root cause of this UAF bug is that dm_resume() adds timer after
+dm_destroy() skips cancelling the timer because of suspend status.
+After timeout, it will call run_timer_softirq(), however pool has
+already been freed. The concurrency UAF bug will happen.
+
+Therefore, cancelling timer again in __pool_destroy().
+
+Cc: stable@vger.kernel.org
+Fixes: 991d9fa02da0d ("dm: add thin provisioning target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -2931,6 +2931,8 @@ static void __pool_destroy(struct pool *
+ dm_bio_prison_destroy(pool->prison);
+ dm_kcopyd_client_destroy(pool->copier);
+
++ cancel_delayed_work_sync(&pool->waker);
++ cancel_delayed_work_sync(&pool->no_space_timeout);
+ if (pool->wq)
+ destroy_workqueue(pool->wq);
+
--- /dev/null
+From 7991dbff6849f67e823b7cc0c15e5a90b0549b9f Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Thu, 8 Dec 2022 22:28:02 +0800
+Subject: dm thin: Use last transaction's pmd->root when commit failed
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 7991dbff6849f67e823b7cc0c15e5a90b0549b9f upstream.
+
+Recently we found a softlock up problem in dm thin pool btree lookup
+code due to corrupted metadata:
+
+ Kernel panic - not syncing: softlockup: hung tasks
+ CPU: 7 PID: 2669225 Comm: kworker/u16:3
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+ Workqueue: dm-thin do_worker [dm_thin_pool]
+ Call Trace:
+ <IRQ>
+ dump_stack+0x9c/0xd3
+ panic+0x35d/0x6b9
+ watchdog_timer_fn.cold+0x16/0x25
+ __run_hrtimer+0xa2/0x2d0
+ </IRQ>
+ RIP: 0010:__relink_lru+0x102/0x220 [dm_bufio]
+ __bufio_new+0x11f/0x4f0 [dm_bufio]
+ new_read+0xa3/0x1e0 [dm_bufio]
+ dm_bm_read_lock+0x33/0xd0 [dm_persistent_data]
+ ro_step+0x63/0x100 [dm_persistent_data]
+ btree_lookup_raw.constprop.0+0x44/0x220 [dm_persistent_data]
+ dm_btree_lookup+0x16f/0x210 [dm_persistent_data]
+ dm_thin_find_block+0x12c/0x210 [dm_thin_pool]
+ __process_bio_read_only+0xc5/0x400 [dm_thin_pool]
+ process_thin_deferred_bios+0x1a4/0x4a0 [dm_thin_pool]
+ process_one_work+0x3c5/0x730
+
+Following process may generate a broken btree mixed with fresh and
+stale btree nodes, which could get dm thin trapped in an infinite loop
+while looking up data block:
+ Transaction 1: pmd->root = A, A->B->C // One path in btree
+ pmd->root = X, X->Y->Z // Copy-up
+ Transaction 2: X,Z is updated on disk, Y write failed.
+ // Commit failed, dm thin becomes read-only.
+ process_bio_read_only
+ dm_thin_find_block
+ __find_block
+ dm_btree_lookup(pmd->root)
+The pmd->root points to a broken btree, Y may contain stale node
+pointing to any block, for example X, which gets dm thin trapped into
+a dead loop while looking up Z.
+
+Fix this by setting pmd->root in __open_metadata(), so that dm thin
+will use the last transaction's pmd->root if commit failed.
+
+Fetch a reproducer in [Link].
+
+Linke: https://bugzilla.kernel.org/show_bug.cgi?id=216790
+Cc: stable@vger.kernel.org
+Fixes: 991d9fa02da0 ("dm: add thin provisioning target")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Acked-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin-metadata.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -701,6 +701,15 @@ static int __open_metadata(struct dm_poo
+ goto bad_cleanup_data_sm;
+ }
+
++ /*
++ * For pool metadata opening process, root setting is redundant
++ * because it will be set again in __begin_transaction(). But dm
++ * pool aborting process really needs to get last transaction's
++ * root to avoid accessing broken btree.
++ */
++ pmd->root = le64_to_cpu(disk_super->data_mapping_root);
++ pmd->details_root = le64_to_cpu(disk_super->device_details_root);
++
+ __setup_btree_details(pmd);
+ dm_bm_unlock(sblock);
+
cpufreq-init-completion-before-kobject_init_and_add.patch
binfmt-move-install_exec_creds-after-setup_new_exec-.patch
binfmt-fix-error-return-code-in-load_elf_fdpic_binar.patch
+dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch
+dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch
+dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch
+dm-thin-fix-uaf-in-run_timer_softirq.patch
+dm-integrity-fix-uaf-in-dm_integrity_dtr.patch
+dm-clone-fix-uaf-in-clone_dtr.patch
+dm-cache-fix-uaf-in-destroy.patch
+dm-cache-set-needs_check-flag-after-aborting-metadata.patch
+tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch
--- /dev/null
+From 82470f7d9044842618c847a7166de2b7458157a7 Mon Sep 17 00:00:00 2001
+From: Zheng Yejian <zhengyejian1@huawei.com>
+Date: Wed, 7 Dec 2022 11:51:43 +0800
+Subject: tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx'
+
+From: Zheng Yejian <zhengyejian1@huawei.com>
+
+commit 82470f7d9044842618c847a7166de2b7458157a7 upstream.
+
+When generate a synthetic event with many params and then create a trace
+action for it [1], kernel panic happened [2].
+
+It is because that in trace_action_create() 'data->n_params' is up to
+SYNTH_FIELDS_MAX (current value is 64), and array 'data->var_ref_idx'
+keeps indices into array 'hist_data->var_refs' for each synthetic event
+param, but the length of 'data->var_ref_idx' is TRACING_MAP_VARS_MAX
+(current value is 16), so out-of-bound write happened when 'data->n_params'
+more than 16. In this case, 'data->match_data.event' is overwritten and
+eventually cause the panic.
+
+To solve the issue, adjust the length of 'data->var_ref_idx' to be
+SYNTH_FIELDS_MAX and add sanity checks to avoid out-of-bound write.
+
+[1]
+ # cd /sys/kernel/tracing/
+ # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\
+int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\
+int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\
+int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\
+int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\
+int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\
+int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\
+int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\
+int v63" >> synthetic_events
+ # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="bash"' >> \
+events/sched/sched_waking/trigger
+ # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid)" >> events/sched/sched_switch/trigger
+
+[2]
+BUG: unable to handle page fault for address: ffff91c900000000
+PGD 61001067 P4D 61001067 PUD 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+CPU: 2 PID: 322 Comm: bash Tainted: G W 6.1.0-rc8+ #229
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
+RIP: 0010:strcmp+0xc/0x30
+Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
+c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
+07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
+RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
+RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
+RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
+R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
+R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
+FS: 00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
+Call Trace:
+ <TASK>
+ __find_event_file+0x55/0x90
+ action_create+0x76c/0x1060
+ event_hist_trigger_parse+0x146d/0x2060
+ ? event_trigger_write+0x31/0xd0
+ trigger_process_regex+0xbb/0x110
+ event_trigger_write+0x6b/0xd0
+ vfs_write+0xc8/0x3e0
+ ? alloc_fd+0xc0/0x160
+ ? preempt_count_add+0x4d/0xa0
+ ? preempt_count_add+0x70/0xa0
+ ksys_write+0x5f/0xe0
+ do_syscall_64+0x3b/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f1d1d0cf077
+Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e
+fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00
+f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74
+RSP: 002b:00007ffcebb0e568 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000000143 RCX: 00007f1d1d0cf077
+RDX: 0000000000000143 RSI: 00005639265aa7e0 RDI: 0000000000000001
+RBP: 00005639265aa7e0 R08: 000000000000000a R09: 0000000000000142
+R10: 000056392639c017 R11: 0000000000000246 R12: 0000000000000143
+R13: 00007f1d1d1ae6a0 R14: 00007f1d1d1aa4a0 R15: 00007f1d1d1a98a0
+ </TASK>
+Modules linked in:
+CR2: ffff91c900000000
+---[ end trace 0000000000000000 ]---
+RIP: 0010:strcmp+0xc/0x30
+Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
+c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
+07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
+RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
+RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
+RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
+R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
+R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
+FS: 00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
+
+Link: https://lore.kernel.org/linux-trace-kernel/20221207035143.2278781-1-zhengyejian1@huawei.com
+
+Cc: <mhiramat@kernel.org>
+Cc: <zanussi@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: d380dcde9a07 ("tracing: Fix now invalid var_ref_vals assumption in trace action")
+Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_hist.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -479,7 +479,7 @@ struct action_data {
+ * event param, and is passed to the synthetic event
+ * invocation.
+ */
+- unsigned int var_ref_idx[TRACING_MAP_VARS_MAX];
++ unsigned int var_ref_idx[SYNTH_FIELDS_MAX];
+ struct synth_event *synth_event;
+ bool use_trace_keyword;
+ char *synth_event_name;
+@@ -2752,7 +2752,9 @@ static struct hist_field *create_var_ref
+ return ref_field;
+ }
+ }
+-
++ /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */
++ if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX)
++ return NULL;
+ ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+ if (ref_field) {
+ if (init_var_ref(ref_field, var_field, system, event_name)) {
+@@ -4338,6 +4340,10 @@ static int trace_action_create(struct hi
+
+ lockdep_assert_held(&event_mutex);
+
++ /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */
++ if (data->n_params > SYNTH_FIELDS_MAX)
++ return -EINVAL;
++
+ if (data->use_trace_keyword)
+ synth_event_name = data->synth_event_name;
+ else