]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2023 14:02:07 +0000 (15:02 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2023 14:02:07 +0000 (15:02 +0100)
added patches:
dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch
dm-cache-fix-uaf-in-destroy.patch
dm-cache-set-needs_check-flag-after-aborting-metadata.patch
dm-clone-fix-uaf-in-clone_dtr.patch
dm-integrity-fix-uaf-in-dm_integrity_dtr.patch
dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch
dm-thin-fix-uaf-in-run_timer_softirq.patch
dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch
tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch

queue-5.4/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch [new file with mode: 0644]
queue-5.4/dm-cache-fix-uaf-in-destroy.patch [new file with mode: 0644]
queue-5.4/dm-cache-set-needs_check-flag-after-aborting-metadata.patch [new file with mode: 0644]
queue-5.4/dm-clone-fix-uaf-in-clone_dtr.patch [new file with mode: 0644]
queue-5.4/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch [new file with mode: 0644]
queue-5.4/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch [new file with mode: 0644]
queue-5.4/dm-thin-fix-uaf-in-run_timer_softirq.patch [new file with mode: 0644]
queue-5.4/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch [new file with mode: 0644]

diff --git a/queue-5.4/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch b/queue-5.4/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch
new file mode 100644 (file)
index 0000000..1118a58
--- /dev/null
@@ -0,0 +1,114 @@
+From 352b837a5541690d4f843819028cf2b8be83d424 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Wed, 30 Nov 2022 13:26:32 -0500
+Subject: dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 352b837a5541690d4f843819028cf2b8be83d424 upstream.
+
+Apply the same ABBA deadlock fix made in commit 4b60f452ec51 ("dm thin:
+Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata") to
+DM-cache's metadata.
+
+Reported-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Cc: stable@vger.kernel.org
+Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag")
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-metadata.c |   54 +++++++++++++++++++++++++++++++++++------
+ 1 file changed, 47 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-cache-metadata.c
++++ b/drivers/md/dm-cache-metadata.c
+@@ -551,11 +551,13 @@ static int __create_persistent_data_obje
+       return r;
+ }
+-static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
++static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd,
++                                            bool destroy_bm)
+ {
+       dm_sm_destroy(cmd->metadata_sm);
+       dm_tm_destroy(cmd->tm);
+-      dm_block_manager_destroy(cmd->bm);
++      if (destroy_bm)
++              dm_block_manager_destroy(cmd->bm);
+ }
+ typedef unsigned long (*flags_mutator)(unsigned long);
+@@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_
+               cmd2 = lookup(bdev);
+               if (cmd2) {
+                       mutex_unlock(&table_lock);
+-                      __destroy_persistent_data_objects(cmd);
++                      __destroy_persistent_data_objects(cmd, true);
+                       kfree(cmd);
+                       return cmd2;
+               }
+@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_c
+               mutex_unlock(&table_lock);
+               if (!cmd->fail_io)
+-                      __destroy_persistent_data_objects(cmd);
++                      __destroy_persistent_data_objects(cmd, true);
+               kfree(cmd);
+       }
+ }
+@@ -1808,14 +1810,52 @@ int dm_cache_metadata_needs_check(struct
+ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
+ {
+-      int r;
++      int r = -EINVAL;
++      struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++      /* fail_io is double-checked with cmd->root_lock held below */
++      if (unlikely(cmd->fail_io))
++              return r;
++
++      /*
++       * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++       * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++       * shrinker associated with the block manager's bufio client vs cmd root_lock).
++       * - must take shrinker_rwsem without holding cmd->root_lock
++       */
++      new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++                                       CACHE_MAX_CONCURRENT_LOCKS);
+       WRITE_LOCK(cmd);
+-      __destroy_persistent_data_objects(cmd);
+-      r = __create_persistent_data_objects(cmd, false);
++      if (cmd->fail_io) {
++              WRITE_UNLOCK(cmd);
++              goto out;
++      }
++
++      __destroy_persistent_data_objects(cmd, false);
++      old_bm = cmd->bm;
++      if (IS_ERR(new_bm)) {
++              DMERR("could not create block manager during abort");
++              cmd->bm = NULL;
++              r = PTR_ERR(new_bm);
++              goto out_unlock;
++      }
++
++      cmd->bm = new_bm;
++      r = __open_or_format_metadata(cmd, false);
++      if (r) {
++              cmd->bm = NULL;
++              goto out_unlock;
++      }
++      new_bm = NULL;
++out_unlock:
+       if (r)
+               cmd->fail_io = true;
+       WRITE_UNLOCK(cmd);
++      dm_block_manager_destroy(old_bm);
++out:
++      if (new_bm && !IS_ERR(new_bm))
++              dm_block_manager_destroy(new_bm);
+       return r;
+ }
diff --git a/queue-5.4/dm-cache-fix-uaf-in-destroy.patch b/queue-5.4/dm-cache-fix-uaf-in-destroy.patch
new file mode 100644 (file)
index 0000000..b588c88
--- /dev/null
@@ -0,0 +1,33 @@
+From 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:49 +0800
+Subject: dm cache: Fix UAF in destroy()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa upstream.
+
+Dm_cache also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancel the timer again in destroy().
+
+Cc: stable@vger.kernel.org
+Fixes: c6b4fcbad044e ("dm: add cache target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-target.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -1992,6 +1992,7 @@ static void destroy(struct cache *cache)
+       if (cache->prison)
+               dm_bio_prison_destroy_v2(cache->prison);
++      cancel_delayed_work_sync(&cache->waker);
+       if (cache->wq)
+               destroy_workqueue(cache->wq);
diff --git a/queue-5.4/dm-cache-set-needs_check-flag-after-aborting-metadata.patch b/queue-5.4/dm-cache-set-needs_check-flag-after-aborting-metadata.patch
new file mode 100644 (file)
index 0000000..1e533bd
--- /dev/null
@@ -0,0 +1,47 @@
+From 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Wed, 30 Nov 2022 14:02:47 -0500
+Subject: dm cache: set needs_check flag after aborting metadata
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c upstream.
+
+Otherwise the commit that will be aborted will be associated with the
+metadata objects that will be torn down.  Must write needs_check flag
+to metadata with a reset block manager.
+
+Found through code-inspection (and compared against dm-thin.c).
+
+Cc: stable@vger.kernel.org
+Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag")
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-target.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -1011,16 +1011,16 @@ static void abort_transaction(struct cac
+       if (get_cache_mode(cache) >= CM_READ_ONLY)
+               return;
+-      if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+-              DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
+-              set_cache_mode(cache, CM_FAIL);
+-      }
+-
+       DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
+       if (dm_cache_metadata_abort(cache->cmd)) {
+               DMERR("%s: failed to abort metadata transaction", dev_name);
+               set_cache_mode(cache, CM_FAIL);
+       }
++
++      if (dm_cache_metadata_set_needs_check(cache->cmd)) {
++              DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
++              set_cache_mode(cache, CM_FAIL);
++      }
+ }
+ static void metadata_operation_failed(struct cache *cache, const char *op, int r)
diff --git a/queue-5.4/dm-clone-fix-uaf-in-clone_dtr.patch b/queue-5.4/dm-clone-fix-uaf-in-clone_dtr.patch
new file mode 100644 (file)
index 0000000..94d3ac8
--- /dev/null
@@ -0,0 +1,33 @@
+From e4b5957c6f749a501c464f92792f1c8e26b61a94 Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:48 +0800
+Subject: dm clone: Fix UAF in clone_dtr()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit e4b5957c6f749a501c464f92792f1c8e26b61a94 upstream.
+
+Dm_clone also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancel the timer again in clone_dtr().
+
+Cc: stable@vger.kernel.org
+Fixes: 7431b7835f554 ("dm: add clone target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-clone-target.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-clone-target.c
++++ b/drivers/md/dm-clone-target.c
+@@ -1977,6 +1977,7 @@ static void clone_dtr(struct dm_target *
+       mempool_exit(&clone->hydration_pool);
+       dm_kcopyd_client_destroy(clone->kcopyd_client);
++      cancel_delayed_work_sync(&clone->waker);
+       destroy_workqueue(clone->wq);
+       hash_table_exit(clone);
+       dm_clone_metadata_close(clone->cmd);
diff --git a/queue-5.4/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch b/queue-5.4/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch
new file mode 100644 (file)
index 0000000..833aeeb
--- /dev/null
@@ -0,0 +1,34 @@
+From f50cb2cbabd6c4a60add93d72451728f86e4791c Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:50 +0800
+Subject: dm integrity: Fix UAF in dm_integrity_dtr()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit f50cb2cbabd6c4a60add93d72451728f86e4791c upstream.
+
+Dm_integrity also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancel the timer again in dm_integrity_dtr().
+
+Cc: stable@vger.kernel.org
+Fixes: 7eada909bfd7a ("dm: add integrity target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -4195,6 +4195,8 @@ static void dm_integrity_dtr(struct dm_t
+       BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
+       BUG_ON(!list_empty(&ic->wait_list));
++      if (ic->mode == 'B')
++              cancel_delayed_work_sync(&ic->bitmap_flush_work);
+       if (ic->metadata_wq)
+               destroy_workqueue(ic->metadata_wq);
+       if (ic->wait_wq)
diff --git a/queue-5.4/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch b/queue-5.4/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch
new file mode 100644 (file)
index 0000000..69bdf6f
--- /dev/null
@@ -0,0 +1,221 @@
+From 8111964f1b8524c4bb56b02cd9c7a37725ea21fd Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Wed, 30 Nov 2022 21:31:34 +0800
+Subject: dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 8111964f1b8524c4bb56b02cd9c7a37725ea21fd upstream.
+
+Following concurrent processes:
+
+          P1(drop cache)                P2(kworker)
+drop_caches_sysctl_handler
+ drop_slab
+  shrink_slab
+   down_read(&shrinker_rwsem)  - LOCK A
+   do_shrink_slab
+    super_cache_scan
+     prune_icache_sb
+      dispose_list
+       evict
+        ext4_evict_inode
+        ext4_clear_inode
+         ext4_discard_preallocations
+          ext4_mb_load_buddy_gfp
+           ext4_mb_init_cache
+            ext4_read_block_bitmap_nowait
+             ext4_read_bh_nowait
+              submit_bh
+               dm_submit_bio
+                                do_worker
+                                 process_deferred_bios
+                                  commit
+                                   metadata_operation_failed
+                                    dm_pool_abort_metadata
+                                     down_write(&pmd->root_lock) - LOCK B
+                                     __destroy_persistent_data_objects
+                                      dm_block_manager_destroy
+                                       dm_bufio_client_destroy
+                                        unregister_shrinker
+                                         down_write(&shrinker_rwsem)
+                thin_map                            |
+                 dm_thin_find_block                 ↓
+                  down_read(&pmd->root_lock) --> ABBA deadlock
+
+, which triggers hung task:
+
+[   76.974820] INFO: task kworker/u4:3:63 blocked for more than 15 seconds.
+[   76.976019]       Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
+[   76.978521] task:kworker/u4:3    state:D stack:0     pid:63    ppid:2
+[   76.978534] Workqueue: dm-thin do_worker
+[   76.978552] Call Trace:
+[   76.978564]  __schedule+0x6ba/0x10f0
+[   76.978582]  schedule+0x9d/0x1e0
+[   76.978588]  rwsem_down_write_slowpath+0x587/0xdf0
+[   76.978600]  down_write+0xec/0x110
+[   76.978607]  unregister_shrinker+0x2c/0xf0
+[   76.978616]  dm_bufio_client_destroy+0x116/0x3d0
+[   76.978625]  dm_block_manager_destroy+0x19/0x40
+[   76.978629]  __destroy_persistent_data_objects+0x5e/0x70
+[   76.978636]  dm_pool_abort_metadata+0x8e/0x100
+[   76.978643]  metadata_operation_failed+0x86/0x110
+[   76.978649]  commit+0x6a/0x230
+[   76.978655]  do_worker+0xc6e/0xd90
+[   76.978702]  process_one_work+0x269/0x630
+[   76.978714]  worker_thread+0x266/0x630
+[   76.978730]  kthread+0x151/0x1b0
+[   76.978772] INFO: task test.sh:2646 blocked for more than 15 seconds.
+[   76.979756]       Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
+[   76.982111] task:test.sh         state:D stack:0     pid:2646  ppid:2459
+[   76.982128] Call Trace:
+[   76.982139]  __schedule+0x6ba/0x10f0
+[   76.982155]  schedule+0x9d/0x1e0
+[   76.982159]  rwsem_down_read_slowpath+0x4f4/0x910
+[   76.982173]  down_read+0x84/0x170
+[   76.982177]  dm_thin_find_block+0x4c/0xd0
+[   76.982183]  thin_map+0x201/0x3d0
+[   76.982188]  __map_bio+0x5b/0x350
+[   76.982195]  dm_submit_bio+0x2b6/0x930
+[   76.982202]  __submit_bio+0x123/0x2d0
+[   76.982209]  submit_bio_noacct_nocheck+0x101/0x3e0
+[   76.982222]  submit_bio_noacct+0x389/0x770
+[   76.982227]  submit_bio+0x50/0xc0
+[   76.982232]  submit_bh_wbc+0x15e/0x230
+[   76.982238]  submit_bh+0x14/0x20
+[   76.982241]  ext4_read_bh_nowait+0xc5/0x130
+[   76.982247]  ext4_read_block_bitmap_nowait+0x340/0xc60
+[   76.982254]  ext4_mb_init_cache+0x1ce/0xdc0
+[   76.982259]  ext4_mb_load_buddy_gfp+0x987/0xfa0
+[   76.982263]  ext4_discard_preallocations+0x45d/0x830
+[   76.982274]  ext4_clear_inode+0x48/0xf0
+[   76.982280]  ext4_evict_inode+0xcf/0xc70
+[   76.982285]  evict+0x119/0x2b0
+[   76.982290]  dispose_list+0x43/0xa0
+[   76.982294]  prune_icache_sb+0x64/0x90
+[   76.982298]  super_cache_scan+0x155/0x210
+[   76.982303]  do_shrink_slab+0x19e/0x4e0
+[   76.982310]  shrink_slab+0x2bd/0x450
+[   76.982317]  drop_slab+0xcc/0x1a0
+[   76.982323]  drop_caches_sysctl_handler+0xb7/0xe0
+[   76.982327]  proc_sys_call_handler+0x1bc/0x300
+[   76.982331]  proc_sys_write+0x17/0x20
+[   76.982334]  vfs_write+0x3d3/0x570
+[   76.982342]  ksys_write+0x73/0x160
+[   76.982347]  __x64_sys_write+0x1e/0x30
+[   76.982352]  do_syscall_64+0x35/0x80
+[   76.982357]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Function metadata_operation_failed() is called when an operation on the
+dm pool metadata fails; the dm pool then destroys and recreates the
+metadata. So the shrinker is unregistered and re-registered, which takes
+shrinker_rwsem for write while pmd->root_lock is write-held.
+
+Fix it by allocating dm_block_manager before locking pmd->root_lock
+and destroying old dm_block_manager after unlocking pmd->root_lock,
+then old dm_block_manager is replaced with new dm_block_manager under
+pmd->root_lock. So, shrinker register/unregister could be done without
+holding pmd->root_lock.
+
+Fetch a reproducer in [Link].
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216676
+Cc: stable@vger.kernel.org #v5.2+
+Fixes: e49e582965b3 ("dm thin: add read only and fail io modes")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin-metadata.c |   51 +++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 43 insertions(+), 8 deletions(-)
+
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -753,13 +753,15 @@ static int __create_persistent_data_obje
+       return r;
+ }
+-static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
++static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
++                                            bool destroy_bm)
+ {
+       dm_sm_destroy(pmd->data_sm);
+       dm_sm_destroy(pmd->metadata_sm);
+       dm_tm_destroy(pmd->nb_tm);
+       dm_tm_destroy(pmd->tm);
+-      dm_block_manager_destroy(pmd->bm);
++      if (destroy_bm)
++              dm_block_manager_destroy(pmd->bm);
+ }
+ static int __begin_transaction(struct dm_pool_metadata *pmd)
+@@ -966,7 +968,7 @@ int dm_pool_metadata_close(struct dm_poo
+       }
+       pmd_write_unlock(pmd);
+       if (!pmd->fail_io)
+-              __destroy_persistent_data_objects(pmd);
++              __destroy_persistent_data_objects(pmd, true);
+       kfree(pmd);
+       return 0;
+@@ -1875,19 +1877,52 @@ static void __set_abort_with_changes_fla
+ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
+ {
+       int r = -EINVAL;
++      struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++      /* fail_io is double-checked with pmd->root_lock held below */
++      if (unlikely(pmd->fail_io))
++              return r;
++
++      /*
++       * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++       * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++       * shrinker associated with the block manager's bufio client vs pmd root_lock).
++       * - must take shrinker_rwsem without holding pmd->root_lock
++       */
++      new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++                                       THIN_MAX_CONCURRENT_LOCKS);
+       pmd_write_lock(pmd);
+-      if (pmd->fail_io)
++      if (pmd->fail_io) {
++              pmd_write_unlock(pmd);
+               goto out;
++      }
+       __set_abort_with_changes_flags(pmd);
+-      __destroy_persistent_data_objects(pmd);
+-      r = __create_persistent_data_objects(pmd, false);
++      __destroy_persistent_data_objects(pmd, false);
++      old_bm = pmd->bm;
++      if (IS_ERR(new_bm)) {
++              DMERR("could not create block manager during abort");
++              pmd->bm = NULL;
++              r = PTR_ERR(new_bm);
++              goto out_unlock;
++      }
++
++      pmd->bm = new_bm;
++      r = __open_or_format_metadata(pmd, false);
++      if (r) {
++              pmd->bm = NULL;
++              goto out_unlock;
++      }
++      new_bm = NULL;
++out_unlock:
+       if (r)
+               pmd->fail_io = true;
+-
+-out:
+       pmd_write_unlock(pmd);
++      dm_block_manager_destroy(old_bm);
++out:
++      if (new_bm && !IS_ERR(new_bm))
++              dm_block_manager_destroy(new_bm);
+       return r;
+ }
diff --git a/queue-5.4/dm-thin-fix-uaf-in-run_timer_softirq.patch b/queue-5.4/dm-thin-fix-uaf-in-run_timer_softirq.patch
new file mode 100644 (file)
index 0000000..26b26e5
--- /dev/null
@@ -0,0 +1,100 @@
+From 88430ebcbc0ec637b710b947738839848c20feff Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:47 +0800
+Subject: dm thin: Fix UAF in run_timer_softirq()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 88430ebcbc0ec637b710b947738839848c20feff upstream.
+
+When dm_resume() and dm_destroy() are concurrent, it will
+lead to UAF, as follows:
+
+ BUG: KASAN: use-after-free in __run_timers+0x173/0x710
+ Write of size 8 at addr ffff88816d9490f0 by task swapper/0/0
+<snip>
+ Call Trace:
+  <IRQ>
+  dump_stack_lvl+0x73/0x9f
+  print_report.cold+0x132/0xaa2
+  _raw_spin_lock_irqsave+0xcd/0x160
+  __run_timers+0x173/0x710
+  kasan_report+0xad/0x110
+  __run_timers+0x173/0x710
+  __asan_store8+0x9c/0x140
+  __run_timers+0x173/0x710
+  call_timer_fn+0x310/0x310
+  pvclock_clocksource_read+0xfa/0x250
+  kvm_clock_read+0x2c/0x70
+  kvm_clock_get_cycles+0xd/0x20
+  ktime_get+0x5c/0x110
+  lapic_next_event+0x38/0x50
+  clockevents_program_event+0xf1/0x1e0
+  run_timer_softirq+0x49/0x90
+  __do_softirq+0x16e/0x62c
+  __irq_exit_rcu+0x1fa/0x270
+  irq_exit_rcu+0x12/0x20
+  sysvec_apic_timer_interrupt+0x8e/0xc0
+
+One of the concurrency UAF can be shown as below:
+
+        use                                  free
+do_resume                           |
+  __find_device_hash_cell           |
+    dm_get                          |
+      atomic_inc(&md->holders)      |
+                                    | dm_destroy
+                                    |   __dm_destroy
+                                    |     if (!dm_suspended_md(md))
+                                    |     atomic_read(&md->holders)
+                                    |     msleep(1)
+  dm_resume                         |
+    __dm_resume                     |
+      dm_table_resume_targets       |
+        pool_resume                 |
+          do_waker  #add delay work |
+  dm_put                            |
+    atomic_dec(&md->holders)        |
+                                    |     dm_table_destroy
+                                    |       pool_dtr
+                                    |         __pool_dec
+                                    |           __pool_destroy
+                                    |             destroy_workqueue
+                                    |             kfree(pool) # free pool
+        time out
+__do_softirq
+  run_timer_softirq # pool has already been freed
+
+This can be easily reproduced using:
+  1. create thin-pool
+  2. dmsetup suspend pool
+  3. dmsetup resume pool
+  4. dmsetup remove_all # Concurrent with 3
+
+The root cause of this UAF bug is that dm_resume() adds timer after
+dm_destroy() skips cancelling the timer because of suspend status.
+After timeout, it will call run_timer_softirq(), however pool has
+already been freed. The concurrency UAF bug will happen.
+
+Therefore, cancel the timers again in __pool_destroy().
+
+Cc: stable@vger.kernel.org
+Fixes: 991d9fa02da0d ("dm: add thin provisioning target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -2931,6 +2931,8 @@ static void __pool_destroy(struct pool *
+       dm_bio_prison_destroy(pool->prison);
+       dm_kcopyd_client_destroy(pool->copier);
++      cancel_delayed_work_sync(&pool->waker);
++      cancel_delayed_work_sync(&pool->no_space_timeout);
+       if (pool->wq)
+               destroy_workqueue(pool->wq);
diff --git a/queue-5.4/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch b/queue-5.4/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch
new file mode 100644 (file)
index 0000000..dbf08ba
--- /dev/null
@@ -0,0 +1,84 @@
+From 7991dbff6849f67e823b7cc0c15e5a90b0549b9f Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Thu, 8 Dec 2022 22:28:02 +0800
+Subject: dm thin: Use last transaction's pmd->root when commit failed
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 7991dbff6849f67e823b7cc0c15e5a90b0549b9f upstream.
+
+Recently we found a softlock up problem in dm thin pool btree lookup
+code due to corrupted metadata:
+
+ Kernel panic - not syncing: softlockup: hung tasks
+ CPU: 7 PID: 2669225 Comm: kworker/u16:3
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+ Workqueue: dm-thin do_worker [dm_thin_pool]
+ Call Trace:
+   <IRQ>
+   dump_stack+0x9c/0xd3
+   panic+0x35d/0x6b9
+   watchdog_timer_fn.cold+0x16/0x25
+   __run_hrtimer+0xa2/0x2d0
+   </IRQ>
+   RIP: 0010:__relink_lru+0x102/0x220 [dm_bufio]
+   __bufio_new+0x11f/0x4f0 [dm_bufio]
+   new_read+0xa3/0x1e0 [dm_bufio]
+   dm_bm_read_lock+0x33/0xd0 [dm_persistent_data]
+   ro_step+0x63/0x100 [dm_persistent_data]
+   btree_lookup_raw.constprop.0+0x44/0x220 [dm_persistent_data]
+   dm_btree_lookup+0x16f/0x210 [dm_persistent_data]
+   dm_thin_find_block+0x12c/0x210 [dm_thin_pool]
+   __process_bio_read_only+0xc5/0x400 [dm_thin_pool]
+   process_thin_deferred_bios+0x1a4/0x4a0 [dm_thin_pool]
+   process_one_work+0x3c5/0x730
+
+Following process may generate a broken btree mixed with fresh and
+stale btree nodes, which could get dm thin trapped in an infinite loop
+while looking up data block:
+ Transaction 1: pmd->root = A, A->B->C   // One path in btree
+                pmd->root = X, X->Y->Z   // Copy-up
+ Transaction 2: X,Z is updated on disk, Y write failed.
+                // Commit failed, dm thin becomes read-only.
+                process_bio_read_only
+                dm_thin_find_block
+                 __find_block
+                  dm_btree_lookup(pmd->root)
+The pmd->root points to a broken btree, Y may contain stale node
+pointing to any block, for example X, which gets dm thin trapped into
+a dead loop while looking up Z.
+
+Fix this by setting pmd->root in __open_metadata(), so that dm thin
+will use the last transaction's pmd->root if commit failed.
+
+Fetch a reproducer in [Link].
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216790
+Cc: stable@vger.kernel.org
+Fixes: 991d9fa02da0 ("dm: add thin provisioning target")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Acked-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin-metadata.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -701,6 +701,15 @@ static int __open_metadata(struct dm_poo
+               goto bad_cleanup_data_sm;
+       }
++      /*
++       * For pool metadata opening process, root setting is redundant
++       * because it will be set again in __begin_transaction(). But dm
++       * pool aborting process really needs to get last transaction's
++       * root to avoid accessing broken btree.
++       */
++      pmd->root = le64_to_cpu(disk_super->data_mapping_root);
++      pmd->details_root = le64_to_cpu(disk_super->device_details_root);
++
+       __setup_btree_details(pmd);
+       dm_bm_unlock(sblock);
index dc8d11068c8b70d254e9ba45f1de35ac2a1edd26..4c56b368d2e6831f522d652ca41330e873bf8793 100644 (file)
@@ -488,3 +488,12 @@ selftests-use-optional-usercflags-and-userldflags.patch
 cpufreq-init-completion-before-kobject_init_and_add.patch
 binfmt-move-install_exec_creds-after-setup_new_exec-.patch
 binfmt-fix-error-return-code-in-load_elf_fdpic_binar.patch
+dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch
+dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch
+dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch
+dm-thin-fix-uaf-in-run_timer_softirq.patch
+dm-integrity-fix-uaf-in-dm_integrity_dtr.patch
+dm-clone-fix-uaf-in-clone_dtr.patch
+dm-cache-fix-uaf-in-destroy.patch
+dm-cache-set-needs_check-flag-after-aborting-metadata.patch
+tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch
diff --git a/queue-5.4/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch b/queue-5.4/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch
new file mode 100644 (file)
index 0000000..679e601
--- /dev/null
@@ -0,0 +1,153 @@
+From 82470f7d9044842618c847a7166de2b7458157a7 Mon Sep 17 00:00:00 2001
+From: Zheng Yejian <zhengyejian1@huawei.com>
+Date: Wed, 7 Dec 2022 11:51:43 +0800
+Subject: tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx'
+
+From: Zheng Yejian <zhengyejian1@huawei.com>
+
+commit 82470f7d9044842618c847a7166de2b7458157a7 upstream.
+
+When generating a synthetic event with many params and then creating a trace
+action for it [1], a kernel panic happens [2].
+
+This is because in trace_action_create() 'data->n_params' can be up to
+SYNTH_FIELDS_MAX (current value is 64), and array 'data->var_ref_idx'
+keeps indices into array 'hist_data->var_refs' for each synthetic event
+param, but the length of 'data->var_ref_idx' is TRACING_MAP_VARS_MAX
+(current value is 16), so an out-of-bound write happens when 'data->n_params'
+is more than 16. In this case, 'data->match_data.event' is overwritten, which
+eventually causes the panic.
+
+To solve the issue, adjust the length of 'data->var_ref_idx' to be
+SYNTH_FIELDS_MAX and add sanity checks to avoid out-of-bound write.
+
+[1]
+ # cd /sys/kernel/tracing/
+ # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\
+int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\
+int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\
+int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\
+int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\
+int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\
+int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\
+int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\
+int v63" >> synthetic_events
+ # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="bash"' >> \
+events/sched/sched_waking/trigger
+ # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid)" >> events/sched/sched_switch/trigger
+
+[2]
+BUG: unable to handle page fault for address: ffff91c900000000
+PGD 61001067 P4D 61001067 PUD 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+CPU: 2 PID: 322 Comm: bash Tainted: G        W          6.1.0-rc8+ #229
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
+RIP: 0010:strcmp+0xc/0x30
+Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
+c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
+07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
+RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
+RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
+RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
+R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
+R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
+FS:  00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
+Call Trace:
+ <TASK>
+ __find_event_file+0x55/0x90
+ action_create+0x76c/0x1060
+ event_hist_trigger_parse+0x146d/0x2060
+ ? event_trigger_write+0x31/0xd0
+ trigger_process_regex+0xbb/0x110
+ event_trigger_write+0x6b/0xd0
+ vfs_write+0xc8/0x3e0
+ ? alloc_fd+0xc0/0x160
+ ? preempt_count_add+0x4d/0xa0
+ ? preempt_count_add+0x70/0xa0
+ ksys_write+0x5f/0xe0
+ do_syscall_64+0x3b/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f1d1d0cf077
+Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e
+fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00
+f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74
+RSP: 002b:00007ffcebb0e568 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000000143 RCX: 00007f1d1d0cf077
+RDX: 0000000000000143 RSI: 00005639265aa7e0 RDI: 0000000000000001
+RBP: 00005639265aa7e0 R08: 000000000000000a R09: 0000000000000142
+R10: 000056392639c017 R11: 0000000000000246 R12: 0000000000000143
+R13: 00007f1d1d1ae6a0 R14: 00007f1d1d1aa4a0 R15: 00007f1d1d1a98a0
+ </TASK>
+Modules linked in:
+CR2: ffff91c900000000
+---[ end trace 0000000000000000 ]---
+RIP: 0010:strcmp+0xc/0x30
+Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
+c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
+07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
+RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
+RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
+RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
+R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
+R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
+FS:  00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
+
+Link: https://lore.kernel.org/linux-trace-kernel/20221207035143.2278781-1-zhengyejian1@huawei.com
+
+Cc: <mhiramat@kernel.org>
+Cc: <zanussi@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: d380dcde9a07 ("tracing: Fix now invalid var_ref_vals assumption in trace action")
+Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_hist.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -479,7 +479,7 @@ struct action_data {
+        * event param, and is passed to the synthetic event
+        * invocation.
+        */
+-      unsigned int            var_ref_idx[TRACING_MAP_VARS_MAX];
++      unsigned int            var_ref_idx[SYNTH_FIELDS_MAX];
+       struct synth_event      *synth_event;
+       bool                    use_trace_keyword;
+       char                    *synth_event_name;
+@@ -2752,7 +2752,9 @@ static struct hist_field *create_var_ref
+                       return ref_field;
+               }
+       }
+-
++      /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */
++      if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX)
++              return NULL;
+       ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+       if (ref_field) {
+               if (init_var_ref(ref_field, var_field, system, event_name)) {
+@@ -4338,6 +4340,10 @@ static int trace_action_create(struct hi
+       lockdep_assert_held(&event_mutex);
++      /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */
++      if (data->n_params > SYNTH_FIELDS_MAX)
++              return -EINVAL;
++
+       if (data->use_trace_keyword)
+               synth_event_name = data->synth_event_name;
+       else