5.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 4 Jan 2023 14:02:26 +0000 (15:02 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 4 Jan 2023 14:02:26 +0000 (15:02 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2023 14:02:26 +0000 (15:02 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2023 14:02:26 +0000 (15:02 +0100)
diff --git a/queue-5.15/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch b/queue-5.15/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch

new file mode 100644 (file)

index 0000000..1118a58
--- /dev/null
+++ b/queue-5.15/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch
@@ -0,0 +1,114 @@
+From 352b837a5541690d4f843819028cf2b8be83d424 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Wed, 30 Nov 2022 13:26:32 -0500
+Subject: dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 352b837a5541690d4f843819028cf2b8be83d424 upstream.
+
+Apply the same ABBA deadlock fix made in commit 4b60f452ec51 ("dm thin:
+Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata") to
+DM-cache's metadata.
+
+Reported-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Cc: stable@vger.kernel.org
+Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag")
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-metadata.c |   54 +++++++++++++++++++++++++++++++++++------
+ 1 file changed, 47 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-cache-metadata.c
++++ b/drivers/md/dm-cache-metadata.c
+@@ -551,11 +551,13 @@ static int __create_persistent_data_obje
+       return r;
+ }
+ 
+-static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
++static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd,
++                                            bool destroy_bm)
+ {
+       dm_sm_destroy(cmd->metadata_sm);
+       dm_tm_destroy(cmd->tm);
+-      dm_block_manager_destroy(cmd->bm);
++      if (destroy_bm)
++              dm_block_manager_destroy(cmd->bm);
+ }
+ 
+ typedef unsigned long (*flags_mutator)(unsigned long);
+@@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_
+               cmd2 = lookup(bdev);
+               if (cmd2) {
+                       mutex_unlock(&table_lock);
+-                      __destroy_persistent_data_objects(cmd);
++                      __destroy_persistent_data_objects(cmd, true);
+                       kfree(cmd);
+                       return cmd2;
+               }
+@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_c
+               mutex_unlock(&table_lock);
+ 
+               if (!cmd->fail_io)
+-                      __destroy_persistent_data_objects(cmd);
++                      __destroy_persistent_data_objects(cmd, true);
+               kfree(cmd);
+       }
+ }
+@@ -1808,14 +1810,52 @@ int dm_cache_metadata_needs_check(struct
+ 
+ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
+ {
+-      int r;
++      int r = -EINVAL;
++      struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++      /* fail_io is double-checked with cmd->root_lock held below */
++      if (unlikely(cmd->fail_io))
++              return r;
++
++      /*
++       * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++       * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++       * shrinker associated with the block manager's bufio client vs cmd root_lock).
++       * - must take shrinker_rwsem without holding cmd->root_lock
++       */
++      new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++                                       CACHE_MAX_CONCURRENT_LOCKS);
+ 
+       WRITE_LOCK(cmd);
+-      __destroy_persistent_data_objects(cmd);
+-      r = __create_persistent_data_objects(cmd, false);
++      if (cmd->fail_io) {
++              WRITE_UNLOCK(cmd);
++              goto out;
++      }
++
++      __destroy_persistent_data_objects(cmd, false);
++      old_bm = cmd->bm;
++      if (IS_ERR(new_bm)) {
++              DMERR("could not create block manager during abort");
++              cmd->bm = NULL;
++              r = PTR_ERR(new_bm);
++              goto out_unlock;
++      }
++
++      cmd->bm = new_bm;
++      r = __open_or_format_metadata(cmd, false);
++      if (r) {
++              cmd->bm = NULL;
++              goto out_unlock;
++      }
++      new_bm = NULL;
++out_unlock:
+       if (r)
+               cmd->fail_io = true;
+       WRITE_UNLOCK(cmd);
++      dm_block_manager_destroy(old_bm);
++out:
++      if (new_bm && !IS_ERR(new_bm))
++              dm_block_manager_destroy(new_bm);
+ 
+       return r;
+ }
diff --git a/queue-5.15/dm-cache-fix-uaf-in-destroy.patch b/queue-5.15/dm-cache-fix-uaf-in-destroy.patch

new file mode 100644 (file)

index 0000000..ea5676b
--- /dev/null
+++ b/queue-5.15/dm-cache-fix-uaf-in-destroy.patch
@@ -0,0 +1,33 @@
+From 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:49 +0800
+Subject: dm cache: Fix UAF in destroy()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa upstream.
+
+Dm_cache also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancel the timer again in destroy().
+
+Cc: stable@vger.kernel.org
+Fixes: c6b4fcbad044e ("dm: add cache target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-target.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -1895,6 +1895,7 @@ static void destroy(struct cache *cache)
+       if (cache->prison)
+               dm_bio_prison_destroy_v2(cache->prison);
+ 
++      cancel_delayed_work_sync(&cache->waker);
+       if (cache->wq)
+               destroy_workqueue(cache->wq);
+ 
diff --git a/queue-5.15/dm-cache-set-needs_check-flag-after-aborting-metadata.patch b/queue-5.15/dm-cache-set-needs_check-flag-after-aborting-metadata.patch

new file mode 100644 (file)

index 0000000..b38a309
--- /dev/null
+++ b/queue-5.15/dm-cache-set-needs_check-flag-after-aborting-metadata.patch
@@ -0,0 +1,47 @@
+From 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Wed, 30 Nov 2022 14:02:47 -0500
+Subject: dm cache: set needs_check flag after aborting metadata
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c upstream.
+
+Otherwise the commit that will be aborted will be associated with the
+metadata objects that will be torn down.  Must write needs_check flag
+to metadata with a reset block manager.
+
+Found through code-inspection (and compared against dm-thin.c).
+
+Cc: stable@vger.kernel.org
+Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag")
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-cache-target.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -915,16 +915,16 @@ static void abort_transaction(struct cac
+       if (get_cache_mode(cache) >= CM_READ_ONLY)
+               return;
+ 
+-      if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+-              DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
+-              set_cache_mode(cache, CM_FAIL);
+-      }
+-
+       DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
+       if (dm_cache_metadata_abort(cache->cmd)) {
+               DMERR("%s: failed to abort metadata transaction", dev_name);
+               set_cache_mode(cache, CM_FAIL);
+       }
++
++      if (dm_cache_metadata_set_needs_check(cache->cmd)) {
++              DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
++              set_cache_mode(cache, CM_FAIL);
++      }
+ }
+ 
+ static void metadata_operation_failed(struct cache *cache, const char *op, int r)
diff --git a/queue-5.15/dm-clone-fix-uaf-in-clone_dtr.patch b/queue-5.15/dm-clone-fix-uaf-in-clone_dtr.patch

new file mode 100644 (file)

index 0000000..46b6571
--- /dev/null
+++ b/queue-5.15/dm-clone-fix-uaf-in-clone_dtr.patch
@@ -0,0 +1,33 @@
+From e4b5957c6f749a501c464f92792f1c8e26b61a94 Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:48 +0800
+Subject: dm clone: Fix UAF in clone_dtr()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit e4b5957c6f749a501c464f92792f1c8e26b61a94 upstream.
+
+Dm_clone also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancel the timer again in clone_dtr().
+
+Cc: stable@vger.kernel.org
+Fixes: 7431b7835f554 ("dm: add clone target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-clone-target.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-clone-target.c
++++ b/drivers/md/dm-clone-target.c
+@@ -1959,6 +1959,7 @@ static void clone_dtr(struct dm_target *
+ 
+       mempool_exit(&clone->hydration_pool);
+       dm_kcopyd_client_destroy(clone->kcopyd_client);
++      cancel_delayed_work_sync(&clone->waker);
+       destroy_workqueue(clone->wq);
+       hash_table_exit(clone);
+       dm_clone_metadata_close(clone->cmd);
diff --git a/queue-5.15/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch b/queue-5.15/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch

new file mode 100644 (file)

index 0000000..8d82c34
--- /dev/null
+++ b/queue-5.15/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch
@@ -0,0 +1,34 @@
+From f50cb2cbabd6c4a60add93d72451728f86e4791c Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:50 +0800
+Subject: dm integrity: Fix UAF in dm_integrity_dtr()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit f50cb2cbabd6c4a60add93d72451728f86e4791c upstream.
+
+Dm_integrity also has the same UAF problem when dm_resume()
+and dm_destroy() are concurrent.
+
+Therefore, cancel the timer again in dm_integrity_dtr().
+
+Cc: stable@vger.kernel.org
+Fixes: 7eada909bfd7a ("dm: add integrity target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -4539,6 +4539,8 @@ static void dm_integrity_dtr(struct dm_t
+       BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
+       BUG_ON(!list_empty(&ic->wait_list));
+ 
++      if (ic->mode == 'B')
++              cancel_delayed_work_sync(&ic->bitmap_flush_work);
+       if (ic->metadata_wq)
+               destroy_workqueue(ic->metadata_wq);
+       if (ic->wait_wq)
diff --git a/queue-5.15/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch b/queue-5.15/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch

new file mode 100644 (file)

index 0000000..5e5c904
--- /dev/null
+++ b/queue-5.15/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch
@@ -0,0 +1,221 @@
+From 8111964f1b8524c4bb56b02cd9c7a37725ea21fd Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Wed, 30 Nov 2022 21:31:34 +0800
+Subject: dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 8111964f1b8524c4bb56b02cd9c7a37725ea21fd upstream.
+
+Following concurrent processes:
+
+          P1(drop cache)                P2(kworker)
+drop_caches_sysctl_handler
+ drop_slab
+  shrink_slab
+   down_read(&shrinker_rwsem)  - LOCK A
+   do_shrink_slab
+    super_cache_scan
+     prune_icache_sb
+      dispose_list
+       evict
+        ext4_evict_inode
+        ext4_clear_inode
+         ext4_discard_preallocations
+          ext4_mb_load_buddy_gfp
+           ext4_mb_init_cache
+            ext4_read_block_bitmap_nowait
+             ext4_read_bh_nowait
+              submit_bh
+               dm_submit_bio
+                                do_worker
+                                 process_deferred_bios
+                                  commit
+                                   metadata_operation_failed
+                                    dm_pool_abort_metadata
+                                     down_write(&pmd->root_lock) - LOCK B
+                                     __destroy_persistent_data_objects
+                                      dm_block_manager_destroy
+                                       dm_bufio_client_destroy
+                                        unregister_shrinker
+                                         down_write(&shrinker_rwsem)
+                thin_map                            |
+                 dm_thin_find_block                 ↓
+                  down_read(&pmd->root_lock) --> ABBA deadlock
+
+, which triggers hung task:
+
+[   76.974820] INFO: task kworker/u4:3:63 blocked for more than 15 seconds.
+[   76.976019]       Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
+[   76.978521] task:kworker/u4:3    state:D stack:0     pid:63    ppid:2
+[   76.978534] Workqueue: dm-thin do_worker
+[   76.978552] Call Trace:
+[   76.978564]  __schedule+0x6ba/0x10f0
+[   76.978582]  schedule+0x9d/0x1e0
+[   76.978588]  rwsem_down_write_slowpath+0x587/0xdf0
+[   76.978600]  down_write+0xec/0x110
+[   76.978607]  unregister_shrinker+0x2c/0xf0
+[   76.978616]  dm_bufio_client_destroy+0x116/0x3d0
+[   76.978625]  dm_block_manager_destroy+0x19/0x40
+[   76.978629]  __destroy_persistent_data_objects+0x5e/0x70
+[   76.978636]  dm_pool_abort_metadata+0x8e/0x100
+[   76.978643]  metadata_operation_failed+0x86/0x110
+[   76.978649]  commit+0x6a/0x230
+[   76.978655]  do_worker+0xc6e/0xd90
+[   76.978702]  process_one_work+0x269/0x630
+[   76.978714]  worker_thread+0x266/0x630
+[   76.978730]  kthread+0x151/0x1b0
+[   76.978772] INFO: task test.sh:2646 blocked for more than 15 seconds.
+[   76.979756]       Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
+[   76.982111] task:test.sh         state:D stack:0     pid:2646  ppid:2459
+[   76.982128] Call Trace:
+[   76.982139]  __schedule+0x6ba/0x10f0
+[   76.982155]  schedule+0x9d/0x1e0
+[   76.982159]  rwsem_down_read_slowpath+0x4f4/0x910
+[   76.982173]  down_read+0x84/0x170
+[   76.982177]  dm_thin_find_block+0x4c/0xd0
+[   76.982183]  thin_map+0x201/0x3d0
+[   76.982188]  __map_bio+0x5b/0x350
+[   76.982195]  dm_submit_bio+0x2b6/0x930
+[   76.982202]  __submit_bio+0x123/0x2d0
+[   76.982209]  submit_bio_noacct_nocheck+0x101/0x3e0
+[   76.982222]  submit_bio_noacct+0x389/0x770
+[   76.982227]  submit_bio+0x50/0xc0
+[   76.982232]  submit_bh_wbc+0x15e/0x230
+[   76.982238]  submit_bh+0x14/0x20
+[   76.982241]  ext4_read_bh_nowait+0xc5/0x130
+[   76.982247]  ext4_read_block_bitmap_nowait+0x340/0xc60
+[   76.982254]  ext4_mb_init_cache+0x1ce/0xdc0
+[   76.982259]  ext4_mb_load_buddy_gfp+0x987/0xfa0
+[   76.982263]  ext4_discard_preallocations+0x45d/0x830
+[   76.982274]  ext4_clear_inode+0x48/0xf0
+[   76.982280]  ext4_evict_inode+0xcf/0xc70
+[   76.982285]  evict+0x119/0x2b0
+[   76.982290]  dispose_list+0x43/0xa0
+[   76.982294]  prune_icache_sb+0x64/0x90
+[   76.982298]  super_cache_scan+0x155/0x210
+[   76.982303]  do_shrink_slab+0x19e/0x4e0
+[   76.982310]  shrink_slab+0x2bd/0x450
+[   76.982317]  drop_slab+0xcc/0x1a0
+[   76.982323]  drop_caches_sysctl_handler+0xb7/0xe0
+[   76.982327]  proc_sys_call_handler+0x1bc/0x300
+[   76.982331]  proc_sys_write+0x17/0x20
+[   76.982334]  vfs_write+0x3d3/0x570
+[   76.982342]  ksys_write+0x73/0x160
+[   76.982347]  __x64_sys_write+0x1e/0x30
+[   76.982352]  do_syscall_64+0x35/0x80
+[   76.982357]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Function metadata_operation_failed() is called when operations failed
+on dm pool metadata, dm pool will destroy and recreate metadata. So,
+shrinker will be unregistered and registered, which could down write
+shrinker_rwsem under pmd_write_lock.
+
+Fix it by allocating dm_block_manager before locking pmd->root_lock
+and destroying old dm_block_manager after unlocking pmd->root_lock,
+then old dm_block_manager is replaced with new dm_block_manager under
+pmd->root_lock. So, shrinker register/unregister could be done without
+holding pmd->root_lock.
+
+Fetch a reproducer in [Link].
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216676
+Cc: stable@vger.kernel.org #v5.2+
+Fixes: e49e582965b3 ("dm thin: add read only and fail io modes")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin-metadata.c |   51 +++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 43 insertions(+), 8 deletions(-)
+
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -776,13 +776,15 @@ static int __create_persistent_data_obje
+       return r;
+ }
+ 
+-static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
++static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
++                                            bool destroy_bm)
+ {
+       dm_sm_destroy(pmd->data_sm);
+       dm_sm_destroy(pmd->metadata_sm);
+       dm_tm_destroy(pmd->nb_tm);
+       dm_tm_destroy(pmd->tm);
+-      dm_block_manager_destroy(pmd->bm);
++      if (destroy_bm)
++              dm_block_manager_destroy(pmd->bm);
+ }
+ 
+ static int __begin_transaction(struct dm_pool_metadata *pmd)
+@@ -989,7 +991,7 @@ int dm_pool_metadata_close(struct dm_poo
+       }
+       pmd_write_unlock(pmd);
+       if (!pmd->fail_io)
+-              __destroy_persistent_data_objects(pmd);
++              __destroy_persistent_data_objects(pmd, true);
+ 
+       kfree(pmd);
+       return 0;
+@@ -1888,19 +1890,52 @@ static void __set_abort_with_changes_fla
+ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
+ {
+       int r = -EINVAL;
++      struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++      /* fail_io is double-checked with pmd->root_lock held below */
++      if (unlikely(pmd->fail_io))
++              return r;
++
++      /*
++       * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++       * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++       * shrinker associated with the block manager's bufio client vs pmd root_lock).
++       * - must take shrinker_rwsem without holding pmd->root_lock
++       */
++      new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++                                       THIN_MAX_CONCURRENT_LOCKS);
+ 
+       pmd_write_lock(pmd);
+-      if (pmd->fail_io)
++      if (pmd->fail_io) {
++              pmd_write_unlock(pmd);
+               goto out;
++      }
+ 
+       __set_abort_with_changes_flags(pmd);
+-      __destroy_persistent_data_objects(pmd);
+-      r = __create_persistent_data_objects(pmd, false);
++      __destroy_persistent_data_objects(pmd, false);
++      old_bm = pmd->bm;
++      if (IS_ERR(new_bm)) {
++              DMERR("could not create block manager during abort");
++              pmd->bm = NULL;
++              r = PTR_ERR(new_bm);
++              goto out_unlock;
++      }
++
++      pmd->bm = new_bm;
++      r = __open_or_format_metadata(pmd, false);
++      if (r) {
++              pmd->bm = NULL;
++              goto out_unlock;
++      }
++      new_bm = NULL;
++out_unlock:
+       if (r)
+               pmd->fail_io = true;
+-
+-out:
+       pmd_write_unlock(pmd);
++      dm_block_manager_destroy(old_bm);
++out:
++      if (new_bm && !IS_ERR(new_bm))
++              dm_block_manager_destroy(new_bm);
+ 
+       return r;
+ }
diff --git a/queue-5.15/dm-thin-fix-uaf-in-run_timer_softirq.patch b/queue-5.15/dm-thin-fix-uaf-in-run_timer_softirq.patch

new file mode 100644 (file)

index 0000000..7271832
--- /dev/null
+++ b/queue-5.15/dm-thin-fix-uaf-in-run_timer_softirq.patch
@@ -0,0 +1,100 @@
+From 88430ebcbc0ec637b710b947738839848c20feff Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Tue, 29 Nov 2022 10:48:47 +0800
+Subject: dm thin: Fix UAF in run_timer_softirq()
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 88430ebcbc0ec637b710b947738839848c20feff upstream.
+
+When dm_resume() and dm_destroy() are concurrent, it will
+lead to UAF, as follows:
+
+ BUG: KASAN: use-after-free in __run_timers+0x173/0x710
+ Write of size 8 at addr ffff88816d9490f0 by task swapper/0/0
+<snip>
+ Call Trace:
+  <IRQ>
+  dump_stack_lvl+0x73/0x9f
+  print_report.cold+0x132/0xaa2
+  _raw_spin_lock_irqsave+0xcd/0x160
+  __run_timers+0x173/0x710
+  kasan_report+0xad/0x110
+  __run_timers+0x173/0x710
+  __asan_store8+0x9c/0x140
+  __run_timers+0x173/0x710
+  call_timer_fn+0x310/0x310
+  pvclock_clocksource_read+0xfa/0x250
+  kvm_clock_read+0x2c/0x70
+  kvm_clock_get_cycles+0xd/0x20
+  ktime_get+0x5c/0x110
+  lapic_next_event+0x38/0x50
+  clockevents_program_event+0xf1/0x1e0
+  run_timer_softirq+0x49/0x90
+  __do_softirq+0x16e/0x62c
+  __irq_exit_rcu+0x1fa/0x270
+  irq_exit_rcu+0x12/0x20
+  sysvec_apic_timer_interrupt+0x8e/0xc0
+
+One of the concurrency UAF can be shown as below:
+
+        use                                  free
+do_resume                           |
+  __find_device_hash_cell           |
+    dm_get                          |
+      atomic_inc(&md->holders)      |
+                                    | dm_destroy
+                                    |   __dm_destroy
+                                    |     if (!dm_suspended_md(md))
+                                    |     atomic_read(&md->holders)
+                                    |     msleep(1)
+  dm_resume                         |
+    __dm_resume                     |
+      dm_table_resume_targets       |
+        pool_resume                 |
+          do_waker  #add delay work |
+  dm_put                            |
+    atomic_dec(&md->holders)        |
+                                    |     dm_table_destroy
+                                    |       pool_dtr
+                                    |         __pool_dec
+                                    |           __pool_destroy
+                                    |             destroy_workqueue
+                                    |             kfree(pool) # free pool
+        time out
+__do_softirq
+  run_timer_softirq # pool has already been freed
+
+This can be easily reproduced using:
+  1. create thin-pool
+  2. dmsetup suspend pool
+  3. dmsetup resume pool
+  4. dmsetup remove_all # Concurrent with 3
+
+The root cause of this UAF bug is that dm_resume() adds timer after
+dm_destroy() skips cancelling the timer because of suspend status.
+After timeout, it will call run_timer_softirq(), however pool has
+already been freed. The concurrency UAF bug will happen.
+
+Therefore, cancel the timer again in __pool_destroy().
+
+Cc: stable@vger.kernel.org
+Fixes: 991d9fa02da0d ("dm: add thin provisioning target")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -2907,6 +2907,8 @@ static void __pool_destroy(struct pool *
+       dm_bio_prison_destroy(pool->prison);
+       dm_kcopyd_client_destroy(pool->copier);
+ 
++      cancel_delayed_work_sync(&pool->waker);
++      cancel_delayed_work_sync(&pool->no_space_timeout);
+       if (pool->wq)
+               destroy_workqueue(pool->wq);
+ 
diff --git a/queue-5.15/dm-thin-resume-even-if-in-fail-mode.patch b/queue-5.15/dm-thin-resume-even-if-in-fail-mode.patch

new file mode 100644 (file)

index 0000000..e82c0e3
--- /dev/null
+++ b/queue-5.15/dm-thin-resume-even-if-in-fail-mode.patch
@@ -0,0 +1,72 @@
+From 19eb1650afeb1aa86151f61900e9e5f1de5d8d02 Mon Sep 17 00:00:00 2001
+From: Luo Meng <luomeng12@huawei.com>
+Date: Wed, 30 Nov 2022 10:09:45 +0800
+Subject: dm thin: resume even if in FAIL mode
+
+From: Luo Meng <luomeng12@huawei.com>
+
+commit 19eb1650afeb1aa86151f61900e9e5f1de5d8d02 upstream.
+
+If a thin-pool sets fail_io while suspending, resume will fail with:
+ device-mapper: resume ioctl on vg-thinpool  failed: Invalid argument
+
+The thin-pool also can't be removed if an in-flight bio is in the
+deferred list.
+
+This can be easily reproduced using:
+
+  echo "offline" > /sys/block/sda/device/state
+  dd if=/dev/zero of=/dev/mapper/thin bs=4K count=1
+  dmsetup suspend /dev/mapper/pool
+  mkfs.ext4 /dev/mapper/thin
+  dmsetup resume /dev/mapper/pool
+
+The root cause is that maybe_resize_data_dev() checks fail_io and returns
+an error before dm_resume() is called.
+
+Fix this by adding FAIL mode check at the end of pool_preresume().
+
+Cc: stable@vger.kernel.org
+Fixes: da105ed5fd7e ("dm thin metadata: introduce dm_pool_abort_metadata")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin.c |   16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -3566,20 +3566,28 @@ static int pool_preresume(struct dm_targ
+        */
+       r = bind_control_target(pool, ti);
+       if (r)
+-              return r;
++              goto out;
+ 
+       r = maybe_resize_data_dev(ti, &need_commit1);
+       if (r)
+-              return r;
++              goto out;
+ 
+       r = maybe_resize_metadata_dev(ti, &need_commit2);
+       if (r)
+-              return r;
++              goto out;
+ 
+       if (need_commit1 || need_commit2)
+               (void) commit(pool);
++out:
++      /*
++       * When a thin-pool is PM_FAIL, it cannot be rebuilt if
++       * bio is in deferred list. Therefore need to return 0
++       * to allow pool_resume() to flush IO.
++       */
++      if (r && get_pool_mode(pool) == PM_FAIL)
++              r = 0;
+ 
+-      return 0;
++      return r;
+ }
+ 
+ static void pool_suspend_active_thins(struct pool *pool)
diff --git a/queue-5.15/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch b/queue-5.15/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch

new file mode 100644 (file)

index 0000000..9bb4877
--- /dev/null
+++ b/queue-5.15/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch
@@ -0,0 +1,84 @@
+From 7991dbff6849f67e823b7cc0c15e5a90b0549b9f Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Thu, 8 Dec 2022 22:28:02 +0800
+Subject: dm thin: Use last transaction's pmd->root when commit failed
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 7991dbff6849f67e823b7cc0c15e5a90b0549b9f upstream.
+
+Recently we found a soft lockup problem in dm thin pool btree lookup
+code due to corrupted metadata:
+
+ Kernel panic - not syncing: softlockup: hung tasks
+ CPU: 7 PID: 2669225 Comm: kworker/u16:3
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+ Workqueue: dm-thin do_worker [dm_thin_pool]
+ Call Trace:
+   <IRQ>
+   dump_stack+0x9c/0xd3
+   panic+0x35d/0x6b9
+   watchdog_timer_fn.cold+0x16/0x25
+   __run_hrtimer+0xa2/0x2d0
+   </IRQ>
+   RIP: 0010:__relink_lru+0x102/0x220 [dm_bufio]
+   __bufio_new+0x11f/0x4f0 [dm_bufio]
+   new_read+0xa3/0x1e0 [dm_bufio]
+   dm_bm_read_lock+0x33/0xd0 [dm_persistent_data]
+   ro_step+0x63/0x100 [dm_persistent_data]
+   btree_lookup_raw.constprop.0+0x44/0x220 [dm_persistent_data]
+   dm_btree_lookup+0x16f/0x210 [dm_persistent_data]
+   dm_thin_find_block+0x12c/0x210 [dm_thin_pool]
+   __process_bio_read_only+0xc5/0x400 [dm_thin_pool]
+   process_thin_deferred_bios+0x1a4/0x4a0 [dm_thin_pool]
+   process_one_work+0x3c5/0x730
+
+Following process may generate a broken btree mixed with fresh and
+stale btree nodes, which could get dm thin trapped in an infinite loop
+while looking up data block:
+ Transaction 1: pmd->root = A, A->B->C   // One path in btree
+                pmd->root = X, X->Y->Z   // Copy-up
+ Transaction 2: X,Z is updated on disk, Y write failed.
+                // Commit failed, dm thin becomes read-only.
+                process_bio_read_only
+                dm_thin_find_block
+                 __find_block
+                  dm_btree_lookup(pmd->root)
+The pmd->root points to a broken btree, Y may contain stale node
+pointing to any block, for example X, which gets dm thin trapped into
+a dead loop while looking up Z.
+
+Fix this by setting pmd->root in __open_metadata(), so that dm thin
+will use the last transaction's pmd->root if commit failed.
+
+Fetch a reproducer in [Link].
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216790
+Cc: stable@vger.kernel.org
+Fixes: 991d9fa02da0 ("dm: add thin provisioning target")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Acked-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin-metadata.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -724,6 +724,15 @@ static int __open_metadata(struct dm_poo
+               goto bad_cleanup_data_sm;
+       }
+ 
++      /*
++       * For pool metadata opening process, root setting is redundant
++       * because it will be set again in __begin_transaction(). But dm
++       * pool aborting process really needs to get last transaction's
++       * root to avoid accessing broken btree.
++       */
++      pmd->root = le64_to_cpu(disk_super->data_mapping_root);
++      pmd->details_root = le64_to_cpu(disk_super->device_details_root);
++
+       __setup_btree_details(pmd);
+       dm_bm_unlock(sblock);
+ 
diff --git a/queue-5.15/fs-dlm-fix-sock-release-if-listen-fails.patch b/queue-5.15/fs-dlm-fix-sock-release-if-listen-fails.patch

new file mode 100644 (file)

index 0000000..c0e032e
--- /dev/null
+++ b/queue-5.15/fs-dlm-fix-sock-release-if-listen-fails.patch
@@ -0,0 +1,42 @@
+From 08ae0547e75ec3d062b6b6b9cf4830c730df68df Mon Sep 17 00:00:00 2001
+From: Alexander Aring <aahringo@redhat.com>
+Date: Thu, 27 Oct 2022 16:45:11 -0400
+Subject: fs: dlm: fix sock release if listen fails
+
+From: Alexander Aring <aahringo@redhat.com>
+
+commit 08ae0547e75ec3d062b6b6b9cf4830c730df68df upstream.
+
+This patch fixes a double sock_release() call when listen() fails for
+the dlm lowcomms listen socket. The caller of dlm_listen_for_all()
+should never care about releasing the socket if dlm_listen_for_all()
+fails; it is now done only once, if listen() fails.
+
+Cc: stable@vger.kernel.org
+Fixes: 2dc6b1158c28 ("fs: dlm: introduce generic listen")
+Signed-off-by: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: David Teigland <teigland@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/dlm/lowcomms.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1797,7 +1797,7 @@ static int dlm_listen_for_all(void)
+       result = sock->ops->listen(sock, 5);
+       if (result < 0) {
+               dlm_close_sock(&listen_con.sock);
+-              goto out;
++              return result;
+       }
+ 
+       return 0;
+@@ -2000,7 +2000,6 @@ fail_listen:
+       dlm_proto_ops = NULL;
+ fail_proto_ops:
+       dlm_allow_conn = 0;
+-      dlm_close_sock(&listen_con.sock);
+       work_stop();
+ fail_local:
+       deinit_local();
diff --git a/queue-5.15/fs-dlm-retry-accept-until-eagain-or-error-returns.patch b/queue-5.15/fs-dlm-retry-accept-until-eagain-or-error-returns.patch

new file mode 100644 (file)

index 0000000..1e4c48a
--- /dev/null
+++ b/queue-5.15/fs-dlm-retry-accept-until-eagain-or-error-returns.patch
@@ -0,0 +1,42 @@
+From f0f4bb431bd543ed7bebbaea3ce326cfcd5388bc Mon Sep 17 00:00:00 2001
+From: Alexander Aring <aahringo@redhat.com>
+Date: Thu, 27 Oct 2022 16:45:12 -0400
+Subject: fs: dlm: retry accept() until -EAGAIN or error returns
+
+From: Alexander Aring <aahringo@redhat.com>
+
+commit f0f4bb431bd543ed7bebbaea3ce326cfcd5388bc upstream.
+
+This patch fixes a race that occurs if we get a socket data-ready event
+twice while the listen connection worker is queued. Currently it will be
+served only once, but we need to do it (in this case twice) until we hit
+-EAGAIN, which tells us there is no pending accept going on.
+
+This patch wraps the accept in a do-while loop that runs until we receive
+a return value different from 0, as was done before commit d11ccd451b65
+("fs: dlm: listen socket out of connection hash").
+
+Cc: stable@vger.kernel.org
+Fixes: d11ccd451b65 ("fs: dlm: listen socket out of connection hash")
+Signed-off-by: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: David Teigland <teigland@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/dlm/lowcomms.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1520,7 +1520,11 @@ static void process_recv_sockets(struct
+ 
+ static void process_listen_recv_socket(struct work_struct *work)
+ {
+-      accept_from_sock(&listen_con);
++      int ret;
++
++      do {
++              ret = accept_from_sock(&listen_con);
++      } while (!ret);
+ }
+ 
+ static void dlm_connect(struct connection *con)
diff --git a/queue-5.15/mptcp-mark-ops-structures-as-ro_after_init.patch b/queue-5.15/mptcp-mark-ops-structures-as-ro_after_init.patch

new file mode 100644 (file)

index 0000000..51182e1
--- /dev/null
+++ b/queue-5.15/mptcp-mark-ops-structures-as-ro_after_init.patch
@@ -0,0 +1,75 @@
+From 51fa7f8ebf0e25c7a9039fa3988a623d5f3855aa Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Tue, 15 Feb 2022 18:11:29 -0800
+Subject: mptcp: mark ops structures as ro_after_init
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 51fa7f8ebf0e25c7a9039fa3988a623d5f3855aa upstream.
+
+These structures are initialised from the init hooks, so we can't make
+them 'const'.  But no writes occur afterwards, so we can use ro_after_init.
+
+Also, remove bogus EXPORT_SYMBOL, the only access comes from ip
+stack, not from kernel modules.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/subflow.c |   15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -484,8 +484,7 @@ do_reset:
+ }
+ 
+ struct request_sock_ops mptcp_subflow_request_sock_ops;
+-EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
+-static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
++static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
+ 
+ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+ {
+@@ -506,9 +505,9 @@ drop:
+ }
+ 
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+-static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
+-static struct inet_connection_sock_af_ops subflow_v6_specific;
+-static struct inet_connection_sock_af_ops subflow_v6m_specific;
++static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
++static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
++static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
+ static struct proto tcpv6_prot_override;
+ 
+ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+@@ -790,7 +789,7 @@ dispose_child:
+       return child;
+ }
+ 
+-static struct inet_connection_sock_af_ops subflow_specific;
++static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
+ static struct proto tcp_prot_override;
+ 
+ enum mapping_status {
+@@ -1327,7 +1326,7 @@ static void subflow_write_space(struct s
+       mptcp_write_space(sk);
+ }
+ 
+-static struct inet_connection_sock_af_ops *
++static const struct inet_connection_sock_af_ops *
+ subflow_default_af_ops(struct sock *sk)
+ {
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+@@ -1342,7 +1341,7 @@ void mptcpv6_handle_mapped(struct sock *
+ {
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+-      struct inet_connection_sock_af_ops *target;
++      const struct inet_connection_sock_af_ops *target;
+ 
+       target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
+ 
diff --git a/queue-5.15/mptcp-remove-mptcp-ifdef-in-tcp-syn-cookies.patch b/queue-5.15/mptcp-remove-mptcp-ifdef-in-tcp-syn-cookies.patch

new file mode 100644 (file)

index 0000000..3f85d69
--- /dev/null
+++ b/queue-5.15/mptcp-remove-mptcp-ifdef-in-tcp-syn-cookies.patch
@@ -0,0 +1,113 @@
+From 3fff88186f047627bb128d65155f42517f8e448f Mon Sep 17 00:00:00 2001
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+Date: Fri, 9 Dec 2022 16:28:08 -0800
+Subject: mptcp: remove MPTCP 'ifdef' in TCP SYN cookies
+
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+
+commit 3fff88186f047627bb128d65155f42517f8e448f upstream.
+
+To ease the maintenance, it is often recommended to avoid having #ifdef
+preprocessor conditions.
+
+Here the section related to CONFIG_MPTCP was quite short but the next
+commit needs to add more code around. It is then cleaner to move
+specific MPTCP code to functions located in net/mptcp directory.
+
+Now that mptcp_subflow_request_sock_ops structure can be static, it can
+also be marked as "read only after init".
+
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/mptcp.h   |   12 ++++++++++--
+ net/ipv4/syncookies.c |    7 +++----
+ net/mptcp/subflow.c   |   12 +++++++++++-
+ 3 files changed, 24 insertions(+), 7 deletions(-)
+
+--- a/include/net/mptcp.h
++++ b/include/net/mptcp.h
+@@ -93,8 +93,6 @@ struct mptcp_out_options {
+ };
+ 
+ #ifdef CONFIG_MPTCP
+-extern struct request_sock_ops mptcp_subflow_request_sock_ops;
+-
+ void mptcp_init(void);
+ 
+ static inline bool sk_is_mptcp(const struct sock *sk)
+@@ -182,6 +180,9 @@ void mptcp_seq_show(struct seq_file *seq
+ int mptcp_subflow_init_cookie_req(struct request_sock *req,
+                                 const struct sock *sk_listener,
+                                 struct sk_buff *skb);
++struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
++                                             struct sock *sk_listener,
++                                             bool attach_listener);
+ 
+ __be32 mptcp_get_reset_option(const struct sk_buff *skb);
+ 
+@@ -274,6 +275,13 @@ static inline int mptcp_subflow_init_coo
+       return 0; /* TCP fallback */
+ }
+ 
++static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
++                                                           struct sock *sk_listener,
++                                                           bool attach_listener)
++{
++      return NULL;
++}
++
+ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)  { return htonl(0u); }
+ #endif /* CONFIG_MPTCP */
+ 
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -290,12 +290,11 @@ struct request_sock *cookie_tcp_reqsk_al
+       struct tcp_request_sock *treq;
+       struct request_sock *req;
+ 
+-#ifdef CONFIG_MPTCP
+       if (sk_is_mptcp(sk))
+-              ops = &mptcp_subflow_request_sock_ops;
+-#endif
++              req = mptcp_subflow_reqsk_alloc(ops, sk, false);
++      else
++              req = inet_reqsk_alloc(ops, sk, false);
+ 
+-      req = inet_reqsk_alloc(ops, sk, false);
+       if (!req)
+               return NULL;
+ 
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -483,7 +483,7 @@ do_reset:
+       mptcp_subflow_reset(sk);
+ }
+ 
+-struct request_sock_ops mptcp_subflow_request_sock_ops;
++static struct request_sock_ops mptcp_subflow_request_sock_ops __ro_after_init;
+ static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
+ 
+ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+@@ -536,6 +536,16 @@ drop:
+ }
+ #endif
+ 
++struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
++                                             struct sock *sk_listener,
++                                             bool attach_listener)
++{
++      ops = &mptcp_subflow_request_sock_ops;
++
++      return inet_reqsk_alloc(ops, sk_listener, attach_listener);
++}
++EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc);
++
+ /* validate hmac received in third ACK */
+ static bool subflow_hmac_valid(const struct request_sock *req,
+                              const struct mptcp_options_received *mp_opt)
diff --git a/queue-5.15/of-kexec-fix-reading-32-bit-linux-initrd-start-end-values.patch b/queue-5.15/of-kexec-fix-reading-32-bit-linux-initrd-start-end-values.patch

new file mode 100644 (file)

index 0000000..e3997e7
--- /dev/null
+++ b/queue-5.15/of-kexec-fix-reading-32-bit-linux-initrd-start-end-values.patch
@@ -0,0 +1,67 @@
+From e553ad8d7957697385e81034bf76db3b2cb2cf27 Mon Sep 17 00:00:00 2001
+From: Rob Herring <robh@kernel.org>
+Date: Mon, 28 Nov 2022 14:24:39 -0600
+Subject: of/kexec: Fix reading 32-bit "linux,initrd-{start,end}" values
+
+From: Rob Herring <robh@kernel.org>
+
+commit e553ad8d7957697385e81034bf76db3b2cb2cf27 upstream.
+
+"linux,initrd-start" and "linux,initrd-end" can be 32-bit values even on
+a 64-bit platform. Ideally, the size should be based on
+'#address-cells', but that has never been enforced in the kernel's FDT
+boot parsing code (early_init_dt_check_for_initrd()). Bootloader
+behavior is known to vary. For example, kexec always writes these as
+64-bit. The result of incorrectly reading 32-bit values is most likely
+that the reserved memory for the original initrd will still be reserved
+for the new kernel. The original arm64 equivalent of this code failed to
+release the initrd reserved memory in *all* cases.
+
+Use of_read_number() to mirror the early_init_dt_check_for_initrd()
+code.
+
+Fixes: b30be4dc733e ("of: Add a common kexec FDT setup function")
+Cc: stable@vger.kernel.org
+Reported-by: Peter Maydell <peter.maydell@linaro.org>
+Link: https://lore.kernel.org/r/20221128202440.1411895-1-robh@kernel.org
+Signed-off-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/of/kexec.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/of/kexec.c
++++ b/drivers/of/kexec.c
+@@ -284,7 +284,7 @@ void *of_kexec_alloc_and_setup_fdt(const
+                                  const char *cmdline, size_t extra_fdt_size)
+ {
+       void *fdt;
+-      int ret, chosen_node;
++      int ret, chosen_node, len;
+       const void *prop;
+       size_t fdt_size;
+ 
+@@ -327,19 +327,19 @@ void *of_kexec_alloc_and_setup_fdt(const
+               goto out;
+ 
+       /* Did we boot using an initrd? */
+-      prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
++      prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", &len);
+       if (prop) {
+               u64 tmp_start, tmp_end, tmp_size;
+ 
+-              tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
++              tmp_start = of_read_number(prop, len / 4);
+ 
+-              prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
++              prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", &len);
+               if (!prop) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+ 
+-              tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
++              tmp_end = of_read_number(prop, len / 4);
+ 
+               /*
+                * kexec reserves exact initrd size, while firmware may
diff --git a/queue-5.15/perf-core-call-lsm-hook-after-copying-perf_event_attr.patch b/queue-5.15/perf-core-call-lsm-hook-after-copying-perf_event_attr.patch

new file mode 100644 (file)

index 0000000..24bed25
--- /dev/null
+++ b/queue-5.15/perf-core-call-lsm-hook-after-copying-perf_event_attr.patch
@@ -0,0 +1,41 @@
+From 0a041ebca4956292cadfb14a63ace3a9c1dcb0a3 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Tue, 20 Dec 2022 14:31:40 -0800
+Subject: perf/core: Call LSM hook after copying perf_event_attr
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit 0a041ebca4956292cadfb14a63ace3a9c1dcb0a3 upstream.
+
+perf_event_open() passes the attr struct to security_perf_event_open()
+before perf_copy_attr() has initialized it.
+
+Fixes: da97e18458fb ("perf_event: Add support for LSM and SELinux checks")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20221220223140.4020470-1-namhyung@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -12215,12 +12215,12 @@ SYSCALL_DEFINE5(perf_event_open,
+       if (flags & ~PERF_FLAG_ALL)
+               return -EINVAL;
+ 
+-      /* Do we allow access to perf_event_open(2) ? */
+-      err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
++      err = perf_copy_attr(attr_uptr, &attr);
+       if (err)
+               return err;
+ 
+-      err = perf_copy_attr(attr_uptr, &attr);
++      /* Do we allow access to perf_event_open(2) ? */
++      err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+       if (err)
+               return err;
+ 
diff --git a/queue-5.15/series b/queue-5.15/series

index 5e42be2602763c08fb4c1f94077425e0871f6286..fe6e39f5bae578455f8482183b3a069727b29c85 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -75,3 +75,19 @@ cpufreq-init-completion-before-kobject_init_and_add.patch
  rtmutex-add-acquire-semantics-for-rtmutex-lock-acqui.patch
  alsa-patch_realtek-fix-dell-inspiron-plus-16.patch
  alsa-hda-realtek-apply-dual-codec-fixup-for-dell-lat.patch
+fs-dlm-fix-sock-release-if-listen-fails.patch
+fs-dlm-retry-accept-until-eagain-or-error-returns.patch
+mptcp-mark-ops-structures-as-ro_after_init.patch
+mptcp-remove-mptcp-ifdef-in-tcp-syn-cookies.patch
+dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch
+dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch
+dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch
+dm-thin-resume-even-if-in-fail-mode.patch
+dm-thin-fix-uaf-in-run_timer_softirq.patch
+dm-integrity-fix-uaf-in-dm_integrity_dtr.patch
+dm-clone-fix-uaf-in-clone_dtr.patch
+dm-cache-fix-uaf-in-destroy.patch
+dm-cache-set-needs_check-flag-after-aborting-metadata.patch
+tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch
+perf-core-call-lsm-hook-after-copying-perf_event_attr.patch
+of-kexec-fix-reading-32-bit-linux-initrd-start-end-values.patch
diff --git a/queue-5.15/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch b/queue-5.15/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch

new file mode 100644 (file)

index 0000000..1dda693
--- /dev/null
+++ b/queue-5.15/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch
@@ -0,0 +1,153 @@
+From 82470f7d9044842618c847a7166de2b7458157a7 Mon Sep 17 00:00:00 2001
+From: Zheng Yejian <zhengyejian1@huawei.com>
+Date: Wed, 7 Dec 2022 11:51:43 +0800
+Subject: tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx'
+
+From: Zheng Yejian <zhengyejian1@huawei.com>
+
+commit 82470f7d9044842618c847a7166de2b7458157a7 upstream.
+
+When generating a synthetic event with many params and then creating a
+trace action for it [1], a kernel panic happens [2].
+
+This is because in trace_action_create() 'data->n_params' can be up to
+SYNTH_FIELDS_MAX (current value is 64), and array 'data->var_ref_idx'
+keeps indices into array 'hist_data->var_refs' for each synthetic event
+param, but the length of 'data->var_ref_idx' is TRACING_MAP_VARS_MAX
+(current value is 16), so an out-of-bound write happens when 'data->n_params'
+is more than 16. In this case, 'data->match_data.event' is overwritten,
+which eventually causes the panic.
+
+To solve the issue, adjust the length of 'data->var_ref_idx' to be
+SYNTH_FIELDS_MAX and add sanity checks to avoid out-of-bound write.
+
+[1]
+ # cd /sys/kernel/tracing/
+ # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\
+int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\
+int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\
+int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\
+int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\
+int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\
+int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\
+int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\
+int v63" >> synthetic_events
+ # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="bash"' >> \
+events/sched/sched_waking/trigger
+ # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
+pid,pid,pid,pid,pid,pid,pid,pid,pid)" >> events/sched/sched_switch/trigger
+
+[2]
+BUG: unable to handle page fault for address: ffff91c900000000
+PGD 61001067 P4D 61001067 PUD 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+CPU: 2 PID: 322 Comm: bash Tainted: G        W          6.1.0-rc8+ #229
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
+RIP: 0010:strcmp+0xc/0x30
+Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
+c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
+07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
+RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
+RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
+RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
+R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
+R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
+FS:  00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
+Call Trace:
+ <TASK>
+ __find_event_file+0x55/0x90
+ action_create+0x76c/0x1060
+ event_hist_trigger_parse+0x146d/0x2060
+ ? event_trigger_write+0x31/0xd0
+ trigger_process_regex+0xbb/0x110
+ event_trigger_write+0x6b/0xd0
+ vfs_write+0xc8/0x3e0
+ ? alloc_fd+0xc0/0x160
+ ? preempt_count_add+0x4d/0xa0
+ ? preempt_count_add+0x70/0xa0
+ ksys_write+0x5f/0xe0
+ do_syscall_64+0x3b/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f1d1d0cf077
+Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e
+fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00
+f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74
+RSP: 002b:00007ffcebb0e568 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000000143 RCX: 00007f1d1d0cf077
+RDX: 0000000000000143 RSI: 00005639265aa7e0 RDI: 0000000000000001
+RBP: 00005639265aa7e0 R08: 000000000000000a R09: 0000000000000142
+R10: 000056392639c017 R11: 0000000000000246 R12: 0000000000000143
+R13: 00007f1d1d1ae6a0 R14: 00007f1d1d1aa4a0 R15: 00007f1d1d1a98a0
+ </TASK>
+Modules linked in:
+CR2: ffff91c900000000
+---[ end trace 0000000000000000 ]---
+RIP: 0010:strcmp+0xc/0x30
+Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
+c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
+07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
+RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
+RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
+RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
+R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
+R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
+FS:  00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
+
+Link: https://lore.kernel.org/linux-trace-kernel/20221207035143.2278781-1-zhengyejian1@huawei.com
+
+Cc: <mhiramat@kernel.org>
+Cc: <zanussi@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: d380dcde9a07 ("tracing: Fix now invalid var_ref_vals assumption in trace action")
+Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_hist.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -452,7 +452,7 @@ struct action_data {
+        * event param, and is passed to the synthetic event
+        * invocation.
+        */
+-      unsigned int            var_ref_idx[TRACING_MAP_VARS_MAX];
++      unsigned int            var_ref_idx[SYNTH_FIELDS_MAX];
+       struct synth_event      *synth_event;
+       bool                    use_trace_keyword;
+       char                    *synth_event_name;
+@@ -1895,7 +1895,9 @@ static struct hist_field *create_var_ref
+                       return ref_field;
+               }
+       }
+-
++      /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */
++      if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX)
++              return NULL;
+       ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+       if (ref_field) {
+               if (init_var_ref(ref_field, var_field, system, event_name)) {
+@@ -3524,6 +3526,10 @@ static int trace_action_create(struct hi
+ 
+       lockdep_assert_held(&event_mutex);
+ 
++      /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */
++      if (data->n_params > SYNTH_FIELDS_MAX)
++              return -EINVAL;
++
+       if (data->use_trace_keyword)
+               synth_event_name = data->synth_event_name;
+       else
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jan 2023 14:02:26 +0000 (15:02 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jan 2023 14:02:26 +0000 (15:02 +0100)
queue-5.15/dm-cache-fix-abba-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-cache-fix-uaf-in-destroy.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-cache-set-needs_check-flag-after-aborting-metadata.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-clone-fix-uaf-in-clone_dtr.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-integrity-fix-uaf-in-dm_integrity_dtr.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-thin-fix-abba-deadlock-between-shrink_slab-and-dm_pool_abort_metadata.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-thin-fix-uaf-in-run_timer_softirq.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-thin-resume-even-if-in-fail-mode.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/dm-thin-use-last-transaction-s-pmd-root-when-commit-failed.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/fs-dlm-fix-sock-release-if-listen-fails.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/fs-dlm-retry-accept-until-eagain-or-error-returns.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/mptcp-mark-ops-structures-as-ro_after_init.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/mptcp-remove-mptcp-ifdef-in-tcp-syn-cookies.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/of-kexec-fix-reading-32-bit-linux-initrd-start-end-values.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/perf-core-call-lsm-hook-after-copying-perf_event_attr.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history
queue-5.15/tracing-hist-fix-out-of-bound-write-on-action_data.var_ref_idx.patch	[new file with mode: 0644]	patch \| blob