From: Greg Kroah-Hartman
Date: Wed, 19 Jun 2024 07:07:35 +0000 (+0200)
Subject: 6.6-stable patches
X-Git-Tag: v6.1.95~62
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d916b86ee19b3d424ff2057d9dfcda21bb615de3;p=thirdparty%2Fkernel%2Fstable-queue.git

6.6-stable patches

added patches:
	btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
	btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
	btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
	btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch
	btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch
---

diff --git a/queue-6.6/btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
new file mode 100644
index 00000000000..60de9779be1
--- /dev/null
+++ b/queue-6.6/btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
@@ -0,0 +1,116 @@
+From 87463f7e0250d471fac41e7c9c45ae21d83b5f85 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig
+Date: Mon, 5 Jun 2023 10:51:08 +0200
+Subject: btrfs: zoned: factor out DUP bg handling from btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig
+
+commit 87463f7e0250d471fac41e7c9c45ae21d83b5f85 upstream.
+
+Split the code handling a type DUP block group from
+btrfs_load_block_group_zone_info to make the code more readable.
+
+Reviewed-by: Johannes Thumshirn
+Signed-off-by: Christoph Hellwig
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/zoned.c |  79 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------
+ 1 file changed, 42 insertions(+), 37 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1392,6 +1392,47 @@ static int btrfs_load_block_group_single
+ 	return 0;
+ }
+
++static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
++				      struct map_lookup *map,
++				      struct zone_info *zone_info,
++				      unsigned long *active)
++{
++	if (map->type & BTRFS_BLOCK_GROUP_DATA) {
++		btrfs_err(bg->fs_info,
++			  "zoned: profile DUP not yet supported on data bg");
++		return -EINVAL;
++	}
++
++	if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
++		btrfs_err(bg->fs_info,
++			  "zoned: cannot recover write pointer for zone %llu",
++			  zone_info[0].physical);
++		return -EIO;
++	}
++	if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
++		btrfs_err(bg->fs_info,
++			  "zoned: cannot recover write pointer for zone %llu",
++			  zone_info[1].physical);
++		return -EIO;
++	}
++	if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
++		btrfs_err(bg->fs_info,
++			  "zoned: write pointer offset mismatch of zones in DUP profile");
++		return -EIO;
++	}
++
++	if (test_bit(0, active) != test_bit(1, active)) {
++		if (!btrfs_zone_activate(bg))
++			return -EIO;
++	} else if (test_bit(0, active)) {
++		set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
++	}
++
++	bg->alloc_offset = zone_info[0].alloc_offset;
++	bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
++	return 0;
++}
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+ 	struct btrfs_fs_info *fs_info = cache->fs_info;
+@@ -1481,43 +1522,7 @@ int btrfs_load_block_group_zone_info(str
+ 		ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
+ 		break;
+ 	case BTRFS_BLOCK_GROUP_DUP:
+-		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
+-			btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
+-			ret = -EINVAL;
+-			goto out;
+-		}
+-		if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+-			btrfs_err(fs_info,
+-				  "zoned: cannot recover write pointer for zone %llu",
+-				  zone_info[0].physical);
+-			ret = -EIO;
+-			goto out;
+-		}
+-		if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
+-			btrfs_err(fs_info,
+-				  "zoned: cannot recover write pointer for zone %llu",
+-				  zone_info[1].physical);
+-			ret = -EIO;
+-			goto out;
+-		}
+-		if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
+-			btrfs_err(fs_info,
+-				  "zoned: write pointer offset mismatch of zones in DUP profile");
+-			ret = -EIO;
+-			goto out;
+-		}
+-		if (test_bit(0, active) != test_bit(1, active)) {
+-			if (!btrfs_zone_activate(cache)) {
+-				ret = -EIO;
+-				goto out;
+-			}
+-		} else {
+-			if (test_bit(0, active))
+-				set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+-					&cache->runtime_flags);
+-		}
+-		cache->alloc_offset = zone_info[0].alloc_offset;
+-		cache->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
++		ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
+ 		break;
+ 	case BTRFS_BLOCK_GROUP_RAID1:
+ 	case BTRFS_BLOCK_GROUP_RAID0:
diff --git a/queue-6.6/btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
new file mode 100644
index 00000000000..4841ca60837
--- /dev/null
+++ b/queue-6.6/btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
@@ -0,0 +1,235 @@
+From 09a46725cc84165af452d978a3532d6b97a28796 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig
+Date: Mon, 5 Jun 2023 10:51:06 +0200
+Subject: btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig
+
+commit 09a46725cc84165af452d978a3532d6b97a28796 upstream.
+
+Split out a helper for the body of the per-zone loop in
+btrfs_load_block_group_zone_info to make the function easier to read and
+modify.
+
+Reviewed-by: Johannes Thumshirn
+Signed-off-by: Christoph Hellwig
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/zoned.c |  184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------
+ 1 file changed, 92 insertions(+), 92 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1288,19 +1288,103 @@ struct zone_info {
+ 	u64 alloc_offset;
+ };
+
++static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
++				struct zone_info *info, unsigned long *active,
++				struct map_lookup *map)
++{
++	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
++	struct btrfs_device *device = map->stripes[zone_idx].dev;
++	int dev_replace_is_ongoing = 0;
++	unsigned int nofs_flag;
++	struct blk_zone zone;
++	int ret;
++
++	info->physical = map->stripes[zone_idx].physical;
++
++	if (!device->bdev) {
++		info->alloc_offset = WP_MISSING_DEV;
++		return 0;
++	}
++
++	/* Consider a zone as active if we can allow any number of active zones. */
++	if (!device->zone_info->max_active_zones)
++		__set_bit(zone_idx, active);
++
++	if (!btrfs_dev_is_sequential(device, info->physical)) {
++		info->alloc_offset = WP_CONVENTIONAL;
++		return 0;
++	}
++
++	/* This zone will be used for allocation, so mark this zone non-empty. */
++	btrfs_dev_clear_zone_empty(device, info->physical);
++
++	down_read(&dev_replace->rwsem);
++	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
++	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
++		btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
++	up_read(&dev_replace->rwsem);
++
++	/*
++	 * The group is mapped to a sequential zone. Get the zone write pointer
++	 * to determine the allocation offset within the zone.
++	 */
++	WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
++	nofs_flag = memalloc_nofs_save();
++	ret = btrfs_get_dev_zone(device, info->physical, &zone);
++	memalloc_nofs_restore(nofs_flag);
++	if (ret) {
++		if (ret != -EIO && ret != -EOPNOTSUPP)
++			return ret;
++		info->alloc_offset = WP_MISSING_DEV;
++		return 0;
++	}
++
++	if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
++		btrfs_err_in_rcu(fs_info,
++			"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
++			zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
++			device->devid);
++		return -EIO;
++	}
++
++	info->capacity = (zone.capacity << SECTOR_SHIFT);
++
++	switch (zone.cond) {
++	case BLK_ZONE_COND_OFFLINE:
++	case BLK_ZONE_COND_READONLY:
++		btrfs_err(fs_info,
++		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
++			  (info->physical >> device->zone_info->zone_size_shift),
++			  rcu_str_deref(device->name), device->devid);
++		info->alloc_offset = WP_MISSING_DEV;
++		break;
++	case BLK_ZONE_COND_EMPTY:
++		info->alloc_offset = 0;
++		break;
++	case BLK_ZONE_COND_FULL:
++		info->alloc_offset = info->capacity;
++		break;
++	default:
++		/* Partially used zone. */
++		info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
++		__set_bit(zone_idx, active);
++		break;
++	}
++
++	return 0;
++}
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+ 	struct btrfs_fs_info *fs_info = cache->fs_info;
+ 	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+ 	struct extent_map *em;
+ 	struct map_lookup *map;
+-	struct btrfs_device *device;
+ 	u64 logical = cache->start;
+ 	u64 length = cache->length;
+ 	struct zone_info *zone_info = NULL;
+ 	int ret;
+ 	int i;
+-	unsigned int nofs_flag;
+ 	unsigned long *active = NULL;
+ 	u64 last_alloc = 0;
+ 	u32 num_sequential = 0, num_conventional = 0;
+@@ -1345,98 +1429,14 @@ int btrfs_load_block_group_zone_info(str
+ 	}
+
+ 	for (i = 0; i < map->num_stripes; i++) {
+-		struct zone_info *info = &zone_info[i];
+-		bool is_sequential;
+-		struct blk_zone zone;
+-		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+-		int dev_replace_is_ongoing = 0;
+-
+-		device = map->stripes[i].dev;
+-		info->physical = map->stripes[i].physical;
+-
+-		if (device->bdev == NULL) {
+-			info->alloc_offset = WP_MISSING_DEV;
+-			continue;
+-		}
+-
+-		is_sequential = btrfs_dev_is_sequential(device, info->physical);
+-		if (is_sequential)
+-			num_sequential++;
+-		else
+-			num_conventional++;
+-
+-		/*
+-		 * Consider a zone as active if we can allow any number of
+-		 * active zones.
+-		 */
+-		if (!device->zone_info->max_active_zones)
+-			__set_bit(i, active);
+-
+-		if (!is_sequential) {
+-			info->alloc_offset = WP_CONVENTIONAL;
+-			continue;
+-		}
+-
+-		/*
+-		 * This zone will be used for allocation, so mark this zone
+-		 * non-empty.
+-		 */
+-		btrfs_dev_clear_zone_empty(device, info->physical);
+-
+-		down_read(&dev_replace->rwsem);
+-		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+-		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
+-			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
+-		up_read(&dev_replace->rwsem);
+-
+-		/*
+-		 * The group is mapped to a sequential zone. Get the zone write
+-		 * pointer to determine the allocation offset within the zone.
+-		 */
+-		WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
+-		nofs_flag = memalloc_nofs_save();
+-		ret = btrfs_get_dev_zone(device, info->physical, &zone);
+-		memalloc_nofs_restore(nofs_flag);
+-		if (ret == -EIO || ret == -EOPNOTSUPP) {
+-			ret = 0;
+-			info->alloc_offset = WP_MISSING_DEV;
+-			continue;
+-		} else if (ret) {
++		ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map);
++		if (ret)
+ 			goto out;
+-		}
+-
+-		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+-			btrfs_err_in_rcu(fs_info,
+-			"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
+-				zone.start << SECTOR_SHIFT,
+-				rcu_str_deref(device->name), device->devid);
+-			ret = -EIO;
+-			goto out;
+-		}
+-
+-		info->capacity = (zone.capacity << SECTOR_SHIFT);
+
+-		switch (zone.cond) {
+-		case BLK_ZONE_COND_OFFLINE:
+-		case BLK_ZONE_COND_READONLY:
+-			btrfs_err(fs_info,
+-		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
+-				  info->physical >> device->zone_info->zone_size_shift,
+-				  rcu_str_deref(device->name), device->devid);
+-			info->alloc_offset = WP_MISSING_DEV;
+-			break;
+-		case BLK_ZONE_COND_EMPTY:
+-			info->alloc_offset = 0;
+-			break;
+-		case BLK_ZONE_COND_FULL:
+-			info->alloc_offset = info->capacity;
+-			break;
+-		default:
+-			/* Partially used zone */
+-			info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
+-			__set_bit(i, active);
+-			break;
+-		}
++		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
++			num_conventional++;
++		else
++			num_sequential++;
+ 	}
+
+ 	if (num_sequential > 0)
diff --git a/queue-6.6/btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
new file mode 100644
index 00000000000..e5aaed836e0
--- /dev/null
+++ b/queue-6.6/btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
@@ -0,0 +1,67 @@
+From 9e0e3e74dc6928a0956f4e27e24d473c65887e96 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig
+Date: Mon, 5 Jun 2023 10:51:07 +0200
+Subject: btrfs: zoned: factor out single bg handling from btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig
+
+commit 9e0e3e74dc6928a0956f4e27e24d473c65887e96 upstream.
+
+Split the code handling a type single block group from
+btrfs_load_block_group_zone_info to make the code more readable.
+
+Reviewed-by: Johannes Thumshirn
+Signed-off-by: Christoph Hellwig
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/zoned.c |   30 +++++++++++++++++++-----------
+ 1 file changed, 19 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1374,6 +1374,24 @@ static int btrfs_load_zone_info(struct b
+ 	return 0;
+ }
+
++static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
++					 struct zone_info *info,
++					 unsigned long *active)
++{
++	if (info->alloc_offset == WP_MISSING_DEV) {
++		btrfs_err(bg->fs_info,
++			"zoned: cannot recover write pointer for zone %llu",
++			info->physical);
++		return -EIO;
++	}
++
++	bg->alloc_offset = info->alloc_offset;
++	bg->zone_capacity = info->capacity;
++	if (test_bit(0, active))
++		set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
++	return 0;
++}
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+ 	struct btrfs_fs_info *fs_info = cache->fs_info;
+@@ -1460,17 +1478,7 @@ int btrfs_load_block_group_zone_info(str
+
+ 	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ 	case 0: /* single */
+-		if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+-			btrfs_err(fs_info,
+-				"zoned: cannot recover write pointer for zone %llu",
+-				zone_info[0].physical);
+-			ret = -EIO;
+-			goto out;
+-		}
+-		cache->alloc_offset = zone_info[0].alloc_offset;
+-		cache->zone_capacity = zone_info[0].capacity;
+-		if (test_bit(0, active))
+-			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
++		ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
+ 		break;
+ 	case BTRFS_BLOCK_GROUP_DUP:
+ 		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
diff --git a/queue-6.6/btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch b/queue-6.6/btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch
new file mode 100644
index 00000000000..08fec8e6e94
--- /dev/null
+++ b/queue-6.6/btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch
@@ -0,0 +1,107 @@
+From 0090d6e1b210551e63cf43958dc7a1ec942cdde9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Wed, 8 May 2024 11:51:07 +0100
+Subject: btrfs: zoned: fix use-after-free due to race with dev replace
+
+From: Filipe Manana
+
+commit 0090d6e1b210551e63cf43958dc7a1ec942cdde9 upstream.
+
+While loading a zone's info during creation of a block group, we can race
+with a device replace operation and then trigger a use-after-free on the
+device that was just replaced (source device of the replace operation).
+
+This happens because at btrfs_load_zone_info() we extract a device from
+the chunk map into a local variable and then use the device while not
+under the protection of the device replace rwsem. So if there's a device
+replace operation happening when we extract the device and that device
+is the source of the replace operation, we will trigger a use-after-free
+if before we finish using the device the replace operation finishes and
+frees the device.
+
+Fix this by enlarging the critical section under the protection of the
+device replace rwsem so that all uses of the device are done inside the
+critical section.
+
+CC: stable@vger.kernel.org # 6.1.x: 15c12fcc50a1: btrfs: zoned: introduce a zone_info struct in btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x: 09a46725cc84: btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x: 9e0e3e74dc69: btrfs: zoned: factor out single bg handling from btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x: 87463f7e0250: btrfs: zoned: factor out DUP bg handling from btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x
+Reviewed-by: Johannes Thumshirn
+Signed-off-by: Filipe Manana
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/zoned.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1293,7 +1293,7 @@ static int btrfs_load_zone_info(struct b
+ 	struct map_lookup *map)
+ {
+ 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+-	struct btrfs_device *device = map->stripes[zone_idx].dev;
++	struct btrfs_device *device;
+ 	int dev_replace_is_ongoing = 0;
+ 	unsigned int nofs_flag;
+ 	struct blk_zone zone;
+@@ -1301,7 +1301,11 @@ static int btrfs_load_zone_info(struct b
+
+ 	info->physical = map->stripes[zone_idx].physical;
+
++	down_read(&dev_replace->rwsem);
++	device = map->stripes[zone_idx].dev;
++
+ 	if (!device->bdev) {
++		up_read(&dev_replace->rwsem);
+ 		info->alloc_offset = WP_MISSING_DEV;
+ 		return 0;
+ 	}
+@@ -1311,6 +1315,7 @@ static int btrfs_load_zone_info(struct b
+ 		__set_bit(zone_idx, active);
+
+ 	if (!btrfs_dev_is_sequential(device, info->physical)) {
++		up_read(&dev_replace->rwsem);
+ 		info->alloc_offset = WP_CONVENTIONAL;
+ 		return 0;
+ 	}
+@@ -1318,11 +1323,9 @@ static int btrfs_load_zone_info(struct b
+ 	/* This zone will be used for allocation, so mark this zone non-empty. */
+ 	btrfs_dev_clear_zone_empty(device, info->physical);
+
+-	down_read(&dev_replace->rwsem);
+ 	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+ 	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
+ 		btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
+-	up_read(&dev_replace->rwsem);
+
+ 	/*
+ 	 * The group is mapped to a sequential zone. Get the zone write pointer
+@@ -1333,6 +1336,7 @@ static int btrfs_load_zone_info(struct b
+ 	ret = btrfs_get_dev_zone(device, info->physical, &zone);
+ 	memalloc_nofs_restore(nofs_flag);
+ 	if (ret) {
++		up_read(&dev_replace->rwsem);
+ 		if (ret != -EIO && ret != -EOPNOTSUPP)
+ 			return ret;
+ 		info->alloc_offset = WP_MISSING_DEV;
+@@ -1344,6 +1348,7 @@ static int btrfs_load_zone_info(struct b
+ 			"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
+ 			zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
+ 			device->devid);
++		up_read(&dev_replace->rwsem);
+ 		return -EIO;
+ 	}
+
+@@ -1371,6 +1376,8 @@ static int btrfs_load_zone_info(struct b
+ 		break;
+ 	}
+
++	up_read(&dev_replace->rwsem);
++
+ 	return 0;
+ }
+
diff --git a/queue-6.6/btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch
new file mode 100644
index 00000000000..2eaf327fddf
--- /dev/null
+++ b/queue-6.6/btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch
@@ -0,0 +1,242 @@
+From 15c12fcc50a1b12a747f8b6ec05cdb18c537a4d1 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig
+Date: Mon, 5 Jun 2023 10:51:05 +0200
+Subject: btrfs: zoned: introduce a zone_info struct in btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig
+
+commit 15c12fcc50a1b12a747f8b6ec05cdb18c537a4d1 upstream.
+
+Add a new zone_info structure to hold per-zone information in
+btrfs_load_block_group_zone_info and prepare for breaking out helpers
+from it.
+
+Reviewed-by: Johannes Thumshirn
+Signed-off-by: Christoph Hellwig
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/zoned.c |   84 +++++++++++++++++++++++-----------------------------------------------------------------
+ 1 file changed, 37 insertions(+), 47 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1282,6 +1282,12 @@ out:
+ 	return ret;
+ }
+
++struct zone_info {
++	u64 physical;
++	u64 capacity;
++	u64 alloc_offset;
++};
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+ 	struct btrfs_fs_info *fs_info = cache->fs_info;
+@@ -1291,12 +1297,10 @@ int btrfs_load_block_group_zone_info(str
+ 	struct btrfs_device *device;
+ 	u64 logical = cache->start;
+ 	u64 length = cache->length;
++	struct zone_info *zone_info = NULL;
+ 	int ret;
+ 	int i;
+ 	unsigned int nofs_flag;
+-	u64 *alloc_offsets = NULL;
+-	u64 *caps = NULL;
+-	u64 *physical = NULL;
+ 	unsigned long *active = NULL;
+ 	u64 last_alloc = 0;
+ 	u32 num_sequential = 0, num_conventional = 0;
+@@ -1328,20 +1332,8 @@ int btrfs_load_block_group_zone_info(str
+ 		goto out;
+ 	}
+
+-	alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
+-	if (!alloc_offsets) {
+-		ret = -ENOMEM;
+-		goto out;
+-	}
+-
+-	caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS);
+-	if (!caps) {
+-		ret = -ENOMEM;
+-		goto out;
+-	}
+-
+-	physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
+-	if (!physical) {
++	zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
++	if (!zone_info) {
+ 		ret = -ENOMEM;
+ 		goto out;
+ 	}
+@@ -1353,20 +1345,21 @@ int btrfs_load_block_group_zone_info(str
+ 	}
+
+ 	for (i = 0; i < map->num_stripes; i++) {
++		struct zone_info *info = &zone_info[i];
+ 		bool is_sequential;
+ 		struct blk_zone zone;
+ 		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+ 		int dev_replace_is_ongoing = 0;
+
+ 		device = map->stripes[i].dev;
+-		physical[i] = map->stripes[i].physical;
++		info->physical = map->stripes[i].physical;
+
+ 		if (device->bdev == NULL) {
+-			alloc_offsets[i] = WP_MISSING_DEV;
++			info->alloc_offset = WP_MISSING_DEV;
+ 			continue;
+ 		}
+
+-		is_sequential = btrfs_dev_is_sequential(device, physical[i]);
++		is_sequential = btrfs_dev_is_sequential(device, info->physical);
+ 		if (is_sequential)
+ 			num_sequential++;
+ 		else
+@@ -1380,7 +1373,7 @@ int btrfs_load_block_group_zone_info(str
+ 			__set_bit(i, active);
+
+ 		if (!is_sequential) {
+-			alloc_offsets[i] = WP_CONVENTIONAL;
++			info->alloc_offset = WP_CONVENTIONAL;
+ 			continue;
+ 		}
+
+@@ -1388,25 +1381,25 @@ int btrfs_load_block_group_zone_info(str
+ 		 * This zone will be used for allocation, so mark this zone
+ 		 * non-empty.
+ 		 */
+-		btrfs_dev_clear_zone_empty(device, physical[i]);
++		btrfs_dev_clear_zone_empty(device, info->physical);
+
+ 		down_read(&dev_replace->rwsem);
+ 		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+ 		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
+-			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
++			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
+ 		up_read(&dev_replace->rwsem);
+
+ 		/*
+ 		 * The group is mapped to a sequential zone. Get the zone write
+ 		 * pointer to determine the allocation offset within the zone.
+ 		 */
+-		WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
++		WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
+ 		nofs_flag = memalloc_nofs_save();
+-		ret = btrfs_get_dev_zone(device, physical[i], &zone);
++		ret = btrfs_get_dev_zone(device, info->physical, &zone);
+ 		memalloc_nofs_restore(nofs_flag);
+ 		if (ret == -EIO || ret == -EOPNOTSUPP) {
+ 			ret = 0;
+-			alloc_offsets[i] = WP_MISSING_DEV;
++			info->alloc_offset = WP_MISSING_DEV;
+ 			continue;
+ 		} else if (ret) {
+ 			goto out;
+@@ -1421,27 +1414,26 @@ int btrfs_load_block_group_zone_info(str
+ 			goto out;
+ 		}
+
+-		caps[i] = (zone.capacity << SECTOR_SHIFT);
++		info->capacity = (zone.capacity << SECTOR_SHIFT);
+
+ 		switch (zone.cond) {
+ 		case BLK_ZONE_COND_OFFLINE:
+ 		case BLK_ZONE_COND_READONLY:
+ 			btrfs_err(fs_info,
+ 		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
+-				  physical[i] >> device->zone_info->zone_size_shift,
++				  info->physical >> device->zone_info->zone_size_shift,
+ 				  rcu_str_deref(device->name), device->devid);
+-			alloc_offsets[i] = WP_MISSING_DEV;
++			info->alloc_offset = WP_MISSING_DEV;
+ 			break;
+ 		case BLK_ZONE_COND_EMPTY:
+-			alloc_offsets[i] = 0;
++			info->alloc_offset = 0;
+ 			break;
+ 		case BLK_ZONE_COND_FULL:
+-			alloc_offsets[i] = caps[i];
++			info->alloc_offset = info->capacity;
+ 			break;
+ 		default:
+ 			/* Partially used zone */
+-			alloc_offsets[i] =
+-				((zone.wp - zone.start) << SECTOR_SHIFT);
++			info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
+ 			__set_bit(i, active);
+ 			break;
+ 		}
+@@ -1468,15 +1460,15 @@ int btrfs_load_block_group_zone_info(str
+
+ 	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ 	case 0: /* single */
+-		if (alloc_offsets[0] == WP_MISSING_DEV) {
++		if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+ 			btrfs_err(fs_info,
+ 				"zoned: cannot recover write pointer for zone %llu",
+-				physical[0]);
++				zone_info[0].physical);
+ 			ret = -EIO;
+ 			goto out;
+ 		}
+-		cache->alloc_offset = alloc_offsets[0];
+-		cache->zone_capacity = caps[0];
++		cache->alloc_offset = zone_info[0].alloc_offset;
++		cache->zone_capacity = zone_info[0].capacity;
+ 		if (test_bit(0, active))
+ 			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
+ 		break;
+@@ -1486,21 +1478,21 @@ int btrfs_load_block_group_zone_info(str
+ 			ret = -EINVAL;
+ 			goto out;
+ 		}
+-		if (alloc_offsets[0] == WP_MISSING_DEV) {
++		if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+ 			btrfs_err(fs_info,
+ 				"zoned: cannot recover write pointer for zone %llu",
+-				physical[0]);
++				zone_info[0].physical);
+ 			ret = -EIO;
+ 			goto out;
+ 		}
+-		if (alloc_offsets[1] == WP_MISSING_DEV) {
++		if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
+ 			btrfs_err(fs_info,
+ 				"zoned: cannot recover write pointer for zone %llu",
+-				physical[1]);
++				zone_info[1].physical);
+ 			ret = -EIO;
+ 			goto out;
+ 		}
+-		if (alloc_offsets[0] != alloc_offsets[1]) {
++		if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
+ 			btrfs_err(fs_info,
+ 				"zoned: write pointer offset mismatch of zones in DUP profile");
+ 			ret = -EIO;
+@@ -1516,8 +1508,8 @@ int btrfs_load_block_group_zone_info(str
+ 				set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+ 					&cache->runtime_flags);
+ 		}
+-		cache->alloc_offset = alloc_offsets[0];
+-		cache->zone_capacity = min(caps[0], caps[1]);
++		cache->alloc_offset = zone_info[0].alloc_offset;
++		cache->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
+ 		break;
+ 	case BTRFS_BLOCK_GROUP_RAID1:
+ 	case BTRFS_BLOCK_GROUP_RAID0:
+@@ -1570,9 +1562,7 @@ out:
+ 		cache->physical_map = NULL;
+ 	}
+ 	bitmap_free(active);
+-	kfree(physical);
+-	kfree(caps);
+-	kfree(alloc_offsets);
++	kfree(zone_info);
+ 	free_extent_map(em);
+
+ 	return ret;
diff --git a/queue-6.6/series b/queue-6.6/series
index ecad7ec48e6..95420a18c2d 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -233,3 +233,8 @@ intel_th-pci-add-sapphire-rapids-soc-support.patch
 intel_th-pci-add-meteor-lake-s-support.patch
 intel_th-pci-add-lunar-lake-support.patch
 pmdomain-ti-sci-fix-duplicate-pd-referrals.patch
+btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch