git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 07:07:35 +0000 (09:07 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 07:07:35 +0000 (09:07 +0200)
added patches:
btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch
btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch

queue-6.6/btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch [new file with mode: 0644]
queue-6.6/btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch [new file with mode: 0644]
queue-6.6/btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch [new file with mode: 0644]
queue-6.6/btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch [new file with mode: 0644]
queue-6.6/btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
new file mode 100644 (file)
index 0000000..60de977
--- /dev/null
@@ -0,0 +1,116 @@
+From 87463f7e0250d471fac41e7c9c45ae21d83b5f85 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 5 Jun 2023 10:51:08 +0200
+Subject: btrfs: zoned: factor out DUP bg handling from btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 87463f7e0250d471fac41e7c9c45ae21d83b5f85 upstream.
+
+Split the code handling a type DUP block group from
+btrfs_load_block_group_zone_info to make the code more readable.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |   79 +++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 42 insertions(+), 37 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1392,6 +1392,47 @@ static int btrfs_load_block_group_single
+       return 0;
+ }
++static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
++                                    struct map_lookup *map,
++                                    struct zone_info *zone_info,
++                                    unsigned long *active)
++{
++      if (map->type & BTRFS_BLOCK_GROUP_DATA) {
++              btrfs_err(bg->fs_info,
++                        "zoned: profile DUP not yet supported on data bg");
++              return -EINVAL;
++      }
++
++      if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
++              btrfs_err(bg->fs_info,
++                        "zoned: cannot recover write pointer for zone %llu",
++                        zone_info[0].physical);
++              return -EIO;
++      }
++      if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
++              btrfs_err(bg->fs_info,
++                        "zoned: cannot recover write pointer for zone %llu",
++                        zone_info[1].physical);
++              return -EIO;
++      }
++      if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
++              btrfs_err(bg->fs_info,
++                        "zoned: write pointer offset mismatch of zones in DUP profile");
++              return -EIO;
++      }
++
++      if (test_bit(0, active) != test_bit(1, active)) {
++              if (!btrfs_zone_activate(bg))
++                      return -EIO;
++      } else if (test_bit(0, active)) {
++              set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
++      }
++
++      bg->alloc_offset = zone_info[0].alloc_offset;
++      bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
++      return 0;
++}
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+@@ -1481,43 +1522,7 @@ int btrfs_load_block_group_zone_info(str
+               ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
+               break;
+       case BTRFS_BLOCK_GROUP_DUP:
+-              if (map->type & BTRFS_BLOCK_GROUP_DATA) {
+-                      btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
+-                      ret = -EINVAL;
+-                      goto out;
+-              }
+-              if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+-                      btrfs_err(fs_info,
+-                      "zoned: cannot recover write pointer for zone %llu",
+-                              zone_info[0].physical);
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-              if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
+-                      btrfs_err(fs_info,
+-                      "zoned: cannot recover write pointer for zone %llu",
+-                              zone_info[1].physical);
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-              if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
+-                      btrfs_err(fs_info,
+-                      "zoned: write pointer offset mismatch of zones in DUP profile");
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-              if (test_bit(0, active) != test_bit(1, active)) {
+-                      if (!btrfs_zone_activate(cache)) {
+-                              ret = -EIO;
+-                              goto out;
+-                      }
+-              } else {
+-                      if (test_bit(0, active))
+-                              set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+-                                      &cache->runtime_flags);
+-              }
+-              cache->alloc_offset = zone_info[0].alloc_offset;
+-              cache->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
++              ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID1:
+       case BTRFS_BLOCK_GROUP_RAID0:
diff --git a/queue-6.6/btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
new file mode 100644 (file)
index 0000000..4841ca6
--- /dev/null
@@ -0,0 +1,235 @@
+From 09a46725cc84165af452d978a3532d6b97a28796 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 5 Jun 2023 10:51:06 +0200
+Subject: btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 09a46725cc84165af452d978a3532d6b97a28796 upstream.
+
+Split out a helper for the body of the per-zone loop in
+btrfs_load_block_group_zone_info to make the function easier to read and
+modify.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |  184 +++++++++++++++++++++++++++----------------------------
+ 1 file changed, 92 insertions(+), 92 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1288,19 +1288,103 @@ struct zone_info {
+       u64 alloc_offset;
+ };
++static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
++                              struct zone_info *info, unsigned long *active,
++                              struct map_lookup *map)
++{
++      struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
++      struct btrfs_device *device = map->stripes[zone_idx].dev;
++      int dev_replace_is_ongoing = 0;
++      unsigned int nofs_flag;
++      struct blk_zone zone;
++      int ret;
++
++      info->physical = map->stripes[zone_idx].physical;
++
++      if (!device->bdev) {
++              info->alloc_offset = WP_MISSING_DEV;
++              return 0;
++      }
++
++      /* Consider a zone as active if we can allow any number of active zones. */
++      if (!device->zone_info->max_active_zones)
++              __set_bit(zone_idx, active);
++
++      if (!btrfs_dev_is_sequential(device, info->physical)) {
++              info->alloc_offset = WP_CONVENTIONAL;
++              return 0;
++      }
++
++      /* This zone will be used for allocation, so mark this zone non-empty. */
++      btrfs_dev_clear_zone_empty(device, info->physical);
++
++      down_read(&dev_replace->rwsem);
++      dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
++      if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
++              btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
++      up_read(&dev_replace->rwsem);
++
++      /*
++       * The group is mapped to a sequential zone. Get the zone write pointer
++       * to determine the allocation offset within the zone.
++       */
++      WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
++      nofs_flag = memalloc_nofs_save();
++      ret = btrfs_get_dev_zone(device, info->physical, &zone);
++      memalloc_nofs_restore(nofs_flag);
++      if (ret) {
++              if (ret != -EIO && ret != -EOPNOTSUPP)
++                      return ret;
++              info->alloc_offset = WP_MISSING_DEV;
++              return 0;
++      }
++
++      if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
++              btrfs_err_in_rcu(fs_info,
++              "zoned: unexpected conventional zone %llu on device %s (devid %llu)",
++                      zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
++                      device->devid);
++              return -EIO;
++      }
++
++      info->capacity = (zone.capacity << SECTOR_SHIFT);
++
++      switch (zone.cond) {
++      case BLK_ZONE_COND_OFFLINE:
++      case BLK_ZONE_COND_READONLY:
++              btrfs_err(fs_info,
++              "zoned: offline/readonly zone %llu on device %s (devid %llu)",
++                        (info->physical >> device->zone_info->zone_size_shift),
++                        rcu_str_deref(device->name), device->devid);
++              info->alloc_offset = WP_MISSING_DEV;
++              break;
++      case BLK_ZONE_COND_EMPTY:
++              info->alloc_offset = 0;
++              break;
++      case BLK_ZONE_COND_FULL:
++              info->alloc_offset = info->capacity;
++              break;
++      default:
++              /* Partially used zone. */
++              info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
++              __set_bit(zone_idx, active);
++              break;
++      }
++
++      return 0;
++}
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+       struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+       struct extent_map *em;
+       struct map_lookup *map;
+-      struct btrfs_device *device;
+       u64 logical = cache->start;
+       u64 length = cache->length;
+       struct zone_info *zone_info = NULL;
+       int ret;
+       int i;
+-      unsigned int nofs_flag;
+       unsigned long *active = NULL;
+       u64 last_alloc = 0;
+       u32 num_sequential = 0, num_conventional = 0;
+@@ -1345,98 +1429,14 @@ int btrfs_load_block_group_zone_info(str
+       }
+       for (i = 0; i < map->num_stripes; i++) {
+-              struct zone_info *info = &zone_info[i];
+-              bool is_sequential;
+-              struct blk_zone zone;
+-              struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+-              int dev_replace_is_ongoing = 0;
+-
+-              device = map->stripes[i].dev;
+-              info->physical = map->stripes[i].physical;
+-
+-              if (device->bdev == NULL) {
+-                      info->alloc_offset = WP_MISSING_DEV;
+-                      continue;
+-              }
+-
+-              is_sequential = btrfs_dev_is_sequential(device, info->physical);
+-              if (is_sequential)
+-                      num_sequential++;
+-              else
+-                      num_conventional++;
+-
+-              /*
+-               * Consider a zone as active if we can allow any number of
+-               * active zones.
+-               */
+-              if (!device->zone_info->max_active_zones)
+-                      __set_bit(i, active);
+-
+-              if (!is_sequential) {
+-                      info->alloc_offset = WP_CONVENTIONAL;
+-                      continue;
+-              }
+-
+-              /*
+-               * This zone will be used for allocation, so mark this zone
+-               * non-empty.
+-               */
+-              btrfs_dev_clear_zone_empty(device, info->physical);
+-
+-              down_read(&dev_replace->rwsem);
+-              dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+-              if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
+-                      btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
+-              up_read(&dev_replace->rwsem);
+-
+-              /*
+-               * The group is mapped to a sequential zone. Get the zone write
+-               * pointer to determine the allocation offset within the zone.
+-               */
+-              WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
+-              nofs_flag = memalloc_nofs_save();
+-              ret = btrfs_get_dev_zone(device, info->physical, &zone);
+-              memalloc_nofs_restore(nofs_flag);
+-              if (ret == -EIO || ret == -EOPNOTSUPP) {
+-                      ret = 0;
+-                      info->alloc_offset = WP_MISSING_DEV;
+-                      continue;
+-              } else if (ret) {
++              ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map);
++              if (ret)
+                       goto out;
+-              }
+-
+-              if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+-                      btrfs_err_in_rcu(fs_info,
+-      "zoned: unexpected conventional zone %llu on device %s (devid %llu)",
+-                              zone.start << SECTOR_SHIFT,
+-                              rcu_str_deref(device->name), device->devid);
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-
+-              info->capacity = (zone.capacity << SECTOR_SHIFT);
+-              switch (zone.cond) {
+-              case BLK_ZONE_COND_OFFLINE:
+-              case BLK_ZONE_COND_READONLY:
+-                      btrfs_err(fs_info,
+-              "zoned: offline/readonly zone %llu on device %s (devid %llu)",
+-                                info->physical >> device->zone_info->zone_size_shift,
+-                                rcu_str_deref(device->name), device->devid);
+-                      info->alloc_offset = WP_MISSING_DEV;
+-                      break;
+-              case BLK_ZONE_COND_EMPTY:
+-                      info->alloc_offset = 0;
+-                      break;
+-              case BLK_ZONE_COND_FULL:
+-                      info->alloc_offset = info->capacity;
+-                      break;
+-              default:
+-                      /* Partially used zone */
+-                      info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
+-                      __set_bit(i, active);
+-                      break;
+-              }
++              if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
++                      num_conventional++;
++              else
++                      num_sequential++;
+       }
+       if (num_sequential > 0)
diff --git a/queue-6.6/btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
new file mode 100644 (file)
index 0000000..e5aaed8
--- /dev/null
@@ -0,0 +1,67 @@
+From 9e0e3e74dc6928a0956f4e27e24d473c65887e96 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 5 Jun 2023 10:51:07 +0200
+Subject: btrfs: zoned: factor out single bg handling from btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 9e0e3e74dc6928a0956f4e27e24d473c65887e96 upstream.
+
+Split the code handling a type single block group from
+btrfs_load_block_group_zone_info to make the code more readable.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |   30 +++++++++++++++++++-----------
+ 1 file changed, 19 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1374,6 +1374,24 @@ static int btrfs_load_zone_info(struct b
+       return 0;
+ }
++static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
++                                       struct zone_info *info,
++                                       unsigned long *active)
++{
++      if (info->alloc_offset == WP_MISSING_DEV) {
++              btrfs_err(bg->fs_info,
++                      "zoned: cannot recover write pointer for zone %llu",
++                      info->physical);
++              return -EIO;
++      }
++
++      bg->alloc_offset = info->alloc_offset;
++      bg->zone_capacity = info->capacity;
++      if (test_bit(0, active))
++              set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
++      return 0;
++}
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+@@ -1460,17 +1478,7 @@ int btrfs_load_block_group_zone_info(str
+       switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+       case 0: /* single */
+-              if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+-                      btrfs_err(fs_info,
+-                      "zoned: cannot recover write pointer for zone %llu",
+-                              zone_info[0].physical);
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-              cache->alloc_offset = zone_info[0].alloc_offset;
+-              cache->zone_capacity = zone_info[0].capacity;
+-              if (test_bit(0, active))
+-                      set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
++              ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
+               break;
+       case BTRFS_BLOCK_GROUP_DUP:
+               if (map->type & BTRFS_BLOCK_GROUP_DATA) {
diff --git a/queue-6.6/btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch b/queue-6.6/btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch
new file mode 100644 (file)
index 0000000..08fec8e
--- /dev/null
@@ -0,0 +1,107 @@
+From 0090d6e1b210551e63cf43958dc7a1ec942cdde9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 8 May 2024 11:51:07 +0100
+Subject: btrfs: zoned: fix use-after-free due to race with dev replace
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0090d6e1b210551e63cf43958dc7a1ec942cdde9 upstream.
+
+While loading a zone's info during creation of a block group, we can race
+with a device replace operation and then trigger a use-after-free on the
+device that was just replaced (source device of the replace operation).
+
+This happens because at btrfs_load_zone_info() we extract a device from
+the chunk map into a local variable and then use the device while not
+under the protection of the device replace rwsem. So if there's a device
+replace operation happening when we extract the device and that device
+is the source of the replace operation, we will trigger a use-after-free
+if before we finish using the device the replace operation finishes and
+frees the device.
+
+Fix this by enlarging the critical section under the protection of the
+device replace rwsem so that all uses of the device are done inside the
+critical section.
+
+CC: stable@vger.kernel.org # 6.1.x: 15c12fcc50a1: btrfs: zoned: introduce a zone_info struct in btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x: 09a46725cc84: btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x: 9e0e3e74dc69: btrfs: zoned: factor out single bg handling from btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x: 87463f7e0250: btrfs: zoned: factor out DUP bg handling from btrfs_load_block_group_zone_info
+CC: stable@vger.kernel.org # 6.1.x
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1293,7 +1293,7 @@ static int btrfs_load_zone_info(struct b
+                               struct map_lookup *map)
+ {
+       struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+-      struct btrfs_device *device = map->stripes[zone_idx].dev;
++      struct btrfs_device *device;
+       int dev_replace_is_ongoing = 0;
+       unsigned int nofs_flag;
+       struct blk_zone zone;
+@@ -1301,7 +1301,11 @@ static int btrfs_load_zone_info(struct b
+       info->physical = map->stripes[zone_idx].physical;
++      down_read(&dev_replace->rwsem);
++      device = map->stripes[zone_idx].dev;
++
+       if (!device->bdev) {
++              up_read(&dev_replace->rwsem);
+               info->alloc_offset = WP_MISSING_DEV;
+               return 0;
+       }
+@@ -1311,6 +1315,7 @@ static int btrfs_load_zone_info(struct b
+               __set_bit(zone_idx, active);
+       if (!btrfs_dev_is_sequential(device, info->physical)) {
++              up_read(&dev_replace->rwsem);
+               info->alloc_offset = WP_CONVENTIONAL;
+               return 0;
+       }
+@@ -1318,11 +1323,9 @@ static int btrfs_load_zone_info(struct b
+       /* This zone will be used for allocation, so mark this zone non-empty. */
+       btrfs_dev_clear_zone_empty(device, info->physical);
+-      down_read(&dev_replace->rwsem);
+       dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
+               btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
+-      up_read(&dev_replace->rwsem);
+       /*
+        * The group is mapped to a sequential zone. Get the zone write pointer
+@@ -1333,6 +1336,7 @@ static int btrfs_load_zone_info(struct b
+       ret = btrfs_get_dev_zone(device, info->physical, &zone);
+       memalloc_nofs_restore(nofs_flag);
+       if (ret) {
++              up_read(&dev_replace->rwsem);
+               if (ret != -EIO && ret != -EOPNOTSUPP)
+                       return ret;
+               info->alloc_offset = WP_MISSING_DEV;
+@@ -1344,6 +1348,7 @@ static int btrfs_load_zone_info(struct b
+               "zoned: unexpected conventional zone %llu on device %s (devid %llu)",
+                       zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
+                       device->devid);
++              up_read(&dev_replace->rwsem);
+               return -EIO;
+       }
+@@ -1371,6 +1376,8 @@ static int btrfs_load_zone_info(struct b
+               break;
+       }
++      up_read(&dev_replace->rwsem);
++
+       return 0;
+ }
diff --git a/queue-6.6/btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch b/queue-6.6/btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch
new file mode 100644 (file)
index 0000000..2eaf327
--- /dev/null
@@ -0,0 +1,242 @@
+From 15c12fcc50a1b12a747f8b6ec05cdb18c537a4d1 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 5 Jun 2023 10:51:05 +0200
+Subject: btrfs: zoned: introduce a zone_info struct in btrfs_load_block_group_zone_info
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 15c12fcc50a1b12a747f8b6ec05cdb18c537a4d1 upstream.
+
+Add a new zone_info structure to hold per-zone information in
+btrfs_load_block_group_zone_info and prepare for breaking out helpers
+from it.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |   84 ++++++++++++++++++++++++-------------------------------
+ 1 file changed, 37 insertions(+), 47 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1282,6 +1282,12 @@ out:
+       return ret;
+ }
++struct zone_info {
++      u64 physical;
++      u64 capacity;
++      u64 alloc_offset;
++};
++
+ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+@@ -1291,12 +1297,10 @@ int btrfs_load_block_group_zone_info(str
+       struct btrfs_device *device;
+       u64 logical = cache->start;
+       u64 length = cache->length;
++      struct zone_info *zone_info = NULL;
+       int ret;
+       int i;
+       unsigned int nofs_flag;
+-      u64 *alloc_offsets = NULL;
+-      u64 *caps = NULL;
+-      u64 *physical = NULL;
+       unsigned long *active = NULL;
+       u64 last_alloc = 0;
+       u32 num_sequential = 0, num_conventional = 0;
+@@ -1328,20 +1332,8 @@ int btrfs_load_block_group_zone_info(str
+               goto out;
+       }
+-      alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
+-      if (!alloc_offsets) {
+-              ret = -ENOMEM;
+-              goto out;
+-      }
+-
+-      caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS);
+-      if (!caps) {
+-              ret = -ENOMEM;
+-              goto out;
+-      }
+-
+-      physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
+-      if (!physical) {
++      zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
++      if (!zone_info) {
+               ret = -ENOMEM;
+               goto out;
+       }
+@@ -1353,20 +1345,21 @@ int btrfs_load_block_group_zone_info(str
+       }
+       for (i = 0; i < map->num_stripes; i++) {
++              struct zone_info *info = &zone_info[i];
+               bool is_sequential;
+               struct blk_zone zone;
+               struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+               int dev_replace_is_ongoing = 0;
+               device = map->stripes[i].dev;
+-              physical[i] = map->stripes[i].physical;
++              info->physical = map->stripes[i].physical;
+               if (device->bdev == NULL) {
+-                      alloc_offsets[i] = WP_MISSING_DEV;
++                      info->alloc_offset = WP_MISSING_DEV;
+                       continue;
+               }
+-              is_sequential = btrfs_dev_is_sequential(device, physical[i]);
++              is_sequential = btrfs_dev_is_sequential(device, info->physical);
+               if (is_sequential)
+                       num_sequential++;
+               else
+@@ -1380,7 +1373,7 @@ int btrfs_load_block_group_zone_info(str
+                       __set_bit(i, active);
+               if (!is_sequential) {
+-                      alloc_offsets[i] = WP_CONVENTIONAL;
++                      info->alloc_offset = WP_CONVENTIONAL;
+                       continue;
+               }
+@@ -1388,25 +1381,25 @@ int btrfs_load_block_group_zone_info(str
+                * This zone will be used for allocation, so mark this zone
+                * non-empty.
+                */
+-              btrfs_dev_clear_zone_empty(device, physical[i]);
++              btrfs_dev_clear_zone_empty(device, info->physical);
+               down_read(&dev_replace->rwsem);
+               dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+               if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
+-                      btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
++                      btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
+               up_read(&dev_replace->rwsem);
+               /*
+                * The group is mapped to a sequential zone. Get the zone write
+                * pointer to determine the allocation offset within the zone.
+                */
+-              WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
++              WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
+               nofs_flag = memalloc_nofs_save();
+-              ret = btrfs_get_dev_zone(device, physical[i], &zone);
++              ret = btrfs_get_dev_zone(device, info->physical, &zone);
+               memalloc_nofs_restore(nofs_flag);
+               if (ret == -EIO || ret == -EOPNOTSUPP) {
+                       ret = 0;
+-                      alloc_offsets[i] = WP_MISSING_DEV;
++                      info->alloc_offset = WP_MISSING_DEV;
+                       continue;
+               } else if (ret) {
+                       goto out;
+@@ -1421,27 +1414,26 @@ int btrfs_load_block_group_zone_info(str
+                       goto out;
+               }
+-              caps[i] = (zone.capacity << SECTOR_SHIFT);
++              info->capacity = (zone.capacity << SECTOR_SHIFT);
+               switch (zone.cond) {
+               case BLK_ZONE_COND_OFFLINE:
+               case BLK_ZONE_COND_READONLY:
+                       btrfs_err(fs_info,
+               "zoned: offline/readonly zone %llu on device %s (devid %llu)",
+-                                physical[i] >> device->zone_info->zone_size_shift,
++                                info->physical >> device->zone_info->zone_size_shift,
+                                 rcu_str_deref(device->name), device->devid);
+-                      alloc_offsets[i] = WP_MISSING_DEV;
++                      info->alloc_offset = WP_MISSING_DEV;
+                       break;
+               case BLK_ZONE_COND_EMPTY:
+-                      alloc_offsets[i] = 0;
++                      info->alloc_offset = 0;
+                       break;
+               case BLK_ZONE_COND_FULL:
+-                      alloc_offsets[i] = caps[i];
++                      info->alloc_offset = info->capacity;
+                       break;
+               default:
+                       /* Partially used zone */
+-                      alloc_offsets[i] =
+-                                      ((zone.wp - zone.start) << SECTOR_SHIFT);
++                      info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
+                       __set_bit(i, active);
+                       break;
+               }
+@@ -1468,15 +1460,15 @@ int btrfs_load_block_group_zone_info(str
+       switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+       case 0: /* single */
+-              if (alloc_offsets[0] == WP_MISSING_DEV) {
++              if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+                       btrfs_err(fs_info,
+                       "zoned: cannot recover write pointer for zone %llu",
+-                              physical[0]);
++                              zone_info[0].physical);
+                       ret = -EIO;
+                       goto out;
+               }
+-              cache->alloc_offset = alloc_offsets[0];
+-              cache->zone_capacity = caps[0];
++              cache->alloc_offset = zone_info[0].alloc_offset;
++              cache->zone_capacity = zone_info[0].capacity;
+               if (test_bit(0, active))
+                       set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
+               break;
+@@ -1486,21 +1478,21 @@ int btrfs_load_block_group_zone_info(str
+                       ret = -EINVAL;
+                       goto out;
+               }
+-              if (alloc_offsets[0] == WP_MISSING_DEV) {
++              if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+                       btrfs_err(fs_info,
+                       "zoned: cannot recover write pointer for zone %llu",
+-                              physical[0]);
++                              zone_info[0].physical);
+                       ret = -EIO;
+                       goto out;
+               }
+-              if (alloc_offsets[1] == WP_MISSING_DEV) {
++              if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
+                       btrfs_err(fs_info,
+                       "zoned: cannot recover write pointer for zone %llu",
+-                              physical[1]);
++                              zone_info[1].physical);
+                       ret = -EIO;
+                       goto out;
+               }
+-              if (alloc_offsets[0] != alloc_offsets[1]) {
++              if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
+                       btrfs_err(fs_info,
+                       "zoned: write pointer offset mismatch of zones in DUP profile");
+                       ret = -EIO;
+@@ -1516,8 +1508,8 @@ int btrfs_load_block_group_zone_info(str
+                               set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+                                       &cache->runtime_flags);
+               }
+-              cache->alloc_offset = alloc_offsets[0];
+-              cache->zone_capacity = min(caps[0], caps[1]);
++              cache->alloc_offset = zone_info[0].alloc_offset;
++              cache->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID1:
+       case BTRFS_BLOCK_GROUP_RAID0:
+@@ -1570,9 +1562,7 @@ out:
+               cache->physical_map = NULL;
+       }
+       bitmap_free(active);
+-      kfree(physical);
+-      kfree(caps);
+-      kfree(alloc_offsets);
++      kfree(zone_info);
+       free_extent_map(em);
+       return ret;
diff --git a/queue-6.6/series b/queue-6.6/series
index ecad7ec48e6a6a7e8e2920a4ee496823df247600..95420a18c2d9d67bdd976b2a078560ab54e1be02 100644 (file)
@@ -233,3 +233,8 @@ intel_th-pci-add-sapphire-rapids-soc-support.patch
 intel_th-pci-add-meteor-lake-s-support.patch
 intel_th-pci-add-lunar-lake-support.patch
 pmdomain-ti-sci-fix-duplicate-pd-referrals.patch
+btrfs-zoned-introduce-a-zone_info-struct-in-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-factor-out-per-zone-logic-from-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-factor-out-single-bg-handling-from-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-factor-out-dup-bg-handling-from-btrfs_load_block_group_zone_info.patch
+btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch