]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
btrfs: zoned: reserve zones for an active metadata/system block group
author Naohiro Aota <naohiro.aota@wdc.com>
Mon, 7 Aug 2023 16:12:36 +0000 (01:12 +0900)
committer David Sterba <dsterba@suse.com>
Mon, 21 Aug 2023 12:52:19 +0000 (14:52 +0200)
Ensure that metadata and system block groups can be activated at write
time by leaving a certain number of active zones unused when trying to
activate a data block group.

Zones for two metadata block groups (normal and tree-log) and one system
block group are reserved, according to the profile type: two zones per
block group on the DUP profile and one zone per block group otherwise.

The reservation must be released once a non-data block group is activated
(and taken again when that block group is finished). If not, we
over-reserve the active zones and data block group activation will suffer.
Because the reservation changes dynamically, the reservation count has to
be managed per device.

The reservation count variable is protected by
fs_info->zone_active_bgs_lock.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/disk-io.c
fs/btrfs/zoned.c
fs/btrfs/zoned.h

index a38d3f9a1a0391e23b5f5f21bb817f4bf6c90f2a..5022a3fd778dd8348623e7e5963c8cb54c94f04d 100644 (file)
@@ -3467,6 +3467,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
        btrfs_free_zone_cache(fs_info);
 
+       btrfs_check_active_zone_reservation(fs_info);
+
        if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
            !btrfs_check_rw_degradable(fs_info, NULL)) {
                btrfs_warn(fs_info,
index d7188680c8c6e9630149410e7645c07846397e85..fc69041bb6b4b659295d41e570e8f05e35ea1a95 100644 (file)
@@ -1889,6 +1889,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
        struct map_lookup *map;
        struct btrfs_device *device;
        u64 physical;
+       const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
        bool ret;
        int i;
 
@@ -1910,19 +1911,40 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
                goto out_unlock;
        }
 
+       spin_lock(&fs_info->zone_active_bgs_lock);
        for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_zoned_device_info *zinfo;
+               int reserved = 0;
+
                device = map->stripes[i].dev;
                physical = map->stripes[i].physical;
+               zinfo = device->zone_info;
 
-               if (device->zone_info->max_active_zones == 0)
+               if (zinfo->max_active_zones == 0)
                        continue;
 
+               if (is_data)
+                       reserved = zinfo->reserved_active_zones;
+               /*
+                * For the data block group, leave active zones for the
+                * metadata (normal and tree-log) and system block groups.
+                */
+               if (atomic_read(&zinfo->active_zones_left) <= reserved) {
+                       ret = false;
+                       spin_unlock(&fs_info->zone_active_bgs_lock);
+                       goto out_unlock;
+               }
+
                if (!btrfs_dev_set_active_zone(device, physical)) {
                        /* Cannot activate the zone */
                        ret = false;
+                       spin_unlock(&fs_info->zone_active_bgs_lock);
                        goto out_unlock;
                }
+               if (!is_data)
+                       zinfo->reserved_active_zones--;
        }
+       spin_unlock(&fs_info->zone_active_bgs_lock);
 
        /* Successfully activated all the zones */
        set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
@@ -2061,18 +2083,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
                const u64 physical = map->stripes[i].physical;
+               struct btrfs_zoned_device_info *zinfo = device->zone_info;
 
-               if (device->zone_info->max_active_zones == 0)
+               if (zinfo->max_active_zones == 0)
                        continue;
 
                ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
                                       physical >> SECTOR_SHIFT,
-                                      device->zone_info->zone_size >> SECTOR_SHIFT,
+                                      zinfo->zone_size >> SECTOR_SHIFT,
                                       GFP_NOFS);
 
                if (ret)
                        return ret;
 
+               if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+                       zinfo->reserved_active_zones++;
                btrfs_dev_clear_active_zone(device, physical);
        }
 
@@ -2111,8 +2136,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 
        /* Check if there is a device with active zones left */
        mutex_lock(&fs_info->chunk_mutex);
+       spin_lock(&fs_info->zone_active_bgs_lock);
        list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
                struct btrfs_zoned_device_info *zinfo = device->zone_info;
+               int reserved = 0;
 
                if (!device->bdev)
                        continue;
@@ -2122,17 +2149,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
                        break;
                }
 
+               if (flags & BTRFS_BLOCK_GROUP_DATA)
+                       reserved = zinfo->reserved_active_zones;
+
                switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
                case 0: /* single */
-                       ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+                       ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
                        break;
                case BTRFS_BLOCK_GROUP_DUP:
-                       ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+                       ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
                        break;
                }
                if (ret)
                        break;
        }
+       spin_unlock(&fs_info->zone_active_bgs_lock);
        mutex_unlock(&fs_info->chunk_mutex);
 
        if (!ret)
@@ -2374,3 +2405,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 
        return 0;
 }
+
+/*
+ * Reserve zones for one metadata block group, one tree-log block group, and one
+ * system block group.
+ *
+ * Returns immediately unless BTRFS_FS_ACTIVE_ZONE_TRACKING is set in
+ * fs_info->flags.
+ *
+ * Otherwise every device that has a bdev gets its
+ * zone_info->reserved_active_zones set to metadata_reserve + system_reserve:
+ * 2 + 1 by default, with the metadata share raised to 4 when
+ * avail_metadata_alloc_bits contains DUP and the system share raised to 2 when
+ * avail_system_alloc_bits contains DUP.
+ *
+ * After that, each metadata or system block group already linked on
+ * fs_info->zone_active_bgs gives back one reserved zone per stripe, on the
+ * device that stripe lives on.
+ *
+ * NOTE(review): the per-stripe decrement is unconditional, so nothing in this
+ * function keeps the count from dropping below zero when more such stripes are
+ * active than were reserved - confirm the readers of reserved_active_zones
+ * tolerate that.
+ */
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_block_group *block_group;
+       struct btrfs_device *device;
+       /*
+        * Reserve zones for normal SINGLE metadata and tree-log block group:
+        * two block groups, one zone each.
+        */
+       unsigned int metadata_reserve = 2;
+       /* Reserve a zone for SINGLE system block group. */
+       unsigned int system_reserve = 1;
+
+       if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+               return;
+
+       /*
+        * This function is called from the mount context. So, there is no
+        * parallel process touching the bits. No need for read_seqretry().
+        *
+        * With the DUP profile each share is doubled (2 -> 4 for metadata,
+        * 1 -> 2 for system).
+        */
+       if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+               metadata_reserve = 4;
+       if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+               system_reserve = 2;
+
+       /*
+        * Apply the reservation on all the devices. Devices without a bdev
+        * are skipped. The store is done while holding device_list_mutex.
+        */
+       mutex_lock(&fs_devices->device_list_mutex);
+       list_for_each_entry(device, &fs_devices->devices, dev_list) {
+               if (!device->bdev)
+                       continue;
+
+               device->zone_info->reserved_active_zones =
+                       metadata_reserve + system_reserve;
+       }
+       mutex_unlock(&fs_devices->device_list_mutex);
+
+       /*
+        * Release reservation for currently active block groups. Only block
+        * groups flagged METADATA or SYSTEM are considered; each of their
+        * stripes decrements the count of the device it is placed on. The
+        * walk is done while holding zone_active_bgs_lock.
+        */
+       spin_lock(&fs_info->zone_active_bgs_lock);
+       list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+               struct map_lookup *map = block_group->physical_map;
+
+               if (!(block_group->flags &
+                     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+                       continue;
+
+               for (int i = 0; i < map->num_stripes; i++)
+                       map->stripes[i].dev->zone_info->reserved_active_zones--;
+       }
+       spin_unlock(&fs_info->zone_active_bgs_lock);
+}
index 74ec37a25808a8c88fa156ddba54072e076167b8..b9cec523b77842a34bcc6424181cffe86bc28418 100644 (file)
@@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
        u8  zone_size_shift;
        u32 nr_zones;
        unsigned int max_active_zones;
+       /*
+        * Active zones reserved for metadata (normal and tree-log) and system
+        * block groups. It can vary per-device depending on the allocation
+        * status.
+        */
+       int reserved_active_zones;
        atomic_t active_zones_left;
        unsigned long *seq_zones;
        unsigned long *empty_zones;
@@ -78,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
 int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
 int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
                                struct btrfs_space_info *space_info, bool do_finish);
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                     struct blk_zone *zone)
@@ -252,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)