git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
btrfs: zoned: introduce space_info->active_total_bytes
author Naohiro Aota <naohiro.aota@wdc.com>
Fri, 8 Jul 2022 23:18:45 +0000 (08:18 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 17 Aug 2022 12:42:27 +0000 (14:42 +0200)
[ Upstream commit 6a921de589926a350634e6e279f43fa5b9dbf5ba ]

The active_total_bytes, like the total_bytes, accounts for total bytes in
the space_info, but it only counts the bytes of active block groups.

With the introduction of active_total_bytes, we can check if the reserved
bytes can be written to the block groups without activating a new block
group. The check is necessary for metadata allocation on a zoned
filesystem. We cannot finish a block group, which may require waiting
for the current transaction, from the metadata allocation context.
Instead, we need to ensure the ongoing allocation (reserved bytes) fits
in active block groups.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/btrfs/block-group.c
fs/btrfs/space-info.c
fs/btrfs/space-info.h
fs/btrfs/zoned.c

index 1deca5164c23cd561bbb0986d769a7d484ffd0ae..88f59a2e4113d3d02accba7364a2a8f4a215ffb1 100644 (file)
@@ -1033,8 +1033,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                        < block_group->zone_unusable);
                WARN_ON(block_group->space_info->disk_total
                        < block_group->length * factor);
+               WARN_ON(block_group->zone_is_active &&
+                       block_group->space_info->active_total_bytes
+                       < block_group->length);
        }
        block_group->space_info->total_bytes -= block_group->length;
+       if (block_group->zone_is_active)
+               block_group->space_info->active_total_bytes -= block_group->length;
        block_group->space_info->bytes_readonly -=
                (block_group->length - block_group->zone_unusable);
        block_group->space_info->bytes_zone_unusable -=
@@ -2102,7 +2107,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
        trace_btrfs_add_block_group(info, cache, 0);
        btrfs_update_space_info(info, cache->flags, cache->length,
                                cache->used, cache->bytes_super,
-                               cache->zone_unusable, &space_info);
+                               cache->zone_unusable, cache->zone_is_active,
+                               &space_info);
 
        cache->space_info = space_info;
 
@@ -2172,7 +2178,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
                }
 
                btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
-                                       0, 0, &space_info);
+                                       0, 0, false, &space_info);
                bg->space_info = space_info;
                link_block_group(bg);
 
@@ -2553,7 +2559,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
        trace_btrfs_add_block_group(fs_info, cache, 1);
        btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
                                cache->bytes_super, cache->zone_unusable,
-                               &cache->space_info);
+                               cache->zone_is_active, &cache->space_info);
        btrfs_update_global_block_rsv(fs_info);
 
        link_block_group(cache);
index 98a84b523be6a397c4796616de026909dcfb0dc0..4867199cf983c4eb6b4ca3f97bfbf3a02c420778 100644 (file)
@@ -295,7 +295,7 @@ out:
 void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             u64 bytes_readonly, u64 bytes_zone_unusable,
-                            struct btrfs_space_info **space_info)
+                            bool active, struct btrfs_space_info **space_info)
 {
        struct btrfs_space_info *found;
        int factor;
@@ -306,6 +306,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
        ASSERT(found);
        spin_lock(&found->lock);
        found->total_bytes += total_bytes;
+       if (active)
+               found->active_total_bytes += total_bytes;
        found->disk_total += total_bytes * factor;
        found->bytes_used += bytes_used;
        found->disk_used += bytes_used * factor;
@@ -369,6 +371,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
        return avail;
 }
 
+static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
+                                      struct btrfs_space_info *space_info)
+{
+       /*
+        * Bytes of @space_info considered writable. On a non-zoned filesystem,
+        * or when the space_info has the DATA flag set, that is total_bytes.
+        * On a zoned filesystem max_active_zones may limit what is writable,
+        * and metadata allocation cannot finish an existing active block group
+        * without risking a deadlock, so only active_total_bytes is returned.
+        */
+       if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
+               return space_info->total_bytes;
+
+       return space_info->active_total_bytes;
+}
+
 int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                         struct btrfs_space_info *space_info, u64 bytes,
                         enum btrfs_reserve_flush_enum flush)
@@ -386,7 +404,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
        else
                avail = calc_available_free_space(fs_info, space_info, flush);
 
-       if (used + bytes < space_info->total_bytes + avail)
+       if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
                return 1;
        return 0;
 }
@@ -422,7 +440,7 @@ again:
                ticket = list_first_entry(head, struct reserve_ticket, list);
 
                /* Check and see if our ticket can be satisfied now. */
-               if ((used + ticket->bytes <= space_info->total_bytes) ||
+               if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
                    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
                                         flush)) {
                        btrfs_space_info_update_bytes_may_use(fs_info,
@@ -753,6 +771,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 {
        u64 used;
        u64 avail;
+       u64 total;
        u64 to_reclaim = space_info->reclaim_size;
 
        lockdep_assert_held(&space_info->lock);
@@ -767,8 +786,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
         * space.  If that's the case add in our overage so we make sure to put
         * appropriate pressure on the flushing state machine.
         */
-       if (space_info->total_bytes + avail < used)
-               to_reclaim += used - (space_info->total_bytes + avail);
+       total = writable_total_bytes(fs_info, space_info);
+       if (total + avail < used)
+               to_reclaim += used - (total + avail);
 
        return to_reclaim;
 }
@@ -778,9 +798,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 {
        u64 global_rsv_size = fs_info->global_block_rsv.reserved;
        u64 ordered, delalloc;
-       u64 thresh = div_factor_fine(space_info->total_bytes, 90);
+       u64 total = writable_total_bytes(fs_info, space_info);
+       u64 thresh;
        u64 used;
 
+       thresh = div_factor_fine(total, 90);
+
        lockdep_assert_held(&space_info->lock);
 
        /* If we're just plain full then async reclaim just slows us down. */
@@ -842,8 +865,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
                                           BTRFS_RESERVE_FLUSH_ALL);
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_readonly + global_rsv_size;
-       if (used < space_info->total_bytes)
-               thresh += space_info->total_bytes - used;
+       if (used < total)
+               thresh += total - used;
        thresh >>= space_info->clamp;
 
        used = space_info->bytes_pinned;
@@ -1560,7 +1583,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
         * can_overcommit() to ensure we can overcommit to continue.
         */
        if (!pending_tickets &&
-           ((used + orig_bytes <= space_info->total_bytes) ||
+           ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
             btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
                btrfs_space_info_update_bytes_may_use(fs_info, space_info,
                                                      orig_bytes);
index 137206b8049fca3c27a02d5403850e768f60d95b..b8cee27df21347a61039e8602a6e9bbea272dd67 100644 (file)
@@ -17,6 +17,8 @@ struct btrfs_space_info {
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
        u64 bytes_readonly;     /* total bytes that are read only */
+       /* Total bytes in the space, but only accounts active block groups. */
+       u64 active_total_bytes;
        u64 bytes_zone_unusable;        /* total bytes that are unusable until
                                           resetting the device zone */
 
@@ -122,7 +124,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             u64 bytes_readonly, u64 bytes_zone_unusable,
-                            struct btrfs_space_info **space_info);
+                            bool active, struct btrfs_space_info **space_info);
 void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
                                        u64 chunk_size);
 struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
index 17068179728346f887c8efb0db3a41988f85f319..2ffc6d50d20d5bd05ee435d101f178683203d807 100644 (file)
@@ -1841,6 +1841,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 {
        struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct btrfs_space_info *space_info = block_group->space_info;
        struct map_lookup *map;
        struct btrfs_device *device;
        u64 physical;
@@ -1852,6 +1853,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
        map = block_group->physical_map;
 
+       spin_lock(&space_info->lock);
        spin_lock(&block_group->lock);
        if (block_group->zone_is_active) {
                ret = true;
@@ -1880,7 +1882,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
        /* Successfully activated all the zones */
        block_group->zone_is_active = 1;
+       space_info->active_total_bytes += block_group->length;
        spin_unlock(&block_group->lock);
+       btrfs_try_granting_tickets(fs_info, space_info);
+       spin_unlock(&space_info->lock);
 
        /* For the active block group list */
        btrfs_get_block_group(block_group);
@@ -1893,6 +1898,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 out_unlock:
        spin_unlock(&block_group->lock);
+       spin_unlock(&space_info->lock);
        return ret;
 }