git.ipfire.org Git - people/ms/linux.git/commitdiff
btrfs: zoned: wait for extent buffer IOs before finishing a zone
author Naohiro Aota <naohiro.aota@wdc.com>
Fri, 9 Sep 2022 06:59:55 +0000 (15:59 +0900)
committer David Sterba <dsterba@suse.com>
Tue, 13 Sep 2022 12:05:18 +0000 (14:05 +0200)
Before sending REQ_OP_ZONE_FINISH to a zone, we need to ensure that
ongoing IOs have already finished. Otherwise, we will see a "Zone Is Full"
error for the IOs, as the ZONE_FINISH command makes the zone full.

We ensure that with btrfs_wait_block_group_reservations() and
btrfs_wait_ordered_roots() for a data block group. For a metadata
block group, the comparison of alloc_offset vs meta_write_pointer mostly
ensures that IOs for the allocated region have already been sent. However,
there can still be a short time frame where the IOs are sent but not yet
completed.

Introduce wait_eb_writebacks() to ensure such IOs are completed for a
metadata block group. It walks the buffer_radix to find extent buffers in
the block group and calls wait_on_extent_buffer_writeback() on them.

Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
CC: stable@vger.kernel.org # 5.19+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/zoned.c

index 62e7007a7e46c6ea8bc04f4ba48638498052c11b..73c6929f7be66163d72a36d4790154ba7e882978 100644 (file)
@@ -1918,10 +1918,44 @@ out_unlock:
        return ret;
 }
 
+static void wait_eb_writebacks(struct btrfs_block_group *block_group) /* Wait on writeback of each extent buffer that starts inside block_group. */
+{
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       const u64 end = block_group->start + block_group->length; /* exclusive end of the block group's byte range */
+       struct radix_tree_iter iter;
+       struct extent_buffer *eb;
+       void __rcu **slot;
+
+       rcu_read_lock(); /* the buffer_radix walk below runs inside an RCU read-side section */
+       radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
+                                block_group->start >> fs_info->sectorsize_bits) { /* start at the index derived from the block group's first byte */
+               eb = radix_tree_deref_slot(slot);
+               if (!eb) /* nothing stored in this slot */
+                       continue;
+               if (radix_tree_deref_retry(eb)) { /* deref asked for a retry: redo this position */
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
+
+               if (eb->start < block_group->start) /* buffer begins before this block group: skip it */
+                       continue;
+               if (eb->start >= end) /* first buffer at or past the end: nothing further to wait for */
+                       break;
+
+               slot = radix_tree_iter_resume(slot, &iter); /* prepare the iterator for resumption before the RCU lock is dropped */
+               rcu_read_unlock(); /* leave the RCU section before waiting */
+               wait_on_extent_buffer_writeback(eb); /* NOTE(review): no reference on eb is taken in this function before the unlock; confirm eb cannot be freed meanwhile */
+               rcu_read_lock(); /* re-enter the RCU section to continue the walk */
+       }
+       rcu_read_unlock();
+}
+
 static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
 {
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct map_lookup *map;
+       const bool is_metadata = (block_group->flags &
+                       (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
        int ret = 0;
        int i;
 
@@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
        }
 
        /* Check if we have unwritten allocated space */
-       if ((block_group->flags &
-            (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+       if (is_metadata &&
            block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
                spin_unlock(&block_group->lock);
                return -EAGAIN;
@@ -1958,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
                /* No need to wait for NOCOW writers. Zoned mode does not allow that */
                btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
                                         block_group->length);
+               /* Wait for extent buffers to be written. */
+               if (is_metadata)
+                       wait_eb_writebacks(block_group);
 
                spin_lock(&block_group->lock);