]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: prevent gc from picking the same zone twice
authorChristoph Hellwig <hch@lst.de>
Thu, 23 Oct 2025 15:17:02 +0000 (17:17 +0200)
committerCarlos Maiolino <cem@kernel.org>
Fri, 31 Oct 2025 11:06:03 +0000 (12:06 +0100)
When we are picking a zone for gc it might already be in the pipeline
which can lead to us moving the same data twice, resulting in write
amplification and a very unfortunate case where we keep on garbage
collecting the zone we just filled with migrated data, stopping all
forward progress.

Fix this by introducing a count of on-going GC operations on a zone, and
skip any zone with ongoing GC when picking a new victim.

Fixes: 080d01c41 ("xfs: implement zoned garbage collection")
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
fs/xfs/libxfs/xfs_rtgroup.h
fs/xfs/xfs_zone_gc.c

index d36a6ae0abe57fafc67048fd7546f61e068caf20..d4fcf591e63d08c9989439324499fbda8146a722 100644 (file)
@@ -50,6 +50,12 @@ struct xfs_rtgroup {
                uint8_t                 *rtg_rsum_cache;
                struct xfs_open_zone    *rtg_open_zone;
        };
+
+       /*
+        * Count of outstanding GC operations for zoned XFS.  Any RTG with a
+        * non-zero rtg_gccount will not be picked as new GC victim.
+        */
+       atomic_t                rtg_gccount;
 };
 
 /*
index 109877d9a6bf9e62d58f1846a89c8f7a4eab8937..4ade5444553207bf35d2f64e57f76ec402f52e7c 100644 (file)
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
        /* Open Zone being written to */
        struct xfs_open_zone            *oz;
 
+       struct xfs_rtgroup              *victim_rtg;
+
        /* Bio used for reads and writes, including the bvec used by it */
        struct bio_vec                  bv;
        struct bio                      bio;    /* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
        iter->rec_count = 0;
        iter->rec_idx = 0;
        iter->victim_rtg = victim_rtg;
+       atomic_inc(&victim_rtg->rtg_gccount);
 }
 
 /*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
 
        return 0;
 done:
+       atomic_dec(&iter->victim_rtg->rtg_gccount);
        xfs_rtgroup_rele(iter->victim_rtg);
        iter->victim_rtg = NULL;
        return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
                if (!rtg)
                        continue;
 
+               /*
+                * If the zone is already undergoing GC, don't pick it again.
+                *
+                * This prevents us from picking one of the zones for which we
+                * already submitted GC I/O, but for which the remapping hasn't
+                * concluded yet.  This won't cause data corruption, but
+                * increases write amplification and slows down GC, so this is
+                * a bad thing.
+                */
+               if (atomic_read(&rtg->rtg_gccount)) {
+                       xfs_rtgroup_rele(rtg);
+                       continue;
+               }
+
                /* skip zones that are just waiting for a reset */
                if (rtg_rmap(rtg)->i_used_blocks == 0 ||
                    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
        chunk->scratch = &data->scratch[data->scratch_idx];
        chunk->data = data;
        chunk->oz = oz;
+       chunk->victim_rtg = iter->victim_rtg;
+       atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+       atomic_inc(&chunk->victim_rtg->rtg_gccount);
 
        bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
        bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
 xfs_zone_gc_free_chunk(
        struct xfs_gc_bio       *chunk)
 {
+       atomic_dec(&chunk->victim_rtg->rtg_gccount);
+       xfs_rtgroup_rele(chunk->victim_rtg);
        list_del(&chunk->entry);
        xfs_open_zone_put(chunk->oz);
        xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
        split_chunk->oz = chunk->oz;
        atomic_inc(&chunk->oz->oz_ref);
 
+       split_chunk->victim_rtg = chunk->victim_rtg;
+       atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+       atomic_inc(&chunk->victim_rtg->rtg_gccount);
+
        chunk->offset += split_len;
        chunk->len -= split_len;
        chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);