]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: handle too many open zones when mounting
authorChristoph Hellwig <hch@lst.de>
Tue, 31 Mar 2026 15:26:06 +0000 (17:26 +0200)
committerCarlos Maiolino <cem@kernel.org>
Tue, 7 Apr 2026 11:16:59 +0000 (13:16 +0200)
When running on conventional zones or devices, the zoned allocator does
not have a real write pointer, but instead fakes it up at mount time
based on the last block recorded in the rmap.  This can create spurious
"open" zones when the last written blocks in a conventional zone are
invalidated.  Add a loop to the mount code to find the conventional zone
with the highest used block in the rmap tree and "finish" it until we
are below the open zones limit.

While we're at it, also error out if there are too many open sequential
zones, which can only happen when the user overrode the max open zones
limit (or with really buggy hardware reducing the limit, but not much
we can do about that).

Fixes: 4e4d52075577 ("xfs: add the zoned space allocator")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
fs/xfs/xfs_trace.h
fs/xfs/xfs_zone_alloc.c

index 60d1e605dfa5b5dd3e4bbf46cd441f5416a05d78..c5ad26a1d7bb25c73bae8fce7acedc7405dcec39 100644 (file)
@@ -461,6 +461,7 @@ DEFINE_EVENT(xfs_zone_alloc_class, name,                    \
 DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks);
 DEFINE_ZONE_ALLOC_EVENT(xfs_zone_skip_blocks);
 DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks);
+DEFINE_ZONE_ALLOC_EVENT(xfs_zone_spurious_open);
 
 TRACE_EVENT(xfs_zone_gc_select_victim,
        TP_PROTO(struct xfs_rtgroup *rtg, unsigned int bucket),
index e9f1d9d086202dddaddf57ce7528b08e6fdefa42..5f8b6cbeebfdb23eee09e0c73c616408bdcdb263 100644 (file)
@@ -1253,6 +1253,77 @@ xfs_report_zones(
        return 0;
 }
 
+/*
+ * Return true if the zone backing @rtg is conventional, i.e. not a
+ * sequential-write-required zone with a hardware write pointer.
+ */
+static inline bool
+xfs_zone_is_conv(
+       struct xfs_rtgroup      *rtg)
+{
+       return !bdev_zone_is_seq(rtg_mount(rtg)->m_rtdev_targp->bt_bdev,
+                       xfs_gbno_to_daddr(rtg_group(rtg), 0));
+}
+
+/*
+ * Scan the open zone list for the conventional zone with the highest
+ * number of allocated blocks.
+ *
+ * Returns NULL if no conventional zone is currently open.
+ */
+static struct xfs_open_zone *
+xfs_find_fullest_conventional_open_zone(
+       struct xfs_mount        *mp)
+{
+       struct xfs_zone_info    *zi = mp->m_zone_info;
+       struct xfs_open_zone    *found = NULL, *oz;
+
+       spin_lock(&zi->zi_open_zones_lock);
+       list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) {
+               if (!xfs_zone_is_conv(oz->oz_rtg))
+                       continue;
+               if (!found || oz->oz_allocated > found->oz_allocated)
+                       found = oz;
+       }
+       spin_unlock(&zi->zi_open_zones_lock);
+
+       return found;
+}
+
+/*
+ * Find the fullest conventional zones and remove them from the open zone pool
+ * until we are at the open zone limit.
+ *
+ * We can end up with spurious "open" zones when the last blocks in a fully
+ * written zone were invalidated, as there is no write pointer for conventional
+ * zones.
+ *
+ * If we are still over the limit when there is no conventional open zone left,
+ * the user overrode the max open zones limit using the max_open_zones mount
+ * option, and we should fail the mount.
+ */
+static int
+xfs_finish_spurious_open_zones(
+       struct xfs_mount        *mp,
+       struct xfs_init_zones   *iz)
+{
+       struct xfs_zone_info    *zi = mp->m_zone_info;
+
+       while (zi->zi_nr_open_zones > mp->m_max_open_zones) {
+               struct xfs_open_zone    *oz;
+               xfs_filblks_t           adjust;
+
+               /* No conventional zone left to finish: sequential zones
+                * alone exceed the limit, which only happens with an
+                * overridden (or hardware-reduced) max_open_zones. */
+               oz = xfs_find_fullest_conventional_open_zone(mp);
+               if (!oz) {
+                       xfs_err(mp,
+"too many open zones for max_open_zones limit (%u/%u)",
+                       zi->zi_nr_open_zones, mp->m_max_open_zones);
+                       return -EINVAL;
+               }
+
+               /* Mark the zone fully written so it leaves the open pool. */
+               xfs_rtgroup_lock(oz->oz_rtg, XFS_RTGLOCK_RMAP);
+               adjust = rtg_blocks(oz->oz_rtg) - oz->oz_written;
+               trace_xfs_zone_spurious_open(oz, oz->oz_written, adjust);
+               oz->oz_written = rtg_blocks(oz->oz_rtg);
+               xfs_open_zone_mark_full(oz);
+               xfs_rtgroup_unlock(oz->oz_rtg, XFS_RTGLOCK_RMAP);
+               /*
+                * The never-written tail of the zone is no longer directly
+                * allocatable; account it as reclaimable instead so the
+                * freecounters set up by xfs_mount_zones stay consistent.
+                */
+               iz->available -= adjust;
+               iz->reclaimable += adjust;
+       }
+
+       return 0;
+}
+
 int
 xfs_mount_zones(
        struct xfs_mount        *mp)
@@ -1294,6 +1365,10 @@ xfs_mount_zones(
        if (error)
                goto out_free_zone_info;
 
+       error = xfs_finish_spurious_open_zones(mp, &iz);
+       if (error)
+               goto out_free_zone_info;
+
        xfs_set_freecounter(mp, XC_FREE_RTAVAILABLE, iz.available);
        xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
                        iz.available + iz.reclaimable);