xfs: cache open zone in inode->i_private

author Christoph Hellwig <hch@lst.de>

Fri, 17 Oct 2025 03:55:41 +0000 (05:55 +0200)

committer Carlos Maiolino <cem@kernel.org>

Tue, 21 Oct 2025 09:32:50 +0000 (11:32 +0200)
author Christoph Hellwig <hch@lst.de>
Fri, 17 Oct 2025 03:55:41 +0000 (05:55 +0200)
committer Carlos Maiolino <cem@kernel.org>
Tue, 21 Oct 2025 09:32:50 +0000 (11:32 +0200)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index f046d1215b043c918be5599012d9160f3d2a3d50..b871dfde372b526366f307651d9958e0375abdf7 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -236,7 +236,6 @@ typedef struct xfs_mount {
         bool                    m_update_sb;    /* sb needs update in mount */
         unsigned int            m_max_open_zones;
         unsigned int            m_zonegc_low_space;
-       struct xfs_mru_cache    *m_zone_cache;  /* Inode to open zone cache */
  
         /* max_atomic_write mount option value */
         unsigned long long      m_awu_max_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index e85a156dc17d162decc7c010a74496cb44d38d36..464ae1e657d998f38d4f9ac152e3c0b4db2b8714 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -786,6 +786,12 @@ xfs_fs_evict_inode(
  
         truncate_inode_pages_final(&inode->i_data);
         clear_inode(inode);
+
+       if (IS_ENABLED(CONFIG_XFS_RT) &&
+           S_ISREG(inode->i_mode) && inode->i_private) {
+               xfs_open_zone_put(inode->i_private);
+               inode->i_private = NULL;
+       }
  }
  
  static void
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c

index 1b462cd5d8faab60cccb0d592c39dca6e28f6135..23cdab4515bb2cb062a49fc04e8f784d72f0c4c1 100644 (file)
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -26,14 +26,22 @@
  #include "xfs_trace.h"
  #include "xfs_mru_cache.h"
  
+static void
+xfs_open_zone_free_rcu(
+       struct callback_head    *cb)
+{
+       struct xfs_open_zone    *oz = container_of(cb, typeof(*oz), oz_rcu);
+
+       xfs_rtgroup_rele(oz->oz_rtg);
+       kfree(oz);
+}
+
  void
  xfs_open_zone_put(
         struct xfs_open_zone    *oz)
  {
-       if (atomic_dec_and_test(&oz->oz_ref)) {
-               xfs_rtgroup_rele(oz->oz_rtg);
-               kfree(oz);
-       }
+       if (atomic_dec_and_test(&oz->oz_ref))
+               call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu);
  }
  
  static inline uint32_t
@@ -756,98 +764,55 @@ xfs_mark_rtg_boundary(
                 ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
  }
  
-/*
- * Cache the last zone written to for an inode so that it is considered first
- * for subsequent writes.
- */
-struct xfs_zone_cache_item {
-       struct xfs_mru_cache_elem       mru;
-       struct xfs_open_zone            *oz;
-};
-
-static inline struct xfs_zone_cache_item *
-xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
-{
-       return container_of(mru, struct xfs_zone_cache_item, mru);
-}
-
-static void
-xfs_zone_cache_free_func(
-       void                            *data,
-       struct xfs_mru_cache_elem       *mru)
-{
-       struct xfs_zone_cache_item      *item = xfs_zone_cache_item(mru);
-
-       xfs_open_zone_put(item->oz);
-       kfree(item);
-}
-
  /*
   * Check if we have a cached last open zone available for the inode and
   * if yes return a reference to it.
   */
  static struct xfs_open_zone *
-xfs_cached_zone(
-       struct xfs_mount                *mp,
-       struct xfs_inode                *ip)
+xfs_get_cached_zone(
+       struct xfs_inode        *ip)
  {
-       struct xfs_mru_cache_elem       *mru;
-       struct xfs_open_zone            *oz;
+       struct xfs_open_zone    *oz;
  
-       mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
-       if (!mru)
-               return NULL;
-       oz = xfs_zone_cache_item(mru)->oz;
+       rcu_read_lock();
+       oz = VFS_I(ip)->i_private;
         if (oz) {
                 /*
                  * GC only steals open zones at mount time, so no GC zones
                  * should end up in the cache.
                  */
                 ASSERT(!oz->oz_is_gc);
-               ASSERT(atomic_read(&oz->oz_ref) > 0);
-               atomic_inc(&oz->oz_ref);
+               if (!atomic_inc_not_zero(&oz->oz_ref))
+                       oz = NULL;
         }
-       xfs_mru_cache_done(mp->m_zone_cache);
+       rcu_read_unlock();
+
         return oz;
  }
  
  /*
- * Update the last used zone cache for a given inode.
+ * Stash our zone in the inode so that it is reused for future allocations.
   *
- * The caller must have a reference on the open zone.
+ * The open_zone structure will be pinned until either the inode is freed or
+ * until the cached open zone is replaced with a different one because the
+ * current one was full when we tried to use it.  This means we keep any
+ * open zone around forever as long as any inode that used it for the last
+ * write is cached, which slightly increases the memory use of cached inodes
+ * that were ever written to, but significantly simplifies the cached zone
+ * lookup.  Because the open_zone is clearly marked as full when all data
+ * in the underlying RTG was written, the caching is always safe.
   */
  static void
-xfs_zone_cache_create_association(
-       struct xfs_inode                *ip,
-       struct xfs_open_zone            *oz)
+xfs_set_cached_zone(
+       struct xfs_inode        *ip,
+       struct xfs_open_zone    *oz)
  {
-       struct xfs_mount                *mp = ip->i_mount;
-       struct xfs_zone_cache_item      *item = NULL;
-       struct xfs_mru_cache_elem       *mru;
+       struct xfs_open_zone    *old_oz;
  
-       ASSERT(atomic_read(&oz->oz_ref) > 0);
         atomic_inc(&oz->oz_ref);
-
-       mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
-       if (mru) {
-               /*
-                * If we have an association already, update it to point to the
-                * new zone.
-                */
-               item = xfs_zone_cache_item(mru);
-               xfs_open_zone_put(item->oz);
-               item->oz = oz;
-               xfs_mru_cache_done(mp->m_zone_cache);
-               return;
-       }
-
-       item = kmalloc(sizeof(*item), GFP_KERNEL);
-       if (!item) {
-               xfs_open_zone_put(oz);
-               return;
-       }
-       item->oz = oz;
-       xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
+       old_oz = xchg(&VFS_I(ip)->i_private, oz);
+       if (old_oz)
+               xfs_open_zone_put(old_oz);
  }
  
  static void
@@ -891,15 +856,14 @@ xfs_zone_alloc_and_submit(
          * the inode is still associated with a zone and use that if so.
          */
         if (!*oz)
-               *oz = xfs_cached_zone(mp, ip);
+               *oz = xfs_get_cached_zone(ip);
  
         if (!*oz) {
  select_zone:
                 *oz = xfs_select_zone(mp, write_hint, pack_tight);
                 if (!*oz)
                         goto out_error;
-
-               xfs_zone_cache_create_association(ip, *oz);
+               xfs_set_cached_zone(ip, *oz);
         }
  
         alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -977,6 +941,12 @@ xfs_free_open_zones(
                 xfs_open_zone_put(oz);
         }
         spin_unlock(&zi->zi_open_zones_lock);
+
+       /*
+        * Wait for all open zones to be freed so that they drop the group
+        * references:
+        */
+       rcu_barrier();
  }
  
  struct xfs_init_zones {
@@ -1290,14 +1260,6 @@ xfs_mount_zones(
         error = xfs_zone_gc_mount(mp);
         if (error)
                 goto out_free_zone_info;
-
-       /*
-        * Set up a mru cache to track inode to open zone for data placement
-        * purposes. The magic values for group count and life time is the
-        * same as the defaults for file streams, which seems sane enough.
-        */
-       xfs_mru_cache_create(&mp->m_zone_cache, mp,
-                       5000, 10, xfs_zone_cache_free_func);
         return 0;
  
  out_free_zone_info:
@@ -1311,5 +1273,4 @@ xfs_unmount_zones(
  {
         xfs_zone_gc_unmount(mp);
         xfs_free_zone_info(mp->m_zone_info);
-       xfs_mru_cache_destroy(mp->m_zone_cache);
  }
diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h

index 35e6de3d25ed2c1abef2ba47164989a9a5d0f971..4322e26dd99a1a9c6092c7c0a0605c109be62b53 100644 (file)
--- a/fs/xfs/xfs_zone_priv.h
+++ b/fs/xfs/xfs_zone_priv.h
@@ -44,6 +44,8 @@ struct xfs_open_zone {
          * the life time of an open zone.
          */
         struct xfs_rtgroup      *oz_rtg;
+
+       struct rcu_head         oz_rcu;
  };
  
  /*
author	Christoph Hellwig <hch@lst.de>
	Fri, 17 Oct 2025 03:55:41 +0000 (05:55 +0200)
committer	Carlos Maiolino <cem@kernel.org>
	Tue, 21 Oct 2025 09:32:50 +0000 (11:32 +0200)
fs/xfs/xfs_mount.h		patch \| blob \| blame \| history
fs/xfs/xfs_super.c		patch \| blob \| blame \| history
fs/xfs/xfs_zone_alloc.c		patch \| blob \| blame \| history
fs/xfs/xfs_zone_priv.h		patch \| blob \| blame \| history