xfs: add large atomic writes checks in xfs_direct_write_iomap_begin()

author John Garry <john.g.garry@oracle.com>

Wed, 7 May 2025 21:18:30 +0000 (14:18 -0700)

committer Darrick J. Wong <djwong@kernel.org>

Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
author John Garry <john.g.garry@oracle.com>
Wed, 7 May 2025 21:18:30 +0000 (14:18 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 166fba2ff1ef40efa7f40a0981c5f89d9f8bce9d..ff05e6b1b0bbd27111e2ccfbc5177293910843d6 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -798,6 +798,38 @@ imap_spans_range(
         return true;
  }
  
+static bool
+xfs_bmap_hw_atomic_write_possible(
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *imap,
+       xfs_fileoff_t           offset_fsb,
+       xfs_fileoff_t           end_fsb)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fsize_t             len = XFS_FSB_TO_B(mp, end_fsb - offset_fsb);
+
+       /*
+        * Atomic writes must be naturally aligned on disk, i.e. the mapping's
+        * start block is aligned to its block count. This ensures we adhere to
+        * block layer rules on not straddling a boundary and write alignment.
+        */
+       if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
+               return false;
+
+       /*
+        * The mapping must span the whole offset_fsb..end_fsb range. Spanning
+        * multiple extents would mean issuing multiple BIOs, which would lose
+        * the atomicity required for REQ_ATOMIC-based atomics.
+        */
+       if (!imap_spans_range(imap, offset_fsb, end_fsb))
+               return false;
+
+       /*
+        * The ->iomap_begin caller should keep len <= bt_bdev_awu_max; recheck.
+        */
+       return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max;
+}
+
  static int
  xfs_direct_write_iomap_begin(
         struct inode            *inode,
@@ -812,9 +844,11 @@ xfs_direct_write_iomap_begin(
         struct xfs_bmbt_irec    imap, cmap;
         xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
         xfs_fileoff_t           end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+       xfs_fileoff_t           orig_end_fsb = end_fsb;
         int                     nimaps = 1, error = 0;
         bool                    shared = false;
         u16                     iomap_flags = 0;
+       bool                    needs_alloc;
         unsigned int            lockmode;
         u64                     seq;
  
@@ -875,13 +909,37 @@ relock:
                                 (flags & IOMAP_DIRECT) || IS_DAX(inode));
                 if (error)
                         goto out_unlock;
-               if (shared)
+               if (shared) {
+                       if ((flags & IOMAP_ATOMIC) &&
+                           !xfs_bmap_hw_atomic_write_possible(ip, &cmap,
+                                       offset_fsb, end_fsb)) {
+                               error = -ENOPROTOOPT;
+                               goto out_unlock;
+                       }
                         goto out_found_cow;
+               }
                 end_fsb = imap.br_startoff + imap.br_blockcount;
                 length = XFS_FSB_TO_B(mp, end_fsb) - offset;
         }
  
-       if (imap_needs_alloc(inode, flags, &imap, nimaps))
+       needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
+
+       if (flags & IOMAP_ATOMIC) {
+               error = -ENOPROTOOPT;
+               /*
+                * If we allocate less than what is required for the write
+                * then we may end up with multiple extents, which means that
+                * REQ_ATOMIC-based atomics cannot be used, so avoid this possibility.
+                */
+               if (needs_alloc && orig_end_fsb - offset_fsb > 1)
+                       goto out_unlock;
+
+               if (!xfs_bmap_hw_atomic_write_possible(ip, &imap, offset_fsb,
+                               orig_end_fsb))
+                       goto out_unlock;
+       }
+
+       if (needs_alloc)
                 goto allocate_blocks;
  
         /*
author	John Garry <john.g.garry@oracle.com>
	Wed, 7 May 2025 21:18:30 +0000 (14:18 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
	Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)