]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: add large atomic writes checks in xfs_direct_write_iomap_begin()
authorJohn Garry <john.g.garry@oracle.com>
Wed, 7 May 2025 21:18:30 +0000 (14:18 -0700)
committerDarrick J. Wong <djwong@kernel.org>
Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
When large atomic writes (> 1x FS block) are supported, there will be
various occasions when HW offload is not possible.

Such instances include:
- unaligned extent mapping wrt write length
- extent mappings which do not cover the full write, e.g. the write spans
  sparse or mixed-mapping extents
- the write length is greater than HW offload can support
- no hardware support at all

In those cases, we need to fall back to the CoW-based atomic write mode. For
this, report the special error code -ENOPROTOOPT to inform the caller that
the HW offload-based method is not possible.

In addition to the occasions mentioned, if the write covers an unallocated
range, we again judge that we need to rely on the CoW-based method when we
would need to allocate anything more than 1x block. This is because if we
allocate fewer blocks than are required for the write, then again the HW
offload-based method would not be possible. So we are taking a pessimistic
approach to writes covering unallocated space.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: various cleanups]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
fs/xfs/xfs_iomap.c

index 166fba2ff1ef40efa7f40a0981c5f89d9f8bce9d..ff05e6b1b0bbd27111e2ccfbc5177293910843d6 100644 (file)
@@ -798,6 +798,38 @@ imap_spans_range(
        return true;
 }
 
+static bool
+xfs_bmap_hw_atomic_write_possible(
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *imap,
+       xfs_fileoff_t           offset_fsb,
+       xfs_fileoff_t           end_fsb)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fsize_t             len = XFS_FSB_TO_B(mp, end_fsb - offset_fsb);
+
+       /*
+        * Atomic writes must be naturally aligned on disk, i.e. the mapping's
+        * start block is aligned to its block count.  This keeps us within
+        * the block layer's boundary and write alignment rules.
+        */
+       if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
+               return false;
+
+       /*
+        * A write spanning multiple extents would be issued as multiple
+        * BIOs, which loses the atomicity that REQ_ATOMIC-based atomic
+        * writes require, so the mapping must span the whole range.
+        */
+       if (!imap_spans_range(imap, offset_fsb, end_fsb))
+               return false;
+
+       /*
+        * Callers of ->iomap_begin should bound len already, but check anyway.
+        */
+       return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max;
+}
+
 static int
 xfs_direct_write_iomap_begin(
        struct inode            *inode,
@@ -812,9 +844,11 @@ xfs_direct_write_iomap_begin(
        struct xfs_bmbt_irec    imap, cmap;
        xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
        xfs_fileoff_t           end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+       xfs_fileoff_t           orig_end_fsb = end_fsb;
        int                     nimaps = 1, error = 0;
        bool                    shared = false;
        u16                     iomap_flags = 0;
+       bool                    needs_alloc;
        unsigned int            lockmode;
        u64                     seq;
 
@@ -875,13 +909,37 @@ relock:
                                (flags & IOMAP_DIRECT) || IS_DAX(inode));
                if (error)
                        goto out_unlock;
-               if (shared)
+               if (shared) {
+                       if ((flags & IOMAP_ATOMIC) &&
+                           !xfs_bmap_hw_atomic_write_possible(ip, &cmap,
+                                       offset_fsb, end_fsb)) {
+                               error = -ENOPROTOOPT;
+                               goto out_unlock;
+                       }
                        goto out_found_cow;
+               }
                end_fsb = imap.br_startoff + imap.br_blockcount;
                length = XFS_FSB_TO_B(mp, end_fsb) - offset;
        }
 
-       if (imap_needs_alloc(inode, flags, &imap, nimaps))
+       needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
+
+       if (flags & IOMAP_ATOMIC) {
+               error = -ENOPROTOOPT;
+               /*
+                * If we allocate less than what is required for the write
+                * then we may end up with multiple extents, which means that
+                * REQ_ATOMIC-based cannot be used, so avoid this possibility.
+                */
+               if (needs_alloc && orig_end_fsb - offset_fsb > 1)
+                       goto out_unlock;
+
+               if (!xfs_bmap_hw_atomic_write_possible(ip, &imap, offset_fsb,
+                               orig_end_fsb))
+                       goto out_unlock;
+       }
+
+       if (needs_alloc)
                goto allocate_blocks;
 
        /*