]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
xfs: implement direct writes to zoned RT devices
authorChristoph Hellwig <hch@lst.de>
Mon, 27 Jan 2025 14:35:00 +0000 (15:35 +0100)
committerChristoph Hellwig <hch@lst.de>
Mon, 3 Mar 2025 15:17:07 +0000 (08:17 -0700)
Direct writes to zoned RT devices are extremely simple.  After taking the
block reservation before acquiring the iolock, the iomap direct I/O calls
into ->iomap_begin which will return a "fake" iomap for the entire
requested range.  The actual block allocation is then done from the
submit_io handler using code shared with the buffered I/O path.

The iomap_dio_ops set the bio_set to the (iomap) ioend one and initialize
the embedded ioend, which allows reusing the existing ioend based buffered
I/O completion path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_file.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h

index f7f70bb4e19d64c44845119d2e3f9e01890d4856..26a04a783489674e4c52146c73ddbae9daaba8cc 100644 (file)
@@ -158,7 +158,9 @@ xfs_end_ioend(
        else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
                error = xfs_iomap_write_unwritten(ip, offset, size, false);
 
-       if (!error && xfs_ioend_is_append(ioend))
+       if (!error &&
+           !(ioend->io_flags & IOMAP_IOEND_DIRECT) &&
+           xfs_ioend_is_append(ioend))
                error = xfs_setfilesize(ip, offset, size);
 done:
        if (is_zoned)
@@ -205,7 +207,7 @@ xfs_end_io(
        }
 }
 
-static void
+void
 xfs_end_bio(
        struct bio              *bio)
 {
index e0bd684197643d3bcbd3460b230cd517b1ca471d..5a7a0f1a0b49e89d8706a4c032f9f6635cba3f7d 100644 (file)
@@ -9,6 +9,7 @@
 extern const struct address_space_operations xfs_address_space_operations;
 extern const struct address_space_operations xfs_dax_aops;
 
-int    xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
+int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
+void xfs_end_bio(struct bio *bio);
 
 #endif /* __XFS_AOPS_H__ */
index 512d2f56b20691f471624421fc9266664e9cc7c1..fe8cf9d96eb0cae94cba20ad423ad32279dfb925 100644 (file)
@@ -25,6 +25,7 @@
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
 #include "xfs_file.h"
+#include "xfs_aops.h"
 #include "xfs_zone_alloc.h"
 
 #include <linux/dax.h>
@@ -548,6 +549,9 @@ xfs_dio_write_end_io(
        loff_t                  offset = iocb->ki_pos;
        unsigned int            nofs_flag;
 
+       ASSERT(!xfs_is_zoned_inode(ip) ||
+              !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
+
        trace_xfs_end_io_direct_write(ip, offset, size);
 
        if (xfs_is_shutdown(ip->i_mount))
@@ -627,14 +631,51 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
        .end_io         = xfs_dio_write_end_io,
 };
 
+/*
+ * ->submit_io handler for direct writes to zoned RT devices.
+ *
+ * Charge the blocks covered by @bio against the reservation carried in
+ * iter->private, then set up the ioend for the bio and pass it on to
+ * xfs_zone_alloc_and_submit().  A bio that needs more blocks than are left in
+ * the reservation is failed after forcing a shutdown.
+ */
+static void
+xfs_dio_zoned_submit_io(
+       const struct iomap_iter *iter,
+       struct bio              *bio,
+       loff_t                  file_offset)
+{
+       struct xfs_zone_alloc_ctx *ac = iter->private;
+       struct xfs_mount        *mp = XFS_I(iter->inode)->i_mount;
+       xfs_filblks_t           nr_blocks =
+               XFS_B_TO_FSB(mp, bio->bi_iter.bi_size);
+       struct iomap_ioend      *ioend;
+
+       /* The up-front reservation must cover every bio we are handed. */
+       if (nr_blocks > ac->reserved_blocks) {
+               xfs_err(mp,
+"allocation (%lld) larger than reservation (%lld).",
+                       nr_blocks, ac->reserved_blocks);
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               bio_io_error(bio);
+               return;
+       }
+
+       ac->reserved_blocks -= nr_blocks;
+
+       /* Complete through the ioend based path used by xfs_end_bio(). */
+       bio->bi_end_io = xfs_end_bio;
+       ioend = iomap_init_ioend(iter->inode, bio, file_offset,
+                       IOMAP_IOEND_DIRECT);
+
+       xfs_zone_alloc_and_submit(ioend, &ac->open_zone);
+}
+
+/*
+ * Direct I/O operations for writes to zoned RT devices.
+ *
+ * Bios come from the iomap ioend bio_set and are submitted through
+ * xfs_dio_zoned_submit_io() instead of the default iomap submission.  The
+ * ->end_io handler is the same xfs_dio_write_end_io() that xfs_dio_write_ops
+ * uses.
+ */
+static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
+       .bio_set        = &iomap_ioend_bioset,
+       .submit_io      = xfs_dio_zoned_submit_io,
+       .end_io         = xfs_dio_write_end_io,
+};
+
 /*
- * Handle block aligned direct I/O writes
+ * Handle block aligned direct I/O writes.
  */
 static noinline ssize_t
 xfs_file_dio_write_aligned(
        struct xfs_inode        *ip,
        struct kiocb            *iocb,
-       struct iov_iter         *from)
+       struct iov_iter         *from,
+       const struct iomap_ops  *ops,
+       const struct iomap_dio_ops *dops,
+       struct xfs_zone_alloc_ctx *ac)
 {
        unsigned int            iolock = XFS_IOLOCK_SHARED;
        ssize_t                 ret;
@@ -642,7 +683,7 @@ xfs_file_dio_write_aligned(
        ret = xfs_ilock_iocb_for_write(iocb, &iolock);
        if (ret)
                return ret;
-       ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
+       ret = xfs_file_write_checks(iocb, from, &iolock, ac);
        if (ret)
                goto out_unlock;
 
@@ -656,11 +697,31 @@ xfs_file_dio_write_aligned(
                iolock = XFS_IOLOCK_SHARED;
        }
        trace_xfs_file_direct_write(iocb, from);
-       ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-                          &xfs_dio_write_ops, 0, NULL, 0);
+       ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
 out_unlock:
-       if (iolock)
-               xfs_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
+       return ret;
+}
+
+/*
+ * Handle block aligned direct I/O writes to zoned devices.
+ *
+ * The space reservation is taken here, before xfs_file_dio_write_aligned()
+ * acquires the iolock, and the reservation context is released again once
+ * the write has been issued.
+ */
+static noinline ssize_t
+xfs_file_dio_write_zoned(
+       struct xfs_inode        *ip,
+       struct kiocb            *iocb,
+       struct iov_iter         *from)
+{
+       struct xfs_zone_alloc_ctx ac = { };
+       ssize_t                 status;
+       ssize_t                 written;
+
+       status = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
+       if (status < 0)
+               return status;
+
+       written = xfs_file_dio_write_aligned(ip, iocb, from,
+                       &xfs_zoned_direct_write_iomap_ops,
+                       &xfs_dio_zoned_write_ops, &ac);
+
+       xfs_zoned_space_unreserve(ip, &ac);
+       return written;
+}
 
@@ -777,7 +838,10 @@ xfs_file_dio_write(
            (xfs_is_always_cow_inode(ip) &&
             (iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
                return xfs_file_dio_write_unaligned(ip, iocb, from);
-       return xfs_file_dio_write_aligned(ip, iocb, from);
+       if (xfs_is_zoned_inode(ip))
+               return xfs_file_dio_write_zoned(ip, iocb, from);
+       return xfs_file_dio_write_aligned(ip, iocb, from,
+                       &xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
 }
 
 static noinline ssize_t
index 0e64a0ce162273b613066aa11e3761a1756c625b..30e257f683bb57588ef68bdb6bd04d8fa261b40a 100644 (file)
@@ -965,6 +965,59 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
        .iomap_begin            = xfs_direct_write_iomap_begin,
 };
 
+#ifdef CONFIG_XFS_RT
+/*
+ * ->iomap_begin for direct writes to zoned RT devices.
+ *
+ * This is really simple.  The space has already been reserved before taking the
+ * IOLOCK, the actual block allocation is done just before submitting the bio
+ * and only recorded in the extent map on I/O completion.  All that is returned
+ * here is a placeholder mapping that covers the entire requested range; no
+ * block address is filled in.
+ *
+ * @srcmap is not used.
+ *
+ * Returns 0 on success, -EAGAIN for IOMAP_NOWAIT requests, or the error from
+ * reading the extent list into memory.
+ */
+static int
+xfs_zoned_direct_write_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap,
+       struct iomap            *srcmap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       int                     error;
+
+       ASSERT(!(flags & IOMAP_OVERWRITE_ONLY));
+
+       /*
+        * Needs to be pushed down into the allocator so that only writes into
+        * a single zone can be supported.
+        */
+       if (flags & IOMAP_NOWAIT)
+               return -EAGAIN;
+
+       /*
+        * Ensure the extent list is in memory so that we don't have to read
+        * it from the I/O completion handler.
+        */
+       if (xfs_need_iread_extents(&ip->i_df)) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+               if (error)
+                       return error;
+       }
+
+       iomap->type = IOMAP_MAPPED;
+       iomap->bdev = ip->i_mount->m_rtdev_targp->bt_bdev;
+       iomap->offset = offset;
+       iomap->length = length;
+
+       /*
+        * Set both flags in a single assignment.  Assigning them one after
+        * the other would drop IOMAP_F_DIRTY, which must stay set because the
+        * extent map is only updated on I/O completion.
+        */
+       iomap->flags = IOMAP_F_DIRTY | IOMAP_F_ANON_WRITE;
+       return 0;
+}
+
+/*
+ * iomap operations for direct writes to zoned RT devices.  Only ->iomap_begin
+ * is provided.
+ */
+const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
+       .iomap_begin            = xfs_zoned_direct_write_iomap_begin,
+};
+#endif /* CONFIG_XFS_RT */
+
 static int
 xfs_dax_write_iomap_end(
        struct inode            *inode,
index bc8a00cad85442118515f7e6248cc3ee16141f4c..d330c4a581b194de3ed5f98c859612c399e40914 100644 (file)
@@ -51,6 +51,7 @@ xfs_aligned_fsb_count(
 
 extern const struct iomap_ops xfs_buffered_write_iomap_ops;
 extern const struct iomap_ops xfs_direct_write_iomap_ops;
+extern const struct iomap_ops xfs_zoned_direct_write_iomap_ops;
 extern const struct iomap_ops xfs_read_iomap_ops;
 extern const struct iomap_ops xfs_seek_iomap_ops;
 extern const struct iomap_ops xfs_xattr_iomap_ops;