git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
nfs/localio: add direct IO enablement with sync and async IO support
author: Mike Snitzer <snitzer@kernel.org>
Sat, 16 Nov 2024 01:40:53 +0000 (20:40 -0500)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 19 Sep 2025 14:35:43 +0000 (16:35 +0200)
[ Upstream commit 3feec68563dda59517f83d19123aa287a1dfd068 ]

This commit simply adds the required O_DIRECT plumbing.  It doesn't
address the fact that NFS doesn't ensure all writes are page aligned
(nor device logical block size aligned as required by O_DIRECT).

Because NFS will read-modify-write for IO that isn't aligned, LOCALIO
will not use O_DIRECT semantics by default if/when an application
requests the use of O_DIRECT.  Allow the use of O_DIRECT semantics by:
1: Adding a flag to the nfs_pgio_header struct to allow the NFS
   O_DIRECT layer to signal that O_DIRECT was used by the application
2: Adding a 'localio_O_DIRECT_semantics' NFS module parameter that
   when enabled will cause LOCALIO to use O_DIRECT semantics (this may
   cause IO to fail if applications do not properly align their IO).

This commit is derived from code developed by Weston Andros Adamson.

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Stable-dep-of: 992203a1fba5 ("nfs/localio: restore creds before releasing pageio data")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Documentation/filesystems/nfs/localio.rst
fs/nfs/direct.c
fs/nfs/localio.c
include/linux/nfs_xdr.h

index bd1967e2eab3293bb333b1b58cd3c792aa2fb2df..20fc901a08f4d32fa52aaa75ccfa9d4b43a69b83 100644 (file)
@@ -306,6 +306,19 @@ is issuing IO to the underlying local filesystem that it is sharing with
 the NFS server. See: fs/nfs/localio.c:nfs_local_doio() and
 fs/nfs/localio.c:nfs_local_commit().
 
+With normal NFS that makes use of RPC to issue IO to the server, if an
+application uses O_DIRECT the NFS client will bypass the pagecache but
+the NFS server will not. The NFS server's use of buffered IO
+allows applications to be less precise with their alignment when
+issuing IO to the NFS client. LOCALIO can be configured to use O_DIRECT
+semantics by setting the 'localio_O_DIRECT_semantics' nfs module
+parameter to Y, e.g.:
+
+  echo Y > /sys/module/nfs/parameters/localio_O_DIRECT_semantics
+
+Once enabled, it will cause LOCALIO to use O_DIRECT semantics (this may
+cause IO to fail if applications do not properly align their IO).
+
 Security
 ========
 
index c1f1b826888c983418e0caf8ffcbb8d56c58eb4a..f159cfc125adcfb138ef8f75ae1b6a48afa0582b 100644 (file)
@@ -320,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error)
 static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
 {
        get_dreq(hdr->dreq);
+       set_bit(NFS_IOHDR_ODIRECT, &hdr->flags);
 }
 
 static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
index ab305dfc712696c663c3c1c146e3e9922cef20b3..8fb145124e93bd3055b5379ac12875594b8587f6 100644 (file)
@@ -35,6 +35,7 @@ struct nfs_local_kiocb {
        struct bio_vec          *bvec;
        struct nfs_pgio_header  *hdr;
        struct work_struct      work;
+       void (*aio_complete_work)(struct work_struct *);
        struct nfsd_file        *localio;
 };
 
@@ -50,6 +51,11 @@ static void nfs_local_fsync_work(struct work_struct *work);
 static bool localio_enabled __read_mostly = true;
 module_param(localio_enabled, bool, 0644);
 
+static bool localio_O_DIRECT_semantics __read_mostly = false;
+module_param(localio_O_DIRECT_semantics, bool, 0644);
+MODULE_PARM_DESC(localio_O_DIRECT_semantics,
+                "LOCALIO will use O_DIRECT semantics to filesystem.");
+
 static inline bool nfs_client_is_local(const struct nfs_client *clp)
 {
        return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
@@ -287,10 +293,19 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
                kfree(iocb);
                return NULL;
        }
-       init_sync_kiocb(&iocb->kiocb, file);
+
+       if (localio_O_DIRECT_semantics &&
+           test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
+               iocb->kiocb.ki_filp = file;
+               iocb->kiocb.ki_flags = IOCB_DIRECT;
+       } else
+               init_sync_kiocb(&iocb->kiocb, file);
+
        iocb->kiocb.ki_pos = hdr->args.offset;
        iocb->hdr = hdr;
        iocb->kiocb.ki_flags &= ~IOCB_APPEND;
+       iocb->aio_complete_work = NULL;
+
        return iocb;
 }
 
@@ -345,6 +360,18 @@ nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
        nfs_local_hdr_release(hdr, hdr->task.tk_ops);
 }
 
+/*
+ * Complete the I/O from iocb->kiocb.ki_complete()
+ *
+ * Note that this function can be called from a bottom half context,
+ * hence we need to queue the rpc_call_done() etc to a workqueue
+ */
+static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
+{
+       INIT_WORK(&iocb->work, iocb->aio_complete_work);
+       queue_work(nfsiod_workqueue, &iocb->work);
+}
+
 static void
 nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
 {
@@ -367,6 +394,23 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
                        status > 0 ? status : 0, hdr->res.eof);
 }
 
+static void nfs_local_read_aio_complete_work(struct work_struct *work)
+{
+       struct nfs_local_kiocb *iocb =
+               container_of(work, struct nfs_local_kiocb, work);
+
+       nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
+{
+       struct nfs_local_kiocb *iocb =
+               container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+       nfs_local_read_done(iocb, ret);
+       nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
+}
+
 static void nfs_local_call_read(struct work_struct *work)
 {
        struct nfs_local_kiocb *iocb =
@@ -381,10 +425,10 @@ static void nfs_local_call_read(struct work_struct *work)
        nfs_local_iter_init(&iter, iocb, READ);
 
        status = filp->f_op->read_iter(&iocb->kiocb, &iter);
-       WARN_ON_ONCE(status == -EIOCBQUEUED);
-
-       nfs_local_read_done(iocb, status);
-       nfs_local_pgio_release(iocb);
+       if (status != -EIOCBQUEUED) {
+               nfs_local_read_done(iocb, status);
+               nfs_local_pgio_release(iocb);
+       }
 
        revert_creds(save_cred);
 }
@@ -412,6 +456,11 @@ nfs_do_local_read(struct nfs_pgio_header *hdr,
        nfs_local_pgio_init(hdr, call_ops);
        hdr->res.eof = false;
 
+       if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+               iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
+               iocb->aio_complete_work = nfs_local_read_aio_complete_work;
+       }
+
        INIT_WORK(&iocb->work, nfs_local_call_read);
        queue_work(nfslocaliod_workqueue, &iocb->work);
 
@@ -541,6 +590,24 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
        nfs_local_pgio_done(hdr, status);
 }
 
+static void nfs_local_write_aio_complete_work(struct work_struct *work)
+{
+       struct nfs_local_kiocb *iocb =
+               container_of(work, struct nfs_local_kiocb, work);
+
+       nfs_local_vfs_getattr(iocb);
+       nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
+{
+       struct nfs_local_kiocb *iocb =
+               container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+       nfs_local_write_done(iocb, ret);
+       nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
+}
+
 static void nfs_local_call_write(struct work_struct *work)
 {
        struct nfs_local_kiocb *iocb =
@@ -559,11 +626,11 @@ static void nfs_local_call_write(struct work_struct *work)
        file_start_write(filp);
        status = filp->f_op->write_iter(&iocb->kiocb, &iter);
        file_end_write(filp);
-       WARN_ON_ONCE(status == -EIOCBQUEUED);
-
-       nfs_local_write_done(iocb, status);
-       nfs_local_vfs_getattr(iocb);
-       nfs_local_pgio_release(iocb);
+       if (status != -EIOCBQUEUED) {
+               nfs_local_write_done(iocb, status);
+               nfs_local_vfs_getattr(iocb);
+               nfs_local_pgio_release(iocb);
+       }
 
        revert_creds(save_cred);
        current->flags = old_flags;
@@ -599,10 +666,16 @@ nfs_do_local_write(struct nfs_pgio_header *hdr,
        case NFS_FILE_SYNC:
                iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
        }
+
        nfs_local_pgio_init(hdr, call_ops);
 
        nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
 
+       if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+               iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
+               iocb->aio_complete_work = nfs_local_write_aio_complete_work;
+       }
+
        INIT_WORK(&iocb->work, nfs_local_call_write);
        queue_work(nfslocaliod_workqueue, &iocb->work);
 
index 12d8e47bc5a38863b2831fb16056f742fa81612e..b48d94f099657caf5a04ac0016d309956091b3c8 100644 (file)
@@ -1637,6 +1637,7 @@ enum {
        NFS_IOHDR_RESEND_PNFS,
        NFS_IOHDR_RESEND_MDS,
        NFS_IOHDR_UNSTABLE_WRITES,
+       NFS_IOHDR_ODIRECT,
 };
 
 struct nfs_io_completion;