]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
nfs/localio: add refcounting for each iocb IO associated with NFS pgio header
authorMike Snitzer <snitzer@kernel.org>
Mon, 27 Oct 2025 13:08:32 +0000 (09:08 -0400)
committerAnna Schumaker <anna.schumaker@oracle.com>
Mon, 10 Nov 2025 15:32:28 +0000 (10:32 -0500)
Improve completion handling of as many as 3 IOs associated with each
misaligned DIO by using a atomic_t to track completion of each IO.

Update nfs_local_pgio_done() to use precise atomic_t accounting for
remaining iov_iter (up to 3) associated with each iocb, so that each
NFS LOCALIO pgio header is only released after all IOs have completed.
But also allow early return if/when a short read or write occurs.

Fixes reported BUG: KASAN: slab-use-after-free in nfs_local_call_read:
https://lore.kernel.org/linux-nfs/aPSvi5Yr2lGOh5Jh@dell-per750-06-vm-07.rhts.eng.pek2.redhat.com/

Reported-by: Yongcheng Yang <yoyang@redhat.com>
Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
fs/nfs/localio.c

index 0383d6eb2f46669fe74aa643bf49eb2fcecdd014..647fa19b0479ec93f2056d1b3f10a92ae5f7a356 100644 (file)
@@ -42,7 +42,7 @@ struct nfs_local_kiocb {
        /* Begin mostly DIO-specific members */
        size_t                  end_len;
        short int               end_iter_index;
-       short int               n_iters;
+       atomic_t                n_iters;
        bool                    iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
        loff_t                  offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned;
        struct iov_iter         iters[NFSLOCAL_MAX_IOS];
@@ -407,6 +407,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
                iters[n_iters].count = local_dio->start_len;
                iocb->offset[n_iters] = iocb->hdr->args.offset;
                iocb->iter_is_dio_aligned[n_iters] = false;
+               atomic_inc(&iocb->n_iters);
                ++n_iters;
        }
 
@@ -425,6 +426,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
                /* Save index and length of end */
                iocb->end_iter_index = n_iters;
                iocb->end_len = local_dio->end_len;
+               atomic_inc(&iocb->n_iters);
                ++n_iters;
        }
 
@@ -448,7 +450,6 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
        }
        ++n_iters;
 
-       iocb->n_iters = n_iters;
        return n_iters;
 }
 
@@ -474,6 +475,12 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
        }
        len = hdr->args.count - total;
 
+       /*
+        * For each iocb, iocb->n_iter is always at least 1 and we always
+        * end io after first nfs_local_pgio_done call unless misaligned DIO.
+        */
+       atomic_set(&iocb->n_iters, 1);
+
        if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
                struct nfs_local_dio local_dio;
 
@@ -486,7 +493,6 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
        iocb->offset[0] = hdr->args.offset;
        iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
        iocb->iter_is_dio_aligned[0] = false;
-       iocb->n_iters = 1;
 }
 
 static void
@@ -506,9 +512,11 @@ nfs_local_pgio_init(struct nfs_pgio_header *hdr,
                hdr->task.tk_start = ktime_get();
 }
 
-static void
-nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
+static bool
+nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force)
 {
+       struct nfs_pgio_header *hdr = iocb->hdr;
+
        /* Must handle partial completions */
        if (status >= 0) {
                hdr->res.count += status;
@@ -519,6 +527,12 @@ nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
                hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
                hdr->task.tk_status = status;
        }
+
+       if (force)
+               return true;
+
+       BUG_ON(atomic_read(&iocb->n_iters) <= 0);
+       return atomic_dec_and_test(&iocb->n_iters);
 }
 
 static void
@@ -549,11 +563,11 @@ static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
        queue_work(nfsiod_workqueue, &iocb->work);
 }
 
-static void
-nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
+static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
 {
        struct nfs_pgio_header *hdr = iocb->hdr;
        struct file *filp = iocb->kiocb.ki_filp;
+       long status = hdr->task.tk_status;
 
        if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
                /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
@@ -574,12 +588,18 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
                        status > 0 ? status : 0, hdr->res.eof);
 }
 
+static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
+{
+       nfs_local_read_done(iocb);
+       nfs_local_pgio_release(iocb);
+}
+
 static void nfs_local_read_aio_complete_work(struct work_struct *work)
 {
        struct nfs_local_kiocb *iocb =
                container_of(work, struct nfs_local_kiocb, work);
 
-       nfs_local_pgio_release(iocb);
+       nfs_local_read_iocb_done(iocb);
 }
 
 static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
@@ -587,8 +607,10 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
        struct nfs_local_kiocb *iocb =
                container_of(kiocb, struct nfs_local_kiocb, kiocb);
 
-       nfs_local_pgio_done(iocb->hdr, ret);
-       nfs_local_read_done(iocb, ret);
+       /* AIO completion of DIO read should always be last to complete */
+       if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
+               return;
+
        nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
 }
 
@@ -599,10 +621,13 @@ static void nfs_local_call_read(struct work_struct *work)
        struct file *filp = iocb->kiocb.ki_filp;
        const struct cred *save_cred;
        ssize_t status;
+       int n_iters;
 
        save_cred = override_creds(filp->f_cred);
 
-       for (int i = 0; i < iocb->n_iters ; i++) {
+       n_iters = atomic_read(&iocb->n_iters);
+       for (int i = 0; i < n_iters ; i++) {
+               /* DIO-aligned middle is always issued last with AIO completion */
                if (iocb->iter_is_dio_aligned[i]) {
                        iocb->kiocb.ki_flags |= IOCB_DIRECT;
                        iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
@@ -612,18 +637,14 @@ static void nfs_local_call_read(struct work_struct *work)
                iocb->kiocb.ki_pos = iocb->offset[i];
                status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
                if (status != -EIOCBQUEUED) {
-                       nfs_local_pgio_done(iocb->hdr, status);
-                       if (iocb->hdr->task.tk_status)
+                       if (nfs_local_pgio_done(iocb, status, false)) {
+                               nfs_local_read_iocb_done(iocb);
                                break;
+                       }
                }
        }
 
        revert_creds(save_cred);
-
-       if (status != -EIOCBQUEUED) {
-               nfs_local_read_done(iocb, status);
-               nfs_local_pgio_release(iocb);
-       }
 }
 
 static int
@@ -738,11 +759,10 @@ static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
        fattr->du.nfs3.used = stat.blocks << 9;
 }
 
-static void
-nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
+static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
 {
        struct nfs_pgio_header *hdr = iocb->hdr;
-       struct inode *inode = hdr->inode;
+       long status = hdr->task.tk_status;
 
        dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
 
@@ -761,10 +781,17 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
                nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
                status = -ENOSPC;
                /* record -ENOSPC in terms of nfs_local_pgio_done */
-               nfs_local_pgio_done(hdr, status);
+               (void) nfs_local_pgio_done(iocb, status, true);
        }
        if (hdr->task.tk_status < 0)
-               nfs_reset_boot_verifier(inode);
+               nfs_reset_boot_verifier(hdr->inode);
+}
+
+static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
+{
+       nfs_local_write_done(iocb);
+       nfs_local_vfs_getattr(iocb);
+       nfs_local_pgio_release(iocb);
 }
 
 static void nfs_local_write_aio_complete_work(struct work_struct *work)
@@ -772,8 +799,7 @@ static void nfs_local_write_aio_complete_work(struct work_struct *work)
        struct nfs_local_kiocb *iocb =
                container_of(work, struct nfs_local_kiocb, work);
 
-       nfs_local_vfs_getattr(iocb);
-       nfs_local_pgio_release(iocb);
+       nfs_local_write_iocb_done(iocb);
 }
 
 static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
@@ -781,8 +807,10 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
        struct nfs_local_kiocb *iocb =
                container_of(kiocb, struct nfs_local_kiocb, kiocb);
 
-       nfs_local_pgio_done(iocb->hdr, ret);
-       nfs_local_write_done(iocb, ret);
+       /* AIO completion of DIO write should always be last to complete */
+       if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
+               return;
+
        nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
 }
 
@@ -793,13 +821,17 @@ static void nfs_local_call_write(struct work_struct *work)
        struct file *filp = iocb->kiocb.ki_filp;
        unsigned long old_flags = current->flags;
        const struct cred *save_cred;
+       bool force_done = false;
        ssize_t status;
+       int n_iters;
 
        current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
        save_cred = override_creds(filp->f_cred);
 
        file_start_write(filp);
-       for (int i = 0; i < iocb->n_iters ; i++) {
+       n_iters = atomic_read(&iocb->n_iters);
+       for (int i = 0; i < n_iters ; i++) {
+               /* DIO-aligned middle is always issued last with AIO completion */
                if (iocb->iter_is_dio_aligned[i]) {
                        iocb->kiocb.ki_flags |= IOCB_DIRECT;
                        iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
@@ -812,35 +844,27 @@ static void nfs_local_call_write(struct work_struct *work)
                        if (unlikely(status >= 0 && status < iocb->iters[i].count)) {
                                /* partial write */
                                if (i == iocb->end_iter_index) {
-                                       /* Must not account partial end, otherwise, due
-                                        * to end being issued before middle: the partial
+                                       /* Must not account DIO partial end, otherwise (due
+                                        * to end being issued before middle): the partial
                                         * write accounting in nfs_local_write_done()
                                         * would incorrectly advance hdr->args.offset
                                         */
                                        status = 0;
                                } else {
-                                       /* Partial write at start or buffered middle,
-                                        * exit early.
-                                        */
-                                       nfs_local_pgio_done(iocb->hdr, status);
-                                       break;
+                                       /* Partial write at start or middle, force done */
+                                       force_done = true;
                                }
                        }
-                       nfs_local_pgio_done(iocb->hdr, status);
-                       if (iocb->hdr->task.tk_status)
+                       if (nfs_local_pgio_done(iocb, status, force_done)) {
+                               nfs_local_write_iocb_done(iocb);
                                break;
+                       }
                }
        }
        file_end_write(filp);
 
        revert_creds(save_cred);
        current->flags = old_flags;
-
-       if (status != -EIOCBQUEUED) {
-               nfs_local_write_done(iocb, status);
-               nfs_local_vfs_getattr(iocb);
-               nfs_local_pgio_release(iocb);
-       }
 }
 
 static int