NFS: Fix a race when updating an existing write
author    Trond Myklebust <trond.myklebust@hammerspace.com>
          Sat, 16 Aug 2025 14:25:20 +0000 (07:25 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 4 Sep 2025 13:30:20 +0000 (15:30 +0200)
commit 76d2e3890fb169168c73f2e4f8375c7cc24a765e upstream.

After nfs_lock_and_join_requests() tests whether the request is still
attached to the mapping, nothing prevents a call to
nfs_inode_remove_request() from succeeding until we actually lock the
page group.
The reason is that whoever called nfs_inode_remove_request() doesn't
necessarily have a lock on the page group head.

So in order to avoid races, let's take the page group lock earlier in
nfs_lock_and_join_requests(), and hold it across the removal of the
request in nfs_inode_remove_request().

Reported-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Joe Quanaim <jdq@meta.com>
Tested-by: Andrew Steffen <aksteffen@meta.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/nfs/pagelist.c
fs/nfs/write.c
include/linux/nfs_page.h

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 30e2488eb84cedd7ed4cbaa16f316751938ccf1e..0ea3916ed1dcb1d6d4f54b5eae1e22369b62c2cf 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -272,13 +272,14 @@ nfs_page_group_unlock(struct nfs_page *req)
        nfs_page_clear_headlock(req);
 }
 
-/*
- * nfs_page_group_sync_on_bit_locked
+/**
+ * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
+ * @req: request in page group
+ * @bit: PG_* bit that is used to sync page group
  *
  * must be called with page group lock held
  */
-static bool
-nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
 {
        struct nfs_page *head = req->wb_head;
        struct nfs_page *tmp;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 289ff8e9f78a0a1fba0dfe26b074da08a91f4703..cb1e9996fcc8ec13e07976929a224256b413b74f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -156,20 +156,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
        }
 }
 
-static int
-nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
 {
-       int ret;
-
-       if (!test_bit(PG_REMOVE, &req->wb_flags))
-               return 0;
-       ret = nfs_page_group_lock(req);
-       if (ret)
-               return ret;
        if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
                nfs_page_set_inode_ref(req, inode);
-       nfs_page_group_unlock(req);
-       return 0;
 }
 
 static struct nfs_page *nfs_folio_private_request(struct folio *folio)
@@ -238,36 +228,6 @@ static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
        return req;
 }
 
-static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio)
-{
-       struct inode *inode = folio_file_mapping(folio)->host;
-       struct nfs_page *req, *head;
-       int ret;
-
-       for (;;) {
-               req = nfs_folio_find_head_request(folio);
-               if (!req)
-                       return req;
-               head = nfs_page_group_lock_head(req);
-               if (head != req)
-                       nfs_release_request(req);
-               if (IS_ERR(head))
-                       return head;
-               ret = nfs_cancel_remove_inode(head, inode);
-               if (ret < 0) {
-                       nfs_unlock_and_release_request(head);
-                       return ERR_PTR(ret);
-               }
-               /* Ensure that nobody removed the request before we locked it */
-               if (head == nfs_folio_private_request(folio))
-                       break;
-               if (folio_test_swapcache(folio))
-                       break;
-               nfs_unlock_and_release_request(head);
-       }
-       return head;
-}
-
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct folio *folio, unsigned int offset,
                          unsigned int count)
@@ -621,20 +581,37 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
        struct nfs_commit_info cinfo;
        int ret;
 
-       nfs_init_cinfo_from_inode(&cinfo, inode);
        /*
         * A reference is taken only on the head request which acts as a
         * reference to the whole page group - the group will not be destroyed
         * until the head reference is released.
         */
-       head = nfs_folio_find_and_lock_request(folio);
-       if (IS_ERR_OR_NULL(head))
-               return head;
+retry:
+       head = nfs_folio_find_head_request(folio);
+       if (!head)
+               return NULL;
+
+       while (!nfs_lock_request(head)) {
+               ret = nfs_wait_on_request(head);
+               if (ret < 0) {
+                       nfs_release_request(head);
+                       return ERR_PTR(ret);
+               }
+       }
 
        ret = nfs_page_group_lock(head);
        if (ret < 0)
                goto out_unlock;
 
+       /* Ensure that nobody removed the request before we locked it */
+       if (head != folio->private && !folio_test_swapcache(folio)) {
+               nfs_page_group_unlock(head);
+               nfs_unlock_and_release_request(head);
+               goto retry;
+       }
+
+       nfs_cancel_remove_inode(head, inode);
+
        /* lock each request in the page group */
        for (subreq = head->wb_this_page;
             subreq != head;
@@ -855,7 +832,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 {
        struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
 
-       if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+       nfs_page_group_lock(req);
+       if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
                struct folio *folio = nfs_page_to_folio(req->wb_head);
                struct address_space *mapping = folio_file_mapping(folio);
 
@@ -867,6 +845,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
                }
                spin_unlock(&mapping->private_lock);
        }
+       nfs_page_group_unlock(req);
 
        if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
                atomic_long_dec(&nfsi->nrequests);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a0f7ebe53883adc3a60180e44333e00a7f10424..6a46069c5a36899346651edc77483439acd52e69 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -162,6 +162,7 @@ extern void nfs_join_page_group(struct nfs_page *head,
 extern int nfs_page_group_lock(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
 extern int nfs_page_set_headlock(struct nfs_page *req);
 extern void nfs_page_clear_headlock(struct nfs_page *req);
 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);