git.ipfire.org Git - thirdparty/linux.git/commitdiff
NFSv4/pNFS: Fix a race to wake on NFS_LAYOUT_DRAIN
author: Benjamin Coddington <bcodding@redhat.com>
Thu, 19 Jun 2025 15:02:21 +0000 (11:02 -0400)
committer: Anna Schumaker <anna.schumaker@oracle.com>
Mon, 23 Jun 2025 15:01:16 +0000 (11:01 -0400)
We found a few different systems hung up in writeback waiting on the same
page lock, and one task waiting on the NFS_LAYOUT_DRAIN bit in
pnfs_update_layout(); however, the pnfs_layout_hdr's plh_outstanding count
was zero.

It seems most likely that this is another race between the waiter and waker
similar to commit ed0172af5d6f ("SUNRPC: Fix a race to wake a sync task").
Fix it up by applying the advised barrier.

Fixes: 880265c77ac4 ("pNFS: Avoid a live lock condition in pnfs_update_layout()")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
fs/nfs/pnfs.c

index 3adb7d0dbec7ac645545395322b48849cd3b190e..1a7ec68bde15328b3a4d02489eaeb2faffc8ec02 100644 (file)
@@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
 static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
 {
        if (atomic_dec_and_test(&lo->plh_outstanding) &&
-           test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+           test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) {
+               smp_mb__after_atomic();
                wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+       }
 }
 
 static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)