git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
NFSD: Track SCSI Persistent Registration Fencing per Client with xarray
author Dai Ngo <dai.ngo@oracle.com>
Mon, 19 Jan 2026 15:41:26 +0000 (10:41 -0500)
committer Chuck Lever <chuck.lever@oracle.com>
Mon, 30 Mar 2026 01:25:09 +0000 (21:25 -0400)
When a client holding pNFS SCSI layouts becomes unresponsive, the
server revokes access by preempting the client's SCSI persistent
reservation key. A layout recall is issued for each layout the
client holds; if the client fails to respond, each recall triggers
a fence operation. The first preempt for a given device succeeds
and removes the client's key registration. Subsequent preempts for
the same device fail because the key is no longer registered.

Update the NFS server to handle SCSI persistent registration
fencing on a per-client and per-device basis by utilizing an
xarray associated with the nfs4_client structure.

Each xarray entry is indexed by the dev_t of a block device
registered by the client. The entry maintains a flag indicating
whether this device has already been fenced for the corresponding
client.

When the server issues a persistent registration key to a client,
it creates a new xarray entry at the dev_t index with the fenced
flag initialized to 0.

Before performing a fence via nfsd4_scsi_fence_client, the server
checks the corresponding entry using the device's dev_t. If the
fenced flag is already set, the fence operation is skipped;
otherwise, the flag is set to 1 and fencing proceeds.

The xarray is destroyed when the nfs4_client is released in
__destroy_client.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
fs/nfsd/blocklayout.c
fs/nfsd/nfs4state.c
fs/nfsd/state.h

index a7cfba29990e452d082630b8813227f8f337bd3d..8b987fca1e600f39232a1e84042151404999187d 100644 (file)
@@ -273,6 +273,51 @@ const struct nfsd4_layout_ops bl_layout_ops = {
 #endif /* CONFIG_NFSD_BLOCKLAYOUT */
 
 #ifdef CONFIG_NFSD_SCSILAYOUT
+
+#define NFSD_MDS_PR_FENCED     XA_MARK_0
+
+/*
+ * Make sure @clp has a cl_dev_fences entry for @device, with the
+ * NFSD_MDS_PR_FENCED mark clear.
+ *
+ * A new entry is inserted only the first time the client registers on
+ * the device, so cl_dev_fences never holds more entries than the number
+ * of devices this client has accessed.
+ *
+ * If the entry already exists (the insert reports -EBUSY), the client is
+ * registering again after an earlier fence; the mark is cleared so that
+ * nfsd4_scsi_fence_set() will not skip the next fence for this device.
+ *
+ * Returns 0 on success (including the already-present case), or the
+ * negative errno reported by __xa_insert().
+ */
+static inline int nfsd4_scsi_fence_insert(struct nfs4_client *clp,
+                                         dev_t device)
+{
+       struct xarray *fences = &clp->cl_dev_fences;
+       int err;
+
+       xa_lock(fences);
+       err = __xa_insert(fences, device, XA_ZERO_ENTRY, GFP_KERNEL);
+       if (err == -EBUSY) {
+               /* Already tracked: just re-arm fencing for this device. */
+               __xa_clear_mark(fences, device, NFSD_MDS_PR_FENCED);
+               err = 0;
+       }
+       xa_unlock(fences);
+
+       return err;
+}
+
+/*
+ * Test and set the NFSD_MDS_PR_FENCED mark for @device while holding the
+ * cl_dev_fences lock, so the check and the update happen together.
+ *
+ * Returns true if the mark was already set, in which case the caller
+ * skips the fence. Otherwise calls __xa_set_mark() and returns false.
+ */
+static inline bool nfsd4_scsi_fence_set(struct nfs4_client *clp, dev_t device)
+{
+       struct xarray *fences = &clp->cl_dev_fences;
+       bool already_fenced;
+
+       xa_lock(fences);
+       already_fenced = xa_get_mark(fences, device, NFSD_MDS_PR_FENCED);
+       if (!already_fenced)
+               __xa_set_mark(fences, device, NFSD_MDS_PR_FENCED);
+       xa_unlock(fences);
+
+       return already_fenced;
+}
+
+/*
+ * Clear the NFSD_MDS_PR_FENCED mark for @device in @clp's cl_dev_fences,
+ * so that a later nfsd4_scsi_fence_set() call for it returns false again.
+ */
+static inline void nfsd4_scsi_fence_clear(struct nfs4_client *clp, dev_t device)
+{
+       struct xarray *fences = &clp->cl_dev_fences;
+
+       xa_clear_mark(fences, device, NFSD_MDS_PR_FENCED);
+}
+
 #define NFSD_MDS_PR_KEY                0x0100000000000000ULL
 
 /*
@@ -342,6 +387,10 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
                goto out_free_dev;
        }
 
+       ret = nfsd4_scsi_fence_insert(clp, sb->s_bdev->bd_dev);
+       if (ret < 0)
+               goto out_free_dev;
+
        ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
        if (ret) {
                pr_err("pNFS: failed to register key for device %s.\n",
@@ -401,9 +450,32 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
        struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
        int status;
 
+       if (nfsd4_scsi_fence_set(clp, bdev->bd_dev))
+               return;
+
        status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
                        nfsd4_scsi_pr_key(clp),
                        PR_EXCLUSIVE_ACCESS_REG_ONLY, true);
+       /*
+        * Reset to allow retry only when the command could not have
+        * reached the device. Negative status means a local error
+        * (e.g., -ENOMEM) prevented the command from being sent.
+        * PR_STS_PATH_FAILED, PR_STS_PATH_FAST_FAILED, and
+        * PR_STS_RETRY_PATH_FAILURE indicate transport path failures
+        * before device delivery.
+        *
+        * For all other errors, the command may have reached the device
+        * and the preempt may have succeeded. Avoid resetting, since
+        * retrying a successful preempt returns PR_STS_IOERR or
+        * PR_STS_RESERVATION_CONFLICT, which would cause an infinite
+        * retry loop.
+        */
+       if (status < 0 ||
+           status == PR_STS_PATH_FAILED ||
+           status == PR_STS_PATH_FAST_FAILED ||
+           status == PR_STS_RETRY_PATH_FAILURE)
+               nfsd4_scsi_fence_clear(clp, bdev->bd_dev);
+
        trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
 }
 
index d8b0bd8ac842a51b9515975ce508bf58618fd678..023fd665b8994dd7697211a0bdd7ea912efa660a 100644 (file)
@@ -2381,6 +2381,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
        INIT_LIST_HEAD(&clp->cl_revoked);
 #ifdef CONFIG_NFSD_PNFS
        INIT_LIST_HEAD(&clp->cl_lo_states);
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       xa_init(&clp->cl_dev_fences);
 #endif
        INIT_LIST_HEAD(&clp->async_copies);
        spin_lock_init(&clp->async_lock);
@@ -2543,6 +2546,9 @@ __destroy_client(struct nfs4_client *clp)
                svc_xprt_put(clp->cl_cb_conn.cb_xprt);
        atomic_add_unless(&nn->nfs4_client_count, -1, 0);
        nfsd4_dec_courtesy_client_count(nn, clp);
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       xa_destroy(&clp->cl_dev_fences);
+#endif
        free_client(clp);
        wake_up_all(&expiry_wq);
 }
index c0ca115c3b74bbcb9d7f39533071a701a13c8614..99aeaab9cf2b194ad5b62add147bcc2471181f0e 100644 (file)
@@ -527,6 +527,9 @@ struct nfs4_client {
 
        struct nfsd4_cb_recall_any      *cl_ra;
        time64_t                cl_ra_time;
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       struct xarray           cl_dev_fences;
+#endif
 };
 
 /* struct nfs4_client_reset