echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
EOF
+
+If the nfsd server needs to fence a non-responding client and the
+fencing operation fails, the server logs a warning message in the
+system log with the following format:
+
+ FENCE failed client[IP_address] clid[#n] device[dev_name]
+
+ Where:
+
+ IP_address: refers to the IP address of the affected client.
+ #n: indicates the unique client identifier.
+ dev_name: specifies the name of the block device related
+ to the fencing attempt.
+
+The server will repeatedly retry the operation indefinitely. During
+this time, access to the affected file is restricted for all other
+clients. This is to prevent potential data corruption if multiple
+clients access the same file simultaneously.
+
+To restore access to the affected file for other clients, the admin
+needs to take the following actions:
+
+ . shutdown or power off the client being fenced.
+ . manually expire the client to release all its state on the server:
+
+ echo 'expire' > /proc/fs/nfsd/clients/clid/ctl
+
+ Where:
+
+ clid: is the unique client identifier displayed in the system log.
On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
enabled, and the file system is mounted using the NFSv4.1 protocol
version (mount -o vers=4.1).
+
+If the nfsd server needs to fence a non-responding client and the
+fencing operation fails, the server logs a warning message in the
+system log with the following format:
+
+ FENCE failed client[IP_address] clid[#n] device[dev_name]
+
+ Where:
+
+ IP_address: refers to the IP address of the affected client.
+ #n: indicates the unique client identifier.
+ dev_name: specifies the name of the block device related
+ to the fencing attempt.
+
+The server will repeatedly retry the operation indefinitely. During
+this time, access to the affected file is restricted for all other
+clients. This is to prevent potential data corruption if multiple
+clients access the same file simultaneously.
+
+To restore access to the affected file for other clients, the admin
+needs to take the following actions:
+
+ . shutdown or power off the client being fenced.
+ . manually expire the client to release all its state on the server:
+
+ echo 'expire' > /proc/fs/nfsd/clients/clid/ctl
+
+ Where:
+
+ clid: is the unique client identifier displayed in the system log.
+
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);
+ bool (*lm_breaker_timedout)(struct file_lease *);
locking rules:
lm_lock_expirable yes no no
lm_expire_lock no no yes
lm_open_conflict yes no no
+lm_breaker_timedout yes no no
====================== ============= ================= =========
buffer_head
{
struct file_lock_context *ctx = inode->i_flctx;
struct file_lease *fl, *tmp;
+ bool remove;
lockdep_assert_held(&ctx->flc_lock);
trace_time_out_leases(inode, fl);
if (past_time(fl->fl_downgrade_time))
lease_modify(fl, F_RDLCK, dispose);
- if (past_time(fl->fl_break_time))
- lease_modify(fl, F_UNLCK, dispose);
+
+ remove = true;
+ if (past_time(fl->fl_break_time)) {
+ /*
+ * Consult the lease manager when a lease break times
+ * out to determine whether the lease should be disposed
+ * of.
+ */
+ if (fl->fl_lmops && fl->fl_lmops->lm_breaker_timedout)
+ remove = fl->fl_lmops->lm_breaker_timedout(fl);
+ if (remove)
+ lease_modify(fl, F_UNLCK, dispose);
+ }
}
}
restart:
fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
break_time = fl->fl_break_time;
- if (break_time != 0)
- break_time -= jiffies;
- if (break_time == 0)
+ if (break_time != 0) {
+ if (time_after(jiffies, break_time)) {
+ fl->fl_break_time = jiffies + lease_break_time * HZ;
+ break_time = lease_break_time * HZ;
+ } else
+ break_time -= jiffies;
+ } else
break_time++;
locks_insert_block(&fl->c, &new_fl->c, leases_conflict);
trace_break_lease_block(inode, new_fl);
ret = 0;
}
xa_unlock(xa);
+ clp->cl_fence_retry_warn = false;
return ret;
}
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
-static void
+/*
+ * Perform the fence operation to prevent the client from accessing the
+ * block device. If a fence operation is already in progress, wait for
+ * it to complete before checking the NFSD_MDS_PR_FENCED flag. Once the
+ * operation is complete, check the flag. If NFSD_MDS_PR_FENCED is set,
+ * update the layout stateid by setting the ls_fenced flag to indicate
+ * that the client has been fenced.
+ *
+ * The cl_fence_mutex ensures that the fence operation has been fully
+ * completed, rather than just in progress, when returning from this
+ * function.
+ *
+ * Return true if client was fenced otherwise return false.
+ */
+static bool
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
int status;
+ bool ret;
- if (nfsd4_scsi_fence_set(clp, bdev->bd_dev))
- return;
+ mutex_lock(&clp->cl_fence_mutex);
+ if (nfsd4_scsi_fence_set(clp, bdev->bd_dev)) {
+ mutex_unlock(&clp->cl_fence_mutex);
+ return true;
+ }
status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp),
* PR_STS_RESERVATION_CONFLICT, which would cause an infinite
* retry loop.
*/
- if (status < 0 ||
- status == PR_STS_PATH_FAILED ||
- status == PR_STS_PATH_FAST_FAILED ||
- status == PR_STS_RETRY_PATH_FAILURE)
+ switch (status) {
+ case 0:
+ case PR_STS_IOERR:
+ case PR_STS_RESERVATION_CONFLICT:
+ ret = true;
+ break;
+ default:
+ /* retry-able and other errors */
+ ret = false;
nfsd4_scsi_fence_clear(clp, bdev->bd_dev);
+ break;
+ }
+ mutex_unlock(&clp->cl_fence_mutex);
trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
+ return ret;
}
const struct nfsd4_layout_ops scsi_layout_ops = {
static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lease_manager_operations nfsd4_layouts_lm_ops;
+static void nfsd4_layout_fence_worker(struct work_struct *work);
+
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
[LAYOUT_FLEX_FILES] = &ff_layout_ops,
trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid);
+ spin_lock(&ls->ls_lock);
+ if (delayed_work_pending(&ls->ls_fence_work)) {
+ spin_unlock(&ls->ls_lock);
+ cancel_delayed_work_sync(&ls->ls_fence_work);
+ } else
+ spin_unlock(&ls->ls_lock);
+
spin_lock(&clp->cl_lock);
list_del_init(&ls->ls_perclnt);
spin_unlock(&clp->cl_lock);
list_add(&ls->ls_perfile, &fp->fi_lo_states);
spin_unlock(&fp->fi_lock);
+ ls->ls_fenced = false;
+ ls->ls_fence_delay = 0;
+ INIT_DELAYED_WORK(&ls->ls_fence_work, nfsd4_layout_fence_worker);
+
trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
return ls;
}
nfsd4_layout_lm_break(struct file_lease *fl)
{
/*
- * We don't want the locks code to timeout the lease for us;
- * we'll remove it ourself if a layout isn't returned
- * in time:
+ * Enforce break lease timeout to prevent NFSD
+ * thread from hanging in __break_lease.
*/
- fl->fl_break_time = 0;
nfsd4_recall_file_layout(fl->c.flc_owner);
return false;
}
return 0;
}
+/*
+ * Delayed-work callback that fences a non-responding client from the
+ * layout's block device. Retries with exponential backoff (capped at
+ * MAX_FENCE_DELAY) until the fence succeeds or the layout goes away.
+ * On completion it closes the layout, marks the stateid fenced, and
+ * drops the reference taken by lm_breaker_timedout().
+ */
+static void
+nfsd4_layout_fence_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct nfs4_layout_stateid *ls = container_of(dwork,
+ struct nfs4_layout_stateid, ls_fence_work);
+ struct nfsd_file *nf;
+ struct block_device *bdev;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn;
+
+ /*
+ * The workqueue clears WORK_STRUCT_PENDING before invoking
+ * this callback. Re-arm immediately so that
+ * delayed_work_pending() returns true while the fence
+ * operation is in progress, preventing
+ * lm_breaker_timedout() from taking a duplicate reference.
+ */
+ mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+
+ spin_lock(&ls->ls_lock);
+ if (list_empty(&ls->ls_layouts)) {
+ spin_unlock(&ls->ls_lock);
+dispose:
+ cancel_delayed_work(&ls->ls_fence_work);
+ /* unlock the lease so that tasks waiting on it can proceed */
+ nfsd4_close_layout(ls);
+
+ ls->ls_fenced = true;
+ nfs4_put_stid(&ls->ls_stid);
+ return;
+ }
+ spin_unlock(&ls->ls_lock);
+
+ rcu_read_lock();
+ nf = nfsd_file_get(ls->ls_file);
+ rcu_read_unlock();
+ if (!nf)
+ goto dispose;
+
+ clp = ls->ls_stid.sc_client;
+ nn = net_generic(clp->net, nfsd_net_id);
+ bdev = nf->nf_file->f_path.mnt->mnt_sb->s_bdev;
+ if (nfsd4_layout_ops[ls->ls_layout_type]->fence_client(ls, nf)) {
+ /*
+ * Fenced OK. Log while 'nf' still pins the file that
+ * 'bdev' was derived from, then drop the reference.
+ */
+ pr_warn("%s: FENCED client[%pISpc] clid[%d] to device[%s]\n",
+ __func__, (struct sockaddr *)&clp->cl_addr,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ bdev->bd_disk->disk_name);
+ nfsd_file_put(nf);
+ goto dispose;
+ }
+ /*
+ * Fence failed. Warn only once per client to avoid flooding the
+ * log on every retry, and log before dropping the file reference
+ * so 'bdev' cannot be torn down underneath the message.
+ */
+ if (!clp->cl_fence_retry_warn) {
+ pr_warn("%s: FENCE failed client[%pISpc] clid[%d] device[%s]\n",
+ __func__, (struct sockaddr *)&clp->cl_addr,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ bdev->bd_disk->disk_name);
+ clp->cl_fence_retry_warn = true;
+ }
+ nfsd_file_put(nf);
+ /*
+ * The fence worker retries the fencing operation indefinitely to
+ * prevent data corruption. The admin needs to take the following
+ * actions to restore access to the file for other clients:
+ *
+ * . shutdown or power off the client being fenced.
+ * . manually expire the client to release all its state on the server;
+ * echo 'expire' > /proc/fs/nfsd/clients/clid/ctl
+ *
+ * Where:
+ *
+ * clid: is the unique client identifier displayed in
+ * the warning message above.
+ */
+ if (!ls->ls_fence_delay)
+ ls->ls_fence_delay = HZ;
+ else
+ ls->ls_fence_delay = min(ls->ls_fence_delay << 1,
+ MAX_FENCE_DELAY);
+ mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, ls->ls_fence_delay);
+}
+
+/**
+ * nfsd4_layout_lm_breaker_timedout - The layout recall has timed out.
+ * @fl: file to check
+ *
+ * If the layout type supports a fence operation, schedule a worker to
+ * fence the client from accessing the block device.
+ *
+ * This function runs under the protection of the spin_lock flc_lock.
+ * At this time, the file_lease associated with the layout stateid is
+ * on the flc_list. A reference count is incremented on the layout
+ * stateid to prevent it from being freed while the fence worker is
+ * executing. Once the fence worker finishes its operation, it releases
+ * this reference.
+ *
+ * The fence worker continues to run until either the client has been
+ * fenced or the layout becomes invalid. The layout can become invalid
+ * as a result of a LAYOUTRETURN or when the CB_LAYOUT recall callback
+ * has completed.
+ *
+ * Return true if the file_lease should be disposed of by the caller;
+ * otherwise, return false.
+ */
+static bool
+nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
+{
+ struct nfs4_layout_stateid *ls = fl->c.flc_owner;
+
+ /*
+ * No fencing support for this layout type, or the client has
+ * already been fenced: let the caller dispose of the lease.
+ */
+ if ((!nfsd4_layout_ops[ls->ls_layout_type]->fence_client) ||
+ ls->ls_fenced)
+ return true;
+ /* A fence attempt is already queued or running; keep the lease. */
+ if (delayed_work_pending(&ls->ls_fence_work))
+ return false;
+ /*
+ * Make sure layout has not been returned yet before
+ * taking a reference count on the layout stateid.
+ */
+ spin_lock(&ls->ls_lock);
+ if (list_empty(&ls->ls_layouts) ||
+ !refcount_inc_not_zero(&ls->ls_stid.sc_count)) {
+ spin_unlock(&ls->ls_lock);
+ return true;
+ }
+ spin_unlock(&ls->ls_lock);
+
+ /* Kick the fence worker now; it drops the reference when done. */
+ mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+ return false;
+}
+
static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
.lm_break = nfsd4_layout_lm_break,
.lm_change = nfsd4_layout_lm_change,
.lm_open_conflict = nfsd4_layout_lm_open_conflict,
+ .lm_breaker_timedout = nfsd4_layout_lm_breaker_timedout,
};
int
#endif
#ifdef CONFIG_NFSD_SCSILAYOUT
xa_init(&clp->cl_dev_fences);
+ mutex_init(&clp->cl_fence_mutex);
#endif
INIT_LIST_HEAD(&clp->async_copies);
spin_lock_init(&clp->async_lock);
struct xdr_stream;
+/* Cap exponential backoff between fence retries at 3 minutes */
+#define MAX_FENCE_DELAY ((unsigned int)(3 * 60 * HZ))
+
struct nfsd4_deviceid_map {
struct list_head hash;
u64 idx;
struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp);
- void (*fence_client)(struct nfs4_layout_stateid *ls,
+ bool (*fence_client)(struct nfs4_layout_stateid *ls,
struct nfsd_file *file);
};
struct list_head cl_lru; /* tail queue */
#ifdef CONFIG_NFSD_PNFS
struct list_head cl_lo_states; /* outstanding layout states */
+ bool cl_fence_retry_warn;
#endif
struct xdr_netobj cl_name; /* id generated by client */
nfs4_verifier cl_verifier; /* generated by client */
time64_t cl_ra_time;
#ifdef CONFIG_NFSD_SCSILAYOUT
struct xarray cl_dev_fences;
+ struct mutex cl_fence_mutex;
#endif
};
stateid_t ls_recall_sid;
bool ls_recalled;
struct mutex ls_mutex;
+
+ struct delayed_work ls_fence_work;
+ unsigned int ls_fence_delay;
+ bool ls_fenced;
};
static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
void (*lm_setup)(struct file_lease *, void **);
bool (*lm_breaker_owns_lease)(struct file_lease *);
int (*lm_open_conflict)(struct file *, int);
+ bool (*lm_breaker_timedout)(struct file_lease *fl);
};
struct lock_manager {