From bbbf1529ea9b85072e58c164a9a5d82554ffa941 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 2 Aug 2025 12:56:37 +0200 Subject: [PATCH] gfs2: New gfs2_withdraw_helper Currently, when a gfs2 filesystem is withdrawn, an "offline" uevent is triggered that invokes gfs2-util's gfs2_withdraw_helper script. The purpose of this script is to deactivate the filesystem's block device so that it can be withdrawn immediately, even before all the filesystem's caches have been discarded. The script provided by gfs2-utils never did anything useful, and there was no way for it to report back its status to the kernel. To fix that, extend the gfs2_withdraw_helper mechanism so that the script can report one of the following results by writing the corresponding value into "/sys$DEVPATH/lock_module/withdraw": 0 - The shared block device has been marked inactive. Future write operations will fail. 1 - The shared block device may still be active and carry out write operations. If the "offline" uevent isn't reacted upon within the timeout configured in /sys$DEVPATH/tune/withdraw_helper_timeout (default 5 seconds), the event handler is assumed to have failed. In addition, add an additional "errors=deactivate" mount option. With these changes, if fatal errors are detected on a gfs2 filesystem and the filesystem is mounted with the "errors=panic" option, the kernel will panic immediately. Otherwise, an attempt will be made to deactivate the underlying block device. If successful, the kernel will release all cluster-wide locks immediately so that the rest of the cluster can continue. If unsuccessful, the kernel will either panic ("errors=deactivate"), or it will purge all filesystem I/O before releasing all cluster-wide locks ("errors=withdraw"). Note that the gfs2_withdraw_helper script still needs to be fixed to take advantage of these improvements. It could be changed to use a mechanism like LVM Persistent Reservations. "dmsetup suspend" is not a suitable mechanism as it infinitely postpones I/O operations, which may prevent withdraw from completing. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 9 +++--- fs/gfs2/ops_fstype.c | 9 ++++-- fs/gfs2/super.c | 3 ++ fs/gfs2/sys.c | 24 ++++++--------- fs/gfs2/util.c | 73 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 90 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7a6ad36413d18..d05d8fe4e456b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -537,8 +537,7 @@ struct gfs2_statfs_change_host { #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW #define GFS2_ERRORS_WITHDRAW 0 -#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ -#define GFS2_ERRORS_RO 2 /* place holder for future feature */ +#define GFS2_ERRORS_DEACTIVATE 1 #define GFS2_ERRORS_PANIC 3 struct gfs2_args { @@ -554,7 +553,7 @@ struct gfs2_args { unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ - unsigned int ar_errors:2; /* errors=withdraw | panic */ + unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */ @@ -580,6 +579,7 @@ struct gfs2_tune { unsigned int gt_complain_secs; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; + unsigned int gt_withdraw_helper_timeout; }; enum { @@ -711,7 +711,8 @@ struct gfs2_sbd { wait_queue_head_t sd_async_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; - struct completion sd_wdack; + struct completion sd_withdraw_helper; + int sd_withdraw_helper_status; struct delayed_work sd_control_work; /* Inode Stuff */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8f5f72e8312c2..39ad1e6246353 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -60,6 +60,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_new_files_jdata = 0; gt->gt_max_readahead = BIT(18); gt->gt_complain_secs = 10; + gt->gt_withdraw_helper_timeout = 5; } void free_sbd(struct gfs2_sbd *sdp) @@ -92,7 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_async_glock_wait); atomic_set(&sdp->sd_glock_disposal, 0); init_completion(&sdp->sd_locking_init); - init_completion(&sdp->sd_wdack); + init_completion(&sdp->sd_withdraw_helper); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -1395,12 +1396,14 @@ static const struct constant_table gfs2_param_data[] = { }; enum opt_errors { - Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, - Opt_errors_panic = GFS2_ERRORS_PANIC, + Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, + Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE, + Opt_errors_panic = GFS2_ERRORS_PANIC, }; static const struct constant_table gfs2_param_errors[] = { {"withdraw", Opt_errors_withdraw }, + {"deactivate", Opt_errors_deactivate }, {"panic", Opt_errors_panic }, {} }; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6472e92571a63..0c398866dbb43 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1145,6 +1145,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) case GFS2_ERRORS_WITHDRAW: state = "withdraw"; break; + case GFS2_ERRORS_DEACTIVATE: + state = "deactivate"; + break; case GFS2_ERRORS_PANIC: state = "panic"; break; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index db3bc4aee8756..7051db9dbea02 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -425,26 +425,20 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) return len; } -static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf) -{ - int val = completion_done(&sdp->sd_wdack) ? 1 : 0; - - return sprintf(buf, "%d\n", val); -} - -static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp, + const char *buf, + size_t len) { int ret, val; ret = kstrtoint(buf, 0, &val); if (ret) return ret; - - if ((val == 1) && - !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - complete(&sdp->sd_wdack); - else + if (val < 0 || val > 1) return -EINVAL; + + sdp->sd_withdraw_helper_status = val; + complete(&sdp->sd_withdraw_helper); return len; } @@ -591,7 +585,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store); +GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store); GDLM_ATTR(jid, 0644, jid_show, jid_store); GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); GDLM_ATTR(first_done, 0444, first_done_show, NULL); @@ -690,6 +684,7 @@ TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); +TUNE_ATTR(withdraw_helper_timeout, 1); static struct attribute *tune_attrs[] = { &tune_attr_quota_warn_period.attr, @@ -700,6 +695,7 @@ static struct attribute *tune_attrs[] = { &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, + &tune_attr_withdraw_helper_timeout.attr, NULL, }; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 09fcfc04769b4..ff63070ed6dee 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -171,32 +171,91 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) va_end(args); } +/** + * gfs2_offline_uevent - run gfs2_withdraw_helper + * @sdp: The GFS2 superblock + */ +static bool gfs2_offline_uevent(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + long timeout; + + /* Skip protocol "lock_nolock" which doesn't require shared storage. */ + if (!ls->ls_ops->lm_lock) + return false; + + /* + * The gfs2_withdraw_helper replies by writing one of the following + * status codes to "/sys$DEVPATH/lock_module/withdraw": + * + * 0 - The shared block device has been marked inactive. Future write + * operations will fail. + * + * 1 - The shared block device may still be active and carry out + * write operations. + * + * If the "offline" uevent isn't reacted upon in time, the event + * handler is assumed to have failed. + */ + + sdp->sd_withdraw_helper_status = -1; + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ; + wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout); + if (sdp->sd_withdraw_helper_status == -1) { + fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper"); + } else { + fs_err(sdp, "%s %s with status %d\n", + "gfs2_withdraw_helper", + sdp->sd_withdraw_helper_status == 0 ? + "succeeded" : "failed", + sdp->sd_withdraw_helper_status); + } + return sdp->sd_withdraw_helper_status == 0; +} + void gfs2_withdraw_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work); struct lm_lockstruct *ls = &sdp->sd_lockstruct; const struct lm_lockops *lm = ls->ls_ops; + bool device_inactive; if (test_bit(SDF_KILL, &sdp->sd_flags)) return; BUG_ON(sdp->sd_args.ar_debug); - do_withdraw(sdp); + /* + * Try to deactivate the shared block device so that no more I/O will + * go through. If successful, we can immediately trigger remote + * recovery. Otherwise, we must first empty out all our local caches. + */ - kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + device_inactive = gfs2_offline_uevent(sdp); - if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - wait_for_completion(&sdp->sd_wdack); + if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive) + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); + + if (lm->lm_unmount) { + if (device_inactive) { + lm->lm_unmount(sdp, false); + do_withdraw(sdp); + } else { + do_withdraw(sdp); + lm->lm_unmount(sdp, false); + } + } else { + do_withdraw(sdp); + } - if (lm->lm_unmount) - lm->lm_unmount(sdp, false); fs_err(sdp, "file system withdrawn\n"); } void gfs2_withdraw(struct gfs2_sbd *sdp) { - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW || + sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) { if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return; -- 2.47.3