]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
gfs2: New gfs2_withdraw_helper
authorAndreas Gruenbacher <agruenba@redhat.com>
Sat, 2 Aug 2025 10:56:37 +0000 (12:56 +0200)
committerAndreas Gruenbacher <agruenba@redhat.com>
Wed, 26 Nov 2025 23:52:27 +0000 (23:52 +0000)
Currently, when a gfs2 filesystem is withdrawn, an "offline" uevent is
triggered that invokes gfs2-utils' gfs2_withdraw_helper script.  The
purpose of this script is to deactivate the filesystem's block device so
that it can be withdrawn immediately, even before all the filesystem's
caches have been discarded.  The script provided by gfs2-utils never did
anything useful, and there was no way for it to report back its status
to the kernel.

To fix that, extend the gfs2_withdraw_helper mechanism so that the
script can report one of the following results by writing the
corresponding value into "/sys$DEVPATH/lock_module/withdraw":

 0 - The shared block device has been marked inactive.  Future write
     operations will fail.

 1 - The shared block device may still be active and carry out
     write operations.

If the "offline" uevent isn't reacted upon within the timeout configured
in /sys$DEVPATH/tune/withdraw_helper_timeout (default 5 seconds), the
event handler is assumed to have failed.

In addition, add a new "errors=deactivate" mount option.

With these changes, if fatal errors are detected on a gfs2 filesystem
and the filesystem is mounted with the "errors=panic" option, the kernel
will panic immediately.  Otherwise, an attempt will be made to
deactivate the underlying block device.  If successful, the kernel will
release all cluster-wide locks immediately so that the rest of the
cluster can continue.  If unsuccessful, the kernel will either panic
("errors=deactivate"), or it will purge all filesystem I/O before
releasing all cluster-wide locks ("errors=withdraw").

Note that the gfs2_withdraw_helper script still needs to be fixed to
take advantage of these improvements.  It could be changed to use a
mechanism like LVM Persistent Reservations.  "dmsetup suspend" is not a
suitable mechanism as it infinitely postpones I/O operations, which may
prevent withdraw from completing.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
fs/gfs2/incore.h
fs/gfs2/ops_fstype.c
fs/gfs2/super.c
fs/gfs2/sys.c
fs/gfs2/util.c

index 7a6ad36413d18f14599b6331a570748b268001ed..d05d8fe4e456b0db25c09bb6b41aafffb539df83 100644 (file)
@@ -537,8 +537,7 @@ struct gfs2_statfs_change_host {
 
 #define GFS2_ERRORS_DEFAULT     GFS2_ERRORS_WITHDRAW
 #define GFS2_ERRORS_WITHDRAW    0
-#define GFS2_ERRORS_CONTINUE    1 /* place holder for future feature */
-#define GFS2_ERRORS_RO          2 /* place holder for future feature */
+#define GFS2_ERRORS_DEACTIVATE  1
 #define GFS2_ERRORS_PANIC       3
 
 struct gfs2_args {
@@ -554,7 +553,7 @@ struct gfs2_args {
        unsigned int ar_data:2;                 /* ordered/writeback */
        unsigned int ar_meta:1;                 /* mount metafs */
        unsigned int ar_discard:1;              /* discard requests */
-       unsigned int ar_errors:2;               /* errors=withdraw | panic */
+       unsigned int ar_errors:2;               /* errors=withdraw | deactivate | panic */
        unsigned int ar_nobarrier:1;            /* do not send barriers */
        unsigned int ar_rgrplvb:1;              /* use lvbs for rgrp info */
        unsigned int ar_got_rgrplvb:1;          /* Was the rgrplvb opt given? */
@@ -580,6 +579,7 @@ struct gfs2_tune {
        unsigned int gt_complain_secs;
        unsigned int gt_statfs_quantum;
        unsigned int gt_statfs_slow;
+       unsigned int gt_withdraw_helper_timeout;
 };
 
 enum {
@@ -711,7 +711,8 @@ struct gfs2_sbd {
        wait_queue_head_t sd_async_glock_wait;
        atomic_t sd_glock_disposal;
        struct completion sd_locking_init;
-       struct completion sd_wdack;
+       struct completion sd_withdraw_helper;
+       int sd_withdraw_helper_status;
        struct delayed_work sd_control_work;
 
        /* Inode Stuff */
index 8f5f72e8312c215a5057b866cef2c4bba118f88c..39ad1e62463538e75d3e41d0b99b620d76dfe044 100644 (file)
@@ -60,6 +60,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
        gt->gt_new_files_jdata = 0;
        gt->gt_max_readahead = BIT(18);
        gt->gt_complain_secs = 10;
+       gt->gt_withdraw_helper_timeout = 5;
 }
 
 void free_sbd(struct gfs2_sbd *sdp)
@@ -92,7 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
        init_waitqueue_head(&sdp->sd_async_glock_wait);
        atomic_set(&sdp->sd_glock_disposal, 0);
        init_completion(&sdp->sd_locking_init);
-       init_completion(&sdp->sd_wdack);
+       init_completion(&sdp->sd_withdraw_helper);
        spin_lock_init(&sdp->sd_statfs_spin);
 
        spin_lock_init(&sdp->sd_rindex_spin);
@@ -1395,12 +1396,14 @@ static const struct constant_table gfs2_param_data[] = {
 };
 
 enum opt_errors {
-       Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
-       Opt_errors_panic    = GFS2_ERRORS_PANIC,
+       Opt_errors_withdraw   = GFS2_ERRORS_WITHDRAW,
+       Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE,
+       Opt_errors_panic      = GFS2_ERRORS_PANIC,
 };
 
 static const struct constant_table gfs2_param_errors[] = {
        {"withdraw",   Opt_errors_withdraw },
+       {"deactivate", Opt_errors_deactivate },
        {"panic",      Opt_errors_panic },
        {}
 };
index 6472e92571a636b052625e3bd797e1f78011324e..0c398866dbb4380d83b13d7a837a6bae7368c3a6 100644 (file)
@@ -1145,6 +1145,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
                case GFS2_ERRORS_WITHDRAW:
                        state = "withdraw";
                        break;
+               case GFS2_ERRORS_DEACTIVATE:
+                       state = "deactivate";
+                       break;
                case GFS2_ERRORS_PANIC:
                        state = "panic";
                        break;
index db3bc4aee87565cf442a6ebf8d43b4079761abfd..7051db9dbea02f9ae8d5ab3ca389ef8908ef8b0b 100644 (file)
@@ -425,26 +425,20 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
        return len;
 }
 
-static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
-{
-       int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
-
-       return sprintf(buf, "%d\n", val);
-}
-
-static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp,
+                                           const char *buf,
+                                           size_t len)
 {
        int ret, val;
 
        ret = kstrtoint(buf, 0, &val);
        if (ret)
                return ret;
-
-       if ((val == 1) &&
-           !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
-               complete(&sdp->sd_wdack);
-       else
+       if (val < 0 || val > 1)
                return -EINVAL;
+
+       sdp->sd_withdraw_helper_status = val;
+       complete(&sdp->sd_withdraw_helper);
        return len;
 }
 
@@ -591,7 +585,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 GDLM_ATTR(proto_name,          0444, proto_name_show,          NULL);
 GDLM_ATTR(block,               0644, block_show,               block_store);
-GDLM_ATTR(withdraw,            0644, wdack_show,               wdack_store);
+GDLM_ATTR(withdraw,            0200, NULL,                     withdraw_helper_status_store);
 GDLM_ATTR(jid,                 0644, jid_show,                 jid_store);
 GDLM_ATTR(first,               0644, lkfirst_show,             lkfirst_store);
 GDLM_ATTR(first_done,          0444, first_done_show,          NULL);
@@ -690,6 +684,7 @@ TUNE_ATTR(statfs_slow, 0);
 TUNE_ATTR(new_files_jdata, 0);
 TUNE_ATTR(statfs_quantum, 1);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
+TUNE_ATTR(withdraw_helper_timeout, 1);
 
 static struct attribute *tune_attrs[] = {
        &tune_attr_quota_warn_period.attr,
@@ -700,6 +695,7 @@ static struct attribute *tune_attrs[] = {
        &tune_attr_statfs_quantum.attr,
        &tune_attr_quota_scale.attr,
        &tune_attr_new_files_jdata.attr,
+       &tune_attr_withdraw_helper_timeout.attr,
        NULL,
 };
 
index 09fcfc04769b4cbd6a12b632226723b53efbda00..ff63070ed6dee5bd2149b17f5068683126b1c991 100644 (file)
@@ -171,32 +171,91 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
        va_end(args);
 }
 
+/**
+ * gfs2_offline_uevent - run gfs2_withdraw_helper and wait for its status
+ * @sdp: The GFS2 superblock
+ */
+static bool gfs2_offline_uevent(struct gfs2_sbd *sdp)
+{
+       struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+       long timeout;
+
+       /* Skip protocol "lock_nolock" which doesn't require shared storage. */
+       if (!ls->ls_ops->lm_lock)
+               return false;
+
+       /*
+        * The gfs2_withdraw_helper replies by writing one of the following
+        * status codes to "/sys$DEVPATH/lock_module/withdraw":
+        *
+        * 0 - The shared block device has been marked inactive.  Future write
+        *     operations will fail.
+        *
+        * 1 - The shared block device may still be active and carry out
+        *     write operations.
+        *
+        * If no status is written within gt_withdraw_helper_timeout seconds,
+        * the helper is assumed to have failed.  Only status 0 returns true.
+        */
+
+       sdp->sd_withdraw_helper_status = -1;
+       kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+       timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ;
+       wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout);
+       if (sdp->sd_withdraw_helper_status == -1) {
+               fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper");
+       } else {
+               fs_err(sdp, "%s %s with status %d\n",
+                      "gfs2_withdraw_helper",
+                      sdp->sd_withdraw_helper_status == 0 ?
+                      "succeeded" : "failed",
+                      sdp->sd_withdraw_helper_status);
+       }
+       return sdp->sd_withdraw_helper_status == 0;
+}
+
 void gfs2_withdraw_func(struct work_struct *work)
 {
        struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work);
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
        const struct lm_lockops *lm = ls->ls_ops;
+       bool device_inactive;
 
        if (test_bit(SDF_KILL, &sdp->sd_flags))
                return;
 
        BUG_ON(sdp->sd_args.ar_debug);
 
-       do_withdraw(sdp);
+       /*
+        * Try to deactivate the shared block device so that no more I/O will
+        * go through.  If successful, we can immediately trigger remote
+        * recovery.  Otherwise, we must first empty out all our local caches.
+        */
 
-       kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+       device_inactive = gfs2_offline_uevent(sdp);
 
-       if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
-               wait_for_completion(&sdp->sd_wdack);
+       if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive)
+               panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
+
+       if (lm->lm_unmount) {
+               if (device_inactive) {
+                       lm->lm_unmount(sdp, false);
+                       do_withdraw(sdp);
+               } else {
+                       do_withdraw(sdp);
+                       lm->lm_unmount(sdp, false);
+               }
+       } else {
+               do_withdraw(sdp);
+       }
 
-       if (lm->lm_unmount)
-               lm->lm_unmount(sdp, false);
        fs_err(sdp, "file system withdrawn\n");
 }
 
 void gfs2_withdraw(struct gfs2_sbd *sdp)
 {
-       if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
+       if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW ||
+           sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) {
                if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
                        return;