]> git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
mdadm: Do not start reshape before switchroot
authorMateusz Kusiak <mateusz.kusiak@intel.com>
Thu, 10 Oct 2024 10:31:06 +0000 (10:31 +0000)
committerMariusz Tkaczyk <mtkaczyk@kernel.org>
Mon, 13 Jan 2025 09:24:49 +0000 (10:24 +0100)
There are numerous issues for --grow --continue in switchroot phrase,
they include:
* Events being missed for restarting grow-continue service. This is
  apparent mostly on OS on RAID scenarios. When a checkpoint (next step)
  is committed, we have no reliable way to gracefully stop reshape until
  it reaches that checkpoint. During boot, there's heavy I/O utilisation,
  which causes sync speed drop, and naturally checkpoint takes longer to
  reach. This further causes systemd to forcefully kill grow-continue
  service due to timeouts, which results in udev event being missed for
  grow-continue service restart.
* Grow-continue (seemingly) was not designed to be restarted without
  reassembly, some things like stopping chunksize (to lower) migration
  were straight up not working until recently.
This patch makes grow-continue (actual reshape) start after switchroot
phrase. This way we should not encounter issues related to restarting
the service.

Add checks not start a reshape if in initrd, let it initialise only.
Change grow-continue udev rule to be triggered whenever there's a
reshape happening in metadata, rely on udev event to kick reshape after
switchroot. Add handle_forking helper function for reshapes to avoid
duplicating code.

Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
Grow.c
mdadm_status.h
udev-md-raid-arrays.rules
util.c

diff --git a/Grow.c b/Grow.c
index 0d9e3b537dd5c260a665c8170e032dabc1057b9d..2719346c58a3ee100aacebfe66a3856e66ed52ab 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -2995,6 +2995,34 @@ static void catch_term(int sig)
        sigterm = 1;
 }
 
+
+/**
+ * handle_forking() - Handle reshape forking.
+ *
+ * @forked: if already forked.
+ * @devname: device name.
+ * Returns: -1 if fork() failed,
+ *           0 if child process,
+ *           1 if job delegated to forked process or systemd.
+ *
+ * This function is a helper function for reshapes for fork handling.
+ */
+static mdadm_status_t handle_forking(bool forked, char *devname)
+{
+       if (forked)
+               return MDADM_STATUS_FORKED;
+
+       if (devname && continue_via_systemd(devname, GROW_SERVICE, NULL))
+               return MDADM_STATUS_SUCCESS;
+
+       switch (fork()) {
+       case -1: return MDADM_STATUS_ERROR; /* error */
+       case 0: return MDADM_STATUS_FORKED; /* child */
+       default: return MDADM_STATUS_SUCCESS; /* parent */
+       }
+
+}
+
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -3485,33 +3513,35 @@ started:
        if (restart)
                sysfs_set_str(sra, NULL, "array_state", "active");
 
-       if (!forked)
-               if (continue_via_systemd(container ?: sra->sys_name,
-                                        GROW_SERVICE, NULL)) {
-                       free(fdlist);
-                       free(offsets);
-                       sysfs_free(sra);
-                       return 0;
-               }
+       /* Do not run in initrd */
+       if (in_initrd()) {
+               free(fdlist);
+               free(offsets);
+               sysfs_free(sra);
+               pr_info("Reshape has to be continued from location %llu when root filesystem has been mounted.\n",
+                       sra->reshape_progress);
+               return 1;
+       }
 
        /* Now we just need to kick off the reshape and watch, while
         * handling backups of the data...
         * This is all done by a forked background process.
         */
-       switch(forked ? 0 : fork()) {
-       case -1:
+       switch (handle_forking(forked, container ? container : sra->sys_name)) {
+       default: /* Unused, only to satisfy compiler. */
+       case MDADM_STATUS_ERROR: /* error */
                pr_err("Cannot run child to monitor reshape: %s\n",
                        strerror(errno));
                abort_reshape(sra);
                goto release;
-       default:
+       case MDADM_STATUS_FORKED: /* child */
+               map_fork();
+               break;
+       case MDADM_STATUS_SUCCESS: /* parent */
                free(fdlist);
                free(offsets);
                sysfs_free(sra);
                return 0;
-       case 0:
-               map_fork();
-               break;
        }
 
        /* Close unused file descriptor in the forked process */
@@ -3680,22 +3710,19 @@ int reshape_container(char *container, char *devname,
         */
        ping_monitor(container);
 
-       if (!forked)
-               if (continue_via_systemd(container, GROW_SERVICE, NULL))
-                       return 0;
-
-       switch (forked ? 0 : fork()) {
-       case -1: /* error */
+       switch (handle_forking(forked, container)) {
+       default: /* Unused, only to satisfy compiler. */
+       case MDADM_STATUS_ERROR: /* error */
                perror("Cannot fork to complete reshape\n");
                unfreeze(st);
                return 1;
-       default: /* parent */
-               printf("%s: multi-array reshape continues in background\n", Name);
-               return 0;
-       case 0: /* child */
+       case MDADM_STATUS_FORKED: /* child */
                manage_fork_fds(0);
                map_fork();
                break;
+       case MDADM_STATUS_SUCCESS: /* parent */
+               printf("%s: multi-array reshape continues in background\n", Name);
+               return 0;
        }
 
        /* close unused handle in child process
@@ -3791,6 +3818,12 @@ int reshape_container(char *container, char *devname,
                                   c->backup_file, c->verbose, 1, restart);
                close(fd);
 
+               /* Do not run reshape in initrd but let it initialize.*/
+               if (in_initrd()) {
+                       sysfs_free(cc);
+                       exit(0);
+               }
+
                restart = 0;
                if (rv)
                        break;
index 905105e2469a42bc1b2b3961c381502ec91e4a88..e244127cdf089292ddb98841b95b7560cde320aa 100644 (file)
@@ -7,7 +7,8 @@ typedef enum mdadm_status {
        MDADM_STATUS_SUCCESS = 0,
        MDADM_STATUS_ERROR,
        MDADM_STATUS_UNDEF,
-       MDADM_STATUS_MEM_FAIL
+       MDADM_STATUS_MEM_FAIL,
+       MDADM_STATUS_FORKED
 } mdadm_status_t;
 
 #endif
index 4e64b249b2db561f446618cacaaa7e1ac2e494cd..d8de6d00cff650711b904ef3157b965f335c0111 100644 (file)
@@ -15,7 +15,6 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
 ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
 TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
 ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
-ATTR{md/sync_action}=="reshape", ENV{RESHAPE_ACTIVE}="yes"
 LABEL="md_ignore_state"
 
 IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
@@ -40,6 +39,6 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
 ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
 ENV{MD_MON_THIS}=="?*", TEST=="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@initrd-%c.service"
 ENV{MD_MON_THIS}=="?*", TEST!="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service"
-ENV{RESHAPE_ACTIVE}=="yes", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service"
+ENV{MD_RESHAPE_ACTIVE}=="True", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service"
 
 LABEL="md_end"
diff --git a/util.c b/util.c
index 6aa44a803a1fe5e70e83b34bd2663d6fd07159d3..8099852f611c2d03d23bdb7274b21e08be084933 100644 (file)
--- a/util.c
+++ b/util.c
@@ -2307,6 +2307,7 @@ int continue_via_systemd(char *devnm, char *service_name, char *prefix)
        int pid, status;
        char pathbuf[1024];
 
+       dprintf("Start %s service\n", service_name);
        /* Simply return that service cannot be started */
        if (check_env("MDADM_NO_SYSTEMCTL"))
                return 0;