git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - mdmon.c
mdmon: pass symbolic name to mdmon instead of device name.
[thirdparty/mdadm.git] / mdmon.c
diff --git a/mdmon.c b/mdmon.c
index 407f637add3b400c5b72c401a3da00063df38dbf..73c244af91bcbd92a657f250b8f8ba55b3af7b55 100644 (file)
--- a/mdmon.c
+++ b/mdmon.c
@@ -1,3 +1,22 @@
+/*
+ * mdmon - monitor external metadata arrays
+ *
+ * Copyright (C) 2007-2008 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2008 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
 
 /*
  * md array manager.
 #include       <sys/un.h>
 #include       <sys/mman.h>
 #include       <sys/syscall.h>
+#include       <sys/wait.h>
 #include       <stdio.h>
 #include       <errno.h>
 #include       <string.h>
 #include       <fcntl.h>
 #include       <signal.h>
+#include       <dirent.h>
 
 #include       <sched.h>
 
@@ -48,28 +69,39 @@ struct active_array *pending_discard;
 
 int mon_tid, mgr_tid;
 
+int sigterm;
+
 int run_child(void *v)
 {
        struct supertype *c = v;
 
-       mon_tid = syscall(SYS_gettid);
        do_monitor(c);
        return 0;
 }
 
-int clone_monitor(struct supertype *container)
+#ifdef __ia64__
+int __clone2(int (*fn)(void *),
+           void *child_stack_base, size_t stack_size,
+           int flags, void *arg, ...
+        /* pid_t *pid, struct user_desc *tls, pid_t *ctid */ );
+#endif
+ int clone_monitor(struct supertype *container)
 {
        static char stack[4096];
-       int rv;
-
 
-       rv = clone(run_child, stack+4096-64,
+#ifdef __ia64__
+       mon_tid = __clone2(run_child, stack, sizeof(stack),
+                  CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
+                  container);
+#else
+       mon_tid = clone(run_child, stack+4096-64,
                   CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
                   container);
+#endif
 
        mgr_tid = syscall(SYS_gettid);
-       
-       return rv;
+
+       return mon_tid;
 }
 
 static struct superswitch *find_metadata_methods(char *vers)
@@ -82,27 +114,48 @@ static struct superswitch *find_metadata_methods(char *vers)
 }
 
 
-static int make_pidfile(char *devname, int o_excl)
+int make_pidfile(char *devname, int o_excl)
 {
        char path[100];
        char pid[10];
        int fd;
+       int n;
+
+       if (sigterm)
+               return -1;
+
        sprintf(path, "/var/run/mdadm/%s.pid", devname);
 
        fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
        if (fd < 0)
-               return -1;
+               return -errno;
        sprintf(pid, "%d\n", getpid());
-       write(fd, pid, strlen(pid));
+       n = write(fd, pid, strlen(pid));
        close(fd);
+       if (n < 0)
+               return -errno;
        return 0;
 }
 
+int is_container_member(struct mdstat_ent *mdstat, char *container)
+{
+       if (mdstat->metadata_version == NULL ||
+           strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
+           !is_subarray(mdstat->metadata_version+9) ||
+           strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 ||
+           mdstat->metadata_version[10+strlen(container)] != '/')
+               return 0;
+
+       return 1;
+}
+
+void remove_pidfile(char *devname);
 static void try_kill_monitor(char *devname)
 {
        char buf[100];
        int fd;
        pid_t pid;
+       struct mdstat_ent *mdstat;
 
        sprintf(buf, "/var/run/mdadm/%s.pid", devname);
        fd = open(buf, O_RDONLY);
@@ -117,6 +170,10 @@ static void try_kill_monitor(char *devname)
        close(fd);
        pid = strtoul(buf, NULL, 10);
 
+       /* first rule of survival... don't off yourself */
+       if (pid == getpid())
+               return;
+
        /* kill this process if it is mdmon */
        sprintf(buf, "/proc/%lu/cmdline", (unsigned long) pid);
        fd = open(buf, O_RDONLY);
@@ -128,25 +185,44 @@ static void try_kill_monitor(char *devname)
                return;
        }
 
-       if (strstr(buf, "mdmon") != NULL)
-               kill(pid, SIGTERM);
+       if (!strstr(buf, "mdmon"))
+               return;
+
+       kill(pid, SIGTERM);
+
+       mdstat = mdstat_read(0, 0);
+       for ( ; mdstat; mdstat = mdstat->next)
+               if (is_container_member(mdstat, devname)) {
+                       sprintf(buf, "/dev/%s", mdstat->dev);
+                       WaitClean(buf, 0);
+               }
+       free_mdstat(mdstat);
+       remove_pidfile(devname);
 }
 
 void remove_pidfile(char *devname)
 {
        char buf[100];
 
+       if (sigterm)
+               return;
+
        sprintf(buf, "/var/run/mdadm/%s.pid", devname);
        unlink(buf);
+       sprintf(buf, "/var/run/mdadm/%s.sock", devname);
+       unlink(buf);
 }
 
-static int make_control_sock(char *devname)
+int make_control_sock(char *devname)
 {
        char path[100];
        int sfd;
        long fl;
        struct sockaddr_un addr;
 
+       if (sigterm)
+               return -1;
+
        sprintf(path, "/var/run/mdadm/%s.sock", devname);
        unlink(path);
        sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
@@ -166,11 +242,39 @@ static int make_control_sock(char *devname)
        return sfd;
 }
 
+int socket_hup_requested;
+static void hup(int sig)
+{
+       socket_hup_requested = 1;
+}
+
+static void term(int sig)
+{
+       sigterm = 1;
+}
+
 static void wake_me(int sig)
 {
 
 }
 
+/* if we are debugging and starting mdmon by hand then don't fork */
+static int do_fork(void)
+{
+       #ifdef DEBUG
+       if (check_env("MDADM_NO_MDMON"))
+               return 0;
+       #endif
+
+       return 1;
+}
+
+void usage(void)
+{
+       fprintf(stderr, "Usage: mdmon [--switch-root dir] /device/name/for/container\n");
+       exit(2);
+}
+
 int main(int argc, char *argv[])
 {
        int mdfd;
@@ -178,55 +282,83 @@ int main(int argc, char *argv[])
        struct supertype *container;
        sigset_t set;
        struct sigaction act;
+       int pfd[2];
+       int status;
+       int ignore;
+       char *container_name = NULL;
+       char *switchroot = NULL;
+       int devnum;
+       char *devname;
+
+       switch (argc) {
+       case 2:
+               container_name = argv[1];
+               break;
+       case 4:
+               if (strcmp(argv[1], "--switch-root") != 0) {
+                       fprintf(stderr, "mdmon: unknown argument %s\n", argv[1]);
+                       usage();
+               }
+               switchroot = argv[2];
+               container_name = argv[3];
+               break;
+       default:
+               usage();
+       }
 
-       if (argc != 2) {
-               fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
-               exit(2);
+       devnum = devname2devnum(container_name);
+       devname = devnum2devname(devnum);
+       if (strcmp(container_name, devname) != 0) {
+               fprintf(stderr, "mdmon: %s is not a valid md device name\n",
+                       container_name);
+               exit(1);
        }
-       mdfd = open(argv[1], O_RDWR);
+       mdfd = open_dev(devnum);
        if (mdfd < 0) {
-               fprintf(stderr, "md-manage: %s: %s\n", argv[1],
+               fprintf(stderr, "mdmon: %s: %s\n", container_name,
                        strerror(errno));
                exit(1);
        }
        if (md_get_version(mdfd) < 0) {
-               fprintf(stderr, "md-manage: %s: Not an md device\n",
-                       argv[1]);
+               fprintf(stderr, "mdmon: %s: Not an md device\n",
+                       container_name);
                exit(1);
        }
 
-       /* hopefully it is a container - we'll check later */
-
-       container = malloc(sizeof(*container));
-       container->devnum = fd2devnum(mdfd);
-       container->devname = devnum2devname(container->devnum);
-       container->device_name = argv[1];
-
-       /* If this fails, we hope it already exists */
-       mkdir("/var/run/mdadm", 0600);
-       /* pid file lives in /var/run/mdadm/mdXX.pid */
-       if (make_pidfile(container->devname, O_EXCL) < 0) {
-               if (ping_monitor(container->devname) == 0) {
-                       fprintf(stderr, "mdmon: %s already managed\n",
-                               container->devname);
-                       exit(3);
-               } else {
-                       /* cleanup the old monitor, this one is taking over */
-                       try_kill_monitor(container->devname);
-                       if (make_pidfile(container->devname, 0) < 0) {
-                               fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
-                                       container->devname);
-                               exit(3);
+       /* Fork, and have the child tell us when they are ready */
+       if (do_fork()) {
+               if (pipe(pfd) != 0) {
+                       fprintf(stderr, "mdmon: failed to create pipe\n");
+                       exit(1);
+               }
+               switch(fork()) {
+               case -1:
+                       fprintf(stderr, "mdmon: failed to fork: %s\n",
+                               strerror(errno));
+                       exit(1);
+               case 0: /* child */
+                       close(pfd[0]);
+                       break;
+               default: /* parent */
+                       close(pfd[1]);
+                       if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
+                               wait(&status);
+                               status = WEXITSTATUS(status);
                        }
+                       exit(status);
                }
-       }
+       } else
+               pfd[0] = pfd[1] = -1;
 
-       container->sock = make_control_sock(container->devname);
-       if (container->sock < 0) {
-               fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
+       container = malloc(sizeof(*container));
+       container->devnum = devnum;
+       container->devname = devname;
+       container->arrays = NULL;
+
+       if (!container->devname) {
+               fprintf(stderr, "mdmon: failed to allocate container name string\n");
                exit(3);
        }
-       container->arrays = NULL;
 
        mdi = sysfs_read(mdfd, container->devnum,
                         GET_VERSION|GET_LEVEL|GET_DEVS);
@@ -238,57 +370,130 @@ int main(int argc, char *argv[])
        }
        if (mdi->array.level != UnSet) {
                fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
-                       argv[1]);
+                       container_name);
                exit(3);
        }
        if (mdi->array.major_version != -1 ||
            mdi->array.minor_version != -2) {
                fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
-                       argv[1]);
+                       container_name);
                exit(3);
        }
 
        container->ss = find_metadata_methods(mdi->text_version);
        if (container->ss == NULL) {
                fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
-                       argv[1], mdi->text_version);
+                       container_name, mdi->text_version);
                exit(3);
        }
 
        container->devs = NULL;
        for (di = mdi->devs; di; di = di->next) {
                struct mdinfo *cd = malloc(sizeof(*cd));
-               cd = di;
+               *cd = *di;
                cd->next = container->devs;
                container->devs = cd;
        }
        sysfs_free(mdi);
 
-
-       if (container->ss->load_super(container, mdfd, argv[1])) {
-               fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
-                       argv[1]);
-               exit(3);
-       }
-       close(mdfd);
-       close(mdfd);
-
-       mlockall(MCL_FUTURE);
-
        /* SIGUSR is sent between parent and child.  So both block it
         * and enable it only with pselect.
         */
        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
+       sigaddset(&set, SIGHUP);
+       sigaddset(&set, SIGALRM);
+       sigaddset(&set, SIGTERM);
        sigprocmask(SIG_BLOCK, &set, NULL);
        act.sa_handler = wake_me;
        act.sa_flags = 0;
        sigaction(SIGUSR1, &act, NULL);
+       sigaction(SIGALRM, &act, NULL);
+       act.sa_handler = hup;
+       sigaction(SIGHUP, &act, NULL);
+       act.sa_handler = term;
+       sigaction(SIGTERM, &act, NULL);
        act.sa_handler = SIG_IGN;
        sigaction(SIGPIPE, &act, NULL);
 
+       if (switchroot) {
+               /* we assume that /sys /proc /dev are available in
+                * the new root (see nash:setuproot)
+                *
+                * kill any monitors in the current namespace and change
+                * to the new one
+                */
+               try_kill_monitor(container->devname);
+               if (chroot(switchroot) != 0) {
+                       fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n",
+                               switchroot, strerror(errno));
+                       exit(4);
+               }
+       }
+
+       /* If this fails, we hope it already exists 
+        * pid file lives in /var/run/mdadm/mdXX.pid
+        */
+       mkdir("/var", 0600);
+       mkdir("/var/run", 0600);
+       mkdir("/var/run/mdadm", 0600);
+       ignore = chdir("/");
+       if (make_pidfile(container->devname, O_EXCL) < 0) {
+               if (ping_monitor(container->devname) == 0) {
+                       fprintf(stderr, "mdmon: %s already managed\n",
+                               container->devname);
+                       exit(3);
+               } else {
+                       int err;
+
+                       /* cleanup the old monitor, this one is taking over */
+                       try_kill_monitor(container->devname);
+                       err = make_pidfile(container->devname, 0);
+                       if (err < 0) {
+                               fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
+                                       container->devname);
+                               if (err == -EROFS) {
+                                       /* FIXME implement a mechanism to
+                                        * prevent duplicate monitor instances
+                                        */
+                                       fprintf(stderr,
+                                               "mdmon: continuing on read-only file system\n");
+                               } else
+                                       exit(3);
+                       }
+               }
+       }
+       container->sock = make_control_sock(container->devname);
+
+       if (container->ss->load_super(container, mdfd, container_name)) {
+               fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
+                       container_name);
+               exit(3);
+       }
+       close(mdfd);
+
+       /* Ok, this is close enough.  We can say goodbye to our parent now.
+        */
+       status = 0;
+       if (write(pfd[1], &status, sizeof(status)) < 0)
+               fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
+                       getppid());
+       close(pfd[1]);
+
+       setsid();
+       close(0);
+       open("/dev/null", O_RDWR);
+       close(1);
+       ignore = dup(0);
+#ifndef DEBUG
+       close(2);
+       ignore = dup(0);
+#endif
+
+       mlockall(MCL_FUTURE);
+
        if (clone_monitor(container) < 0) {
-               fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
+               fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
                        strerror(errno));
                exit(2);
        }