git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - mdmon.c
Improve partition table code.
[thirdparty/mdadm.git] / mdmon.c
diff --git a/mdmon.c b/mdmon.c
index 73c244af91bcbd92a657f250b8f8ba55b3af7b55..961aa77873e8404944da25c988e6ab17e11b9963 100644 (file)
--- a/mdmon.c
+++ b/mdmon.c
@@ -1,8 +1,8 @@
 /*
  * mdmon - monitor external metadata arrays
  *
- * Copyright (C) 2007-2008 Neil Brown <neilb@suse.de>
- * Copyright (C) 2007-2008 Intel Corporation
+ * Copyright (C) 2007-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2009 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -113,20 +113,19 @@ static struct superswitch *find_metadata_methods(char *vers)
        return NULL;
 }
 
-
-int make_pidfile(char *devname, int o_excl)
+static int make_pidfile(char *devname)
 {
        char path[100];
        char pid[10];
        int fd;
        int n;
 
-       if (sigterm)
-               return -1;
-
-       sprintf(path, "/var/run/mdadm/%s.pid", devname);
+       if (mkdir(pid_dir, 0600) < 0 &&
+           errno != EEXIST)
+               return -errno;
+       sprintf(path, "%s/%s.pid", pid_dir, devname);
 
-       fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
+       fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600);
        if (fd < 0)
                return -errno;
        sprintf(pid, "%d\n", getpid());
@@ -149,26 +148,12 @@ int is_container_member(struct mdstat_ent *mdstat, char *container)
        return 1;
 }
 
-void remove_pidfile(char *devname);
-static void try_kill_monitor(char *devname)
+static void try_kill_monitor(pid_t pid, char *devname, int sock)
 {
        char buf[100];
        int fd;
-       pid_t pid;
-       struct mdstat_ent *mdstat;
-
-       sprintf(buf, "/var/run/mdadm/%s.pid", devname);
-       fd = open(buf, O_RDONLY);
-       if (fd < 0)
-               return;
-
-       if (read(fd, buf, sizeof(buf)) < 0) {
-               close(fd);
-               return;
-       }
-
-       close(fd);
-       pid = strtoul(buf, NULL, 10);
+       int n;
+       long fl;
 
        /* first rule of survival... don't off yourself */
        if (pid == getpid())
@@ -180,40 +165,40 @@ static void try_kill_monitor(char *devname)
        if (fd < 0)
                return;
 
-       if (read(fd, buf, sizeof(buf)) < 0) {
-               close(fd);
-               return;
-       }
+       n = read(fd, buf, sizeof(buf)-1);
+       buf[sizeof(buf)-1] = 0;
+       close(fd);
 
-       if (!strstr(buf, "mdmon"))
+       if (n < 0 || !strstr(buf, "mdmon"))
                return;
 
        kill(pid, SIGTERM);
 
-       mdstat = mdstat_read(0, 0);
-       for ( ; mdstat; mdstat = mdstat->next)
-               if (is_container_member(mdstat, devname)) {
-                       sprintf(buf, "/dev/%s", mdstat->dev);
-                       WaitClean(buf, 0);
-               }
-       free_mdstat(mdstat);
-       remove_pidfile(devname);
+       /* Wait for monitor to exit by reading from the socket, after
+        * clearing the non-blocking flag */
+       fl = fcntl(sock, F_GETFL, 0);
+       fl &= ~O_NONBLOCK;
+       fcntl(sock, F_SETFL, fl);
+       n = read(sock, buf, 100);
+       /* Ignore result, it is just the wait that
+        * matters 
+        */
 }
 
 void remove_pidfile(char *devname)
 {
        char buf[100];
 
-       if (sigterm)
-               return;
-
-       sprintf(buf, "/var/run/mdadm/%s.pid", devname);
+       sprintf(buf, "%s/%s.pid", pid_dir, devname);
        unlink(buf);
-       sprintf(buf, "/var/run/mdadm/%s.sock", devname);
+       sprintf(buf, "%s/%s.sock", pid_dir, devname);
        unlink(buf);
+       if (strcmp(pid_dir, ALT_RUN) == 0)
+               /* try to clean up when we are finished with this dir */
+               rmdir(pid_dir);
 }
 
-int make_control_sock(char *devname)
+static int make_control_sock(char *devname)
 {
        char path[100];
        int sfd;
@@ -223,7 +208,7 @@ int make_control_sock(char *devname)
        if (sigterm)
                return -1;
 
-       sprintf(path, "/var/run/mdadm/%s.sock", devname);
+       sprintf(path, "%s/%s.sock", pid_dir, devname);
        unlink(path);
        sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
        if (sfd < 0)
@@ -242,12 +227,6 @@ int make_control_sock(char *devname)
        return sfd;
 }
 
-int socket_hup_requested;
-static void hup(int sig)
-{
-       socket_hup_requested = 1;
-}
-
 static void term(int sig)
 {
        sigterm = 1;
@@ -271,71 +250,124 @@ static int do_fork(void)
 
 void usage(void)
 {
-       fprintf(stderr, "Usage: mdmon [--switch-root dir] /device/name/for/container\n");
+       fprintf(stderr, "Usage: mdmon [--all] [--takeover] CONTAINER\n");
        exit(2);
 }
 
+static int mdmon(char *devname, int devnum, int must_fork, int takeover);
+
 int main(int argc, char *argv[])
 {
-       int mdfd;
-       struct mdinfo *mdi, *di;
-       struct supertype *container;
-       sigset_t set;
-       struct sigaction act;
-       int pfd[2];
-       int status;
-       int ignore;
        char *container_name = NULL;
-       char *switchroot = NULL;
        int devnum;
        char *devname;
-
-       switch (argc) {
-       case 2:
-               container_name = argv[1];
-               break;
-       case 4:
-               if (strcmp(argv[1], "--switch-root") != 0) {
-                       fprintf(stderr, "mdmon: unknown argument %s\n", argv[1]);
+       int status = 0;
+       int arg;
+       int all = 0;
+       int takeover = 0;
+
+       for (arg = 1; arg < argc; arg++) {
+               if (strncmp(argv[arg], "--all",5) == 0 ||
+                   strcmp(argv[arg], "/proc/mdstat") == 0) {
+                       container_name = argv[arg];
+                       all = 1;
+               } else if (strcmp(argv[arg], "--takeover") == 0)
+                       takeover = 1;
+               else if (container_name == NULL)
+                       container_name = argv[arg];
+               else
                        usage();
-               }
-               switchroot = argv[2];
-               container_name = argv[3];
-               break;
-       default:
+       }
+       if (container_name == NULL)
                usage();
+
+       if (all) {
+               struct mdstat_ent *mdstat, *e;
+               int container_len = strlen(container_name);
+
+               /* launch an mdmon instance for each container found */
+               mdstat = mdstat_read(0, 0);
+               for (e = mdstat; e; e = e->next) {
+                       if (strncmp(e->metadata_version, "external:", 9) == 0 &&
+                           !is_subarray(&e->metadata_version[9])) {
+                               devname = devnum2devname(e->devnum);
+                               /* update cmdline so this mdmon instance can be
+                                * distinguished from others in a call to ps(1)
+                                */
+                               if (strlen(devname) <= container_len) {
+                                       memset(container_name, 0, container_len);
+                                       sprintf(container_name, "%s", devname);
+                               }
+                               status |= mdmon(devname, e->devnum, 1,
+                                               takeover);
+                       }
+               }
+               free_mdstat(mdstat);
+
+               return status;
+       } else if (strncmp(container_name, "md", 2) == 0) {
+               devnum = devname2devnum(container_name);
+               devname = devnum2devname(devnum);
+               if (strcmp(container_name, devname) != 0)
+                       devname = NULL;
+       } else {
+               struct stat st;
+
+               devnum = NoMdDev;
+               if (stat(container_name, &st) == 0)
+                       devnum = stat2devnum(&st);
+               if (devnum == NoMdDev)
+                       devname = NULL;
+               else
+                       devname = devnum2devname(devnum);
        }
 
-       devnum = devname2devnum(container_name);
-       devname = devnum2devname(devnum);
-       if (strcmp(container_name, devname) != 0) {
+       if (!devname) {
                fprintf(stderr, "mdmon: %s is not a valid md device name\n",
                        container_name);
                exit(1);
        }
+       return mdmon(devname, devnum, do_fork(), takeover);
+}
+
+static int mdmon(char *devname, int devnum, int must_fork, int takeover)
+{
+       int mdfd;
+       struct mdinfo *mdi, *di;
+       struct supertype *container;
+       sigset_t set;
+       struct sigaction act;
+       int pfd[2];
+       int status;
+       int ignore;
+       pid_t victim = -1;
+       int victim_sock = -1;
+
+       dprintf("starting mdmon for %s\n", devname);
+
        mdfd = open_dev(devnum);
        if (mdfd < 0) {
-               fprintf(stderr, "mdmon: %s: %s\n", container_name,
+               fprintf(stderr, "mdmon: %s: %s\n", devname,
                        strerror(errno));
-               exit(1);
+               return 1;
        }
        if (md_get_version(mdfd) < 0) {
                fprintf(stderr, "mdmon: %s: Not an md device\n",
-                       container_name);
-               exit(1);
+                       devname);
+               return 1;
        }
 
        /* Fork, and have the child tell us when they are ready */
-       if (do_fork()) {
+       if (must_fork) {
                if (pipe(pfd) != 0) {
                        fprintf(stderr, "mdmon: failed to create pipe\n");
-                       exit(1);
+                       return 1;
                }
                switch(fork()) {
                case -1:
                        fprintf(stderr, "mdmon: failed to fork: %s\n",
                                strerror(errno));
-                       exit(1);
+                       return 1;
                case 0: /* child */
                        close(pfd[0]);
                        break;
@@ -345,15 +377,17 @@ int main(int argc, char *argv[])
                                wait(&status);
                                status = WEXITSTATUS(status);
                        }
-                       exit(status);
+                       return status;
                }
        } else
                pfd[0] = pfd[1] = -1;
 
-       container = malloc(sizeof(*container));
+       container = calloc(1, sizeof(*container));
        container->devnum = devnum;
        container->devname = devname;
        container->arrays = NULL;
+       container->subarray[0] = 0;
+       container->sock = -1;
 
        if (!container->devname) {
                fprintf(stderr, "mdmon: failed to allocate container name string\n");
@@ -361,7 +395,7 @@ int main(int argc, char *argv[])
        }
 
        mdi = sysfs_read(mdfd, container->devnum,
-                        GET_VERSION|GET_LEVEL|GET_DEVS);
+                        GET_VERSION|GET_LEVEL|GET_DEVS|SKIP_GONE_DEVS);
 
        if (!mdi) {
                fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
@@ -370,20 +404,20 @@ int main(int argc, char *argv[])
        }
        if (mdi->array.level != UnSet) {
                fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
-                       container_name);
+                       devname);
                exit(3);
        }
        if (mdi->array.major_version != -1 ||
            mdi->array.minor_version != -2) {
                fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
-                       container_name);
+                       devname);
                exit(3);
        }
 
        container->ss = find_metadata_methods(mdi->text_version);
        if (container->ss == NULL) {
                fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
-                       container_name, mdi->text_version);
+                       devname, mdi->text_version);
                exit(3);
        }
 
@@ -401,85 +435,77 @@ int main(int argc, char *argv[])
         */
        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
-       sigaddset(&set, SIGHUP);
-       sigaddset(&set, SIGALRM);
        sigaddset(&set, SIGTERM);
        sigprocmask(SIG_BLOCK, &set, NULL);
        act.sa_handler = wake_me;
        act.sa_flags = 0;
        sigaction(SIGUSR1, &act, NULL);
-       sigaction(SIGALRM, &act, NULL);
-       act.sa_handler = hup;
-       sigaction(SIGHUP, &act, NULL);
        act.sa_handler = term;
        sigaction(SIGTERM, &act, NULL);
        act.sa_handler = SIG_IGN;
        sigaction(SIGPIPE, &act, NULL);
 
-       if (switchroot) {
-               /* we assume we assume that /sys /proc /dev are available in
-                * the new root (see nash:setuproot)
-                *
-                * kill any monitors in the current namespace and change
-                * to the new one
-                */
-               try_kill_monitor(container->devname);
-               if (chroot(switchroot) != 0) {
-                       fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n",
-                               switchroot, strerror(errno));
-                       exit(4);
-               }
+       pid_dir = VAR_RUN;
+       victim = mdmon_pid(container->devnum);
+       if (victim < 0) {
+               pid_dir = ALT_RUN;
+               victim = mdmon_pid(container->devnum);
        }
+       if (victim >= 0)
+               victim_sock = connect_monitor(container->devname);
 
-       /* If this fails, we hope it already exists 
-        * pid file lives in /var/run/mdadm/mdXX.pid
-        */
-       mkdir("/var", 0600);
-       mkdir("/var/run", 0600);
-       mkdir("/var/run/mdadm", 0600);
        ignore = chdir("/");
-       if (make_pidfile(container->devname, O_EXCL) < 0) {
-               if (ping_monitor(container->devname) == 0) {
+       if (!takeover && victim > 0 && victim_sock >= 0) {
+               if (fping_monitor(victim_sock) == 0) {
                        fprintf(stderr, "mdmon: %s already managed\n",
                                container->devname);
                        exit(3);
-               } else {
-                       int err;
-
-                       /* cleanup the old monitor, this one is taking over */
-                       try_kill_monitor(container->devname);
-                       err = make_pidfile(container->devname, 0);
-                       if (err < 0) {
-                               fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
-                                       container->devname);
-                               if (err == -EROFS) {
-                                       /* FIXME implement a mechanism to
-                                        * prevent duplicate monitor instances
-                                        */
-                                       fprintf(stderr,
-                                               "mdmon: continuing on read-only file system\n");
-                               } else
-                                       exit(3);
-                       }
                }
+               close(victim_sock);
        }
-       container->sock = make_control_sock(container->devname);
-
-       if (container->ss->load_super(container, mdfd, container_name)) {
+       if (container->ss->load_super(container, mdfd, devname)) {
                fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
-                       container_name);
+                       devname);
                exit(3);
        }
        close(mdfd);
 
        /* Ok, this is close enough.  We can say goodbye to our parent now.
         */
+       if (victim > 0)
+               remove_pidfile(devname);
+       pid_dir = VAR_RUN;
+       if (make_pidfile(devname) < 0) {
+               /* Try the alternate */
+               pid_dir = ALT_RUN;
+               if (make_pidfile(devname) < 0) {
+                       fprintf(stderr, "mdmon: Neither %s nor %s are writable\n"
+                               "       cannot create .pid or .sock files.  Aborting\n",
+                               VAR_RUN, ALT_RUN);
+                       exit(3);
+               }
+       }
+       container->sock = make_control_sock(devname);
+
        status = 0;
        if (write(pfd[1], &status, sizeof(status)) < 0)
                fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
                        getppid());
        close(pfd[1]);
 
+       mlockall(MCL_CURRENT | MCL_FUTURE);
+
+       if (clone_monitor(container) < 0) {
+               fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
+                       strerror(errno));
+               exit(2);
+       }
+
+       if (victim > 0) {
+               try_kill_monitor(victim, container->devname, victim_sock);
+               close(victim_sock);
+       }
+
        setsid();
        close(0);
        open("/dev/null", O_RDWR);
@@ -490,14 +516,6 @@ int main(int argc, char *argv[])
        ignore = dup(0);
 #endif
 
-       mlockall(MCL_FUTURE);
-
-       if (clone_monitor(container) < 0) {
-               fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
-                       strerror(errno));
-               exit(2);
-       }
-
        do_manager(container);
 
        exit(0);