X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=mdmon.c;h=cb4173a020dc51549478fdf1b0e1899b2ad7c2cf;hp=d35c05b2d7a9ac779610b3cc932627d318ccdec2;hb=98dbd96605fe7f995934ddc778f06e7b21dbcef4;hpb=9fe32043178f221526b6d59f3bbce58f777089da diff --git a/mdmon.c b/mdmon.c index d35c05b2..cb4173a0 100644 --- a/mdmon.c +++ b/mdmon.c @@ -1,3 +1,22 @@ +/* + * mdmon - monitor external metadata arrays + * + * Copyright (C) 2007-2008 Neil Brown + * Copyright (C) 2007-2008 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ /* * md array manager. 
@@ -38,6 +57,7 @@ #include #include #include +#include #include @@ -49,6 +69,8 @@ struct active_array *pending_discard; int mon_tid, mgr_tid; +int sigterm; + int run_child(void *v) { struct supertype *c = v; @@ -70,37 +92,49 @@ int clone_monitor(struct supertype *container) return mon_tid; } -static struct superswitch *find_metadata_methods(char *vers) -{ - if (strcmp(vers, "ddf") == 0) - return &super_ddf; - if (strcmp(vers, "imsm") == 0) - return &super_imsm; - return NULL; -} - -static int make_pidfile(char *devname, int o_excl) +int make_pidfile(char *devname, int o_excl) { char path[100]; char pid[10]; int fd; + int n; + + if (sigterm) + return -1; + sprintf(path, "/var/run/mdadm/%s.pid", devname); fd = open(path, O_RDWR|O_CREAT|o_excl, 0600); if (fd < 0) - return -1; + return -errno; sprintf(pid, "%d\n", getpid()); - write(fd, pid, strlen(pid)); + n = write(fd, pid, strlen(pid)); close(fd); + if (n < 0) + return -errno; return 0; } +int is_container_member(struct mdstat_ent *mdstat, char *container) +{ + if (mdstat->metadata_version == NULL || + strncmp(mdstat->metadata_version, "external:", 9) != 0 || + !is_subarray(mdstat->metadata_version+9) || + strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 || + mdstat->metadata_version[10+strlen(container)] != '/') + return 0; + + return 1; +} + +void remove_pidfile(char *devname); static void try_kill_monitor(char *devname) { char buf[100]; int fd; pid_t pid; + struct mdstat_ent *mdstat; sprintf(buf, "/var/run/mdadm/%s.pid", devname); fd = open(buf, O_RDONLY); @@ -115,6 +149,10 @@ static void try_kill_monitor(char *devname) close(fd); pid = strtoul(buf, NULL, 10); + /* first rule of survival... 
don't off yourself */ + if (pid == getpid()) + return; + /* kill this process if it is mdmon */ sprintf(buf, "/proc/%lu/cmdline", (unsigned long) pid); fd = open(buf, O_RDONLY); @@ -126,25 +164,44 @@ static void try_kill_monitor(char *devname) return; } - if (strstr(buf, "mdmon") != NULL) - kill(pid, SIGTERM); + if (!strstr(buf, "mdmon")) + return; + + kill(pid, SIGTERM); + + mdstat = mdstat_read(0, 0); + for ( ; mdstat; mdstat = mdstat->next) + if (is_container_member(mdstat, devname)) { + sprintf(buf, "/dev/%s", mdstat->dev); + WaitClean(buf, 0); + } + free_mdstat(mdstat); + remove_pidfile(devname); } void remove_pidfile(char *devname) { char buf[100]; + if (sigterm) + return; + sprintf(buf, "/var/run/mdadm/%s.pid", devname); unlink(buf); + sprintf(buf, "/var/run/mdadm/%s.sock", devname); + unlink(buf); } -static int make_control_sock(char *devname) +int make_control_sock(char *devname) { char path[100]; int sfd; long fl; struct sockaddr_un addr; + if (sigterm) + return -1; + sprintf(path, "/var/run/mdadm/%s.sock", devname); unlink(path); sfd = socket(PF_LOCAL, SOCK_STREAM, 0); @@ -164,11 +221,39 @@ static int make_control_sock(char *devname) return sfd; } +int socket_hup_requested; +static void hup(int sig) +{ + socket_hup_requested = 1; +} + +static void term(int sig) +{ + sigterm = 1; +} + static void wake_me(int sig) { } +/* if we are debugging and starting mdmon by hand then don't fork */ +static int do_fork(void) +{ + #ifdef DEBUG + if (check_env("MDADM_NO_MDMON")) + return 0; + #endif + + return 1; +} + +void usage(void) +{ + fprintf(stderr, "Usage: mdmon [--switch-root dir] /device/name/for/container\n"); + exit(2); +} + int main(int argc, char *argv[]) { int mdfd; @@ -178,73 +263,73 @@ int main(int argc, char *argv[]) struct sigaction act; int pfd[2]; int status; + int ignore; + char *container_name = NULL; + char *switchroot = NULL; - if (argc != 2) { - fprintf(stderr, "Usage: md-manage /device/name/for/container\n"); - exit(2); + switch (argc) { + case 
2: + container_name = argv[1]; + break; + case 4: + if (strcmp(argv[1], "--switch-root") != 0) { + fprintf(stderr, "mdmon: unknown argument %s\n", argv[1]); + usage(); + } + switchroot = argv[2]; + container_name = argv[3]; + break; + default: + usage(); } - mdfd = open(argv[1], O_RDWR); + + mdfd = open(container_name, O_RDWR); if (mdfd < 0) { - fprintf(stderr, "md-manage: %s: %s\n", argv[1], + fprintf(stderr, "mdmon: %s: %s\n", container_name, strerror(errno)); exit(1); } if (md_get_version(mdfd) < 0) { - fprintf(stderr, "md-manage: %s: Not an md device\n", - argv[1]); + fprintf(stderr, "mdmon: %s: Not an md device\n", + container_name); exit(1); } /* Fork, and have the child tell us when they are ready */ - pipe(pfd); - switch(fork()){ - case -1: - fprintf(stderr, "mdmon: failed to fork: %s\n", - strerror(errno)); - exit(1); - case 0: /* child */ - close(pfd[0]); - break; - default: /* parent */ - close(pfd[1]); - if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) { - wait(&status); - status = WEXITSTATUS(status); + if (do_fork()) { + if (pipe(pfd) != 0) { + fprintf(stderr, "mdmon: failed to create pipe\n"); + exit(1); } - exit(status); - } - /* hopefully it is a container - we'll check later */ + switch(fork()) { + case -1: + fprintf(stderr, "mdmon: failed to fork: %s\n", + strerror(errno)); + exit(1); + case 0: /* child */ + close(pfd[0]); + break; + default: /* parent */ + close(pfd[1]); + if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) { + wait(&status); + status = WEXITSTATUS(status); + } + exit(status); + } + } else + pfd[0] = pfd[1] = -1; container = malloc(sizeof(*container)); container->devnum = fd2devnum(mdfd); container->devname = devnum2devname(container->devnum); - container->device_name = argv[1]; - - /* If this fails, we hope it already exists */ - mkdir("/var/run/mdadm", 0600); - /* pid file lives in /var/run/mdadm/mdXX.pid */ - if (make_pidfile(container->devname, O_EXCL) < 0) { - if (ping_monitor(container->devname) == 0) 
{ - fprintf(stderr, "mdmon: %s already managed\n", - container->devname); - exit(3); - } else { - /* cleanup the old monitor, this one is taking over */ - try_kill_monitor(container->devname); - if (make_pidfile(container->devname, 0) < 0) { - fprintf(stderr, "mdmon: %s Cannot create pidfile\n", - container->devname); - exit(3); - } - } - } + container->device_name = container_name; + container->arrays = NULL; - container->sock = make_control_sock(container->devname); - if (container->sock < 0) { - fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n"); + if (!container->devname) { + fprintf(stderr, "mdmon: failed to allocate container name string\n"); exit(3); } - container->arrays = NULL; mdi = sysfs_read(mdfd, container->devnum, GET_VERSION|GET_LEVEL|GET_DEVS); @@ -256,72 +341,129 @@ int main(int argc, char *argv[]) } if (mdi->array.level != UnSet) { fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n", - argv[1]); + container_name); exit(3); } if (mdi->array.major_version != -1 || mdi->array.minor_version != -2) { fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n", - argv[1]); + container_name); exit(3); } container->ss = find_metadata_methods(mdi->text_version); if (container->ss == NULL) { fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n", - argv[1], mdi->text_version); + container_name, mdi->text_version); exit(3); } container->devs = NULL; for (di = mdi->devs; di; di = di->next) { struct mdinfo *cd = malloc(sizeof(*cd)); - cd = di; + *cd = *di; cd->next = container->devs; container->devs = cd; } sysfs_free(mdi); + /* SIGUSR is sent between parent and child. So both block it + * and enable it only with pselect. 
+ */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + sigaddset(&set, SIGHUP); + sigaddset(&set, SIGALRM); + sigaddset(&set, SIGTERM); + sigprocmask(SIG_BLOCK, &set, NULL); + act.sa_handler = wake_me; + act.sa_flags = 0; + sigaction(SIGUSR1, &act, NULL); + sigaction(SIGALRM, &act, NULL); + act.sa_handler = hup; + sigaction(SIGHUP, &act, NULL); + act.sa_handler = term; + sigaction(SIGTERM, &act, NULL); + act.sa_handler = SIG_IGN; + sigaction(SIGPIPE, &act, NULL); - if (container->ss->load_super(container, mdfd, argv[1])) { + if (switchroot) { + /* we assume that /sys /proc /dev are available in + * the new root (see nash:setuproot) + * + * kill any monitors in the current namespace and change + * to the new one + */ + try_kill_monitor(container->devname); + if (chroot(switchroot) != 0) { + fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n", + switchroot, strerror(errno)); + exit(4); + } + } + + /* If this fails, we hope it already exists + * pid file lives in /var/run/mdadm/mdXX.pid + */ + mkdir("/var", 0600); + mkdir("/var/run", 0600); + mkdir("/var/run/mdadm", 0600); + ignore = chdir("/"); + if (make_pidfile(container->devname, O_EXCL) < 0) { + if (ping_monitor(container->devname) == 0) { + fprintf(stderr, "mdmon: %s already managed\n", + container->devname); + exit(3); + } else { + int err; + + /* cleanup the old monitor, this one is taking over */ + try_kill_monitor(container->devname); + err = make_pidfile(container->devname, 0); + if (err < 0) { + fprintf(stderr, "mdmon: %s Cannot create pidfile\n", + container->devname); + if (err == -EROFS) { + /* FIXME implement a mechanism to + * prevent duplicate monitor instances + */ + fprintf(stderr, + "mdmon: continuing on read-only file system\n"); + } else + exit(3); + } + } + } + container->sock = make_control_sock(container->devname); + + if (container->ss->load_super(container, mdfd, container_name)) { fprintf(stderr, "mdmon: Cannot load metadata for %s\n", - argv[1]); + container_name); exit(3);
} /* Ok, this is close enough. We can say goodbye to our parent now. */ status = 0; - write(pfd[1], &status, sizeof(status)); + if (write(pfd[1], &status, sizeof(status)) < 0) + fprintf(stderr, "mdmon: failed to notify our parent: %d\n", + getppid()); close(pfd[1]); - chdir("/"); setsid(); close(0); open("/dev/null", O_RDWR); close(1); - dup(0); + ignore = dup(0); #ifndef DEBUG close(2); - dup(0); + ignore = dup(0); #endif mlockall(MCL_FUTURE); - /* SIGUSR is sent between parent and child. So both block it - * and enable it only with pselect. - */ - sigemptyset(&set); - sigaddset(&set, SIGUSR1); - sigprocmask(SIG_BLOCK, &set, NULL); - act.sa_handler = wake_me; - act.sa_flags = 0; - sigaction(SIGUSR1, &act, NULL); - act.sa_handler = SIG_IGN; - sigaction(SIGPIPE, &act, NULL); - if (clone_monitor(container) < 0) { - fprintf(stderr, "md-manage: failed to start monitor process: %s\n", + fprintf(stderr, "mdmon: failed to start monitor process: %s\n", strerror(errno)); exit(2); }