X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=mdmon.c;h=4936260615a06193f0930a011d1f75835886e894;hb=69b2fcc5bb7ee6608790513042110a15d36a21bc;hp=7ba8be04a8d547653eb520b276d61eae7785c19f;hpb=1ed3f38758ff23dabfa3f67e2a02ff98d9d0fea8;p=thirdparty%2Fmdadm.git diff --git a/mdmon.c b/mdmon.c index 7ba8be04..49362606 100644 --- a/mdmon.c +++ b/mdmon.c @@ -1,3 +1,22 @@ +/* + * mdmon - monitor external metadata arrays + * + * Copyright (C) 2007-2009 Neil Brown + * Copyright (C) 2007-2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ /* * md array manager. @@ -26,110 +45,134 @@ #include #include +#include #include #include #include #include +#include +#include #include #include #include #include #include - +#include +#ifdef USE_PTHREADS +#include +#else #include +#endif #include "mdadm.h" #include "mdmon.h" struct active_array *discard_this; struct active_array *pending_discard; -struct md_generic_cmd *active_cmd; -int run_child(void *v) +int mon_tid, mgr_tid; + +int sigterm; + +#ifdef USE_PTHREADS +static void *run_child(void *v) { struct supertype *c = v; - sigset_t set; - /* SIGUSR is sent from child to parent, So child must block it */ - sigemptyset(&set); - sigaddset(&set, SIGUSR1); - sigprocmask(SIG_BLOCK, &set, NULL); + mon_tid = syscall(SYS_gettid); do_monitor(c); return 0; } -int clone_monitor(struct supertype *container) +static int clone_monitor(struct supertype *container) { - static char stack[4096]; - int rv; + pthread_attr_t attr; + pthread_t thread; + int rc; + + mon_tid = -1; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 4096); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + rc = pthread_create(&thread, &attr, run_child, container); + if (rc) + return rc; + while (mon_tid == -1) + usleep(10); + pthread_attr_destroy(&attr); + + mgr_tid = syscall(SYS_gettid); + + return mon_tid; +} +#else /* USE_PTHREADS */ +static int run_child(void *v) +{ + struct supertype *c = v; + + do_monitor(c); + return 0; +} - rv = pipe(container->mgr_pipe); - if (rv < 0) - return rv; - rv = pipe(container->mon_pipe); - if (rv < 0) - goto err_mon_pipe; +#ifdef __ia64__ +int __clone2(int (*fn)(void *), + void *child_stack_base, size_t stack_size, + int flags, void *arg, ... + /* pid_t *pid, struct user_desc *tls, pid_t *ctid */ ); +#endif +static int clone_monitor(struct supertype *container) +{ + static char stack[4096]; - rv = clone(run_child, stack+4096-64, +#ifdef __ia64__ + mon_tid = __clone2(run_child, stack, sizeof(stack), CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD, container); - if (rv < 0) - goto err_clone; - else - return rv; - - err_clone: - close(container->mon_pipe[0]); - close(container->mon_pipe[1]); - err_mon_pipe: - close(container->mgr_pipe[0]); - close(container->mgr_pipe[1]); - - return rv; -} +#else + mon_tid = clone(run_child, stack+4096-64, + CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD, + container); +#endif -static struct superswitch *find_metadata_methods(char *vers) -{ - if (strcmp(vers, "ddf") == 0) - return &super_ddf; - return NULL; -} + mgr_tid = syscall(SYS_gettid); + return mon_tid; +} +#endif /* USE_PTHREADS */ -static int make_pidfile(char *devname, int o_excl) +static int make_pidfile(char *devname) { char path[100]; char pid[10]; int fd; - sprintf(path, "/var/run/mdadm/%s.pid", devname); + int n; - fd = open(path, O_RDWR|O_CREAT|o_excl, 0600); + if (mkdir(MDMON_DIR, 0755) < 0 && + errno != EEXIST) + return -errno; + sprintf(path, "%s/%s.pid", MDMON_DIR, devname); + + fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600); if (fd < 0) - return -1; + return -errno; sprintf(pid, "%d\n", getpid()); - write(fd, pid, strlen(pid)); + n = write(fd, pid, strlen(pid)); close(fd); + if (n < 0) + return -errno; return 0; } -static void try_kill_monitor(char *devname) +static void try_kill_monitor(pid_t pid, char *devname, int sock) { char buf[100]; int fd; - pid_t pid; - - sprintf(buf, "/var/run/mdadm/%s.pid", devname); - fd = open(buf, O_RDONLY); - if (fd < 0) - return; + int n; + long fl; - if (read(fd, buf, sizeof(buf)) < 0) { - close(fd); + /* first rule of survival... don't off yourself */ + if (pid == getpid()) return; - } - - close(fd); - pid = strtoul(buf, NULL, 10); /* kill this process if it is mdmon */ sprintf(buf, "/proc/%lu/cmdline", (unsigned long) pid); @@ -137,13 +180,34 @@ static void try_kill_monitor(char *devname) if (fd < 0) return; - if (read(fd, buf, sizeof(buf)) < 0) { - close(fd); + n = read(fd, buf, sizeof(buf)-1); + buf[sizeof(buf)-1] = 0; + close(fd); + + if (n < 0 || !strstr(buf, "mdmon")) return; - } - if (strstr(buf, "mdmon") != NULL) - kill(pid, SIGTERM); + kill(pid, SIGTERM); + + /* Wait for monitor to exit by reading from the socket, after + * clearing the non-blocking flag */ + fl = fcntl(sock, F_GETFL, 0); + fl &= ~O_NONBLOCK; + fcntl(sock, F_SETFL, fl); + n = read(sock, buf, 100); + /* Ignore result, it is just the wait that + * matters + */ +} + +void remove_pidfile(char *devname) +{ + char buf[100]; + + sprintf(buf, "%s/%s.pid", MDMON_DIR, devname); + unlink(buf); + sprintf(buf, "%s/%s.sock", MDMON_DIR, devname); + unlink(buf); } static int make_control_sock(char *devname) @@ -153,7 +217,10 @@ static int make_control_sock(char *devname) long fl; struct sockaddr_un addr; - sprintf(path, "/var/run/mdadm/%s.sock", devname); + if (sigterm) + return -1; + + sprintf(path, "%s/%s.sock", MDMON_DIR, devname); unlink(path); sfd = socket(PF_LOCAL, SOCK_STREAM, 0); if (sfd < 0) @@ -172,62 +239,173 @@ static int make_control_sock(char *devname) return sfd; } +static void term(int sig) +{ + sigterm = 1; +} + +static void wake_me(int sig) +{ + +} + +/* if we are debugging and starting mdmon by hand then don't fork */ +static int do_fork(void) +{ + #ifdef DEBUG + if (check_env("MDADM_NO_MDMON")) + return 0; + #endif + + return 1; +} + +void usage(void) +{ + fprintf(stderr, "Usage: mdmon [--all] [--takeover] CONTAINER\n"); + exit(2); +} + +static int mdmon(char *devname, int devnum, int must_fork, int takeover); + int main(int argc, char *argv[]) +{ + char *container_name = NULL; + int devnum; + char *devname; + int status = 0; + int arg; + int all = 0; + int takeover = 0; + + for (arg = 1; arg < argc; arg++) { + if (strncmp(argv[arg], "--all",5) == 0 || + strcmp(argv[arg], "/proc/mdstat") == 0) { + container_name = argv[arg]; + all = 1; + } else if (strcmp(argv[arg], "--takeover") == 0) + takeover = 1; + else if (container_name == NULL) + container_name = argv[arg]; + else + usage(); + } + if (container_name == NULL) + usage(); + + if (all) { + struct mdstat_ent *mdstat, *e; + int container_len = strlen(container_name); + + /* launch an mdmon instance for each container found */ + mdstat = mdstat_read(0, 0); + for (e = mdstat; e; e = e->next) { + if (strncmp(e->metadata_version, "external:", 9) == 0 && + !is_subarray(&e->metadata_version[9])) { + devname = devnum2devname(e->devnum); + /* update cmdline so this mdmon instance can be + * distinguished from others in a call to ps(1) + */ + if (strlen(devname) <= (unsigned)container_len) { + memset(container_name, 0, container_len); + sprintf(container_name, "%s", devname); + } + status |= mdmon(devname, e->devnum, 1, + takeover); + } + } + free_mdstat(mdstat); + + return status; + } else if (strncmp(container_name, "md", 2) == 0) { + devnum = devname2devnum(container_name); + devname = devnum2devname(devnum); + if (strcmp(container_name, devname) != 0) + devname = NULL; + } else { + struct stat st; + + devnum = NoMdDev; + if (stat(container_name, &st) == 0) + devnum = stat2devnum(&st); + if (devnum == NoMdDev) + devname = NULL; + else + devname = devnum2devname(devnum); + } + + if (!devname) { + fprintf(stderr, "mdmon: %s is not a valid md device name\n", + container_name); + exit(1); + } + return mdmon(devname, devnum, do_fork(), takeover); +} + +static int mdmon(char *devname, int devnum, int must_fork, int takeover) { int mdfd; struct mdinfo *mdi, *di; struct supertype *container; - if (argc != 2) { - fprintf(stderr, "Usage: md-manage /device/name/for/container\n"); - exit(2); - } - mdfd = open(argv[1], O_RDWR); + sigset_t set; + struct sigaction act; + int pfd[2]; + int status; + int ignore; + pid_t victim = -1; + int victim_sock = -1; + + dprintf("starting mdmon for %s\n", devname); + + mdfd = open_dev(devnum); if (mdfd < 0) { - fprintf(stderr, "md-manage: %s: %s\n", argv[1], + fprintf(stderr, "mdmon: %s: %s\n", devname, strerror(errno)); - exit(1); + return 1; } if (md_get_version(mdfd) < 0) { - fprintf(stderr, "md-manage: %s: Not an md device\n", - argv[1]); - exit(1); + fprintf(stderr, "mdmon: %s: Not an md device\n", + devname); + return 1; } - /* hopefully it is a container - we'll check later */ - - container = malloc(sizeof(*container)); - container->devfd = mdfd; - container->devnum = fd2devnum(mdfd); - container->devname = devnum2devname(container->devnum); - - /* If this fails, we hope it already exists */ - mkdir("/var/run/mdadm", 0600); - /* pid file lives in /var/run/mdadm/mdXX.pid */ - if (make_pidfile(container->devname, O_EXCL) < 0) { - if (ping_monitor(container->devname) == 0) { - fprintf(stderr, "mdmon: %s already managed\n", - container->devname); - exit(3); - } else { - /* cleanup the old monitor, this one is taking over */ - try_kill_monitor(container->devname); - if (make_pidfile(container->devname, 0) < 0) { - fprintf(stderr, "mdmon: %s Cannot create pidfile\n", - container->devname); - exit(3); + /* Fork, and have the child tell us when they are ready */ + if (must_fork) { + if (pipe(pfd) != 0) { + fprintf(stderr, "mdmon: failed to create pipe\n"); + return 1; + } + switch(fork()) { + case -1: + fprintf(stderr, "mdmon: failed to fork: %s\n", + strerror(errno)); + return 1; + case 0: /* child */ + close(pfd[0]); + break; + default: /* parent */ + close(pfd[1]); + if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) { + wait(&status); + status = WEXITSTATUS(status); } + return status; } - } + } else + pfd[0] = pfd[1] = -1; + + container = calloc(1, sizeof(*container)); + container->devnum = devnum; + container->devname = devname; + container->arrays = NULL; + container->sock = -1; - container->sock = make_control_sock(container->devname); - if (container->sock < 0) { - fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n"); + if (!container->devname) { + fprintf(stderr, "mdmon: failed to allocate container name string\n"); exit(3); } - container->arrays = NULL; - mdi = sysfs_read(mdfd, container->devnum, - GET_VERSION|GET_LEVEL|GET_DEVS); + mdi = sysfs_read(mdfd, container->devnum, GET_VERSION|GET_LEVEL|GET_DEVS); if (!mdi) { fprintf(stderr, "mdmon: failed to load sysfs info for %s\n", @@ -236,48 +414,105 @@ int main(int argc, char *argv[]) } if (mdi->array.level != UnSet) { fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n", - argv[1]); + devname); exit(3); } if (mdi->array.major_version != -1 || mdi->array.minor_version != -2) { fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n", - argv[1]); + devname); exit(3); } - container->ss = find_metadata_methods(mdi->text_version); + container->ss = version_to_superswitch(mdi->text_version); if (container->ss == NULL) { - fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n", - argv[1], mdi->text_version); + fprintf(stderr, "mdmon: %s uses unsupported metadata: %s\n", + devname, mdi->text_version); exit(3); } container->devs = NULL; for (di = mdi->devs; di; di = di->next) { struct mdinfo *cd = malloc(sizeof(*cd)); - cd = di; + *cd = *di; cd->next = container->devs; container->devs = cd; } sysfs_free(mdi); - - if (container->ss->load_super(container, mdfd, argv[1])) { + /* SIGUSR is sent between parent and child. So both block it + * and enable it only with pselect. + */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + sigaddset(&set, SIGTERM); + sigprocmask(SIG_BLOCK, &set, NULL); + act.sa_handler = wake_me; + act.sa_flags = 0; + sigaction(SIGUSR1, &act, NULL); + act.sa_handler = term; + sigaction(SIGTERM, &act, NULL); + act.sa_handler = SIG_IGN; + sigaction(SIGPIPE, &act, NULL); + + victim = mdmon_pid(container->devnum); + if (victim >= 0) + victim_sock = connect_monitor(container->devname); + + ignore = chdir("/"); + if (!takeover && victim > 0 && victim_sock >= 0) { + if (fping_monitor(victim_sock) == 0) { + fprintf(stderr, "mdmon: %s already managed\n", + container->devname); + exit(3); + } + close(victim_sock); + } + if (container->ss->load_super(container, mdfd, devname)) { fprintf(stderr, "mdmon: Cannot load metadata for %s\n", - argv[1]); + devname); exit(3); } + close(mdfd); + /* Ok, this is close enough. We can say goodbye to our parent now. + */ + if (victim > 0) + remove_pidfile(devname); + if (make_pidfile(devname) < 0) { + exit(3); + } + container->sock = make_control_sock(devname); + + status = 0; + if (write(pfd[1], &status, sizeof(status)) < 0) + fprintf(stderr, "mdmon: failed to notify our parent: %d\n", + getppid()); + close(pfd[1]); - mlockall(MCL_FUTURE); + mlockall(MCL_CURRENT | MCL_FUTURE); if (clone_monitor(container) < 0) { - fprintf(stderr, "md-manage: failed to start monitor process: %s\n", + fprintf(stderr, "mdmon: failed to start monitor process: %s\n", strerror(errno)); exit(2); } + if (victim > 0) { + try_kill_monitor(victim, container->devname, victim_sock); + close(victim_sock); + } + + setsid(); + close(0); + open("/dev/null", O_RDWR); + close(1); + ignore = dup(0); +#ifndef DEBUG + close(2); + ignore = dup(0); +#endif + do_manager(container); exit(0);