X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=mdmon.c;h=ff985d291ee9045ede3daf68c1184af3ee0384eb;hb=007087d0898a045901e4e120296e6d9b845b20a6;hp=31994d8c55ec18871f86b7d1b999b4917194cacc;hpb=ae80545ac3b75d8acea6060b040e5b4f345690e4;p=thirdparty%2Fmdadm.git diff --git a/mdmon.c b/mdmon.c index 31994d8c..ff985d29 100644 --- a/mdmon.c +++ b/mdmon.c @@ -58,12 +58,17 @@ #include #include #include - +#ifdef USE_PTHREADS +#include +#else #include +#endif #include "mdadm.h" #include "mdmon.h" +char const Name[] = "mdmon"; + struct active_array *discard_this; struct active_array *pending_discard; @@ -71,7 +76,39 @@ int mon_tid, mgr_tid; int sigterm; -int run_child(void *v) +#ifdef USE_PTHREADS +static void *run_child(void *v) +{ + struct supertype *c = v; + + mon_tid = syscall(SYS_gettid); + do_monitor(c); + return 0; +} + +static int clone_monitor(struct supertype *container) +{ + pthread_attr_t attr; + pthread_t thread; + int rc; + + mon_tid = -1; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 4096); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + rc = pthread_create(&thread, &attr, run_child, container); + if (rc) + return rc; + while (mon_tid == -1) + usleep(10); + pthread_attr_destroy(&attr); + + mgr_tid = syscall(SYS_gettid); + + return mon_tid; +} +#else /* USE_PTHREADS */ +static int run_child(void *v) { struct supertype *c = v; @@ -85,7 +122,7 @@ int __clone2(int (*fn)(void *), int flags, void *arg, ... /* pid_t *pid, struct user_desc *tls, pid_t *ctid */ ); #endif - int clone_monitor(struct supertype *container) +static int clone_monitor(struct supertype *container) { static char stack[4096]; @@ -103,30 +140,21 @@ int __clone2(int (*fn)(void *), return mon_tid; } +#endif /* USE_PTHREADS */ -static struct superswitch *find_metadata_methods(char *vers) -{ - if (strcmp(vers, "ddf") == 0) - return &super_ddf; - if (strcmp(vers, "imsm") == 0) - return &super_imsm; - return NULL; -} - - -int make_pidfile(char *devname, int o_excl) +static int make_pidfile(char *devname) { char path[100]; char pid[10]; int fd; int n; - if (sigterm) - return -1; - - sprintf(path, "/var/run/mdadm/%s.pid", devname); + if (mkdir(MDMON_DIR, 0755) < 0 && + errno != EEXIST) + return -errno; + sprintf(path, "%s/%s.pid", MDMON_DIR, devname); - fd = open(path, O_RDWR|O_CREAT|o_excl, 0600); + fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600); if (fd < 0) return -errno; sprintf(pid, "%d\n", getpid()); @@ -137,38 +165,13 @@ int make_pidfile(char *devname, int o_excl) return 0; } -int is_container_member(struct mdstat_ent *mdstat, char *container) -{ - if (mdstat->metadata_version == NULL || - strncmp(mdstat->metadata_version, "external:", 9) != 0 || - !is_subarray(mdstat->metadata_version+9) || - strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 || - mdstat->metadata_version[10+strlen(container)] != '/') - return 0; - - return 1; -} - -void remove_pidfile(char *devname); -static void try_kill_monitor(char *devname) +static void try_kill_monitor(pid_t pid, char *devname, int sock) { char buf[100]; int fd; - pid_t pid; - struct mdstat_ent *mdstat; - - sprintf(buf, "/var/run/mdadm/%s.pid", devname); - fd = open(buf, O_RDONLY); - if (fd < 0) - return; - - if (read(fd, buf, sizeof(buf)) < 0) { - close(fd); - return; - } - - close(fd); - pid = strtoul(buf, NULL, 10); + int n; + long fl; + int rv; /* first rule of survival... don't off yourself */ if (pid == getpid()) @@ -180,40 +183,48 @@ static void try_kill_monitor(char *devname) if (fd < 0) return; - if (read(fd, buf, sizeof(buf)) < 0) { - close(fd); - return; - } + n = read(fd, buf, sizeof(buf)-1); + buf[sizeof(buf)-1] = 0; + close(fd); - if (!strstr(buf, "mdmon")) + if (n < 0 || !(strstr(buf, "mdmon") || + strstr(buf, "@dmon"))) return; kill(pid, SIGTERM); - mdstat = mdstat_read(0, 0); - for ( ; mdstat; mdstat = mdstat->next) - if (is_container_member(mdstat, devname)) { - sprintf(buf, "/dev/%s", mdstat->dev); - WaitClean(buf, 0); - } - free_mdstat(mdstat); - remove_pidfile(devname); + if (sock < 0) + return; + + /* Wait for monitor to exit by reading from the socket, after + * clearing the non-blocking flag */ + fl = fcntl(sock, F_GETFL, 0); + fl &= ~O_NONBLOCK; + fcntl(sock, F_SETFL, fl); + n = read(sock, buf, 100); + + /* If there is I/O going on it might took some time to get to + * clean state. Wait for monitor to exit fully to avoid races. + * Ping it with SIGUSR1 in case that it is sleeping */ + for (n = 0; n < 25; n++) { + rv = kill(pid, SIGUSR1); + if (rv < 0) + break; + usleep(200000); + } } void remove_pidfile(char *devname) { char buf[100]; - if (sigterm) - return; - - sprintf(buf, "/var/run/mdadm/%s.pid", devname); + sprintf(buf, "%s/%s.pid", MDMON_DIR, devname); unlink(buf); - sprintf(buf, "/var/run/mdadm/%s.sock", devname); + sprintf(buf, "%s/%s.sock", MDMON_DIR, devname); unlink(buf); } -int make_control_sock(char *devname) +static int make_control_sock(char *devname) { char path[100]; int sfd; @@ -223,7 +234,7 @@ int make_control_sock(char *devname) if (sigterm) return -1; - sprintf(path, "/var/run/mdadm/%s.sock", devname); + sprintf(path, "%s/%s.sock", MDMON_DIR, devname); unlink(path); sfd = socket(PF_LOCAL, SOCK_STREAM, 0); if (sfd < 0) @@ -231,7 +242,8 @@ int make_control_sock(char *devname) addr.sun_family = PF_LOCAL; strcpy(addr.sun_path, path); - if (bind(sfd, &addr, sizeof(addr)) < 0) { + umask(077); /* ensure no world write access */ + if (bind(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { close(sfd); return -1; } @@ -242,12 +254,6 @@ int make_control_sock(char *devname) return sfd; } -int socket_hup_requested; -static void hup(int sig) -{ - socket_hup_requested = 1; -} - static void term(int sig) { sigterm = 1; @@ -271,81 +277,126 @@ static int do_fork(void) void usage(void) { - fprintf(stderr, "Usage: mdmon /device/name/for/container [target_dir]\n"); + fprintf(stderr, +"Usage: mdmon [options] CONTAINER\n" +"\n" +"Options are:\n" +" --help -h : This message\n" +" --all -a : All devices\n" +" --foreground -F : Run in foreground (do not fork)\n" +" --takeover -t : Takeover container\n" +); exit(2); } -int mdmon(char *devname, int devnum, int scan, char *switchroot); +static int mdmon(char *devnm, int must_fork, int takeover); int main(int argc, char *argv[]) { char *container_name = NULL; - char *switchroot = NULL; - int devnum; - char *devname; - int scan = 0; + char *devnm = NULL; int status = 0; + int opt; + int all = 0; + int takeover = 0; + int dofork = 1; + static struct option options[] = { + {"all", 0, NULL, 'a'}, + {"takeover", 0, NULL, 't'}, + {"help", 0, NULL, 'h'}, + {"offroot", 0, NULL, OffRootOpt}, + {"foreground", 0, NULL, 'F'}, + {NULL, 0, NULL, 0} + }; + + if (in_initrd()) { + /* + * set first char of argv[0] to @. This is used by + * systemd to signal that the task was launched from + * initrd/initramfs and should be preserved during shutdown + */ + argv[0][0] = '@'; + } - switch (argc) { - case 3: - switchroot = argv[2]; - case 2: - container_name = argv[1]; - break; - default: - usage(); + while ((opt = getopt_long(argc, argv, "thaF", options, NULL)) != -1) { + switch (opt) { + case 'a': + container_name = argv[optind-1]; + all = 1; + break; + case 't': + takeover = 1; + break; + case 'F': + dofork = 0; + break; + case OffRootOpt: + argv[0][0] = '@'; + break; + case 'h': + default: + usage(); + break; + } + } + + if (all == 0 && container_name == NULL) { + if (argv[optind]) + container_name = argv[optind]; } - if (strcmp(container_name, "/proc/mdstat") == 0) { + if (container_name == NULL) + usage(); + + if (argc - optind > 1) + usage(); + + if (strcmp(container_name, "/proc/mdstat") == 0) + all = 1; + + if (all) { struct mdstat_ent *mdstat, *e; + int container_len = strlen(container_name); /* launch an mdmon instance for each container found */ - scan = 1; mdstat = mdstat_read(0, 0); for (e = mdstat; e; e = e->next) { - if (strncmp(e->metadata_version, "external:", 9) == 0 && + if (e->metadata_version && + strncmp(e->metadata_version, "external:", 9) == 0 && !is_subarray(&e->metadata_version[9])) { - devname = devnum2devname(e->devnum); /* update cmdline so this mdmon instance can be * distinguished from others in a call to ps(1) */ - if (strlen(devname) <= strlen(container_name)) { - memset(container_name, 0, strlen(container_name)); - sprintf(container_name, "%s", devname); + if (strlen(e->devnm) <= (unsigned)container_len) { + memset(container_name, 0, container_len); + sprintf(container_name, "%s", e->devnm); } - status |= mdmon(devname, e->devnum, scan, - switchroot); + status |= mdmon(e->devnm, 1, takeover); } } free_mdstat(mdstat); return status; } else if (strncmp(container_name, "md", 2) == 0) { - devnum = devname2devnum(container_name); - devname = devnum2devname(devnum); - if (strcmp(container_name, devname) != 0) - devname = NULL; + int id = devnm2devid(container_name); + if (id) + devnm = container_name; } else { struct stat st; - devnum = NoMdDev; if (stat(container_name, &st) == 0) - devnum = stat2devnum(&st); - if (devnum == NoMdDev) - devname = NULL; - else - devname = devnum2devname(devnum); + devnm = xstrdup(stat2devnm(&st)); } - if (!devname) { - fprintf(stderr, "mdmon: %s is not a valid md device name\n", + if (!devnm) { + pr_err("%s is not a valid md device name\n", container_name); exit(1); } - return mdmon(devname, devnum, scan, switchroot); + return mdmon(devnm, dofork && do_fork(), takeover); } -int mdmon(char *devname, int devnum, int scan, char *switchroot) +static int mdmon(char *devnm, int must_fork, int takeover) { int mdfd; struct mdinfo *mdi, *di; @@ -355,31 +406,26 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot) int pfd[2]; int status; int ignore; + pid_t victim = -1; + int victim_sock = -1; + + dprintf("starting mdmon for %s\n", devnm); - dprintf("starting mdmon for %s in %s\n", - devname, switchroot ? : "/"); - mdfd = open_dev(devnum); + mdfd = open_dev(devnm); if (mdfd < 0) { - fprintf(stderr, "mdmon: %s: %s\n", devname, - strerror(errno)); - return 1; - } - if (md_get_version(mdfd) < 0) { - fprintf(stderr, "mdmon: %s: Not an md device\n", - devname); + pr_err("%s: %s\n", devnm, strerror(errno)); return 1; } /* Fork, and have the child tell us when they are ready */ - if (do_fork() || scan) { + if (must_fork) { if (pipe(pfd) != 0) { - fprintf(stderr, "mdmon: failed to create pipe\n"); + pr_err("failed to create pipe\n"); return 1; } switch(fork()) { case -1: - fprintf(stderr, "mdmon: failed to fork: %s\n", - strerror(errno)); + pr_err("failed to fork: %s\n", strerror(errno)); return 1; case 0: /* child */ close(pfd[0]); @@ -390,52 +436,44 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot) wait(&status); status = WEXITSTATUS(status); } + close(pfd[0]); return status; } } else pfd[0] = pfd[1] = -1; - container = calloc(1, sizeof(*container)); - container->devnum = devnum; - container->devname = devname; + container = xcalloc(1, sizeof(*container)); + strcpy(container->devnm, devnm); container->arrays = NULL; - container->subarray[0] = 0; + container->sock = -1; - if (!container->devname) { - fprintf(stderr, "mdmon: failed to allocate container name string\n"); - exit(3); - } - - mdi = sysfs_read(mdfd, container->devnum, - GET_VERSION|GET_LEVEL|GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(mdfd, container->devnm, GET_VERSION|GET_LEVEL|GET_DEVS); if (!mdi) { - fprintf(stderr, "mdmon: failed to load sysfs info for %s\n", - container->devname); + pr_err("failed to load sysfs info for %s\n", container->devnm); exit(3); } if (mdi->array.level != UnSet) { - fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n", - devname); + pr_err("%s is not a container - cannot monitor\n", devnm); exit(3); } if (mdi->array.major_version != -1 || mdi->array.minor_version != -2) { - fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n", - devname); + pr_err("%s does not use external metadata - cannot monitor\n", + devnm); exit(3); } - container->ss = find_metadata_methods(mdi->text_version); + container->ss = version_to_superswitch(mdi->text_version); if (container->ss == NULL) { - fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n", - devname, mdi->text_version); + pr_err("%s uses unsupported metadata: %s\n", + devnm, mdi->text_version); exit(3); } container->devs = NULL; for (di = mdi->devs; di; di = di->next) { - struct mdinfo *cd = malloc(sizeof(*cd)); + struct mdinfo *cd = xmalloc(sizeof(*cd)); *cd = *di; cd->next = container->devs; container->devs = cd; @@ -447,84 +485,65 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot) */ sigemptyset(&set); sigaddset(&set, SIGUSR1); - sigaddset(&set, SIGHUP); - sigaddset(&set, SIGALRM); sigaddset(&set, SIGTERM); sigprocmask(SIG_BLOCK, &set, NULL); act.sa_handler = wake_me; act.sa_flags = 0; sigaction(SIGUSR1, &act, NULL); - sigaction(SIGALRM, &act, NULL); - act.sa_handler = hup; - sigaction(SIGHUP, &act, NULL); act.sa_handler = term; sigaction(SIGTERM, &act, NULL); act.sa_handler = SIG_IGN; sigaction(SIGPIPE, &act, NULL); - if (switchroot) { - /* we assume we assume that /sys /proc /dev are available in - * the new root (see nash:setuproot) - * - * kill any monitors in the current namespace and change - * to the new one - */ - try_kill_monitor(container->devname); - if (chroot(switchroot) != 0) { - fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n", - switchroot, strerror(errno)); - exit(4); - } - } + victim = mdmon_pid(container->devnm); + if (victim >= 0) + victim_sock = connect_monitor(container->devnm); - /* If this fails, we hope it already exists - * pid file lives in /var/run/mdadm/mdXX.pid - */ - mkdir("/var", 0600); - mkdir("/var/run", 0600); - mkdir("/var/run/mdadm", 0600); ignore = chdir("/"); - if (make_pidfile(container->devname, O_EXCL) < 0) { - if (ping_monitor(container->devname) == 0) { - fprintf(stderr, "mdmon: %s already managed\n", - container->devname); + if (!takeover && victim > 0 && victim_sock >= 0) { + if (fping_monitor(victim_sock) == 0) { + pr_err("%s already managed\n", container->devnm); exit(3); - } else { - int err; - - /* cleanup the old monitor, this one is taking over */ - try_kill_monitor(container->devname); - err = make_pidfile(container->devname, 0); - if (err < 0) { - fprintf(stderr, "mdmon: %s Cannot create pidfile\n", - container->devname); - if (err == -EROFS) { - /* FIXME implement a mechanism to - * prevent duplicate monitor instances - */ - fprintf(stderr, - "mdmon: continuing on read-only file system\n"); - } else - exit(3); - } } + close(victim_sock); + victim_sock = -1; } - container->sock = make_control_sock(container->devname); - - if (container->ss->load_super(container, mdfd, devname)) { - fprintf(stderr, "mdmon: Cannot load metadata for %s\n", - devname); + if (container->ss->load_container(container, mdfd, devnm)) { + pr_err("Cannot load metadata for %s\n", devnm); exit(3); } close(mdfd); /* Ok, this is close enough. We can say goodbye to our parent now. */ + if (victim > 0) + remove_pidfile(devnm); + if (make_pidfile(devnm) < 0) { + exit(3); + } + container->sock = make_control_sock(devnm); + status = 0; - if (write(pfd[1], &status, sizeof(status)) < 0) - fprintf(stderr, "mdmon: failed to notify our parent: %d\n", - getppid()); - close(pfd[1]); + if (pfd[1] >= 0) { + if (write(pfd[1], &status, sizeof(status)) < 0) + pr_err("failed to notify our parent: %d\n", + getppid()); + close(pfd[1]); + } + + mlockall(MCL_CURRENT | MCL_FUTURE); + + if (clone_monitor(container) < 0) { + pr_err("failed to start monitor process: %s\n", + strerror(errno)); + exit(2); + } + + if (victim > 0) { + try_kill_monitor(victim, container->devnm, victim_sock); + if (victim_sock >= 0) + close(victim_sock); + } setsid(); close(0); @@ -536,15 +555,47 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot) ignore = dup(0); #endif - mlockall(MCL_FUTURE); - - if (clone_monitor(container) < 0) { - fprintf(stderr, "mdmon: failed to start monitor process: %s\n", - strerror(errno)); - exit(2); - } + /* This silliness is to stop the compiler complaining + * that we ignore 'ignore' + */ + if (ignore) + ignore++; do_manager(container); exit(0); } + +/* Some stub functions so super-* can link with us */ +int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape, + struct supertype *st, unsigned long blocks, + int *fds, unsigned long long *offsets, + int dests, int *destfd, unsigned long long *destoffsets) +{ + return 0; +} + +int restore_stripes(int *dest, unsigned long long *offsets, + int raid_disks, int chunk_size, int level, int layout, + int source, unsigned long long read_offset, + unsigned long long start, unsigned long long length, + char *src_buf) +{ + return 1; +} + +int save_stripes(int *source, unsigned long long *offsets, + int raid_disks, int chunk_size, int level, int layout, + int nwrites, int *dest, + unsigned long long start, unsigned long long length, + char *buf) +{ + return 0; +} + +struct superswitch super0 = { + .name = "0.90", +}; +struct superswitch super1 = { + .name = "1.x", +};