+/*
+ * mdmon - monitor external metadata arrays
+ *
+ * Copyright (C) 2007-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
/*
* md array manager.
#include <string.h>
#include <fcntl.h>
#include <signal.h>
-
+#include <dirent.h>
+#ifdef USE_PTHREADS
+#include <pthread.h>
+#else
#include <sched.h>
+#endif
#include "mdadm.h"
#include "mdmon.h"
int mon_tid, mgr_tid;
-int run_child(void *v)
+int sigterm;
+
+#ifdef USE_PTHREADS
+static void *run_child(void *v)
{
struct supertype *c = v;
+ mon_tid = syscall(SYS_gettid);
do_monitor(c);
return 0;
}
-int clone_monitor(struct supertype *container)
+static int clone_monitor(struct supertype *container)
+{
+ pthread_attr_t attr;
+ pthread_t thread;
+ int rc;
+
+ mon_tid = -1;
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 4096);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ rc = pthread_create(&thread, &attr, run_child, container);
+ if (rc)
+ return rc;
+ while (mon_tid == -1)
+ usleep(10);
+ pthread_attr_destroy(&attr);
+
+ mgr_tid = syscall(SYS_gettid);
+
+ return mon_tid;
+}
+#else /* USE_PTHREADS */
+static int run_child(void *v)
+{
+ struct supertype *c = v;
+
+ do_monitor(c);
+ return 0;
+}
+
+#ifdef __ia64__
+int __clone2(int (*fn)(void *),
+ void *child_stack_base, size_t stack_size,
+ int flags, void *arg, ...
+ /* pid_t *pid, struct user_desc *tls, pid_t *ctid */ );
+#endif
+static int clone_monitor(struct supertype *container)
{
static char stack[4096];
+#ifdef __ia64__
+ mon_tid = __clone2(run_child, stack, sizeof(stack),
+ CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
+ container);
+#else
mon_tid = clone(run_child, stack+4096-64,
CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
container);
+#endif
mgr_tid = syscall(SYS_gettid);
return mon_tid;
}
+#endif /* USE_PTHREADS */
-static struct superswitch *find_metadata_methods(char *vers)
-{
- if (strcmp(vers, "ddf") == 0)
- return &super_ddf;
- if (strcmp(vers, "imsm") == 0)
- return &super_imsm;
- return NULL;
-}
-
-
-int make_pidfile(char *devname, int o_excl)
+static int make_pidfile(char *devname)
{
char path[100];
char pid[10];
int fd;
int n;
- sprintf(path, "/var/run/mdadm/%s.pid", devname);
+ if (mkdir(MDMON_DIR, 0755) < 0 &&
+ errno != EEXIST)
+ return -errno;
+ sprintf(path, "%s/%s.pid", MDMON_DIR, devname);
- fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
+ fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600);
if (fd < 0)
return -errno;
sprintf(pid, "%d\n", getpid());
return 0;
}
-static void try_kill_monitor(char *devname)
+static void try_kill_monitor(pid_t pid, char *devname, int sock)
{
char buf[100];
int fd;
- pid_t pid;
-
- sprintf(buf, "/var/run/mdadm/%s.pid", devname);
- fd = open(buf, O_RDONLY);
- if (fd < 0)
- return;
+ int n;
+ long fl;
- if (read(fd, buf, sizeof(buf)) < 0) {
- close(fd);
+ /* first rule of survival... don't off yourself */
+ if (pid == getpid())
return;
- }
-
- close(fd);
- pid = strtoul(buf, NULL, 10);
/* kill this process if it is mdmon */
sprintf(buf, "/proc/%lu/cmdline", (unsigned long) pid);
if (fd < 0)
return;
- if (read(fd, buf, sizeof(buf)) < 0) {
- close(fd);
+ n = read(fd, buf, sizeof(buf)-1);
+ buf[sizeof(buf)-1] = 0;
+ close(fd);
+
+ if (n < 0 || !strstr(buf, "mdmon"))
return;
- }
- if (strstr(buf, "mdmon") != NULL)
- kill(pid, SIGTERM);
+ kill(pid, SIGTERM);
+
+ /* Wait for monitor to exit by reading from the socket, after
+ * clearing the non-blocking flag */
+ fl = fcntl(sock, F_GETFL, 0);
+ fl &= ~O_NONBLOCK;
+ fcntl(sock, F_SETFL, fl);
+ n = read(sock, buf, 100);
+ /* Ignore result, it is just the wait that
+ * matters
+ */
}
void remove_pidfile(char *devname)
{
char buf[100];
- sprintf(buf, "/var/run/mdadm/%s.pid", devname);
+ sprintf(buf, "%s/%s.pid", MDMON_DIR, devname);
unlink(buf);
- sprintf(buf, "/var/run/mdadm/%s.sock", devname);
+ sprintf(buf, "%s/%s.sock", MDMON_DIR, devname);
unlink(buf);
}
-int make_control_sock(char *devname)
+static int make_control_sock(char *devname)
{
char path[100];
int sfd;
long fl;
struct sockaddr_un addr;
- sprintf(path, "/var/run/mdadm/%s.sock", devname);
+ if (sigterm)
+ return -1;
+
+ sprintf(path, "%s/%s.sock", MDMON_DIR, devname);
unlink(path);
sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
if (sfd < 0)
return sfd;
}
-int socket_hup_requested;
-static void hup(int sig)
+static void term(int sig)
{
- socket_hup_requested = 1;
+ sigterm = 1;
}
static void wake_me(int sig)
static int do_fork(void)
{
#ifdef DEBUG
- if (env_no_mdmon())
+ if (check_env("MDADM_NO_MDMON"))
return 0;
#endif
return 1;
}
+void usage(void)
+{
+ fprintf(stderr, "Usage: mdmon [--all] [--takeover] CONTAINER\n");
+ exit(2);
+}
+static int mdmon(char *devname, int devnum, int must_fork, int takeover);
int main(int argc, char *argv[])
+{
+ char *container_name = NULL;
+ int devnum;
+ char *devname;
+ int status = 0;
+ int arg;
+ int all = 0;
+ int takeover = 0;
+
+ for (arg = 1; arg < argc; arg++) {
+ if (strncmp(argv[arg], "--all",5) == 0 ||
+ strcmp(argv[arg], "/proc/mdstat") == 0) {
+ container_name = argv[arg];
+ all = 1;
+ } else if (strcmp(argv[arg], "--takeover") == 0)
+ takeover = 1;
+ else if (container_name == NULL)
+ container_name = argv[arg];
+ else
+ usage();
+ }
+ if (container_name == NULL)
+ usage();
+
+ if (all) {
+ struct mdstat_ent *mdstat, *e;
+ int container_len = strlen(container_name);
+
+ /* launch an mdmon instance for each container found */
+ mdstat = mdstat_read(0, 0);
+ for (e = mdstat; e; e = e->next) {
+ if (strncmp(e->metadata_version, "external:", 9) == 0 &&
+ !is_subarray(&e->metadata_version[9])) {
+ devname = devnum2devname(e->devnum);
+ /* update cmdline so this mdmon instance can be
+ * distinguished from others in a call to ps(1)
+ */
+ if (strlen(devname) <= (unsigned)container_len) {
+ memset(container_name, 0, container_len);
+ sprintf(container_name, "%s", devname);
+ }
+ status |= mdmon(devname, e->devnum, 1,
+ takeover);
+ }
+ }
+ free_mdstat(mdstat);
+
+ return status;
+ } else if (strncmp(container_name, "md", 2) == 0) {
+ devnum = devname2devnum(container_name);
+ devname = devnum2devname(devnum);
+ if (strcmp(container_name, devname) != 0)
+ devname = NULL;
+ } else {
+ struct stat st;
+
+ devnum = NoMdDev;
+ if (stat(container_name, &st) == 0)
+ devnum = stat2devnum(&st);
+ if (devnum == NoMdDev)
+ devname = NULL;
+ else
+ devname = devnum2devname(devnum);
+ }
+
+ if (!devname) {
+ fprintf(stderr, "mdmon: %s is not a valid md device name\n",
+ container_name);
+ exit(1);
+ }
+ return mdmon(devname, devnum, do_fork(), takeover);
+}
+
+static int mdmon(char *devname, int devnum, int must_fork, int takeover)
{
int mdfd;
struct mdinfo *mdi, *di;
int pfd[2];
int status;
int ignore;
+ pid_t victim = -1;
+ int victim_sock = -1;
- if (argc != 2) {
- fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
- exit(2);
- }
- mdfd = open(argv[1], O_RDWR);
+ dprintf("starting mdmon for %s\n", devname);
+
+ mdfd = open_dev(devnum);
if (mdfd < 0) {
- fprintf(stderr, "md-manage: %s: %s\n", argv[1],
+ fprintf(stderr, "mdmon: %s: %s\n", devname,
strerror(errno));
- exit(1);
+ return 1;
}
if (md_get_version(mdfd) < 0) {
- fprintf(stderr, "md-manage: %s: Not an md device\n",
- argv[1]);
- exit(1);
+ fprintf(stderr, "mdmon: %s: Not an md device\n",
+ devname);
+ return 1;
}
/* Fork, and have the child tell us when they are ready */
- if (do_fork()) {
+ if (must_fork) {
if (pipe(pfd) != 0) {
fprintf(stderr, "mdmon: failed to create pipe\n");
- exit(1);
+ return 1;
}
switch(fork()) {
case -1:
fprintf(stderr, "mdmon: failed to fork: %s\n",
strerror(errno));
- exit(1);
+ return 1;
case 0: /* child */
close(pfd[0]);
break;
wait(&status);
status = WEXITSTATUS(status);
}
- exit(status);
+ return status;
}
} else
pfd[0] = pfd[1] = -1;
- /* hopefully it is a container - we'll check later */
-
- container = malloc(sizeof(*container));
- container->devnum = fd2devnum(mdfd);
- container->devname = devnum2devname(container->devnum);
- container->device_name = argv[1];
-
- /* If this fails, we hope it already exists */
- mkdir("/var/run/mdadm", 0600);
- /* pid file lives in /var/run/mdadm/mdXX.pid */
- if (make_pidfile(container->devname, O_EXCL) < 0) {
- if (ping_monitor(container->devname) == 0) {
- fprintf(stderr, "mdmon: %s already managed\n",
- container->devname);
- exit(3);
- } else {
- int err;
-
- /* cleanup the old monitor, this one is taking over */
- try_kill_monitor(container->devname);
- err = make_pidfile(container->devname, 0);
- if (err < 0) {
- fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
- container->devname);
- if (err == -EROFS) {
- /* FIXME implement a mechanism to
- * prevent duplicate monitor instances
- */
- fprintf(stderr,
- "mdmon: continuing on read-only file system\n");
- } else
- exit(3);
- }
- }
- }
- container->sock = make_control_sock(container->devname);
+ container = calloc(1, sizeof(*container));
+ container->devnum = devnum;
+ container->devname = devname;
container->arrays = NULL;
+ container->subarray[0] = 0;
+ container->sock = -1;
- mdi = sysfs_read(mdfd, container->devnum,
- GET_VERSION|GET_LEVEL|GET_DEVS);
+ if (!container->devname) {
+ fprintf(stderr, "mdmon: failed to allocate container name string\n");
+ exit(3);
+ }
+
+ mdi = sysfs_read(mdfd, container->devnum, GET_VERSION|GET_LEVEL|GET_DEVS);
if (!mdi) {
fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
}
if (mdi->array.level != UnSet) {
fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
- argv[1]);
+ devname);
exit(3);
}
if (mdi->array.major_version != -1 ||
mdi->array.minor_version != -2) {
fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
- argv[1]);
+ devname);
exit(3);
}
- container->ss = find_metadata_methods(mdi->text_version);
+ container->ss = version_to_superswitch(mdi->text_version);
if (container->ss == NULL) {
- fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
- argv[1], mdi->text_version);
+ fprintf(stderr, "mdmon: %s uses unsupported metadata: %s\n",
+ devname, mdi->text_version);
exit(3);
}
}
sysfs_free(mdi);
+ /* SIGUSR is sent between parent and child. So both block it
+ * and enable it only with pselect.
+ */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGTERM);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+ act.sa_handler = wake_me;
+ act.sa_flags = 0;
+ sigaction(SIGUSR1, &act, NULL);
+ act.sa_handler = term;
+ sigaction(SIGTERM, &act, NULL);
+ act.sa_handler = SIG_IGN;
+ sigaction(SIGPIPE, &act, NULL);
- if (container->ss->load_super(container, mdfd, argv[1])) {
+ victim = mdmon_pid(container->devnum);
+ if (victim >= 0)
+ victim_sock = connect_monitor(container->devname);
+
+ ignore = chdir("/");
+ if (!takeover && victim > 0 && victim_sock >= 0) {
+ if (fping_monitor(victim_sock) == 0) {
+ fprintf(stderr, "mdmon: %s already managed\n",
+ container->devname);
+ exit(3);
+ }
+ close(victim_sock);
+ }
+ if (container->ss->load_super(container, mdfd, devname)) {
fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
- argv[1]);
+ devname);
exit(3);
}
+ close(mdfd);
/* Ok, this is close enough. We can say goodbye to our parent now.
*/
+ if (victim > 0)
+ remove_pidfile(devname);
+ if (make_pidfile(devname) < 0) {
+ exit(3);
+ }
+ container->sock = make_control_sock(devname);
+
status = 0;
if (write(pfd[1], &status, sizeof(status)) < 0)
fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
getppid());
close(pfd[1]);
- ignore = chdir("/");
+ mlockall(MCL_CURRENT | MCL_FUTURE);
+
+ if (clone_monitor(container) < 0) {
+ fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
+ strerror(errno));
+ exit(2);
+ }
+
+ if (victim > 0) {
+ try_kill_monitor(victim, container->devname, victim_sock);
+ close(victim_sock);
+ }
+
setsid();
close(0);
open("/dev/null", O_RDWR);
ignore = dup(0);
#endif
- mlockall(MCL_FUTURE);
-
- /* SIGUSR is sent between parent and child. So both block it
- * and enable it only with pselect.
- */
- sigemptyset(&set);
- sigaddset(&set, SIGUSR1);
- sigaddset(&set, SIGHUP);
- sigaddset(&set, SIGALRM);
- sigprocmask(SIG_BLOCK, &set, NULL);
- act.sa_handler = wake_me;
- act.sa_flags = 0;
- sigaction(SIGUSR1, &act, NULL);
- sigaction(SIGALRM, &act, NULL);
- act.sa_handler = hup;
- sigaction(SIGHUP, &act, NULL);
- act.sa_handler = SIG_IGN;
- sigaction(SIGPIPE, &act, NULL);
-
- if (clone_monitor(container) < 0) {
- fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
- strerror(errno));
- exit(2);
- }
-
do_manager(container);
exit(0);