From f94d52f43ea70665f0293de0fe8940d64931e893 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 12 Jul 2008 20:27:40 +1000 Subject: [PATCH] Handle device removal from container This really should be done in mdadm, not mdmon. We ensure the device won't be suddenly committed as a hot-spare using O_EXCL, then check the 'holders' sysfs directory to make sure it is only in use once. --- Manage.c | 113 +++++++++++++++++++++++++++------------------------- managemon.c | 8 ---- mdadm.h | 1 + monitor.c | 43 -------------------- msg.c | 13 ------ msg.h | 8 ---- sysfs.c | 63 +++++++++++++++++++++++++++++ 7 files changed, 123 insertions(+), 126 deletions(-) diff --git a/Manage.c b/Manage.c index 9197eea5..efc7c7a8 100644 --- a/Manage.c +++ b/Manage.c @@ -171,54 +171,6 @@ int Manage_reconfig(char *devname, int fd, int layout) return 0; } -static int -add_remove_device_container(int fd, int add_remove, struct stat *stb) -{ - int devnum = fd2devnum(fd); - char *devname = devnum2devname(devnum); - int sfd = devname ? connect_monitor(devname) : -1; - struct md_message msg; - int err = 0; - - if (devname && sfd < 0) { - fprintf(stderr, Name ": Cannot connect to monitor for %s: %s\n", - devname, strerror(errno)); - free(devname); - return 1; - } else if (sfd < 0) { - fprintf(stderr, Name ": Cannot determine container name for" - " device number %d\n", devnum); - return 1; - } - - if (add_remove) - ack(sfd, 0, 0); - else if (send_remove_device(sfd, stb->st_rdev, 0, 0) != 0) { - fprintf(stderr, Name ": Failed to send \'%s device\'" - " message to the container monitor\n", - add_remove ? "add" : "remove"); - err = 1; - } - - /* check the reply */ - if (!err && receive_message(sfd, &msg, 0) != 0) { - fprintf(stderr, Name ": Failed to receive an acknowledgement" - " from the container monitor\n"); - err = 1; - } - - if (!err && msg.seq != 0) { - fprintf(stderr, Name ": %s device failed error code %d\n", - add_remove ?
"Add" : "Remove", msg.seq); - err = 1; - } - - free(devname); - close(sfd); - - return err; -} - int Manage_subdevs(char *devname, int fd, mddev_dev_t devlist, int verbose) { @@ -244,6 +196,7 @@ int Manage_subdevs(char *devname, int fd, struct supertype *st, *tst; int duuid[4]; int ouuid[4]; + int lfd = -1; if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": cannot get array info for %s\n", @@ -270,6 +223,7 @@ int Manage_subdevs(char *devname, int fd, unsigned long long ldsize; char dvname[20]; char *dnprintable = dv->devname; + int err; next = dv->next; jnext = 0; @@ -359,8 +313,7 @@ int Manage_subdevs(char *devname, int fd, " \'member\' array, perform this" " operation on the parent container\n"); return 1; - } else if (tst->ss->external) - return add_remove_device_container(fd, 1, &stb); + } /* Make sure it isn't in use (in 2.6 or later) */ tfd = open(dv->devname, O_RDONLY|O_EXCL); if (tfd < 0) { @@ -381,7 +334,9 @@ int Manage_subdevs(char *devname, int fd, } close(tfd); - if (array.major_version == 0 && + + if (!tst->ss->external && + array.major_version == 0 && md_get_version(fd)%100 < 2) { if (ioctl(fd, HOT_ADD_DISK, (unsigned long)stb.st_rdev)==0) { @@ -556,15 +511,65 @@ int Manage_subdevs(char *devname, int fd, " \'member\' array, perform this" " operation on the parent container\n"); return 1; - } else if (tst->ss->external) - return add_remove_device_container(fd, 0, &stb); + } + if (tst->ss->external) { + /* To remove a device from a container, we must + * check that it isn't in use in an array. + * This involves looking in the 'holders' + * directory - there must be just one entry, + * the container. 
+ * To ensure that it doesn't get used as a + * hot spare while we are checking, we + * get an O_EXCL open on the container + */ + int dnum = fd2devnum(fd); + lfd = open_dev_excl(dnum); + if (lfd < 0) { + fprintf(stderr, Name + ": Cannot get exclusive access " + " to container - odd\n"); + return 1; + } + if (!sysfs_unique_holder(dnum, stb.st_rdev)) { + fprintf(stderr, Name + ": %s is %s, cannot remove.\n", + dnprintable, + errno == EEXIST ? "still in use": + "not a member"); + close(lfd); + return 1; + } + } /* FIXME check that it is a current member */ - if (ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev)) { + err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev); + if (err && errno == ENODEV) { + /* Old kernels rejected this if no personality + * registered */ + struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS); + struct mdinfo *dv = NULL; + if (sra) + dv = sra->devs; + for ( ; dv ; dv=dv->next) + if (dv->disk.major == major(stb.st_rdev) && + dv->disk.minor == minor(stb.st_rdev)) + break; + if (dv) + err = sysfs_set_str(sra, dv, + "state", "remove"); + else + err = -1; + if (sra) + sysfs_free(sra); + } + if (err) { fprintf(stderr, Name ": hot remove failed " "for %s: %s\n", dnprintable, strerror(errno)); + if (lfd >= 0) + close(lfd); return 1; } + close(lfd); if (verbose >= 0) fprintf(stderr, Name ": hot removed %s\n", dnprintable); diff --git a/managemon.c b/managemon.c index 4592a991..569449b5 100644 --- a/managemon.c +++ b/managemon.c @@ -434,20 +434,12 @@ void manage(struct mdstat_ent *mdstat, struct supertype *container) static int handle_message(struct supertype *container, struct md_message *msg) { - int err; struct md_generic_cmd *cmd = msg->buf; if (!cmd) return 0; switch (cmd->action) { - case md_action_remove_device: - - /* forward to the monitor */ - active_cmd = cmd; - write(container->mgr_pipe[1], &err, 1); - read(container->mon_pipe[0], &err, 1); - return err; default: return -1; diff --git a/mdadm.h b/mdadm.h index
30d7ab7a..ba9174ea 100644 --- a/mdadm.h +++ b/mdadm.h @@ -348,6 +348,7 @@ extern int sysfs_set_array(struct mdinfo *sra, extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd); extern int sysfs_disk_to_sg(int fd); extern int sysfs_disk_to_scsi_id(int fd, __u32 *id); +extern int sysfs_unique_holder(int devnum, long rdev); extern int save_stripes(int *source, unsigned long long *offsets, diff --git a/monitor.c b/monitor.c index 6377cc4a..5bb25596 100644 --- a/monitor.c +++ b/monitor.c @@ -367,54 +367,11 @@ static void reconcile_failed(struct active_array *aa, struct mdinfo *failed) } } -static int handle_remove_device(struct md_remove_device_cmd *cmd, struct active_array *aa) -{ - struct active_array *a; - struct mdinfo *victim; - int rv; - - /* scan all arrays for the given device, if ->state_fd is closed (-1) - * in all cases then mark the disk as removed in the metadata. - * Otherwise reply that it is busy. - */ - - /* pass1 check that it is not in use anywhere */ - /* note: we are safe from re-adds as long as the device exists in the - * container - */ - for (a = aa; a; a = a->next) { - if (!a->container) - continue; - victim = find_device(a, major(cmd->rdev), minor(cmd->rdev)); - if (!victim) - continue; - if (victim->state_fd > 0) - return -EBUSY; - } - - /* pass2 schedule and process removal per array */ - for (a = aa; a; a = a->next) { - if (!a->container) - continue; - victim = find_device(a, major(cmd->rdev), minor(cmd->rdev)); - if (!victim) - continue; - victim->curr_state |= DS_REMOVE; - rv = read_and_act(a); - if (rv < 0) - return rv; - } - - return 0; -} - static int handle_pipe(struct md_generic_cmd *cmd, struct active_array *aa) { switch (cmd->action) { case md_action_ping_monitor: return 0; - case md_action_remove_device: - return handle_remove_device((void *) cmd, aa); } return -1; diff --git a/msg.c b/msg.c index 6082365e..d0e505dc 100644 --- a/msg.c +++ b/msg.c @@ -185,19 +185,6 @@ int nack(int fd, int err, int tmo) return 
send_message(fd, &msg, tmo); } -int send_remove_device(int fd, dev_t rdev, int seq, int tmo) -{ - struct md_remove_device_cmd cmd = { .action = md_action_remove_device, - .rdev = rdev - }; - struct md_message msg = { .seq = seq, - .num_bytes = sizeof(cmd), - .buf = &cmd - }; - - return send_message(fd, &msg, tmo); -} - int connect_monitor(char *devname) { char path[100]; diff --git a/msg.h b/msg.h index afe14040..ccaa571b 100644 --- a/msg.h +++ b/msg.h @@ -29,24 +29,17 @@ struct md_message { enum md_message_action { md_action_ping_monitor, - md_action_remove_device, }; struct md_generic_cmd { enum md_message_action action; }; -struct md_remove_device_cmd { - enum md_message_action action; - dev_t rdev; -}; - /* union of all known command types, used to sanity check ->num_bytes * on the receive path */ union md_message_commands { struct md_generic_cmd generic; - struct md_remove_device_cmd remove; }; extern const int start_magic; @@ -58,5 +51,4 @@ extern int ack(int fd, int seq, int tmo); extern int nack(int fd, int err, int tmo); extern int connect_monitor(char *devname); extern int ping_monitor(char *devname); -extern int send_remove_device(int fd, dev_t rdev, int seq, int tmo); diff --git a/sysfs.c b/sysfs.c index ae10b1e9..a2844213 100644 --- a/sysfs.c +++ b/sysfs.c @@ -515,3 +515,66 @@ int sysfs_disk_to_scsi_id(int fd, __u32 *id) return 0; } + + +int sysfs_unique_holder(int devnum, long rdev) +{ + /* Check that devnum is a holder of rdev, + * and is the only holder. 
+ * we should be locked against races by + * an O_EXCL on devnum + */ + DIR *dir; + struct dirent *de; + char dirname[100]; + char l; + int found = 0; + sprintf(dirname, "/sys/dev/block/%d:%d/holders", + major(rdev), minor(rdev)); + dir = opendir(dirname); + errno = ENOENT; + if (!dir) + return 0; + l = strlen(dirname); + while ((de = readdir(dir)) != NULL) { + char buf[10]; + int n; + int mj, mn; + char c; + int fd; + + if (de->d_ino == 0) + continue; + if (de->d_name[0] == '.') + continue; + strcpy(dirname+l, "/"); + strcat(dirname+l, de->d_name); + strcat(dirname+l, "/dev"); + fd = open(dirname, O_RDONLY); + if (fd < 0) { + errno = ENOENT; + break; + } + n = read(fd, buf, sizeof(buf)-1); + close(fd); + buf[n] = 0; + if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 || + c != '\n') { + errno = ENOENT; + break; + } + if (mj != MD_MAJOR) + mn = -1-(mn>>6); + + if (devnum != mn) { + errno = EEXIST; + break; + } + found = 1; + } + closedir(dir); + if (de) + return 0; + else + return found; +} -- 2.39.2