X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Manage.c;h=65235bc1836f5e02c22c3e703cea2f39789b8023;hp=5219e6eb8760ec2b287f10f3d6567297bbe72838;hb=1124b3cf29dad544e8a5aa01e5f9e94e7be1765a;hpb=68c7d6d790f856b4e8301d0afa7fc6873a0d4bb8 diff --git a/Manage.c b/Manage.c index 5219e6eb..65235bc1 100644 --- a/Manage.c +++ b/Manage.c @@ -30,6 +30,7 @@ #include "mdadm.h" #include "md_u.h" #include "md_p.h" +#include <ctype.h> #define REGISTER_DEV _IO (MD_MAJOR, 1) #define START_MD _IO (MD_MAJOR, 2) @@ -45,11 +46,57 @@ int Manage_ro(char *devname, int fd, int readonly) * */ mdu_array_info_t array; +#ifndef MDASSEMBLE + struct mdinfo *mdi; +#endif if (md_get_version(fd) < 9000) { fprintf(stderr, Name ": need md driver version 0.90.0 or later\n"); return 1; } +#ifndef MDASSEMBLE + /* If this is an externally-manage array, we need to modify the + * metadata_version so that mdmon doesn't undo our change. + */ + mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION); + if (mdi && + mdi->array.major_version == -1 && + mdi->array.level > 0 && + is_subarray(mdi->text_version)) { + char vers[64]; + strcpy(vers, "external:"); + strcat(vers, mdi->text_version); + if (readonly > 0) { + int rv; + /* We set readonly ourselves. 
*/ + vers[9] = '-'; + sysfs_set_str(mdi, NULL, "metadata_version", vers); + + close(fd); + rv = sysfs_set_str(mdi, NULL, "array_state", "readonly"); + + if (rv < 0) { + fprintf(stderr, Name ": failed to set readonly for %s: %s\n", + devname, strerror(errno)); + + vers[9] = mdi->text_version[0]; + sysfs_set_str(mdi, NULL, "metadata_version", vers); + return 1; + } + } else { + char *cp; + /* We cannot set read/write - must signal mdmon */ + vers[9] = '/'; + sysfs_set_str(mdi, NULL, "metadata_version", vers); + + cp = strchr(vers+10, '/'); + if (*cp) + *cp = 0; + ping_monitor(vers+10); + } + return 0; + } +#endif if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": %s does not appear to be active.\n", devname); @@ -74,17 +121,70 @@ int Manage_ro(char *devname, int fd, int readonly) #ifndef MDASSEMBLE +static void remove_devices(int devnum, char *path) +{ + /* Remove all 'standard' devices for 'devnum', including + * partitions. Also remove names at 'path' - possibly with + * partition suffixes - which link to those names. + */ + char base[40]; + char *path2; + char link[1024]; + int n; + int part; + char *be; + char *pe; + + if (devnum >= 0) + sprintf(base, "/dev/md%d", devnum); + else + sprintf(base, "/dev/md_d%d", -1-devnum); + be = base + strlen(base); + if (path) { + path2 = malloc(strlen(path)+20); + strcpy(path2, path); + pe = path2 + strlen(path2); + } else + path = NULL; + + for (part = 0; part < 16; part++) { + if (part) { + sprintf(be, "p%d", part); + if (path) { + if (isdigit(pe[-1])) + sprintf(pe, "p%d", part); + else + sprintf(pe, "%d", part); + } + } + /* FIXME test if really is md device ?? */ + unlink(base); + if (path) { + n = readlink(path2, link, sizeof(link)); + if (n && strlen(base) == n && + strncmp(link, base, n) == 0) + unlink(path2); + } + } +} + + int Manage_runstop(char *devname, int fd, int runstop, int quiet) { /* Run or stop the array. 
array must already be configured * required >= 0.90.0 + * Only print failure messages if quiet == 0; + * quiet > 0 means really be quiet + * quiet < 0 means we will try again if it fails. */ mdu_param_t param; /* unused */ if (runstop == -1 && md_get_version(fd) < 9000) { if (ioctl(fd, STOP_MD, 0)) { - if (!quiet) fprintf(stderr, Name ": stopping device %s failed: %s\n", - devname, strerror(errno)); + if (quiet == 0) fprintf(stderr, + Name ": stopping device %s " + "failed: %s\n", + devname, strerror(errno)); return 1; } } @@ -111,20 +211,77 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet) } else if (runstop < 0){ struct map_ent *map = NULL; struct stat stb; - if (ioctl(fd, STOP_ARRAY, NULL)) { - if (quiet==0) - fprintf(stderr, Name ": fail to stop array %s: %s\n", + struct mdinfo *mdi; + int devnum; + /* If this is an mdmon managed array, just write 'inactive' + * to the array state and let mdmon clear up. + */ + devnum = fd2devnum(fd); + mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION); + if (mdi && + mdi->array.level > 0 && + is_subarray(mdi->text_version)) { + /* This is mdmon managed. */ + close(fd); + if (sysfs_set_str(mdi, NULL, + "array_state", "inactive") < 0) { + if (quiet == 0) + fprintf(stderr, Name + ": failed to stop array %s: %s\n", + devname, strerror(errno)); + return 1; + } + + /* Give monitor a chance to act */ + ping_monitor(mdi->text_version); + + fd = open(devname, O_RDONLY); + } else if (mdi && + mdi->array.major_version == -1 && + mdi->array.minor_version == -2 && + !is_subarray(mdi->text_version)) { + /* container, possibly mdmon-managed. 
+ * Make sure mdmon isn't opening it, which + * would interfere with the 'stop' + */ + ping_monitor(mdi->sys_name); + } + + if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) { + if (quiet == 0) { + fprintf(stderr, Name + ": failed to stop array %s: %s\n", devname, strerror(errno)); + if (errno == EBUSY) + fprintf(stderr, "Perhaps a running " + "process, mounted filesystem " + "or active volume group?\n"); + } + if (mdi) + sysfs_free(mdi); return 1; } + /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array + * was stopped, so We'll do it here just to be sure. Drop any + * partitions as well... + */ + if (fd >= 0) + ioctl(fd, BLKRRPART, 0); + if (mdi) + sysfs_uevent(mdi, "change"); + + + if (devnum != NoMdDev && + (stat("/dev/.udev", &stb) != 0 || + check_env("MDADM_NO_UDEV"))) { + struct map_ent *mp = map_by_devnum(&map, devnum); + remove_devices(devnum, mp ? mp->path : NULL); + } + + if (quiet <= 0) fprintf(stderr, Name ": stopped %s\n", devname); - if (fstat(fd, &stb) == 0) { - int devnum; - if (major(stb.st_rdev) == MD_MAJOR) - devnum = minor(stb.st_rdev); - else - devnum = -1-(minor(stb.st_rdev)>>6); + if (devnum != NoMdDev) { map_delete(&map, devnum); map_write(map); map_free(map); @@ -188,25 +345,42 @@ int Manage_subdevs(char *devname, int fd, */ mdu_array_info_t array; mdu_disk_info_t disc; + unsigned long long array_size; mddev_dev_t dv, next = NULL; struct stat stb; int j, jnext = 0; int tfd; - struct supertype *st; - void *dsuper = NULL; - void *osuper = NULL; /* original super */ + struct supertype *st, *tst; int duuid[4]; int ouuid[4]; + int lfd = -1; if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": cannot get array info for %s\n", devname); return 1; } + + /* array.size is only 32 bit and may be truncated. 
+ * So read from sysfs if possible, and record number of sectors + */ + + array_size = get_component_size(fd); + if (array_size <= 0) + array_size = array.size * 2; + + tst = super_by_fd(fd); + if (!tst) { + fprintf(stderr, Name ": unsupport array - version %d.%d\n", + array.major_version, array.minor_version); + return 1; + } + for (dv = devlist, j=0 ; dv; dv = next, j = jnext) { unsigned long long ldsize; char dvname[20]; char *dnprintable = dv->devname; + int err; next = dv->next; jnext = 0; @@ -291,32 +465,35 @@ int Manage_subdevs(char *devname, int fd, return 1; case 'a': /* add the device */ - st = super_by_version(array.major_version, - array.minor_version); - if (!st) { - fprintf(stderr, Name ": unsupport array - version %d.%d\n", - array.major_version, array.minor_version); + if (tst->subarray[0]) { + fprintf(stderr, Name ": Cannot add disks to a" + " \'member\' array, perform this" + " operation on the parent container\n"); return 1; } - /* Make sure it isn't in use (in 2.6 or later) */ - tfd = open(dv->devname, O_RDONLY|O_EXCL); + tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT); if (tfd < 0) { fprintf(stderr, Name ": Cannot open %s: %s\n", dv->devname, strerror(errno)); return 1; } remove_partitions(tfd); + + st = dup_super(tst); + if (array.not_persistent==0) - st->ss->load_super(st, tfd, &osuper, NULL); - /* will use osuper later */ + st->ss->load_super(st, tfd, NULL); + if (!get_dev_size(tfd, dv->devname, &ldsize)) { close(tfd); return 1; } close(tfd); - if (array.major_version == 0 && + + if (!tst->ss->external && + array.major_version == 0 && md_get_version(fd)%100 < 2) { if (ioctl(fd, HOT_ADD_DISK, (unsigned long)stb.st_rdev)==0) { @@ -331,20 +508,16 @@ int Manage_subdevs(char *devname, int fd, return 1; } - if (array.not_persistent == 0) { - - /* Make sure device is large enough */ - if (st->ss->avail_size(st, ldsize/512) < - array.size) { - fprintf(stderr, Name ": %s not large enough to join array\n", - dv->devname); - return 1; - } + if 
(array.not_persistent == 0 || tst->ss->external) { /* need to find a sample superblock to copy, and - * a spare slot to use + * a spare slot to use. + * For 'external' array (well, container based), + * We can just load the metadata for the array. */ - for (j=0; j<st->max_devs; j++) { + if (tst->ss->external) { + tst->ss->load_super(tst, fd, NULL); + } else for (j = 0; j < tst->max_devs; j++) { char *dev; int dfd; disc.number = j; @@ -358,22 +531,33 @@ int Manage_subdevs(char *devname, int fd, if (!dev) continue; dfd = dev_open(dev, O_RDONLY); if (dfd < 0) continue; - if (st->ss->load_super(st, dfd, &dsuper, NULL)) { + if (tst->ss->load_super(tst, dfd, + NULL)) { close(dfd); continue; } close(dfd); break; } - if (!dsuper) { + /* FIXME this is a bad test to be using */ + if (!tst->sb) { fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n"); return 1; } + + /* Make sure device is large enough */ + if (tst->ss->avail_size(tst, ldsize/512) < + array_size) { + fprintf(stderr, Name ": %s not large enough to join array\n", + dv->devname); + return 1; + } + /* Possibly this device was recently part of the array * and was temporarily removed, and is now being re-added. * If so, we can simply re-add it. */ - st->ss->uuid_from_super(st, duuid, dsuper); + tst->ss->uuid_from_super(tst, duuid); /* re-add doesn't work for version-1 superblocks * before 2.6.18 :-( @@ -381,27 +565,34 @@ int Manage_subdevs(char *devname, int fd, if (array.major_version == 1 && get_linux_version() <= 2006018) ; - else if (osuper) { - st->ss->uuid_from_super(st, ouuid, osuper); + else if (st->sb) { + st->ss->uuid_from_super(st, ouuid); if (memcmp(duuid, ouuid, sizeof(ouuid))==0) { /* looks close enough for now. Kernel * will worry about whether a bitmap * based reconstruction is possible. 
*/ struct mdinfo mdi; - st->ss->getinfo_super(st, &mdi, osuper); + st->ss->getinfo_super(st, &mdi); disc.major = major(stb.st_rdev); disc.minor = minor(stb.st_rdev); disc.number = mdi.disk.number; disc.raid_disk = mdi.disk.raid_disk; disc.state = mdi.disk.state; - if (dv->writemostly) + if (dv->writemostly == 1) disc.state |= 1 << MD_DISK_WRITEMOSTLY; + if (dv->writemostly == 2) + disc.state &= ~(1 << MD_DISK_WRITEMOSTLY); if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) { if (verbose >= 0) fprintf(stderr, Name ": re-added %s\n", dv->devname); continue; } + if (errno == ENOMEM || errno == EROFS) { + fprintf(stderr, Name ": add new device failed for %s: %s\n", + dv->devname, strerror(errno)); + return 1; + } /* fall back on normal-add */ } } @@ -409,7 +600,7 @@ int Manage_subdevs(char *devname, int fd, /* non-persistent. Must ensure that new drive * is at least array.size big. */ - if (ldsize/512 < array.size) { + if (ldsize/512 < array_size) { fprintf(stderr, Name ": %s not large enough to join array\n", dv->devname); return 1; @@ -420,7 +611,7 @@ int Manage_subdevs(char *devname, int fd, * we must choose the same free number, which requires * starting at 'raid_disks' and counting up */ - for (j = array.raid_disks; j< st->max_devs; j++) { + for (j = array.raid_disks; j< tst->max_devs; j++) { disc.number = j; if (ioctl(fd, GET_DISK_INFO, &disc)) break; @@ -433,11 +624,21 @@ int Manage_subdevs(char *devname, int fd, disc.minor = minor(stb.st_rdev); disc.number =j; disc.state = 0; - if (array.not_persistent==0) { - if (dv->writemostly) + if (array.not_persistent==0 || tst->ss->external) { + int dfd; + if (dv->writemostly == 1) disc.state |= 1 << MD_DISK_WRITEMOSTLY; - st->ss->add_to_super(st, dsuper, &disc); - if (st->ss->write_init_super(st, dsuper, &disc, dv->devname)) + dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); + if (tst->ss->add_to_super(tst, &disc, dfd, + dv->devname)) { + close(dfd); + return 1; + } + /* write_init_super will close 'dfd' */ + if 
(tst->ss->external) + /* mdmon will write the metadata */ + close(dfd); + else if (tst->ss->write_init_super(tst)) return 1; } else if (dv->re_add) { /* this had better be raid1. @@ -446,7 +647,7 @@ int Manage_subdevs(char *devname, int fd, */ char *used = malloc(array.raid_disks); memset(used, 0, array.raid_disks); - for (j=0; j< st->max_devs; j++) { + for (j=0; j< tst->max_devs; j++) { mdu_disk_info_t disc2; disc2.number = j; if (ioctl(fd, GET_DISK_INFO, &disc2)) @@ -468,9 +669,54 @@ int Manage_subdevs(char *devname, int fd, break; } } - if (dv->writemostly) + if (dv->writemostly == 1) disc.state |= (1 << MD_DISK_WRITEMOSTLY); - if (ioctl(fd,ADD_NEW_DISK, &disc)) { + if (tst->ss->external) { + /* add a disk to an external metadata container + * only if mdmon is around to see it + */ + struct mdinfo new_mdi; + struct mdinfo *sra; + int container_fd; + int devnum = fd2devnum(fd); + + container_fd = open_dev_excl(devnum); + if (container_fd < 0) { + fprintf(stderr, Name ": add failed for %s:" + " could not get exclusive access to container\n", + dv->devname); + return 1; + } + + if (!mdmon_running(devnum)) { + fprintf(stderr, Name ": add failed for %s: mdmon not running\n", + dv->devname); + close(container_fd); + return 1; + } + + sra = sysfs_read(container_fd, -1, 0); + if (!sra) { + fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n", + dv->devname); + close(container_fd); + return 1; + } + sra->array.level = LEVEL_CONTAINER; + /* Need to set data_offset and component_size */ + tst->ss->getinfo_super(tst, &new_mdi); + new_mdi.disk.major = disc.major; + new_mdi.disk.minor = disc.minor; + if (sysfs_add_disk(sra, &new_mdi, 0) != 0) { + fprintf(stderr, Name ": add new device to external metadata" + " failed for %s\n", dv->devname); + close(container_fd); + return 1; + } + ping_monitor(devnum2devname(devnum)); + sysfs_free(sra); + close(container_fd); + } else if (ioctl(fd, ADD_NEW_DISK, &disc)) { fprintf(stderr, Name ": add new device failed for %s as %d: 
%s\n", dv->devname, j, strerror(errno)); return 1; @@ -481,13 +727,94 @@ int Manage_subdevs(char *devname, int fd, case 'r': /* hot remove */ + if (tst->subarray[0]) { + fprintf(stderr, Name ": Cannot remove disks from a" + " \'member\' array, perform this" + " operation on the parent container\n"); + return 1; + } + if (tst->ss->external) { + /* To remove a device from a container, we must + * check that it isn't in use in an array. + * This involves looking in the 'holders' + * directory - there must be just one entry, + * the container. + * To ensure that it doesn't get used as a + * hold spare while we are checking, we + * get an O_EXCL open on the container + */ + int dnum = fd2devnum(fd); + lfd = open_dev_excl(dnum); + if (lfd < 0) { + fprintf(stderr, Name + ": Cannot get exclusive access " + " to container - odd\n"); + return 1; + } + /* in the detached case it is not possible to + * check if we are the unique holder, so just + * rely on the 'detached' checks + */ + if (strcmp(dv->devname, "detached") == 0 || + sysfs_unique_holder(dnum, stb.st_rdev)) + /* pass */; + else { + fprintf(stderr, Name + ": %s is %s, cannot remove.\n", + dnprintable, + errno == EEXIST ? 
"still in use": + "not a member"); + close(lfd); + return 1; + } + } /* FIXME check that it is a current member */ - if (ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev)) { + err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev); + if (err && errno == ENODEV) { + /* Old kernels rejected this if no personality + * registered */ + struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS); + struct mdinfo *dv = NULL; + if (sra) + dv = sra->devs; + for ( ; dv ; dv=dv->next) + if (dv->disk.major == major(stb.st_rdev) && + dv->disk.minor == minor(stb.st_rdev)) + break; + if (dv) + err = sysfs_set_str(sra, dv, + "state", "remove"); + else + err = -1; + if (sra) + sysfs_free(sra); + } + if (err) { fprintf(stderr, Name ": hot remove failed " "for %s: %s\n", dnprintable, strerror(errno)); + if (lfd >= 0) + close(lfd); return 1; } + if (tst->ss->external) { + /* + * Before dropping our exclusive open we make an + * attempt at preventing mdmon from seeing an + * 'add' event before reconciling this 'remove' + * event. + */ + char *name = devnum2devname(fd2devnum(fd)); + + if (!name) { + fprintf(stderr, Name ": unable to get container name\n"); + return 1; + } + + ping_manager(name); + free(name); + } + close(lfd); if (verbose >= 0) fprintf(stderr, Name ": hot removed %s\n", dnprintable);