/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
#ifndef MDASSEMBLE
struct mdinfo *mdi;
#endif
+ int rv = 0;
if (md_get_version(fd) < 9000) {
- fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
+ pr_err("need md driver version 0.90.0 or later\n");
return 1;
}
#ifndef MDASSEMBLE
- /* If this is an externally-manage array, we need to modify the
+ /* If this is an externally-managed array, we need to modify the
* metadata_version so that mdmon doesn't undo our change.
*/
mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
if (mdi &&
mdi->array.major_version == -1 &&
- mdi->array.level > 0 &&
is_subarray(mdi->text_version)) {
char vers[64];
strcpy(vers, "external:");
rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
if (rv < 0) {
- fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+ pr_err("failed to set readonly for %s: %s\n",
devname, strerror(errno));
vers[9] = mdi->text_version[0];
sysfs_set_str(mdi, NULL, "metadata_version", vers);
- return 1;
+ rv = 1;
+ goto out;
}
} else {
char *cp;
sysfs_set_str(mdi, NULL, "metadata_version", vers);
cp = strchr(vers+10, '/');
- if (*cp)
+ if (cp)
*cp = 0;
ping_monitor(vers+10);
+ if (mdi->array.level <= 0)
+ sysfs_set_str(mdi, NULL, "array_state", "active");
}
- return 0;
+ goto out;
}
#endif
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- fprintf(stderr, Name ": %s does not appear to be active.\n",
+ pr_err("%s does not appear to be active.\n",
devname);
- return 1;
+ rv = 1;
+ goto out;
}
- if (readonly>0) {
+ if (readonly > 0) {
if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
- fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+ pr_err("failed to set readonly for %s: %s\n",
devname, strerror(errno));
- return 1;
+ rv = 1;
+ goto out;
}
} else if (readonly < 0) {
if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
- fprintf(stderr, Name ": failed to set writable for %s: %s\n",
+ pr_err("failed to set writable for %s: %s\n",
devname, strerror(errno));
- return 1;
+ rv = 1;
+ goto out;
}
}
- return 0;
+out:
+#ifndef MDASSEMBLE
+ if (mdi)
+ sysfs_free(mdi);
+#endif
+ return rv;
}
#ifndef MDASSEMBLE
static void remove_devices(int devnum, char *path)
{
- /*
+ /*
* Remove names at 'path' - possibly with
* partition suffixes - which link to the 'standard'
* name for devnum. These were probably created
sprintf(base, "/dev/md_d%d", -1-devnum);
be = base + strlen(base);
- path2 = malloc(strlen(path)+20);
+ path2 = xmalloc(strlen(path)+20);
strcpy(path2, path);
pe = path2 + strlen(path2);
-
+
for (part = 0; part < 16; part++) {
if (part) {
sprintf(be, "p%d", part);
sprintf(pe, "%d", part);
}
n = readlink(path2, link, sizeof(link));
- if (n && (int)strlen(base) == n &&
+ if (n > 0 && (int)strlen(base) == n &&
strncmp(link, base, n) == 0)
unlink(path2);
}
free(path2);
}
-
-int Manage_runstop(char *devname, int fd, int runstop, int quiet)
+int Manage_runstop(char *devname, int fd, int runstop,
+ int verbose, int will_retry)
{
- /* Run or stop the array. array must already be configured
- * required >= 0.90.0
- * Only print failure messages if quiet == 0;
- * quiet > 0 means really be quiet
- * quiet < 0 means we will try again if it fails.
+ /* Run or stop the array. Array must already be configured
+ * 'Run' requires >= 0.90.0
+ * 'will_retry' is only relevant for 'stop', and means
+ * that error messages are not wanted.
*/
mdu_param_t param; /* unused */
+ int rv = 0;
+
+ if (will_retry && verbose == 0)
+ verbose = -1;
if (runstop == -1 && md_get_version(fd) < 9000) {
- if (ioctl(fd, STOP_MD, 0)) {
- if (quiet == 0) fprintf(stderr,
- Name ": stopping device %s "
- "failed: %s\n",
- devname, strerror(errno));
- return 1;
- }
+ if (ioctl(fd, STOP_MD, 0) == 0)
+ return 0;
+ pr_err("stopping device %s "
+ "failed: %s\n",
+ devname, strerror(errno));
+ return 1;
}
if (md_get_version(fd) < 9000) {
- fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
- return 1;
- }
- /*
- if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- fprintf(stderr, Name ": %s does not appear to be active.\n",
- devname);
+ pr_err("need md driver version 0.90.0 or later\n");
return 1;
}
- */
- if (runstop>0) {
+
+ if (runstop > 0) {
if (ioctl(fd, RUN_ARRAY, ¶m)) {
- fprintf(stderr, Name ": failed to run array %s: %s\n",
- devname, strerror(errno));
+ if (verbose >= 0)
+ pr_err("failed to run array %s: %s\n",
+ devname, strerror(errno));
return 1;
}
- if (quiet <= 0)
- fprintf(stderr, Name ": started %s\n", devname);
+ if (verbose >= 0)
+ pr_err("started %s\n", devname);
} else if (runstop < 0){
struct map_ent *map = NULL;
struct stat stb;
struct mdinfo *mdi;
int devnum;
+ int err;
+ int count;
/* If this is an mdmon managed array, just write 'inactive'
* to the array state and let mdmon clear up.
*/
devnum = fd2devnum(fd);
+ /* Get EXCL access first. If this fails, then attempting
+ * to stop is probably a bad idea.
+ */
+ close(fd);
+ fd = open(devname, O_RDONLY|O_EXCL);
+ if (fd < 0 || fd2devnum(fd) != devnum) {
+ if (fd >= 0)
+ close(fd);
+ if (verbose >= 0)
+ pr_err("Cannot get exclusive access to %s:"
+ "Perhaps a running "
+ "process, mounted filesystem "
+ "or active volume group?\n",
+ devname);
+ return 1;
+ }
mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
if (mdi &&
mdi->array.level > 0 &&
is_subarray(mdi->text_version)) {
+ int err;
/* This is mdmon managed. */
close(fd);
- if (sysfs_set_str(mdi, NULL,
- "array_state", "inactive") < 0) {
- if (quiet == 0)
- fprintf(stderr, Name
- ": failed to stop array %s: %s\n",
- devname, strerror(errno));
- return 1;
+
+ /* As we have an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25;
+ while (count &&
+ (err = sysfs_set_str(mdi, NULL,
+ "array_state",
+ "inactive")) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count--;
+ }
+ if (err) {
+ if (verbose >= 0)
+ pr_err("failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ rv = 1;
+ goto out;
}
/* Give monitor a chance to act */
ping_monitor(mdi->text_version);
- fd = open(devname, O_RDONLY);
+ fd = open_dev_excl(devnum);
+ if (fd < 0) {
+ if (verbose >= 0)
+ pr_err("failed to completely stop %s"
+ ": Device is busy\n",
+ devname);
+ rv = 1;
+ goto out;
+ }
} else if (mdi &&
mdi->array.major_version == -1 &&
mdi->array.minor_version == -2 &&
* which are members of this array
*/
mds = mdstat_read(0, 0);
- for (m=mds; m; m=m->next)
+ for (m = mds; m; m = m->next)
if (m->metadata_version &&
strncmp(m->metadata_version, "external:", 9)==0 &&
is_subarray(m->metadata_version+9) &&
devname2devnum(m->metadata_version+10) == devnum) {
- if (!quiet)
- fprintf(stderr, Name
- ": Cannot stop container %s: "
- "member %s still active\n",
- devname, m->dev);
+ if (verbose >= 0)
+ pr_err("Cannot stop container %s: "
+ "member %s still active\n",
+ devname, m->dev);
free_mdstat(mds);
- if (mdi)
- sysfs_free(mdi);
- return 1;
+ rv = 1;
+ goto out;
}
}
- if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
- if (quiet == 0) {
- fprintf(stderr, Name
- ": failed to stop array %s: %s\n",
- devname, strerror(errno));
+ /* As we have an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25; err = 0;
+ while (count && fd >= 0
+ && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count --;
+ }
+ if (fd >= 0 && err) {
+ if (verbose >= 0) {
+ pr_err("failed to stop array %s: %s\n",
+ devname, strerror(errno));
if (errno == EBUSY)
fprintf(stderr, "Perhaps a running "
"process, mounted filesystem "
"or active volume group?\n");
}
- if (mdi)
- sysfs_free(mdi);
- return 1;
+ rv = 1;
+ goto out;
}
/* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
* was stopped, so We'll do it here just to be sure. Drop any
if (mdi)
sysfs_uevent(mdi, "change");
-
if (devnum != NoMdDev &&
(stat("/dev/.udev", &stb) != 0 ||
check_env("MDADM_NO_UDEV"))) {
remove_devices(devnum, mp ? mp->path : NULL);
}
-
- if (quiet <= 0)
- fprintf(stderr, Name ": stopped %s\n", devname);
+ if (verbose >= 0)
+ pr_err("stopped %s\n", devname);
map_lock(&map);
map_remove(&map, devnum);
map_unlock(&map);
+ out:
+ if (mdi)
+ sysfs_free(mdi);
+ }
+ return rv;
+}
+
+/*
+ * add_faulty - queue every faulty member of an array for processing.
+ *
+ * dv: list entry after which the new entries are linked, in the
+ * order the slots are scanned.
+ * fd: open descriptor for the md array.
+ * disp: disposition stored in every entry that is created.
+ *
+ * Each new entry is named "major:minor". Nothing is added when
+ * GET_ARRAY_INFO fails.
+ */
+static void add_faulty(struct mddev_dev *dv, int fd, char disp)
+{
+ mdu_array_info_t array;
+ mdu_disk_info_t disk;
+ int remaining_disks;
+ int i;
+
+ if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+ return;
+
+ /* Stop scanning slots once every disk the array reported is seen */
+ remaining_disks = array.nr_disks;
+ for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+ struct mddev_dev *new;
+ char buf[40];
+ disk.number = i;
+ if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ remaining_disks--;
+ if ((disk.state & 1) == 0) /* not faulty */
+ continue;
+ sprintf(buf, "%d:%d", disk.major, disk.minor);
+ /* Zero-filled so that fields not assigned below (such as
+ * 'writemostly' and 'used') are never read uninitialised
+ * when the entry is processed later.
+ */
+ new = xcalloc(1, sizeof(*new));
+ new->devname = xstrdup(buf);
+ new->disposition = disp;
+ new->next = dv->next;
+ dv->next = new;
+ dv = new;
+ }
+}
+
+/*
+ * add_detached - queue every detached member of an array for processing.
+ *
+ * A member counts as detached when opening its "major:minor" name
+ * fails with ENXIO. When 'disp' is 'f', members that are already
+ * faulty are skipped.
+ *
+ * dv: list entry after which the new entries are linked, in the
+ * order the slots are scanned.
+ * fd: open descriptor for the md array.
+ * disp: disposition stored in every entry that is created.
+ *
+ * Nothing is added when GET_ARRAY_INFO fails.
+ */
+static void add_detached(struct mddev_dev *dv, int fd, char disp)
+{
+ mdu_array_info_t array;
+ mdu_disk_info_t disk;
+ int remaining_disks;
+ int i;
+
+ if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+ return;
+
+ /* Stop scanning slots once every disk the array reported is seen */
+ remaining_disks = array.nr_disks;
+ for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+ struct mddev_dev *new;
+ char buf[40];
+ int sfd;
+ disk.number = i;
+ if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ remaining_disks--;
+ if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
+ continue;
+ sprintf(buf, "%d:%d", disk.major, disk.minor);
+ sfd = dev_open(buf, O_RDONLY);
+ if (sfd >= 0) {
+ /* Not detached */
+ close(sfd);
+ continue;
+ }
+ if (errno != ENXIO)
+ /* Probably not detached */
+ continue;
+ /* Zero-filled so that fields not assigned below (such as
+ * 'writemostly' and 'used') are never read uninitialised
+ * when the entry is processed later.
+ */
+ new = xcalloc(1, sizeof(*new));
+ new->devname = xstrdup(buf);
+ new->disposition = disp;
+ new->next = dv->next;
+ dv->next = new;
+ dv = new;
}
+}
+
+/*
+ * attempt_re_add - try to put a former member back into its old slot.
+ *
+ * The device's own metadata (dev_st) is compared with the array's
+ * metadata (tst); a re-add is only attempted when the device is
+ * recorded as active, not faulty, and the UUIDs match.
+ *
+ * Returns:
+ * 1 - the kernel accepted the device (ADD_NEW_DISK succeeded)
+ * 0 - re-add was not attempted or was rejected; the caller may
+ * fall back to another strategy
+ * -1 - a hard failure (device could not be opened, superblock
+ * update failed, or ADD_NEW_DISK failed with ENOMEM/EROFS
+ * for a disposition other than 'M')
+ */
+int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
+ struct supertype *dev_st, struct supertype *tst,
+ unsigned long rdev,
+ char *update, char *devname, int verbose,
+ mdu_array_info_t *array)
+{
+ struct mdinfo mdi;
+ int duuid[4];
+ int ouuid[4];
+
+ /* mdi and ouuid describe the candidate device; duuid the array */
+ dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
+ dev_st->ss->uuid_from_super(dev_st, ouuid);
+ if (tst->sb)
+ tst->ss->uuid_from_super(tst, duuid);
+ else
+ /* Assume uuid matches: kernel will check */
+ memcpy(duuid, ouuid, sizeof(ouuid));
+ if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
+ !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
+ memcmp(duuid, ouuid, sizeof(ouuid))==0) {
+ /* Looks like it is worth a
+ * try. Need to make sure
+ * kernel will accept it
+ * though.
+ */
+ mdu_disk_info_t disc;
+ /* re-add doesn't work for version-1 superblocks
+ * before 2.6.18 :-(
+ */
+ if (array->major_version == 1 &&
+ get_linux_version() <= 2006018)
+ goto skip_re_add;
+ /* The slot recorded in the device's metadata must currently
+ * be empty (major and minor both 0) in the running array.
+ */
+ disc.number = mdi.disk.number;
+ if (ioctl(fd, GET_DISK_INFO, &disc) != 0
+ || disc.major != 0 || disc.minor != 0
+ )
+ goto skip_re_add;
+ disc.major = major(rdev);
+ disc.minor = minor(rdev);
+ disc.number = mdi.disk.number;
+ disc.raid_disk = mdi.disk.raid_disk;
+ disc.state = mdi.disk.state;
+ /* writemostly: 1 sets the flag, 2 clears it */
+ if (dv->writemostly == 1)
+ disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ if (dv->writemostly == 2)
+ disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+ remove_partitions(tfd);
+ if (update || dv->writemostly > 0) {
+ int rv = -1;
+ /* tfd is a by-value parameter, so reusing it for a
+ * read-write open does not touch the caller's
+ * descriptor.
+ */
+ tfd = dev_open(dv->devname, O_RDWR);
+ if (tfd < 0) {
+ pr_err("failed to open %s for"
+ " superblock update during re-add\n", dv->devname);
+ return -1;
+ }
+
+ /* rv ends up holding the result of the last
+ * update_super call that was made.
+ */
+ if (dv->writemostly == 1)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "writemostly",
+ devname, verbose, 0, NULL);
+ if (dv->writemostly == 2)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "readwrite",
+ devname, verbose, 0, NULL);
+ if (update)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, update,
+ devname, verbose, 0, NULL);
+ if (rv == 0)
+ rv = dev_st->ss->store_super(dev_st, tfd);
+ close(tfd);
+ if (rv != 0) {
+ pr_err("failed to update"
+ " superblock during re-add\n");
+ return -1;
+ }
+ }
+ /* Hand the device to the kernel. Faulty devices never get
+ * this far: they are filtered out by the state test above.
+ */
+ errno = 0;
+ if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+ if (verbose >= 0)
+ pr_err("re-added %s\n", dv->devname);
+ return 1;
+ }
+ /* ENOMEM and EROFS are reported as errors; any other errno
+ * falls through to "not re-added" (return 0).
+ */
+ if (errno == ENOMEM || errno == EROFS) {
+ pr_err("add new device failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ if (dv->disposition == 'M')
+ return 0;
+ return -1;
+ }
+ }
+skip_re_add:
return 0;
}
-int Manage_resize(char *devname, int fd, long long size, int raid_disks)
+/*
+ * Manage_add - add (or re-add) one device to a running array.
+ *
+ * fd: open descriptor for the md array (or container).
+ * tfd: open descriptor for the device being added.
+ * dv: list entry for the device; its disposition selects the
+ * flavour of add ('a', 'A', 'M', ...).
+ * tst: metadata handler for the array.
+ * array: array information previously read by the caller.
+ * force: non-zero to add a device the metadata cannot fully use.
+ * verbose: messages about success are printed when >= 0.
+ * devname: name of the array, used in messages.
+ * update: optional superblock update passed on to attempt_re_add().
+ * rdev: device number of the device being added.
+ * array_size: size each member must provide, in 512-byte sectors
+ * (it is compared against ldsize/512).
+ *
+ * Returns 1 when the device was added or re-added, 0 when a 'M'
+ * (missing) device was skipped without error, and -1 on failure.
+ */
+int Manage_add(int fd, int tfd, struct mddev_dev *dv,
+ struct supertype *tst, mdu_array_info_t *array,
+ int force, int verbose, char *devname,
+ char *update, unsigned long rdev, unsigned long long array_size)
+{
+ unsigned long long ldsize;
+ struct supertype *dev_st = NULL;
+ int j;
+ mdu_disk_info_t disc;
+
+ if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+ if (dv->disposition == 'M')
+ return 0;
+ else
+ return -1;
+ }
+
+ if (tst->ss->validate_geometry(
+ tst, array->level, array->layout,
+ array->raid_disks, NULL,
+ ldsize >> 9, INVALID_SECTORS, NULL, NULL, 0) == 0) {
+ if (!force) {
+ pr_err("%s is larger than %s can "
+ "effectively use.\n"
+ " Add --force if you "
+ "really want to add this device.\n",
+ dv->devname, devname);
+ return -1;
+ }
+ pr_err("%s is larger than %s can "
+ "effectively use.\n"
+ " Adding anyway as --force "
+ "was given.\n",
+ dv->devname, devname);
+ }
+ /* Native 0.x metadata on an old md driver: use HOT_ADD_DISK */
+ if (!tst->ss->external &&
+ array->major_version == 0 &&
+ md_get_version(fd)%100 < 2) {
+ if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
+ if (verbose >= 0)
+ pr_err("hot added %s\n",
+ dv->devname);
+ return 1;
+ }
+
+ pr_err("hot add failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ return -1;
+ }
+
+ if (array->not_persistent == 0 || tst->ss->external) {
+
+ /* need to find a sample superblock to copy, and
+ * a spare slot to use.
+ * For 'external' array (well, container based),
+ * We can just load the metadata for the array.
+ */
+ int array_failed;
+ if (tst->sb)
+ /* already loaded */;
+ else if (tst->ss->external) {
+ tst->ss->load_container(tst, fd, NULL);
+ } else for (j = 0; j < tst->max_devs; j++) {
+ char *dev;
+ int dfd;
+ disc.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ continue;
+ if (disc.major==0 && disc.minor==0)
+ continue;
+ if ((disc.state & 4)==0) /* sync */
+ continue;
+ /* Looks like a good device to try */
+ dev = map_dev(disc.major, disc.minor, 1);
+ if (!dev)
+ continue;
+ dfd = dev_open(dev, O_RDONLY);
+ if (dfd < 0)
+ continue;
+ if (tst->ss->load_super(tst, dfd,
+ NULL)) {
+ close(dfd);
+ continue;
+ }
+ close(dfd);
+ break;
+ }
+ /* FIXME this is a bad test to be using */
+ if (!tst->sb && dv->disposition != 'a') {
+ /* we are re-adding a device to a
+ * completely dead array - have to depend
+ * on kernel to check
+ */
+ } else if (!tst->sb) {
+ pr_err("cannot load array metadata from %s\n", devname);
+ return -1;
+ }
+
+ /* Make sure device is large enough */
+ if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+ array_size) {
+ if (dv->disposition == 'M')
+ return 0;
+ pr_err("%s not large enough to join array\n",
+ dv->devname);
+ return -1;
+ }
+
+ /* Possibly this device was recently part of
+ * the array and was temporarily removed, and
+ * is now being re-added. If so, we can
+ * simply re-add it.
+ */
+
+ if (array->not_persistent==0) {
+ dev_st = dup_super(tst);
+ dev_st->ss->load_super(dev_st, tfd, NULL);
+ }
+ if (dev_st && dev_st->sb) {
+ int rv = attempt_re_add(fd, tfd, dv,
+ dev_st, tst,
+ rdev,
+ update, devname,
+ verbose,
+ array);
+ dev_st->ss->free_super(dev_st);
+ if (rv)
+ return rv;
+ }
+ if (dv->disposition == 'M') {
+ if (verbose > 0)
+ pr_err("--re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return 0;
+ }
+ if (dv->disposition == 'A') {
+ pr_err("--re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return -1;
+ }
+ /* A degraded array may already have lost too many devices
+ * for a plain add to be safe: work out which slots are in
+ * sync and ask enough() whether the array is still viable.
+ */
+ if (array->active_disks < array->raid_disks) {
+ char *avail = xcalloc(array->raid_disks, 1);
+ int d;
+ int found = 0;
+
+ for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+ disc.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ continue;
+ if (disc.major == 0 && disc.minor == 0)
+ continue;
+ if (!(disc.state & (1<<MD_DISK_SYNC)))
+ continue;
+ avail[disc.raid_disk] = 1;
+ found++;
+ }
+ array_failed = !enough(array->level, array->raid_disks,
+ array->layout, 1, avail);
+ /* avail is only needed for the enough() call */
+ free(avail);
+ } else
+ array_failed = 0;
+ if (array_failed) {
+ pr_err("%s has failed so using --add cannot work and might destroy\n",
+ devname);
+ pr_err("data on %s. You should stop the array and re-assemble it.\n",
+ dv->devname);
+ return -1;
+ }
+ } else {
+ /* non-persistent. Must ensure that new drive
+ * is at least array->size big.
+ */
+ if (ldsize/512 < array_size) {
+ pr_err("%s not large enough to join array\n",
+ dv->devname);
+ return -1;
+ }
+ }
+ /* committed to really trying this device now*/
+ remove_partitions(tfd);
+
+ /* in 2.6.17 and earlier, version-1 superblocks won't
+ * use the number we write, but will choose a free number.
+ * we must choose the same free number, which requires
+ * starting at 'raid_disks' and counting up
+ */
+ for (j = array->raid_disks; j < tst->max_devs; j++) {
+ disc.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ break;
+ if (disc.major==0 && disc.minor==0)
+ break;
+ if (disc.state & 8) /* removed */
+ break;
+ }
+ disc.major = major(rdev);
+ disc.minor = minor(rdev);
+ disc.number =j;
+ disc.state = 0;
+ if (array->not_persistent==0) {
+ int dfd;
+ if (dv->writemostly == 1)
+ disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ /* NOTE(review): dfd is not checked for failure before it is
+ * handed to add_to_super - confirm that is handled there.
+ */
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname, INVALID_SECTORS))
+ return -1;
+ if (tst->ss->write_init_super(tst))
+ return -1;
+ } else if (dv->disposition == 'A') {
+ /* this had better be raid1.
+ * As we are "--re-add"ing we must find a spare slot
+ * to fill.
+ */
+ char *used = xcalloc(array->raid_disks, 1);
+ for (j = 0; j < tst->max_devs; j++) {
+ mdu_disk_info_t disc2;
+ disc2.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc2))
+ continue;
+ if (disc2.major==0 && disc2.minor==0)
+ continue;
+ if (disc2.state & 8) /* removed */
+ continue;
+ if (disc2.raid_disk < 0)
+ continue;
+ /* used[] has exactly raid_disks entries, so an index
+ * equal to raid_disks is already out of range.
+ */
+ if (disc2.raid_disk >= array->raid_disks)
+ continue;
+ used[disc2.raid_disk] = 1;
+ }
+ for (j = 0 ; j < array->raid_disks; j++)
+ if (!used[j]) {
+ disc.raid_disk = j;
+ disc.state |= (1<<MD_DISK_SYNC);
+ break;
+ }
+ free(used);
+ }
+ if (dv->writemostly == 1)
+ disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+ if (tst->ss->external) {
+ /* add a disk
+ * to an external metadata container */
+ struct mdinfo new_mdi;
+ struct mdinfo *sra;
+ int container_fd;
+ int devnum = fd2devnum(fd);
+ int dfd;
+
+ container_fd = open_dev_excl(devnum);
+ if (container_fd < 0) {
+ pr_err("add failed for %s:"
+ " could not get exclusive access to container\n",
+ dv->devname);
+ tst->ss->free_super(tst);
+ return -1;
+ }
+
+ Kill(dv->devname, NULL, 0, -1, 0);
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ /* When mdmon is running, queue the metadata change for it
+ * instead of writing the metadata directly.
+ */
+ if (mdmon_running(tst->container_dev))
+ tst->update_tail = &tst->updates;
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname, INVALID_SECTORS)) {
+ close(dfd);
+ close(container_fd);
+ return -1;
+ }
+ if (tst->update_tail)
+ flush_metadata_updates(tst);
+ else
+ tst->ss->sync_metadata(tst);
+
+ sra = sysfs_read(container_fd, -1, 0);
+ if (!sra) {
+ pr_err("add failed for %s: sysfs_read failed\n",
+ dv->devname);
+ close(container_fd);
+ tst->ss->free_super(tst);
+ return -1;
+ }
+ sra->array.level = LEVEL_CONTAINER;
+ /* Need to set data_offset and component_size */
+ tst->ss->getinfo_super(tst, &new_mdi, NULL);
+ new_mdi.disk.major = disc.major;
+ new_mdi.disk.minor = disc.minor;
+ new_mdi.recovery_start = 0;
+ /* Make sure fds are closed as they are O_EXCL which
+ * would block add_disk */
+ tst->ss->free_super(tst);
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ pr_err("add new device to external metadata"
+ " failed for %s\n", dv->devname);
+ close(container_fd);
+ sysfs_free(sra);
+ return -1;
+ }
+ ping_monitor_by_id(devnum);
+ sysfs_free(sra);
+ close(container_fd);
+ } else {
+ tst->ss->free_super(tst);
+ if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+ pr_err("add new device failed for %s as %d: %s\n",
+ dv->devname, j, strerror(errno));
+ return -1;
+ }
+ }
+ if (verbose >= 0)
+ pr_err("added %s\n", dv->devname);
+ return 1;
+}
+
+/*
+ * Manage_remove - hot-remove one device from a running array.
+ *
+ * tst: metadata handler for the array.
+ * fd: open descriptor for the md array (or container).
+ * dv: list entry for the device being removed.
+ * sysfd: when >= 0, an open sysfs "state" file for the device; the
+ * removal is then requested by writing "remove" to it.
+ * rdev: device number of the device (0 when it is not known).
+ * verbose: a success message is printed when >= 0.
+ * devname: name of the array, used in messages.
+ *
+ * Returns 1 on success and -1 on failure.
+ */
+int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
+ int sysfd, unsigned long rdev, int verbose, char *devname)
{
- mdu_array_info_t info;
- if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
- fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
- devname, strerror(errno));
+ int lfd = -1;
+ int err;
+
+ if (tst->ss->external) {
+ /* To remove a device from a container, we must
+ * check that it isn't in use in an array.
+ * This involves looking in the 'holders'
+ * directory - there must be just one entry,
+ * the container.
+ * To ensure that it doesn't get used as a
+ * hot spare while we are checking, we
+ * get an O_EXCL open on the container
+ */
+ int ret;
+ int dnum = fd2devnum(fd);
+ lfd = open_dev_excl(dnum);
+ if (lfd < 0) {
+ pr_err("Cannot get exclusive access "
+ " to container - odd\n");
+ return -1;
+ }
+ /* We may not be able to check on holders in
+ * sysfs, either because we don't have the dev num
+ * (rdev == 0) or because the device has been detached
+ * and the 'holders' directory no longer exists
+ * (ret == -1). In that case, assume it is OK to
+ * remove.
+ */
+ if (rdev == 0)
+ ret = -1;
+ else
+ ret = sysfs_unique_holder(dnum, rdev);
+ if (ret == 0) {
+ pr_err("%s is not a member, cannot remove.\n",
+ dv->devname);
+ close(lfd);
+ return -1;
+ }
+ if (ret >= 2) {
+ pr_err("%s is still in use, cannot remove.\n",
+ dv->devname);
+ close(lfd);
+ return -1;
+ }
+ }
+ /* FIXME check that it is a current member */
+ if (sysfd >= 0) {
+ /* device has been removed and we don't know
+ * the major:minor number
+ */
+ int n = write(sysfd, "remove", 6);
+ if (n != 6)
+ err = -1;
+ else
+ err = 0;
+ } else {
+ err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+ if (err && errno == ENODEV) {
+ /* Old kernels rejected this if no personality
+ * is registered */
+ struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
+ /* 'sd' rather than 'dv' so the function's 'dv'
+ * parameter is not shadowed.
+ */
+ struct mdinfo *sd = NULL;
+ if (sra)
+ sd = sra->devs;
+ for ( ; sd ; sd=sd->next)
+ if (sd->disk.major == (int)major(rdev) &&
+ sd->disk.minor == (int)minor(rdev))
+ break;
+ if (sd)
+ err = sysfs_set_str(sra, sd,
+ "state", "remove");
+ else
+ err = -1;
+ if (sra)
+ sysfs_free(sra);
+ }
+ }
+ if (err) {
+ pr_err("hot remove failed "
+ "for %s: %s\n", dv->devname,
+ strerror(errno));
+ if (lfd >= 0)
+ close(lfd);
+ return -1;
+ }
+ if (tst->ss->external) {
+ /*
+ * Before dropping our exclusive open we make an
+ * attempt at preventing mdmon from seeing an
+ * 'add' event before reconciling this 'remove'
+ * event.
+ */
+ char *name = devnum2devname(fd2devnum(fd));
+
+ if (!name) {
+ pr_err("unable to get container name\n");
+ /* Do not leak the exclusive container open */
+ if (lfd >= 0)
+ close(lfd);
+ return -1;
+ }
+
+ ping_manager(name);
+ free(name);
+ }
+ if (lfd >= 0)
+ close(lfd);
+ if (verbose >= 0)
+ pr_err("hot removed %s from %s\n",
+ dv->devname, devname);
+ return 1;
+}
+
+/*
+ * Manage_replace - mark an active member as wanting replacement.
+ *
+ * tst: metadata handler; only native (non-external) metadata is
+ * supported.
+ * fd: open descriptor for the md array.
+ * dv: list entry for the device to be replaced. When a later entry
+ * with disposition 'W' exists, it is changed to 'w' and told
+ * which raid disk it is to replace.
+ * rdev: device number of the device to be replaced.
+ * verbose: a success message is printed when >= 0.
+ * devname: name of the array, used in messages.
+ *
+ * Returns 1 on success and -1 on failure.
+ */
+int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
+ unsigned long rdev, int verbose, char *devname)
+{
+ struct mdinfo *mdi, *di;
+ if (tst->ss->external) {
+ pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
+ return -1;
+ }
+ /* Need to find the device in sysfs and add 'want_replacement' to the
+ * status.
+ */
+ mdi = sysfs_read(fd, -1, GET_DEVS);
+ if (!mdi || !mdi->devs) {
+ pr_err("Cannot find status of %s to enable replacement - strange\n",
+ devname);
+ /* mdi may be valid but have no devices: release it */
+ if (mdi)
+ sysfs_free(mdi);
+ return -1;
+ }
+ for (di = mdi->devs; di; di = di->next)
+ if (di->disk.major == (int)major(rdev) &&
+ di->disk.minor == (int)minor(rdev))
+ break;
+ if (di) {
+ int rv;
+ if (di->disk.raid_disk < 0) {
+ pr_err("%s is not active and so cannot be replaced.\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ rv = sysfs_set_str(mdi, di,
+ "state", "want_replacement");
+ if (rv) {
+ sysfs_free(mdi);
+ pr_err("Failed to request replacement for %s\n",
+ dv->devname);
+ return -1;
+ }
+ if (verbose >= 0)
+ pr_err("Marked %s (device %d in %s) for replacement\n",
+ dv->devname, di->disk.raid_disk, devname);
+ /* If there is a matching 'with', we need to tell it which
+ * raid disk
+ */
+ while (dv && dv->disposition != 'W')
+ dv = dv->next;
+ if (dv) {
+ dv->disposition = 'w';
+ dv->used = di->disk.raid_disk;
+ }
+ /* 'di' points into mdi, so only free once it is done with */
+ sysfs_free(mdi);
return 1;
}
- if (size >= 0)
- info.size = size;
- if (raid_disks > 0)
- info.raid_disks = raid_disks;
- if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
- fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
- devname, strerror(errno));
+ sysfs_free(mdi);
+ pr_err("%s not found in %s so cannot --replace it\n",
+ dv->devname, devname);
+ return -1;
+}
+
+/*
+ * Manage_with - make a device the preferred replacement for a slot.
+ *
+ * tst: metadata handler for the array (not used here).
+ * fd: open descriptor for the md array.
+ * dv: list entry for the replacement device; dv->used holds the
+ * raid disk number it is to replace.
+ * rdev: device number of the replacement device.
+ * verbose: a success message is printed when >= 0.
+ * devname: name of the array, used in messages.
+ *
+ * Returns 1 on success and -1 on failure.
+ */
+int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
+ unsigned long rdev, int verbose, char *devname)
+{
+ struct mdinfo *mdi, *di;
+ /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
+ mdi = sysfs_read(fd, -1, GET_DEVS|GET_STATE);
+ if (!mdi || !mdi->devs) {
+ pr_err("Cannot find status of %s to enable replacement - strange\n",
+ devname);
+ /* mdi may be valid but have no devices: release it */
+ if (mdi)
+ sysfs_free(mdi);
+ return -1;
+ }
+ for (di = mdi->devs; di; di = di->next)
+ if (di->disk.major == (int)major(rdev) &&
+ di->disk.minor == (int)minor(rdev))
+ break;
+ if (di) {
+ int rv;
+ if (di->disk.state & (1<<MD_DISK_FAULTY)) {
+ pr_err("%s is faulty and cannot be a replacement\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ if (di->disk.raid_disk >= 0) {
+ pr_err("%s is active and cannot be a replacement\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ rv = sysfs_set_num(mdi, di,
+ "slot", dv->used);
+ if (rv) {
+ sysfs_free(mdi);
+ pr_err("Failed to set %s as preferred replacement.\n",
+ dv->devname);
+ return -1;
+ }
+ if (verbose >= 0)
+ pr_err("Marked %s in %s as replacement for device %d\n",
+ dv->devname, devname, dv->used);
+ /* Nothing below needs mdi any more */
+ sysfs_free(mdi);
return 1;
}
- return 0;
+ sysfs_free(mdi);
+ pr_err("%s not found in %s so cannot make it preferred replacement\n",
+ dv->devname, devname);
+ return -1;
}
int Manage_subdevs(char *devname, int fd,
- mddev_dev_t devlist, int verbose, int test)
+ struct mddev_dev *devlist, int verbose, int test,
+ char *update, int force)
{
- /* do something to each dev.
+ /* Do something to each dev.
* devmode can be
* 'a' - add the device
* try HOT_ADD_DISK
* If that fails EINVAL, try ADD_NEW_DISK
- * 'r' - remove the device HOT_REMOVE_DISK
+ * 'A' - re-add the device
+ * 'r' - remove the device: HOT_REMOVE_DISK
* device can be 'faulty' or 'detached' in which case all
* matching devices are removed.
* 'f' - set the device faulty SET_DISK_FAULTY
* device can be 'detached' in which case any device that
* is inaccessible will be marked faulty.
+ * 'R' - mark this device as wanting replacement.
+ * 'W' - this device is added if necessary and activated as
+ * a replacement for a previous 'R' device.
+ * -----
+ * 'w' - 'W' will be changed to 'w' when it is paired with
+ * a 'R' device. If a 'W' is found while walking the list
+ * it must be unpaired, and is an error.
+ * 'M' - this is created by a 'missing' target. It is a slight
+ * variant on 'A'
+ * 'F' - Another variant of 'A', where the device was faulty
+ * so must be removed from the array first.
+ *
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*/
- mddev_dev_t add_devlist = NULL;
mdu_array_info_t array;
- mdu_disk_info_t disc;
unsigned long long array_size;
- mddev_dev_t dv, next = NULL;
+ struct mddev_dev *dv;
struct stat stb;
- int j, jnext = 0;
int tfd = -1;
- struct supertype *st, *tst;
- int duuid[4];
- int ouuid[4];
- int lfd = -1;
+ struct supertype *tst;
+ char *subarray = NULL;
int sysfd = -1;
int count = 0; /* number of actions taken */
+ struct mdinfo info;
+ int frozen = 0;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- fprintf(stderr, Name ": cannot get array info for %s\n",
+ pr_err("Cannot get array info for %s\n",
devname);
- return 1;
+ goto abort;
}
+ sysfs_init(&info, fd, 0);
- /* array.size is only 32 bit and may be truncated.
+ /* array.size is only 32 bits and may be truncated.
* So read from sysfs if possible, and record number of sectors
*/
if (array_size <= 0)
array_size = array.size * 2;
- tst = super_by_fd(fd);
+ tst = super_by_fd(fd, &subarray);
if (!tst) {
- fprintf(stderr, Name ": unsupport array - version %d.%d\n",
+ pr_err("unsupport array - version %d.%d\n",
array.major_version, array.minor_version);
- return 1;
+ goto abort;
}
stb.st_rdev = 0;
- for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
- unsigned long long ldsize;
- char dvname[20];
- char *dnprintable = dv->devname;
- char *add_dev = dv->devname;
- int err;
- int re_add_failed = 0;
-
- next = dv->next;
- jnext = 0;
-
- if (strcmp(dv->devname, "failed")==0 ||
- strcmp(dv->devname, "faulty")==0) {
- if (dv->disposition != 'r') {
- fprintf(stderr, Name ": %s only meaningful "
- "with -r, not -%c\n",
+ for (dv = devlist; dv; dv = dv->next) {
+ int rv;
+
+ if (strcmp(dv->devname, "failed") == 0 ||
+ strcmp(dv->devname, "faulty") == 0) {
+ if (dv->disposition != 'A'
+ && dv->disposition != 'r') {
+ pr_err("%s only meaningful "
+ "with -r or --re-add, not -%c\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
}
- for (; j < array.raid_disks + array.nr_disks ; j++) {
- unsigned dev;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major == 0 && disc.minor == 0)
- continue;
- if ((disc.state & 1) == 0) /* faulty */
- continue;
- dev = makedev(disc.major, disc.minor);
- if (stb.st_rdev == dev)
- /* already did that one */
- continue;
- stb.st_rdev = dev;
- next = dv;
- /* same slot again next time - things might
- * have reshuffled */
- jnext = j;
- sprintf(dvname,"%d:%d", disc.major, disc.minor);
- dnprintable = dvname;
- break;
- }
- if (jnext == 0)
- continue;
- } else if (strcmp(dv->devname, "detached") == 0) {
+ add_faulty(dv, fd, (dv->disposition == 'A'
+ ? 'F' : 'r'));
+ continue;
+ }
+ if (strcmp(dv->devname, "detached") == 0) {
if (dv->disposition != 'r' && dv->disposition != 'f') {
- fprintf(stderr, Name ": %s only meaningful "
+ pr_err("%s only meaningful "
"with -r of -f, not -%c\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
}
- for (; j < array.raid_disks + array.nr_disks; j++) {
- int sfd;
- unsigned dev;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major == 0 && disc.minor == 0)
- continue;
- sprintf(dvname,"%d:%d", disc.major, disc.minor);
- sfd = dev_open(dvname, O_RDONLY);
- if (sfd >= 0) {
- close(sfd);
- continue;
- }
- if (dv->disposition == 'f' &&
- (disc.state & 1) == 1) /* already faulty */
- continue;
- if (errno != ENXIO)
- continue;
- dev = makedev(disc.major, disc.minor);
- if (stb.st_rdev == dev)
- /* already did that one */
- continue;
- stb.st_rdev = dev;
- next = dv;
- /* same slot again next time - things might
- * have reshuffled */
- jnext = j;
- dnprintable = dvname;
- break;
- }
- if (jnext == 0)
- continue;
- } else if (strcmp(dv->devname, "missing") == 0) {
- if (dv->disposition != 'a' || dv->re_add == 0) {
- fprintf(stderr, Name ": 'missing' only meaningful "
- "with --re-add\n");
- return 1;
+ add_detached(dv, fd, dv->disposition);
+ continue;
+ }
+
+ if (strcmp(dv->devname, "missing") == 0) {
+ struct mddev_dev *add_devlist = NULL;
+ struct mddev_dev **dp;
+ if (dv->disposition != 'A') {
+ pr_err("'missing' only meaningful "
+ "with --re-add\n");
+ goto abort;
}
- if (add_devlist == NULL)
- add_devlist = conf_get_devs();
+ add_devlist = conf_get_devs();
if (add_devlist == NULL) {
- fprintf(stderr, Name ": no devices to scan for missing members.");
+ pr_err("no devices to scan for missing members.");
continue;
}
- add_dev = add_devlist->devname;
- add_devlist = add_devlist->next;
- if (add_devlist != NULL)
- next = dv;
- if (stat(add_dev, &stb) < 0)
- continue;
- } else if (strchr(dv->devname, '/') == NULL &&
- strchr(dv->devname, ':') == NULL &&
- strlen(dv->devname) < 50) {
+ for (dp = &add_devlist; *dp; dp = & (*dp)->next)
+ /* 'M' (for 'missing') is like 'A' without errors */
+ (*dp)->disposition = 'M';
+ *dp = dv->next;
+ dv->next = add_devlist;
+ continue;
+ }
+
+ if (strchr(dv->devname, '/') == NULL &&
+ strchr(dv->devname, ':') == NULL &&
+ strlen(dv->devname) < 50) {
/* Assume this is a kernel-internal name like 'sda1' */
int found = 0;
char dname[55];
if (dv->disposition != 'r' && dv->disposition != 'f') {
- fprintf(stderr, Name ": %s only meaningful "
+ pr_err("%s only meaningful "
"with -r or -f, not -%c\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
}
sprintf(dname, "dev-%s", dv->devname);
if (!found) {
sysfd = sysfs_open(fd2devnum(fd), dname, "state");
if (sysfd < 0) {
- fprintf(stderr, Name ": %s does not appear "
+ pr_err("%s does not appear "
"to be a component of %s\n",
dv->devname, devname);
- return 1;
+ goto abort;
}
}
} else {
- j = 0;
-
tfd = dev_open(dv->devname, O_RDONLY);
- if (tfd < 0 && dv->disposition == 'r' &&
- lstat(dv->devname, &stb) == 0)
- /* Be happy, the lstat worked, that is
- * enough for --remove
- */
- ;
+ if (tfd >= 0)
+ fstat(tfd, &stb);
else {
- if (tfd < 0 || fstat(tfd, &stb) != 0) {
- fprintf(stderr, Name ": cannot find %s: %s\n",
- dv->devname, strerror(errno));
- if (tfd >= 0)
- close(tfd);
- return 1;
+ int open_err = errno;
+ if (stat(dv->devname, &stb) != 0) {
+ pr_err("Cannot find %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
+ }
+ if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (dv->disposition == 'M')
+ /* non-fatal. Also improbable */
+ continue;
+ pr_err("%s is not a block device.\n",
+ dv->devname);
+ goto abort;
+ }
+ if (dv->disposition == 'r')
+ /* Be happy, the stat worked, that is
+ * enough for --remove
+ */
+ ;
+ else {
+ if (dv->disposition == 'M')
+ /* non-fatal */
+ continue;
+ pr_err("Cannot open %s: %s\n",
+ dv->devname, strerror(open_err));
+ goto abort;
}
- close(tfd);
- tfd = -1;
- }
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- fprintf(stderr, Name ": %s is not a "
- "block device.\n",
- dv->devname);
- return 1;
}
}
switch(dv->disposition){
default:
- fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
+ pr_err("internal error - devmode[%s]=%d\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
case 'a':
+ case 'A':
+ case 'M': /* --re-add missing */
+ case 'F': /* --re-add faulty */
/* add the device */
- if (tst->subarray[0]) {
- fprintf(stderr, Name ": Cannot add disks to a"
+ if (subarray) {
+ pr_err("Cannot add disks to a"
" \'member\' array, perform this"
" operation on the parent container\n");
- return 1;
+ goto abort;
}
+ if (dv->disposition == 'F')
+ /* Need to remove first */
+ ioctl(fd, HOT_REMOVE_DISK,
+ (unsigned long)stb.st_rdev);
/* Make sure it isn't in use (in 2.6 or later) */
- tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT);
- if (tfd < 0 && add_dev != dv->devname)
- continue;
- if (tfd < 0) {
- fprintf(stderr, Name ": Cannot open %s: %s\n",
- dv->devname, strerror(errno));
- return 1;
- }
-
- st = dup_super(tst);
-
- if (array.not_persistent==0)
- st->ss->load_super(st, tfd, NULL);
-
- if (add_dev == dv->devname) {
- if (!get_dev_size(tfd, dv->devname, &ldsize)) {
- close(tfd);
- return 1;
- }
- } else if (!get_dev_size(tfd, NULL, &ldsize)) {
- close(tfd);
- tfd = -1;
- continue;
- }
-
- if (!tst->ss->external &&
- array.major_version == 0 &&
- md_get_version(fd)%100 < 2) {
- close(tfd);
- tfd = -1;
- if (ioctl(fd, HOT_ADD_DISK,
- (unsigned long)stb.st_rdev)==0) {
- if (verbose >= 0)
- fprintf(stderr, Name ": hot added %s\n",
- add_dev);
- continue;
- }
-
- fprintf(stderr, Name ": hot add failed for %s: %s\n",
- add_dev, strerror(errno));
- return 1;
- }
-
- if (array.not_persistent == 0 || tst->ss->external) {
-
- /* need to find a sample superblock to copy, and
- * a spare slot to use.
- * For 'external' array (well, container based),
- * We can just load the metadata for the array.
- */
- if (tst->sb)
- /* already loaded */;
- else if (tst->ss->external) {
- tst->ss->load_super(tst, fd, NULL);
- } else for (j = 0; j < tst->max_devs; j++) {
- char *dev;
- int dfd;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major==0 && disc.minor==0)
- continue;
- if ((disc.state & 4)==0) continue; /* sync */
- /* Looks like a good device to try */
- dev = map_dev(disc.major, disc.minor, 1);
- if (!dev) continue;
- dfd = dev_open(dev, O_RDONLY);
- if (dfd < 0) continue;
- if (tst->ss->load_super(tst, dfd,
- NULL)) {
- close(dfd);
- continue;
- }
- close(dfd);
- break;
- }
- /* FIXME this is a bad test to be using */
- if (!tst->sb) {
- close(tfd);
- fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
- return 1;
- }
-
- /* Make sure device is large enough */
- if (tst->ss->avail_size(tst, ldsize/512) <
- array_size) {
- close(tfd);
- tfd = -1;
- if (add_dev != dv->devname)
- continue;
- fprintf(stderr, Name ": %s not large enough to join array\n",
- dv->devname);
- return 1;
- }
-
- /* Possibly this device was recently part of the array
- * and was temporarily removed, and is now being re-added.
- * If so, we can simply re-add it.
- */
- tst->ss->uuid_from_super(tst, duuid);
-
- /* re-add doesn't work for version-1 superblocks
- * before 2.6.18 :-(
- */
- if (array.major_version == 1 &&
- get_linux_version() <= 2006018)
- ;
- else if (st->sb) {
- struct mdinfo mdi;
- st->ss->getinfo_super(st, &mdi, NULL);
- st->ss->uuid_from_super(st, ouuid);
- if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
- !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
- memcmp(duuid, ouuid, sizeof(ouuid))==0) {
- /* look like it is worth a try. Need to
- * make sure kernel will accept it though.
- */
- disc.number = mdi.disk.number;
- if (ioctl(fd, GET_DISK_INFO, &disc) != 0
- || disc.major != 0 || disc.minor != 0
- || !enough_fd(fd))
- goto skip_re_add;
- disc.major = major(stb.st_rdev);
- disc.minor = minor(stb.st_rdev);
- disc.number = mdi.disk.number;
- disc.raid_disk = mdi.disk.raid_disk;
- disc.state = mdi.disk.state;
- if (dv->writemostly == 1)
- disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- if (dv->writemostly == 2)
- disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
- remove_partitions(tfd);
- close(tfd);
- tfd = -1;
- /* don't even try if disk is marked as faulty */
- errno = 0;
- if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
- if (verbose >= 0)
- fprintf(stderr, Name ": re-added %s\n", add_dev);
- count++;
- continue;
- }
- if (errno == ENOMEM || errno == EROFS) {
- fprintf(stderr, Name ": add new device failed for %s: %s\n",
- add_dev, strerror(errno));
- if (add_dev != dv->devname)
- continue;
- return 1;
- }
- skip_re_add:
- re_add_failed = 1;
- }
- }
- if (add_dev != dv->devname) {
- if (verbose > 0)
- fprintf(stderr, Name
- ": --re-add for %s to %s is not possible\n",
- add_dev, devname);
- if (tfd >= 0) {
- close(tfd);
- tfd = -1;
- }
- continue;
- }
- if (dv->re_add) {
- if (tfd >= 0)
- close(tfd);
- fprintf(stderr, Name
- ": --re-add for %s to %s is not possible\n",
- dv->devname, devname);
- return 1;
- }
- if (re_add_failed) {
- fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
- dv->devname, devname);
- fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
- dv->devname);
- fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",
- dv->devname);
- if (tfd >= 0)
- close(tfd);
- return 1;
- }
- } else {
- /* non-persistent. Must ensure that new drive
- * is at least array.size big.
- */
- if (ldsize/512 < array_size) {
- fprintf(stderr, Name ": %s not large enough to join array\n",
- dv->devname);
- if (tfd >= 0)
- close(tfd);
- return 1;
- }
- }
- /* committed to really trying this device now*/
+ tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
if (tfd >= 0) {
- remove_partitions(tfd);
- close(tfd);
- tfd = -1;
- }
- /* in 2.6.17 and earlier, version-1 superblocks won't
- * use the number we write, but will choose a free number.
- * we must choose the same free number, which requires
- * starting at 'raid_disks' and counting up
- */
- for (j = array.raid_disks; j< tst->max_devs; j++) {
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- break;
- if (disc.major==0 && disc.minor==0)
- break;
- if (disc.state & 8) /* removed */
- break;
- }
- disc.major = major(stb.st_rdev);
- disc.minor = minor(stb.st_rdev);
- disc.number =j;
- disc.state = 0;
- if (array.not_persistent==0 || tst->ss->external) {
- int dfd;
- if (dv->writemostly == 1)
- disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname)) {
- close(dfd);
- return 1;
- }
- /* write_init_super will close 'dfd' */
- if (tst->ss->external)
- /* mdmon will write the metadata */
- close(dfd);
- else if (tst->ss->write_init_super(tst))
- return 1;
- } else if (dv->re_add) {
- /* this had better be raid1.
- * As we are "--re-add"ing we must find a spare slot
- * to fill.
+ /* We know no-one else is using it. We'll
+ * need non-exclusive access to add it, so
+ * do that now.
*/
- char *used = malloc(array.raid_disks);
- memset(used, 0, array.raid_disks);
- for (j=0; j< tst->max_devs; j++) {
- mdu_disk_info_t disc2;
- disc2.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc2))
- continue;
- if (disc2.major==0 && disc2.minor==0)
- continue;
- if (disc2.state & 8) /* removed */
- continue;
- if (disc2.raid_disk < 0)
- continue;
- if (disc2.raid_disk > array.raid_disks)
- continue;
- used[disc2.raid_disk] = 1;
- }
- for (j=0 ; j<array.raid_disks; j++)
- if (!used[j]) {
- disc.raid_disk = j;
- disc.state |= (1<<MD_DISK_SYNC);
- break;
- }
- free(used);
+ close(tfd);
+ tfd = dev_open(dv->devname, O_RDONLY);
+ }
+ if (tfd < 0) {
+ if (dv->disposition == 'M')
+ continue;
+ pr_err("Cannot open %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
}
- if (dv->writemostly == 1)
- disc.state |= (1 << MD_DISK_WRITEMOSTLY);
- if (tst->ss->external) {
- /* add a disk to an external metadata container
- * only if mdmon is around to see it
- */
- struct mdinfo new_mdi;
- struct mdinfo *sra;
- int container_fd;
- int devnum = fd2devnum(fd);
-
- container_fd = open_dev_excl(devnum);
- if (container_fd < 0) {
- fprintf(stderr, Name ": add failed for %s:"
- " could not get exclusive access to container\n",
- dv->devname);
- return 1;
- }
-
- if (!mdmon_running(devnum)) {
- fprintf(stderr, Name ": add failed for %s: mdmon not running\n",
- dv->devname);
- close(container_fd);
- return 1;
- }
-
- sra = sysfs_read(container_fd, -1, 0);
- if (!sra) {
- fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
- dv->devname);
- close(container_fd);
- return 1;
- }
- sra->array.level = LEVEL_CONTAINER;
- /* Need to set data_offset and component_size */
- tst->ss->getinfo_super(tst, &new_mdi, NULL);
- new_mdi.disk.major = disc.major;
- new_mdi.disk.minor = disc.minor;
- new_mdi.recovery_start = 0;
- if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
- fprintf(stderr, Name ": add new device to external metadata"
- " failed for %s\n", dv->devname);
- close(container_fd);
- return 1;
- }
- ping_monitor(devnum2devname(devnum));
- sysfs_free(sra);
- close(container_fd);
- } else if (ioctl(fd, ADD_NEW_DISK, &disc)) {
- fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
- dv->devname, j, strerror(errno));
- return 1;
+ if (!frozen) {
+ if (sysfs_freeze_array(&info) == 1)
+ frozen = 1;
+ else
+ frozen = -1;
}
- if (verbose >= 0)
- fprintf(stderr, Name ": added %s\n", dv->devname);
+ rv = Manage_add(fd, tfd, dv, tst, &array,
+ force, verbose, devname, update,
+ stb.st_rdev, array_size);
+ close(tfd);
+ tfd = -1;
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
break;
case 'r':
/* hot remove */
- if (tst->subarray[0]) {
- fprintf(stderr, Name ": Cannot remove disks from a"
+ if (subarray) {
+ pr_err("Cannot remove disks from a"
" \'member\' array, perform this"
" operation on the parent container\n");
- if (sysfd >= 0)
- close(sysfd);
- return 1;
- }
- if (tst->ss->external) {
- /* To remove a device from a container, we must
- * check that it isn't in use in an array.
- * This involves looking in the 'holders'
- * directory - there must be just one entry,
- * the container.
- * To ensure that it doesn't get used as a
- * hold spare while we are checking, we
- * get an O_EXCL open on the container
- */
- int dnum = fd2devnum(fd);
- lfd = open_dev_excl(dnum);
- if (lfd < 0) {
- fprintf(stderr, Name
- ": Cannot get exclusive access "
- " to container - odd\n");
- if (sysfd >= 0)
- close(sysfd);
- return 1;
- }
- /* in the detached case it is not possible to
- * check if we are the unique holder, so just
- * rely on the 'detached' checks
- */
- if (strcmp(dv->devname, "detached") == 0 ||
- sysfd >= 0 ||
- sysfs_unique_holder(dnum, stb.st_rdev))
- /* pass */;
- else {
- fprintf(stderr, Name
- ": %s is %s, cannot remove.\n",
- dnprintable,
- errno == EEXIST ? "still in use":
- "not a member");
- close(lfd);
- return 1;
- }
- }
- /* FIXME check that it is a current member */
- if (sysfd >= 0) {
- /* device has been removed and we don't know
- * the major:minor number
- */
- int n = write(sysfd, "remove", 6);
- if (n != 6)
- err = -1;
- else
- err = 0;
+ rv = -1;
+ } else
+ rv = Manage_remove(tst, fd, dv, sysfd,
+ stb.st_rdev, verbose,
+ devname);
+ if (sysfd >= 0)
close(sysfd);
- sysfd = -1;
- } else {
- err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
- if (err && errno == ENODEV) {
- /* Old kernels rejected this if no personality
- * registered */
- struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
- struct mdinfo *dv = NULL;
- if (sra)
- dv = sra->devs;
- for ( ; dv ; dv=dv->next)
- if (dv->disk.major == (int)major(stb.st_rdev) &&
- dv->disk.minor == (int)minor(stb.st_rdev))
- break;
- if (dv)
- err = sysfs_set_str(sra, dv,
- "state", "remove");
- else
- err = -1;
- if (sra)
- sysfs_free(sra);
- }
- }
- if (err) {
- fprintf(stderr, Name ": hot remove failed "
- "for %s: %s\n", dnprintable,
- strerror(errno));
- if (lfd >= 0)
- close(lfd);
- return 1;
- }
- if (tst->ss->external) {
- /*
- * Before dropping our exclusive open we make an
- * attempt at preventing mdmon from seeing an
- * 'add' event before reconciling this 'remove'
- * event.
- */
- char *name = devnum2devname(fd2devnum(fd));
-
- if (!name) {
- fprintf(stderr, Name ": unable to get container name\n");
- return 1;
- }
-
- ping_manager(name);
- free(name);
- }
- if (lfd >= 0)
- close(lfd);
- count++;
- if (verbose >= 0)
- fprintf(stderr, Name ": hot removed %s from %s\n",
- dnprintable, devname);
+ sysfd = -1;
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
break;
case 'f': /* set faulty */
if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
(sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
(unsigned long) stb.st_rdev))) {
- fprintf(stderr, Name ": set device faulty failed for %s: %s\n",
- dnprintable, strerror(errno));
+ pr_err("set device faulty failed for %s: %s\n",
+ dv->devname, strerror(errno));
if (sysfd >= 0)
close(sysfd);
- return 1;
+ goto abort;
}
if (sysfd >= 0)
close(sysfd);
sysfd = -1;
count++;
if (verbose >= 0)
- fprintf(stderr, Name ": set %s faulty in %s\n",
- dnprintable, devname);
+ pr_err("set %s faulty in %s\n",
+ dv->devname, devname);
+ break;
+ case 'R': /* Mark as replaceable */
+ if (subarray) {
+ pr_err("Cannot replace disks in a"
+ " \'member\' array, perform this"
+ " operation on the parent container\n");
+ rv = -1;
+ } else {
+ if (!frozen) {
+ if (sysfs_freeze_array(&info) == 1)
+ frozen = 1;
+ else
+ frozen = -1;
+ }
+ rv = Manage_replace(tst, fd, dv,
+ stb.st_rdev, verbose,
+ devname);
+ }
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
+ break;
+ case 'W': /* --with device that doesn't match */
+ pr_err("No matching --replace device for --with %s\n",
+ dv->devname);
+ goto abort;
+ case 'w': /* --with device which was matched */
+ rv = Manage_with(tst, fd, dv,
+ stb.st_rdev, verbose, devname);
+ if (rv < 0)
+ goto abort;
break;
}
}
+ if (frozen > 0)
+ sysfs_set_str(&info, NULL, "sync_action","idle");
if (test && count == 0)
return 2;
return 0;
+
+abort:
+ if (frozen > 0)
+ sysfs_set_str(&info, NULL, "sync_action","idle");
+ return 1;
}
int autodetect(void)
return rv;
}
+/* Update one property of a subarray held inside a container.
+ *
+ * dev      - name passed to open_subarray() to locate the container
+ * subarray - identifier of the member array; passed to open_subarray()
+ *            and to the metadata handler's update_subarray method
+ * update   - name of the property to change; the value "name" triggers
+ *            an extra notice on success
+ * ident    - passed through unchanged to the update_subarray method
+ * verbose  - messages are printed only when verbose >= 0
+ *
+ * Returns 2 if the subarray cannot be opened or the metadata handler
+ * provides no update_subarray method; otherwise returns whatever
+ * st->ss->update_subarray() returned (0 is treated as success).
+ */
-int Update_subarray(char *dev, char *subarray, char *update, mddev_ident_t ident, int quiet)
+int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
{
struct supertype supertype, *st = &supertype;
int fd, rv = 2;
memset(st, 0, sizeof(*st));
- fd = open_subarray(dev, subarray, st, quiet);
+ /* The last argument to open_subarray() was previously 'quiet';
+  * it is now true only when verbose is negative.
+  */
+ fd = open_subarray(dev, subarray, st, verbose < 0);
if (fd < 0)
return 2;
if (!st->ss->update_subarray) {
- if (!quiet)
- fprintf(stderr,
- Name ": Operation not supported for %s metadata\n",
- st->ss->name);
+ if (verbose >= 0)
+ pr_err("Operation not supported for %s metadata\n",
+ st->ss->name);
+ /* rv still holds its initial value of 2 on this path */
goto free_super;
}
+ /* When mdmon is running for this device, point update_tail at
+  * st->updates; a non-NULL update_tail selects
+  * flush_metadata_updates() over sync_metadata() below.
+  */
if (mdmon_running(st->devnum))
st->update_tail = &st->updates;
- rv = st->ss->update_subarray(st, update, ident);
+ rv = st->ss->update_subarray(st, subarray, update, ident);
if (rv) {
- if (!quiet)
- fprintf(stderr, Name ": Failed to update %s of subarray-%s in %s\n",
+ if (verbose >= 0)
+ pr_err("Failed to update %s of subarray-%s in %s\n",
update, subarray, dev);
} else if (st->update_tail)
flush_metadata_updates(st);
else
st->ss->sync_metadata(st);
- if (rv == 0 && strcmp(update, "name") == 0 && !quiet)
- fprintf(stderr,
- Name ": Updated subarray-%s name from %s, UUIDs may have changed\n",
- subarray, dev);
+ if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
+ pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
+ subarray, dev);
+ /* NOTE(review): the descriptor returned by open_subarray() is not
+  * closed anywhere in the lines shown here - confirm against the
+  * full file whether a close(fd) exists or is needed.
+  */
free_super:
st->ss->free_super(st);
return rv;
}
+
+/* Move a spare from one array to another.
+ *
+ * The device identified by devid is removed from from_devname and then
+ * added to to_devname, both through Manage_subdevs().  If adding to the
+ * destination array fails, an attempt is made to add the device back to
+ * the original array; the result of that attempt is not checked.
+ *
+ * Returns 1 on success, 0 on failure (including failure to open either
+ * array device).
+ */
+int move_spare(char *from_devname, char *to_devname, dev_t devid)
+{
+	struct mddev_dev devlist;
+	/* Large enough for two full-width ints, ':' and the terminator. */
+	char devname[32];
+	int moved = 0;
+
+	/* try to remove and add */
+	int fd1 = open(to_devname, O_RDONLY);
+	int fd2 = open(from_devname, O_RDONLY);
+
+	if (fd1 < 0 || fd2 < 0)
+		goto out;
+
+	/* Zero the whole entry so that any member not assigned below has a
+	 * defined value when Manage_subdevs() inspects it.
+	 */
+	memset(&devlist, 0, sizeof(devlist));
+	devlist.next = NULL;
+	devlist.used = 0;
+	devlist.writemostly = 0;
+	devlist.devname = devname;
+	/* Bounded write: the old sprintf() into a 20-byte buffer could
+	 * overflow for wide major/minor values.
+	 */
+	snprintf(devname, sizeof(devname), "%d:%d",
+		 (int)major(devid), (int)minor(devid));
+
+	devlist.disposition = 'r';
+	if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) != 0)
+		goto out;
+
+	devlist.disposition = 'a';
+	if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+		/* make sure manager is aware of changes */
+		ping_manager(to_devname);
+		ping_manager(from_devname);
+		moved = 1;
+	} else {
+		/* Best effort: put the spare back where it came from. */
+		Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+	}
+
+out:
+	if (fd1 >= 0)
+		close(fd1);
+	if (fd2 >= 0)
+		close(fd2);
+	return moved;
+}
#endif