/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
#include "md_p.h"
#include <ctype.h>
-#define REGISTER_DEV _IO (MD_MAJOR, 1)
-#define START_MD _IO (MD_MAJOR, 2)
-#define STOP_MD _IO (MD_MAJOR, 3)
+#define REGISTER_DEV _IO (MD_MAJOR, 1)
+#define START_MD _IO (MD_MAJOR, 2)
+#define STOP_MD _IO (MD_MAJOR, 3)
int Manage_ro(char *devname, int fd, int readonly)
{
* use RESTART_ARRAY_RW or STOP_ARRAY_RO
*
*/
- mdu_array_info_t array;
-#ifndef MDASSEMBLE
struct mdinfo *mdi;
-#endif
int rv = 0;
- if (md_get_version(fd) < 9000) {
- fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
- return 1;
- }
-#ifndef MDASSEMBLE
- /* If this is an externally-manage array, we need to modify the
+ /* If this is an externally-managed array, we need to modify the
* metadata_version so that mdmon doesn't undo our change.
*/
- mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
+ mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
if (mdi &&
mdi->array.major_version == -1 &&
is_subarray(mdi->text_version)) {
rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
if (rv < 0) {
- fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+ pr_err("failed to set readonly for %s: %s\n",
devname, strerror(errno));
vers[9] = mdi->text_version[0];
}
goto out;
}
-#endif
- if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- fprintf(stderr, Name ": %s does not appear to be active.\n",
- devname);
+
+ if (!md_array_active(fd)) {
+ pr_err("%s does not appear to be active.\n", devname);
rv = 1;
goto out;
}
- if (readonly>0) {
+ if (readonly > 0) {
if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
- fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+ pr_err("failed to set readonly for %s: %s\n",
devname, strerror(errno));
rv = 1;
goto out;
}
} else if (readonly < 0) {
if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
- fprintf(stderr, Name ": failed to set writable for %s: %s\n",
+ pr_err("failed to set writable for %s: %s\n",
devname, strerror(errno));
rv = 1;
goto out;
}
}
out:
-#ifndef MDASSEMBLE
- if (mdi)
- sysfs_free(mdi);
-#endif
+ sysfs_free(mdi);
return rv;
}
-#ifndef MDASSEMBLE
-
-static void remove_devices(int devnum, char *path)
+static void remove_devices(char *devnm, char *path)
{
- /*
+ /*
* Remove names at 'path' - possibly with
* partition suffixes - which link to the 'standard'
- * name for devnum. These were probably created
+ * name for devnm. These were probably created
* by mdadm when the array was assembled.
*/
char base[40];
if (!path)
return;
- if (devnum >= 0)
- sprintf(base, "/dev/md%d", devnum);
- else
- sprintf(base, "/dev/md_d%d", -1-devnum);
+ sprintf(base, "/dev/%s", devnm);
be = base + strlen(base);
- path2 = malloc(strlen(path)+20);
+ path2 = xmalloc(strlen(path)+20);
strcpy(path2, path);
pe = path2 + strlen(path2);
-
+
for (part = 0; part < 16; part++) {
if (part) {
sprintf(be, "p%d", part);
}
free(path2);
}
-
-int Manage_runstop(char *devname, int fd, int runstop, int quiet)
+int Manage_run(char *devname, int fd, struct context *c)
{
- /* Run or stop the array. array must already be configured
- * required >= 0.90.0
- * Only print failure messages if quiet == 0;
- * quiet > 0 means really be quiet
- * quiet < 0 means we will try again if it fails.
+ /* Run the array. Array must already be configured
+ * Requires >= 0.90.0
+ *
+ * The md device name for 'fd' is looked up with fd2devnm() and
+ * passed, together with the context 'c', to IncrementalScan();
+ * that call's result is returned. If no name is found an error
+ * naming 'devname' is printed and 1 is returned.
*/
- mdu_param_t param; /* unused */
- int rv = 0;
+ char nm[32], *nmp;
- if (runstop == -1 && md_get_version(fd) < 9000) {
- if (ioctl(fd, STOP_MD, 0)) {
- if (quiet == 0) fprintf(stderr,
- Name ": stopping device %s "
- "failed: %s\n",
- devname, strerror(errno));
- return 1;
- }
+ nmp = fd2devnm(fd);
+ if (!nmp) {
+ pr_err("Cannot find %s in sysfs!!\n", devname);
+ return 1;
}
+ /* NOTE(review): the name is copied to a local buffer before use,
+ * presumably because fd2devnm() returns storage that later calls
+ * may overwrite - confirm. */
+ strcpy(nm, nmp);
+ return IncrementalScan(c, nm);
+}
- if (md_get_version(fd) < 9000) {
- fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
- return 1;
+int Manage_stop(char *devname, int fd, int verbose, int will_retry)
+{
+ /* Stop the array. Array must already be configured
+ * 'will_retry' means that error messages are not wanted.
+ */
+ int rv = 0;
+ struct map_ent *map = NULL;
+ struct mdinfo *mdi;
+ char devnm[32];
+ char container[32];
+ int err;
+ int count;
+ char buf[32];
+ unsigned long long rd1, rd2;
+
+ if (will_retry && verbose == 0)
+ verbose = -1;
+
+ strcpy(devnm, fd2devnm(fd));
+ /* Get EXCL access first. If this fails, then attempting
+ * to stop is probably a bad idea.
+ */
+ mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
+ if (mdi && is_subarray(mdi->text_version)) {
+ char *sl;
+ strncpy(container, mdi->text_version+1, sizeof(container));
+ container[sizeof(container)-1] = 0;
+ sl = strchr(container, '/');
+ if (sl)
+ *sl = 0;
+ } else
+ container[0] = 0;
+ close(fd);
+ count = 5;
+ while (((fd = ((devname[0] == '/')
+ ?open(devname, O_RDONLY|O_EXCL)
+ :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
+ || strcmp(fd2devnm(fd), devnm) != 0)
+ && container[0]
+ && mdmon_running(container)
+ && count) {
+ /* Can't open, so something might be wrong. However it
+ * is a container, so we might be racing with mdmon, so
+ * retry for a bit.
+ */
+ if (fd >= 0)
+ close(fd);
+ flush_mdmon(container);
+ count--;
}
- /*
- if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- fprintf(stderr, Name ": %s does not appear to be active.\n",
- devname);
+ if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
+ if (fd >= 0)
+ close(fd);
+ if (verbose >= 0)
+ pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
+ devname);
return 1;
}
- */
- if (runstop>0) {
- if (ioctl(fd, RUN_ARRAY, ¶m)) {
- fprintf(stderr, Name ": failed to run array %s: %s\n",
- devname, strerror(errno));
- return 1;
- }
- if (quiet <= 0)
- fprintf(stderr, Name ": started %s\n", devname);
- } else if (runstop < 0){
- struct map_ent *map = NULL;
- struct stat stb;
- struct mdinfo *mdi;
- int devnum;
+ /* If this is an mdmon managed array, just write 'inactive'
+ * to the array state and let mdmon clear up.
+ */
+ if (mdi &&
+ mdi->array.level > 0 &&
+ is_subarray(mdi->text_version)) {
int err;
- int count;
- /* If this is an mdmon managed array, just write 'inactive'
- * to the array state and let mdmon clear up.
- */
- devnum = fd2devnum(fd);
- /* Get EXCL access first. If this fails, then attempting
- * to stop is probably a bad idea.
- */
+ /* This is mdmon managed. */
close(fd);
- fd = open(devname, O_RDONLY|O_EXCL);
- if (fd < 0 || fd2devnum(fd) != devnum) {
- if (fd >= 0)
- close(fd);
- fprintf(stderr,
- Name ": Cannot get exclusive access to %s:"
- "Perhaps a running "
- "process, mounted filesystem "
- "or active volume group?\n",
- devname);
- return 1;
+
+ /* As we had an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25;
+ while (count &&
+ (err = sysfs_set_str(mdi, NULL,
+ "array_state",
+ "inactive")) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count--;
+ }
+ if (err) {
+ if (verbose >= 0)
+ pr_err("failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ rv = 1;
+ goto out;
}
- mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
- if (mdi &&
- mdi->array.level > 0 &&
- is_subarray(mdi->text_version)) {
- int err;
- /* This is mdmon managed. */
- close(fd);
- count = 25;
- while (count &&
- (err = sysfs_set_str(mdi, NULL,
- "array_state",
- "inactive")) < 0
- && errno == EBUSY) {
- usleep(200000);
- count--;
- }
- if (err && !quiet) {
- fprintf(stderr, Name
- ": failed to stop array %s: %s\n",
- devname, strerror(errno));
- rv = 1;
- goto out;
- }
+ /* Give monitor a chance to act */
+ ping_monitor(mdi->text_version);
- /* Give monitor a chance to act */
- ping_monitor(mdi->text_version);
+ fd = open_dev_excl(devnm);
+ if (fd < 0) {
+ if (verbose >= 0)
+ pr_err("failed to completely stop %s: Device is busy\n",
+ devname);
+ rv = 1;
+ goto out;
+ }
+ } else if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.minor_version == -2 &&
+ !is_subarray(mdi->text_version)) {
+ struct mdstat_ent *mds, *m;
+ /* container, possibly mdmon-managed.
+ * Make sure mdmon isn't opening it, which
+ * would interfere with the 'stop'
+ */
+ ping_monitor(mdi->sys_name);
- fd = open_dev_excl(devnum);
- if (fd < 0) {
- fprintf(stderr, Name
- ": failed to completely stop %s"
- ": Device is busy\n",
- devname);
+ /* now check that there are no existing arrays
+ * which are members of this array
+ */
+ mds = mdstat_read(0, 0);
+ for (m = mds; m; m = m->next)
+ if (m->metadata_version &&
+ strncmp(m->metadata_version, "external:", 9)==0 &&
+ metadata_container_matches(m->metadata_version+9,
+ devnm)) {
+ if (verbose >= 0)
+ pr_err("Cannot stop container %s: member %s still active\n",
+ devname, m->devnm);
+ free_mdstat(mds);
rv = 1;
goto out;
}
- } else if (mdi &&
- mdi->array.major_version == -1 &&
- mdi->array.minor_version == -2 &&
- !is_subarray(mdi->text_version)) {
- struct mdstat_ent *mds, *m;
- /* container, possibly mdmon-managed.
- * Make sure mdmon isn't opening it, which
- * would interfere with the 'stop'
- */
- ping_monitor(mdi->sys_name);
+ }
- /* now check that there are no existing arrays
- * which are members of this array
- */
- mds = mdstat_read(0, 0);
- for (m=mds; m; m=m->next)
- if (m->metadata_version &&
- strncmp(m->metadata_version, "external:", 9)==0 &&
- is_subarray(m->metadata_version+9) &&
- devname2devnum(m->metadata_version+10) == devnum) {
- if (!quiet)
- fprintf(stderr, Name
- ": Cannot stop container %s: "
- "member %s still active\n",
- devname, m->dev);
- free_mdstat(mds);
- rv = 1;
- goto out;
- }
+ /* If the array is undergoing a reshape which changes the number
+ * of devices, then it would be nice to stop it at a point where
+ * it has completed a full number of stripes in both old and
+ * new layouts as this will allow the reshape to be reverted.
+ * So if 'sync_action' is "reshape" and 'raid_disks' shows two
+ * different numbers, then
+ * - freeze reshape
+ * - set sync_max to next multiple of both data_disks and
+ * chunk sizes (or next but one)
+ * - unfreeze reshape
+ * - wait on 'sync_completed' for that point to be reached.
+ */
+ if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
+ sysfs_attribute_available(mdi, NULL, "sync_action") &&
+ sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
+ sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
+ strcmp(buf, "reshape\n") == 0 &&
+ sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
+ unsigned long long position, curr;
+ unsigned long long chunk1, chunk2;
+ unsigned long long rddiv, chunkdiv;
+ unsigned long long sectors;
+ unsigned long long sync_max, old_sync_max;
+ unsigned long long completed;
+ int backwards = 0;
+ int delay;
+ int scfd;
+
+ delay = 40;
+ while (rd1 > rd2 && delay > 0 &&
+ sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
+ /* must be in the critical section - wait a bit */
+ delay -= 1;
+ usleep(100000);
}
- /* As we have an O_EXCL open, any use of the device
- * which blocks STOP_ARRAY is probably a transient use,
- * so it is reasonable to retry for a while - 5 seconds.
- */
- count = 25; err = 0;
- while (count && fd >= 0
- && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
- && errno == EBUSY) {
- usleep(200000);
- count --;
+ if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
+ goto done;
+ /* Array is frozen */
+
+ rd1 -= mdi->array.level == 6 ? 2 : 1;
+ rd2 -= mdi->array.level == 6 ? 2 : 1;
+ sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
+ if (strncmp(buf, "back", 4) == 0)
+ backwards = 1;
+ if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
+ /* reshape must have finished now */
+ sysfs_set_str(mdi, NULL, "sync_action", "idle");
+ goto done;
}
- if (fd >= 0 && err) {
- if (quiet == 0) {
- fprintf(stderr, Name
- ": failed to stop array %s: %s\n",
- devname, strerror(errno));
- if (errno == EBUSY)
- fprintf(stderr, "Perhaps a running "
- "process, mounted filesystem "
- "or active volume group?\n");
+ sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
+ chunk1 /= 512;
+ chunk2 /= 512;
+ rddiv = GCD(rd1, rd2);
+ chunkdiv = GCD(chunk1, chunk2);
+ sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
+
+ if (backwards) {
+ /* Need to subtract 'reshape_position' from
+ * array size to get equivalent of sync_max.
+ * Size calculation based on raid5_size in kernel.
+ */
+ unsigned long long size = mdi->component_size;
+ size &= ~(chunk1-1);
+ size &= ~(chunk2-1);
+ /* rd1 must be smaller */
+ /* Reshape may have progressed further backwards than
+ * recorded, so target even further back (hence "-1")
+ */
+ position = (position / sectors - 1) * sectors;
+ /* rd1 is always the conversion factor between 'sync'
+ * position and 'reshape' position.
+ * We read 1 "new" stripe worth of data from where-ever,
+ * and then write out that full stripe.
+ */
+ sync_max = size - position/rd1;
+ } else {
+ /* Reshape will very likely be beyond position, and it may
+ * be too late to stop at '+1', so aim for '+2'
+ */
+ position = (position / sectors + 2) * sectors;
+ sync_max = position/rd1;
+ }
+ if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
+ old_sync_max = mdi->component_size;
+ /* Must not advance sync_max as that could confuse
+ * the reshape monitor */
+ if (sync_max < old_sync_max)
+ sysfs_set_num(mdi, NULL, "sync_max", sync_max);
+ sysfs_set_str(mdi, NULL, "sync_action", "idle");
+
+ /* That should have set things going again. Now we
+ * wait a little while (3 second max) for sync_completed
+ * to reach the target.
+ * The reshape process can block for 500msec if
+ * the sync speed limit is hit, so we need to wait
+ * a lot longer than that. 1 second is usually
+ * enough. 3 is safe.
+ */
+ delay = 3000;
+ scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
+ while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
+ unsigned long long max_completed;
+ sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
+ sysfs_fd_get_str(scfd, buf, sizeof(buf));
+ if (strncmp(buf, "none", 4) == 0) {
+ /* Either reshape has aborted, or hasn't
+ * quite started yet. Wait a bit and
+ * check 'sync_action' to see.
+ */
+ usleep(10000);
+ sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
+ if (strncmp(buf, "reshape", 7) != 0)
+ break;
}
- rv = 1;
- goto out;
+
+ if (sysfs_fd_get_two(scfd, &completed,
+ &max_completed) == 2 &&
+ /* 'completed' sometimes reads as max-uulong */
+ completed < max_completed &&
+ (completed > sync_max ||
+ (completed == sync_max && curr != position))) {
+ while (completed > sync_max) {
+ sync_max += sectors / rd1;
+ if (backwards)
+ position -= sectors;
+ else
+ position += sectors;
+ }
+ if (sync_max < old_sync_max)
+ sysfs_set_num(mdi, NULL, "sync_max", sync_max);
+ }
+
+ if (!backwards && curr >= position)
+ break;
+ if (backwards && curr <= position)
+ break;
+ sysfs_wait(scfd, &delay);
}
+ if (scfd >= 0)
+ close(scfd);
+
+ }
+done:
+
+ /* As we have an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25; err = 0;
+ while (count && fd >= 0
+ && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count --;
+ }
+ if (fd >= 0 && err) {
+ if (verbose >= 0) {
+ pr_err("failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ if (errno == EBUSY)
+ cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
+ }
+ rv = 1;
+ goto out;
+ }
+
+ if (get_linux_version() < 2006028) {
/* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
* was stopped, so We'll do it here just to be sure. Drop any
* partitions as well...
ioctl(fd, BLKRRPART, 0);
if (mdi)
sysfs_uevent(mdi, "change");
+ }
+
+ if (devnm[0] && use_udev()) {
+ struct map_ent *mp = map_by_devnm(&map, devnm);
+ remove_devices(devnm, mp ? mp->path : NULL);
+ }
+
+ if (verbose >= 0)
+ pr_err("stopped %s\n", devname);
+ map_lock(&map);
+ map_remove(&map, devnm);
+ map_unlock(&map);
+out:
+ sysfs_free(mdi);
+
+ return rv;
+}
+
+/*
+ * Allocate a zeroed mddev_dev for device 'name' with disposition 'disp'
+ * and link it into the list immediately after 'dv'.
+ * Returns the newly inserted entry.
+ */
+static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
+{
+	struct mddev_dev *entry = xmalloc(sizeof(*entry));
+
+	memset(entry, 0, sizeof(*entry));
+	entry->disposition = disp;
+	entry->devname = xstrdup(name);
+
+	/* splice in directly behind 'dv' */
+	entry->next = dv->next;
+	dv->next = entry;
+	return entry;
+}
+
+/*
+ * For every device of the array open on 'fd' whose state has bit 0
+ * (faulty) set, insert a "major:minor" entry with disposition 'disp'
+ * into the list after 'dv'.  Entries are chained one after another in
+ * slot order.  Does nothing if the array info cannot be read.
+ */
+static void add_faulty(struct mddev_dev *dv, int fd, char disp)
+{
+	mdu_array_info_t info;
+	mdu_disk_info_t dinfo;
+	int unseen;
+	int slot;
+
+	if (md_get_array_info(fd, &info) != 0)
+		return;
+
+	/* stop scanning once every device the array reports has been seen */
+	unseen = info.nr_disks;
+	for (slot = 0; slot < MAX_DISKS && unseen > 0; slot++) {
+		char name[40];
+
+		dinfo.number = slot;
+		if (md_get_disk_info(fd, &dinfo) != 0 ||
+		    (dinfo.major == 0 && dinfo.minor == 0))
+			continue;	/* slot unreadable or empty */
+		unseen--;
+		if (dinfo.state & 1) {
+			/* faulty: queue it by device number */
+			sprintf(name, "%d:%d", dinfo.major, dinfo.minor);
+			dv = add_one(dv, name, disp);
+		}
+	}
+}
+
+/*
+ * Scan the devices of the array open on 'fd' and, for each one that
+ * appears to be detached, insert a "major:minor" entry with
+ * disposition 'disp' into the list after 'dv'.
+ * A device is treated as detached only when opening its "major:minor"
+ * name with dev_open() fails with ENXIO.
+ * Does nothing if the array info cannot be read.
+ */
+static void add_detached(struct mddev_dev *dv, int fd, char disp)
+{
+ mdu_array_info_t array;
+ mdu_disk_info_t disk;
+ int remaining_disks;
+ int i;
-
- if (devnum != NoMdDev &&
- (stat("/dev/.udev", &stb) != 0 ||
- check_env("MDADM_NO_UDEV"))) {
- struct map_ent *mp = map_by_devnum(&map, devnum);
- remove_devices(devnum, mp ? mp->path : NULL);
+ if (md_get_array_info(fd, &array) != 0)
+ return;
+
+ /* stop scanning once nr_disks non-empty slots have been seen */
+ remaining_disks = array.nr_disks;
+ for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+ char buf[40];
+ int sfd;
+ disk.number = i;
+ if (md_get_disk_info(fd, &disk) != 0)
+ continue;
+ /* 0:0 marks an empty slot; it is not counted */
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ remaining_disks--;
+ /* when the request is 'f', skip devices whose faulty bit is set */
+ if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
+ continue;
+ sprintf(buf, "%d:%d", disk.major, disk.minor);
+ sfd = dev_open(buf, O_RDONLY);
+ if (sfd >= 0) {
+ /* Not detached */
+ close(sfd);
+ continue;
}
+ if (errno != ENXIO)
+ /* Probably not detached */
+ continue;
+ /* chain the new entry after the previously added one */
+ dv = add_one(dv, buf, disp);
+ }
+}
+/*
+ * For a level-10 array open on 'fd', insert after 'dv' a "major:minor"
+ * entry for every device whose set letter equals 'set_char'.
+ * The set letter of a device is 'A' + (raid_disk % copies), where
+ * 'copies' is the product of the two low bytes of the array layout.
+ * Each new entry is given the disposition of the entry it is inserted
+ * after (initially 'dv' itself).
+ * Does nothing if the array info cannot be read, the level is not 10,
+ * or raid_disks is not a multiple of 'copies'.
+ */
+static void add_set(struct mddev_dev *dv, int fd, char set_char)
+{
+ mdu_array_info_t array;
+ mdu_disk_info_t disk;
+ int remaining_disks;
+ int copies, set;
+ int i;
- if (quiet <= 0)
- fprintf(stderr, Name ": stopped %s\n", devname);
- map_lock(&map);
- map_remove(&map, devnum);
- map_unlock(&map);
- out:
- if (mdi)
- sysfs_free(mdi);
+ if (md_get_array_info(fd, &array) != 0)
+ return;
+ if (array.level != 10)
+ return;
+ /* presumably the near- and far-copy counts of the RAID10 layout -
+ * TODO confirm against the md layout encoding.
+ * NOTE(review): if either byte were 0, 'copies' would be 0 and the
+ * '%' below would divide by zero - confirm the kernel never
+ * reports such a layout. */
+ copies = ((array.layout & 0xff) *
+ ((array.layout >> 8) & 0xff));
+ if (array.raid_disks % copies)
+ return;
+
+ /* stop scanning once nr_disks non-empty slots have been seen */
+ remaining_disks = array.nr_disks;
+ for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+ char buf[40];
+ disk.number = i;
+ if (md_get_disk_info(fd, &disk) != 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ remaining_disks--;
+ set = disk.raid_disk % copies;
+ if (set_char != set + 'A')
+ continue;
+ sprintf(buf, "%d:%d", disk.major, disk.minor);
+ /* inherit the disposition of the entry we are chaining onto */
+ dv = add_one(dv, buf, dv->disposition);
}
- return rv;
}
-int Manage_resize(char *devname, int fd, long long size, int raid_disks)
+/*
+ * Try to re-add a device that carries its own metadata ('dev_st') to
+ * the array open on 'fd'.
+ *
+ * The attempt is only made when the device's metadata marks it active
+ * and not faulty and its uuid equals the one from 'tst' (when 'tst'
+ * has no metadata loaded the uuids are assumed to match).
+ *
+ * Returns 1 if the ADD_NEW_DISK ioctl succeeded; -1 on a hard failure
+ * (the device could not be opened or its superblock updated, or the
+ * ioctl failed with ENOMEM or EROFS and the disposition is not 'M');
+ * 0 when no re-add was done.
+ */
+int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
+ struct supertype *dev_st, struct supertype *tst,
+ unsigned long rdev,
+ char *update, char *devname, int verbose,
+ mdu_array_info_t *array)
{
- mdu_array_info_t info;
- if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
- fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
- devname, strerror(errno));
+ struct mdinfo mdi;
+ int duuid[4];
+ int ouuid[4];
+
+ /* ouuid comes from the device's metadata, duuid from the array's */
+ dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
+ dev_st->ss->uuid_from_super(dev_st, ouuid);
+ if (tst->sb)
+ tst->ss->uuid_from_super(tst, duuid);
+ else
+ /* Assume uuid matches: kernel will check */
+ memcpy(duuid, ouuid, sizeof(ouuid));
+ if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
+ !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
+ memcmp(duuid, ouuid, sizeof(ouuid))==0) {
+ /* Looks like it is worth a
+ * try. Need to make sure
+ * kernel will accept it
+ * though.
+ */
+ mdu_disk_info_t disc;
+ /* re-add doesn't work for version-1 superblocks
+ * before 2.6.18 :-(
+ */
+ if (array->major_version == 1 &&
+ get_linux_version() <= 2006018)
+ goto skip_re_add;
+ /* the slot recorded in the device's metadata must currently
+ * be empty (0:0) in the array, otherwise give up */
+ disc.number = mdi.disk.number;
+ if (md_get_disk_info(fd, &disc) != 0 ||
+ disc.major != 0 || disc.minor != 0)
+ goto skip_re_add;
+ disc.major = major(rdev);
+ disc.minor = minor(rdev);
+ disc.number = mdi.disk.number;
+ disc.raid_disk = mdi.disk.raid_disk;
+ disc.state = mdi.disk.state;
+ if (array->state & (1 << MD_SB_CLUSTERED)) {
+ /* extra flags are needed when adding to a cluster as
+ * there are two cases to distinguish
+ */
+ if (dv->disposition == 'c')
+ disc.state |= (1 << MD_DISK_CANDIDATE);
+ else
+ disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+ }
+ /* apply any explicit writemostly/failfast request to the state
+ * handed to the kernel */
+ if (dv->writemostly == FlagSet)
+ disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ if (dv->writemostly == FlagClear)
+ disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+ if (dv->failfast == FlagSet)
+ disc.state |= 1 << MD_DISK_FAILFAST;
+ if (dv->failfast == FlagClear)
+ disc.state &= ~(1 << MD_DISK_FAILFAST);
+ remove_partitions(tfd);
+ /* the on-disk superblock only needs rewriting when an update
+ * or a flag change was requested */
+ if (update || dv->writemostly != FlagDefault
+ || dv->failfast != FlagDefault) {
+ int rv = -1;
+ /* note: this replaces the caller-supplied 'tfd' with a
+ * fresh read-write descriptor, closed again below */
+ tfd = dev_open(dv->devname, O_RDWR);
+ if (tfd < 0) {
+ pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
+ return -1;
+ }
+
+ /* 'rv' holds the result of the last update applied */
+ if (dv->writemostly == FlagSet)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "writemostly",
+ devname, verbose, 0, NULL);
+ if (dv->writemostly == FlagClear)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "readwrite",
+ devname, verbose, 0, NULL);
+ if (dv->failfast == FlagSet)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "failfast",
+ devname, verbose, 0, NULL);
+ if (dv->failfast == FlagClear)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "nofailfast",
+ devname, verbose, 0, NULL);
+ if (update)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, update,
+ devname, verbose, 0, NULL);
+ if (rv == 0)
+ rv = dev_st->ss->store_super(dev_st, tfd);
+ close(tfd);
+ if (rv != 0) {
+ pr_err("failed to update superblock during re-add\n");
+ return -1;
+ }
+ }
+ /* don't even try if disk is marked as faulty */
+ errno = 0;
+ if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+ if (verbose >= 0)
+ pr_err("re-added %s\n", dv->devname);
+ return 1;
+ }
+ /* only ENOMEM and EROFS are reported here; any other errno
+ * falls through to return 0 */
+ if (errno == ENOMEM || errno == EROFS) {
+ pr_err("add new device failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ if (dv->disposition == 'M')
+ return 0;
+ return -1;
+ }
+ }
+skip_re_add:
+ return 0;
+}
+
+/*
+ * Manage_add - add the device described by 'dv' to a running array.
+ *
+ * fd          open descriptor on the md array
+ * tfd         open descriptor on the device being added
+ * dv          device entry; dv->disposition selects the kind of add
+ *             (the values tested here are 'M', 'A', 'S', 'j' and 'c')
+ * tst         supertype of the array; array metadata is loaded into it
+ *             here if it is not loaded already
+ * array       array info previously read from the kernel
+ * force       allow adding a device larger than 4TB to a v0.90 array
+ * verbose     success messages are printed when >= 0
+ * devname     name of the array, used in messages
+ * update      optional superblock update handed to attempt_re_add()
+ * rdev        device number of the device being added
+ * array_size  size the device must provide, in 512-byte sectors
+ * raid_slot   slot number to request, or negative to pick a free one
+ *
+ * Returns 1 if the device was added or re-added, 0 if nothing was done
+ * but that is acceptable (disposition 'M' only), and -1 on failure.
+ */
+int Manage_add(int fd, int tfd, struct mddev_dev *dv,
+	       struct supertype *tst, mdu_array_info_t *array,
+	       int force, int verbose, char *devname,
+	       char *update, unsigned long rdev, unsigned long long array_size,
+	       int raid_slot)
+{
+	unsigned long long ldsize;
+	struct supertype *dev_st;
+	int j;
+	mdu_disk_info_t disc;
+
+	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+		if (dv->disposition == 'M')
+			return 0;
+		else
+			return -1;
+	}
+
+	if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
+		/* More than 4TB is wasted on v0.90 */
+		if (!force) {
+			pr_err("%s is larger than %s can effectively use.\n"
+			       " Add --force if you really want to add this device.\n",
+			       dv->devname, devname);
+			return -1;
+		}
+		pr_err("%s is larger than %s can effectively use.\n"
+		       " Adding anyway as --force was given.\n",
+		       dv->devname, devname);
+	}
+	if (!tst->ss->external && array->major_version == 0) {
+		/* v0.90 native metadata: the kernel does all the work */
+		if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
+			if (verbose >= 0)
+				pr_err("hot added %s\n",
+				       dv->devname);
+			return 1;
+		}
+
+		pr_err("hot add failed for %s: %s\n",
+		       dv->devname, strerror(errno));
+		return -1;
+	}
+
+	if (array->not_persistent == 0 || tst->ss->external) {
+
+		/* need to find a sample superblock to copy, and
+		 * a spare slot to use.
+		 * For 'external' array (well, container based),
+		 * We can just load the metadata for the array.
+		 */
+		int array_failed;
+		if (tst->sb)
+			/* already loaded */;
+		else if (tst->ss->external) {
+			tst->ss->load_container(tst, fd, NULL);
+		} else for (j = 0; j < tst->max_devs; j++) {
+			char *dev;
+			int dfd;
+			disc.number = j;
+			if (md_get_disk_info(fd, &disc))
+				continue;
+			if (disc.major==0 && disc.minor==0)
+				continue;
+			if ((disc.state & 4)==0) /* sync */
+				continue;
+			/* Looks like a good device to try */
+			dev = map_dev(disc.major, disc.minor, 1);
+			if (!dev)
+				continue;
+			dfd = dev_open(dev, O_RDONLY);
+			if (dfd < 0)
+				continue;
+			if (tst->ss->load_super(tst, dfd,
+						NULL)) {
+				close(dfd);
+				continue;
+			}
+			close(dfd);
+			break;
+		}
+		/* FIXME this is a bad test to be using */
+		if (!tst->sb && (dv->disposition != 'a'
+				 && dv->disposition != 'S')) {
+			/* we are re-adding a device to a
+			 * completely dead array - have to depend
+			 * on kernel to check
+			 */
+		} else if (!tst->sb) {
+			pr_err("cannot load array metadata from %s\n", devname);
+			return -1;
+		}
+
+		/* Make sure device is large enough */
+		if (dv->disposition != 'j' &&  /* skip size check for Journal */
+		    tst->sb &&
+		    tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+		    array_size) {
+			if (dv->disposition == 'M')
+				return 0;
+			pr_err("%s not large enough to join array\n",
+			       dv->devname);
+			return -1;
+		}
+
+		/* Possibly this device was recently part of
+		 * the array and was temporarily removed, and
+		 * is now being re-added.  If so, we can
+		 * simply re-add it.
+		 */
+
+		if (array->not_persistent == 0) {
+			int rv = 0;
+
+			dev_st = dup_super(tst);
+			dev_st->ss->load_super(dev_st, tfd, NULL);
+			if (dev_st->sb && dv->disposition != 'S')
+				rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
+						    rdev, update, devname,
+						    verbose, array);
+			/* The scratch supertype is only needed for the
+			 * re-add attempt: release it, and any superblock
+			 * it loaded, on every path so nothing is leaked.
+			 */
+			dev_st->ss->free_super(dev_st);
+			free(dev_st);
+			if (rv)
+				return rv;
+		}
+		if (dv->disposition == 'M') {
+			if (verbose > 0)
+				pr_err("--re-add for %s to %s is not possible\n",
+				       dv->devname, devname);
+			return 0;
+		}
+		if (dv->disposition == 'A') {
+			pr_err("--re-add for %s to %s is not possible\n",
+			       dv->devname, devname);
+			return -1;
+		}
+		if (array->active_disks < array->raid_disks) {
+			/* Degraded: work out whether enough in-sync devices
+			 * remain for the array to be functional at all.
+			 */
+			char *avail = xcalloc(array->raid_disks, 1);
+			int d;
+			int found = 0;
+
+			for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
+				disc.number = d;
+				if (md_get_disk_info(fd, &disc))
+					continue;
+				if (disc.major == 0 && disc.minor == 0)
+					continue;
+				if (!(disc.state & (1<<MD_DISK_SYNC)))
+					continue;
+				avail[disc.raid_disk] = 1;
+				found++;
+			}
+			array_failed = !enough(array->level, array->raid_disks,
+					       array->layout, 1, avail);
+			free(avail);
+		} else
+			array_failed = 0;
+		if (array_failed) {
+			pr_err("%s has failed so using --add cannot work and might destroy\n",
+			       devname);
+			pr_err("data on %s.  You should stop the array and re-assemble it.\n",
+			       dv->devname);
+			return -1;
+		}
+	} else {
+		/* non-persistent. Must ensure that new drive
+		 * is at least array->size big.
+		 */
+		if (ldsize/512 < array_size) {
+			pr_err("%s not large enough to join array\n",
+			       dv->devname);
+			return -1;
+		}
+	}
+	/* committed to really trying this device now*/
+	remove_partitions(tfd);
+
+	/* in 2.6.17 and earlier, version-1 superblocks won't
+	 * use the number we write, but will choose a free number.
+	 * we must choose the same free number, which requires
+	 * starting at 'raid_disks' and counting up
+	 */
+	for (j = array->raid_disks; j < tst->max_devs; j++) {
+		disc.number = j;
+		if (md_get_disk_info(fd, &disc))
+			break;
+		if (disc.major==0 && disc.minor==0)
+			break;
+		if (disc.state & 8) /* removed */
+			break;
+	}
+	disc.major = major(rdev);
+	disc.minor = minor(rdev);
+	if (raid_slot < 0)
+		disc.number = j;
+	else
+		disc.number = raid_slot;
+	disc.state = 0;
+
+	/* only add journal to array that supports journaling */
+	if (dv->disposition == 'j') {
+		struct mdinfo mdi;
+		struct mdinfo *mdp;
+
+		mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
+		if (!mdp) {
+			pr_err("%s unable to read array state.\n", devname);
+			return -1;
+		}
+
+		if (mdp->array_state != ARRAY_READONLY) {
+			sysfs_free(mdp);
+			pr_err("%s is not readonly, cannot add journal.\n", devname);
+			return -1;
+		}
+
+		sysfs_free(mdp);
+
+		tst->ss->getinfo_super(tst, &mdi, NULL);
+		if (mdi.journal_device_required == 0) {
+			pr_err("%s does not support journal device.\n", devname);
+			return -1;
+		}
+		disc.raid_disk = 0;
+	}
+
+	if (array->not_persistent==0) {
+		int dfd;
+		if (dv->disposition == 'j')
+			disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
+		if (dv->writemostly == FlagSet)
+			disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+		if (dv->failfast == FlagSet)
+			disc.state |= 1 << MD_DISK_FAILFAST;
+		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+		if (tst->ss->add_to_super(tst, &disc, dfd,
+					  dv->devname, INVALID_SECTORS)) {
+			/* The metadata handler rejected the device: do not
+			 * leak the descriptor (the external-metadata path
+			 * below closes it in the same situation).
+			 */
+			if (dfd >= 0)
+				close(dfd);
+			return -1;
+		}
+		if (tst->ss->write_init_super(tst))
+			return -1;
+	} else if (dv->disposition == 'A') {
+		/*  this had better be raid1.
+		 * As we are "--re-add"ing we must find a spare slot
+		 * to fill.
+		 */
+		char *used = xcalloc(array->raid_disks, 1);
+		for (j = 0; j < tst->max_devs; j++) {
+			mdu_disk_info_t disc2;
+			disc2.number = j;
+			if (md_get_disk_info(fd, &disc2))
+				continue;
+			if (disc2.major==0 && disc2.minor==0)
+				continue;
+			if (disc2.state & 8) /* removed */
+				continue;
+			if (disc2.raid_disk < 0)
+				continue;
+			/* 'used' has raid_disks entries, so the last valid
+			 * index is raid_disks - 1.
+			 */
+			if (disc2.raid_disk >= array->raid_disks)
+				continue;
+			used[disc2.raid_disk] = 1;
+		}
+		for (j = 0 ; j < array->raid_disks; j++)
+			if (!used[j]) {
+				disc.raid_disk = j;
+				disc.state |= (1<<MD_DISK_SYNC);
+				break;
+			}
+		free(used);
+	}
+
+	if (array->state & (1 << MD_SB_CLUSTERED)) {
+		if (dv->disposition == 'c')
+			disc.state |= (1 << MD_DISK_CANDIDATE);
+		else
+			disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+	}
+
+	if (dv->writemostly == FlagSet)
+		disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+	if (dv->failfast == FlagSet)
+		disc.state |= (1 << MD_DISK_FAILFAST);
+	if (tst->ss->external) {
+		/* add a disk
+		 * to an external metadata container */
+		struct mdinfo new_mdi;
+		struct mdinfo *sra;
+		int container_fd;
+		char devnm[32];
+		int dfd;
+
+		strcpy(devnm, fd2devnm(fd));
+
+		container_fd = open_dev_excl(devnm);
+		if (container_fd < 0) {
+			pr_err("add failed for %s: could not get exclusive access to container\n",
+			       dv->devname);
+			tst->ss->free_super(tst);
+			return -1;
+		}
+
+		Kill(dv->devname, NULL, 0, -1, 0);
+		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+		/* when mdmon is running, metadata changes are queued as
+		 * updates rather than written directly */
+		if (mdmon_running(tst->container_devnm))
+			tst->update_tail = &tst->updates;
+		if (tst->ss->add_to_super(tst, &disc, dfd,
+					  dv->devname, INVALID_SECTORS)) {
+			close(dfd);
+			close(container_fd);
+			return -1;
+		}
+		if (tst->update_tail)
+			flush_metadata_updates(tst);
+		else
+			tst->ss->sync_metadata(tst);
+
+		sra = sysfs_read(container_fd, NULL, 0);
+		if (!sra) {
+			pr_err("add failed for %s: sysfs_read failed\n",
+			       dv->devname);
+			close(container_fd);
+			tst->ss->free_super(tst);
+			return -1;
+		}
+		sra->array.level = LEVEL_CONTAINER;
+		/* Need to set data_offset and component_size */
+		tst->ss->getinfo_super(tst, &new_mdi, NULL);
+		new_mdi.disk.major = disc.major;
+		new_mdi.disk.minor = disc.minor;
+		new_mdi.recovery_start = 0;
+		/* Make sure fds are closed as they are O_EXCL which
+		 * would block add_disk */
+		tst->ss->free_super(tst);
+		if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+			pr_err("add new device to external metadata failed for %s\n", dv->devname);
+			close(container_fd);
+			sysfs_free(sra);
+			return -1;
+		}
+		ping_monitor(devnm);
+		sysfs_free(sra);
+		close(container_fd);
+	} else {
+		tst->ss->free_super(tst);
+		if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+			if (dv->disposition == 'j')
+				pr_err("Failed to hot add %s as journal, "
+				       "please try restart %s.\n", dv->devname, devname);
+			else
+				/* report the slot that was actually requested;
+				 * 'j' differs from it when raid_slot was given
+				 * or 'j' was reused by a later loop */
+				pr_err("add new device failed for %s as %d: %s\n",
+				       dv->devname, disc.number, strerror(errno));
+			return -1;
+		}
+		if (dv->disposition == 'j') {
+			pr_err("Journal added successfully, making %s read-write\n", devname);
+			if (Manage_ro(devname, fd, -1))
+				pr_err("Failed to make %s read-write\n", devname);
+		}
+
+	}
+	if (verbose >= 0)
+		pr_err("added %s\n", dv->devname);
+	return 1;
+}
+
+/*
+ * Manage_remove() - hot-remove one device from an array or container.
+ *
+ * tst:     metadata handler; when tst->ss->external is set the array is
+ *          handled as a container and an in-use check is done first.
+ * fd:      open descriptor of the md device.
+ * dv:      device-list entry being removed; dv->devname is used in messages.
+ * sysfd:   when >= 0 the removal is done with sys_hot_remove_disk() on this
+ *          descriptor instead of by device number.
+ * rdev:    device number of the member, or 0 when it is not known.
+ * force:   passed unchanged to the hot-remove helpers.
+ * verbose: the success message is printed when this is >= 0.
+ * devname: name of the array, used in messages.
+ *
+ * Returns 1 when the device was removed and -1 on any failure.
+ */
+int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
+ int sysfd, unsigned long rdev, int force, int verbose, char *devname)
+{
+ char devnm[32] = ""; /* container name, only filled in for external metadata */
+ int lfd = -1;
+ int err;
+
+ if (tst->ss->external) {
+ /* To remove a device from a container, we must
+ * check that it isn't in use in an array.
+ * This involves looking in the 'holders'
+ * directory - there must be just one entry,
+ * the container.
+ * To ensure that it doesn't get used as a
+ * hot spare while we are checking, we
+ * get an O_EXCL open on the container
+ */
+ char *name = fd2devnm(fd);
+ int ret;
+
+ /* fd2devnm() can fail; never hand NULL to strcpy() */
+ if (!name) {
+ pr_err("unable to get container name\n");
+ return -1;
+ }
+ strcpy(devnm, name);
+ lfd = open_dev_excl(devnm);
+ if (lfd < 0) {
+ pr_err("Cannot get exclusive access to container - odd\n");
+ return -1;
+ }
+ /* We may not be able to check on holders in
+ * sysfs, either because we don't have the dev num
+ * (rdev == 0) or because the device has been detached
+ * and the 'holders' directory no longer exists
+ * (ret == -1). In that case, assume it is OK to
+ * remove.
+ */
+ if (rdev != 0) {
+ /*
+ * The drive has already been set to 'faulty', however
+ * monitor might not have had time to process it and the
+ * drive might still have an entry in the 'holders'
+ * directory. Try a few times to avoid a false error
+ */
+ int count = 20;
+
+ do {
+ ret = sysfs_unique_holder(devnm, rdev);
+ if (ret < 2)
+ break;
+ usleep(100 * 1000); /* 100ms */
+ } while (--count > 0);
+
+ if (ret == 0) {
+ pr_err("%s is not a member, cannot remove.\n",
+ dv->devname);
+ close(lfd);
+ return -1;
+ }
+ if (ret >= 2) {
+ pr_err("%s is still in use, cannot remove.\n",
+ dv->devname);
+ close(lfd);
+ return -1;
+ }
+ }
+ }
+ /* FIXME check that it is a current member */
+ if (sysfd >= 0) {
+ /* device has been removed and we don't know
+ * the major:minor number
+ */
+ err = sys_hot_remove_disk(sysfd, force);
+ } else {
+ err = hot_remove_disk(fd, rdev, force);
+ if (err && errno == ENODEV) {
+ /* Old kernels rejected this if no personality
+ * is registered */
+ struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
+ /* 'rd' rather than 'dv' so the parameter is not shadowed */
+ struct mdinfo *rd = NULL;
+
+ if (sra)
+ rd = sra->devs;
+ for ( ; rd ; rd = rd->next) {
+ if (rd->disk.major == (int)major(rdev) &&
+ rd->disk.minor == (int)minor(rdev))
+ break;
+ }
+ if (rd)
+ err = sysfs_set_str(sra, rd,
+ "state", "remove");
+ else
+ err = -1;
+ sysfs_free(sra);
+ }
+ }
+ if (err) {
+ pr_err("hot remove failed for %s: %s\n", dv->devname,
+ strerror(errno));
+ if (lfd >= 0)
+ close(lfd);
+ return -1;
+ }
+ if (tst->ss->external) {
+ /*
+ * Before dropping our exclusive open we make an
+ * attempt at preventing mdmon from seeing an
+ * 'add' event before reconciling this 'remove'
+ * event.
+ * The container name was already looked up above, so
+ * there is no second lookup that could fail here and
+ * leave 'lfd' open.
+ */
+ ping_manager(devnm);
+ }
+ if (lfd >= 0)
+ close(lfd);
+ if (verbose >= 0)
+ pr_err("hot removed %s from %s\n",
+ dv->devname, devname);
+ return 1;
+}
+
+/*
+ * Manage_replace() - mark one member of a native-metadata array as wanting
+ * replacement by writing "want_replacement" to its sysfs "state".
+ *
+ * tst:     metadata handler; external metadata is rejected.
+ * fd:      open descriptor of the md device.
+ * dv:      device-list entry for the member; the following entries are
+ *          searched for a 'W' (--with) entry to pair with it.
+ * rdev:    device number of the member.
+ * verbose: the success message is printed when this is >= 0.
+ * devname: name of the array, used in messages.
+ *
+ * Returns 1 on success and -1 on failure.
+ */
+int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
+ unsigned long rdev, int verbose, char *devname)
+{
+ struct mdinfo *mdi, *di;
+ if (tst->ss->external) {
+ pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
+ return -1;
+ }
+ /* Need to find the device in sysfs and add 'want_replacement' to the
+ * status.
+ */
+ mdi = sysfs_read(fd, NULL, GET_DEVS);
+ if (!mdi || !mdi->devs) {
+ pr_err("Cannot find status of %s to enable replacement - strange\n",
+ devname);
+ /* sysfs_read() may have succeeded with an empty device list */
+ if (mdi)
+ sysfs_free(mdi);
+ return -1;
+ }
+ for (di = mdi->devs; di; di = di->next)
+ if (di->disk.major == (int)major(rdev) &&
+ di->disk.minor == (int)minor(rdev))
+ break;
+ if (di) {
+ int rv;
+ if (di->disk.raid_disk < 0) {
+ pr_err("%s is not active and so cannot be replaced.\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ rv = sysfs_set_str(mdi, di,
+ "state", "want_replacement");
+ if (rv) {
+ sysfs_free(mdi);
+ pr_err("Failed to request replacement for %s\n",
+ dv->devname);
+ return -1;
+ }
+ if (verbose >= 0)
+ pr_err("Marked %s (device %d in %s) for replacement\n",
+ dv->devname, di->disk.raid_disk, devname);
+ /* If there is a matching 'with', we need to tell it which
+ * raid disk
+ */
+ while (dv && dv->disposition != 'W')
+ dv = dv->next;
+ if (dv) {
+ dv->disposition = 'w';
+ dv->used = di->disk.raid_disk;
+ }
+ /* 'di' points into 'mdi', so release it only after the last use */
+ sysfs_free(mdi);
return 1;
}
- if (size >= 0)
- info.size = size;
- if (raid_disks > 0)
- info.raid_disks = raid_disks;
- if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
- fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
- devname, strerror(errno));
+ sysfs_free(mdi);
+ pr_err("%s not found in %s so cannot --replace it\n",
+ dv->devname, devname);
+ return -1;
+}
+
+/*
+ * Manage_with() - make a device the preferred replacement by writing
+ * dv->used to its sysfs "slot".
+ *
+ * tst:     metadata handler (not used by the code in this function).
+ * fd:      open descriptor of the md device.
+ * dv:      device-list entry; dv->used is the slot to take over and
+ *          dv->devname is used in messages.
+ * rdev:    device number of the replacement device.
+ * verbose: the success message is printed when this is >= 0.
+ * devname: name of the array, used in messages.
+ *
+ * Returns 1 on success and -1 on failure.
+ */
+int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
+ unsigned long rdev, int verbose, char *devname)
+{
+ struct mdinfo *mdi, *di;
+ /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
+ mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
+ if (!mdi || !mdi->devs) {
+ pr_err("Cannot find status of %s to enable replacement - strange\n",
+ devname);
+ /* sysfs_read() may have succeeded with an empty device list */
+ if (mdi)
+ sysfs_free(mdi);
+ return -1;
+ }
+ for (di = mdi->devs; di; di = di->next)
+ if (di->disk.major == (int)major(rdev) &&
+ di->disk.minor == (int)minor(rdev))
+ break;
+ if (di) {
+ int rv;
+ if (di->disk.state & (1<<MD_DISK_FAULTY)) {
+ pr_err("%s is faulty and cannot be a replacement\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ if (di->disk.raid_disk >= 0) {
+ pr_err("%s is active and cannot be a replacement\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ rv = sysfs_set_num(mdi, di,
+ "slot", dv->used);
+ if (rv) {
+ sysfs_free(mdi);
+ pr_err("Failed to set %s as preferred replacement.\n",
+ dv->devname);
+ return -1;
+ }
+ if (verbose >= 0)
+ pr_err("Marked %s in %s as replacement for device %d\n",
+ dv->devname, devname, dv->used);
+ /* success path must release the sysfs snapshot too */
+ sysfs_free(mdi);
return 1;
}
- return 0;
+ sysfs_free(mdi);
+ pr_err("%s not found in %s so cannot make it preferred replacement\n",
+ dv->devname, devname);
+ return -1;
}
int Manage_subdevs(char *devname, int fd,
struct mddev_dev *devlist, int verbose, int test,
char *update, int force)
{
- /* do something to each dev.
+ /* Do something to each dev.
* devmode can be
* 'a' - add the device
* try HOT_ADD_DISK
* If that fails EINVAL, try ADD_NEW_DISK
- * 'r' - remove the device HOT_REMOVE_DISK
+ * 'S' - add the device as a spare - don't try re-add
+ * 'j' - add the device as a journal device
+ * 'A' - re-add the device
+ * 'r' - remove the device: HOT_REMOVE_DISK
* device can be 'faulty' or 'detached' in which case all
* matching devices are removed.
* 'f' - set the device faulty SET_DISK_FAULTY
* device can be 'detached' in which case any device that
* is inaccessible will be marked faulty.
+ * 'R' - mark this device as wanting replacement.
+ * 'W' - this device is added if necessary and activated as
+ * a replacement for a previous 'R' device.
+ * -----
+ * 'w' - 'W' will be changed to 'w' when it is paired with
+ * a 'R' device. If a 'W' is found while walking the list
+ * it must be unpaired, and is an error.
+ * 'M' - this is created by a 'missing' target. It is a slight
+ * variant on 'A'
+ * 'F' - Another variant of 'A', where the device was faulty
+ * so must be removed from the array first.
+ * 'c' - confirm the device as found (for clustered environments)
+ *
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*/
+ /* Return value: 0 when the list was processed, or 2 when 'test'
+ * is set and no action was counted. On the abort path: 2 when
+ * 'test' is clear and a set-faulty attempt failed with EBUSY,
+ * otherwise 1.
+ */
- struct mddev_dev *add_devlist = NULL;
mdu_array_info_t array;
- mdu_disk_info_t disc;
unsigned long long array_size;
- struct mddev_dev *dv, *next = NULL;
- struct stat stb;
- int j, jnext = 0;
+ struct mddev_dev *dv;
int tfd = -1;
- struct supertype *st, *tst;
+ struct supertype *tst;
char *subarray = NULL;
- int duuid[4];
- int ouuid[4];
- int lfd = -1;
int sysfd = -1;
int count = 0; /* number of actions taken */
-
- if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- fprintf(stderr, Name ": cannot get array info for %s\n",
- devname);
- return 1;
+ struct mdinfo info;
+ struct mdinfo devinfo;
+ int frozen = 0;
+ int busy = 0;
+ int raid_slot = -1;
+
+ if (sysfs_init(&info, fd, NULL)) {
+ pr_err("sysfs not availabile for %s\n", devname);
+ goto abort;
}
- /* array.size is only 32 bit and may be truncated.
+ if (md_get_array_info(fd, &array)) {
+ pr_err("Cannot get array info for %s\n", devname);
+ goto abort;
+ }
+ /* array.size is only 32 bits and may be truncated.
* So read from sysfs if possible, and record number of sectors
*/
tst = super_by_fd(fd, &subarray);
if (!tst) {
- fprintf(stderr, Name ": unsupport array - version %d.%d\n",
+ pr_err("unsupport array - version %d.%d\n",
array.major_version, array.minor_version);
- return 1;
+ goto abort;
}
- stb.st_rdev = 0;
- for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
- unsigned long long ldsize;
- char dvname[20];
- char *dnprintable = dv->devname;
- char *add_dev = dv->devname;
- int err;
- int re_add_failed = 0;
-
- next = dv->next;
- jnext = 0;
+ for (dv = devlist; dv; dv = dv->next) {
+ unsigned long rdev = 0; /* device to add/remove etc */
+ int rv;
+ int mj,mn;
+
+ raid_slot = -1;
+ if (dv->disposition == 'c') {
+ rv = parse_cluster_confirm_arg(dv->devname,
+ &dv->devname,
+ &raid_slot);
+ if (rv) {
+ pr_err("Could not get the devname of cluster\n");
+ goto abort;
+ }
+ }
- if (strcmp(dv->devname, "failed")==0 ||
- strcmp(dv->devname, "faulty")==0) {
- int remaining_disks = array.nr_disks;
- if (dv->disposition != 'r') {
- fprintf(stderr, Name ": %s only meaningful "
- "with -r, not -%c\n",
+ if (strcmp(dv->devname, "failed") == 0 ||
+ strcmp(dv->devname, "faulty") == 0) {
+ if (dv->disposition != 'A'
+ && dv->disposition != 'r') {
+ pr_err("%s only meaningful with -r or --re-add, not -%c\n",
dv->devname, dv->disposition);
- return 1;
- }
- for (; j < 1024 && remaining_disks > 0; j++) {
- unsigned dev;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major == 0 && disc.minor == 0)
- continue;
- remaining_disks --;
- if ((disc.state & 1) == 0) /* faulty */
- continue;
- dev = makedev(disc.major, disc.minor);
- if (stb.st_rdev == dev)
- /* already did that one */
- continue;
- stb.st_rdev = dev;
- next = dv;
- /* same slot again next time - things might
- * have reshuffled */
- jnext = j;
- sprintf(dvname,"%d:%d", disc.major, disc.minor);
- dnprintable = dvname;
- break;
+ goto abort;
}
- if (next != dv)
- continue;
- } else if (strcmp(dv->devname, "detached") == 0) {
- int remaining_disks = array.nr_disks;
+ add_faulty(dv, fd, (dv->disposition == 'A'
+ ? 'F' : 'r'));
+ continue;
+ }
+ if (strcmp(dv->devname, "detached") == 0) {
if (dv->disposition != 'r' && dv->disposition != 'f') {
- fprintf(stderr, Name ": %s only meaningful "
- "with -r of -f, not -%c\n",
+ pr_err("%s only meaningful with -r of -f, not -%c\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
}
- for (; j < 1024 && remaining_disks > 0; j++) {
- int sfd;
- unsigned dev;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major == 0 && disc.minor == 0)
- continue;
- remaining_disks --;
- sprintf(dvname,"%d:%d", disc.major, disc.minor);
- sfd = dev_open(dvname, O_RDONLY);
- if (sfd >= 0) {
- close(sfd);
- continue;
- }
- if (dv->disposition == 'f' &&
- (disc.state & 1) == 1) /* already faulty */
- continue;
- if (errno != ENXIO)
- continue;
- dev = makedev(disc.major, disc.minor);
- if (stb.st_rdev == dev)
- /* already did that one */
- continue;
- stb.st_rdev = dev;
- next = dv;
- /* same slot again next time - things might
- * have reshuffled */
- jnext = j;
- dnprintable = dvname;
+ add_detached(dv, fd, dv->disposition);
+ continue;
+ }
+
+ if (strcmp(dv->devname, "missing") == 0) {
+ struct mddev_dev *add_devlist;
+ struct mddev_dev **dp;
+ if (dv->disposition == 'c') {
+ rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
break;
}
- if (next != dv)
- continue;
- } else if (strcmp(dv->devname, "missing") == 0) {
- if (dv->disposition != 'a' || dv->re_add == 0) {
- fprintf(stderr, Name ": 'missing' only meaningful "
- "with --re-add\n");
- return 1;
+
+ if (dv->disposition != 'A') {
+ pr_err("'missing' only meaningful with --re-add\n");
+ goto abort;
}
- if (add_devlist == NULL)
- add_devlist = conf_get_devs();
+ add_devlist = conf_get_devs();
if (add_devlist == NULL) {
- fprintf(stderr, Name ": no devices to scan for missing members.");
+ pr_err("no devices to scan for missing members.");
continue;
}
- add_dev = add_devlist->devname;
- add_devlist = add_devlist->next;
- if (add_devlist != NULL)
- next = dv;
- if (stat(add_dev, &stb) < 0)
- continue;
- } else if (strchr(dv->devname, '/') == NULL &&
- strchr(dv->devname, ':') == NULL &&
- strlen(dv->devname) < 50) {
+ for (dp = &add_devlist; *dp; dp = & (*dp)->next)
+ /* 'M' (for 'missing') is like 'A' without errors */
+ (*dp)->disposition = 'M';
+ *dp = dv->next;
+ dv->next = add_devlist;
+ continue;
+ }
+
+ if (strncmp(dv->devname, "set-", 4) == 0 &&
+ strlen(dv->devname) == 5) {
+ int copies;
+
+ if (dv->disposition != 'r' &&
+ dv->disposition != 'f') {
+ pr_err("'%s' only meaningful with -r or -f\n",
+ dv->devname);
+ goto abort;
+ }
+ if (array.level != 10) {
+ pr_err("'%s' only meaningful with RAID10 arrays\n",
+ dv->devname);
+ goto abort;
+ }
+ copies = ((array.layout & 0xff) *
+ ((array.layout >> 8) & 0xff));
+ if (array.raid_disks % copies != 0 ||
+ dv->devname[4] < 'A' ||
+ dv->devname[4] >= 'A' + copies ||
+ copies > 26) {
+ pr_err("'%s' not meaningful with this array\n",
+ dv->devname);
+ goto abort;
+ }
+ add_set(dv, fd, dv->devname[4]);
+ continue;
+ }
+
+ if (strchr(dv->devname, '/') == NULL &&
+ strchr(dv->devname, ':') == NULL &&
+ strlen(dv->devname) < 50) {
/* Assume this is a kernel-internal name like 'sda1' */
int found = 0;
char dname[55];
if (dv->disposition != 'r' && dv->disposition != 'f') {
- fprintf(stderr, Name ": %s only meaningful "
- "with -r or -f, not -%c\n",
+ pr_err("%s only meaningful with -r or -f, not -%c\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
}
sprintf(dname, "dev-%s", dv->devname);
- sysfd = sysfs_open(fd2devnum(fd), dname, "block/dev");
+ sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
if (sysfd >= 0) {
char dn[20];
- int mj,mn;
if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
sscanf(dn, "%d:%d", &mj,&mn) == 2) {
- stb.st_rdev = makedev(mj,mn);
+ rdev = makedev(mj,mn);
found = 1;
}
close(sysfd);
sysfd = -1;
}
if (!found) {
- sysfd = sysfs_open(fd2devnum(fd), dname, "state");
+ sysfd = sysfs_open(fd2devnm(fd), dname, "state");
if (sysfd < 0) {
- fprintf(stderr, Name ": %s does not appear "
- "to be a component of %s\n",
+ pr_err("%s does not appear to be a component of %s\n",
dv->devname, devname);
- return 1;
+ goto abort;
}
}
+ } else if ((dv->disposition == 'r' || dv->disposition == 'f')
+ && get_maj_min(dv->devname, &mj, &mn)) {
+ /* for 'fail' and 'remove', the device might
+ * not exist.
+ */
+ rdev = makedev(mj, mn);
} else {
- j = 0;
-
+ struct stat stb;
tfd = dev_open(dv->devname, O_RDONLY);
- if (tfd < 0 && dv->disposition == 'r' &&
- lstat(dv->devname, &stb) == 0)
- /* Be happy, the lstat worked, that is
- * enough for --remove
- */
- ;
- else {
- if (tfd < 0 || fstat(tfd, &stb) != 0) {
- fprintf(stderr, Name ": cannot find %s: %s\n",
- dv->devname, strerror(errno));
- if (tfd >= 0)
- close(tfd);
- return 1;
- }
+ if (tfd >= 0) {
+ fstat(tfd, &stb);
close(tfd);
- tfd = -1;
- }
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- fprintf(stderr, Name ": %s is not a "
- "block device.\n",
- dv->devname);
- return 1;
+ } else {
+ int open_err = errno;
+ if (stat(dv->devname, &stb) != 0) {
+ pr_err("Cannot find %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
+ }
+ if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (dv->disposition == 'M')
+ /* non-fatal. Also improbable */
+ continue;
+ pr_err("%s is not a block device.\n",
+ dv->devname);
+ goto abort;
+ }
+ if (dv->disposition == 'r')
+ /* Be happy, the stat worked, that is
+ * enough for --remove
+ */
+ ;
+ else {
+ if (dv->disposition == 'M')
+ /* non-fatal */
+ continue;
+ pr_err("Cannot open %s: %s\n",
+ dv->devname, strerror(open_err));
+ goto abort;
+ }
}
+ rdev = stb.st_rdev;
}
switch(dv->disposition){
default:
- fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
+ pr_err("internal error - devmode[%s]=%d\n",
dv->devname, dv->disposition);
- return 1;
+ goto abort;
case 'a':
+ case 'S': /* --add-spare */
+ case 'j': /* --add-journal */
+ case 'A':
+ case 'M': /* --re-add missing */
+ case 'F': /* --re-add faulty */
+ case 'c': /* --cluster-confirm */
/* add the device */
if (subarray) {
- fprintf(stderr, Name ": Cannot add disks to a"
- " \'member\' array, perform this"
- " operation on the parent container\n");
- return 1;
- }
- /* Make sure it isn't in use (in 2.6 or later) */
- tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT);
- if (tfd < 0 && add_dev != dv->devname)
- continue;
- if (tfd < 0) {
- fprintf(stderr, Name ": Cannot open %s: %s\n",
- dv->devname, strerror(errno));
- return 1;
- }
-
- st = dup_super(tst);
-
- if (array.not_persistent==0)
- st->ss->load_super(st, tfd, NULL);
-
- if (add_dev == dv->devname) {
- if (!get_dev_size(tfd, dv->devname, &ldsize)) {
- st->ss->free_super(st);
- close(tfd);
- return 1;
- }
- } else if (!get_dev_size(tfd, NULL, &ldsize)) {
- st->ss->free_super(st);
- close(tfd);
- tfd = -1;
- continue;
- }
-
- if (tst->ss->validate_geometry(
- tst, array.level, array.layout,
- array.raid_disks, NULL,
- ldsize >> 9, NULL, NULL, 0) == 0) {
- if (!force) {
- fprintf(stderr, Name
- ": %s is larger than %s can "
- "effectively use.\n"
- " Add --force is you "
- "really wan to add this device.\n",
- add_dev, devname);
- st->ss->free_super(st);
- close(tfd);
- return 1;
- }
- fprintf(stderr, Name
- ": %s is larger than %s can "
- "effectively use.\n"
- " Adding anyway as --force "
- "was given.\n",
- add_dev, devname);
- }
- if (!tst->ss->external &&
- array.major_version == 0 &&
- md_get_version(fd)%100 < 2) {
- close(tfd);
- st->ss->free_super(st);
- tfd = -1;
- if (ioctl(fd, HOT_ADD_DISK,
- (unsigned long)stb.st_rdev)==0) {
- if (verbose >= 0)
- fprintf(stderr, Name ": hot added %s\n",
- add_dev);
- continue;
- }
-
- fprintf(stderr, Name ": hot add failed for %s: %s\n",
- add_dev, strerror(errno));
- return 1;
+ pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
+ goto abort;
}
- if (array.not_persistent == 0 || tst->ss->external) {
-
- /* need to find a sample superblock to copy, and
- * a spare slot to use.
- * For 'external' array (well, container based),
- * We can just load the metadata for the array.
- */
- if (tst->sb)
- /* already loaded */;
- else if (tst->ss->external) {
- tst->ss->load_container(tst, fd, NULL);
- } else for (j = 0; j < tst->max_devs; j++) {
- char *dev;
- int dfd;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major==0 && disc.minor==0)
- continue;
- if ((disc.state & 4)==0) continue; /* sync */
- /* Looks like a good device to try */
- dev = map_dev(disc.major, disc.minor, 1);
- if (!dev) continue;
- dfd = dev_open(dev, O_RDONLY);
- if (dfd < 0) continue;
- if (tst->ss->load_super(tst, dfd,
- NULL)) {
- close(dfd);
- continue;
- }
- close(dfd);
+ /* Let's first try to write re-add to sysfs */
+ if (rdev != 0 &&
+ (dv->disposition == 'A' || dv->disposition == 'F')) {
+ sysfs_init_dev(&devinfo, rdev);
+ if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
+ pr_err("re-add %s to %s succeed\n",
+ dv->devname, info.sys_name);
break;
}
- /* FIXME this is a bad test to be using */
- if (!tst->sb) {
- close(tfd);
- st->ss->free_super(st);
- fprintf(stderr, Name ": cannot load array metadata from %s\n", devname);
- return 1;
- }
-
- /* Make sure device is large enough */
- if (tst->ss->avail_size(tst, ldsize/512) <
- array_size) {
- close(tfd);
- tfd = -1;
- st->ss->free_super(st);
- if (add_dev != dv->devname)
- continue;
- fprintf(stderr, Name ": %s not large enough to join array\n",
- dv->devname);
- return 1;
- }
-
- /* Possibly this device was recently part of the array
- * and was temporarily removed, and is now being re-added.
- * If so, we can simply re-add it.
- */
- tst->ss->uuid_from_super(tst, duuid);
-
- if (st->sb) {
- struct mdinfo mdi;
- st->ss->getinfo_super(st, &mdi, NULL);
- st->ss->uuid_from_super(st, ouuid);
- if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
- !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
- memcmp(duuid, ouuid, sizeof(ouuid))==0) {
- /* look like it is worth a try. Need to
- * make sure kernel will accept it though.
- */
- /* re-add doesn't work for version-1 superblocks
- * before 2.6.18 :-(
- */
- if (array.major_version == 1 &&
- get_linux_version() <= 2006018)
- goto skip_re_add;
- disc.number = mdi.disk.number;
- if (ioctl(fd, GET_DISK_INFO, &disc) != 0
- || disc.major != 0 || disc.minor != 0
- || !enough_fd(fd))
- goto skip_re_add;
- disc.major = major(stb.st_rdev);
- disc.minor = minor(stb.st_rdev);
- disc.number = mdi.disk.number;
- disc.raid_disk = mdi.disk.raid_disk;
- disc.state = mdi.disk.state;
- if (dv->writemostly == 1)
- disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- if (dv->writemostly == 2)
- disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
- remove_partitions(tfd);
- close(tfd);
- tfd = -1;
- if (update || dv->writemostly > 0) {
- int rv = -1;
- tfd = dev_open(dv->devname, O_RDWR);
- if (tfd < 0) {
- fprintf(stderr, Name ": failed to open %s for"
- " superblock update during re-add\n", dv->devname);
- st->ss->free_super(st);
- return 1;
- }
-
- if (dv->writemostly == 1)
- rv = st->ss->update_super(
- st, NULL, "writemostly",
- devname, verbose, 0, NULL);
- if (dv->writemostly == 2)
- rv = st->ss->update_super(
- st, NULL, "readwrite",
- devname, verbose, 0, NULL);
- if (update)
- rv = st->ss->update_super(
- st, NULL, update,
- devname, verbose, 0, NULL);
- if (rv == 0)
- rv = st->ss->store_super(st, tfd);
- close(tfd);
- tfd = -1;
- if (rv != 0) {
- fprintf(stderr, Name ": failed to update"
- " superblock during re-add\n");
- st->ss->free_super(st);
- return 1;
- }
- }
- /* don't even try if disk is marked as faulty */
- errno = 0;
- if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
- if (verbose >= 0)
- fprintf(stderr, Name ": re-added %s\n", add_dev);
- count++;
- st->ss->free_super(st);
- continue;
- }
- if (errno == ENOMEM || errno == EROFS) {
- fprintf(stderr, Name ": add new device failed for %s: %s\n",
- add_dev, strerror(errno));
- st->ss->free_super(st);
- if (add_dev != dv->devname)
- continue;
- return 1;
- }
- skip_re_add:
- re_add_failed = 1;
- }
- st->ss->free_super(st);
- }
- if (add_dev != dv->devname) {
- if (verbose > 0)
- fprintf(stderr, Name
- ": --re-add for %s to %s is not possible\n",
- add_dev, devname);
- if (tfd >= 0) {
- close(tfd);
- tfd = -1;
- }
- continue;
- }
- if (dv->re_add) {
- if (tfd >= 0)
- close(tfd);
- fprintf(stderr, Name
- ": --re-add for %s to %s is not possible\n",
- dv->devname, devname);
- return 1;
- }
- if (re_add_failed) {
- fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
- dv->devname, devname);
- fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
- dv->devname);
- fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",
- dv->devname);
- if (tfd >= 0)
- close(tfd);
- return 1;
- }
- } else {
- /* non-persistent. Must ensure that new drive
- * is at least array.size big.
- */
- if (ldsize/512 < array_size) {
- fprintf(stderr, Name ": %s not large enough to join array\n",
- dv->devname);
- if (tfd >= 0)
- close(tfd);
- return 1;
- }
}
- /* committed to really trying this device now*/
+
+ if (dv->disposition == 'F')
+ /* Need to remove first */
+ hot_remove_disk(fd, rdev, force);
+ /* Make sure it isn't in use (in 2.6 or later) */
+ tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
if (tfd >= 0) {
- remove_partitions(tfd);
+ /* We know no-one else is using it. We'll
+ * need non-exclusive access to add it, so
+ * do that now.
+ */
close(tfd);
- tfd = -1;
- }
- /* in 2.6.17 and earlier, version-1 superblocks won't
- * use the number we write, but will choose a free number.
- * we must choose the same free number, which requires
- * starting at 'raid_disks' and counting up
- */
- for (j = array.raid_disks; j< tst->max_devs; j++) {
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- break;
- if (disc.major==0 && disc.minor==0)
- break;
- if (disc.state & 8) /* removed */
- break;
+ tfd = dev_open(dv->devname, O_RDONLY);
}
- disc.major = major(stb.st_rdev);
- disc.minor = minor(stb.st_rdev);
- disc.number =j;
- disc.state = 0;
- if (array.not_persistent==0) {
- int dfd;
- if (dv->writemostly == 1)
- disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname)) {
- close(dfd);
- return 1;
- }
- if (tst->ss->write_init_super(tst)) {
- close(dfd);
- return 1;
- }
- } else if (dv->re_add) {
- /* this had better be raid1.
- * As we are "--re-add"ing we must find a spare slot
- * to fill.
- */
- char *used = malloc(array.raid_disks);
- memset(used, 0, array.raid_disks);
- for (j=0; j< tst->max_devs; j++) {
- mdu_disk_info_t disc2;
- disc2.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc2))
- continue;
- if (disc2.major==0 && disc2.minor==0)
- continue;
- if (disc2.state & 8) /* removed */
- continue;
- if (disc2.raid_disk < 0)
- continue;
- if (disc2.raid_disk > array.raid_disks)
- continue;
- used[disc2.raid_disk] = 1;
- }
- for (j=0 ; j<array.raid_disks; j++)
- if (!used[j]) {
- disc.raid_disk = j;
- disc.state |= (1<<MD_DISK_SYNC);
- break;
- }
- free(used);
+ if (tfd < 0) {
+ if (dv->disposition == 'M')
+ continue;
+ pr_err("Cannot open %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
}
- if (dv->writemostly == 1)
- disc.state |= (1 << MD_DISK_WRITEMOSTLY);
- if (tst->ss->external) {
- /* add a disk
- * to an external metadata container */
- struct mdinfo new_mdi;
- struct mdinfo *sra;
- int container_fd;
- int devnum = fd2devnum(fd);
- int dfd;
-
- container_fd = open_dev_excl(devnum);
- if (container_fd < 0) {
- fprintf(stderr, Name ": add failed for %s:"
- " could not get exclusive access to container\n",
- dv->devname);
- tst->ss->free_super(tst);
- return 1;
- }
-
- dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (mdmon_running(tst->container_dev))
- tst->update_tail = &tst->updates;
- if (tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname)) {
- close(dfd);
- close(container_fd);
- return 1;
- }
- if (tst->update_tail)
- flush_metadata_updates(tst);
+ if (!frozen) {
+ if (sysfs_freeze_array(&info) == 1)
+ frozen = 1;
else
- tst->ss->sync_metadata(tst);
-
- sra = sysfs_read(container_fd, -1, 0);
- if (!sra) {
- fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
- dv->devname);
- close(container_fd);
- tst->ss->free_super(tst);
- return 1;
- }
- sra->array.level = LEVEL_CONTAINER;
- /* Need to set data_offset and component_size */
- tst->ss->getinfo_super(tst, &new_mdi, NULL);
- new_mdi.disk.major = disc.major;
- new_mdi.disk.minor = disc.minor;
- new_mdi.recovery_start = 0;
- /* Make sure fds are closed as they are O_EXCL which
- * would block add_disk */
- tst->ss->free_super(tst);
- if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
- fprintf(stderr, Name ": add new device to external metadata"
- " failed for %s\n", dv->devname);
- close(container_fd);
- sysfs_free(sra);
- return 1;
- }
- ping_monitor_by_id(devnum);
- sysfs_free(sra);
- close(container_fd);
- } else {
- tst->ss->free_super(tst);
- if (ioctl(fd, ADD_NEW_DISK, &disc)) {
- fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
- dv->devname, j, strerror(errno));
- return 1;
- }
+ frozen = -1;
}
- if (verbose >= 0)
- fprintf(stderr, Name ": added %s\n", dv->devname);
+ rv = Manage_add(fd, tfd, dv, tst, &array,
+ force, verbose, devname, update,
+ rdev, array_size, raid_slot);
+ close(tfd);
+ tfd = -1;
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
break;
case 'r':
/* hot remove */
if (subarray) {
- fprintf(stderr, Name ": Cannot remove disks from a"
- " \'member\' array, perform this"
- " operation on the parent container\n");
- if (sysfd >= 0)
- close(sysfd);
- return 1;
- }
- if (tst->ss->external) {
- /* To remove a device from a container, we must
- * check that it isn't in use in an array.
- * This involves looking in the 'holders'
- * directory - there must be just one entry,
- * the container.
- * To ensure that it doesn't get used as a
- * hold spare while we are checking, we
- * get an O_EXCL open on the container
- */
- int dnum = fd2devnum(fd);
- lfd = open_dev_excl(dnum);
- if (lfd < 0) {
- fprintf(stderr, Name
- ": Cannot get exclusive access "
- " to container - odd\n");
- if (sysfd >= 0)
- close(sysfd);
- return 1;
- }
- /* in the detached case it is not possible to
- * check if we are the unique holder, so just
- * rely on the 'detached' checks
- */
- if (strcmp(dv->devname, "detached") == 0 ||
- sysfd >= 0 ||
- sysfs_unique_holder(dnum, stb.st_rdev))
- /* pass */;
- else {
- fprintf(stderr, Name
- ": %s is %s, cannot remove.\n",
- dnprintable,
- errno == EEXIST ? "still in use":
- "not a member");
- close(lfd);
- return 1;
- }
- }
- /* FIXME check that it is a current member */
- if (sysfd >= 0) {
- /* device has been removed and we don't know
- * the major:minor number
- */
- int n = write(sysfd, "remove", 6);
- if (n != 6)
- err = -1;
- else
- err = 0;
+ pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
+ rv = -1;
+ } else
+ /* Manage_remove() takes 'force' before 'verbose' */
+ rv = Manage_remove(tst, fd, dv, sysfd,
+ rdev, force, verbose,
+ devname);
+ if (sysfd >= 0)
close(sysfd);
- sysfd = -1;
- } else {
- err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
- if (err && errno == ENODEV) {
- /* Old kernels rejected this if no personality
- * registered */
- struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
- struct mdinfo *dv = NULL;
- if (sra)
- dv = sra->devs;
- for ( ; dv ; dv=dv->next)
- if (dv->disk.major == (int)major(stb.st_rdev) &&
- dv->disk.minor == (int)minor(stb.st_rdev))
- break;
- if (dv)
- err = sysfs_set_str(sra, dv,
- "state", "remove");
- else
- err = -1;
- if (sra)
- sysfs_free(sra);
- }
- }
- if (err) {
- fprintf(stderr, Name ": hot remove failed "
- "for %s: %s\n", dnprintable,
- strerror(errno));
- if (lfd >= 0)
- close(lfd);
- return 1;
- }
- if (tst->ss->external) {
- /*
- * Before dropping our exclusive open we make an
- * attempt at preventing mdmon from seeing an
- * 'add' event before reconciling this 'remove'
- * event.
- */
- char *name = devnum2devname(fd2devnum(fd));
-
- if (!name) {
- fprintf(stderr, Name ": unable to get container name\n");
- return 1;
- }
-
- ping_manager(name);
- free(name);
- }
- if (lfd >= 0)
- close(lfd);
- count++;
- if (verbose >= 0)
- fprintf(stderr, Name ": hot removed %s from %s\n",
- dnprintable, devname);
+ sysfd = -1;
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
break;
case 'f': /* set faulty */
/* FIXME check current member */
if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
(sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
- (unsigned long) stb.st_rdev))) {
- fprintf(stderr, Name ": set device faulty failed for %s: %s\n",
- dnprintable, strerror(errno));
+ rdev))) {
+ if (errno == EBUSY)
+ busy = 1;
+ pr_err("set device faulty failed for %s: %s\n",
+ dv->devname, strerror(errno));
if (sysfd >= 0)
close(sysfd);
- return 1;
+ goto abort;
}
if (sysfd >= 0)
close(sysfd);
sysfd = -1;
count++;
if (verbose >= 0)
- fprintf(stderr, Name ": set %s faulty in %s\n",
- dnprintable, devname);
+ pr_err("set %s faulty in %s\n",
+ dv->devname, devname);
+ break;
+ case 'R': /* Mark as replaceable */
+ if (subarray) {
+ pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
+ rv = -1;
+ } else {
+ if (!frozen) {
+ if (sysfs_freeze_array(&info) == 1)
+ frozen = 1;
+ else
+ frozen = -1;
+ }
+ rv = Manage_replace(tst, fd, dv,
+ rdev, verbose,
+ devname);
+ }
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
+ break;
+ case 'W': /* --with device that doesn't match */
+ pr_err("No matching --replace device for --with %s\n",
+ dv->devname);
+ goto abort;
+ case 'w': /* --with device which was matched */
+ rv = Manage_with(tst, fd, dv,
+ rdev, verbose, devname);
+ if (rv < 0)
+ goto abort;
break;
}
}
+ if (frozen > 0)
+ sysfs_set_str(&info, NULL, "sync_action","idle");
if (test && count == 0)
return 2;
return 0;
+
+abort:
+ if (frozen > 0)
+ sysfs_set_str(&info, NULL, "sync_action","idle");
+ return !test && busy ? 2 : 1;
}
int autodetect(void)
return rv;
}
/*
 * Update_subarray - apply a metadata update to one subarray.
 *
 * dev and subarray are handed to open_subarray() to load the metadata into
 * a local struct supertype; subarray, update and ident are then passed
 * through unchanged to the metadata handler's update_subarray() method.
 *
 * This patch renames the last parameter from "quiet" to "verbose":
 * messages are emitted when verbose >= 0, and open_subarray() is asked to
 * be quiet when verbose < 0.
 *
 * Returns 2 if the subarray cannot be opened or the metadata handler has
 * no update_subarray() method; otherwise returns the value produced by
 * update_subarray() (0 is treated as success below).
 */
-int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet)
+int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
{
struct supertype supertype, *st = &supertype;
int fd, rv = 2;
memset(st, 0, sizeof(*st));
- fd = open_subarray(dev, subarray, st, quiet);
+ fd = open_subarray(dev, subarray, st, verbose < 0);
if (fd < 0)
return 2;
/* rv is still 2 here, so an unsupported operation is reported as 2. */
if (!st->ss->update_subarray) {
- if (!quiet)
- fprintf(stderr,
- Name ": Operation not supported for %s metadata\n",
- st->ss->name);
+ if (verbose >= 0)
+ pr_err("Operation not supported for %s metadata\n",
+ st->ss->name);
goto free_super;
}
/*
 * When mdmon is running, point update_tail at st->updates so that the
 * success path below calls flush_metadata_updates() instead of
 * sync_metadata().
 */
- if (mdmon_running(st->devnum))
+ if (mdmon_running(st->devnm))
st->update_tail = &st->updates;
rv = st->ss->update_subarray(st, subarray, update, ident);
/* Non-zero rv: report the failure and skip both write-out paths. */
if (rv) {
- if (!quiet)
- fprintf(stderr, Name ": Failed to update %s of subarray-%s in %s\n",
+ if (verbose >= 0)
+ pr_err("Failed to update %s of subarray-%s in %s\n",
update, subarray, dev);
} else if (st->update_tail)
flush_metadata_updates(st);
else
st->ss->sync_metadata(st);
/* Extra notice only for a successful "name" update. */
- if (rv == 0 && strcmp(update, "name") == 0 && !quiet)
- fprintf(stderr,
- Name ": Updated subarray-%s name from %s, UUIDs may have changed\n",
- subarray, dev);
+ if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
+ pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
+ subarray, dev);
/* NOTE(review): no close(fd) appears in the lines shown here - confirm against the full file. */
free_super:
st->ss->free_super(st);
return rv;
}
-/* Move spare from one array to another
- * If adding to destination array fails
- * add back to original array
+/* Move spare from one array to another. If adding to the destination array
+ * fails, add it back to the original array.
* Returns 1 on success, 0 on failure */
int move_spare(char *from_devname, char *to_devname, dev_t devid)
{
devlist.next = NULL;
devlist.used = 0;
- devlist.re_add = 0;
- devlist.writemostly = 0;
+ devlist.writemostly = FlagDefault;
+ devlist.failfast = FlagDefault;
devlist.devname = devname;
sprintf(devname, "%d:%d", major(devid), minor(devid));
close(fd2);
return 0;
}
-#endif