mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
if (mdi &&
mdi->array.major_version == -1 &&
- mdi->array.level > 0 &&
is_subarray(mdi->text_version)) {
char vers[64];
strcpy(vers, "external:");
if (*cp)
*cp = 0;
ping_monitor(vers+10);
+ if (mdi->array.level <= 0)
+ sysfs_set_str(mdi, NULL, "array_state", "active");
}
return 0;
}
struct stat stb;
struct mdinfo *mdi;
int devnum;
+ int err;
+ int count;
/* If this is an mdmon managed array, just write 'inactive'
* to the array state and let mdmon clear up.
*/
devnum = fd2devnum(fd);
+ /* Get EXCL access first. If this fails, then attempting
+ * to stop is probably a bad idea.
+ */
+ close(fd);
+ fd = open(devname, O_RDONLY|O_EXCL);
+ if (fd < 0 || fd2devnum(fd) != devnum) {
+ if (fd >= 0)
+ close(fd);
+ fprintf(stderr,
+ Name ": Cannot get exclusive access to %s:"
+ "Perhaps a running "
+ "process, mounted filesystem "
+ "or active volume group?\n",
+ devname);
+ return 1;
+ }
mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
if (mdi &&
mdi->array.level > 0 &&
is_subarray(mdi->text_version)) {
+ int err;
/* This is mdmon managed. */
close(fd);
- if (sysfs_set_str(mdi, NULL,
- "array_state", "inactive") < 0) {
- if (quiet == 0)
- fprintf(stderr, Name
- ": failed to stop array %s: %s\n",
- devname, strerror(errno));
+
+ count = 25;
+ while (count &&
+ (err = sysfs_set_str(mdi, NULL,
+ "array_state",
+ "inactive")) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count--;
+ }
+ if (err && !quiet) {
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
+ devname, strerror(errno));
return 1;
}
/* Give monitor a chance to act */
ping_monitor(mdi->text_version);
- fd = open(devname, O_RDONLY);
+ fd = open_dev_excl(devnum);
+ if (fd < 0) {
+ fprintf(stderr, Name
+ ": failed to completely stop %s"
+ ": Device is busy\n",
+ devname);
+ return 1;
+ }
} else if (mdi &&
mdi->array.major_version == -1 &&
mdi->array.minor_version == -2 &&
}
}
- if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
+ /* As we have an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25; err = 0;
+ while (count && fd >= 0
+ && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count --;
+ }
+ if (fd >= 0 && err) {
if (quiet == 0) {
fprintf(stderr, Name
": failed to stop array %s: %s\n",
}
int Manage_subdevs(char *devname, int fd,
- mddev_dev_t devlist, int verbose, int test)
+ struct mddev_dev *devlist, int verbose, int test,
+ char *update, int force)
{
/* do something to each dev.
* devmode can be
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*/
- mddev_dev_t add_devlist = NULL;
+ struct mddev_dev *add_devlist = NULL;
mdu_array_info_t array;
mdu_disk_info_t disc;
unsigned long long array_size;
- mddev_dev_t dv, next = NULL;
+ struct mddev_dev *dv, *next = NULL;
struct stat stb;
int j, jnext = 0;
int tfd = -1;
struct supertype *st, *tst;
+ char *subarray = NULL;
int duuid[4];
int ouuid[4];
int lfd = -1;
if (array_size <= 0)
array_size = array.size * 2;
- tst = super_by_fd(fd);
+ tst = super_by_fd(fd, &subarray);
if (!tst) {
fprintf(stderr, Name ": unsupport array - version %d.%d\n",
array.major_version, array.minor_version);
char *dnprintable = dv->devname;
char *add_dev = dv->devname;
int err;
+ int re_add_failed = 0;
next = dv->next;
jnext = 0;
if (strcmp(dv->devname, "failed")==0 ||
strcmp(dv->devname, "faulty")==0) {
+ int remaining_disks = array.nr_disks;
if (dv->disposition != 'r') {
fprintf(stderr, Name ": %s only meaningful "
"with -r, not -%c\n",
dv->devname, dv->disposition);
return 1;
}
- for (; j < array.raid_disks + array.nr_disks ; j++) {
+ for (; j < 1024 && remaining_disks > 0; j++) {
unsigned dev;
disc.number = j;
if (ioctl(fd, GET_DISK_INFO, &disc))
continue;
if (disc.major == 0 && disc.minor == 0)
continue;
+ remaining_disks --;
if ((disc.state & 1) == 0) /* faulty */
continue;
dev = makedev(disc.major, disc.minor);
dnprintable = dvname;
break;
}
- if (jnext == 0)
+ if (next != dv)
continue;
} else if (strcmp(dv->devname, "detached") == 0) {
+ int remaining_disks = array.nr_disks;
if (dv->disposition != 'r' && dv->disposition != 'f') {
fprintf(stderr, Name ": %s only meaningful "
"with -r of -f, not -%c\n",
dv->devname, dv->disposition);
return 1;
}
- for (; j < array.raid_disks + array.nr_disks; j++) {
+ for (; j < 1024 && remaining_disks > 0; j++) {
int sfd;
unsigned dev;
disc.number = j;
continue;
if (disc.major == 0 && disc.minor == 0)
continue;
+ remaining_disks --;
sprintf(dvname,"%d:%d", disc.major, disc.minor);
sfd = dev_open(dvname, O_RDONLY);
if (sfd >= 0) {
dnprintable = dvname;
break;
}
- if (jnext == 0)
+ if (next != dv)
continue;
} else if (strcmp(dv->devname, "missing") == 0) {
if (dv->disposition != 'a' || dv->re_add == 0) {
if (stat(add_dev, &stb) < 0)
continue;
} else if (strchr(dv->devname, '/') == NULL &&
+ strchr(dv->devname, ':') == NULL &&
strlen(dv->devname) < 50) {
/* Assume this is a kernel-internal name like 'sda1' */
int found = 0;
return 1;
case 'a':
/* add the device */
- if (tst->subarray[0]) {
+ if (subarray) {
fprintf(stderr, Name ": Cannot add disks to a"
" \'member\' array, perform this"
" operation on the parent container\n");
continue;
}
+ if (tst->ss->validate_geometry(
+ tst, array.level, array.layout,
+ array.raid_disks, NULL,
+ ldsize >> 9, NULL, NULL, 0) == 0) {
+ if (!force) {
+ fprintf(stderr, Name
+ ": %s is larger than %s can "
+ "effectively use.\n"
+ " Add --force is you "
+ "really wan to add this device.\n",
+ add_dev, devname);
+ close(tfd);
+ return 1;
+ }
+ fprintf(stderr, Name
+ ": %s is larger than %s can "
+ "effectively use.\n"
+ " Adding anyway as --force "
+ "was given.\n",
+ add_dev, devname);
+ }
if (!tst->ss->external &&
array.major_version == 0 &&
md_get_version(fd)%100 < 2) {
if (tst->sb)
/* already loaded */;
else if (tst->ss->external) {
- tst->ss->load_super(tst, fd, NULL);
+ tst->ss->load_container(tst, fd, NULL);
} else for (j = 0; j < tst->max_devs; j++) {
char *dev;
int dfd;
/* FIXME this is a bad test to be using */
if (!tst->sb) {
close(tfd);
- fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
+ fprintf(stderr, Name ": cannot load array metadata from %s\n", devname);
return 1;
}
*/
tst->ss->uuid_from_super(tst, duuid);
- /* re-add doesn't work for version-1 superblocks
- * before 2.6.18 :-(
- */
- if (array.major_version == 1 &&
- get_linux_version() <= 2006018)
- ;
- else if (st->sb) {
+ if (st->sb) {
+ struct mdinfo mdi;
+ st->ss->getinfo_super(st, &mdi, NULL);
st->ss->uuid_from_super(st, ouuid);
- if (memcmp(duuid, ouuid, sizeof(ouuid))==0) {
- /* looks close enough for now. Kernel
- * will worry about whether a bitmap
- * based reconstruction is possible.
+ if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
+ !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
+ memcmp(duuid, ouuid, sizeof(ouuid))==0) {
+ /* look like it is worth a try. Need to
+ * make sure kernel will accept it though.
+ */
+ /* re-add doesn't work for version-1 superblocks
+ * before 2.6.18 :-(
*/
- struct mdinfo mdi;
- st->ss->getinfo_super(st, &mdi);
+ if (array.major_version == 1 &&
+ get_linux_version() <= 2006018)
+ goto skip_re_add;
+ disc.number = mdi.disk.number;
+ if (ioctl(fd, GET_DISK_INFO, &disc) != 0
+ || disc.major != 0 || disc.minor != 0
+ || !enough_fd(fd))
+ goto skip_re_add;
disc.major = major(stb.st_rdev);
disc.minor = minor(stb.st_rdev);
disc.number = mdi.disk.number;
remove_partitions(tfd);
close(tfd);
tfd = -1;
+ if (update || dv->writemostly > 0) {
+ int rv = -1;
+ tfd = dev_open(dv->devname, O_RDWR);
+ if (tfd < 0) {
+ fprintf(stderr, Name ": failed to open %s for"
+ " superblock update during re-add\n", dv->devname);
+ return 1;
+ }
+
+ if (dv->writemostly == 1)
+ rv = st->ss->update_super(
+ st, NULL, "writemostly",
+ devname, verbose, 0, NULL);
+ if (dv->writemostly == 2)
+ rv = st->ss->update_super(
+ st, NULL, "readwrite",
+ devname, verbose, 0, NULL);
+ if (update)
+ rv = st->ss->update_super(
+ st, NULL, update,
+ devname, verbose, 0, NULL);
+ if (rv == 0)
+ rv = st->ss->store_super(st, tfd);
+ close(tfd);
+ tfd = -1;
+ if (rv != 0) {
+ fprintf(stderr, Name ": failed to update"
+ " superblock during re-add\n");
+ return 1;
+ }
+ }
/* don't even try if disk is marked as faulty */
errno = 0;
- if ((disc.state & 1) == 0 &&
- ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+ if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
if (verbose >= 0)
fprintf(stderr, Name ": re-added %s\n", add_dev);
count++;
continue;
return 1;
}
- /* fall back on normal-add */
+ skip_re_add:
+ re_add_failed = 1;
}
+ st->ss->free_super(st);
}
if (add_dev != dv->devname) {
if (verbose > 0)
dv->devname, devname);
return 1;
}
+ if (re_add_failed) {
+ fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
+ dv->devname, devname);
+ fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
+ dv->devname);
+ fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",
+ dv->devname);
+ if (tfd >= 0)
+ close(tfd);
+ return 1;
+ }
} else {
/* non-persistent. Must ensure that new drive
* is at least array.size big.
disc.minor = minor(stb.st_rdev);
disc.number =j;
disc.state = 0;
- if (array.not_persistent==0 || tst->ss->external) {
+ if (array.not_persistent==0) {
int dfd;
if (dv->writemostly == 1)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
close(dfd);
return 1;
}
- /* write_init_super will close 'dfd' */
- if (tst->ss->external)
- /* mdmon will write the metadata */
+ if (tst->ss->write_init_super(tst)) {
close(dfd);
- else if (tst->ss->write_init_super(tst))
return 1;
+ }
} else if (dv->re_add) {
/* this had better be raid1.
* As we are "--re-add"ing we must find a spare slot
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) {
- /* add a disk to an external metadata container
- * only if mdmon is around to see it
- */
+ /* add a disk
+ * to an external metadata container */
struct mdinfo new_mdi;
struct mdinfo *sra;
int container_fd;
int devnum = fd2devnum(fd);
+ int dfd;
container_fd = open_dev_excl(devnum);
if (container_fd < 0) {
fprintf(stderr, Name ": add failed for %s:"
" could not get exclusive access to container\n",
dv->devname);
+ tst->ss->free_super(tst);
return 1;
}
- if (!mdmon_running(devnum)) {
- fprintf(stderr, Name ": add failed for %s: mdmon not running\n",
- dv->devname);
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (mdmon_running(tst->container_dev))
+ tst->update_tail = &tst->updates;
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname)) {
+ close(dfd);
close(container_fd);
return 1;
}
+ if (tst->update_tail)
+ flush_metadata_updates(tst);
+ else
+ tst->ss->sync_metadata(tst);
sra = sysfs_read(container_fd, -1, 0);
if (!sra) {
fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
dv->devname);
close(container_fd);
+ tst->ss->free_super(tst);
return 1;
}
sra->array.level = LEVEL_CONTAINER;
/* Need to set data_offset and component_size */
- tst->ss->getinfo_super(tst, &new_mdi);
+ tst->ss->getinfo_super(tst, &new_mdi, NULL);
new_mdi.disk.major = disc.major;
new_mdi.disk.minor = disc.minor;
new_mdi.recovery_start = 0;
+ /* Make sure fds are closed as they are O_EXCL which
+ * would block add_disk */
+ tst->ss->free_super(tst);
if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
fprintf(stderr, Name ": add new device to external metadata"
" failed for %s\n", dv->devname);
close(container_fd);
+ sysfs_free(sra);
return 1;
}
- ping_monitor(devnum2devname(devnum));
+ ping_monitor_by_id(devnum);
sysfs_free(sra);
close(container_fd);
- } else if (ioctl(fd, ADD_NEW_DISK, &disc)) {
- fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
- dv->devname, j, strerror(errno));
- return 1;
+ } else {
+ tst->ss->free_super(tst);
+ if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+ fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
+ dv->devname, j, strerror(errno));
+ return 1;
+ }
}
if (verbose >= 0)
fprintf(stderr, Name ": added %s\n", dv->devname);
case 'r':
/* hot remove */
- if (tst->subarray[0]) {
+ if (subarray) {
fprintf(stderr, Name ": Cannot remove disks from a"
" \'member\' array, perform this"
" operation on the parent container\n");
return rv;
}
-int Update_subarray(char *dev, char *subarray, char *update, mddev_ident_t ident, int quiet)
+int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet)
{
struct supertype supertype, *st = &supertype;
int fd, rv = 2;
memset(st, 0, sizeof(*st));
- if (snprintf(st->subarray, sizeof(st->subarray), "%s", subarray) >=
- (signed)sizeof(st->subarray)) {
- if (!quiet)
- fprintf(stderr,
- Name ": Input overflow for subarray '%s' > %zu bytes\n",
- subarray, sizeof(st->subarray) - 1);
- return 2;
- }
- fd = open_subarray(dev, st, quiet);
+ fd = open_subarray(dev, subarray, st, quiet);
if (fd < 0)
return 2;
if (mdmon_running(st->devnum))
st->update_tail = &st->updates;
- rv = st->ss->update_subarray(st, update, ident);
+ rv = st->ss->update_subarray(st, subarray, update, ident);
if (rv) {
if (!quiet)
return rv;
}
+
+/* Move spare from one array to another
+ * If adding to destination array fails
+ * add back to original array
+ * Returns 1 on success, 0 on failure */
+int move_spare(char *from_devname, char *to_devname, dev_t devid)
+{
+ struct mddev_dev devlist;
+ char devname[20];
+
+ /* try to remove and add */
+ int fd1 = open(to_devname, O_RDONLY);
+ int fd2 = open(from_devname, O_RDONLY);
+
+ if (fd1 < 0 || fd2 < 0) {
+ if (fd1>=0) close(fd1);
+ if (fd2>=0) close(fd2);
+ return 0;
+ }
+
+ devlist.next = NULL;
+ devlist.used = 0;
+ devlist.re_add = 0;
+ devlist.writemostly = 0;
+ devlist.devname = devname;
+ sprintf(devname, "%d:%d", major(devid), minor(devid));
+
+ devlist.disposition = 'r';
+ if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
+ devlist.disposition = 'a';
+ if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+ /* make sure manager is aware of changes */
+ ping_manager(to_devname);
+ ping_manager(from_devname);
+ close(fd1);
+ close(fd2);
+ return 1;
+ }
+ else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+ }
+ close(fd1);
+ close(fd2);
+ return 0;
+}
#endif