+ unsigned long long ldsize;
+ struct supertype *dev_st = NULL;
+ int j;
+ mdu_disk_info_t disc;
+
+ if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+ if (dv->disposition == 'M')
+ return 0;
+ else
+ return -1;
+ }
+
+ if (tst->ss->validate_geometry(
+ tst, array->level, array->layout,
+ array->raid_disks, NULL,
+ ldsize >> 9, INVALID_SECTORS, NULL, NULL, 0) == 0) {
+ if (!force) {
+ pr_err("%s is larger than %s can "
+ "effectively use.\n"
+ " Add --force is you "
+ "really want to add this device.\n",
+ dv->devname, devname);
+ return -1;
+ }
+ pr_err("%s is larger than %s can "
+ "effectively use.\n"
+ " Adding anyway as --force "
+ "was given.\n",
+ dv->devname, devname);
+ }
+ if (!tst->ss->external &&
+ array->major_version == 0 &&
+ md_get_version(fd)%100 < 2) {
+ if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
+ if (verbose >= 0)
+ pr_err("hot added %s\n",
+ dv->devname);
+ return 1;
+ }
+
+ pr_err("hot add failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ return -1;
+ }
+
+ if (array->not_persistent == 0 || tst->ss->external) {
+
+ /* need to find a sample superblock to copy, and
+ * a spare slot to use.
+ * For 'external' array (well, container based),
+ * We can just load the metadata for the array->
+ */
+ int array_failed;
+ if (tst->sb)
+ /* already loaded */;
+ else if (tst->ss->external) {
+ tst->ss->load_container(tst, fd, NULL);
+ } else for (j = 0; j < tst->max_devs; j++) {
+ char *dev;
+ int dfd;
+ disc.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ continue;
+ if (disc.major==0 && disc.minor==0)
+ continue;
+ if ((disc.state & 4)==0) /* sync */
+ continue;
+ /* Looks like a good device to try */
+ dev = map_dev(disc.major, disc.minor, 1);
+ if (!dev)
+ continue;
+ dfd = dev_open(dev, O_RDONLY);
+ if (dfd < 0)
+ continue;
+ if (tst->ss->load_super(tst, dfd,
+ NULL)) {
+ close(dfd);
+ continue;
+ }
+ close(dfd);
+ break;
+ }
+ /* FIXME this is a bad test to be using */
+ if (!tst->sb && dv->disposition != 'a') {
+ /* we are re-adding a device to a
+ * completely dead array - have to depend
+ * on kernel to check
+ */
+ } else if (!tst->sb) {
+ pr_err("cannot load array metadata from %s\n", devname);
+ return -1;
+ }
+
+ /* Make sure device is large enough */
+ if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+ array_size) {
+ if (dv->disposition == 'M')
+ return 0;
+ pr_err("%s not large enough to join array\n",
+ dv->devname);
+ return -1;
+ }
+
+ /* Possibly this device was recently part of
+ * the array and was temporarily removed, and
+ * is now being re-added. If so, we can
+ * simply re-add it.
+ */
+
+ if (array->not_persistent==0) {
+ dev_st = dup_super(tst);
+ dev_st->ss->load_super(dev_st, tfd, NULL);
+ }
+ if (dev_st && dev_st->sb) {
+ int rv = attempt_re_add(fd, tfd, dv,
+ dev_st, tst,
+ rdev,
+ update, devname,
+ verbose,
+ array);
+ dev_st->ss->free_super(dev_st);
+ if (rv)
+ return rv;
+ }
+ if (dv->disposition == 'M') {
+ if (verbose > 0)
+ pr_err("--re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return 0;
+ }
+ if (dv->disposition == 'A') {
+ pr_err("--re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return -1;
+ }
+ if (array->active_disks < array->raid_disks) {
+ char *avail = xcalloc(array->raid_disks, 1);
+ int d;
+ int found = 0;
+
+ for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+ disc.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ continue;
+ if (disc.major == 0 && disc.minor == 0)
+ continue;
+ if (!(disc.state & (1<<MD_DISK_SYNC)))
+ continue;
+ avail[disc.raid_disk] = 1;
+ found++;
+ }
+ array_failed = !enough(array->level, array->raid_disks,
+ array->layout, 1, avail);
+ } else
+ array_failed = 0;
+ if (array_failed) {
+ pr_err("%s has failed so using --add cannot work and might destroy\n",
+ devname);
+ pr_err("data on %s. You should stop the array and re-assemble it.\n",
+ dv->devname);
+ return -1;
+ }
+ } else {
+ /* non-persistent. Must ensure that new drive
+ * is at least array->size big.
+ */
+ if (ldsize/512 < array_size) {
+ pr_err("%s not large enough to join array\n",
+ dv->devname);
+ return -1;
+ }
+ }
+ /* committed to really trying this device now*/
+ remove_partitions(tfd);
+
+ /* in 2.6.17 and earlier, version-1 superblocks won't
+ * use the number we write, but will choose a free number.
+ * we must choose the same free number, which requires
+ * starting at 'raid_disks' and counting up
+ */
+ for (j = array->raid_disks; j < tst->max_devs; j++) {
+ disc.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ break;
+ if (disc.major==0 && disc.minor==0)
+ break;
+ if (disc.state & 8) /* removed */
+ break;
+ }
+ disc.major = major(rdev);
+ disc.minor = minor(rdev);
+ disc.number =j;
+ disc.state = 0;
+ if (array->not_persistent==0) {
+ int dfd;
+ if (dv->writemostly == 1)
+ disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname, INVALID_SECTORS))
+ return -1;
+ if (tst->ss->write_init_super(tst))
+ return -1;
+ } else if (dv->disposition == 'A') {
+ /* this had better be raid1.
+ * As we are "--re-add"ing we must find a spare slot
+ * to fill.
+ */
+ char *used = xcalloc(array->raid_disks, 1);
+ for (j = 0; j < tst->max_devs; j++) {
+ mdu_disk_info_t disc2;
+ disc2.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc2))
+ continue;
+ if (disc2.major==0 && disc2.minor==0)
+ continue;
+ if (disc2.state & 8) /* removed */
+ continue;
+ if (disc2.raid_disk < 0)
+ continue;
+ if (disc2.raid_disk > array->raid_disks)
+ continue;
+ used[disc2.raid_disk] = 1;
+ }
+ for (j = 0 ; j < array->raid_disks; j++)
+ if (!used[j]) {
+ disc.raid_disk = j;
+ disc.state |= (1<<MD_DISK_SYNC);
+ break;
+ }
+ free(used);
+ }
+ if (dv->writemostly == 1)
+ disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+ if (tst->ss->external) {
+ /* add a disk
+ * to an external metadata container */
+ struct mdinfo new_mdi;
+ struct mdinfo *sra;
+ int container_fd;
+ char devnm[32];
+ int dfd;
+
+ strcpy(devnm, fd2devnm(fd));
+
+ container_fd = open_dev_excl(devnm);
+ if (container_fd < 0) {
+ pr_err("add failed for %s:"
+ " could not get exclusive access to container\n",
+ dv->devname);
+ tst->ss->free_super(tst);
+ return -1;
+ }
+
+ Kill(dv->devname, NULL, 0, -1, 0);
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (mdmon_running(tst->container_devnm))
+ tst->update_tail = &tst->updates;
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname, INVALID_SECTORS)) {
+ close(dfd);
+ close(container_fd);
+ return -1;
+ }
+ if (tst->update_tail)
+ flush_metadata_updates(tst);
+ else
+ tst->ss->sync_metadata(tst);
+
+ sra = sysfs_read(container_fd, NULL, 0);
+ if (!sra) {
+ pr_err("add failed for %s: sysfs_read failed\n",
+ dv->devname);
+ close(container_fd);
+ tst->ss->free_super(tst);
+ return -1;
+ }
+ sra->array.level = LEVEL_CONTAINER;
+ /* Need to set data_offset and component_size */
+ tst->ss->getinfo_super(tst, &new_mdi, NULL);
+ new_mdi.disk.major = disc.major;
+ new_mdi.disk.minor = disc.minor;
+ new_mdi.recovery_start = 0;
+ /* Make sure fds are closed as they are O_EXCL which
+ * would block add_disk */
+ tst->ss->free_super(tst);
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ pr_err("add new device to external metadata"
+ " failed for %s\n", dv->devname);
+ close(container_fd);
+ sysfs_free(sra);
+ return -1;
+ }
+ ping_monitor(devnm);
+ sysfs_free(sra);
+ close(container_fd);
+ } else {
+ tst->ss->free_super(tst);
+ if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+ pr_err("add new device failed for %s as %d: %s\n",
+ dv->devname, j, strerror(errno));
+ return -1;
+ }
+ }
+ if (verbose >= 0)
+ pr_err("added %s\n", dv->devname);
+ return 1;
+}
+
+ /*
+  * Manage_remove - hot-remove one component device from an md array or
+  * from an external-metadata container.
+  *
+  * @tst:     metadata handler; tst->ss->external selects the container
+  *           code paths.
+  * @fd:      open descriptor on the array (or container) device.
+  * @dv:      device being removed; dv->devname is used in messages.
+  * @sysfd:   when >= 0, a descriptor to which the string "remove" is
+  *           written instead of issuing HOT_REMOVE_DISK.
+  * @rdev:    device number of the component, or 0 when it is not known.
+  * @verbose: success is reported when this is >= 0.
+  * @devname: name of the array, used in messages.
+  *
+  * Returns 1 on success and -1 on any failure.  The exclusive open taken
+  * on the container is released on every return path.
+  */
+ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
+ 		  int sysfd, unsigned long rdev, int verbose, char *devname)
+ {
+ 	int lfd = -1;
+ 	int err;
+
+ 	if (tst->ss->external) {
+ 		/* To remove a device from a container, we must
+ 		 * check that it isn't in use in an array.
+ 		 * This involves looking in the 'holders'
+ 		 * directory - there must be just one entry,
+ 		 * the container.
+ 		 * To ensure that it doesn't get used as a
+ 		 * hot spare while we are checking, we
+ 		 * get an O_EXCL open on the container
+ 		 */
+ 		int ret;
+ 		char devnm[32];
+ 		char *name = fd2devnm(fd);
+
+ 		/* fd2devnm() may return NULL (the lookup further down
+ 		 * already checks for that), and the name has to fit in
+ 		 * the local buffer before it can be copied.
+ 		 */
+ 		if (!name || strlen(name) >= sizeof(devnm)) {
+ 			pr_err("unable to get container name\n");
+ 			return -1;
+ 		}
+ 		strcpy(devnm, name);
+
+ 		lfd = open_dev_excl(devnm);
+ 		if (lfd < 0) {
+ 			pr_err("Cannot get exclusive access "
+ 			       " to container - odd\n");
+ 			return -1;
+ 		}
+ 		/* We may not be able to check on holders in
+ 		 * sysfs, either because we don't have the dev num
+ 		 * (rdev == 0) or because the device has been detached
+ 		 * and the 'holders' directory no longer exists
+ 		 * (ret == -1).  In that case, assume it is OK to
+ 		 * remove.
+ 		 */
+ 		if (rdev == 0)
+ 			ret = -1;
+ 		else
+ 			ret = sysfs_unique_holder(devnm, rdev);
+ 		if (ret == 0) {
+ 			pr_err("%s is not a member, cannot remove.\n",
+ 			       dv->devname);
+ 			close(lfd);
+ 			return -1;
+ 		}
+ 		if (ret >= 2) {
+ 			pr_err("%s is still in use, cannot remove.\n",
+ 			       dv->devname);
+ 			close(lfd);
+ 			return -1;
+ 		}
+ 	}
+ 	/* FIXME check that it is a current member */
+ 	if (sysfd >= 0) {
+ 		/* device has been removed and we don't know
+ 		 * the major:minor number
+ 		 */
+ 		int n = write(sysfd, "remove", 6);
+
+ 		/* anything but a complete write counts as failure */
+ 		err = (n != 6) ? -1 : 0;
+ 	} else {
+ 		err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+ 		if (err && errno == ENODEV) {
+ 			/* Old kernels rejected this if no personality
+ 			 * is registered; fall back to asking for the
+ 			 * removal through the device's sysfs 'state'.
+ 			 */
+ 			struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
+ 			/* named 'sd' so that it does not shadow the 'dv'
+ 			 * parameter used by the messages below
+ 			 */
+ 			struct mdinfo *sd = NULL;
+
+ 			if (sra)
+ 				sd = sra->devs;
+ 			for ( ; sd ; sd = sd->next)
+ 				if (sd->disk.major == (int)major(rdev) &&
+ 				    sd->disk.minor == (int)minor(rdev))
+ 					break;
+ 			if (sd)
+ 				err = sysfs_set_str(sra, sd,
+ 						    "state", "remove");
+ 			else
+ 				err = -1;
+ 			if (sra)
+ 				sysfs_free(sra);
+ 		}
+ 	}
+ 	if (err) {
+ 		pr_err("hot remove failed "
+ 		       "for %s: %s\n", dv->devname,
+ 		       strerror(errno));
+ 		if (lfd >= 0)
+ 			close(lfd);
+ 		return -1;
+ 	}
+ 	if (tst->ss->external) {
+ 		/*
+ 		 * Before dropping our exclusive open we make an
+ 		 * attempt at preventing mdmon from seeing an
+ 		 * 'add' event before reconciling this 'remove'
+ 		 * event.
+ 		 */
+ 		char *devnm = fd2devnm(fd);
+
+ 		if (!devnm) {
+ 			pr_err("unable to get container name\n");
+ 			/* do not leak the exclusive container open */
+ 			if (lfd >= 0)
+ 				close(lfd);
+ 			return -1;
+ 		}
+
+ 		ping_manager(devnm);
+ 	}
+ 	if (lfd >= 0)
+ 		close(lfd);
+ 	if (verbose >= 0)
+ 		pr_err("hot removed %s from %s\n",
+ 		       dv->devname, devname);
+ 	return 1;
+ }
+
+int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
+ unsigned long rdev, int verbose, char *devname)
+{
+ struct mdinfo *mdi, *di;
+ if (tst->ss->external) {
+ pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
+ return -1;
+ }
+ /* Need to find the device in sysfs and add 'want_replacement' to the
+ * status.
+ */
+ mdi = sysfs_read(fd, NULL, GET_DEVS);
+ if (!mdi || !mdi->devs) {
+ pr_err("Cannot find status of %s to enable replacement - strange\n",
+ devname);
+ return -1;
+ }
+ for (di = mdi->devs; di; di = di->next)
+ if (di->disk.major == (int)major(rdev) &&
+ di->disk.minor == (int)minor(rdev))
+ break;
+ if (di) {
+ int rv;
+ if (di->disk.raid_disk < 0) {
+ pr_err("%s is not active and so cannot be replaced.\n",
+ dv->devname);
+ sysfs_free(mdi);
+ return -1;
+ }
+ rv = sysfs_set_str(mdi, di,
+ "state", "want_replacement");
+ if (rv) {
+ sysfs_free(mdi);
+ pr_err("Failed to request replacement for %s\n",
+ dv->devname);
+ return -1;
+ }
+ if (verbose >= 0)
+ pr_err("Marked %s (device %d in %s) for replacement\n",
+ dv->devname, di->disk.raid_disk, devname);
+ /* If there is a matching 'with', we need to tell it which
+ * raid disk
+ */
+ while (dv && dv->disposition != 'W')
+ dv = dv->next;
+ if (dv) {
+ dv->disposition = 'w';
+ dv->used = di->disk.raid_disk;
+ }