+
+ /* Impose these changes on a single array. First
+ * check that the metadata is OK with the change. */
+
+ if (reshape_super(st, 0, info.new_level,
+ info.new_layout, info.new_chunk,
+ info.array.raid_disks, info.delta_disks,
+ c->backup_file, devname, APPLY_METADATA_CHANGES,
+ c->verbose)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
+ rv = reshape_array(container, fd, devname, st, &info, c->force,
+ devlist, data_offset, c->backup_file, c->verbose,
+ 0, 0, 0);
+ frozen = 0;
+ }
+release:
+ sysfs_free(sra);
+ if (frozen > 0)
+ unfreeze(st);
+ return rv;
+}
+
+/* verify_reshape_position()
+ *	Function checks if reshape position in metadata is not farther
+ *	than position in md.  If md is farther than the metadata,
+ *	info->reshape_progress is moved forward to the md position.
+ * Return value:
+ *	 0 : not valid sysfs entry
+ *	     it can be caused by not started reshape, it should be started
+ *	     by reshape array or raid0 array is before takeover
+ *	-1 : error, reshape position is obviously wrong
+ *	 1 : success, reshape progress correct or updated
+ */
+static int verify_reshape_position(struct mdinfo *info, int level)
+{
+	int ret_val = 0;
+	char buf[40];
+	int rv;
+
+	/* read sync_max, failure can mean raid0 array */
+	rv = sysfs_get_str(info, NULL, "sync_max", buf, sizeof(buf));
+
+	if (rv > 0) {
+		char *ep;
+		unsigned long long position = strtoull(buf, &ep, 0);
+
+		dprintf(Name": Read sync_max sysfs entry is: %s\n", buf);
+		/* Only trust the value when a number was parsed and it is
+		 * followed by end-of-string, a newline or a space.
+		 * Anything else leaves ret_val at 0.
+		 */
+		if (!(ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))) {
+			/* scale by the data-disk count before comparing
+			 * with reshape_progress
+			 */
+			position *= get_data_disks(level,
+						   info->new_layout,
+						   info->array.raid_disks);
+			if (info->reshape_progress < position) {
+				dprintf("Corrected reshape progress (%llu) to "
+					"md position (%llu)\n",
+					info->reshape_progress, position);
+				info->reshape_progress = position;
+				ret_val = 1;
+			} else if (info->reshape_progress > position) {
+				pr_err("Fatal error: array "
+				       "reshape was not properly frozen "
+				       "(expected reshape position is %llu, "
+				       "but reshape progress is %llu).\n",
+				       position, info->reshape_progress);
+				ret_val = -1;
+			} else {
+				dprintf("Reshape position in md and metadata "
+					"are the same;\n");
+				ret_val = 1;
+			}
+		}
+	} else if (rv == 0) {
+		/* for valid sysfs entry, 0-length content
+		 * should be indicated as error
+		 */
+		ret_val = -1;
+	}
+
+	return ret_val;
+}
+
+static unsigned long long choose_offset(unsigned long long lo,
+					unsigned long long hi,
+					unsigned long long min,
+					unsigned long long max)
+{
+	/* Choose a new offset between hi and lo.
+	 * It must be between min and max, but
+	 * we would prefer something near the middle of hi/lo, and also
+	 * prefer to be aligned to a big power of 2.
+	 *
+	 * So we start with the middle (pulled into [min, max] if the
+	 * middle lies outside that range), then for each bit,
+	 * starting at '1' and increasing, if it is set, we either
+	 * add it or subtract it if possible, preferring the option
+	 * which is furthest from the boundary.
+	 *
+	 * We stop once we get a 1MB alignment. As units are in sectors,
+	 * 1MB = 2*1024 sectors.
+	 *
+	 * The callers in this file pass lo <= min <= max <= hi; the
+	 * "furthest from the boundary" comparison relies on that.
+	 */
+	/* floor((lo + hi) / 2) computed without overflowing lo + hi */
+	unsigned long long choice = lo / 2 + hi / 2 + (lo & hi & 1);
+	unsigned long long bit;
+
+	/* Without this the result could end up outside [min, max]:
+	 * the loop below only ever refuses to *leave* the range, it
+	 * never brings an out-of-range starting point back in.
+	 */
+	if (choice < min)
+		choice = min;
+	else if (choice > max)
+		choice = max;
+
+	for (bit = 1; bit < 2*1024; bit = bit << 1) {
+		unsigned long long bigger, smaller;
+
+		if (!(bit & choice))
+			continue;
+		/* 'bit' is set in 'choice', so neither clearing it by
+		 * subtraction nor carrying it upward by addition leaves
+		 * it set afterwards.
+		 */
+		bigger = choice + bit;
+		smaller = choice - bit;
+		if (bigger > max && smaller < min)
+			break;
+		if (bigger > max)
+			choice = smaller;
+		else if (smaller < min)
+			choice = bigger;
+		else if (hi - bigger > smaller - lo)
+			choice = bigger;
+		else
+			choice = smaller;
+	}
+	return choice;
+}
+
+/* set_new_data_offset()
+ *	Work out a new data offset for every non-faulty member device
+ *	listed in 'sra' and write it to that device's "new_offset"
+ *	sysfs attribute.
+ *
+ *	delta_disks < 0 : offset moves up by at least 'min'
+ *	delta_disks > 0 : offset moves down by at least 'min'
+ *	delta_disks == 0: direction follows 'data_offset' if given,
+ *			  otherwise the side with most free space
+ *
+ *	'data_offset' is the user requested offset, or INVALID_SECTORS
+ *	when none was requested.  'can_fallback' selects a return of 1
+ *	instead of an error when there is not enough free space.
+ * Return value:
+ *	 0 : new_offset set on all devices
+ *	 1 : offsets were not changed and the caller must cope without
+ *	     (metadata reports no space before/after, no usable devices,
+ *	     not enough space with can_fallback set, or the first device
+ *	     refused new_offset with ENOENT/E2BIG)
+ *	<0 : error, a message has been printed
+ */
+static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
+			       char *devname, int delta_disks,
+			       unsigned long long data_offset,
+			       unsigned long long min,
+			       int can_fallback)
+{
+	struct mdinfo *sd;
+	int dir = 0;
+	int err = 0;
+	unsigned long long before, after;
+
+	/* Need to find min space before and after so same is used
+	 * on all devices
+	 */
+	before = UINT64_MAX;
+	after = UINT64_MAX;
+	for (sd = sra->devs; sd; sd = sd->next) {
+		char *dn;
+		int dfd;
+		int rv;
+		struct supertype *st2;
+		struct mdinfo info2;
+
+		if (sd->disk.state & (1<<MD_DISK_FAULTY))
+			continue;
+		dn = map_dev(sd->disk.major, sd->disk.minor, 0);
+		dfd = dev_open(dn, O_RDONLY);
+		if (dfd < 0) {
+			pr_err("%s: cannot open component %s\n",
+			       devname, dn ? dn : "-unknown-");
+			goto release;
+		}
+		st2 = dup_super(st);
+		rv = st2->ss->load_super(st2,dfd, NULL);
+		close(dfd);
+		if (rv) {
+			free(st2);
+			pr_err("%s: cannot get superblock from %s\n",
+			       devname, dn);
+			goto release;
+		}
+		st2->ss->getinfo_super(st2, &info2, NULL);
+		st2->ss->free_super(st2);
+		free(st2);
+		if (info2.space_before == 0 &&
+		    info2.space_after == 0) {
+			/* Metadata doesn't support data_offset changes */
+			return 1;
+		}
+		if (before > info2.space_before)
+			before = info2.space_before;
+		if (after > info2.space_after)
+			after = info2.space_after;
+
+		if (data_offset != INVALID_SECTORS) {
+			/* The requested offset must lie on the same side
+			 * of the current offset on every device.
+			 */
+			if (dir == 0) {
+				if (info2.data_offset == data_offset) {
+					pr_err("%s: already has that data_offset\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset < info2.data_offset)
+					dir = -1;
+				else
+					dir = 1;
+			} else if ((data_offset <= info2.data_offset && dir == 1) ||
+				   (data_offset >= info2.data_offset && dir == -1)) {
+				pr_err("%s: differing data offsets on devices make this --data-offset setting impossible\n",
+				       dn);
+				goto release;
+			}
+		}
+	}
+	if (before == UINT64_MAX)
+		/* impossible really, there must be no devices */
+		return 1;
+
+	for (sd = sra->devs; sd; sd = sd->next) {
+		char *dn = map_dev(sd->disk.major, sd->disk.minor, 0);
+		unsigned long long new_data_offset;
+
+		if (sd->disk.state & (1<<MD_DISK_FAULTY))
+			continue;
+		if (delta_disks < 0) {
+			/* Don't need any space as array is shrinking
+			 * just move data_offset up by min
+			 */
+			if (data_offset == INVALID_SECTORS)
+				new_data_offset = sd->data_offset + min;
+			else {
+				if (data_offset < sd->data_offset + min) {
+					pr_err("--data-offset too small for %s\n",
+					       dn);
+					goto release;
+				}
+				new_data_offset = data_offset;
+			}
+		} else if (delta_disks > 0) {
+			/* need space before */
+			if (before < min) {
+				if (can_fallback)
+					goto fallback;
+				pr_err("Insufficient head-space for reshape on %s\n",
+				       dn);
+				goto release;
+			}
+			if (data_offset == INVALID_SECTORS)
+				new_data_offset = sd->data_offset - min;
+			else {
+				if (data_offset > sd->data_offset - min) {
+					pr_err("--data-offset too large for %s\n",
+					       dn);
+					goto release;
+				}
+				new_data_offset = data_offset;
+			}
+		} else {
+			if (dir == 0) {
+				/* can move up or down.  If 'data_offset'
+				 * was set we would have already decided,
+				 * so just choose direction with most space.
+				 */
+				if (before > after)
+					dir = -1;
+				else
+					dir = 1;
+			}
+			sysfs_set_str(sra, NULL, "reshape_direction",
+				      dir == 1 ? "backwards" : "forwards");
+			if (dir > 0) {
+				/* Increase data offset */
+				if (after < min) {
+					if (can_fallback)
+						goto fallback;
+					pr_err("Insufficient tail-space for reshape on %s\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset != INVALID_SECTORS &&
+				    data_offset < sd->data_offset + min) {
+					pr_err("--data-offset too small on %s\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset != INVALID_SECTORS)
+					new_data_offset = data_offset;
+				else
+					new_data_offset = choose_offset(sd->data_offset,
+									sd->data_offset + after,
+									sd->data_offset + min,
+									sd->data_offset + after);
+			} else {
+				/* Decrease data offset */
+				if (before < min) {
+					if (can_fallback)
+						goto fallback;
+					pr_err("insufficient head-room on %s\n",
+					       dn);
+					goto release;
+				}
+				/* The offset has to drop by at least 'min',
+				 * so a requested value above
+				 * (current - min) is too large.
+				 */
+				if (data_offset != INVALID_SECTORS &&
+				    data_offset > sd->data_offset - min) {
+					pr_err("--data-offset too large on %s\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset != INVALID_SECTORS)
+					new_data_offset = data_offset;
+				else
+					new_data_offset = choose_offset(sd->data_offset - before,
+									sd->data_offset,
+									sd->data_offset - before,
+									sd->data_offset - min);
+			}
+		}
+		err = sysfs_set_num(sra, sd, "new_offset", new_data_offset);
+		if (err < 0 && errno == E2BIG) {
+			/* try again after increasing data size to max */
+			err = sysfs_set_num(sra, sd, "size", 0);
+			if (err < 0 && errno == EINVAL &&
+			    !(sd->disk.state & (1<<MD_DISK_SYNC))) {
+				/* some kernels have a bug where you cannot
+				 * use '0' on spare devices. */
+				sysfs_set_num(sra, sd, "size",
+					      (sra->component_size + after)/2);
+			}
+			err = sysfs_set_num(sra, sd, "new_offset",
+					    new_data_offset);
+		}
+		if (err < 0) {
+			if (errno == E2BIG && data_offset != INVALID_SECTORS) {
+				pr_err("data-offset is too big for %s\n",
+				       dn);
+				goto release;
+			}
+			if (sd == sra->devs &&
+			    (errno == ENOENT || errno == E2BIG))
+				/* Early kernel, no 'new_offset' file,
+				 * or kernel doesn't like us.
+				 * For RAID5/6 this is not fatal
+				 */
+				return 1;
+			pr_err("Cannot set new_offset for %s\n",
+			       dn);
+			break;
+		}
+	}
+	return err;
+release:
+	return -1;
+fallback:
+	/* Just use a backup file */
+	return 1;
+}
+
+static int raid10_reshape(char *container, int fd, char *devname,
+			  struct supertype *st, struct mdinfo *info,
+			  struct reshape *reshape,
+			  unsigned long long data_offset,
+			  int force, int verbose)
+{
+	/* Reshape a RAID10: raid_disks, layout, chunksize, or possibly
+	 * nothing but data_offset may be changing.
+	 *
+	 * data_offset always has to move, by no less than
+	 * ->min_offset_change (the largest of the old and new chunk
+	 * sizes):
+	 *  - raid_disks growing:   data_offset goes down by at least
+	 *    that much.
+	 *  - raid_disks unchanged: data_offset goes either way by at
+	 *    least that much, preferably much more; half of the
+	 *    available space is chosen.
+	 *  - raid_disks shrinking: data_offset goes up by at least that
+	 *    much, and component_size has to shrink by the same amount
+	 *    to make room.
+	 *
+	 * Steps: work out the minimum and the direction, shrink
+	 * component_size when needed, set new_offset on each device,
+	 * then set chunk_size, layout and raid_disks and finally
+	 * trigger 'reshape'.
+	 *
+	 * Returns 0 on success, 1 on failure.
+	 */
+	struct mdinfo *sra;
+	unsigned long long min_change;
+	int status = 0;
+
+	sra = sysfs_read(fd, NULL,
+			 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK);
+	if (sra == NULL) {
+		pr_err("%s: Cannot get array details from sysfs\n",
+		       devname);
+		goto fail;
+	}
+	min_change = reshape->min_offset_change;
+
+	if (info->delta_disks != 0) {
+		char *direction = "forwards";
+
+		if (info->delta_disks < 0)
+			direction = "backwards";
+		sysfs_set_str(sra, NULL, "reshape_direction", direction);
+	}
+
+	/* shrinking without enough tail space: give up some component size */
+	if (info->delta_disks < 0 && info->space_after < min_change &&
+	    sysfs_set_num(sra, NULL, "component_size",
+			  (sra->component_size - min_change)/2) != 0) {
+		pr_err("cannot reduce component size\n");
+		goto fail;
+	}
+
+	status = set_new_data_offset(sra, st, devname, info->delta_disks,
+				     data_offset, min_change, 0);
+	if (status == 1) {
+		pr_err("Cannot set new_data_offset: RAID10 reshape not\n");
+		cont_err("supported on this kernel\n");
+		status = -1;
+	}
+	if (status < 0)
+		goto fail;
+
+	/* Each step runs only while no earlier step recorded an errno */
+	if (status == 0 &&
+	    sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
+		status = errno;
+	if (status == 0 &&
+	    sysfs_set_num(sra, NULL, "layout", reshape->after.layout) < 0)
+		status = errno;
+	if (status == 0 &&
+	    sysfs_set_num(sra, NULL, "raid_disks",
+			  info->array.raid_disks + info->delta_disks) < 0)
+		status = errno;
+	if (status == 0 &&
+	    sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0)
+		status = errno;
+
+	if (status != 0) {
+		pr_err("Cannot set array shape for %s\n",
+		       devname);
+		if (status == EBUSY &&
+		    (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
+			cont_err("       Bitmap must be removed before"
+				 " shape can be changed\n");
+		goto fail;
+	}
+
+	sysfs_free(sra);
+	return 0;
+
+fail:
+	sysfs_free(sra);
+	return 1;
+}
+
+/* get_space_after()
+ *	Load the superblock of every non-faulty member device and store
+ *	the smallest space_before / space_after seen in 'info'.
+ *	'info' is left untouched unless every such device could be
+ *	opened and loaded, and at least one was examined.
+ */
+static void get_space_after(int fd, struct supertype *st, struct mdinfo *info)
+{
+	struct mdinfo *sra, *dev;
+	/* Zero-initialised only to keep the compiler quiet */
+	unsigned long long least_before = 0, least_after = 0;
+	int have_sample = 0;
+
+	sra = sysfs_read(fd, NULL, GET_DEVS);
+	if (sra == NULL)
+		return;
+
+	dev = sra->devs;
+	while (dev != NULL) {
+		if (!(dev->disk.state & (1<<MD_DISK_FAULTY))) {
+			struct supertype *tmp;
+			struct mdinfo devinfo;
+			char *name;
+			int dfd;
+			int failed;
+
+			name = map_dev(dev->disk.major, dev->disk.minor, 0);
+			dfd = dev_open(name, O_RDONLY);
+			if (dfd < 0)
+				break;
+			tmp = dup_super(st);
+			failed = tmp->ss->load_super(tmp,dfd, NULL);
+			close(dfd);
+			if (failed) {
+				free(tmp);
+				break;
+			}
+			tmp->ss->getinfo_super(tmp, &devinfo, NULL);
+			tmp->ss->free_super(tmp);
+			free(tmp);
+
+			if (!have_sample ||
+			    devinfo.space_before < least_before)
+				least_before = devinfo.space_before;
+			if (!have_sample ||
+			    devinfo.space_after < least_after)
+				least_after = devinfo.space_after;
+			have_sample = 1;
+		}
+		dev = dev->next;
+	}
+
+	/* dev is NULL only when the walk was not cut short by a failure */
+	if (dev == NULL && have_sample) {
+		info->space_after = least_after;
+		info->space_before = least_before;
+	}
+	sysfs_free(sra);
+}
+
+static void update_cache_size(char *container, struct mdinfo *sra,
+			      struct mdinfo *info,
+			      int disks, unsigned long long blocks)
+{
+	/* The internal stripe cache has to be big enough or the
+	 * reshape won't work: at least 4 stripes of the larger of the
+	 * old and new chunk sizes, and enough for 'blocks' spread over
+	 * 'disks' plus a little spare.  Raise stripe_cache_size when
+	 * the current value is below that.
+	 */
+	int larger_chunk;
+	unsigned long long per_disk;
+	unsigned long cache;
+
+	if (info->array.chunk_size > info->new_chunk)
+		larger_chunk = info->array.chunk_size;
+	else
+		larger_chunk = info->new_chunk;
+
+	/* 4 stripes minimum, expressed in 512-byte sectors */
+	cache = larger_chunk;
+	cache = cache * 4 / 512;
+
+	/* room for 'blocks' with a bit to spare */
+	per_disk = 16 + blocks / disks;
+	if (cache < per_disk)
+		cache = per_disk;
+
+	/* Convert from sectors to 4096-byte pages */
+	cache /= 4096 / 512;
+
+	if (sra->cache_size < cache)
+		subarray_set_num(container, sra, "stripe_cache_size",
+				 cache+1);
+}
+
+/* impose_reshape()
+ *	Record the starting reshape position in 'sra' and, unless this
+ *	is a restart, push the new shape (raid_disks, and where needed
+ *	chunk_size and layout) to the array.
+ * Return value:
+ *	 0 : success
+ *	-1 : the new shape could not be set, a message has been printed
+ */
+static int impose_reshape(struct mdinfo *sra,
+			  struct mdinfo *info,
+			  struct supertype *st,
+			  int fd,
+			  int restart,
+			  char *devname, char *container,
+			  struct reshape *reshape)
+{
+	struct mdu_array_info_s array;
+	int only_disks_change;
+	int failure;
+
+	sra->new_chunk = info->new_chunk;
+
+	if (!restart) {
+		if (reshape->after.data_disks < reshape->before.data_disks)
+			/* start from the end of the new array */
+			sra->reshape_progress = (sra->component_size
+						 * reshape->after.data_disks);
+		else
+			sra->reshape_progress = 0;
+	} else {
+		/* With external metadata the checkpoint saved by mdmon
+		 * may have been lost or missed (e.g. after a crash).
+		 * See whether md has already got farther than the
+		 * metadata says, in which case the metadata is obsolete.
+		 */
+		if (st->ss->external)
+			verify_reshape_position(info, reshape->level);
+		sra->reshape_progress = info->reshape_progress;
+	}
+
+	ioctl(fd, GET_ARRAY_INFO, &array);
+
+	only_disks_change = (info->array.chunk_size == info->new_chunk &&
+			     reshape->before.layout == reshape->after.layout &&
+			     st->ss->external == 0);
+
+	if (only_disks_change) {
+		/* use SET_ARRAY_INFO but only if reshape hasn't started */
+		array.raid_disks = reshape->after.data_disks + reshape->parity;
+		if (restart)
+			return 0;
+		if (ioctl(fd, SET_ARRAY_INFO, &array) == 0)
+			return 0;
+
+		failure = errno;
+		pr_err("Cannot set device shape for %s: %s\n",
+		       devname, strerror(failure));
+		if (failure == EBUSY &&
+		    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+			cont_err("Bitmap must be removed before"
+				 " shape can be changed\n");
+		return -1;
+	}
+
+	if (restart)
+		return 0;
+
+	/* Write every value, in case a stale 'new_*' setting is still
+	 * around from some earlier problem.
+	 */
+	failure = 0;
+	if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
+		failure = errno;
+	if (failure == 0 &&
+	    sysfs_set_num(sra, NULL, "layout", reshape->after.layout) < 0)
+		failure = errno;
+	if (failure == 0 &&
+	    subarray_set_num(container, sra, "raid_disks",
+			     reshape->after.data_disks +
+			     reshape->parity) < 0)
+		failure = errno;
+	if (failure == 0)
+		return 0;
+
+	pr_err("Cannot set device shape for %s\n",
+	       devname);
+	if (failure == EBUSY &&
+	    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+		cont_err("Bitmap must be removed before"
+			 " shape can be changed\n");
+	return -1;
+}
+
+static int impose_level(int fd, int level, char *devname, int verbose)
+{
+ char *c;
+ struct mdu_array_info_s array;
+ struct mdinfo info;
+ sysfs_init(&info, fd, NULL);
+
+ ioctl(fd, GET_ARRAY_INFO, &array);
+ if (level == 0 &&
+ (array.level >= 4 && array.level <= 6)) {
+ /* To convert to RAID0 we need to fail and
+ * remove any non-data devices. */
+ int found = 0;
+ int d;
+ int data_disks = array.raid_disks - 1;
+ if (array.level == 6)
+ data_disks -= 1;
+ if (array.level == 5 &&
+ array.layout != ALGORITHM_PARITY_N)
+ return -1;
+ if (array.level == 6 &&
+ array.layout != ALGORITHM_PARITY_N_6)
+ return -1;
+ sysfs_set_str(&info, NULL,"sync_action", "idle");
+ /* First remove any spares so no recovery starts */
+ for (d = 0, found = 0;
+ d < MAX_DISKS && found < array.nr_disks;
+ d++) {
+ mdu_disk_info_t disk;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ found++;
+ if ((disk.state & (1 << MD_DISK_ACTIVE))
+ && disk.raid_disk < data_disks)
+ /* keep this */
+ continue;
+ ioctl(fd, HOT_REMOVE_DISK,
+ makedev(disk.major, disk.minor));
+ }
+ /* Now fail anything left */
+ ioctl(fd, GET_ARRAY_INFO, &array);
+ for (d = 0, found = 0;
+ d < MAX_DISKS && found < array.nr_disks;
+ d++) {
+ int cnt;
+ mdu_disk_info_t disk;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ found++;
+ if ((disk.state & (1 << MD_DISK_ACTIVE))
+ && disk.raid_disk < data_disks)
+ /* keep this */
+ continue;
+ ioctl(fd, SET_DISK_FAULTY,
+ makedev(disk.major, disk.minor));
+ cnt = 5;
+ while (ioctl(fd, HOT_REMOVE_DISK,
+ makedev(disk.major, disk.minor)) < 0
+ && errno == EBUSY
+ && cnt--) {
+ usleep(10000);
+ }
+ }
+ }
+ c = map_num(pers, level);
+ if (c) {
+ int err = sysfs_set_str(&info, NULL, "level", c);
+ if (err) {
+ err = errno;
+ pr_err("%s: could not set level to %s\n",
+ devname, c);
+ if (err == EBUSY &&
+ (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+ cont_err("Bitmap must be removed"
+ " before level can be changed\n");
+ return err;