sysfs_set_str(sra, NULL, "sync_max", "max");
}
-int remove_disks_on_raid10_to_raid0_takeover(struct supertype *st,
- struct mdinfo *sra,
- int layout)
+int remove_disks_for_takeover(struct supertype *st,
+ struct mdinfo *sra,
+ int layout)
{
int nr_of_copies;
struct mdinfo *remaining;
int slot;
- nr_of_copies = layout & 0xff;
+ if (sra->array.level == 10)
+ nr_of_copies = layout & 0xff;
+ else if (sra->array.level == 1)
+ nr_of_copies = sra->array.raid_disks;
+ else
+ return 1;
remaining = sra->devs;
sra->devs = NULL;
switch (info->array.level) {
case 1:
/* RAID1 can convert to RAID1 with different disks, or
- * raid5 with 2 disks
+ * raid5 with 2 disks, or
+ * raid0 with 1 disk
*/
+ if (info->new_level == 0) {
+ re->level = 0;
+ re->before.data_disks = 1;
+ re->after.data_disks = 1;
+ re->before.layout = 0;
+ re->backup_blocks = 0;
+ re->parity = 0;
+ return NULL;
+ }
if (info->new_level == 1) {
if (info->delta_disks == UnSet)
/* Don't know what to do */
return NULL;
}
if (info->array.raid_disks == 2 &&
- info->array.raid_disks == 5) {
- /* simple in-place conversion */
+ info->new_level == 5) {
re->level = 5;
- re->parity = 1;
re->before.data_disks = 1;
re->before.layout = ALGORITHM_LEFT_SYMMETRIC;
- re->backup_blocks = 0;
- return NULL;
+ info->array.chunk_size = 65536;
+ break;
}
/* Could do some multi-stage conversions, but leave that to
* later.
return NULL;
}
if (re->after.data_disks == 1 && re->before.data_disks == 1) {
- /* chunks can layout changes make no difference */
+ /* chunk and layout changes make no difference */
re->backup_blocks = 0;
return NULL;
}
size = array.size;
}
- /* ========= check for Raid10 -> Raid0 conversion ===============
+ /* ========= check for Raid10/Raid1 -> Raid0 conversion ===============
* current implementation assumes that following conditions must be met:
- * - far_copies == 1
- * - near_copies == 2
+ * - RAID10:
+ * - far_copies == 1
+ * - near_copies == 2
*/
- if (level == 0 && array.level == 10 && sra &&
- array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) {
+ if ((level == 0 && array.level == 10 && sra &&
+ array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
+ (level == 0 && array.level == 1 && sra)) {
int err;
- err = remove_disks_on_raid10_to_raid0_takeover(st, sra, array.layout);
+ err = remove_disks_for_takeover(st, sra, array.layout);
if (err) {
dprintf(Name": Array cannot be reshaped\n");
if (cfd > -1)
rv = 1;
goto release;
}
+ /* FIXME: this was added with no justification - why is it here? */
+ ping_monitor(container);
}
info.array = array;
fprintf(stderr, Name ": level of %s changed to %s\n",
devname, c);
orig_level = info->array.level;
- }
- if (reshape.level > 0 && st->ss->external &&
- !mdmon_running(st->container_dev)) {
- start_mdmon(st->container_dev);
- ping_monitor(container);
+ if (reshape.level > 0 && st->ss->external) {
+ /* make sure mdmon is aware of the new level */
+ if (!mdmon_running(st->container_dev))
+ start_mdmon(st->container_dev);
+ ping_monitor(container);
+ }
}
-
/* ->reshape_super might have chosen some spares from the
* container that it wants to be part of the new array.
* We can collect them with ->container_content and give
sync_metadata(st);
sra->new_chunk = info->new_chunk;
-
+
if (info->reshape_active)
- /* nothing needed here */;
- else if (info->array.chunk_size == info->new_chunk &&
+ sra->reshape_progress = info->reshape_progress;
+ else {
+ sra->reshape_progress = 0;
+ if (reshape.after.data_disks < reshape.before.data_disks)
+ /* start from the end of the new array */
+ sra->reshape_progress = (sra->component_size
+ * reshape.after.data_disks);
+ }
+
+ if (info->array.chunk_size == info->new_chunk &&
reshape.before.layout == reshape.after.layout &&
st->ss->external == 0) {
+ /* use SET_ARRAY_INFO but only if reshape hasn't started */
array.raid_disks = reshape.after.data_disks + reshape.parity;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (!info->reshape_active &&
+ ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
int err = errno;
fprintf(stderr,
}
} else {
/* set them all just in case some old 'new_*' value
- * persists from some earlier problem
+ * persists from some earlier problem.
+ * We even set them when restarting in the middle. They will
+ * already be set in that case so this will be a no-op,
+ * but it is hard to tell the difference.
*/
int err = 0;
if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
int advancing = (reshape->after.data_disks
>= reshape->before.data_disks);
- unsigned long long need_backup; /* need to eventually backup all the way
- * to here
+ unsigned long long need_backup; /* All data between start of array and
+ * here will at some point need to
+ * be backed up.
*/
unsigned long long read_offset, write_offset;
unsigned long long write_range;
read_offset = info->reshape_progress / reshape->before.data_disks;
write_offset = info->reshape_progress / reshape->after.data_disks;
write_range = info->new_chunk/512;
+ if (reshape->before.data_disks == reshape->after.data_disks)
+ need_backup = array_size;
+ else
+ need_backup = reshape->backup_blocks;
if (advancing) {
- need_backup = 0;
- if (read_offset < write_offset + write_range) {
+ if (read_offset < write_offset + write_range)
max_progress = backup_point;
- if (reshape->before.data_disks == reshape->after.data_disks)
- need_backup = array_size;
- else
- need_backup = reshape->backup_blocks;
- } else {
+ else
max_progress =
read_offset *
reshape->after.data_disks;
- }
} else {
- need_backup = array_size;
- if (read_offset > write_offset - write_range) {
+ if (read_offset > write_offset - write_range)
+ /* Can only progress as far as has been backed up;
+ * that backed-up region must also be suspended */
max_progress = backup_point;
- if (max_progress >= info->reshape_progress)
- need_backup = 0;
- } else {
- max_progress =
- read_offset *
- reshape->after.data_disks;
- /* If we are using internal metadata, then we can
- * progress all the way to the suspend_point without
- * worrying about backing-up/suspending along the
- * way.
- */
- if (max_progress < *suspend_point &&
- info->array.major_version >= 0)
- max_progress = *suspend_point;
+ else if (info->reshape_progress <= need_backup)
+ max_progress = backup_point;
+ else {
+ if (info->array.major_version >= 0)
+ /* Can progress until backup is needed */
+ max_progress = need_backup;
+ else {
+ /* Can progress until metadata update is required */
+ max_progress =
+ read_offset *
+ reshape->after.data_disks;
+ /* but data must be suspended */
+ if (max_progress < *suspend_point)
+ max_progress = *suspend_point;
+ }
}
}
max_progress = *suspend_point;
}
} else {
- if ((need_backup < info->reshape_progress
- || info->array.major_version < 0) &&
- *suspend_point > info->reshape_progress - target) {
- if (need_backup > *suspend_point - 2 * target)
- *suspend_point = need_backup;
- else if (*suspend_point >= 2 * target)
- *suspend_point -= 2 * target;
- else
+ if (info->array.major_version >= 0) {
+ /* Only need to suspend when about to backup */
+ if (info->reshape_progress < need_backup * 2 &&
+ *suspend_point > 0) {
*suspend_point = 0;
- sysfs_set_num(info, NULL, "suspend_lo", *suspend_point);
+ sysfs_set_num(info, NULL, "suspend_lo", 0);
+ sysfs_set_num(info, NULL, "suspend_hi", need_backup);
+ }
+ } else {
+ /* Need to suspend continually */
+ if (info->reshape_progress < *suspend_point)
+ *suspend_point = info->reshape_progress;
+ if (*suspend_point + target < info->reshape_progress)
+ /* No need to move suspend region yet */;
+ else {
+ if (*suspend_point >= 2 * target)
+ *suspend_point -= 2 * target;
+ else
+ *suspend_point = 0;
+ sysfs_set_num(info, NULL, "suspend_lo",
+ *suspend_point);
+ }
if (max_progress < *suspend_point)
max_progress = *suspend_point;
}
action, 20) <= 0 ||
strncmp(action, "reshape", 7) != 0)
break;
+ /* Some kernels reset 'sync_completed' to zero
+ * before setting 'sync_action' to 'idle'.
+ * So we need these extra tests.
+ */
+ if (completed == 0 && advancing
+ && info->reshape_progress > 0)
+ break;
+ if (completed == 0 && !advancing
+ && info->reshape_progress < (info->component_size
+ * reshape->after.data_disks))
+ break;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
select(fd+1, NULL, NULL, &rfds, NULL);
/* We return the need_backup flag. Caller will decide
* how much - a multiple of ->backup_blocks up to *suspend_point
*/
- return advancing
- ? (need_backup > info->reshape_progress)
- : (need_backup < info->reshape_progress);
+ if (advancing)
+ return need_backup > info->reshape_progress;
+ else
+ return need_backup >= info->reshape_progress;
check_progress:
/* if we couldn't read a number from sync_completed, then
odata--;
/* Check that array hasn't become degraded, else we might backup the wrong data */
- sysfs_get_ll(sra, NULL, "degraded", &ll);
+ if (sysfs_get_ll(sra, NULL, "degraded", &ll) < 0)
+ return -1; /* FIXME this error is ignored */
new_degraded = (int)ll;
if (new_degraded != *degraded) {
/* check each device to ensure it is still working */
suspend_point = 0;
} else {
array_size = sra->component_size * reshape->before.data_disks;
- backup_point = array_size;
+ backup_point = reshape->backup_blocks;
suspend_point = array_size;
}
int fd;
int bsbsize;
char *devname, namebuf[20];
+ unsigned long long lo, hi;
/* This was a spare and may have some saved data on it.
* Load the superblock, find and load the
}
if (bsb.magic[15] == '1') {
- if (info->delta_disks >= 0) {
- /* reshape_progress is increasing */
- if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
- info->reshape_progress) {
- nonew:
- if (verbose)
- fprintf(stderr, Name ": backup-metadata found on %s but is not needed\n", devname);
- continue; /* No new data here */
+ if (bsb.length == 0)
+ continue;
+ if (info->delta_disks >= 0) {
+ /* reshape_progress is increasing */
+ if (__le64_to_cpu(bsb.arraystart)
+ + __le64_to_cpu(bsb.length)
+ < info->reshape_progress) {
+ nonew:
+ if (verbose)
+ fprintf(stderr, Name
+ ": backup-metadata found on %s but is not needed\n", devname);
+ continue; /* No new data here */
+ }
+ } else {
+ /* reshape_progress is decreasing */
+ if (__le64_to_cpu(bsb.arraystart) >=
+ info->reshape_progress)
+ goto nonew; /* No new data here */
}
} else {
- /* reshape_progress is decreasing */
- if (__le64_to_cpu(bsb.arraystart) >=
- info->reshape_progress)
- goto nonew; /* No new data here */
- }
- } else {
- if (info->delta_disks >= 0) {
- /* reshape_progress is increasing */
- if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
- info->reshape_progress &&
- __le64_to_cpu(bsb.arraystart2) + __le64_to_cpu(bsb.length2) <
- info->reshape_progress)
- goto nonew; /* No new data here */
- } else {
- /* reshape_progress is decreasing */
- if (__le64_to_cpu(bsb.arraystart) >=
- info->reshape_progress &&
- __le64_to_cpu(bsb.arraystart2) >=
- info->reshape_progress)
- goto nonew; /* No new data here */
- }
+ if (bsb.length == 0 && bsb.length2 == 0)
+ continue;
+ if (info->delta_disks >= 0) {
+ /* reshape_progress is increasing */
+ if ((__le64_to_cpu(bsb.arraystart)
+ + __le64_to_cpu(bsb.length)
+ < info->reshape_progress)
+ &&
+ (__le64_to_cpu(bsb.arraystart2)
+ + __le64_to_cpu(bsb.length2)
+ < info->reshape_progress))
+ goto nonew; /* No new data here */
+ } else {
+ /* reshape_progress is decreasing */
+ if (__le64_to_cpu(bsb.arraystart) >=
+ info->reshape_progress &&
+ __le64_to_cpu(bsb.arraystart2) >=
+ info->reshape_progress)
+ goto nonew; /* No new data here */
+ }
}
if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) {
second_fail:
if (verbose)
- fprintf(stderr, Name ": Failed to verify secondary backup-metadata block on %s\n",
+ fprintf(stderr, Name
+ ": Failed to verify secondary backup-metadata block on %s\n",
devname);
continue; /* Cannot seek */
}
/* Ok, so the data is restored. Let's update those superblocks. */
- if (info->delta_disks >= 0) {
+ lo = hi = 0;
+ if (bsb.length) {
+ lo = __le64_to_cpu(bsb.arraystart);
+ hi = lo + __le64_to_cpu(bsb.length);
+ }
+ if (bsb.magic[15] == '2' && bsb.length2) {
+ unsigned long long lo1, hi1;
+ lo1 = __le64_to_cpu(bsb.arraystart2);
+ hi1 = lo1 + __le64_to_cpu(bsb.length2);
+ if (lo == hi) {
+ lo = lo1;
+ hi = hi1;
+ } else if (lo < lo1)
+ hi = hi1;
+ else
+ lo = lo1;
+ }
+ if (lo < hi &&
+ (info->reshape_progress < lo ||
+ info->reshape_progress > hi))
+ /* backup does not affect reshape_progress*/ ;
+ else if (info->delta_disks >= 0) {
info->reshape_progress = __le64_to_cpu(bsb.arraystart) +
__le64_to_cpu(bsb.length);
if (bsb.magic[15] == '2') {