/*
* When reshaping an array we might need to backup some data.
* This is written to all spares with a 'super_block' describing it.
- * The superblock goes 1K form the end of the used space on the
+ * The superblock goes 4K from the end of the used space on the
* device.
* It is written after the backup is complete.
* It has the following structure.
return 1;
}
+ if (size >= 0 &&
+ (chunksize || level!= UnSet || layout_str || raid_disks)) {
+ fprintf(stderr, Name ": cannot change component size at the same time "
+ "as other changes.\n"
+ " Change size first, then check data is intact before "
+ "making other changes.\n");
+ return 1;
+ }
+
if (raid_disks && raid_disks < array.raid_disks && array.level > 1 &&
get_linux_version() < 2006032 &&
!check_env("MDADM_FORCE_FEWER")) {
layout_str = "parity-last";
} else {
c = map_num(pers, level);
- if (c == NULL)
- return 1;/* not possible */
+ if (c == NULL) {
+ rv = 1;/* not possible */
+ goto release;
+ }
err = sysfs_set_str(sra, NULL, "level", c);
if (err) {
fprintf(stderr, Name ": %s: could not set level to %s\n",
if (nlayout == UnSet) {
fprintf(stderr, Name ": layout %s not understood for raid5.\n",
layout_str);
- return 1;
+ rv = 1;
+ goto release;
}
break;
if (nlayout == UnSet) {
fprintf(stderr, Name ": layout %s not understood for raid6.\n",
layout_str);
- return 1;
+ rv = 1;
+ goto release;
}
break;
}
ndata--;
}
+ if (odata == ndata &&
+ get_linux_version() < 2006032) {
+ fprintf(stderr, Name ": in-place reshape is not safe before 2.6.32, sorry.\n");
+ break;
+ }
+
/* Check that we can hold all the data */
- size = ndata * array.size;
+ size = ndata * (long long)array.size;
get_dev_size(fd, NULL, &array_size);
if (size < (array_size/1024)) {
fprintf(stderr, Name ": this change will reduce the size of the array.\n"
/* LCM == product / GCD */
blocks = ochunk/512 * nchunk/512 * odata * ndata / a;
+ sysfs_free(sra);
+ sra = sysfs_read(fd, 0,
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+ GET_CACHE);
+
if (ndata == odata) {
/* Make 'blocks' bigger for better throughput, but
* not so big that we reject it below.
+ * Try for 16 megabytes
*/
- if (blocks * 32 < sra->component_size)
- blocks *= 16;
+ while (blocks * 32 < sra->component_size &&
+ blocks < 16*1024*2)
+ blocks *= 2;
} else
fprintf(stderr, Name ": Need to backup %luK of critical "
"section..\n", blocks/2);
- sysfs_free(sra);
- sra = sysfs_read(fd, 0,
- GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
- GET_CACHE);
if (!sra) {
fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
devname);
char *dn = map_dev(sd->disk.major,
sd->disk.minor, 1);
fdlist[d] = dev_open(dn, O_RDWR);
- offsets[d] = (sra->component_size - blocks - 8)*512;
+ offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
if (fdlist[d]<0) {
fprintf(stderr, Name ": %s: cannot open component %s\n",
devname, dn?dn:"-unknown");
cache = (nchunk < ochunk) ? ochunk : nchunk;
cache = cache * 4 / 4096;
+ if (cache < blocks / 8 / odisks + 16)
+ /* Make it big enough to hold 'blocks' */
+ cache = blocks / 8 / odisks + 16;
if (sra->cache_size < cache)
sysfs_set_num(sra, NULL, "stripe_cache_size",
cache+1);
int *sources, unsigned long long *offsets,
int disks, int chunk, int level, int layout,
int dests, int *destfd, unsigned long long *destoffsets,
- int part,
+ int part, int *degraded,
char *buf)
{
/* Backup 'blocks' sectors at 'offset' on each device of the array,
int odata = disks;
int rv = 0;
int i;
+ unsigned long long new_degraded;
//printf("offset %llu\n", offset);
if (level >= 4)
odata--;
if (level == 6)
odata--;
sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * chunk/512) * odata);
+ /* Check that array hasn't become degraded, else we might backup the wrong data */
+ sysfs_get_ll(sra, NULL, "degraded", &new_degraded);
+ if (new_degraded != *degraded) {
+ /* check each device to ensure it is still working */
+ struct mdinfo *sd;
+ for (sd = sra->devs ; sd ; sd = sd->next) {
+ if (sd->disk.state & (1<<MD_DISK_FAULTY))
+ continue;
+ if (sd->disk.state & (1<<MD_DISK_SYNC)) {
+ char sbuf[20];
+ if (sysfs_get_str(sra, sd, "state", sbuf, 20) < 0 ||
+ strstr(sbuf, "faulty") ||
+ strstr(sbuf, "in_sync") == NULL) {
+ /* this device is dead */
+ sd->disk.state = (1<<MD_DISK_FAULTY);
+ if (sd->disk.raid_disk >= 0 &&
+ sources[sd->disk.raid_disk] >= 0) {
+ close(sources[sd->disk.raid_disk]);
+ sources[sd->disk.raid_disk] = -1;
+ }
+ }
+ }
+ }
+ *degraded = new_degraded;
+ }
if (part) {
bsb.arraystart2 = __cpu_to_le64(offset * odata);
bsb.length2 = __cpu_to_le64(stripes * chunk/512 * odata);
lseek64(destfd[i], destoffsets[i] - 4096, 0);
write(destfd[i], &bsb, 512);
+ if (destoffsets[i] > 4096) {
+ lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0);
+ write(destfd[i], &bsb, 512);
+ }
fsync(destfd[i]);
}
int dests, int *destfd, unsigned long long *destoffsets)
{
char *buf;
+ int degraded = 0;
posix_memalign((void**)&buf, 4096, disks * chunk);
sysfs_set_num(sra, NULL, "suspend_hi", 0);
grow_backup(sra, 0, stripes,
fds, offsets, disks, chunk, level, layout,
dests, destfd, destoffsets,
- 0, buf);
+ 0, &degraded, buf);
validate(afd, destfd[0], destoffsets[0]);
wait_backup(sra, 0, stripes * chunk / 512, stripes * chunk / 512,
dests, destfd, destoffsets,
char *buf;
unsigned long long start;
int rv;
+ int degraded = 0;
posix_memalign((void**)&buf, 4096, disks * chunk);
start = sra->component_size - stripes * chunk/512;
fds, offsets,
disks, chunk, level, layout,
dests, destfd, destoffsets,
- 0, buf);
+ 0, &degraded, buf);
validate(afd, destfd[0], destoffsets[0]);
wait_backup(sra, start, stripes*chunk/512, 0,
dests, destfd, destoffsets, 0);
int part;
char *buf;
unsigned long long speed;
+ int degraded = 0;
posix_memalign((void**)&buf, 4096, disks * chunk);
fds, offsets,
disks, chunk, level, layout,
dests, destfd, destoffsets,
- 0, buf);
+ 0, &degraded, buf);
grow_backup(sra, (start + stripes) * chunk/512, stripes,
fds, offsets,
disks, chunk, level, layout,
dests, destfd, destoffsets,
- 1, buf);
+ 1, &degraded, buf);
validate(afd, destfd[0], destoffsets[0]);
part = 0;
start += stripes * 2; /* where to read next */
fds, offsets,
disks, chunk, level, layout,
dests, destfd, destoffsets,
- part, buf);
+ part, &degraded, buf);
start += stripes;
part = 1 - part;
validate(afd, destfd[0], destoffsets[0]);
old_disks = cnt;
for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
struct mdinfo dinfo;
- char buf[4096];
int fd;
int bsbsize;
char *devname, namebuf[20];
}
/* There should be a duplicate backup superblock 4k before here */
if (lseek64(fd, -4096, 1) < 0 ||
- read(fd, buf, 4096) != 4096)
+ read(fd, &bsb2, 4096) != 4096)
goto second_fail; /* Cannot find leading superblock */
if (bsb.magic[15] == '1')
bsbsize = offsetof(struct mdp_backup_super, pad1);
else
bsbsize = offsetof(struct mdp_backup_super, pad);
- if (memcmp(buf, &bsb, bsbsize) != 0)
+ if (memcmp(&bsb2, &bsb, bsbsize) != 0)
goto second_fail; /* Cannot find leading superblock */
/* Now need the data offsets for all devices. */
continue;
st->ss->getinfo_super(st, &dinfo);
st->ss->free_super(st);
- offsets[j] = dinfo.data_offset;
+ offsets[j] = dinfo.data_offset * 512;
}
printf(Name ": restoring critical section\n");
info->new_level,
info->new_layout,
fd, __le64_to_cpu(bsb.devstart)*512,
- __le64_to_cpu(bsb.arraystart),
+ __le64_to_cpu(bsb.arraystart)*512,
__le64_to_cpu(bsb.length)*512)) {
/* didn't succeed, so giveup */
if (verbose)
info->new_layout,
fd, __le64_to_cpu(bsb.devstart)*512 +
__le64_to_cpu(bsb.devstart2)*512,
- __le64_to_cpu(bsb.arraystart2),
+ __le64_to_cpu(bsb.arraystart2)*512,
__le64_to_cpu(bsb.length2)*512)) {
/* didn't succeed, so giveup */
if (verbose)
int d;
struct mdinfo *sra, *sd;
int rv;
+ int cache;
int done = 0;
err = sysfs_set_str(info, NULL, "array_state", "readonly");
/* LCM == product / GCD */
blocks = ochunk/512 * nchunk/512 * odata * ndata / a;
+ sra = sysfs_read(-1, devname2devnum(info->sys_name),
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+ GET_CACHE);
+
+
if (ndata == odata)
- blocks *= 16;
+ while (blocks * 32 < sra->component_size &&
+ blocks < 16*1024*2)
+ blocks *= 2;
stripes = blocks / (info->array.chunk_size/512) / odata;
+ /* check that the internal stripe cache is
+ * large enough, or it won't work.
+ */
+ cache = (nchunk < ochunk) ? ochunk : nchunk;
+ cache = cache * 4 / 4096;
+ if (cache < blocks / 8 / odisks + 16)
+ /* Make it big enough to hold 'blocks' */
+ cache = blocks / 8 / odisks + 16;
+ if (sra->cache_size < cache)
+ sysfs_set_num(sra, NULL, "stripe_cache_size",
+ cache+1);
memset(&bsb, 0, 512);
memcpy(bsb.magic, "md_backup_data-1", 16);
for (d=0; d<odisks; d++)
fds[d] = -1;
- sra = sysfs_read(-1, devname2devnum(info->sys_name),
- GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
- GET_CACHE);
-
for (sd = sra->devs; sd; sd = sd->next) {
if (sd->disk.state & (1<<MD_DISK_FAULTY))
continue;