/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2004 Neil Brown <neilb@cse.unsw.edu.au>
+ * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
*/
struct mdinfo info;
- void *super = NULL;
struct stat stb;
int nfd, fd2;
int d, nd;
struct supertype *st = NULL;
-
+
if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
fprintf(stderr, Name ": cannot get array info for %s\n", devname);
return 1;
}
- st = super_by_version(info.array.major_version, info.array.minor_version);
+ st = super_by_fd(fd);
if (!st) {
fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
return 1;
return 1;
}
- nfd = open(newdev, O_RDWR|O_EXCL);
+ nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
if (nfd < 0) {
fprintf(stderr, Name ": cannot open %s\n", newdev);
return 1;
d);
return 1;
}
- dv = map_dev(disk.major, disk.minor);
+ dv = map_dev(disk.major, disk.minor, 1);
if (!dv) {
fprintf(stderr, Name ": cannot find device file for device %d\n",
d);
return 1;
}
- fd2 = open(dv, O_RDWR);
+ fd2 = dev_open(dv, O_RDWR);
if (!fd2) {
fprintf(stderr, Name ": cannot open device file %s\n", dv);
return 1;
}
- if (super) free(super);
- super= NULL;
- if (st->ss->load_super(st, fd2, &super, NULL)) {
+ st->ss->free_super(st);
+
+ if (st->ss->load_super(st, fd2, NULL)) {
fprintf(stderr, Name ": cannot find super block on %s\n", dv);
close(fd2);
return 1;
/* Ok, looks good. Lets update the superblock and write it out to
* newdev.
*/
-
+
info.disk.number = d;
info.disk.major = major(stb.st_rdev);
info.disk.minor = minor(stb.st_rdev);
info.disk.raid_disk = d;
info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
- st->ss->update_super(&info, super, "grow", newdev, 0);
+ st->ss->update_super(st, &info, "linear-grow-new", newdev,
+ 0, 0, NULL);
- if (st->ss->store_super(st, nfd, super)) {
- fprintf(stderr, Name ": Cannot store new superblock on %s\n", newdev);
+ if (st->ss->store_super(st, nfd)) {
+ fprintf(stderr, Name ": Cannot store new superblock on %s\n",
+ newdev);
close(nfd);
return 1;
}
d);
return 1;
}
- dv = map_dev(disk.major, disk.minor);
+ dv = map_dev(disk.major, disk.minor, 1);
if (!dv) {
fprintf(stderr, Name ": cannot find device file for device %d\n",
d);
return 1;
}
- fd2 = open(dv, O_RDWR);
+ fd2 = dev_open(dv, O_RDWR);
if (fd2 < 0) {
fprintf(stderr, Name ": cannot open device file %s\n", dv);
return 1;
}
- if (st->ss->load_super(st, fd2, &super, NULL)) {
+ if (st->ss->load_super(st, fd2, NULL)) {
fprintf(stderr, Name ": cannot find super block on %s\n", dv);
close(fd);
return 1;
info.array.nr_disks = nd+1;
info.array.active_disks = nd+1;
info.array.working_disks = nd+1;
- info.disk.number = nd;
- info.disk.major = major(stb.st_rdev);
- info.disk.minor = minor(stb.st_rdev);
- info.disk.raid_disk = nd;
- info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
- st->ss->update_super(&info, super, "grow", dv, 0);
-
- if (st->ss->store_super(st, fd2, super)) {
+
+ st->ss->update_super(st, &info, "linear-grow-update", dv,
+ 0, 0, NULL);
+
+ if (st->ss->store_super(st, fd2)) {
fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
close(fd2);
return 1;
devname);
return 1;
}
+ if (array.level <= 0) {
+ fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
+ map_num(pers, array.level)?:"of this array");
+ return 1;
+ }
bitmapsize = array.size;
bitmapsize <<= 1;
-#ifdef BLKGETSIZE64
- if (ioctl(fd, BLKGETSIZE64, &array_size) == 0 &&
+ if (get_dev_size(fd, NULL, &array_size) &&
array_size > (0x7fffffffULL<<9)) {
/* Array is big enough that we cannot trust array.size
* try other approaches
*/
bitmapsize = get_component_size(fd);
}
-#endif
if (bitmapsize == 0) {
fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
return 1;
}
if (array.level == 10) {
- int ncopies = (array.layout&255)*(array.layout>>8);
+ int ncopies = (array.layout&255)*((array.layout>>8)&255);
bitmapsize = bitmapsize * array.raid_disks / ncopies;
}
- st = super_by_version(array.major_version, array.minor_version);
+ st = super_by_fd(fd);
if (!st) {
fprintf(stderr, Name ": Cannot understand version %d.%d\n",
array.major_version, array.minor_version);
continue;
if ((disk.state & (1<<MD_DISK_SYNC))==0)
continue;
- dv = map_dev(disk.major, disk.minor);
+ dv = map_dev(disk.major, disk.minor, 1);
if (dv) {
- void *super;
- int fd2 = open(dv, O_RDWR);
+ int fd2 = dev_open(dv, O_RDWR);
if (fd2 < 0)
continue;
- if (st->ss->load_super(st, fd2, &super, NULL)==0) {
- if (st->ss->add_internal_bitmap(st, super,
- chunk, delay, write_behind,
- bitmapsize, 0, major))
- st->ss->write_bitmap(st, fd2, super);
+ if (st->ss->load_super(st, fd2, NULL)==0) {
+ if (st->ss->add_internal_bitmap(
+ st,
+ &chunk, delay, write_behind,
+ bitmapsize, 0, major)
+ )
+ st->ss->write_bitmap(st, fd2);
else {
fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
close(fd2);
int bitmap_fd;
int d;
int max_devs = st->max_devs;
- void *super = NULL;
- if (chunk == UnSet)
- chunk = DEFAULT_BITMAP_CHUNK;
/* try to load a superblock */
for (d=0; d<max_devs; d++) {
if ((disk.major==0 && disk.minor==0) ||
(disk.state & (1<<MD_DISK_REMOVED)))
continue;
- dv = map_dev(disk.major, disk.minor);
+ dv = map_dev(disk.major, disk.minor, 1);
if (!dv) continue;
- fd2 = open(dv, O_RDONLY);
+ fd2 = dev_open(dv, O_RDONLY);
if (fd2 >= 0 &&
- st->ss->load_super(st, fd2, &super, NULL) == 0) {
+ st->ss->load_super(st, fd2, NULL) == 0) {
close(fd2);
- st->ss->uuid_from_super(uuid, super);
+ st->ss->uuid_from_super(st, uuid);
break;
}
close(fd2);
return __cpu_to_le32(csum);
}
-int Grow_reshape(char *devname, int fd, int quiet,
+int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
int level, int layout, int chunksize, int raid_disks)
{
int d, i, spares;
int nrdisks;
int err;
- void *super = NULL;
- struct sysarray *sra;
- struct sysdev *sd;
+ struct mdinfo *sra;
+ struct mdinfo *sd;
if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
fprintf(stderr, Name ": %s is not an active md array - aborting\n",
* other from happening. Later we could block
* resync for the duration via 'sync_action'...
*/
- if (raid_disks >= 0)
+ if (raid_disks > 0) {
array.raid_disks = raid_disks;
- if (size >= 0)
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
+ }
+ if (size >= 0) {
array.size = size;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
- fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
- devname, strerror(errno));
- return 1;
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
}
return 0;
case 4:
case 5:
case 6:
- st = super_by_version(array.major_version,
- array.minor_version);
- /* size can be changed independantly.
+ st = super_by_fd(fd);
+
+ /* size can be changed independently.
* layout/chunksize/raid_disks/level can be changed
* though the kernel may not support it all.
* If 'suspend_lo' is not present in devfs, then
last_block = nstripe * ndata;
ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
}
- printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
+ fprintf(stderr, Name ": Need to backup %lluK of critical "
+ "section..\n", last_block/2);
sra = sysfs_read(fd, 0,
- GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+ GET_CACHE);
if (!sra) {
fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
devname);
devname);
return 1;
}
- if (sra->spares == 0) {
- fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n",
+ if (sra->array.spare_disks == 0 && backup_file == NULL) {
+ fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
devname);
return 1;
}
- nrdisks = array.nr_disks + sra->spares;
+ nrdisks = array.nr_disks + sra->array.spare_disks;
/* Now we need to open all these devices so we can read/write.
*/
- fdlist = malloc(nrdisks * sizeof(int));
- offsets = malloc(nrdisks * sizeof(offsets[0]));
+ fdlist = malloc((1+nrdisks) * sizeof(int));
+ offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
if (!fdlist || !offsets) {
fprintf(stderr, Name ": malloc failed: grow aborted\n");
return 1;
}
- for (d=0; d< nrdisks; d++)
+ for (d=0; d <= nrdisks; d++)
fdlist[d] = -1;
d = array.raid_disks;
for (sd = sra->devs; sd; sd=sd->next) {
- if (sd->state & (1<<MD_DISK_FAULTY))
+ if (sd->disk.state & (1<<MD_DISK_FAULTY))
continue;
- if (sd->state & (1<<MD_DISK_SYNC)) {
- char *dn = map_dev(sd->major, sd->minor);
- fdlist[sd->role] = open(dn, O_RDONLY);
- offsets[sd->role] = sd->offset;
- if (fdlist[sd->role] < 0) {
+ if (sd->disk.state & (1<<MD_DISK_SYNC)) {
+ char *dn = map_dev(sd->disk.major,
+ sd->disk.minor, 1);
+ fdlist[sd->disk.raid_disk]
+ = dev_open(dn, O_RDONLY);
+ offsets[sd->disk.raid_disk] = sd->data_offset;
+ if (fdlist[sd->disk.raid_disk] < 0) {
fprintf(stderr, Name ": %s: cannot open component %s\n",
- devname, dn);
+ devname, dn?dn:"-unknown-");
goto abort;
}
} else {
/* spare */
- char *dn = map_dev(sd->major, sd->minor);
- fdlist[d] = open(dn, O_RDWR);
- offsets[d] = sd->offset;
+ char *dn = map_dev(sd->disk.major,
+ sd->disk.minor, 1);
+ fdlist[d] = dev_open(dn, O_RDWR);
+ offsets[d] = sd->data_offset;
if (fdlist[d]<0) {
fprintf(stderr, Name ": %s: cannot open component %s\n",
- devname, dn);
+ devname, dn?dn:"-unknown");
goto abort;
}
d++;
" --grow aborted\n", devname, i);
goto abort;
}
+ spares = sra->array.spare_disks;
+ if (backup_file) {
+ fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR);
+ if (fdlist[d] < 0) {
+ fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+ devname, backup_file, strerror(errno));
+ goto abort;
+ }
+ offsets[d] = 8;
+ d++;
+ spares++;
+ }
if (fdlist[array.raid_disks] < 0) {
- fprintf(stderr, Name ": %s: failed to find a spare - --grow aborted\n",
+ fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
devname);
goto abort;
}
/* Find a superblock */
- if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
+ if (st->ss->load_super(st, fdlist[0], NULL)) {
fprintf(stderr, Name ": %s: Cannot find a superblock\n",
devname);
goto abort;
}
- spares = sra->spares;
- /* Decide offset for the backup and llseek the spares */
+ memcpy(bsb.magic, "md_backup_data-1", 16);
+ st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
+ bsb.mtime = __cpu_to_le64(time(0));
+ bsb.arraystart = 0;
+ bsb.length = __cpu_to_le64(last_block);
+
+ /* Decide offset for the backup, llseek the spares, and write
+ * a leading superblock 4K earlier.
+ */
for (i=array.raid_disks; i<d; i++) {
- offsets[i] += sra->component_size - last_block - 8;
- if (lseek64(fdlist[i], offsets[i]<<9, 0) != offsets[i]<<9) {
+ char buf[4096];
+ if (i==d-1 && backup_file) {
+ /* This is the backup file */
+ offsets[i] = 8;
+ } else
+ offsets[i] += sra->component_size - last_block - 8;
+ if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
+ != (offsets[i]<<9) - 4096) {
fprintf(stderr, Name ": could not seek...\n");
goto abort;
}
+ memset(buf, 0, sizeof(buf));
+ bsb.devstart = __cpu_to_le64(offsets[i]);
+ bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+ memcpy(buf, &bsb, sizeof(bsb));
+ if (write(fdlist[i], buf, 4096) != 4096) {
+ fprintf(stderr, Name ": could not write leading superblock\n");
+ goto abort;
+ }
}
array.level = nlevel;
array.raid_disks = ndisks;
array.chunk_size = nchunk;
array.layout = nlayout;
if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (errno == ENOSPC) {
+ /* stripe cache is not big enough.
+ * It needs to be 4 times the chunk size,
+ * and we assume pagesize is 4K
+ */
+ if (sra->cache_size < 4 * (nchunk/4096)) {
+ sysfs_set_num(sra, NULL,
+ "stripe_cache_size",
+ 4 * (nchunk/4096) +1);
+ if (ioctl(fd, SET_ARRAY_INFO,
+ &array) == 0)
+ goto ok;
+ }
+ }
fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
devname, strerror(errno));
goto abort;
}
+ ok: ;
/* suspend the relevant region */
sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
err = save_stripes(fdlist, offsets,
odisks, ochunk, olevel, olayout,
spares, fdlist+odisks,
- 0ULL, nstripe*512);
+ 0ULL, last_block*512);
/* abort if there was an error */
if (err < 0) {
devname);
goto abort_resume;
}
- /* FIXME write superblocks */
- memcpy(bsb.magic, "md_backup_data-1", 16);
- st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
- bsb.mtime = __cpu_to_le64(time(0));
- bsb.arraystart = 0;
- bsb.length = __cpu_to_le64(last_block);
+
for (i=odisks; i<d ; i++) {
bsb.devstart = __cpu_to_le64(offsets[i]);
bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
- write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb)) {
+ write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
+ fsync(fdlist[i]) != 0) {
fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
devname);
goto abort_resume;
break;
sleep(1);
}
-
+
/* invalidate superblocks */
memset(&bsb, 0, sizeof(bsb));
for (i=odisks; i<d ; i++) {
lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
- write(fdlist[i], &bsb, sizeof(bsb));
+ if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
+ fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
+ devname, i);
+ }
}
/* unsuspend. */
close(fdlist[i]);
free(fdlist);
free(offsets);
+ if (backup_file)
+ unlink(backup_file);
- printf(Name ": ... critical section passed.\n");
+ fprintf(stderr, Name ": ... critical section passed.\n");
break;
}
return 0;
close(fdlist[i]);
free(fdlist);
free(offsets);
+ if (backup_file)
+ unlink(backup_file);
return 1;
}
* write that data into the array and update the super blocks with
* the new reshape_progress
*/
-int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt)
+int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
{
int i, j;
int old_disks;
- int err = 0;
unsigned long long *offsets;
+ unsigned long long nstripe, ostripe, last_block;
+ int ndata, odata;
if (info->delta_disks < 0)
return 1; /* cannot handle a shrink */
old_disks = info->array.raid_disks - info->delta_disks;
- for (i=old_disks; i<cnt; i++) {
- void *super = NULL;
+ for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
struct mdinfo dinfo;
- struct mddev_ident_s id;
struct mdp_backup_super bsb;
+ char buf[4096];
+ int fd;
/* This was a spare and may have some saved data on it.
* Load the superblock, find and load the
* If the backup contains no new info, just return
* else restore data and update all superblocks
*/
- if (fdlist[i] < 0)
- continue;
- if (st->ss->load_super(st, fdlist[i], &super, NULL))
- continue;
+ if (i == old_disks-1) {
+ fd = open(backup_file, O_RDONLY);
+ if (fd<0)
+ continue;
+ } else {
+ fd = fdlist[i];
+ if (fd < 0)
+ continue;
+ if (st->ss->load_super(st, fd, NULL))
+ continue;
- st->ss->getinfo_super(&dinfo, &id, super);
- free(super); super = NULL;
- if (lseek64(fdlist[i],
- (dinfo.data_offset + dinfo.component_size - 8) <<9,
- 0) < 0)
- continue; /* Cannot seek */
- if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb))
+ st->ss->getinfo_super(st, &dinfo);
+ st->ss->free_super(st);
+
+ if (lseek64(fd,
+ (dinfo.data_offset + dinfo.component_size - 8) <<9,
+ 0) < 0)
+ continue; /* Cannot seek */
+ }
+ if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
continue; /* Cannot read */
if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
continue;
info->reshape_progress)
continue; /* No new data here */
- if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0)< 0)
+ if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
continue; /* Cannot seek */
+ /* There should be a duplicate backup superblock 4k before here */
+ if (lseek64(fd, -4096, 1) < 0 ||
+ read(fd, buf, 4096) != 4096 ||
+ memcmp(buf, &bsb, sizeof(bsb)) != 0)
+ continue; /* Cannot find leading superblock */
/* Now need the data offsets for all devices. */
offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
for(j=0; j<info->array.raid_disks; j++) {
if (fdlist[j] < 0)
continue;
- if (st->ss->load_super(st, fdlist[j], &super, NULL))
+ if (st->ss->load_super(st, fdlist[j], NULL))
/* FIXME should be this be an error */
continue;
- st->ss->getinfo_super(&dinfo, &id, super);
- free(super); super = NULL;
+ st->ss->getinfo_super(st, &dinfo);
+ st->ss->free_super(st);
offsets[j] = dinfo.data_offset;
}
printf(Name ": restoring critical section\n");
info->new_chunk,
info->new_level,
info->new_layout,
- fdlist[i], __le64_to_cpu(bsb.devstart)*512,
+ fd, __le64_to_cpu(bsb.devstart)*512,
0, __le64_to_cpu(bsb.length)*512)) {
/* didn't succeed, so giveup */
- return -1;
+ return 1;
}
/* Ok, so the data is restored. Let's update those superblocks. */
for (j=0; j<info->array.raid_disks; j++) {
if (fdlist[j] < 0) continue;
- if (st->ss->load_super(st, fdlist[j], &super, NULL))
+ if (st->ss->load_super(st, fdlist[j], NULL))
continue;
- st->ss->getinfo_super(&dinfo, &id, super);
+ st->ss->getinfo_super(st, &dinfo);
dinfo.reshape_progress = __le64_to_cpu(bsb.length);
- st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
- st->ss->store_super(st, fdlist[j], super);
- free(super);
+ st->ss->update_super(st, &dinfo,
+ "_reshape_progress",
+ NULL,0, 0, NULL);
+ st->ss->store_super(st, fdlist[j]);
+ st->ss->free_super(st);
}
/* And we are done! */
return 0;
}
- return err;
+ /* Didn't find any backup data, try to see if any
+ * was needed.
+ */
+ nstripe = ostripe = 0;
+ odata = info->array.raid_disks - info->delta_disks - 1;
+ if (info->array.level == 6) odata--; /* number of data disks */
+ ndata = info->array.raid_disks - 1;
+ if (info->new_level == 6) ndata--;
+ last_block = 0;
+ while (nstripe >= ostripe) {
+ nstripe += info->new_chunk / 512;
+ last_block = nstripe * ndata;
+ ostripe = last_block / odata / (info->array.chunk_size/512) *
+ (info->array.chunk_size/512);
+ }
+
+ if (info->reshape_progress >= last_block)
+ return 0;
+ /* needed to recover critical section! */
+ return 1;
}