X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Grow.c;h=de53df18559c69fabcabe2f8daa5d627b594e6a1;hp=623daf3fe910ec08c3fb16e1f203eaca9f6a6b68;hb=16c6fa807c1e43a1a12bbeb7668b4166d14b2cc8;hpb=34163fc7cfa4346ca7f2ca08039e0c5a3cf901ab diff --git a/Grow.c b/Grow.c index 623daf3f..de53df18 100644 --- a/Grow.c +++ b/Grow.c @@ -92,13 +92,13 @@ int Grow_Add_device(char *devname, int fd, char *newdev) d); return 1; } - dv = map_dev(disk.major, disk.minor); + dv = map_dev(disk.major, disk.minor, 1); if (!dv) { fprintf(stderr, Name ": cannot find device file for device %d\n", d); return 1; } - fd2 = open(dv, O_RDWR); + fd2 = dev_open(dv, O_RDWR); if (!fd2) { fprintf(stderr, Name ": cannot open device file %s\n", dv); return 1; @@ -154,13 +154,13 @@ int Grow_Add_device(char *devname, int fd, char *newdev) d); return 1; } - dv = map_dev(disk.major, disk.minor); + dv = map_dev(disk.major, disk.minor, 1); if (!dv) { fprintf(stderr, Name ": cannot find device file for device %d\n", d); return 1; } - fd2 = open(dv, O_RDWR); + fd2 = dev_open(dv, O_RDWR); if (fd2 < 0) { fprintf(stderr, Name ": cannot open device file %s\n", dv); return 1; @@ -192,7 +192,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev) return 0; } -int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind) +int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force) { /* * First check that array doesn't have a bitmap @@ -206,9 +206,20 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int mdu_bitmap_file_t bmf; mdu_array_info_t array; struct supertype *st; + int major = BITMAP_MAJOR_HI; + int vers = md_get_version(fd); + unsigned long long bitmapsize, array_size; + + if (vers < 9003) { + major = BITMAP_MAJOR_HOSTENDIAN; +#ifdef __BIG_ENDIAN + fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n" + " between different architectured. Consider upgrading the Linux kernel.\n"); +#endif + } if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) { - if (errno == ENOMEM) + if (errno == ENOMEM) fprintf(stderr, Name ": Memory allocation failure.\n"); else fprintf(stderr, Name ": bitmaps not supported by this kernel.\n"); @@ -244,6 +255,27 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int devname); return 1; } + bitmapsize = array.size; + bitmapsize <<= 1; +#ifdef BLKGETSIZE64 + if (ioctl(fd, BLKGETSIZE64, &array_size) == 0 && + array_size > (0x7fffffffULL<<9)) { + /* Array is big enough that we cannot trust array.size + * try other approaches + */ + bitmapsize = get_component_size(fd); + } +#endif + if (bitmapsize == 0) { + fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n"); + return 1; + } + + if (array.level == 10) { + int ncopies = (array.layout&255)*(array.layout>>8); + bitmapsize = bitmapsize * array.raid_disks / ncopies; + } + st = super_by_version(array.major_version, array.minor_version); if (!st) { fprintf(stderr, Name ": Cannot understand version %d.%d\n", @@ -266,17 +298,22 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int continue; if ((disk.state & (1<ss->load_super(st, fd2, &super, NULL)==0) { - st->ss->add_internal_bitmap(st, super, - chunk, delay, write_behind, - array.size); - st->ss->write_bitmap(st, fd2, super); + if (st->ss->add_internal_bitmap(st, super, + chunk, delay, write_behind, + bitmapsize, 0, major)) + st->ss->write_bitmap(st, fd2, super); + else { + fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n"); + close(fd2); + return 1; + } } close(fd2); } @@ -306,9 +343,9 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int if ((disk.major==0 && disk.minor==0) || (disk.state & (1<= 0 && st->ss->load_super(st, fd2, &super, NULL) == 0) { close(fd2); @@ -321,13 +358,13 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int fprintf(stderr, Name ": cannot find UUID for array!\n"); return 1; } - if (CreateBitmap(file, 0, (char*)uuid, chunk, - delay, write_behind, array.size*2ULL)) { + if (CreateBitmap(file, force, (char*)uuid, chunk, + delay, write_behind, bitmapsize, major)) { return 1; } bitmap_fd = open(file, O_RDWR); if (bitmap_fd < 0) { - fprintf(stderr, Name ": weird: %s cannot be openned\n", + fprintf(stderr, Name ": weird: %s cannot be opened\n", file); return 1; } @@ -341,4 +378,580 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int return 0; } + +/* + * When reshaping an array we might need to backup some data. + * This is written to all spares with a 'super_block' describing it. + * The superblock goes 1K form the end of the used space on the + * device. + * It if written after the backup is complete. + * It has the following structure. + */ + +struct mdp_backup_super { + char magic[16]; /* md_backup_data-1 */ + __u8 set_uuid[16]; + __u64 mtime; + /* start/sizes in 512byte sectors */ + __u64 devstart; + __u64 arraystart; + __u64 length; + __u32 sb_csum; /* csum of preceeding bytes. */ +}; + +int bsb_csum(char *buf, int len) +{ + int i; + int csum = 0; + for (i=0; i= 0) { + fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n", + devname); + return 1; + } + if (level != UnSet && level != LEVEL_FAULTY) { + fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n", + devname); + return 1; + } + if (chunksize || raid_disks) { + fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n", + devname); + return 1; + } + if (layout == UnSet) + return 0; /* nothing to do.... */ + + array.layout = layout; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set layout for %s: %s\n", + devname, strerror(errno)); + return 1; + } + if (!quiet) + printf("layout for %s set to %d\n", devname, array.layout); + return 0; + + case 1: /* raid_disks and size can each be changed. They are independant */ + + if (level != UnSet && level != 1) { + fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n", + devname); + return 1; + } + if (chunksize || layout != UnSet) { + fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n", + devname); + return 1; + } + + /* Each can trigger a resync/recovery which will block the + * other from happening. Later we could block + * resync for the duration via 'sync_action'... + */ + if (raid_disks >= 0) + array.raid_disks = raid_disks; + if (size >= 0) + array.size = size; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", + devname, strerror(errno)); + return 1; + } + return 0; + + case 4: + case 5: + case 6: + st = super_by_version(array.major_version, + array.minor_version); + /* size can be changed independantly. + * layout/chunksize/raid_disks/level can be changed + * though the kernel may not support it all. + * If 'suspend_lo' is not present in devfs, then + * these cannot be changed. + */ + if (size >= 0) { + /* Cannot change other details as well.. */ + if (layout != UnSet || + chunksize != 0 || + raid_disks != 0 || + level != UnSet) { + fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n", + devname, c); + return 1; + } + array.size = size; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", + devname, strerror(errno)); + return 1; + } + return 0; + } + /* Ok, just change the shape. This can be awkward. + * There are three possibilities. + * 1/ The array will shrink. We don't support this + * possibility. Maybe one day... + * 2/ The array will not change size. This is easy enough + * to do, but not reliably. If the process is aborted + * the array *will* be corrupted. So maybe we can allow + * this but only if the user is really certain. e.g. + * --really-risk-everything + * 3/ The array will grow. This can be reliably achieved. + * However the kernel's restripe routines will cheerfully + * overwrite some early data before it is safe. So we + * need to make a backup of the early parts of the array + * and be ready to restore it if rebuild aborts very early. + * + * We backup data by writing it to all spares (there must be + * at least 1, so even raid6->raid5 requires a spare to be + * present). + * + * So: we enumerate the devices in the array and + * make sure we can open all of them. + * Then we freeze the early part of the array and + * backup to the various spares. + * Then we request changes and start the reshape. + * Monitor progress until it has passed the danger zone. + * and finally invalidate the copied data and unfreeze the + * start of the array. + * + * Before we can do this we need to decide: + * - will the array grow? Just calculate size + * - how much needs to be saved: count stripes. + * - where to save data... good question. + * + */ + nlevel = olevel = array.level; + nchunk = ochunk = array.chunk_size; + nlayout = olayout = array.layout; + ndisks = odisks = array.raid_disks; + + if (level != UnSet) nlevel = level; + if (chunksize) nchunk = chunksize; + if (layout != UnSet) nlayout = layout; + if (raid_disks) ndisks = raid_disks; + + odata = odisks-1; + if (olevel == 6) odata--; /* number of data disks */ + ndata = ndisks-1; + if (nlevel == 6) ndata--; + + if (ndata < odata) { + fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n", + devname); + return 1; + } + if (ndata == odata) { + fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n", + devname); + return 1; + } + /* Well, it is growing... so how much do we need to backup. + * Need to backup a full number of new-stripes, such that the + * last one does not over-write any place that it would be read + * from + */ + nstripe = ostripe = 0; + while (nstripe >= ostripe) { + nstripe += nchunk/512; + last_block = nstripe * ndata; + ostripe = last_block / odata / (ochunk/512) * (ochunk/512); + } + printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2); + + sra = sysfs_read(fd, 0, + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE); + if (!sra) { + fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", + devname); + return 1; + } + + if (last_block >= sra->component_size/2) { + fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", + devname); + return 1; + } + if (sra->spares == 0 && backup_file == NULL) { + fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n", + devname); + return 1; + } + + nrdisks = array.nr_disks + sra->spares; + /* Now we need to open all these devices so we can read/write. + */ + fdlist = malloc((1+nrdisks) * sizeof(int)); + offsets = malloc((1+nrdisks) * sizeof(offsets[0])); + if (!fdlist || !offsets) { + fprintf(stderr, Name ": malloc failed: grow aborted\n"); + return 1; + } + for (d=0; d <= nrdisks; d++) + fdlist[d] = -1; + d = array.raid_disks; + for (sd = sra->devs; sd; sd=sd->next) { + if (sd->state & (1<state & (1<major, sd->minor, 1); + fdlist[sd->role] = dev_open(dn, O_RDONLY); + offsets[sd->role] = sd->offset; + if (fdlist[sd->role] < 0) { + fprintf(stderr, Name ": %s: cannot open component %s\n", + devname, dn); + goto abort; + } + } else { + /* spare */ + char *dn = map_dev(sd->major, sd->minor, 1); + fdlist[d] = dev_open(dn, O_RDWR); + offsets[d] = sd->offset; + if (fdlist[d]<0) { + fprintf(stderr, Name ": %s: cannot open component %s\n", + devname, dn); + goto abort; + } + d++; + } + } + for (i=0 ; ispares; + if (backup_file) { + fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600); + if (fdlist[d] < 0) { + fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n", + devname, backup_file, strerror(errno)); + goto abort; + } + offsets[d] = 8; + d++; + spares++; + } + if (fdlist[array.raid_disks] < 0) { + fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n", + devname); + goto abort; + } + + /* Find a superblock */ + if (st->ss->load_super(st, fdlist[0], &super, NULL)) { + fprintf(stderr, Name ": %s: Cannot find a superblock\n", + devname); + goto abort; + } + + + memcpy(bsb.magic, "md_backup_data-1", 16); + st->ss->uuid_from_super((int*)&bsb.set_uuid, super); + bsb.mtime = __cpu_to_le64(time(0)); + bsb.arraystart = 0; + bsb.length = __cpu_to_le64(last_block); + + /* Decide offset for the backup, llseek the spares, and write + * a leading superblock 4K earlier. + */ + for (i=array.raid_disks; icomponent_size - last_block - 8; + if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0) + != (offsets[i]<<9) - 4096) { + fprintf(stderr, Name ": could not seek...\n"); + goto abort; + } + memset(buf, 0, sizeof(buf)); + bsb.devstart = __cpu_to_le64(offsets[i]); + bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)); + memcpy(buf, &bsb, sizeof(bsb)); + if (write(fdlist[i], buf, 4096) != 4096) { + fprintf(stderr, Name ": could not write leading superblock\n"); + goto abort; + } + } + array.level = nlevel; + array.raid_disks = ndisks; + array.chunk_size = nchunk; + array.layout = nlayout; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", + devname, strerror(errno)); + goto abort; + } + + /* suspend the relevant region */ + sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */ + if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 || + sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) { + fprintf(stderr, Name ": %s: failed to suspend device.\n", + devname); + goto abort_resume; + } + + + err = save_stripes(fdlist, offsets, + odisks, ochunk, olevel, olayout, + spares, fdlist+odisks, + 0ULL, last_block*512); + + /* abort if there was an error */ + if (err < 0) { + fprintf(stderr, Name ": %s: failed to save critical region\n", + devname); + goto abort_resume; + } + + for (i=odisks; i= nstripe) + break; + sleep(1); + } + /* invalidate superblocks */ + memset(&bsb, 0, sizeof(bsb)); + for (i=odisks; i= 0) + close(fdlist[i]); + free(fdlist); + free(offsets); + if (backup_file) + unlink(backup_file); + + printf(Name ": ... critical section passed.\n"); + break; + } + return 0; + + + abort_resume: + sysfs_set_num(sra, NULL, "suspend_lo", last_block); + abort: + for (i=0; i= 0) + close(fdlist[i]); + free(fdlist); + free(offsets); + if (backup_file) + unlink(backup_file); + return 1; + +} + +/* + * If any spare contains md_back_data-1 which is recent wrt mtime, + * write that data into the array and update the super blocks with + * the new reshape_progress + */ +int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file) +{ + int i, j; + int old_disks; + int err = 0; + unsigned long long *offsets; + + if (info->delta_disks < 0) + return 1; /* cannot handle a shrink */ + if (info->new_level != info->array.level || + info->new_layout != info->array.layout || + info->new_chunk != info->array.chunk_size) + return 1; /* Can only handle change in disks */ + + old_disks = info->array.raid_disks - info->delta_disks; + + for (i=old_disks-(backup_file?1:0); iss->load_super(st, fd, &super, NULL)) + continue; + + st->ss->getinfo_super(&dinfo, super); + free(super); super = NULL; + if (lseek64(fd, + (dinfo.data_offset + dinfo.component_size - 8) <<9, + 0) < 0) + continue; /* Cannot seek */ + } + if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb)) + continue; /* Cannot read */ + if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0) + continue; + if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) + continue; /* bad checksum */ + if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) + continue; /* Wrong uuid */ + + if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 || + info->array.utime < __le64_to_cpu(bsb.mtime)) + continue; /* time stamp is too bad */ + + if (__le64_to_cpu(bsb.arraystart) != 0) + continue; /* Can only handle backup from start of array */ + if (__le64_to_cpu(bsb.length) < + info->reshape_progress) + continue; /* No new data here */ + + if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) + continue; /* Cannot seek */ + /* There should be a duplicate backup superblock 4k before here */ + if (lseek64(fd, -4096, 1) < 0 || + read(fd, buf, 4096) != 4096 || + memcmp(buf, &bsb, sizeof(buf)) != 0) + continue; /* Cannot find leading superblock */ + + /* Now need the data offsets for all devices. */ + offsets = malloc(sizeof(*offsets)*info->array.raid_disks); + for(j=0; jarray.raid_disks; j++) { + if (fdlist[j] < 0) + continue; + if (st->ss->load_super(st, fdlist[j], &super, NULL)) + /* FIXME should be this be an error */ + continue; + st->ss->getinfo_super(&dinfo, super); + free(super); super = NULL; + offsets[j] = dinfo.data_offset; + } + printf(Name ": restoring critical section\n"); + + if (restore_stripes(fdlist, offsets, + info->array.raid_disks, + info->new_chunk, + info->new_level, + info->new_layout, + fd, __le64_to_cpu(bsb.devstart)*512, + 0, __le64_to_cpu(bsb.length)*512)) { + /* didn't succeed, so giveup */ + return -1; + } + + /* Ok, so the data is restored. Let's update those superblocks. */ + + for (j=0; jarray.raid_disks; j++) { + if (fdlist[j] < 0) continue; + if (st->ss->load_super(st, fdlist[j], &super, NULL)) + continue; + st->ss->getinfo_super(&dinfo, super); + dinfo.reshape_progress = __le64_to_cpu(bsb.length); + st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0); + st->ss->store_super(st, fdlist[j], super); + free(super); + } + + /* And we are done! */ + return 0; + } + return err; +}