X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Grow.c;h=a33a2802ea3b716fd229f020d98537762e456f5a;hb=db20d4135e0db3830f0bbf99b81922b902628214;hp=36c35f9c71c65b26bf544a1bd5ee11326fa824b9;hpb=14e5b4d72b4db55e688581d98ec47131554f747c;p=thirdparty%2Fmdadm.git diff --git a/Grow.c b/Grow.c index 36c35f9c..a33a2802 100644 --- a/Grow.c +++ b/Grow.c @@ -51,33 +51,41 @@ int Grow_Add_device(char *devname, int fd, char *newdev) int nfd, fd2; int d, nd; struct supertype *st = NULL; - + char *subarray = NULL; if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) { fprintf(stderr, Name ": cannot get array info for %s\n", devname); return 1; } - st = super_by_fd(fd); + if (info.array.level != -1) { + fprintf(stderr, Name ": can only add devices to linear arrays\n"); + return 1; + } + + st = super_by_fd(fd, &subarray); if (!st) { fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version); return 1; } - if (info.array.level != -1) { - fprintf(stderr, Name ": can only add devices to linear arrays\n"); - return 1; + if (subarray) { + fprintf(stderr, Name ": Cannot grow linear sub-arrays yet\n"); + free(subarray); + free(st); } nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT); if (nfd < 0) { fprintf(stderr, Name ": cannot open %s\n", newdev); + free(st); return 1; } fstat(nfd, &stb); if ((stb.st_mode & S_IFMT) != S_IFBLK) { fprintf(stderr, Name ": %s is not a block device!\n", newdev); close(nfd); + free(st); return 1; } /* now check out all the devices and make sure we can read the superblock */ @@ -85,28 +93,37 @@ int Grow_Add_device(char *devname, int fd, char *newdev) mdu_disk_info_t disk; char *dv; + st->ss->free_super(st); + disk.number = d; if (ioctl(fd, GET_DISK_INFO, &disk) < 0) { fprintf(stderr, Name ": cannot get device detail for device %d\n", d); + close(nfd); + free(st); return 1; } dv = map_dev(disk.major, disk.minor, 1); if (!dv) { fprintf(stderr, Name ": cannot find device file for device %d\n", d); + close(nfd); + free(st); return 
1; } fd2 = dev_open(dv, O_RDWR); if (!fd2) { fprintf(stderr, Name ": cannot open device file %s\n", dv); + close(nfd); + free(st); return 1; } - st->ss->free_super(st); if (st->ss->load_super(st, fd2, NULL)) { fprintf(stderr, Name ": cannot find super block on %s\n", dv); + close(nfd); close(fd2); + free(st); return 1; } close(fd2); @@ -204,6 +221,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int mdu_bitmap_file_t bmf; mdu_array_info_t array; struct supertype *st; + char *subarray = NULL; int major = BITMAP_MAJOR_HI; int vers = md_get_version(fd); unsigned long long bitmapsize, array_size; @@ -253,6 +271,11 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int devname); return 1; } + + if (strcmp(file, "none") == 0) { + fprintf(stderr, Name ": no bitmap found on %s\n", devname); + return 1; + } if (array.level <= 0) { fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n", map_num(pers, array.level)?:"of this array"); @@ -277,17 +300,25 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int bitmapsize = bitmapsize * array.raid_disks / ncopies; } - st = super_by_fd(fd); + st = super_by_fd(fd, &subarray); if (!st) { fprintf(stderr, Name ": Cannot understand version %d.%d\n", array.major_version, array.minor_version); return 1; } - if (strcmp(file, "none") == 0) { - fprintf(stderr, Name ": no bitmap found on %s\n", devname); + if (subarray) { + fprintf(stderr, Name ": Cannot add bitmaps to sub-arrays yet\n"); + free(subarray); + free(st); return 1; - } else if (strcmp(file, "internal") == 0) { + } + if (strcmp(file, "internal") == 0) { int d; + if (st->ss->add_internal_bitmap == NULL) { + fprintf(stderr, Name ": Internal bitmaps not supported " + "with %s metadata\n", st->ss->name); + return 1; + } for (d=0; d< st->max_devs; d++) { mdu_disk_info_t disk; char *dv; @@ -381,7 +412,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int /* * 
When reshaping an array we might need to backup some data. * This is written to all spares with a 'super_block' describing it. - * The superblock goes 1K form the end of the used space on the + * The superblock goes 4K from the end of the used space on the * device. * It if written after the backup is complete. * It has the following structure. @@ -404,7 +435,7 @@ static struct mdp_backup_super { __u8 pad[512-68-32]; } __attribute__((aligned(512))) bsb, bsb2; -int bsb_csum(char *buf, int len) +__u32 bsb_csum(char *buf, int len) { int i; int csum = 0; @@ -496,11 +527,12 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, char *c; int rv = 0; struct supertype *st; + char *subarray = NULL; int nchunk, ochunk; int nlayout, olayout; int ndisks, odisks; - int ndata, odata; + unsigned int ndata, odata; int orig_level = UnSet; char alt_layout[40]; int *fdlist; @@ -510,7 +542,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, int err; int frozen; unsigned long a,b, blocks, stripes; - int cache; + unsigned long cache; unsigned long long array_size; int changed = 0; int done; @@ -541,7 +573,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, return 1; } sra = sysfs_read(fd, 0, GET_LEVEL); - frozen = freeze_array(sra); + if (sra) + frozen = freeze_array(sra); + else { + fprintf(stderr, Name ": failed to read sysfs parameters for %s\n", + devname); + return 1; + } if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" " be reshaped\n", devname); @@ -563,16 +601,27 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, } else rv = ioctl(fd, SET_ARRAY_INFO, &array); if (rv != 0) { + int err = errno; fprintf(stderr, Name ": Cannot set device size for %s: %s\n", - devname, strerror(errno)); + devname, strerror(err)); + if (err == EBUSY && + (array.state & (1<= sra->component_size/2) { fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", devname); rv = 
1; break; } - nrdisks = array.nr_disks + sra->array.spare_disks; + nrdisks = array.raid_disks + sra->array.spare_disks; /* Now we need to open all these devices so we can read/write. */ fdlist = malloc((1+nrdisks) * sizeof(int)); @@ -982,7 +1043,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, char *dn = map_dev(sd->disk.major, sd->disk.minor, 1); fdlist[d] = dev_open(dn, O_RDWR); - offsets[d] = (sra->component_size - blocks - 8)*512; + offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512; if (fdlist[d]<0) { fprintf(stderr, Name ": %s: cannot open component %s\n", devname, dn?dn:"-unknown"); @@ -1024,7 +1085,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, break; } memset(buf, 0, 512); - for (i=0; i < blocks + 1 ; i++) { + for (i=0; i < (signed)blocks + 1 ; i++) { if (write(fdlist[d], buf, 512) != 512) { fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n", devname, backup_file, strerror(errno)); @@ -1060,12 +1121,16 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, if (ochunk == nchunk && olayout == nlayout) { array.raid_disks = ndisks; if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + int err = errno; rv = 1; fprintf(stderr, Name ": Cannot set device shape for %s: %s\n", devname, strerror(errno)); if (ndisks < odisks && get_linux_version() < 2006030) fprintf(stderr, Name ": linux 2.6.30 or later required\n"); + if (err == EBUSY && + (array.state & (1<= 4) odata--; if (level == 6) odata--; - sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * chunk/512) * odata); + sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * (chunk/512)) * odata); /* Check that array hasn't become degraded, else we might backup the wrong data */ - sysfs_get_ll(sra, NULL, "degraded", &new_degraded); + sysfs_get_ll(sra, NULL, "degraded", &ll); + new_degraded = (int)ll; if (new_degraded != *degraded) { /* check each device to ensure it is still working */ struct mdinfo *sd; @@ 
-1276,10 +1351,10 @@ int grow_backup(struct mdinfo *sra, } if (part) { bsb.arraystart2 = __cpu_to_le64(offset * odata); - bsb.length2 = __cpu_to_le64(stripes * chunk/512 * odata); + bsb.length2 = __cpu_to_le64(stripes * (chunk/512) * odata); } else { bsb.arraystart = __cpu_to_le64(offset * odata); - bsb.length = __cpu_to_le64(stripes * chunk/512 * odata); + bsb.length = __cpu_to_le64(stripes * (chunk/512) * odata); } if (part) bsb.magic[15] = '2'; @@ -1306,12 +1381,24 @@ int grow_backup(struct mdinfo *sra, bsb.sb_csum2 = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb)); - lseek64(destfd[i], destoffsets[i] - 4096, 0); - write(destfd[i], &bsb, 512); + rv = -1; + if ((unsigned long long)lseek64(destfd[i], destoffsets[i] - 4096, 0) + != destoffsets[i] - 4096) + break; + if (write(destfd[i], &bsb, 512) != 512) + break; + if (destoffsets[i] > 4096) { + if ((unsigned long long)lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0) != + destoffsets[i]+stripes*chunk*odata) + break; + if (write(destfd[i], &bsb, 512) != 512) + break; + } fsync(destfd[i]); + rv = 0; } - return 0; + return rv; } /* in 2.6.30, the value reported by sync_completed can be @@ -1324,6 +1411,7 @@ int grow_backup(struct mdinfo *sra, * The various caller give appropriate values so that * every works. 
*/ +/* FIXME return value is often ignored */ int wait_backup(struct mdinfo *sra, unsigned long long offset, /* per device */ unsigned long long blocks, /* per device */ @@ -1337,6 +1425,7 @@ int wait_backup(struct mdinfo *sra, int fd = sysfs_get_fd(sra, NULL, "sync_completed"); unsigned long long completed; int i; + int rv; if (fd < 0) return -1; @@ -1368,28 +1457,34 @@ int wait_backup(struct mdinfo *sra, bsb.length = __cpu_to_le64(0); } bsb.mtime = __cpu_to_le64(time(0)); + rv = 0; for (i = 0; i < dests; i++) { bsb.devstart = __cpu_to_le64(destoffsets[i]/512); bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)); if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0) bsb.sb_csum2 = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb)); - lseek64(destfd[i], destoffsets[i]-4096, 0); - write(destfd[i], &bsb, 512); + if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) != + destoffsets[i]-4096) + rv = -1; + if (rv == 0 && + write(destfd[i], &bsb, 512) != 512) + rv = -1; fsync(destfd[i]); } - return 0; + return rv; } static void fail(char *msg) { - write(2, msg, strlen(msg)); - write(2, "\n", 1); - exit(1); + int rv; + rv = (write(2, msg, strlen(msg)) != (int)strlen(msg)); + rv |= (write(2, "\n", 1) != 1); + exit(rv ? 1 : 2); } static char *abuf, *bbuf; -static int abuflen; +static unsigned long long abuflen; static void validate(int afd, int bfd, unsigned long long offset) { /* check that the data in the backup against the array. 
@@ -1421,27 +1516,33 @@ static void validate(int afd, int bfd, unsigned long long offset) free(abuf); free(bbuf); abuflen = len; - posix_memalign((void**)&abuf, 4096, abuflen); - posix_memalign((void**)&bbuf, 4096, abuflen); + if (posix_memalign((void**)&abuf, 4096, abuflen) || + posix_memalign((void**)&bbuf, 4096, abuflen)) { + abuflen = 0; + /* just stop validating on mem-alloc failure */ + return; + } } lseek64(bfd, offset, 0); - if (read(bfd, bbuf, len) != len) { - printf("len %llu\n", len); + if ((unsigned long long)read(bfd, bbuf, len) != len) { + //printf("len %llu\n", len); fail("read first backup failed"); } lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0); - if (read(afd, abuf, len) != len) + if ((unsigned long long)read(afd, abuf, len) != len) fail("read first from array failed"); if (memcmp(bbuf, abuf, len) != 0) { + #if 0 int i; printf("offset=%llu len=%llu\n", - __le64_to_cpu(bsb2.arraystart)*512, len); + (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len); for (i=0; icomponent_size - stripes * chunk/512; + if (posix_memalign((void**)&buf, 4096, disks * chunk)) + return 0; + start = sra->component_size - stripes * (chunk/512); sysfs_set_num(sra, NULL, "sync_max", start); sysfs_set_str(sra, NULL, "sync_action", "reshape"); sysfs_set_num(sra, NULL, "suspend_lo", 0); sysfs_set_num(sra, NULL, "suspend_hi", 0); - rv = wait_backup(sra, 0, start - stripes * chunk/512, stripes * chunk/512, + rv = wait_backup(sra, 0, start - stripes * (chunk/512), stripes * (chunk/512), dests, destfd, destoffsets, 0); if (rv < 0) return 0; @@ -1519,9 +1623,9 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes, dests, destfd, destoffsets, 0, &degraded, buf); validate(afd, destfd[0], destoffsets[0]); - wait_backup(sra, start, stripes*chunk/512, 0, + wait_backup(sra, start, stripes*(chunk/512), 0, dests, destfd, destoffsets, 0); - sysfs_set_num(sra, NULL, "suspend_lo", (stripes * chunk/512) * data); + sysfs_set_num(sra, NULL, "suspend_lo", 
(stripes * (chunk/512)) * data); free(buf); /* FIXME this should probably be numeric */ sysfs_set_str(sra, NULL, "sync_max", "max"); @@ -1542,7 +1646,8 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, int degraded = 0; - posix_memalign((void**)&buf, 4096, disks * chunk); + if (posix_memalign((void**)&buf, 4096, disks * chunk)) + return 0; sysfs_set_num(sra, NULL, "suspend_lo", 0); sysfs_set_num(sra, NULL, "suspend_hi", 0); @@ -1555,7 +1660,7 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, disks, chunk, level, layout, dests, destfd, destoffsets, 0, &degraded, buf); - grow_backup(sra, (start + stripes) * chunk/512, stripes, + grow_backup(sra, (start + stripes) * (chunk/512), stripes, fds, offsets, disks, chunk, level, layout, dests, destfd, destoffsets, @@ -1565,16 +1670,16 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, start += stripes * 2; /* where to read next */ size = sra->component_size / (chunk/512); while (start < size) { - if (wait_backup(sra, (start-stripes*2)*chunk/512, - stripes*chunk/512, 0, + if (wait_backup(sra, (start-stripes*2)*(chunk/512), + stripes*(chunk/512), 0, dests, destfd, destoffsets, part) < 0) return 0; - sysfs_set_num(sra, NULL, "suspend_lo", start*chunk/512 * data); + sysfs_set_num(sra, NULL, "suspend_lo", start*(chunk/512) * data); if (start + stripes > size) tailstripes = (size - start); - grow_backup(sra, start*chunk/512, tailstripes, + grow_backup(sra, start*(chunk/512), tailstripes, fds, offsets, disks, chunk, level, layout, dests, destfd, destoffsets, @@ -1583,15 +1688,15 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, part = 1 - part; validate(afd, destfd[0], destoffsets[0]); } - if (wait_backup(sra, (start-stripes*2) * chunk/512, stripes * chunk/512, 0, + if (wait_backup(sra, (start-stripes*2) * (chunk/512), stripes * (chunk/512), 0, dests, destfd, destoffsets, part) < 0) return 0; - 
sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*chunk/512) * data); - wait_backup(sra, (start-stripes) * chunk/512, tailstripes * chunk/512, 0, + sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*(chunk/512)) * data); + wait_backup(sra, (start-stripes) * (chunk/512), tailstripes * (chunk/512), 0, dests, destfd, destoffsets, 1-part); - sysfs_set_num(sra, NULL, "suspend_lo", (size*chunk/512) * data); + sysfs_set_num(sra, NULL, "suspend_lo", (size*(chunk/512)) * data); sysfs_set_num(sra, NULL, "sync_speed_min", speed); free(buf); return 1; @@ -1628,7 +1733,6 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt old_disks = cnt; for (i=old_disks-(backup_file?1:0); iss->load_super(st, fd, NULL)) continue; - st->ss->getinfo_super(st, &dinfo); + st->ss->getinfo_super(st, &dinfo, NULL); st->ss->free_super(st); if (lseek64(fd, @@ -1695,11 +1799,23 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt continue; /* Wrong uuid */ } - if (info->array.utime > __le64_to_cpu(bsb.mtime) + 10*60 || - info->array.utime < __le64_to_cpu(bsb.mtime) - 10*60) { - if (verbose) - fprintf(stderr, Name ": too-old timestamp on backup-metadata on %s\n", devname); - continue; /* time stamp is too bad */ + /* array utime and backup-mtime should be updated at much the same time, but it seems that + * sometimes they aren't... So allow considerable flexability in matching, and allow + * this test to be overridden by an environment variable. 
+ */ + if (info->array.utime > (int)__le64_to_cpu(bsb.mtime) + 2*60*60 || + info->array.utime < (int)__le64_to_cpu(bsb.mtime) - 10*60) { + if (check_env("MDADM_GROW_ALLOW_OLD")) { + fprintf(stderr, Name ": accepting backup with timestamp %lu " + "for array with timestamp %lu\n", + (unsigned long)__le64_to_cpu(bsb.mtime), + (unsigned long)info->array.utime); + } else { + if (verbose) + fprintf(stderr, Name ": too-old timestamp on " + "backup-metadata on %s\n", devname); + continue; /* time stamp is too bad */ + } } if (bsb.magic[15] == '1') { @@ -1744,13 +1860,13 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt } /* There should be a duplicate backup superblock 4k before here */ if (lseek64(fd, -4096, 1) < 0 || - read(fd, buf, 4096) != 4096) + read(fd, &bsb2, sizeof(bsb2)) != sizeof(bsb2)) goto second_fail; /* Cannot find leading superblock */ if (bsb.magic[15] == '1') bsbsize = offsetof(struct mdp_backup_super, pad1); else bsbsize = offsetof(struct mdp_backup_super, pad); - if (memcmp(buf, &bsb, bsbsize) != 0) + if (memcmp(&bsb2, &bsb, bsbsize) != 0) goto second_fail; /* Cannot find leading superblock */ /* Now need the data offsets for all devices. 
*/ @@ -1761,7 +1877,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt if (st->ss->load_super(st, fdlist[j], NULL)) /* FIXME should be this be an error */ continue; - st->ss->getinfo_super(st, &dinfo); + st->ss->getinfo_super(st, &dinfo, NULL); st->ss->free_super(st); offsets[j] = dinfo.data_offset * 512; } @@ -1823,7 +1939,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt if (fdlist[j] < 0) continue; if (st->ss->load_super(st, fdlist[j], NULL)) continue; - st->ss->getinfo_super(st, &dinfo); + st->ss->getinfo_super(st, &dinfo, NULL); dinfo.reshape_progress = info->reshape_progress; st->ss->update_super(st, &dinfo, "_reshape_progress", @@ -1897,7 +2013,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, int d; struct mdinfo *sra, *sd; int rv; - int cache; + unsigned long cache; int done = 0; err = sysfs_set_str(info, NULL, "array_state", "readonly"); @@ -1907,7 +2023,13 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, /* make sure reshape doesn't progress until we are ready */ sysfs_set_str(info, NULL, "sync_max", "0"); sysfs_set_str(info, NULL, "array_state", "active"); /* FIXME or clean */ - + + sra = sysfs_read(-1, devname2devnum(info->sys_name), + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| + GET_CACHE); + if (!sra) + return 1; + /* ndisks is not growing, so raid_disks is old and +delta is new */ odisks = info->array.raid_disks; ndisks = odisks + info->delta_disks; @@ -1920,9 +2042,8 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, ochunk = info->array.chunk_size; nchunk = info->new_chunk; - - a = ochunk/512 * odata; - b = nchunk/512 * ndata; + a = (ochunk/512) * odata; + b = (nchunk/512) * ndata; /* Find GCD */ while (a != b) { if (a < b) @@ -1931,12 +2052,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, a -= b; } /* LCM == product / GCD */ - blocks = ochunk/512 * nchunk/512 * odata * ndata 
/ a; - - sra = sysfs_read(-1, devname2devnum(info->sys_name), - GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| - GET_CACHE); - + blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a; if (ndata == odata) while (blocks * 32 < sra->component_size &&