X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Grow.c;h=06343a0512e7226a98f6dac8c0b1a38eedd7d8ed;hb=c61c4dd01df75d3a56809db19ec827203505830b;hp=27e0d7024c7404fac5da9c510f2057183d434c32;hpb=9ce510be9c71bae002a3b68ad138b164c908150a;p=thirdparty%2Fmdadm.git diff --git a/Grow.c b/Grow.c index 27e0d702..06343a05 100644 --- a/Grow.c +++ b/Grow.c @@ -288,6 +288,11 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int return 1; } else if (strcmp(file, "internal") == 0) { int d; + if (st->ss->add_internal_bitmap == NULL) { + fprintf(stderr, Name ": Internal bitmaps not supported " + "with %s metadata\n", st->ss->name); + return 1; + } for (d=0; d< st->max_devs; d++) { mdu_disk_info_t disk; char *dv; @@ -381,7 +386,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int /* * When reshaping an array we might need to backup some data. * This is written to all spares with a 'super_block' describing it. - * The superblock goes 1K form the end of the used space on the + * The superblock goes 4K from the end of the used space on the * device. * It if written after the backup is complete. * It has the following structure. @@ -541,7 +546,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, return 1; } sra = sysfs_read(fd, 0, GET_LEVEL); - frozen = freeze_array(sra); + if (sra) + frozen = freeze_array(sra); + else { + fprintf(stderr, Name ": failed to read sysfs parameters for %s\n", + devname); + return 1; + } if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" " be reshaped\n", devname); @@ -563,16 +574,27 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, } else rv = ioctl(fd, SET_ARRAY_INFO, &array); if (rv != 0) { + int err = errno; fprintf(stderr, Name ": Cannot set device size for %s: %s\n", - devname, strerror(errno)); + devname, strerror(err)); + if (err == EBUSY && + (array.state & (1<= sra->component_size/2) { fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", devname); @@ -982,7 +1012,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, char *dn = map_dev(sd->disk.major, sd->disk.minor, 1); fdlist[d] = dev_open(dn, O_RDWR); - offsets[d] = (sra->component_size - blocks - 8)*512; + offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512; if (fdlist[d]<0) { fprintf(stderr, Name ": %s: cannot open component %s\n", devname, dn?dn:"-unknown"); @@ -1060,12 +1090,16 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, if (ochunk == nchunk && olayout == nlayout) { array.raid_disks = ndisks; if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + int err = errno; rv = 1; fprintf(stderr, Name ": Cannot set device shape for %s: %s\n", devname, strerror(errno)); if (ndisks < odisks && get_linux_version() < 2006030) fprintf(stderr, Name ": linux 2.6.30 or later required\n"); + if (err == EBUSY && + (array.state & (1< 4096) { + if (lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0) != + destoffsets[i]+stripes*chunk*odata) + rv = 1; + rv = rv ?: write(destfd[i], &bsb, 512); + } fsync(destfd[i]); } - return 0; + return rv; } /* in 2.6.30, the value reported by sync_completed can be @@ -1324,6 +1373,7 @@ int grow_backup(struct mdinfo *sra, * The various caller give appropriate values so that * every works. */ +/* FIXME return value is often ignored */ int wait_backup(struct mdinfo *sra, unsigned long long offset, /* per device */ unsigned long long blocks, /* per device */ @@ -1337,6 +1387,7 @@ int wait_backup(struct mdinfo *sra, int fd = sysfs_get_fd(sra, NULL, "sync_completed"); unsigned long long completed; int i; + int rv; if (fd < 0) return -1; @@ -1368,24 +1419,28 @@ int wait_backup(struct mdinfo *sra, bsb.length = __cpu_to_le64(0); } bsb.mtime = __cpu_to_le64(time(0)); + rv = 0; for (i = 0; i < dests; i++) { bsb.devstart = __cpu_to_le64(destoffsets[i]/512); bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)); if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0) bsb.sb_csum2 = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb)); - lseek64(destfd[i], destoffsets[i]-4096, 0); - write(destfd[i], &bsb, 512); + if (lseek64(destfd[i], destoffsets[i]-4096, 0) != + destoffsets[i]-4096) + rv = 1; + rv = rv ?: write(destfd[i], &bsb, 512); fsync(destfd[i]); } - return 0; + return rv; } static void fail(char *msg) { - write(2, msg, strlen(msg)); - write(2, "\n", 1); - exit(1); + int rv; + rv = write(2, msg, strlen(msg)); + rv |= write(2, "\n", 1); + exit(rv ? 1 : 2); } static char *abuf, *bbuf; @@ -1421,27 +1476,33 @@ static void validate(int afd, int bfd, unsigned long long offset) free(abuf); free(bbuf); abuflen = len; - posix_memalign((void**)&abuf, 4096, abuflen); - posix_memalign((void**)&bbuf, 4096, abuflen); + if (posix_memalign((void**)&abuf, 4096, abuflen) || + posix_memalign((void**)&bbuf, 4096, abuflen)) { + abuflen = 0; + /* just stop validating on mem-alloc failure */ + return; + } } lseek64(bfd, offset, 0); if (read(bfd, bbuf, len) != len) { - printf("len %llu\n", len); + //printf("len %llu\n", len); fail("read first backup failed"); } lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0); if (read(afd, abuf, len) != len) fail("read first from array failed"); if (memcmp(bbuf, abuf, len) != 0) { + #if 0 int i; printf("offset=%llu len=%llu\n", - __le64_to_cpu(bsb2.arraystart)*512, len); + (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len); for (i=0; icomponent_size - stripes * chunk/512; + if (posix_memalign((void**)&buf, 4096, disks * chunk)) + return 0; + start = sra->component_size - stripes * (chunk/512); sysfs_set_num(sra, NULL, "sync_max", start); sysfs_set_str(sra, NULL, "sync_action", "reshape"); sysfs_set_num(sra, NULL, "suspend_lo", 0); sysfs_set_num(sra, NULL, "suspend_hi", 0); - rv = wait_backup(sra, 0, start - stripes * chunk/512, stripes * chunk/512, + rv = wait_backup(sra, 0, start - stripes * (chunk/512), stripes * (chunk/512), dests, destfd, destoffsets, 0); if (rv < 0) return 0; @@ -1519,9 +1583,9 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes, dests, destfd, destoffsets, 0, °raded, buf); validate(afd, destfd[0], destoffsets[0]); - wait_backup(sra, start, stripes*chunk/512, 0, + wait_backup(sra, start, stripes*(chunk/512), 0, dests, destfd, destoffsets, 0); - sysfs_set_num(sra, NULL, "suspend_lo", (stripes * chunk/512) * data); + sysfs_set_num(sra, NULL, "suspend_lo", (stripes * (chunk/512)) * data); free(buf); /* FIXME this should probably be numeric */ sysfs_set_str(sra, NULL, "sync_max", "max"); @@ -1542,7 +1606,8 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, int degraded = 0; - posix_memalign((void**)&buf, 4096, disks * chunk); + if (posix_memalign((void**)&buf, 4096, disks * chunk)) + return 0; sysfs_set_num(sra, NULL, "suspend_lo", 0); sysfs_set_num(sra, NULL, "suspend_hi", 0); @@ -1555,7 +1620,7 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, disks, chunk, level, layout, dests, destfd, destoffsets, 0, °raded, buf); - grow_backup(sra, (start + stripes) * chunk/512, stripes, + grow_backup(sra, (start + stripes) * (chunk/512), stripes, fds, offsets, disks, chunk, level, layout, dests, destfd, destoffsets, @@ -1565,16 +1630,16 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, start += stripes * 2; /* where to read next */ size = sra->component_size / (chunk/512); while (start < size) { - if (wait_backup(sra, (start-stripes*2)*chunk/512, - stripes*chunk/512, 0, + if (wait_backup(sra, (start-stripes*2)*(chunk/512), + stripes*(chunk/512), 0, dests, destfd, destoffsets, part) < 0) return 0; - sysfs_set_num(sra, NULL, "suspend_lo", start*chunk/512 * data); + sysfs_set_num(sra, NULL, "suspend_lo", start*(chunk/512) * data); if (start + stripes > size) tailstripes = (size - start); - grow_backup(sra, start*chunk/512, tailstripes, + grow_backup(sra, start*(chunk/512), tailstripes, fds, offsets, disks, chunk, level, layout, dests, destfd, destoffsets, @@ -1583,15 +1648,15 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes, part = 1 - part; validate(afd, destfd[0], destoffsets[0]); } - if (wait_backup(sra, (start-stripes*2) * chunk/512, stripes * chunk/512, 0, + if (wait_backup(sra, (start-stripes*2) * (chunk/512), stripes * (chunk/512), 0, dests, destfd, destoffsets, part) < 0) return 0; - sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*chunk/512) * data); - wait_backup(sra, (start-stripes) * chunk/512, tailstripes * chunk/512, 0, + sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*(chunk/512)) * data); + wait_backup(sra, (start-stripes) * (chunk/512), tailstripes * (chunk/512), 0, dests, destfd, destoffsets, 1-part); - sysfs_set_num(sra, NULL, "suspend_lo", (size*chunk/512) * data); + sysfs_set_num(sra, NULL, "suspend_lo", (size*(chunk/512)) * data); sysfs_set_num(sra, NULL, "sync_speed_min", speed); free(buf); return 1; @@ -1628,7 +1693,6 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt old_disks = cnt; for (i=old_disks-(backup_file?1:0); iarray.utime > __le64_to_cpu(bsb.mtime) + 10*60 || + /* array utime and backup-mtime should be updated at much the same time, but it seems that + * sometimes they aren't... So allow considerable flexability in matching, and allow + * this test to be overridden by an environment variable. + */ + if (info->array.utime > __le64_to_cpu(bsb.mtime) + 2*60*60 || info->array.utime < __le64_to_cpu(bsb.mtime) - 10*60) { - if (verbose) - fprintf(stderr, Name ": too-old timestamp on backup-metadata on %s\n", devname); - continue; /* time stamp is too bad */ + if (check_env("MDADM_GROW_ALLOW_OLD")) { + fprintf(stderr, Name ": accepting backup with timestamp %lu " + "for array with timestamp %lu\n", + (unsigned long)__le64_to_cpu(bsb.mtime), + (unsigned long)info->array.utime); + } else { + if (verbose) + fprintf(stderr, Name ": too-old timestamp on " + "backup-metadata on %s\n", devname); + continue; /* time stamp is too bad */ + } } if (bsb.magic[15] == '1') { @@ -1744,13 +1820,13 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt } /* There should be a duplicate backup superblock 4k before here */ if (lseek64(fd, -4096, 1) < 0 || - read(fd, buf, 4096) != 4096) + read(fd, &bsb2, sizeof(bsb2)) != sizeof(bsb2)) goto second_fail; /* Cannot find leading superblock */ if (bsb.magic[15] == '1') bsbsize = offsetof(struct mdp_backup_super, pad1); else bsbsize = offsetof(struct mdp_backup_super, pad); - if (memcmp(buf, &bsb, bsbsize) != 0) + if (memcmp(&bsb2, &bsb, bsbsize) != 0) goto second_fail; /* Cannot find leading superblock */ /* Now need the data offsets for all devices. */ @@ -1763,7 +1839,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt continue; st->ss->getinfo_super(st, &dinfo); st->ss->free_super(st); - offsets[j] = dinfo.data_offset; + offsets[j] = dinfo.data_offset * 512; } printf(Name ": restoring critical section\n"); @@ -1907,7 +1983,13 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, /* make sure reshape doesn't progress until we are ready */ sysfs_set_str(info, NULL, "sync_max", "0"); sysfs_set_str(info, NULL, "array_state", "active"); /* FIXME or clean */ - + + sra = sysfs_read(-1, devname2devnum(info->sys_name), + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| + GET_CACHE); + if (!sra) + return 1; + /* ndisks is not growing, so raid_disks is old and +delta is new */ odisks = info->array.raid_disks; ndisks = odisks + info->delta_disks; @@ -1920,9 +2002,8 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, ochunk = info->array.chunk_size; nchunk = info->new_chunk; - - a = ochunk/512 * odata; - b = nchunk/512 * ndata; + a = (ochunk/512) * odata; + b = (nchunk/512) * ndata; /* Find GCD */ while (a != b) { if (a < b) @@ -1931,12 +2012,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, a -= b; } /* LCM == product / GCD */ - blocks = ochunk/512 * nchunk/512 * odata * ndata / a; - - sra = sysfs_read(-1, devname2devnum(info->sys_name), - GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| - GET_CACHE); - + blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a; if (ndata == odata) while (blocks * 32 < sra->component_size &&