git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
Support external metadata recovery-resume
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 9204edf6145a7fa83ebbbe4aba235f8a2d1e622d..a654d4e84f2aba2571797e11698772bc53d50535 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -381,7 +381,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
 /*
  * When reshaping an array we might need to backup some data.
  * This is written to all spares with a 'super_block' describing it.
- * The superblock goes 1K form the end of the used space on the
+ * The superblock goes 4K from the end of the used space on the
  * device.
  * It if written after the backup is complete.
  * It has the following structure.
@@ -524,6 +524,15 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                return 1;
        }
 
+       if (size >= 0 &&
+           (chunksize || level!= UnSet || layout_str || raid_disks)) {
+               fprintf(stderr, Name ": cannot change component size at the same time "
+                       "as other changes.\n"
+                       "   Change size first, then check data is intact before "
+                       "making other changes.\n");
+               return 1;
+       }
+
        if (raid_disks && raid_disks < array.raid_disks && array.level > 1 &&
            get_linux_version() < 2006032 &&
            !check_env("MDADM_FORCE_FEWER")) {
@@ -644,8 +653,10 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                layout_str = "parity-last";
                } else {
                        c = map_num(pers, level);
-                       if (c == NULL)
-                               return 1;/* not possible */
+                       if (c == NULL) {
+                               rv = 1;/* not possible */
+                               goto release;
+                       }
                        err = sysfs_set_str(sra, NULL, "level", c);
                        if (err) {
                                fprintf(stderr, Name ": %s: could not set level to %s\n",
@@ -849,7 +860,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                if (nlayout == UnSet) {
                                        fprintf(stderr, Name ": layout %s not understood for raid5.\n",
                                                layout_str);
-                                       return 1;
+                                       rv = 1;
+                                       goto release;
                                }
                                break;
 
@@ -858,7 +870,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                if (nlayout == UnSet) {
                                        fprintf(stderr, Name ": layout %s not understood for raid6.\n",
                                                layout_str);
-                                       return 1;
+                                       rv = 1;
+                                       goto release;
                                }
                                break;
                        }
@@ -871,8 +884,14 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        ndata--;
                }
 
+               if (odata == ndata &&
+                   get_linux_version() < 2006032) {
+                       fprintf(stderr, Name ": in-place reshape is not safe before 2.6.32, sorry.\n");
+                       break;
+               }
+
                /* Check that we can hold all the data */
-               size = ndata * array.size;
+               size = ndata * (long long)array.size;
                get_dev_size(fd, NULL, &array_size);
                if (size < (array_size/1024)) {
                        fprintf(stderr, Name ": this change will reduce the size of the array.\n"
@@ -908,9 +927,11 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                if (ndata == odata) {
                        /* Make 'blocks' bigger for better throughput, but
                         * not so big that we reject it below.
+                        * Try for 16 megabytes
                         */
-                       if (blocks * 32 < sra->component_size)
-                               blocks *= 16;
+                       while (blocks * 32 < sra->component_size &&
+                              blocks < 16*1024*2)
+                              blocks *= 2;
                } else
                        fprintf(stderr, Name ": Need to backup %luK of critical "
                                "section..\n", blocks/2);
@@ -961,7 +982,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                char *dn = map_dev(sd->disk.major,
                                                   sd->disk.minor, 1);
                                fdlist[d] = dev_open(dn, O_RDWR);
-                               offsets[d] = (sra->component_size - blocks - 8)*512;
+                               offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
                                if (fdlist[d]<0) {
                                        fprintf(stderr, Name ": %s: cannot open component %s\n",
                                                devname, dn?dn:"-unknown");
@@ -1026,6 +1047,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                
                cache = (nchunk < ochunk) ? ochunk : nchunk;
                cache = cache * 4 / 4096;
+               if (cache < blocks / 8 / odisks + 16)
+                       /* Make it big enough to hold 'blocks' */
+                       cache = blocks / 8 / odisks + 16;
                if (sra->cache_size < cache)
                        sysfs_set_num(sra, NULL, "stripe_cache_size",
                                      cache+1);
@@ -1284,6 +1308,10 @@ int grow_backup(struct mdinfo *sra,
 
                lseek64(destfd[i], destoffsets[i] - 4096, 0);
                write(destfd[i], &bsb, 512);
+               if (destoffsets[i] > 4096) {
+                       lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0);
+                       write(destfd[i], &bsb, 512);
+               }
                fsync(destfd[i]);
        }
 
@@ -1604,7 +1632,6 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                old_disks = cnt;
        for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
                struct mdinfo dinfo;
-               char buf[4096];
                int fd;
                int bsbsize;
                char *devname, namebuf[20];
@@ -1720,13 +1747,13 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                }
                /* There should be a duplicate backup superblock 4k before here */
                if (lseek64(fd, -4096, 1) < 0 ||
-                   read(fd, buf, 4096) != 4096)
+                   read(fd, &bsb2, 4096) != 4096)
                        goto second_fail; /* Cannot find leading superblock */
                if (bsb.magic[15] == '1')
                        bsbsize = offsetof(struct mdp_backup_super, pad1);
                else
                        bsbsize = offsetof(struct mdp_backup_super, pad);
-               if (memcmp(buf, &bsb, bsbsize) != 0)
+               if (memcmp(&bsb2, &bsb, bsbsize) != 0)
                        goto second_fail; /* Cannot find leading superblock */
 
                /* Now need the data offsets for all devices. */
@@ -1739,7 +1766,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                                continue;
                        st->ss->getinfo_super(st, &dinfo);
                        st->ss->free_super(st);
-                       offsets[j] = dinfo.data_offset;
+                       offsets[j] = dinfo.data_offset * 512;
                }
                printf(Name ": restoring critical section\n");
 
@@ -1873,6 +1900,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        int d;
        struct mdinfo *sra, *sd;
        int rv;
+       int cache;
        int done = 0;
 
        err = sysfs_set_str(info, NULL, "array_state", "readonly");
@@ -1908,10 +1936,28 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        /* LCM == product / GCD */
        blocks = ochunk/512 * nchunk/512 * odata * ndata / a;
 
+       sra = sysfs_read(-1, devname2devnum(info->sys_name),
+                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+                        GET_CACHE);
+
+
        if (ndata == odata)
-               blocks *= 16;
+               while (blocks * 32 < sra->component_size &&
+                      blocks < 16*1024*2)
+                       blocks *= 2;
        stripes = blocks / (info->array.chunk_size/512) / odata;
 
+       /* check that the internal stripe cache is
+        * large enough, or it won't work.
+        */
+       cache = (nchunk < ochunk) ? ochunk : nchunk;
+       cache = cache * 4 / 4096;
+       if (cache < blocks / 8 / odisks + 16)
+               /* Make it big enough to hold 'blocks' */
+               cache = blocks / 8 / odisks + 16;
+       if (sra->cache_size < cache)
+               sysfs_set_num(sra, NULL, "stripe_cache_size",
+                             cache+1);
 
        memset(&bsb, 0, 512);
        memcpy(bsb.magic, "md_backup_data-1", 16);
@@ -1927,10 +1973,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        for (d=0; d<odisks; d++)
                fds[d] = -1;
 
-       sra = sysfs_read(-1, devname2devnum(info->sys_name),
-                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
-                        GET_CACHE);
-
        for (sd = sra->devs; sd; sd = sd->next) {
                if (sd->disk.state & (1<<MD_DISK_FAULTY))
                        continue;