git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
Support restarting of a reshape on --assemble
author Neil Brown <neilb@suse.de>
Mon, 20 Mar 2006 03:17:31 +0000 (03:17 +0000)
committer Neil Brown <neilb@suse.de>
Mon, 20 Mar 2006 03:17:31 +0000 (03:17 +0000)
Signed-off-by: Neil Brown <neilb@suse.de>
Assemble.c
Grow.c
mdadm.h
restripe.c
super0.c
super1.c
sysfs.c

diff --git a/Assemble.c b/Assemble.c
index cc906db51bcfb09e296e7f98e829e1fca28bf69a..480c512cd5cdfe06576b101987ce57b1e704bcf4 100644 (file)
--- a/Assemble.c
+++ b/Assemble.c
@@ -551,7 +551,7 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
                int fd;
                fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
                if (fd < 0) {
-                       fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
+                       fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
                                devices[chosen_drive].devname);
                        return 1;
                }
@@ -564,6 +564,37 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
                close(fd);
        }
 
+       /* If we are in the middle of a reshape we may need to restore saved data
+        * that was moved aside due to the reshape overwriting live data
+        * The code for doing this lives in Grow.c
+        */
+       if (info.reshape_active) {
+               int err = 0;
+               int *fdlist = malloc(sizeof(int)* bestcnt);
+               for (i=0; i<bestcnt; i++) {
+                       int j = best[i];
+                       if (j >= 0) {
+                               fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL);
+                               if (fdlist[i] < 0) {
+                                       fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
+                                               devices[j].devname);
+                                       err = 1;
+                                       break;
+                               }
+                       } else
+                               fdlist[i] = -1;
+               }
+               if (!err)
+                       err = Grow_restart(st, &info, fdlist, bestcnt);
+               while (i>0) {
+                       i--;
+                       if (fdlist[i]>=0) close(fdlist[i]);
+               }
+               if (err) {
+                       fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
+                       return err;
+               }
+       }
        /* count number of in-sync devices according to the superblock.
         * We must have this number to start the array without -s or -R
         */
diff --git a/Grow.c b/Grow.c
index ece2bda384f5bc3c36012a78b3a9fc0597e43b95..6d09dc689c8500acf97beb87a48849046d352185 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -219,7 +219,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
        }
 
        if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
-               if (errno == ENOMEM) 
+               if (errno == ENOMEM)
                        fprintf(stderr, Name ": Memory allocation failure.\n");
                else
                        fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
@@ -605,12 +605,12 @@ int Grow_reshape(char *devname, int fd, int quiet,
                 * from
                 */
                nstripe = ostripe = 0;
-               while (nstripe+ochunk/512 >= ostripe) {
+               while (nstripe >= ostripe) {
                        nstripe += nchunk/512;
                        last_block = nstripe * ndata;
-                       ostripe = last_block / odata;
+                       ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
                }
-               printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2);
+               printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
 
                sra = sysfs_read(fd, 0,
                                 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
@@ -625,6 +625,11 @@ int Grow_reshape(char *devname, int fd, int quiet,
                                devname);
                        return 1;
                }
+               if (sra->spares == 0) {
+                       fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n",
+                               devname);
+                       return 1;
+               }
 
                nrdisks = array.nr_disks + sra->spares;
                /* Now we need to open all these devices so we can read/write.
@@ -724,13 +729,13 @@ int Grow_reshape(char *devname, int fd, int quiet,
                        goto abort_resume;
                }
                /* FIXME write superblocks */
-               memcpy(bsb.magic, "md_backups_data-1", 16);
+               memcpy(bsb.magic, "md_backup_data-1", 16);
                st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
-               bsb.mtime = time(0);
+               bsb.mtime = __cpu_to_le64(time(0));
                bsb.arraystart = 0;
-               bsb.length = last_block;
+               bsb.length = __cpu_to_le64(last_block);
                for (i=odisks; i<d ; i++) {
-                       bsb.devstart = offsets[i];
+                       bsb.devstart = __cpu_to_le64(offsets[i]);
                        bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
                        lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
                        write(fdlist[i], &bsb, sizeof(bsb));
@@ -769,6 +774,7 @@ int Grow_reshape(char *devname, int fd, int quiet,
                free(fdlist);
                free(offsets);
 
+               printf("mdadm: ... critical section passed.\n");
                break;
        }
        return 0;
@@ -785,3 +791,114 @@ int Grow_reshape(char *devname, int fd, int quiet,
        return 1;
 
 }
+
+/*
+ * If any spare contains md_backup_data-1 which is recent wrt mtime,
+ * write that data into the array and update the super blocks with
+ * the new reshape_progress
+ */
+int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt)
+{
+       int i, j;
+       int old_disks;
+       int err = 0;
+       unsigned long long *offsets;
+
+       if (info->delta_disks < 0)
+               return 1; /* cannot handle a shrink */
+       if (info->new_level != info->array.level ||
+           info->new_layout != info->array.layout ||
+           info->new_chunk != info->array.chunk_size)
+               return 1; /* Can only handle change in disks */
+
+       old_disks = info->array.raid_disks - info->delta_disks;
+
+       for (i=old_disks; i<cnt; i++) {
+               void *super = NULL;
+               struct mdinfo dinfo;
+               struct mddev_ident_s id;
+               struct mdp_backup_super bsb;
+
+               /* This was a spare and may have some saved data on it.
+                * Load the superblock, find and load the
+                * backup_super_block.
+                * If either fail, go on to next device.
+                * If the backup contains no new info, just return
+                * Else restore data and update all superblocks
+                */
+               if (fdlist[i] < 0)
+                       continue;
+               if (st->ss->load_super(st, fdlist[i], &super, NULL))
+                       continue;
+
+               st->ss->getinfo_super(&dinfo, &id, super);
+               free(super); super = NULL;
+               if (lseek64(fdlist[i],
+                       (dinfo.data_offset + dinfo.component_size - 8) <<9,
+                           0) < 0)
+                       continue; /* Cannot seek */
+               if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb))
+                       continue; /* Cannot read */
+               if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
+                       continue;
+               if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
+                       continue; /* bad checksum */
+               if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
+                       continue; /* Wrong uuid */
+
+               if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
+                   info->array.utime < __le64_to_cpu(bsb.mtime))
+                       continue; /* time stamp is too bad */
+
+               if (__le64_to_cpu(bsb.arraystart) != 0)
+                       continue; /* Can only handle backup from start of array */
+               if (__le64_to_cpu(bsb.length) <
+                   info->reshape_progress)
+                       continue; /* No new data here */
+
+               if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0)< 0)
+                       continue; /* Cannot seek */
+
+               /* Now need the data offsets for all devices. */
+               offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
+               for(j=0; j<info->array.raid_disks; j++) {
+                       if (fdlist[j] < 0)
+                               continue;
+                       if (st->ss->load_super(st, fdlist[j], &super, NULL))
+                               /* FIXME should this be an error */
+                               continue;
+                       st->ss->getinfo_super(&dinfo, &id, super);
+                       free(super); super = NULL;
+                       offsets[j] = dinfo.data_offset;
+               }
+               printf(Name ": restoring critical section\n");
+
+               if (restore_stripes(fdlist, offsets,
+                                   info->array.raid_disks,
+                                   info->new_chunk,
+                                   info->new_level,
+                                   info->new_layout,
+                                   fdlist[i], __le64_to_cpu(bsb.devstart)*512,
+                                   0, __le64_to_cpu(bsb.length)*512)) {
+                       /* didn't succeed, so giveup */
+                       return 0;
+               }
+
+               /* Ok, so the data is restored. Let's update those superblocks. */
+
+               for (j=0; j<info->array.raid_disks; j++) {
+                       if (fdlist[j] < 0) continue;
+                       if (st->ss->load_super(st, fdlist[j], &super, NULL))
+                               continue;
+                       st->ss->getinfo_super(&dinfo, &id, super);
+                       dinfo.reshape_progress = __le64_to_cpu(bsb.length);
+                       st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
+                       st->ss->store_super(st, fdlist[j], super);
+                       free(super);
+               }
+
+               /* And we are done! */
+               return 0;
+       }
+       return err;
+}
diff --git a/mdadm.h b/mdadm.h
index 61d0469c456907b015cdd9d55e9ca0aac56bd21f..00c280b35572d2448c2170c540c752b9cf0c3f0a 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -91,6 +91,11 @@ struct mdinfo {
        mdu_disk_info_t         disk;
        __u64                   events;
        int                     uuid[4];
+       unsigned long long      data_offset;
+       unsigned long long      component_size;
+       int                     reshape_active;
+       unsigned long long      reshape_progress;
+       int                     new_level, delta_disks, new_layout, new_chunk;
 };
 
 #define Name "mdadm"
@@ -225,6 +230,10 @@ extern int save_stripes(int *source, unsigned long long *offsets,
                        int raid_disks, int chunk_size, int level, int layout,
                        int nwrites, int *dest,
                        unsigned long long start, unsigned long long length);
+extern int restore_stripes(int *dest, unsigned long long *offsets,
+                          int raid_disks, int chunk_size, int level, int layout,
+                          int source, unsigned long long read_offset,
+                          unsigned long long start, unsigned long long length);
 
 #ifndef Sendmail
 #define Sendmail "/usr/lib/sendmail -t"
@@ -302,6 +311,7 @@ extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int dela
 extern int Grow_reshape(char *devname, int fd, int quiet,
                        long long size,
                        int level, int layout, int chunksize, int raid_disks);
+extern int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt);
 
 
 extern int Assemble(struct supertype *st, char *mddev, int mdfd,
diff --git a/restripe.c b/restripe.c
index 94a0e3d8b74133c2091b5e2687fd480fc13b3f68..b7b36368130f9326688425eb365808955496ad12 100644 (file)
--- a/restripe.c
+++ b/restripe.c
@@ -162,7 +162,7 @@ int save_stripes(int *source, unsigned long long *offsets,
 /* Restore data:
  * We are given:
  *  A list of 'fds' of the active disks. Some may be '-1' for not-available.
- *  A geometry: raid_disks, chunk_sisze, level, layout
+ *  A geometry: raid_disks, chunk_size, level, layout
  *  An 'fd' to read from.  It is already seeked to the right (Read) location.
  *  A start and length.
  * The length must be a multiple of the stripe size.
@@ -172,7 +172,7 @@ int save_stripes(int *source, unsigned long long *offsets,
  */
 int restore_stripes(int *dest, unsigned long long *offsets,
                    int raid_disks, int chunk_size, int level, int layout,
-                   int source,
+                   int source, unsigned long long read_offset,
                    unsigned long long start, unsigned long long length)
 {
        char *stripe_buf = malloc(raid_disks * chunk_size);
@@ -199,8 +199,11 @@ int restore_stripes(int *dest, unsigned long long *offsets,
                        int disk = geo_map(i, start/chunk_size/data_disks,
                                           raid_disks, level, layout);
                        blocks[i] = stripes[disk];
+                       if (lseek64(source, read_offset, 0) != read_offset)
+                               return -1;
                        if (read(source, stripes[disk], chunk_size) != chunk_size)
                                return -1;
+                       read_offset += chunk_size;
                }
                /* We have the data, now do the parity */
                offset = (start/chunk_size/data_disks) * chunk_size;
@@ -311,7 +314,7 @@ main(int argc, char *argv[])
        } else {
                int rv = restore_stripes(fds, offsets,
                                         raid_disks, chunk_size, level, layout,
-                                        storefd,
+                                        storefd, 0ULL,
                                         start, length);
                if (rv != 0) {
                        fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv);
diff --git a/super0.c b/super0.c
index 52be23ddd15c54130aa6a2c05c655c5aaa8c6e5a..99d09a437d5d144695ce24baae3787262755b996 100644 (file)
--- a/super0.c
+++ b/super0.c
@@ -80,7 +80,7 @@ void super0_swap_endian(struct mdp_superblock_s *sb)
        sb->cp_events_hi = sb->cp_events_lo;
        sb->cp_events_lo = t32;
 
-}      
+}
 
 #ifndef MDASSEMBLE
 
@@ -182,7 +182,7 @@ static void examine_super0(void *sbv)
        case -1:
                printf("       Rounding : %dK\n", sb->chunk_size/1024);
                break;
-       default: break;         
+       default: break;
        }
        printf("\n");
        printf("      Number   Major   Minor   RaidDevice State\n");
@@ -279,6 +279,9 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
        info->array.layout = sb->layout;
        info->array.md_minor = sb->md_minor;
        info->array.ctime = sb->ctime;
+       info->array.utime = sb->utime;
+       info->array.chunk_size = sb->chunk_size;
+       info->component_size = sb->size*2;
 
        info->disk.state = sb->this_disk.state;
        info->disk.major = sb->this_disk.major;
@@ -287,9 +290,20 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
        info->disk.number = sb->this_disk.number;
 
        info->events = md_event(sb);
+       info->data_offset = 0;
 
        uuid_from_super0(info->uuid, sbv);
 
+       if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
+               info->reshape_active = 1;
+               info->reshape_progress = sb->reshape_position;
+               info->new_level = sb->new_level;
+               info->delta_disks = sb->delta_disks;
+               info->new_layout = sb->new_layout;
+               info->new_chunk = sb->new_chunk;
+       } else
+               info->reshape_active = 0;
+
        ident->name[0] = 0;
        /* work_disks is calculated rather than read directly */
        for (i=0; i < MD_SB_DISKS; i++)
@@ -403,6 +417,8 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev
                sb->set_uuid2 = info->uuid[2];
                sb->set_uuid3 = info->uuid[3];
        }
+       if (strcmp(update, "_reshape_progress")==0)
+               sb->reshape_position = info->reshape_progress;
 
        sb->sb_csum = calc_sb0_csum(sb);
        return rv;
@@ -481,7 +497,7 @@ static void add_to_super0(void *sbv, mdu_disk_info_t *dinfo)
 {
        mdp_super_t *sb = sbv;
        mdp_disk_t *dk = &sb->disks[dinfo->number];
-       
+
        dk->number = dinfo->number;
        dk->major = dinfo->major;
        dk->minor = dinfo->minor;
@@ -508,7 +524,7 @@ static int store_super0(struct supertype *st, int fd, void *sbv)
 
        if (dsize < MD_RESERVED_SECTORS*2*512)
                return 2;
-       
+
        offset = MD_NEW_SIZE_SECTORS(dsize>>9);
 
        offset *= 512;
@@ -622,7 +638,7 @@ static int load_super0(struct supertype *st, int fd, void **sbp, char *devname)
                                devname, size);
                return 1;
        }
-       
+
        offset = MD_NEW_SIZE_SECTORS(dsize>>9);
 
        offset *= 512;
@@ -717,7 +733,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int
        mdp_super_t *sb = sbv;
        bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MD_SB_BYTES);
 
-       
+
        min_chunk = 4096; /* sub-page chunks don't work yet.. */
        bits = (size * 512)/ min_chunk +1;
        while (bits > max_bits) {
@@ -744,7 +760,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int
 
        return 1;
 }
-               
+
 
 void locate_bitmap0(struct supertype *st, int fd, void *sbv)
 {
@@ -763,7 +779,7 @@ void locate_bitmap0(struct supertype *st, int fd, void *sbv)
 
        if (dsize < MD_RESERVED_SECTORS*2)
                return;
-       
+
        offset = MD_NEW_SIZE_SECTORS(dsize>>9);
 
        offset *= 512;
@@ -796,8 +812,8 @@ int write_bitmap0(struct supertype *st, int fd, void *sbv)
        }
 
        if (dsize < MD_RESERVED_SECTORS*2)
-               return -1;
-       
+       return -1;
+
        offset = MD_NEW_SIZE_SECTORS(dsize>>9);
 
        offset *= 512;
diff --git a/super1.c b/super1.c
index dd852e583fca0f97be8ba158f2302e6d30b09a49..4df637004be7cfea651cc18cd4a3e7eaffe2cf3f 100644 (file)
--- a/super1.c
+++ b/super1.c
@@ -368,6 +368,11 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
        info->array.layout = __le32_to_cpu(sb->layout);
        info->array.md_minor = -1;
        info->array.ctime = __le64_to_cpu(sb->ctime);
+       info->array.utime = __le64_to_cpu(sb->utime);
+       info->array.chunk_size = __le32_to_cpu(sb->chunksize)/512;
+
+       info->data_offset = __le64_to_cpu(sb->data_offset);
+       info->component_size = __le64_to_cpu(sb->size);
 
        info->disk.major = 0;
        info->disk.minor = 0;
@@ -397,6 +402,16 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
        strncpy(ident->name, sb->set_name, 32);
        ident->name[32] = 0;
 
+       if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
+               info->reshape_active = 1;
+               info->reshape_progress = __le64_to_cpu(sb->reshape_position);
+               info->new_level = __le32_to_cpu(sb->new_level);
+               info->delta_disks = __le32_to_cpu(sb->delta_disks);
+               info->new_layout = __le32_to_cpu(sb->new_layout);
+               info->new_chunk = __le32_to_cpu(sb->new_chunk);
+       } else
+               info->reshape_active = 0;
+
        for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
                role = __le16_to_cpu(sb->dev_roles[i]);
                if (/*role == 0xFFFF || */role < info->array.raid_disks)
@@ -453,6 +468,8 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update, char *dev
        }
        if (strcmp(update, "uuid") == 0)
                memcpy(sb->set_uuid, info->uuid, 16);
+       if (strcmp(update, "_reshape_progress")==0)
+               sb->reshape_position = __cpu_to_le64(info->reshape_progress);
 
        sb->sb_csum = calc_sb_1_csum(sb);
        return rv;
diff --git a/sysfs.c b/sysfs.c
index 989476068da6eb0aee77e5aef68e09e03574653d..1774509ce7d84de96b0665b69529f4f8a9fbab72 100644 (file)
--- a/sysfs.c
+++ b/sysfs.c
@@ -98,6 +98,8 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
                if (load_sys(fname, buf))
                        goto abort;
                sra->component_size = strtoull(buf, NULL, 0);
+               /* sysfs reports "K", but we want sectors */
+               sra->component_size *= 2;
        }
        if (options & GET_CHUNK) {
                strcpy(base, "chunk_size");
@@ -192,6 +194,8 @@ unsigned long long get_component_size(int fd)
         * We cannot trust GET_ARRAY_INFO ioctl as it's
         * size field is only 32bits.
         * So look in /sys/block/mdXXX/md/component_size
+        *
+        * WARNING: this returns in units of Kilobytes.
         */
        struct stat stb;
        char fname[50];