]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
Merge branch 'master' in devel-3.0
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 742f656f02f72b0aefa7e1e8bdc1bf78910f2069..14e48f5696cb0c93fb35eb843a4a5fbd99e40b3e 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2004 Neil Brown <neilb@cse.unsw.edu.au>
+ * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -47,19 +47,18 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
         */
        struct mdinfo info;
 
-       void *super = NULL;
        struct stat stb;
        int nfd, fd2;
        int d, nd;
        struct supertype *st = NULL;
-       
+
 
        if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
                fprintf(stderr, Name ": cannot get array info for %s\n", devname);
                return 1;
        }
 
-       st = super_by_version(info.array.major_version, info.array.minor_version);
+       st = super_by_fd(fd);
        if (!st) {
                fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
                return 1;
@@ -70,7 +69,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
                return 1;
        }
 
-       nfd = open(newdev, O_RDWR|O_EXCL);
+       nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
        if (nfd < 0) {
                fprintf(stderr, Name ": cannot open %s\n", newdev);
                return 1;
@@ -103,9 +102,9 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
                        fprintf(stderr, Name ": cannot open device file %s\n", dv);
                        return 1;
                }
-               if (super) free(super);
-               super= NULL;
-               if (st->ss->load_super(st, fd2, &super, NULL)) {
+               st->ss->free_super(st);
+
+               if (st->ss->load_super(st, fd2, NULL)) {
                        fprintf(stderr, Name ": cannot find super block on %s\n", dv);
                        close(fd2);
                        return 1;
@@ -115,16 +114,18 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
        /* Ok, looks good. Lets update the superblock and write it out to
         * newdev.
         */
-       
+
        info.disk.number = d;
        info.disk.major = major(stb.st_rdev);
        info.disk.minor = minor(stb.st_rdev);
        info.disk.raid_disk = d;
        info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
-       st->ss->update_super(&info, super, "grow", newdev, 0);
+       st->ss->update_super(st, &info, "linear-grow-new", newdev,
+                            0, 0, NULL);
 
-       if (st->ss->store_super(st, nfd, super)) {
-               fprintf(stderr, Name ": Cannot store new superblock on %s\n", newdev);
+       if (st->ss->store_super(st, nfd)) {
+               fprintf(stderr, Name ": Cannot store new superblock on %s\n",
+                       newdev);
                close(nfd);
                return 1;
        }
@@ -165,7 +166,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
                        fprintf(stderr, Name ": cannot open device file %s\n", dv);
                        return 1;
                }
-               if (st->ss->load_super(st, fd2, &super, NULL)) {
+               if (st->ss->load_super(st, fd2, NULL)) {
                        fprintf(stderr, Name ": cannot find super block on %s\n", dv);
                        close(fd);
                        return 1;
@@ -174,14 +175,11 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
                info.array.nr_disks = nd+1;
                info.array.active_disks = nd+1;
                info.array.working_disks = nd+1;
-               info.disk.number = nd;
-               info.disk.major = major(stb.st_rdev);
-               info.disk.minor = minor(stb.st_rdev);
-               info.disk.raid_disk = nd;
-               info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
-               st->ss->update_super(&info, super, "grow", dv, 0);
-               
-               if (st->ss->store_super(st, fd2, super)) {
+
+               st->ss->update_super(st, &info, "linear-grow-update", dv,
+                                    0, 0, NULL);
+
+               if (st->ss->store_super(st, fd2)) {
                        fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
                        close(fd2);
                        return 1;
@@ -255,17 +253,20 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                        devname);
                return 1;
        }
+       if (array.level <= 0) {
+               fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
+                       map_num(pers, array.level)?:"of this array");
+               return 1;
+       }
        bitmapsize = array.size;
        bitmapsize <<= 1;
-#ifdef BLKGETSIZE64
-       if (ioctl(fd, BLKGETSIZE64, &array_size) == 0 &&
+       if (get_dev_size(fd, NULL, &array_size) &&
            array_size > (0x7fffffffULL<<9)) {
                /* Array is big enough that we cannot trust array.size
                 * try other approaches
                 */
                bitmapsize = get_component_size(fd);
        }
-#endif
        if (bitmapsize == 0) {
                fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
                return 1;
@@ -276,7 +277,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                bitmapsize = bitmapsize * array.raid_disks / ncopies;
        }
 
-       st = super_by_version(array.major_version, array.minor_version);
+       st = super_by_fd(fd);
        if (!st) {
                fprintf(stderr, Name ": Cannot understand version %d.%d\n",
                        array.major_version, array.minor_version);
@@ -300,15 +301,16 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                                continue;
                        dv = map_dev(disk.major, disk.minor, 1);
                        if (dv) {
-                               void *super;
                                int fd2 = dev_open(dv, O_RDWR);
                                if (fd2 < 0)
                                        continue;
-                               if (st->ss->load_super(st, fd2, &super, NULL)==0) {
-                                       if (st->ss->add_internal_bitmap(st, super,
-                                                                       chunk, delay, write_behind,
-                                                                       bitmapsize, 0, major))
-                                               st->ss->write_bitmap(st, fd2, super);
+                               if (st->ss->load_super(st, fd2, NULL)==0) {
+                                       if (st->ss->add_internal_bitmap(
+                                                   st,
+                                                   &chunk, delay, write_behind,
+                                                   bitmapsize, 0, major)
+                                               )
+                                               st->ss->write_bitmap(st, fd2);
                                        else {
                                                fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
                                                close(fd2);
@@ -328,7 +330,6 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                int bitmap_fd;
                int d;
                int max_devs = st->max_devs;
-               void *super = NULL;
 
                /* try to load a superblock */
                for (d=0; d<max_devs; d++) {
@@ -345,9 +346,9 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                        if (!dv) continue;
                        fd2 = dev_open(dv, O_RDONLY);
                        if (fd2 >= 0 &&
-                           st->ss->load_super(st, fd2, &super, NULL) == 0) {
+                           st->ss->load_super(st, fd2, NULL) == 0) {
                                close(fd2);
-                               st->ss->uuid_from_super(uuid, super);
+                               st->ss->uuid_from_super(st, uuid);
                                break;
                        }
                        close(fd2);
@@ -395,7 +396,8 @@ struct mdp_backup_super {
        __u64   arraystart;
        __u64   length;
        __u32   sb_csum;        /* csum of preceeding bytes. */
-};
+       __u8 pad[512-68];
+} __attribute__((aligned(512))) bsb;
 
 int bsb_csum(char *buf, int len)
 {
@@ -419,7 +421,6 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        struct mdu_array_info_s array;
        char *c;
 
-       struct mdp_backup_super bsb;
        struct supertype *st;
 
        int nlevel, olevel;
@@ -433,10 +434,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        int d, i, spares;
        int nrdisks;
        int err;
-       void *super = NULL;
 
-       struct sysarray *sra;
-       struct sysdev *sd;
+       struct mdinfo *sra;
+       struct mdinfo *sd;
 
        if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
                fprintf(stderr, Name ": %s is not an active md array - aborting\n",
@@ -519,9 +519,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        case 4:
        case 5:
        case 6:
-               st = super_by_version(array.major_version,
-                                     array.minor_version);
-               /* size can be changed independantly.
+               st = super_by_fd(fd);
+
+               /* size can be changed independently.
                 * layout/chunksize/raid_disks/level can be changed
                 * though the kernel may not support it all.
                 * If 'suspend_lo' is not present in devfs, then
@@ -615,10 +615,12 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        last_block = nstripe * ndata;
                        ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
                }
-               printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
+               fprintf(stderr, Name ": Need to backup %lluK of critical "
+                       "section..\n", last_block/2);
 
                sra = sysfs_read(fd, 0,
-                                GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
+                                GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+                                GET_CACHE);
                if (!sra) {
                        fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
                                devname);
@@ -630,13 +632,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                devname);
                        return 1;
                }
-               if (sra->spares == 0 && backup_file == NULL) {
+               if (sra->array.spare_disks == 0 && backup_file == NULL) {
                        fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
                                devname);
                        return 1;
                }
 
-               nrdisks = array.nr_disks + sra->spares;
+               nrdisks = array.nr_disks + sra->array.spare_disks;
                /* Now we need to open all these devices so we can read/write.
                 */
                fdlist = malloc((1+nrdisks) * sizeof(int));
@@ -649,25 +651,28 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        fdlist[d] = -1;
                d = array.raid_disks;
                for (sd = sra->devs; sd; sd=sd->next) {
-                       if (sd->state & (1<<MD_DISK_FAULTY))
+                       if (sd->disk.state & (1<<MD_DISK_FAULTY))
                                continue;
-                       if (sd->state & (1<<MD_DISK_SYNC)) {
-                               char *dn = map_dev(sd->major, sd->minor, 1);
-                               fdlist[sd->role] = dev_open(dn, O_RDONLY);
-                               offsets[sd->role] = sd->offset;
-                               if (fdlist[sd->role] < 0) {
+                       if (sd->disk.state & (1<<MD_DISK_SYNC)) {
+                               char *dn = map_dev(sd->disk.major,
+                                                  sd->disk.minor, 1);
+                               fdlist[sd->disk.raid_disk]
+                                       = dev_open(dn, O_RDONLY);
+                               offsets[sd->disk.raid_disk] = sd->data_offset;
+                               if (fdlist[sd->disk.raid_disk] < 0) {
                                        fprintf(stderr, Name ": %s: cannot open component %s\n",
-                                               devname, dn);
+                                               devname, dn?dn:"-unknown-");
                                        goto abort;
                                }
                        } else {
                                /* spare */
-                               char *dn = map_dev(sd->major, sd->minor, 1);
+                               char *dn = map_dev(sd->disk.major,
+                                                  sd->disk.minor, 1);
                                fdlist[d] = dev_open(dn, O_RDWR);
-                               offsets[d] = sd->offset;
+                               offsets[d] = sd->data_offset;
                                if (fdlist[d]<0) {
                                        fprintf(stderr, Name ": %s: cannot open component %s\n",
-                                               devname, dn);
+                                               devname, dn?dn:"-unknown");
                                        goto abort;
                                }
                                d++;
@@ -679,9 +684,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                        " --grow aborted\n", devname, i);
                                goto abort;
                        }
-               spares = sra->spares;
+               spares = sra->array.spare_disks;
                if (backup_file) {
-                       fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600);
+                       fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR);
                        if (fdlist[d] < 0) {
                                fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
                                        devname, backup_file, strerror(errno));
@@ -698,7 +703,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                }
 
                /* Find a superblock */
-               if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
+               if (st->ss->load_super(st, fdlist[0], NULL)) {
                        fprintf(stderr, Name ": %s: Cannot find a superblock\n",
                                devname);
                        goto abort;
@@ -706,7 +711,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 
 
                memcpy(bsb.magic, "md_backup_data-1", 16);
-               st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
+               st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
                bsb.mtime = __cpu_to_le64(time(0));
                bsb.arraystart = 0;
                bsb.length = __cpu_to_le64(last_block);
@@ -715,7 +720,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 * a leading superblock 4K earlier.
                 */
                for (i=array.raid_disks; i<d; i++) {
-                       char buf[4096];
+                       char abuf[4096+512];
+                       char *buf = (char*)(((unsigned long)abuf+511)& ~511);
                        if (i==d-1 && backup_file) {
                                /* This is the backup file */
                                offsets[i] = 8;
@@ -726,7 +732,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                fprintf(stderr, Name ": could not seek...\n");
                                goto abort;
                        }
-                       memset(buf, 0, sizeof(buf));
+                       memset(buf, 0, 4096);
                        bsb.devstart = __cpu_to_le64(offsets[i]);
                        bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
                        memcpy(buf, &bsb, sizeof(bsb));
@@ -740,10 +746,25 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                array.chunk_size = nchunk;
                array.layout = nlayout;
                if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+                       if (errno == ENOSPC) {
+                               /* stripe cache is not big enough.
+                                * It needs to be 4 times chunksize_size,
+                                * and we assume pagesize is 4K
+                                */
+                               if (sra->cache_size < 4 * (nchunk/4096)) {
+                                       sysfs_set_num(sra, NULL,
+                                                     "stripe_cache_size",
+                                                     4 * (nchunk/4096) +1);
+                                       if (ioctl(fd, SET_ARRAY_INFO,
+                                                 &array) == 0)
+                                               goto ok;
+                               }
+                       }
                        fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
                                devname, strerror(errno));
                        goto abort;
                }
+               ok: ;
 
                /* suspend the relevant region */
                sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
@@ -773,7 +794,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
                            write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
                            fsync(fdlist[i]) != 0) {
-                               fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
+                               fprintf(stderr, Name ": %s: failed to save metadata for critical region backups.\n",
                                        devname);
                                goto abort_resume;
                        }
@@ -796,12 +817,15 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                break;
                        sleep(1);
                }
-               
+
                /* invalidate superblocks */
                memset(&bsb, 0, sizeof(bsb));
                for (i=odisks; i<d ; i++) {
                        lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
-                       write(fdlist[i], &bsb, sizeof(bsb));
+                       if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
+                               fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
+                                       devname, i);
+                       }
                }
 
                /* unsuspend. */
@@ -815,7 +839,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                if (backup_file)
                        unlink(backup_file);
 
-               printf(Name ": ... critical section passed.\n");
+               fprintf(stderr, Name ": ... critical section passed.\n");
                break;
        }
        return 0;
@@ -844,8 +868,9 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
 {
        int i, j;
        int old_disks;
-       int err = 0;
        unsigned long long *offsets;
+       unsigned long long  nstripe, ostripe, last_block;
+       int ndata, odata;
 
        if (info->delta_disks < 0)
                return 1; /* cannot handle a shrink */
@@ -857,9 +882,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
        old_disks = info->array.raid_disks - info->delta_disks;
 
        for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
-               void *super = NULL;
                struct mdinfo dinfo;
-               struct mdp_backup_super bsb;
                char buf[4096];
                int fd;
 
@@ -878,11 +901,12 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        fd = fdlist[i];
                        if (fd < 0)
                                continue;
-                       if (st->ss->load_super(st, fd, &super, NULL))
+                       if (st->ss->load_super(st, fd, NULL))
                                continue;
 
-                       st->ss->getinfo_super(&dinfo, super);
-                       free(super); super = NULL;
+                       st->ss->getinfo_super(st, &dinfo);
+                       st->ss->free_super(st);
+
                        if (lseek64(fd,
                                    (dinfo.data_offset + dinfo.component_size - 8) <<9,
                                    0) < 0)
@@ -920,11 +944,11 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                for(j=0; j<info->array.raid_disks; j++) {
                        if (fdlist[j] < 0)
                                continue;
-                       if (st->ss->load_super(st, fdlist[j], &super, NULL))
+                       if (st->ss->load_super(st, fdlist[j], NULL))
                                /* FIXME should be this be an error */
                                continue;
-                       st->ss->getinfo_super(&dinfo, super);
-                       free(super); super = NULL;
+                       st->ss->getinfo_super(st, &dinfo);
+                       st->ss->free_super(st);
                        offsets[j] = dinfo.data_offset;
                }
                printf(Name ": restoring critical section\n");
@@ -937,24 +961,45 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                                    fd, __le64_to_cpu(bsb.devstart)*512,
                                    0, __le64_to_cpu(bsb.length)*512)) {
                        /* didn't succeed, so giveup */
-                       return -1;
+                       return 1;
                }
 
                /* Ok, so the data is restored. Let's update those superblocks. */
 
                for (j=0; j<info->array.raid_disks; j++) {
                        if (fdlist[j] < 0) continue;
-                       if (st->ss->load_super(st, fdlist[j], &super, NULL))
+                       if (st->ss->load_super(st, fdlist[j], NULL))
                                continue;
-                       st->ss->getinfo_super(&dinfo, super);
+                       st->ss->getinfo_super(st, &dinfo);
                        dinfo.reshape_progress = __le64_to_cpu(bsb.length);
-                       st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
-                       st->ss->store_super(st, fdlist[j], super);
-                       free(super);
+                       st->ss->update_super(st, &dinfo,
+                                            "_reshape_progress",
+                                            NULL,0, 0, NULL);
+                       st->ss->store_super(st, fdlist[j]);
+                       st->ss->free_super(st);
                }
 
                /* And we are done! */
                return 0;
        }
-       return err;
+       /* Didn't find any backup data, try to see if any
+        * was needed.
+        */
+       nstripe = ostripe = 0;
+       odata = info->array.raid_disks - info->delta_disks - 1;
+       if (info->array.level == 6) odata--; /* number of data disks */
+       ndata = info->array.raid_disks - 1;
+       if (info->new_level == 6) ndata--;
+       last_block = 0;
+       while (nstripe >= ostripe) {
+               nstripe += info->new_chunk / 512;
+               last_block = nstripe * ndata;
+               ostripe = last_block / odata / (info->array.chunk_size/512) *
+                       (info->array.chunk_size/512);
+       }
+
+       if (info->reshape_progress >= last_block)
+               return 0;
+       /* needed to recover critical section! */
+       return 1;
 }