Can now mostly assemble DDF arrays
author Neil Brown <neilb@suse.de>
Thu, 15 May 2008 06:48:19 +0000 (16:48 +1000)
committer Neil Brown <neilb@suse.de>
Thu, 15 May 2008 06:48:19 +0000 (16:48 +1000)
Assemble.c
Create.c
Incremental.c
mdadm.h
super-ddf.c
util.c

index 16dec24..f10491b 100644 (file)
@@ -918,6 +918,10 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
                                        if (fd < 0)
                                                rv = 1;
                                        else {
+                                               devices[j].i.disk.number =
+                                                       devices[j].i.disk.raid_disk;
+                                               st->ss->getinfo_super_n(st,
+                                                              &devices[j].i);
                                                rv = sysfs_add_disk(sra, fd,
                                                              &devices[j].i);
                                                close(fd);
@@ -948,6 +952,21 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
                                        i, mddev);
                }
 
+               if (info.array.level == LEVEL_CONTAINER) {
+                       if (verbose >= 0) {
+                               fprintf(stderr, Name ": Container %s has been "
+                                       "assembled with %d drive%s",
+                                       mddev, okcnt, okcnt==1?"":"s");
+                               if (okcnt < info.array.raid_disks)
+                                       fprintf(stderr, " (out of %d)",
+                                               info.array.raid_disks);
+                               fprintf(stderr, "\n");
+                       }
+                       if (must_close)
+                               close(mdfd);
+                       return 0;
+               }
+
                if (runstop == 1 ||
                    (runstop <= 0 &&
                     ( enough(info.array.level, info.array.raid_disks,
@@ -970,7 +989,8 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
                                        /* There is a nasty race with 'mdadm --monitor'.
                                         * If it opens this device before we close it,
                                         * it gets an incomplete open on which IO
-                                        * doesn't work and the capacity if wrong.
+                                        * doesn't work and the capacity is
+                                        * wrong.
                                         * If we reopen (to check for layered devices)
                                         * before --monitor closes, we loose.
                                         *
index 50da1ad..0bc4738 100644 (file)
--- a/Create.c
+++ b/Create.c
@@ -637,6 +637,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                                rv = 0;
 
                                if (st->ss->external) {
+                                       st->ss->getinfo_super_n(st, &info);
                                        rv = sysfs_add_disk(sra, fd, &info);
                                        close(fd);
                                } else {
@@ -666,12 +667,27 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                /* No need to start */
                ;
        else if (runstop == 1 || subdevs >= raiddisks) {
-               mdu_param_t param;
-               if (ioctl(mdfd, RUN_ARRAY, &param)) {
-                       fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
-                               strerror(errno));
-                       Manage_runstop(mddev, mdfd, -1, 0);
-                       return 1;
+               if (st->ss->external) {
+                       switch(level) {
+                       case LEVEL_LINEAR:
+                       case LEVEL_MULTIPATH:
+                       case 0:
+                               sysfs_set_str(sra, NULL, "array_state",
+                                             "active");
+                               break;
+                       default:
+                               sysfs_set_str(sra, NULL, "array_state",
+                                             "readonly");
+                               break;
+                       }
+               } else {
+                       mdu_param_t param;
+                       if (ioctl(mdfd, RUN_ARRAY, &param)) {
+                               fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
+                                       strerror(errno));
+                               Manage_runstop(mddev, mdfd, -1, 0);
+                               return 1;
+                       }
                }
                if (verbose >= 0)
                        fprintf(stderr, Name ": array %s started.\n", mddev);
index 0fb9afd..2e23677 100644 (file)
@@ -40,7 +40,7 @@ int Incremental(char *devname, int verbose, int runstop,
                struct supertype *st, char *homehost, int autof)
 {
        /* Add this device to an array, creating the array if necessary
-        * and starting the array if sensibe or - if runstop>0 - if possible.
+        * and starting the array if sensible or - if runstop>0 - if possible.
         *
         * This has several steps:
         *
@@ -140,9 +140,17 @@ int Incremental(char *devname, int verbose, int runstop,
                close(dfd);
                return 1;
        }
-       st->ss->getinfo_super(st, &info);
        close (dfd);
 
+       if (st->ss->container_content) {
+               /* This is a pre-built container array, so we do something
+                * rather different.
+                */
+               return Incremental_container(st, devname, verbose, runstop,
+                                            autof);
+       }
+
+       st->ss->getinfo_super(st, &info);
        /* 3/ Check if there is a match in mdadm.conf */
 
        array_list = conf_get_ident(NULL);
@@ -708,3 +716,90 @@ int IncrementalScan(int verbose)
        }
        return rv;
 }
+
+int Incremental_container(struct supertype *st, char *devname, int verbose,
+                         int runstop, int autof)
+{
+       /* Collect the contents of this container and for each
+        * array, choose a device name and assemble the array.
+        *
+        * st:      container metadata; st->ss->container_content(st)
+        *          supplies the list of member arrays.
+        * devname: not referenced in this function.
+        * verbose: progress messages are printed when >= 0.
+        * runstop: when > 0 an array is started even if fewer than
+        *          array.working_disks members could be added.
+        * autof:   values 3 and 5 of the low three bits clear 'usepart';
+        *          the remaining bits are passed to open_mddev_devnum().
+        *
+        * Returns 0 once every array has been processed, or 2 as soon
+        * as an md device cannot be opened.
+        *
+        * NOTE(review): the list returned by container_content() is not
+        * freed here, and the result of sysfs_read() is not checked -
+        * confirm the intended ownership and failure behaviour.
+        */
+
+       struct mdinfo *list = st->ss->container_content(st);
+       struct mdinfo *ra;
+
+       for (ra = list ; ra ; ra = ra->next) {
+               struct mdinfo *sra;
+               struct mdinfo *dev;
+               int devnum = -1;
+               int mdfd;
+               char chosen_name[1024];
+               int usepart = 1;
+               char *n;
+               int working = 0;
+
+               if ((autof&7) == 3 || (autof&7) == 5)
+                       usepart = 0;
+
+               /* An array name of the form "NN" or "dNN" (optionally
+                * followed by a space) requests that device number,
+                * provided it is not already busy.
+                */
+               n = ra->name;
+               if (*n == 'd')
+                       n++;
+               if (*n) {
+                       devnum = strtoul(n, &n, 10);
+                       if (devnum >= 0 && (*n == 0 || *n == ' ')) {
+                               /* Use this devnum */
+                               usepart = (ra->name[0] == 'd');
+                               if (mddev_busy(usepart ? (-1-devnum) : devnum))
+                                       devnum = -1;
+                       } else
+                               devnum = -1;
+               }
+
+               if (devnum >= 0)
+                       devnum = usepart ? (-1-devnum) : devnum;
+               else
+                       devnum = find_free_devnum(usepart);
+               mdfd = open_mddev_devnum(NULL, devnum, ra->name,
+                                        chosen_name, autof>>3);
+
+               if (mdfd < 0) {
+                       fprintf(stderr, Name ": failed to open %s: %s.\n",
+                               chosen_name, strerror(errno));
+                       return 2;
+               }
+
+               sra = sysfs_read(mdfd, 0, 0);
+
+               sysfs_set_array(sra, ra);
+               for (dev = ra->devs; dev; dev = dev->next) {
+                       /* large enough for any two ints plus ':' */
+                       char buf[32];
+                       int dfd;
+                       snprintf(buf, sizeof(buf), "%d:%d",
+                                dev->disk.major, dev->disk.minor);
+                       dfd = dev_open(buf, O_RDONLY);
+                       if (dfd < 0)
+                               /* cannot open this member; it simply
+                                * does not count as working */
+                               continue;
+                       if (sysfs_add_disk(sra, dfd, dev) == 0)
+                               working++;
+                       /* the descriptor is only needed while adding
+                        * the disk, so don't leak one per member */
+                       close(dfd);
+               }
+               if (runstop > 0 || working >= ra->array.working_disks) {
+                       switch(ra->array.level) {
+                       case LEVEL_LINEAR:
+                       case LEVEL_MULTIPATH:
+                       case 0:
+                               sysfs_set_str(sra, NULL, "array_state",
+                                             "active");
+                               break;
+                       default:
+                               sysfs_set_str(sra, NULL, "array_state",
+                                             "readonly");
+                               break;
+                       }
+                       if (verbose >= 0)
+                               printf("Started %s with %d devices\n",
+                                      chosen_name, working);
+               } else
+                       if (verbose >= 0)
+                               printf("%s assembled with %d devices but "
+                                      "not started\n",
+                                      chosen_name, working);
+               close(mdfd);
+       }
+       return 0;
+}
diff --git a/mdadm.h b/mdadm.h
index 93425b1..4c0e9e1 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -142,7 +142,9 @@ struct mdinfo {
        int                     uuid[4];
        char                    name[33];
        unsigned long long      data_offset;
-       unsigned long long      component_size;
+       unsigned long long      component_size; /* same as array.size, except in
+                                                * sectors and up to 64bits.
+                                                */
        int                     reshape_active;
        unsigned long long      reshape_progress;
        int                     new_level, delta_disks, new_layout, new_chunk;
@@ -318,7 +320,7 @@ extern int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
 extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
                        char *name, unsigned long long *val);
 extern int sysfs_set_array(struct mdinfo *sra,
-                          struct mdinfo *array);
+                          struct mdinfo *info);
 extern int sysfs_add_disk(struct mdinfo *sra, int fd, struct mdinfo *sd);
 
 
@@ -355,6 +357,7 @@ extern struct superswitch {
        void (*export_detail_super)(struct supertype *st);
        void (*uuid_from_super)(struct supertype *st, int uuid[4]);
        void (*getinfo_super)(struct supertype *st, struct mdinfo *info);
+       void (*getinfo_super_n)(struct supertype *st, struct mdinfo *info);
        int (*match_home)(struct supertype *st, char *homehost);
        int (*update_super)(struct supertype *st, struct mdinfo *info,
                            char *update,
@@ -381,6 +384,9 @@ extern struct superswitch {
                                 int raiddisks,
                                 int chunk, unsigned long long size,
                                 char *subdev, unsigned long long *freesize);
+
+       struct mdinfo *(*container_content)(struct supertype *st);
+
        int major;
        char *text_version;
        int swapuuid; /* true if uuid is bigending rather than hostendian */
@@ -490,6 +496,8 @@ extern int Wait(char *dev);
 
 extern int Incremental(char *devname, int verbose, int runstop,
                       struct supertype *st, char *homehost, int autof);
+extern int Incremental_container(struct supertype *st, char *devname,
+                                int verbose, int runstop, int autof);
 extern void RebuildMap(void);
 extern int IncrementalScan(int verbose);
 
index b0ed739..b494647 100644 (file)
@@ -288,8 +288,10 @@ struct vd_config {
        __u8    sec_elmnt_count;
        __u8    sec_elmnt_seq;
        __u8    srl;
-       __u64   blocks;
-       __u64   array_blocks;
+       __u64   blocks;         /* blocks per component could be different
+                                * on different component devices...(only
+                                * for concat I hope) */
+       __u64   array_blocks;   /* blocks in array */
        __u8    pad1[8];
        __u32   spare_refs[8];
        __u8    cache_pol[8];
@@ -609,6 +611,7 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        char *conf;
        int i;
        int conflen;
+       int mppe;
 
        /* First the local disk info */
        super->max_part = __be16_to_cpu(super->active->max_partitions);
@@ -620,6 +623,7 @@ static int load_ddf_local(int fd, struct ddf_super *super,
                     super->active->data_section_length,
                     0);
        dl->devname = devname ? strdup(devname) : NULL;
+
        fstat(fd, &stb);
        dl->major = major(stb.st_rdev);
        dl->minor = minor(stb.st_rdev);
@@ -667,8 +671,9 @@ static int load_ddf_local(int fd, struct ddf_super *super,
                        super->conflist = vcl;
                }
                memcpy(&vcl->conf, vd, conflen*512);
+               mppe = __be16_to_cpu(super->anchor.max_primary_element_entries);
                vcl->lba_offset = (__u64*)
-                       &vcl->conf.phys_refnum[super->max_part+1];
+                       &vcl->conf.phys_refnum[mppe];
                dl->vlist[i/conflen] = vcl;
        }
        free(conf);
@@ -1044,7 +1049,9 @@ static void examine_super_ddf(struct supertype *st, char *homehost)
 
        printf("          Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
        printf("        Version : %.8s\n", sb->anchor.revision);
-       printf("Controller GUID : "); print_guid(sb->anchor.guid, 1);
+       printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
+       printf("\n");
+       printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
        printf("\n");
        printf("            Seq : %08x\n", __be32_to_cpu(sb->active->seq));
        printf("  Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
@@ -1185,6 +1192,18 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
 //     info->name[] ?? ;
 }
 
+static void getinfo_super_n_container(struct supertype *st, struct mdinfo *info)
+{
+       /* Only the per-device offset and size are filled in:
+        * data_offset is the config_size of the physical disk entry
+        * selected by info->disk.number, and component_size is a
+        * fixed 32*1024*1024 bytes divided by 512.
+        */
+       struct ddf_super *sb = st->sb;
+
+       info->component_size = (32*1024*1024) / 512;
+       info->data_offset =
+               __be64_to_cpu(sb->phys->entries[info->disk.number].config_size);
+}
+
+static int rlq_to_layout(int rlq, int prl, int raiddisks);
+
 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 {
        struct ddf_super *ddf = st->sb;
@@ -1197,7 +1216,8 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
        info->array.patch_version = 0;
        info->array.raid_disks    = __be16_to_cpu(vd->prim_elmnt_count);
        info->array.level         = map_num1(ddf_level_num, vd->prl);
-       info->array.layout        = vd->rlq; /* FIXME should this be mapped */
+       info->array.layout        = rlq_to_layout(vd->rlq, vd->prl,
+                                                 info->array.raid_disks);
        info->array.md_minor      = -1;
        info->array.ctime         = DECADE + __be32_to_cpu(*(__u32*)(vd->guid+16));
        info->array.utime         = DECADE + __be32_to_cpu(vd->timestamp);
@@ -1217,6 +1237,18 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 //     info->name[] ?? ;
 }
 
+static void getinfo_super_n_bvd(struct supertype *st, struct mdinfo *info)
+{
+       /* Supply the details specific to member info->disk.raid_disk:
+        * data_offset is taken from the lba_offset table of the
+        * config in ddf->newconf, component_size from that config's
+        * 'blocks' field.  Both are stored big-endian.
+        */
+       struct ddf_super *sb = st->sb;
+
+       info->component_size = __be64_to_cpu(sb->newconf->conf.blocks);
+       info->data_offset =
+               __be64_to_cpu(sb->newconf->lba_offset[info->disk.raid_disk]);
+}
+
 static int update_super_ddf(struct supertype *st, struct mdinfo *info,
                            char *update,
                            char *devname, int verbose,
@@ -1558,6 +1590,38 @@ static int layout_to_rlq(int level, int layout, int raiddisks)
        return -1;
 }
 
+static int rlq_to_layout(int rlq, int prl, int raiddisks)
+{
+       /* Translate a DDF (prl, rlq) pair into an md layout number.
+        * Returns -1 for any combination that is not supported.
+        * raiddisks is accepted but not consulted.
+        */
+       if (prl == DDF_RAID0 || prl == DDF_RAID1)
+               /* rlq is not examined: hopefully it is DDF_RAID0_SIMPLE
+                * for RAID0, and SIMPLE or MULTI (depending on
+                * raiddisks) for RAID1.
+                */
+               return 0;
+
+       if (prl == DDF_RAID4)
+               /* anything but RAID4_N is not supported.
+                * FIXME the -1 isn't checked */
+               return rlq == DDF_RAID4_N ? 0 : -1;
+
+       if (prl == DDF_RAID5 || prl == DDF_RAID6) {
+               if (rlq == DDF_RAID5_N_RESTART)
+                       return ALGORITHM_LEFT_ASYMMETRIC;
+               if (rlq == DDF_RAID5_0_RESTART)
+                       return ALGORITHM_RIGHT_ASYMMETRIC;
+               if (rlq == DDF_RAID5_N_CONTINUE)
+                       return ALGORITHM_LEFT_SYMMETRIC;
+       }
+       return -1;
+}
+
 static int init_super_ddf_bvd(struct supertype *st,
                              mdu_array_info_t *info,
                              unsigned long long size,
@@ -1616,7 +1680,8 @@ static int init_super_ddf_bvd(struct supertype *st,
        /* Now create a new vd_config */
        conflen =  __be16_to_cpu(ddf->active->config_record_len);
        vcl = malloc(offsetof(struct vcl, conf) + conflen * 512);
-       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->max_part+1];
+       mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[mppe];
 
        vc = &vcl->conf;
 
@@ -1655,7 +1720,7 @@ static int init_super_ddf_bvd(struct supertype *st,
        memset(vc->v2, 0xff, 16);
        memset(vc->v3, 0xff, 16);
        memset(vc->vendor, 0xff, 32);
-       mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+
        memset(vc->phys_refnum, 0xff, 4*mppe);
        memset(vc->phys_refnum+mppe, 0x00, 8*mppe);
 
@@ -2223,7 +2288,7 @@ static int load_super_ddf_all(struct supertype *st, int fd,
        }
        *sbp = super;
        if (st->ss == NULL) {
-               st->ss = &super_ddf;
+               st->ss = &super_ddf_container;
                st->minor_version = 0;
                st->max_devs = 512;
        }
@@ -2233,6 +2298,97 @@ static int load_super_ddf_all(struct supertype *st, int fd,
 
 
 
+static struct mdinfo *container_content_ddf(struct supertype *st)
+{
+       /* Given a container loaded by load_super_ddf_all,
+        * extract information about all the arrays into
+        * an mdinfo tree.
+        *
+        * For each vcl in conflist: create an mdinfo, fill it in,
+        *  then look for matching devices (phys_refnum) in dlist
+        *  and create appropriate device mdinfo.
+        *
+        * Returns the head of a list of malloc()ed array mdinfo linked
+        * through ->next, each with its member devices on ->devs, or
+        * NULL when conflist is empty.
+        */
+       struct ddf_super *ddf = st->sb;
+       struct mdinfo *rest = NULL;
+       struct vcl *vc;
+
+       for (vc = ddf->conflist ; vc ; vc=vc->next)
+       {
+               int mppe;
+               int i;
+               struct mdinfo *this;
+               this = malloc(sizeof(*this));
+               memset(this, 0, sizeof(*this));
+               this->next = rest;
+               rest = this;
+
+               this->array.major_version = 1000;
+               this->array.minor_version = 0;
+               this->array.patch_version = 0;
+               this->array.level = map_num1(ddf_level_num, vc->conf.prl);
+               this->array.raid_disks =
+                       __be16_to_cpu(vc->conf.prim_elmnt_count);
+               /* map the DDF rlq to an md layout, exactly as
+                * getinfo_super_ddf_bvd does */
+               this->array.layout = rlq_to_layout(vc->conf.rlq,
+                                                  vc->conf.prl,
+                                                  this->array.raid_disks);
+               this->array.md_minor      = -1;
+               this->array.ctime         = DECADE +
+                       __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+               this->array.utime         = DECADE +
+                       __be32_to_cpu(vc->conf.timestamp);
+               this->array.chunk_size    = 512 << vc->conf.chunk_shift;
+
+               /* Find the virtual disk entry with the same GUID.
+                * NOTE(review): if none matches, i ends up equal to
+                * populated_vdes and that entry is used below - confirm
+                * this cannot happen or handle it.
+                */
+               for (i=0; i < __be16_to_cpu(ddf->virt->populated_vdes); i++)
+                       if (memcmp(ddf->virt->entries[i].guid,
+                                  vc->conf.guid, DDF_GUID_LEN) == 0)
+                               break;
+               if (ddf->virt->entries[i].state & DDF_state_inconsistent)
+                       this->array.state = 0;
+               else
+                       this->array.state = 1;
+               /* name[] is 33 bytes: 32 of name, terminator at [32] */
+               memcpy(this->name, ddf->virt->entries[i].name, 32);
+               this->name[32]=0;
+
+               memset(this->uuid, 0, sizeof(this->uuid));
+               this->component_size = __be64_to_cpu(vc->conf.blocks);
+               this->array.size = this->component_size / 2;
+
+               mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+               for (i=0 ; i < mppe ; i++) {
+                       struct mdinfo *dev;
+                       struct dl *d;
+
+                       /* all-ones marks an unused slot */
+                       if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
+                               continue;
+
+                       this->array.working_disks++;
+
+                       for (d = ddf->dlist; d ; d=d->next)
+                               if (d->disk.refnum == vc->conf.phys_refnum[i])
+                                       break;
+                       if (d == NULL)
+                               /* This member is not among the loaded
+                                * devices; later slots may still be.
+                                */
+                               continue;
+
+                       dev = malloc(sizeof(*dev));
+                       memset(dev, 0, sizeof(*dev));
+                       dev->next = this->devs;
+                       this->devs = dev;
+
+                       dev->disk.number = __be32_to_cpu(d->disk.refnum);
+                       dev->disk.major = d->major;
+                       dev->disk.minor = d->minor;
+                       dev->disk.raid_disk = i;
+                       dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+
+                       /* DDF metadata is big-endian on disk */
+                       dev->events = __be32_to_cpu(ddf->primary.seq);
+                       dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
+                       dev->component_size = __be64_to_cpu(vc->conf.blocks);
+                       if (d->devname)
+                               /* dev was zeroed, so copying at most
+                                * size-1 bytes leaves it terminated */
+                               strncpy(dev->name, d->devname,
+                                       sizeof(dev->name) - 1);
+               }
+       }
+       return rest;
+}
+
 static int init_zero_ddf(struct supertype *st,
                         mdu_array_info_t *info,
                         unsigned long long size, char *name,
@@ -2304,6 +2460,7 @@ struct superswitch super_ddf = {
        .store_super    = store_zero_ddf,
        .free_super     = free_super_ddf,
        .match_metadata_desc = match_metadata_desc_ddf,
+       .getinfo_super_n  = getinfo_super_n_container,
 
 
        .major          = 1000,
@@ -2326,6 +2483,8 @@ struct superswitch super_ddf_container = {
 
        .free_super     = free_super_ddf,
 
+       .container_content = container_content_ddf,
+
        .major          = 1000,
        .swapuuid       = 0,
        .external       = 1,
@@ -2343,6 +2502,7 @@ struct superswitch super_ddf_bvd = {
        .init_super     = init_super_ddf_bvd,
        .add_to_super   = add_to_super_ddf_bvd,
        .getinfo_super  = getinfo_super_ddf_bvd,
+       .getinfo_super_n  = getinfo_super_n_bvd,
 
        .load_super     = load_super_ddf,
        .free_super     = free_super_ddf,
diff --git a/util.c b/util.c
index 3838b6e..7b43ee2 100644 (file)
--- a/util.c
+++ b/util.c
@@ -390,6 +390,9 @@ int is_standard(char *dev, int *nump)
        /* tests if dev is a "standard" md dev name.
         * i.e if the last component is "/dNN" or "/mdNN",
         * where NN is a string of digits
+        * Returns 1 if a partitionable standard,
+        *   -1 if non-partitionable,
+        *   0 if not a standard name.
         */
        char *d = strrchr(dev, '/');
        int type=0;