]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-ddf.c
Create arrays via metadata-update
[thirdparty/mdadm.git] / super-ddf.c
index 91c53dae143bfa44fcbaebaf458c8dce091ae750..aefe25ef25cb22f07e951fc2b7dbb37712894ffb 100644 (file)
@@ -71,7 +71,7 @@ unsigned long crc32(
 #define        DDF_CONCAT      0x1f
 #define        DDF_RAID5E      0x15
 #define        DDF_RAID5EE     0x25
-#define        DDF_RAID6       0x16    /* Vendor unique layout */
+#define        DDF_RAID6       0x06
 
 /* Raid Level Qualifier (RLQ) */
 #define        DDF_RAID0_SIMPLE        0x00
@@ -83,6 +83,7 @@ unsigned long crc32(
 #define        DDF_RAID4_N             0x01 /* parity in last extent */
 /* these apply to raid5e and raid5ee as well */
 #define        DDF_RAID5_0_RESTART     0x00 /* same as 'right asymmetric' - layout 1 */
+#define        DDF_RAID6_0_RESTART     0x01 /* raid6 different from raid5 here!!! */
 #define        DDF_RAID5_N_RESTART     0x02 /* same as 'left asymmetric' - layout 0 */
 #define        DDF_RAID5_N_CONTINUE    0x03 /* same as 'left symmetric' - layout 2 */
 
@@ -108,13 +109,14 @@ unsigned long crc32(
 #define        DDF_BBM_LOG_MAGIC       __cpu_to_be32(0xABADB10C)
 
 #define        DDF_GUID_LEN    24
-#define DDF_REVISION   "01.00.00"
+#define DDF_REVISION_0 "01.00.00"
+#define DDF_REVISION_2 "01.02.00"
 
 struct ddf_header {
-       __u32   magic;
+       __u32   magic;          /* DDF_HEADER_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
-       char    revision[8];    /* 01.00.00 */
+       char    revision[8];    /* 01.02.00 */
        __u32   seq;            /* starts at '1' */
        __u32   timestamp;
        __u8    openflag;
@@ -167,7 +169,7 @@ struct ddf_header {
 
 /* The content of the 'controller section' - global scope */
 struct ddf_controller_data {
-       __u32   magic;
+       __u32   magic;                  /* DDF_CONTROLLER_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
        struct controller_type {
@@ -183,7 +185,7 @@ struct ddf_controller_data {
 
 /* The content of phys_section - global scope */
 struct phys_disk {
-       __u32   magic;
+       __u32   magic;          /* DDF_PHYS_RECORDS_MAGIC */
        __u32   crc;
        __u16   used_pdes;
        __u16   max_pdes;
@@ -202,7 +204,7 @@ struct phys_disk {
 /* phys_disk_entry.type is a bitmap - bigendian remember */
 #define        DDF_Forced_PD_GUID              1
 #define        DDF_Active_in_VD                2
-#define        DDF_Global_Spare                4
+#define        DDF_Global_Spare                4 /* VD_CONF records are ignored */
 #define        DDF_Spare                       8 /* overrides Global_spare */
 #define        DDF_Foreign                     16
 #define        DDF_Legacy                      32 /* no DDF on this device */
@@ -224,7 +226,7 @@ struct phys_disk {
 
 /* The content of the virt_section global scope */
 struct virtual_disk {
-       __u32   magic;
+       __u32   magic;          /* DDF_VIRT_RECORDS_MAGIC */
        __u32   crc;
        __u16   populated_vdes;
        __u16   max_vdes;
@@ -255,6 +257,7 @@ struct virtual_disk {
 #define        DDF_state_deleted       0x2
 #define        DDF_state_missing       0x3
 #define        DDF_state_failed        0x4
+#define        DDF_state_part_optimal  0x5
 
 #define        DDF_state_morphing      0x8
 #define        DDF_state_inconsistent  0x10
@@ -262,7 +265,8 @@ struct virtual_disk {
 /* virtual_entry.init_state is a bigendian bitmap */
 #define        DDF_initstate_mask      0x03
 #define        DDF_init_not            0x00
-#define        DDF_init_quick          0x01
+#define        DDF_init_quick          0x01 /* initialisation in progress.
+                                     * i.e. 'state_inconsistent' */
 #define        DDF_init_full           0x02
 
 #define        DDF_access_mask         0xc0
@@ -276,7 +280,7 @@ struct virtual_disk {
  */
 
 struct vd_config {
-       __u32   magic;
+       __u32   magic;          /* DDF_VD_CONF_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
        __u32   timestamp;
@@ -320,7 +324,7 @@ struct vd_config {
 #define        DDF_cache_rallowed      64      /* enable read caching */
 
 struct spare_assign {
-       __u32   magic;
+       __u32   magic;          /* DDF_SPARE_ASSIGN_MAGIC */
        __u32   crc;
        __u32   timestamp;
        __u8    reserved[7];
@@ -342,7 +346,7 @@ struct spare_assign {
 
 /* The data_section contents - local scope */
 struct disk_data {
-       __u32   magic;
+       __u32   magic;          /* DDF_PHYS_DATA_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
        __u32   refnum;         /* crc of some magic drive data ... */
@@ -396,20 +400,26 @@ struct ddf_super {
        struct phys_disk        *phys;
        struct virtual_disk     *virt;
        int pdsize, vdsize;
-       int max_part;
+       int max_part, mppe, conf_rec_len;
+       int currentdev;
        struct vcl {
                struct vcl      *next;
                __u64           *lba_offset; /* location in 'conf' of
                                              * the lba table */
+               int     vcnum; /* index into ->virt */
+               __u64           *block_sizes; /* NULL if all the same */
                struct vd_config conf;
-       } *conflist, *newconf;
+       } *conflist, *currentconf;
        struct dl {
                struct dl       *next;
                struct disk_data disk;
                int major, minor;
                char *devname;
                int fd;
-               struct vcl *vlist[0]; /* max_part+1 in size */
+               unsigned long long size; /* sectors */
+               int pdnum;      /* index in ->phys */
+               struct spare_assign *spare;
+               struct vcl *vlist[0]; /* max_part in size */
        } *dlist;
 };
 
@@ -417,7 +427,6 @@ struct ddf_super {
 #define offsetof(t,f) ((size_t)&(((t*)0)->f))
 #endif
 
-extern struct superswitch super_ddf_container, super_ddf_bvd, super_ddf;
 
 static int calc_crc(void *buf, int len)
 {
@@ -542,11 +551,12 @@ static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
                                devname);
                return 2;
        }
-       if (memcmp(super->anchor.revision, DDF_REVISION, 8) != 0) {
+       if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
+           memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
                if (devname)
                        fprintf(stderr, Name ": can only support super revision"
-                               " %.8s, not %.8s on %s\n",
-                               DDF_REVISION, super->anchor.revision, devname);
+                               " %.8s and earlier, not %.8s on %s\n",
+                               DDF_REVISION_2, super->anchor.revision,devname);
                return 2;
        }
        if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
@@ -603,6 +613,10 @@ static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
        }
        super->conflist = NULL;
        super->dlist = NULL;
+
+       super->max_part = __be16_to_cpu(super->active->max_partitions);
+       super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
+       super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
        return 0;
 }
 
@@ -613,13 +627,13 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        struct stat stb;
        char *conf;
        int i;
-       int conflen;
-       int mppe;
+       int vnum;
+       int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
+       unsigned long long dsize;
 
        /* First the local disk info */
-       super->max_part = __be16_to_cpu(super->active->max_partitions);
        dl = malloc(sizeof(*dl) +
-                   (super->max_part+1) * sizeof(dl->vlist[0]));
+                   (super->max_part) * sizeof(dl->vlist[0]));
 
        load_section(fd, super, &dl->disk,
                     super->active->data_section_offset,
@@ -632,29 +646,46 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        dl->minor = minor(stb.st_rdev);
        dl->next = super->dlist;
        dl->fd = keep ? fd : -1;
-       for (i=0 ; i < super->max_part + 1 ; i++)
+
+       dl->size = 0;
+       if (get_dev_size(fd, devname, &dsize))
+               dl->size = dsize >> 9;
+       dl->spare = NULL;
+       for (i=0 ; i < super->max_part ; i++)
                dl->vlist[i] = NULL;
        super->dlist = dl;
+       dl->pdnum = -1;
+       for (i=0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
+               if (memcmp(super->phys->entries[i].guid,
+                          dl->disk.guid, DDF_GUID_LEN) == 0)
+                       dl->pdnum = i;
 
        /* Now the config list. */
        /* 'conf' is an array of config entries, some of which are
         * probably invalid.  Those which are good need to be copied into
         * the conflist
         */
-       conflen =  __be16_to_cpu(super->active->config_record_len);
 
        conf = load_section(fd, super, NULL,
                            super->active->config_section_offset,
                            super->active->config_section_length,
                            0);
 
+       vnum = 0;
        for (i = 0;
             i < __be32_to_cpu(super->active->config_section_length);
-            i += conflen) {
+            i += super->conf_rec_len) {
                struct vd_config *vd =
                        (struct vd_config *)((char*)conf + i*512);
                struct vcl *vcl;
 
+               if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
+                       if (dl->spare)
+                               continue;
+                       dl->spare = malloc(super->conf_rec_len*512);
+                       memcpy(dl->spare, vd, super->conf_rec_len*512);
+                       continue;
+               }
                if (vd->magic != DDF_VD_CONF_MAGIC)
                        continue;
                for (vcl = super->conflist; vcl; vcl = vcl->next) {
@@ -664,20 +695,28 @@ static int load_ddf_local(int fd, struct ddf_super *super,
                }
 
                if (vcl) {
-                       dl->vlist[i/conflen] = vcl;
+                       dl->vlist[vnum++] = vcl;
                        if (__be32_to_cpu(vd->seqnum) <=
                            __be32_to_cpu(vcl->conf.seqnum))
                                continue;
-               } else {
-                       vcl = malloc(conflen*512 + offsetof(struct vcl, conf));
+               } else {
+                       vcl = malloc(super->conf_rec_len*512 +
+                                    offsetof(struct vcl, conf));
                        vcl->next = super->conflist;
+                       vcl->block_sizes = NULL; /* FIXME not for CONCAT */
                        super->conflist = vcl;
+                       dl->vlist[vnum++] = vcl;
                }
-               memcpy(&vcl->conf, vd, conflen*512);
-               mppe = __be16_to_cpu(super->anchor.max_primary_element_entries);
+               memcpy(&vcl->conf, vd, super->conf_rec_len*512);
                vcl->lba_offset = (__u64*)
-                       &vcl->conf.phys_refnum[mppe];
-               dl->vlist[i/conflen] = vcl;
+                       &vcl->conf.phys_refnum[super->mppe];
+
+               {
+                       /* Locate this config's slot in the virtual disk
+                        * table.  A private index is required here: 'i'
+                        * is the cursor of the enclosing config-record
+                        * loop and must not be overwritten.
+                        * vcnum is left untouched when no entry matches.
+                        */
+                       int k;
+
+                       for (k = 0; k < max_virt_disks; k++)
+                               if (memcmp(super->virt->entries[k].guid,
+                                          vcl->conf.guid,
+                                          DDF_GUID_LEN) == 0) {
+                                       vcl->vcnum = k;
+                                       break;
+                               }
+               }
        }
        free(conf);
 
@@ -696,9 +735,12 @@ static int load_super_ddf(struct supertype *st, int fd,
        int rv;
 
 #ifndef MDASSEMBLE
+       /* if 'fd' is a container, load metadata from all the devices */
        if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0)
                return 0;
 #endif
+       if (st->subarray[0])
+               return 1; /* FIXME Is this correct */
 
        if (get_dev_size(fd, devname, &dsize) == 0)
                return 1;
@@ -774,6 +816,8 @@ static void free_super_ddf(struct supertype *st)
        while (ddf->conflist) {
                struct vcl *v = ddf->conflist;
                ddf->conflist = v->next;
+               if (v->block_sizes)
+                       free(v->block_sizes);
                free(v);
        }
        while (ddf->dlist) {
@@ -781,6 +825,8 @@ static void free_super_ddf(struct supertype *st)
                ddf->dlist = d->next;
                if (d->fd >= 0)
                        close(d->fd);
+               if (d->spare)
+                       free(d->spare);
                free(d);
        }
        free(ddf);
@@ -797,6 +843,7 @@ static struct supertype *match_metadata_desc_ddf(char *arg)
                return NULL;
 
        st = malloc(sizeof(*st));
+       memset(st, 0, sizeof(*st));
        st->ss = &super_ddf;
        st->max_devs = 512;
        st->minor_version = 0;
@@ -804,38 +851,6 @@ static struct supertype *match_metadata_desc_ddf(char *arg)
        return st;
 }
 
-static struct supertype *match_metadata_desc_ddf_bvd(char *arg)
-{
-       struct supertype *st;
-       if (strcmp(arg, "ddf/bvd") != 0 &&
-           strcmp(arg, "bvd") != 0 &&
-           strcmp(arg, "default") != 0
-               )
-               return NULL;
-
-       st = malloc(sizeof(*st));
-       st->ss = &super_ddf_bvd;
-       st->max_devs = 512;
-       st->minor_version = 0;
-       st->sb = NULL;
-       return st;
-}
-static struct supertype *match_metadata_desc_ddf_svd(char *arg)
-{
-       struct supertype *st;
-       if (strcmp(arg, "ddf/svd") != 0 &&
-           strcmp(arg, "svd") != 0 &&
-           strcmp(arg, "default") != 0
-               )
-               return NULL;
-
-       st = malloc(sizeof(*st));
-       st->ss = &super_ddf_svd;
-       st->max_devs = 512;
-       st->minor_version = 0;
-       st->sb = NULL;
-       return st;
-}
 
 #ifndef MDASSEMBLE
 
@@ -896,7 +911,8 @@ static struct num_mapping ddf_level_num[] = {
        { DDF_RAID0, 0 },
        { DDF_RAID1, 1 },
        { DDF_RAID3, LEVEL_UNSUPPORTED },
-       { DDF_RAID5, 4 },
+       { DDF_RAID4, 4 },
+       { DDF_RAID5, 5 },
        { DDF_RAID1E, LEVEL_UNSUPPORTED },
        { DDF_JBOD, LEVEL_UNSUPPORTED },
        { DDF_CONCAT, LEVEL_LINEAR },
@@ -920,34 +936,41 @@ static void print_guid(char *guid, int tstamp)
 {
        /* A GUIDs are part (or all) ASCII and part binary.
         * They tend to be space padded.
-        * We ignore trailing spaces and print numbers
-        * <0x20 and >=0x7f as \xXX
-        * Some GUIDs have a time stamp in bytes 16-19.
-        * We print that if appropriate
+        * We print the GUID in HEX, then in parentheses add
+        * any initial ASCII sequence, and a possible
+        * time stamp from bytes 16-19
         */
        int l = DDF_GUID_LEN;
        int i;
+
+       for (i=0 ; i<DDF_GUID_LEN ; i++) {
+               if ((i&3)==0 && i != 0) printf(":");
+               printf("%02X", guid[i]&255);
+       }
+
+       printf(" (");
        while (l && guid[l-1] == ' ')
                l--;
        for (i=0 ; i<l ; i++) {
                if (guid[i] >= 0x20 && guid[i] < 0x7f)
                        fputc(guid[i], stdout);
                else
-                       fprintf(stdout, "\\x%02x", guid[i]&255);
+                       break;
        }
        if (tstamp) {
                time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
                char tbuf[100];
                struct tm *tm;
                tm = localtime(&then);
-               strftime(tbuf, 100, " (%D %T)",tm);
+               strftime(tbuf, 100, " %D %T",tm);
                fputs(tbuf, stdout);
        }
+       printf(")");
 }
 
 static void examine_vd(int n, struct ddf_super *sb, char *guid)
 {
-       int crl = __be16_to_cpu(sb->anchor.config_record_len);
+       int crl = sb->conf_rec_len;
        struct vcl *vcl;
 
        for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
@@ -1071,11 +1094,11 @@ static void brief_examine_super_ddf(struct supertype *st)
         */
        struct ddf_super *ddf = st->sb;
        int i;
-       printf("ARRAY /dev/ddf UUID=");
+       printf("ARRAY /dev/ddf metadata=ddf UUID=");
        for (i = 0; i < DDF_GUID_LEN; i++) {
-               printf("%02x", ddf->anchor.guid[i]);
                if ((i&3) == 0 && i != 0)
                        printf(":");
+               printf("%02X", 255&ddf->anchor.guid[i]);
        }
        printf("\n");
 }
@@ -1097,8 +1120,6 @@ static void brief_detail_super_ddf(struct supertype *st)
         */
 //     struct ddf_super *ddf = st->sb;
 }
-
-
 #endif
 
 static int match_home_ddf(struct supertype *st, char *homehost)
@@ -1116,10 +1137,26 @@ static int match_home_ddf(struct supertype *st, char *homehost)
                ddf->controller.vendor_data[len] == 0);
 }
 
-static struct vd_config *find_vdcr(struct ddf_super *ddf)
+static struct vd_config *find_vdcr(struct ddf_super *ddf, int inst)
 {
-       /* FIXME this just picks off the first one */
-       return &ddf->conflist->conf;
+       struct vcl *v;
+
+       for (v = ddf->conflist; v; v = v->next)
+               if (inst == v->vcnum)
+                       return &v->conf;
+       return NULL;
+}
+
+static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
+{
+       /* Find the entry in phys_disk which has the given refnum
+        * and return its index, or -1 if no entry matches.
+        * The refnum is compared as stored, with no byte-swapping.
+        */
+       int i;
+       for (i=0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
+               if (ddf->phys->entries[i].refnum == phys_refnum)
+                       return i;
+       return -1;
+}
 
 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
@@ -1141,30 +1178,33 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
         * The first 16 bytes of the sha1 of these is used.
         */
        struct ddf_super *ddf = st->sb;
-       struct vd_config *vd = find_vdcr(ddf);
+       struct vcl *vcl = ddf->currentconf;
 
-       if (!vd)
+       if (!vcl)
                memset(uuid, 0, sizeof (uuid));
        else {
                char buf[20];
                struct sha1_ctx ctx;
                sha1_init_ctx(&ctx);
-               sha1_process_bytes(&vd->guid, DDF_GUID_LEN, &ctx);
-               if (vd->sec_elmnt_count > 1)
-                       sha1_process_bytes(&vd->sec_elmnt_seq, 1, &ctx);
+               sha1_process_bytes(&vcl->conf.guid, DDF_GUID_LEN, &ctx);
+               if (vcl->conf.sec_elmnt_count > 1)
+                       sha1_process_bytes(&vcl->conf.sec_elmnt_seq, 1, &ctx);
                sha1_finish_ctx(&ctx, buf);
                memcpy(uuid, buf, sizeof(uuid));
        }
 }
 
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info);
+
 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
 {
        struct ddf_super *ddf = st->sb;
-       int i;
 
-       info->array.major_version = 1000;
-       info->array.minor_version = 0; /* FIXME use ddf->revision somehow */
-       info->array.patch_version = 0;
+       if (ddf->currentconf) {
+               getinfo_super_ddf_bvd(st, info);
+               return;
+       }
+
        info->array.raid_disks    = __be16_to_cpu(ddf->phys->used_pdes);
        info->array.level         = LEVEL_CONTAINER;
        info->array.layout        = 0;
@@ -1174,61 +1214,60 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
        info->array.utime         = 0;
        info->array.chunk_size    = 0;
 
-//     info->data_offset         = ???;
-//     info->component_size      = ???;
 
        info->disk.major = 0;
        info->disk.minor = 0;
-       info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
-//     info->disk.raid_disk = find refnum in the table and use index;
-       info->disk.raid_disk = -1;
-       for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes) ; i++)
-               if (ddf->phys->entries[i].refnum == ddf->dlist->disk.refnum) {
-                       info->disk.raid_disk = i;
-                       break;
-               }
+       if (ddf->dlist) {
+               info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
+               info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
+
+               info->data_offset = __be64_to_cpu(ddf->phys->
+                                         entries[info->disk.raid_disk].
+                                         config_size);
+               info->component_size = ddf->dlist->size - info->data_offset;
+       } else {
+               info->disk.number = -1;
+//             info->disk.raid_disk = find refnum in the table and use index;
+       }
        info->disk.state = (1 << MD_DISK_SYNC);
 
+
        info->reshape_active = 0;
 
+       strcpy(info->text_version, "ddf");
+
 //     uuid_from_super_ddf(info->uuid, sbv);
 
 //     info->name[] ?? ;
 }
 
-static void getinfo_super_n_container(struct supertype *st, struct mdinfo *info)
-{
-       /* just need offset and size */
-       struct ddf_super *ddf = st->sb;
-       int n = info->disk.number;
-
-       info->data_offset = __be64_to_cpu(ddf->phys->entries[n].config_size);
-       info->component_size = 32*1024*1024 / 512;
-}
-
 static int rlq_to_layout(int rlq, int prl, int raiddisks);
 
 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 {
        struct ddf_super *ddf = st->sb;
-       struct vd_config *vd = find_vdcr(ddf);
+       struct vcl *vc = ddf->currentconf;
+       int cd = ddf->currentdev;
 
        /* FIXME this returns BVD info - what if we want SVD ?? */
 
-       info->array.major_version = 1000;
-       info->array.minor_version = 0; /* FIXME use ddf->revision somehow */
-       info->array.patch_version = 0;
-       info->array.raid_disks    = __be16_to_cpu(vd->prim_elmnt_count);
-       info->array.level         = map_num1(ddf_level_num, vd->prl);
-       info->array.layout        = rlq_to_layout(vd->rlq, vd->prl,
+       info->array.raid_disks    = __be16_to_cpu(vc->conf.prim_elmnt_count);
+       info->array.level         = map_num1(ddf_level_num, vc->conf.prl);
+       info->array.layout        = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
                                                  info->array.raid_disks);
        info->array.md_minor      = -1;
-       info->array.ctime         = DECADE + __be32_to_cpu(*(__u32*)(vd->guid+16));
-       info->array.utime         = DECADE + __be32_to_cpu(vd->timestamp);
-       info->array.chunk_size    = 512 << vd->chunk_shift;
-
-//     info->data_offset         = ???;
-//     info->component_size      = ???;
+       info->array.ctime         = DECADE +
+               __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+       info->array.utime         = DECADE + __be32_to_cpu(vc->conf.timestamp);
+       info->array.chunk_size    = 512 << vc->conf.chunk_shift;
+
+       if (cd >= 0 && cd < ddf->mppe) {
+               info->data_offset         = __be64_to_cpu(vc->lba_offset[cd]);
+               if (vc->block_sizes)
+                       info->component_size = vc->block_sizes[cd];
+               else
+                       info->component_size = __be64_to_cpu(vc->conf.blocks);
+       }
 
        info->disk.major = 0;
        info->disk.minor = 0;
@@ -1236,22 +1275,23 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 //     info->disk.raid_disk = find refnum in the table and use index;
 //     info->disk.state = ???;
 
+       info->resync_start = 0;
+       if (!(ddf->virt->entries[info->container_member].state
+             & DDF_state_inconsistent)  &&
+           (ddf->virt->entries[info->container_member].init_state
+            & DDF_initstate_mask)
+           == DDF_init_full)
+               info->resync_start = ~0ULL;
+
        uuid_from_super_ddf(st, info->uuid);
 
+       sprintf(info->text_version, "/%s/%d",
+               devnum2devname(st->container_dev),
+               info->container_member);
+
 //     info->name[] ?? ;
 }
 
-static void getinfo_super_n_bvd(struct supertype *st, struct mdinfo *info)
-{
-       /* Find the particular details for info->disk.raid_disk.
-        * This includes data_offset, component_size,
-        */
-       struct ddf_super *ddf = st->sb;
-       __u64 *lba_offset = ddf->newconf->lba_offset;
-       struct vd_config *conf = &ddf->newconf->conf;
-       info->data_offset = __be64_to_cpu(lba_offset[info->disk.raid_disk]);
-       info->component_size = __be64_to_cpu(conf->blocks);
-}
 
 static int update_super_ddf(struct supertype *st, struct mdinfo *info,
                            char *update,
@@ -1272,7 +1312,7 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info,
         *  grow:  Array has gained a new device - this is currently for
         *              linear only
         *  resync: mark as dirty so a resync will happen.
-        *  uuid:  Change the uuid of the array to match watch is given
+        *  uuid:  Change the uuid of the array to match what is given
         *  homehost:  update the recorded homehost
         *  name:  update the name - preserving the homehost
         *  _reshape_progress: record new reshape_progress position.
@@ -1284,10 +1324,9 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info,
         */
        int rv = 0;
 //     struct ddf_super *ddf = st->sb;
-//     struct vd_config *vd = find_vdcr(ddf);
+//     struct vd_config *vd = find_vdcr(ddf, info->container_member);
 //     struct virtual_entry *ve = find_ve(ddf);
 
-
        /* we don't need to handle "force-*" or "assemble" as
         * there is no need to 'trick' the kernel.  We the metadata is
         * first updated to activate the array, all the implied modifications
@@ -1348,6 +1387,13 @@ static void make_header_guid(char *guid)
        memcpy(guid+20, &stamp, 4);
        if (rfd >= 0) close(rfd);
 }
+
+static int init_super_ddf_bvd(struct supertype *st,
+                             mdu_array_info_t *info,
+                             unsigned long long size,
+                             char *name, char *homehost,
+                             int *uuid);
+
 static int init_super_ddf(struct supertype *st,
                          mdu_array_info_t *info,
                          unsigned long long size, char *name, char *homehost,
@@ -1387,7 +1433,16 @@ static int init_super_ddf(struct supertype *st,
        struct phys_disk *pd;
        struct virtual_disk *vd;
 
+       if (!info) {
+               st->sb = NULL;
+               return 0;
+       }
+       if (st->sb)
+               return init_super_ddf_bvd(st, info, size, name, homehost,
+                                         uuid);
+
        ddf = malloc(sizeof(*ddf));
+       memset(ddf, 0, sizeof(*ddf));
        ddf->dlist = NULL; /* no physical disks yet */
        ddf->conflist = NULL; /* No virtual disks yet */
 
@@ -1402,7 +1457,7 @@ static int init_super_ddf(struct supertype *st,
        ddf->anchor.magic = DDF_HEADER_MAGIC;
        make_header_guid(ddf->anchor.guid);
 
-       memcpy(ddf->anchor.revision, DDF_REVISION, 8);
+       memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
        ddf->anchor.seq = __cpu_to_be32(1);
        ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
        ddf->anchor.openflag = 0xFF;
@@ -1424,10 +1479,11 @@ static int init_super_ddf(struct supertype *st,
        ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
        ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
        ddf->max_part = 64;
-       ddf->anchor.config_record_len = __cpu_to_be16(1 + 256*12/512);
-       ddf->anchor.max_primary_element_entries = __cpu_to_be16(256);
+       ddf->mppe = 256;
+       ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
+       ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
+       ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
        memset(ddf->anchor.pad3, 0xff, 54);
-
        /* controller sections is one sector long immediately
         * after the ddf header */
        sector = 1;
@@ -1461,7 +1517,7 @@ static int init_super_ddf(struct supertype *st,
                __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
        sector += vdsize/512;
 
-       clen = (1 + 256*12/512) * (64+1);
+       clen = ddf->conf_rec_len * (ddf->max_part+1);
        ddf->anchor.config_section_offset = __cpu_to_be32(sector);
        ddf->anchor.config_section_length = __cpu_to_be32(clen);
        sector += clen;
@@ -1497,8 +1553,8 @@ static int init_super_ddf(struct supertype *st,
         * Remaining 16 are serial number.... maybe a hostname would do?
         */
        memcpy(ddf->controller.guid, T10, sizeof(T10));
-       gethostname(hostname, 17);
-       hostname[17] = 0;
+       gethostname(hostname, sizeof(hostname));
+       hostname[sizeof(hostname) - 1] = 0;
        hostlen = strlen(hostname);
        memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
        for (i = strlen(T10) ; i+hostlen < 24; i++)
@@ -1584,7 +1640,10 @@ static int layout_to_rlq(int level, int layout, int raiddisks)
                case ALGORITHM_LEFT_ASYMMETRIC:
                        return DDF_RAID5_N_RESTART;
                case ALGORITHM_RIGHT_ASYMMETRIC:
-                       return DDF_RAID5_0_RESTART;
+                       if (level == 5)
+                               return DDF_RAID5_0_RESTART;
+                       else
+                               return DDF_RAID6_0_RESTART;
                case ALGORITHM_LEFT_SYMMETRIC:
                        return DDF_RAID5_N_CONTINUE;
                case ALGORITHM_RIGHT_SYMMETRIC:
@@ -1611,7 +1670,6 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks)
                        return -1; /* FIXME this isn't checked */
                }
        case DDF_RAID5:
-       case DDF_RAID6:
                switch(rlq) {
                case DDF_RAID5_N_RESTART:
                        return ALGORITHM_LEFT_ASYMMETRIC;
@@ -1622,10 +1680,72 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks)
                default:
                        return -1;
                }
+       case DDF_RAID6:
+               switch(rlq) {
+               case DDF_RAID5_N_RESTART:
+                       return ALGORITHM_LEFT_ASYMMETRIC;
+               case DDF_RAID6_0_RESTART:
+                       return ALGORITHM_RIGHT_ASYMMETRIC;
+               case DDF_RAID5_N_CONTINUE:
+                       return ALGORITHM_LEFT_SYMMETRIC;
+               default:
+                       return -1;
+               }
        }
        return -1;
 }
 
+/* One region of a physical device.  get_extents() fills 'start' from the
+ * config's lba_offset[] entry and 'size' from conf.blocks.
+ */
+struct extent {
+       unsigned long long start, size;
+};
+static int cmp_extent(const void *av, const void *bv)
+{
+       /* qsort() comparison callback for arrays of 'struct extent':
+        * orders by ascending 'start' and returns -1, 1 or 0.
+        * 'size' takes no part in the comparison.
+        */
+       const struct extent *a = av;
+       const struct extent *b = bv;
+       if (a->start < b->start)
+               return -1;
+       if (a->start > b->start)
+               return 1;
+       return 0;
+}
+
+static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
+{
+       /* Build the list of extents already used on the given physical
+        * device (dl) of the given ddf.
+        * Returns a malloced array of 'struct extent' sorted by start,
+        * followed by a terminating entry whose size is 0 and whose
+        * start is the device's config_size.  Returns NULL if malloc
+        * fails.  Nothing here frees the array; that is left to the
+        * caller.
+        *
+        * FIXME ignore DDF_Legacy devices?
+        */
+       struct extent *rv;
+       int n = 0;
+       int i, j;
+
+       rv = malloc(sizeof(struct extent) * (ddf->max_part + 2));
+       if (!rv)
+               return NULL;
+
+       for (i = 0; i < ddf->max_part; i++) {
+               struct vcl *v = dl->vlist[i];
+               if (v == NULL)
+                       continue;
+               /* prim_elmnt_count is stored big-endian (it is converted
+                * with __be16_to_cpu elsewhere in this file), so it must
+                * be converted before being used as a loop bound.
+                */
+               for (j=0; j < __be16_to_cpu(v->conf.prim_elmnt_count); j++)
+                       if (v->conf.phys_refnum[j] == dl->disk.refnum) {
+                               /* This device plays role 'j' in  'v'. */
+                               rv[n].start = __be64_to_cpu(v->lba_offset[j]);
+                               rv[n].size = __be64_to_cpu(v->conf.blocks);
+                               n++;
+                               break;
+                       }
+       }
+       qsort(rv, n, sizeof(*rv), cmp_extent);
+
+       /* NOTE(review): dl->pdnum is -1 when the disk's GUID was not
+        * found in the phys table (see load_ddf_local) - confirm that
+        * callers never reach here in that state.
+        */
+       rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
+       rv[n].size = 0;
+       return rv;
+}
+
 static int init_super_ddf_bvd(struct supertype *st,
                              mdu_array_info_t *info,
                              unsigned long long size,
@@ -1641,8 +1761,6 @@ static int init_super_ddf_bvd(struct supertype *st,
        struct virtual_entry *ve;
        struct vcl *vcl;
        struct vd_config *vc;
-       int mppe;
-       int conflen;
 
        if (__be16_to_cpu(ddf->virt->populated_vdes)
            >= __be16_to_cpu(ddf->virt->max_vdes)) {
@@ -1661,7 +1779,6 @@ static int init_super_ddf_bvd(struct supertype *st,
                return 0;
        }
        ve = &ddf->virt->entries[venum];
-       st->container_member = venum;
 
        /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
         * timestamp, random number
@@ -1671,10 +1788,12 @@ static int init_super_ddf_bvd(struct supertype *st,
        ve->pad0 = 0xFFFF;
        ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
        ve->type = 0;
-       ve->state = 0;
-       ve->init_state = 0;
-       if (!(info->state & 1))
-               ve->init_state = DDF_state_inconsistent;
+       ve->state = DDF_state_degraded; /* Will be modified as devices are added */
+       if (info->state & 1) /* clean */
+               ve->init_state = DDF_init_full;
+       else
+               ve->init_state = DDF_init_not;
+
        memset(ve->pad1, 0xff, 14);
        memset(ve->name, ' ', 16);
        if (name)
@@ -1683,10 +1802,11 @@ static int init_super_ddf_bvd(struct supertype *st,
                __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
 
        /* Now create a new vd_config */
-       conflen =  __be16_to_cpu(ddf->active->config_record_len);
-       vcl = malloc(offsetof(struct vcl, conf) + conflen * 512);
-       mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[mppe];
+       vcl = malloc(offsetof(struct vcl, conf) + ddf->conf_rec_len * 512);
+       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
+       vcl->vcnum = venum;
+       sprintf(st->subarray, "%d", venum);
+       vcl->block_sizes = NULL; /* FIXME not for CONCAT */
 
        vc = &vcl->conf;
 
@@ -1726,12 +1846,12 @@ static int init_super_ddf_bvd(struct supertype *st,
        memset(vc->v3, 0xff, 16);
        memset(vc->vendor, 0xff, 32);
 
-       memset(vc->phys_refnum, 0xff, 4*mppe);
-       memset(vc->phys_refnum+mppe, 0x00, 8*mppe);
+       memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
+       memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe); /* phys_refnum is __u32[]: +mppe elements reaches lba_offset[] */
 
        vcl->next = ddf->conflist;
        ddf->conflist = vcl;
-       ddf->newconf = vcl;
+       ddf->currentconf = vcl;
        return 1;
 }
 
@@ -1743,13 +1863,16 @@ static void add_to_super_ddf_bvd(struct supertype *st,
         * We need to find suitable free space in that device and update
         * the phys_refnum and lba_offset for the newly created vd_config.
         * We might also want to update the type in the phys_disk
-        * section. FIXME
+        * section.
         */
        struct dl *dl;
        struct ddf_super *ddf = st->sb;
        struct vd_config *vc;
        __u64 *lba_offset;
-       int mppe;
+       int working;
+       int i;
+       unsigned long long blocks, pos, esize;
+       struct extent *ex;
 
        for (dl = ddf->dlist; dl ; dl = dl->next)
                if (dl->major == dk->major &&
@@ -1758,16 +1881,68 @@ static void add_to_super_ddf_bvd(struct supertype *st,
        if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
                return;
 
-       vc = &ddf->newconf->conf;
+       vc = &ddf->currentconf->conf;
+       lba_offset = ddf->currentconf->lba_offset;
+
+       ex = get_extents(ddf, dl);
+       if (!ex)
+               return;
+
+       i = 0; pos = 0;
+       blocks = __be64_to_cpu(vc->blocks);
+       if (ddf->currentconf->block_sizes)
+               blocks = ddf->currentconf->block_sizes[dk->raid_disk];
+
+       do {
+               esize = ex[i].start - pos;
+               if (esize >= blocks)
+                       break;
+               pos = ex[i].start + ex[i].size;
+               i++;
+       } while (ex[i-1].size);
+
+       free(ex);
+       if (esize < blocks)
+               return;
+
+       ddf->currentdev = dk->raid_disk;
        vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
-       mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-       lba_offset = (__u64*)(vc->phys_refnum + mppe);
-       lba_offset[dk->raid_disk] = 0; /* FIXME */
+       lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
 
-       dl->vlist[0] =ddf->newconf; /* FIXME */
+       for (i=0; i < ddf->max_part ; i++)
+               if (dl->vlist[i] == NULL)
+                       break;
+       if (i == ddf->max_part)
+               return;
+       dl->vlist[i] = ddf->currentconf;
 
        dl->fd = fd;
        dl->devname = devname;
+
+       /* Check how many working raid_disks, and if we can mark
+        * array as optimal yet
+        */
+       working = 0;
+
+       for (i=0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
+               if (vc->phys_refnum[i] != 0xffffffff)
+                       working++;
+
+       /* Find which virtual_entry */
+       i = ddf->currentconf->vcnum;
+       if (working == __be16_to_cpu(vc->prim_elmnt_count))
+               ddf->virt->entries[i].state =
+                       (ddf->virt->entries[i].state & ~DDF_state_mask)
+                       | DDF_state_optimal;
+
+       if (vc->prl == DDF_RAID6 &&
+           working+1 == __be16_to_cpu(vc->prim_elmnt_count))
+               ddf->virt->entries[i].state =
+                       (ddf->virt->entries[i].state & ~DDF_state_mask)
+                       | DDF_state_part_optimal;
+
+       ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
+       ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
 }
 
 /* add a device to a container, either while creating it or while
@@ -1785,16 +1960,22 @@ static void add_to_super_ddf(struct supertype *st,
        int n, i;
        struct stat stb;
 
+       if (ddf->currentconf) {
+               add_to_super_ddf_bvd(st, dk, fd, devname);
+               return;
+       }
+
        /* This is device numbered dk->number.  We need to create
         * a phys_disk entry and a more detailed disk_data entry.
         */
        fstat(fd, &stb);
-       dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * (ddf->max_part+1));
+       dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part);
        dd->major = major(stb.st_rdev);
        dd->minor = minor(stb.st_rdev);
        dd->devname = devname;
        dd->next = ddf->dlist;
        dd->fd = fd;
+       dd->spare = NULL;
 
        dd->disk.magic = DDF_PHYS_DATA_MAGIC;
        now = time(0);
@@ -1804,23 +1985,33 @@ static void add_to_super_ddf(struct supertype *st,
        *(__u32*)(dd->disk.guid + 16) = random();
        *(__u32*)(dd->disk.guid + 20) = random();
 
-       dd->disk.refnum = random(); /* and hope for the best FIXME check this is unique!!*/
+       do {
+               /* Cannot be bothered finding a CRC of some irrelevant details*/
+               dd->disk.refnum = random();
+               for (i = __be16_to_cpu(ddf->active->max_pd_entries) - 1;
+                    i >= 0; i--)
+                       if (ddf->phys->entries[i].refnum == dd->disk.refnum)
+                               break;
+       } while (i >= 0);
+
        dd->disk.forced_ref = 1;
        dd->disk.forced_guid = 1;
        memset(dd->disk.vendor, ' ', 32);
        memcpy(dd->disk.vendor, "Linux", 5);
        memset(dd->disk.pad, 0xff, 442);
-       for (i = 0; i < ddf->max_part+1 ; i++)
+       for (i = 0; i < ddf->max_part ; i++)
                dd->vlist[i] = NULL;
 
        n = __be16_to_cpu(ddf->phys->used_pdes);
        pde = &ddf->phys->entries[n];
+       dd->pdnum = n;
+
        n++;
        ddf->phys->used_pdes = __cpu_to_be16(n);
 
        memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
        pde->refnum = dd->disk.refnum;
-       pde->type = __cpu_to_be16(DDF_Forced_PD_GUID |DDF_Global_Spare);
+       pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
        pde->state = __cpu_to_be16(DDF_Online);
        get_dev_size(fd, NULL, &size);
        /* We are required to reserve 32Meg, and record the size in sectors */
@@ -1828,6 +2019,7 @@ static void add_to_super_ddf(struct supertype *st,
        sprintf(pde->path, "%17.17s","Information: nil") ;
        memset(pde->pad, 0xff, 6);
 
+       dd->size = size >> 9;
        ddf->dlist = dd;
 }
 
@@ -1838,7 +2030,7 @@ static void add_to_super_ddf(struct supertype *st,
  */
 
 #ifndef MDASSEMBLE
-static int write_init_super_ddf(struct supertype *st)
+static int __write_init_super_ddf(struct supertype *st, int do_close)
 {
 
        struct ddf_super *ddf = st->sb;
@@ -1897,10 +2089,12 @@ static int write_init_super_ddf(struct supertype *st)
                write(fd, ddf->virt, ddf->vdsize);
 
                /* Now write lots of config records. */
-               n_config = __be16_to_cpu(ddf->active->max_partitions);
-               conf_size = __be16_to_cpu(ddf->active->config_record_len) * 512;
+               n_config = ddf->max_part;
+               conf_size = ddf->conf_rec_len * 512;
                for (i = 0 ; i <= n_config ; i++) {
                        struct vcl *c = d->vlist[i];
+                       if (i == n_config)
+                               c = (struct vcl*)d->spare;
 
                        if (c) {
                                c->conf.crc = calc_crc(&c->conf, conf_size);
@@ -1918,10 +2112,44 @@ static int write_init_super_ddf(struct supertype *st)
 
                lseek64(fd, (size-1)*512, SEEK_SET);
                write(fd, &ddf->anchor, 512);
-               close(fd);
+               if (do_close) {
+                       close(fd);
+                       d->fd = -1;
+               }
        }
        return 1;
 }
+
+static int write_init_super_ddf(struct supertype *st)
+{
+       /* If st->update_tail is set, queue updates and return 0; else write now. */
+       if (st->update_tail) {
+               /* queue the virtual_disk and vd_config as metadata updates */
+               struct virtual_disk *vd;
+               struct vd_config *vc;
+               struct ddf_super *ddf = st->sb;
+               int len;
+
+               /* First the virtual disk.  We have a slightly fake header */
+               len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
+               vd = malloc(len); /* NOTE(review): result is not checked for NULL */
+               *vd = *ddf->virt;
+               vd->entries[0] = ddf->virt->entries[ddf->currentconf->vcnum];
+               vd->populated_vdes = __cpu_to_be16(ddf->currentconf->vcnum); /* holds the entry's index (vcnum), not a count */
+               append_metadata_update(st, vd, len);
+
+               /* Then the vd_config */
+               len = ddf->conf_rec_len * 512;
+               vc = malloc(len); /* NOTE(review): result is not checked for NULL */
+               memcpy(vc, &ddf->currentconf->conf, len);
+               append_metadata_update(st, vc, len);
+
+               /* FIXME I need to close the fds! */
+               return 0;
+       } else 
+               return __write_init_super_ddf(st, 1);
+}
+
 #endif
 
 static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
@@ -1933,7 +2161,17 @@ static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
 }
 
 #ifndef MDASSEMBLE
-int validate_geometry_ddf(struct supertype *st,
+static int validate_geometry_ddf_container(struct supertype *st,
+                                   int level, int layout, int raiddisks,
+                                   int chunk, unsigned long long size,
+                                   char *dev, unsigned long long *freesize);
+
+static int validate_geometry_ddf_bvd(struct supertype *st,
+                                    int level, int layout, int raiddisks,
+                                    int chunk, unsigned long long size,
+                                    char *dev, unsigned long long *freesize);
+
+static int validate_geometry_ddf(struct supertype *st,
                          int level, int layout, int raiddisks,
                          int chunk, unsigned long long size,
                          char *dev, unsigned long long *freesize)
@@ -1950,53 +2188,50 @@ int validate_geometry_ddf(struct supertype *st,
         */
 
        if (level == LEVEL_CONTAINER) {
-               st->ss = &super_ddf_container;
-               if (dev) {
-                       int rv =st->ss->validate_geometry(st, level, layout,
-                                                         raiddisks, chunk,
-                                                         size,
-                                                         NULL, freesize);
-                       if (rv)
-                               return rv;
-               }
-               return st->ss->validate_geometry(st, level, layout, raiddisks,
-                                                chunk, size, dev, freesize);
+               /* Must be a fresh device to add to a container */
+               return validate_geometry_ddf_container(st, level, layout,
+                                              raiddisks,
+                                              chunk, size, dev, freesize);
        }
 
        if (st->sb) {
-               /* creating in a given container */
-               st->ss = &super_ddf_bvd;
-               if (dev) {
-                       int rv =st->ss->validate_geometry(st, level, layout,
-                                                         raiddisks, chunk,
-                                                         size,
-                                                         NULL, freesize);
-                       if (rv)
-                               return rv;
-               }
-               return st->ss->validate_geometry(st, level, layout, raiddisks,
+               /* A container has already been opened, so we are
+                * creating in there.  Maybe a BVD, maybe an SVD.
+                * Should make a distinction one day.
+                */
+               return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
                                                 chunk, size, dev, freesize);
        }
-       /* FIXME should exclude MULTIPATH, or more appropriately, allow
-        * only known levels.
-        */
-       if (!dev)
+       if (!dev) {
+               /* Initial sanity check.  Exclude illegal levels. */
+               int i;
+               for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
+                       if (ddf_level_num[i].num2 == level)
+                               break;
+               if (ddf_level_num[i].num1 == MAXINT)
+                       return 0;
+               /* Should check layout? etc */
                return 1;
+       }
 
-       /* This device needs to be either a device in a 'ddf' container,
-        * or it needs to be a 'ddf-bvd' array.
+       /* This is the first device for the array.
+        * If it is a container, we read it in and do automagic allocations,
+        * no other devices should be given.
+        * Otherwise it must be a member device of a container, and we
+        * do manual allocation.
+        * Later we should check for a BVD and make an SVD.
         */
-
        fd = open(dev, O_RDONLY|O_EXCL, 0);
        if (fd >= 0) {
                sra = sysfs_read(fd, 0, GET_VERSION);
                close(fd);
                if (sra && sra->array.major_version == -1 &&
-                   strcmp(sra->text_version, "ddf-bvd") == 0) {
-                       st->ss = &super_ddf_svd;
-                       return st->ss->validate_geometry(st, level, layout,
-                                                        raiddisks, chunk, size,
-                                                        dev, freesize);
+                   strcmp(sra->text_version, "ddf") == 0) {
+
+                       /* load super */
+                       /* find space for 'n' devices. */
+                       /* remember the devices */
+                       /* Somehow return the fact that we have enough */
                }
 
                fprintf(stderr,
@@ -2025,27 +2260,25 @@ int validate_geometry_ddf(struct supertype *st,
                 * and try to create a bvd
                 */
                struct ddf_super *ddf;
-               st->ss = &super_ddf_bvd;
                if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
                        st->sb = ddf;
                        st->container_dev = fd2devnum(cfd);
-                       st->container_member = 27; // FIXME
                        close(cfd);
-                       return st->ss->validate_geometry(st, level, layout,
+                       return validate_geometry_ddf_bvd(st, level, layout,
                                                         raiddisks, chunk, size,
                                                         dev, freesize);
                }
                close(cfd);
-       }
-       fprintf(stderr, Name ": Cannot use %s: Already in use\n",
-               dev);
+       } else /* device may belong to a different container */
+               return 0;
+
        return 1;
 }
 
-int validate_geometry_ddf_container(struct supertype *st,
-                                   int level, int layout, int raiddisks,
-                                   int chunk, unsigned long long size,
-                                   char *dev, unsigned long long *freesize)
+static int validate_geometry_ddf_container(struct supertype *st,
+                                  int level, int layout, int raiddisks,
+                                  int chunk, unsigned long long size,
+                                  char *dev, unsigned long long *freesize)
 {
        int fd;
        unsigned long long ldsize;
@@ -2067,76 +2300,15 @@ int validate_geometry_ddf_container(struct supertype *st,
        }
        close(fd);
 
-       *freesize = avail_size_ddf(st, ldsize);
+       *freesize = avail_size_ddf(st, ldsize >> 9);
 
        return 1;
 }
 
-struct extent {
-       unsigned long long start, size;
-};
-int cmp_extent(const void *av, const void *bv)
-{
-       const struct extent *a = av;
-       const struct extent *b = bv;
-       if (a->start < b->start)
-               return -1;
-       if (a->start > b->start)
-               return 1;
-       return 0;
-}
-
-struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
-{
-       /* find a list of used extents on the give physical device
-        * (dnum) or the given ddf.
-        * Return a malloced array of 'struct extent'
-
-FIXME ignore DDF_Legacy devices?
-
-        */
-       struct extent *rv;
-       int n = 0;
-       int dnum;
-       int i, j;
-
-       for (dnum = 0; dnum < ddf->phys->used_pdes; dnum++)
-               if (memcmp(dl->disk.guid,
-                          ddf->phys->entries[dnum].guid,
-                          DDF_GUID_LEN) == 0)
-                       break;
-
-       if (dnum == ddf->phys->used_pdes)
-               return NULL;
-
-       rv = malloc(sizeof(struct extent) * (ddf->max_part + 2));
-       if (!rv)
-               return NULL;
-
-       for (i = 0; i < ddf->max_part+1; i++) {
-               struct vcl *v = dl->vlist[i];
-               if (v == NULL)
-                       continue;
-               for (j=0; j < v->conf.prim_elmnt_count; j++)
-                       if (v->conf.phys_refnum[j] == dl->disk.refnum) {
-                               /* This device plays role 'j' in  'v'. */
-                               rv[n].start = __be64_to_cpu(v->lba_offset[j]);
-                               rv[n].size = __be64_to_cpu(v->conf.blocks);
-                               n++;
-                               break;
-                       }
-       }
-       qsort(rv, n, sizeof(*rv), cmp_extent);
-
-       rv[n].start = __be64_to_cpu(ddf->phys->entries[dnum].config_size);
-       rv[n].size = 0;
-       return rv;
-}
-
-int validate_geometry_ddf_bvd(struct supertype *st,
-                             int level, int layout, int raiddisks,
-                             int chunk, unsigned long long size,
-                             char *dev, unsigned long long *freesize)
+static int validate_geometry_ddf_bvd(struct supertype *st,
+                                    int level, int layout, int raiddisks,
+                                    int chunk, unsigned long long size,
+                                    char *dev, unsigned long long *freesize)
 {
        struct stat stb;
        struct ddf_super *ddf = st->sb;
@@ -2163,6 +2335,7 @@ int validate_geometry_ddf_bvd(struct supertype *st,
                for (dl = ddf->dlist; dl ; dl = dl->next)
                {
                        int found = 0;
+                       pos = 0;
 
                        i = 0;
                        e = get_extents(ddf, dl);
@@ -2218,19 +2391,6 @@ int validate_geometry_ddf_bvd(struct supertype *st,
 
        return 1;
 }
-int validate_geometry_ddf_svd(struct supertype *st,
-                             int level, int layout, int raiddisks,
-                             int chunk, unsigned long long size,
-                             char *dev, unsigned long long *freesize)
-{
-       /* dd/svd only supports striped, mirrored, concat, spanned... */
-       if (level != LEVEL_LINEAR &&
-           level != 0 &&
-           level != 1)
-               return 0;
-       return 1;
-}
-
 
 static int load_super_ddf_all(struct supertype *st, int fd,
                              void **sbp, char *devname, int keep_fd)
@@ -2260,11 +2420,11 @@ static int load_super_ddf_all(struct supertype *st, int fd,
        for (sd = sra->devs ; sd ; sd = sd->next) {
                int rv;
                sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
-               dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
-               if (!dfd)
+               dfd = dev_open(nm, O_RDONLY);
+               if (dfd < 0)
                        return 2;
                rv = load_ddf_headers(dfd, super, NULL);
-               if (!keep_fd) close(dfd);
+               close(dfd);
                if (rv == 0) {
                        seq = __be32_to_cpu(super->active->seq);
                        if (super->active->openflag)
@@ -2280,7 +2440,7 @@ static int load_super_ddf_all(struct supertype *st, int fd,
        /* OK, load this ddf */
        sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
        dfd = dev_open(nm, O_RDONLY);
-       if (!dfd)
+       if (dfd < 0)
                return 1;
        load_ddf_headers(dfd, super, NULL);
        load_ddf_global(dfd, super, NULL);
@@ -2289,23 +2449,31 @@ static int load_super_ddf_all(struct supertype *st, int fd,
        for (sd = sra->devs ; sd ; sd = sd->next) {
                sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
                dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
-               if (!dfd)
+               if (dfd < 0)
                        return 2;
                seq = load_ddf_local(dfd, super, NULL, keep_fd);
                if (!keep_fd) close(dfd);
        }
+       if (st->subarray[0]) {
+               struct vcl *v;
+
+               for (v = super->conflist; v; v = v->next)
+                       if (v->vcnum == atoi(st->subarray))
+                               super->currentconf = v;
+               if (!super->currentconf)
+                       return 1;
+       }
        *sbp = super;
        if (st->ss == NULL) {
-               st->ss = &super_ddf_container;
+               st->ss = &super_ddf;
                st->minor_version = 0;
                st->max_devs = 512;
+               st->container_dev = fd2devnum(fd);
        }
        return 0;
 }
 #endif
 
-
-
 static struct mdinfo *container_content_ddf(struct supertype *st)
 {
        /* Given a container loaded by load_super_ddf_all,
@@ -2322,7 +2490,6 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
 
        for (vc = ddf->conflist ; vc ; vc=vc->next)
        {
-               int mppe;
                int i;
                struct mdinfo *this;
                this = malloc(sizeof(*this));
@@ -2330,14 +2497,11 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                this->next = rest;
                rest = this;
 
-               this->array.major_version = 1000;
-               this->array.minor_version = 0;
-               this->array.patch_version = 0;
                this->array.level = map_num1(ddf_level_num, vc->conf.prl);
                this->array.raid_disks =
                        __be16_to_cpu(vc->conf.prim_elmnt_count);
-               /* FIXME this should be mapped */
-               this->array.layout = vc->conf.rlq;
+               this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
+                                                  this->array.raid_disks);
                this->array.md_minor      = -1;
                this->array.ctime         = DECADE +
                        __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
@@ -2345,14 +2509,16 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                        __be32_to_cpu(vc->conf.timestamp);
                this->array.chunk_size    = 512 << vc->conf.chunk_shift;
 
-               for (i=0; i < __be16_to_cpu(ddf->virt->populated_vdes); i++)
-                       if (memcmp(ddf->virt->entries[i].guid,
-                                  vc->conf.guid, DDF_GUID_LEN) == 0)
-                               break;
-               if (ddf->virt->entries[i].state & DDF_state_inconsistent)
+               i = vc->vcnum;
+               if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
+                   (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
+                   DDF_init_full) {
                        this->array.state = 0;
-               else
+                       this->resync_start = 0;
+               } else {
                        this->array.state = 1;
+                       this->resync_start = ~0ULL;
+               }
                memcpy(this->name, ddf->virt->entries[i].name, 32);
                this->name[33]=0;
 
@@ -2361,8 +2527,11 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                this->array.size = this->component_size / 2;
                this->container_member = i;
 
-               mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-               for (i=0 ; i < mppe ; i++) {
+               sprintf(this->text_version, "/%s/%d",
+                       devnum2devname(st->container_dev),
+                       this->container_member);
+
+               for (i=0 ; i < ddf->mppe ; i++) {
                        struct mdinfo *dev;
                        struct dl *d;
 
@@ -2398,22 +2567,12 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
        return rest;
 }
 
-static int init_zero_ddf(struct supertype *st,
-                        mdu_array_info_t *info,
-                        unsigned long long size, char *name,
-                        char *homehost, int *uuid)
-{
-       st->sb = NULL;
-       return 0;
-}
-
 static int store_zero_ddf(struct supertype *st, int fd)
 {
        unsigned long long dsize;
        char buf[512];
        memset(buf, 0, 512);
 
-
        if (!get_dev_size(fd, NULL, &dsize))
                return 1;
 
@@ -2447,35 +2606,509 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
        return 0;
 }
 
-static int ddf_open_new(struct supertype *c, struct active_array *a, int inst)
+/*
+ * A new array 'a' has been started which claims to be instance 'inst'
+ * within container 'c'.
+ * We need to confirm that the array matches the metadata in 'c' so
+ * that we don't corrupt any metadata.
+ *
+ * NOTE: no such confirmation is performed yet.  'inst' is simply parsed
+ * as a decimal number with atoi() and stored as the array's
+ * container_member; 'c' is not consulted.
+ * Always returns 0.
+ */
+static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
 {
-       fprintf(stderr, "ddf: open_new %d\n", inst);
+       fprintf(stderr, "ddf: open_new %s\n", inst);
+       /* atoi() reports no errors: non-numeric text yields 0 */
+       a->info.container_member = atoi(inst);
        return 0;
 }
 
-static void ddf_mark_clean(struct active_array *a, unsigned long long sync_pos)
+/*
+ * Record in the metadata whether array 'a' is clean ('consistent' non-zero)
+ * or dirty ('consistent' zero).
+ *
+ * For DDF the DDF_state_inconsistent bit lives in the !global!
+ * virtual_disk.virtual_entry structure, so that is what gets updated.
+ *
+ * a->resync_start tells how far the array is known to be in sync
+ * (in sectors) and is folded into the entry's init_state:
+ *   ~0ULL           -> DDF_init_full
+ *   0               -> DDF_init_not
+ *   anything else   -> DDF_init_quick
+ */
+static void ddf_set_array_state(struct active_array *a, int consistent)
 {
-       fprintf(stderr, "ddf: mark clean %llu\n", sync_pos);
+       struct ddf_super *ddf = a->container->sb;
+       struct virtual_entry *ve =
+               &ddf->virt->entries[a->info.container_member];
+       int init = DDF_init_quick;
+
+       if (consistent)
+               ve->state &= ~DDF_state_inconsistent;
+       else
+               ve->state |= DDF_state_inconsistent;
+
+       if (a->resync_start == ~0ULL)
+               init = DDF_init_full;
+       else if (a->resync_start == 0)
+               init = DDF_init_not;
+       ve->init_state = (ve->init_state & ~DDF_initstate_mask) | init;
+
+       printf("ddf mark %s %llu\n", consistent?"clean":"dirty",
+              a->resync_start);
 }
 
-static void ddf_mark_dirty(struct active_array *a)
+/*
+ * Record a state change for device 'n' of array 'a', then recompute the
+ * state of the whole virtual disk.
+ *
+ * The state of each disk is stored in the global phys_disk structure
+ * in phys_disk.entries[n].state.
+ * This makes various combinations awkward.
+ * - When a device fails in any array, it must be failed in all arrays
+ *   that include a part of this device.
+ * - When a component is rebuilding, we cannot include it officially in the
+ *   array unless this is the only array that uses the device.
+ *
+ * So: when transitioning:
+ *   Online -> failed,  just set failed flag.  monitor will propagate
+ *   spare -> online,   the device might need to be added to the array.
+ *   spare -> failed,   just set failed.  Don't worry if in array or not.
+ *
+ * 'state' is a mask of DS_* flags; only DS_FAULTY and DS_INSYNC are
+ * examined here.  If the configuration record for this array cannot be
+ * found, an error is logged and nothing is changed.
+ */
+static void ddf_set_disk(struct active_array *a, int n, int state)
 {
-       fprintf(stderr, "ddf: mark dirty\n");
+       struct ddf_super *ddf = a->container->sb;
+       int inst = a->info.container_member;
+       struct vd_config *vc = find_vdcr(ddf, inst);
+       int pd;
+       int i, st, working;
+
+       /* Must be tested before vc->phys_refnum is read below */
+       if (vc == NULL) {
+               fprintf(stderr, "ddf: cannot find instance %d!!\n", inst);
+               return;
+       }
+       pd = find_phys(ddf, vc->phys_refnum[n]);
+       if (pd < 0) {
+               /* disk doesn't currently exist. If it is now in_sync,
+                * insert it. */
+               if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
+                       /* Find dev 'n' in a->info->devs, determine the
+                        * ddf refnum, and set vc->phys_refnum and update
+                        * phys->entries[]
+                        */
+                       /* FIXME */
+               }
+       } else {
+               if (state & DS_FAULTY)
+                       ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Failed);
+               if (state & DS_INSYNC) {
+                       ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Online);
+                       /* complement after the byte swap, as done for the
+                        * 'type' field in ddf_process_update */
+                       ddf->phys->entries[pd].state  &= ~__cpu_to_be16(DDF_Rebuilding);
+               }
+       }
+
+       fprintf(stderr, "ddf: set_disk %d to %x\n", n, state);
+
+       /* Now we need to check the state of the array and update
+        * virtual_disk.entries[inst].state.
+        * It needs to be one of "optimal", "degraded", "failed".
+        * I don't understand 'deleted' or 'missing'.
+        */
+       working = 0;
+       for (i=0; i < a->info.array.raid_disks; i++) {
+               pd = find_phys(ddf, vc->phys_refnum[i]);
+               if (pd < 0)
+                       continue;
+               st = __be16_to_cpu(ddf->phys->entries[pd].state);
+               /* only Online devices that are neither Failed nor
+                * Rebuilding count as working */
+               if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+                   == DDF_Online)
+                       working++;
+       }
+       state = DDF_state_degraded;
+       if (working == a->info.array.raid_disks)
+               state = DDF_state_optimal;
+       else switch(vc->prl) {
+       case DDF_RAID0:
+       case DDF_CONCAT:
+       case DDF_JBOD:
+               /* no redundancy: any missing device is fatal */
+               state = DDF_state_failed;
+               break;
+       case DDF_RAID1:
+               if (working == 0)
+                       state = DDF_state_failed;
+               break;
+       case DDF_RAID4:
+       case DDF_RAID5:
+               if (working < a->info.array.raid_disks-1)
+                       state = DDF_state_failed;
+               break;
+       case DDF_RAID6:
+               if (working < a->info.array.raid_disks-2)
+                       state = DDF_state_failed;
+               else if (working == a->info.array.raid_disks-1)
+                       state = DDF_state_part_optimal;
+               break;
+       default:
+               /* other levels are simply reported as degraded */
+               break;
+       }
+
+       ddf->virt->entries[inst].state =
+               (ddf->virt->entries[inst].state & ~DDF_state_mask)
+               | state;
+
 }
 
-static void ddf_mark_sync(struct active_array *a, unsigned long long resync)
+/*
+ * Write the in-memory DDF metadata of container 'st' out by calling
+ * __write_init_super_ddf(), then log that a sync took place.
+ * NOTE(review): the meaning of the second argument (0) and any result of
+ * __write_init_super_ddf() are not visible here; nothing is reported
+ * back to the caller.
+ */
+static void ddf_sync_metadata(struct supertype *st)
 {
-       fprintf(stderr, "ddf: mark sync\n");
+
+       /*
+        * Write all data to all devices.
+        * Later, we might be able to track whether only local changes
+        * have been made, or whether any global data has been changed,
+        * but ddf is sufficiently weird that it probably always
+        * changes global data ....
+        */
+       __write_init_super_ddf(st, 0);
+       fprintf(stderr, "ddf: sync_metadata\n");
 }
 
-static void ddf_set_disk(struct active_array *a, int n, int state)
+static void ddf_process_update(struct supertype *st,
+                              struct metadata_update *update)
+{
+       /* Apply this update to the metadata.
+        * The first 4 bytes are a DDF_*_MAGIC which guides
+        * our actions.
+        * Possible update are:
+        *  DDF_PHYS_RECORDS_MAGIC
+        *    Add a new physical device.  Changes to this record
+        *    only happen implicitly.
+        *    used_pdes is the device number.
+        *  DDF_VIRT_RECORDS_MAGIC
+        *    Add a new VD.  Possibly also change the 'access' bits.
+        *    populated_vdes is the entry number.
+        *  DDF_VD_CONF_MAGIC
+        *    New or updated VD.  the VIRT_RECORD must already
+        *    exist.  For an update, phys_refnum and lba_offset
+        *    (at least) are updated, and the VD_CONF must
+        *    be written to precisely those devices listed with
+        *    a phys_refnum.
+        *  DDF_SPARE_ASSIGN_MAGIC
+        *    replacement Spare Assignment Record... but for which device?
+        *
+        * So, e.g.:
+        *  - to create a new array, we send a VIRT_RECORD and
+        *    a VD_CONF.  Then assemble and start the array.
+        *  - to activate a spare we send a VD_CONF to add the phys_refnum
+        *    and offset.  This will also mark the spare as active with
+        *    a spare-assignment record.
+        *
+        * An update that fails validation (wrong length, slot out of
+        * range or already in use, no memory prepared for a new VD_CONF,
+        * no matching VIRT_RECORD) is silently dropped.
+        */
+       struct ddf_super *ddf = st->sb;
+       __u32 *magic = (__u32*)update->buf;
+       struct phys_disk *pd;
+       struct virtual_disk *vd;
+       struct vd_config *vc;
+       struct vcl *vcl;
+       struct dl *dl;
+       int mppe;
+       int ent;
+
+       /* Too short to even hold the magic we dispatch on */
+       if (update->len < (int)sizeof(*magic))
+               return;
+
+       printf("Process update %x\n", *magic);
+
+       switch (*magic) {
+       case DDF_PHYS_RECORDS_MAGIC:
+
+               /* exactly one header plus one entry is expected */
+               if (update->len != (sizeof(struct phys_disk) +
+                                   sizeof(struct phys_disk_entry)))
+                       return;
+               pd = (struct phys_disk*)update->buf;
+
+               ent = __be16_to_cpu(pd->used_pdes);
+               if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
+                       return;
+               /* only a slot whose guid is still all-0xff may be filled */
+               if (!all_ff(ddf->phys->entries[ent].guid))
+                       return;
+               ddf->phys->entries[ent] = pd->entries[0];
+               ddf->phys->used_pdes = __cpu_to_be16(1 +
+                                          __be16_to_cpu(ddf->phys->used_pdes));
+               break;
+
+       case DDF_VIRT_RECORDS_MAGIC:
+
+               /* exactly one header plus one entry is expected */
+               if (update->len != (sizeof(struct virtual_disk) +
+                                   sizeof(struct virtual_entry)))
+                       return;
+               vd = (struct virtual_disk*)update->buf;
+
+               ent = __be16_to_cpu(vd->populated_vdes);
+               if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
+                       return;
+               /* only a slot whose guid is still all-0xff may be filled */
+               if (!all_ff(ddf->virt->entries[ent].guid))
+                       return;
+               ddf->virt->entries[ent] = vd->entries[0];
+               ddf->virt->populated_vdes = __cpu_to_be16(1 +
+                             __be16_to_cpu(ddf->virt->populated_vdes));
+               break;
+
+       case DDF_VD_CONF_MAGIC:
+               printf("len %d %d\n", update->len, ddf->conf_rec_len);
+
+               mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+               if (update->len != ddf->conf_rec_len * 512)
+                       return;
+               vc = (struct vd_config*)update->buf;
+               for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+                       if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
+                               break;
+               printf("vcl = %p\n", vcl);
+               if (vcl) {
+                       /* An update, just copy the phys_refnum and lba_offset
+                        * fields
+                        */
+                       memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
+                              mppe * (sizeof(__u32) + sizeof(__u64)));
+               } else {
+                       /* A new VD_CONF */
+
+                       /* ddf_prepare_update leaves 'space' NULL when its
+                        * malloc fails; without it there is nowhere to
+                        * store the record.
+                        */
+                       if (update->space == NULL)
+                               return;
+                       /* The VIRT_RECORD must already exist: locate it
+                        * so that vcnum can index virt->entries[].
+                        */
+                       for (ent = 0;
+                            ent < __be16_to_cpu(ddf->virt->max_vdes);
+                            ent++)
+                               if (memcmp(ddf->virt->entries[ent].guid,
+                                          vc->guid, DDF_GUID_LEN) == 0)
+                                       break;
+                       if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
+                               return;
+
+                       /* take over the buffer; the update no longer
+                        * refers to it */
+                       vcl = update->space;
+                       update->space = NULL;
+                       vcl->next = ddf->conflist;
+                       memcpy(&vcl->conf, vc, update->len);
+                       /* lba_offset[] follows the mppe phys_refnum slots */
+                       vcl->lba_offset = (__u64*)
+                               &vcl->conf.phys_refnum[mppe];
+                       vcl->vcnum = ent;
+                       ddf->conflist = vcl;
+               }
+               /* Now make sure vlist is correct for each dl. */
+               for (dl = ddf->dlist; dl; dl = dl->next) {
+                       int dn;
+                       int vn = 0;
+                       /* never record more than max_part entries */
+                       for (vcl = ddf->conflist;
+                            vcl && vn < ddf->max_part;
+                            vcl = vcl->next)
+                               for (dn=0; dn < ddf->mppe ; dn++)
+                                       if (vcl->conf.phys_refnum[dn] ==
+                                           dl->disk.refnum) {
+                                               printf("dev %d has %p at %d\n",
+                                                      dl->pdnum, vcl, vn);
+                                               dl->vlist[vn++] = vcl;
+                                               break;
+                                       }
+                       while (vn < ddf->max_part)
+                               dl->vlist[vn++] = NULL;
+                       /* Re-derive the type bits from what the device
+                        * is now used for. */
+                       if (dl->vlist[0]) {
+                               ddf->phys->entries[dl->pdnum].type &=
+                                       ~__cpu_to_be16(DDF_Global_Spare);
+                               ddf->phys->entries[dl->pdnum].type |=
+                                       __cpu_to_be16(DDF_Active_in_VD);
+                       }
+                       if (dl->spare) {
+                               ddf->phys->entries[dl->pdnum].type &=
+                                       ~__cpu_to_be16(DDF_Global_Spare);
+                               ddf->phys->entries[dl->pdnum].type |=
+                                       __cpu_to_be16(DDF_Spare);
+                       }
+                       if (!dl->vlist[0] && !dl->spare) {
+                               ddf->phys->entries[dl->pdnum].type |=
+                                       __cpu_to_be16(DDF_Global_Spare);
+                               ddf->phys->entries[dl->pdnum].type &=
+                                       ~__cpu_to_be16(DDF_Spare |
+                                                      DDF_Active_in_VD);
+                       }
+               }
+               break;
+       case DDF_SPARE_ASSIGN_MAGIC:
+       default: break;
+       }
+}
+
+/*
+ * Called when 'update' has arrived at managemon and is about to be
+ * handed to monitor.  Any memory the update will need is allocated
+ * here: for a DDF_VD_CONF_MAGIC update, update->space is pointed at a
+ * buffer sized for a struct vcl header followed by one full
+ * configuration record.  Other updates are left untouched.
+ */
+static void ddf_prepare_update(struct supertype *st,
+                              struct metadata_update *update)
 {
-       fprintf(stderr, "ddf: set_disk %d\n", n);
+       struct ddf_super *ddf = st->sb;
+       __u32 kind = *(__u32*)update->buf;
+
+       if (kind != DDF_VD_CONF_MAGIC)
+               return;
+
+       /* malloc result is stored as-is, NULL included */
+       update->space = malloc(offsetof(struct vcl, conf)
+                              + ddf->conf_rec_len * 512);
 }
 
-static void ddf_sync_metadata(struct active_array *a)
+/*
+ * Check if the array 'a' is degraded but not failed.
+ * If it is, find as many spares as are available and needed and
+ * arrange for their inclusion.
+ * We only choose devices which are not already in the array,
+ * and prefer those with a spare-assignment to this array.
+ * otherwise we choose global spares - assuming always that
+ * there is enough room.
+ * For each spare that we assign, we return an 'mdinfo' which
+ * describes the position for the device in the array.
+ * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
+ * the new phys_refnum and lba_offset values.
+ *
+ * Returns NULL when nothing is to be done: a faulty device is still
+ * waiting to be removed, the array is not degraded or has failed, no
+ * suitable spare exists, or the update record could not be built.
+ *
+ * Only worry about BVDs at the moment.
+ */
+static struct mdinfo *ddf_activate_spare(struct active_array *a,
+                                        struct metadata_update **updates)
 {
-       fprintf(stderr, "ddf: sync_metadata\n");
+       int working = 0;
+       struct mdinfo *d;
+       struct ddf_super *ddf = a->container->sb;
+       int global_ok = 0;
+       struct mdinfo *rv = NULL;
+       struct mdinfo *di;
+       struct metadata_update *mu;
+       struct dl *dl;
+       int i;
+       struct vd_config *vc;
+       __u64 *lba;
+
+       for (d = a->info.devs ; d ; d = d->next) {
+               if ((d->curr_state & DS_FAULTY) &&
+                       d->state_fd >= 0)
+                       /* wait for Removal to happen */
+                       return NULL;
+               if (d->state_fd >= 0)
+                       working ++;
+       }
+
+       printf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
+              a->info.array.level);
+       if (working == a->info.array.raid_disks)
+               return NULL; /* array not degraded */
+       switch (a->info.array.level) {
+       case 1:
+               if (working == 0)
+                       return NULL; /* failed */
+               break;
+       case 4:
+       case 5:
+               if (working < a->info.array.raid_disks - 1)
+                       return NULL; /* failed */
+               break;
+       case 6:
+               if (working < a->info.array.raid_disks - 2)
+                       return NULL; /* failed */
+               break;
+       default: /* concat or stripe */
+               return NULL; /* failed */
+       }
+
+       /* For each slot, if it is not working, find a spare */
+       dl = ddf->dlist;
+       for (i = 0; i < a->info.array.raid_disks; i++) {
+               for (d = a->info.devs ; d ; d = d->next)
+                       if (d->disk.raid_disk == i)
+                               break;
+               printf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+               if (d && (d->state_fd >= 0))
+                       continue;
+
+               /* OK, this device needs recovery.  Find a spare */
+       again:
+               for ( ; dl ; dl = dl->next) {
+                       unsigned long long esize;
+                       unsigned long long pos;
+                       struct mdinfo *d2;
+                       int is_global = 0;
+                       int is_dedicated = 0;
+                       struct extent *ex;
+                       int j;
+                       /* If in this array, skip */
+                       for (d2 = a->info.devs ; d2 ; d2 = d2->next)
+                               if (d2->disk.major == dl->major &&
+                                   d2->disk.minor == dl->minor) {
+                                       printf("%x:%x already in array\n", dl->major, dl->minor);
+                                       break;
+                               }
+                       if (d2)
+                               continue;
+                       /* Likewise skip a device already picked for an
+                        * earlier slot during this call, so that one
+                        * spare is never handed to two slots.
+                        */
+                       for (d2 = rv ; d2 ; d2 = d2->next)
+                               if (d2->disk.major == dl->major &&
+                                   d2->disk.minor == dl->minor)
+                                       break;
+                       if (d2)
+                               continue;
+                       if (ddf->phys->entries[dl->pdnum].type &
+                           __cpu_to_be16(DDF_Spare)) {
+                               /* Check spare assign record */
+                               if (dl->spare) {
+                                       if (dl->spare->type & DDF_spare_dedicated) {
+                                               /* check spare_ents for guid */
+                                               for (j = 0 ;
+                                                    j < __be16_to_cpu(dl->spare->populated);
+                                                    j++) {
+                                                       if (memcmp(dl->spare->spare_ents[j].guid,
+                                                                  ddf->virt->entries[a->info.container_member].guid,
+                                                                  DDF_GUID_LEN) == 0)
+                                                               is_dedicated = 1;
+                                               }
+                                       } else
+                                               is_global = 1;
+                               }
+                       } else if (ddf->phys->entries[dl->pdnum].type &
+                                  __cpu_to_be16(DDF_Global_Spare)) {
+                               is_global = 1;
+                       }
+                       if ( ! (is_dedicated ||
+                               (is_global && global_ok))) {
+                               printf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
+                                      is_dedicated, is_global);
+                               continue;
+                       }
+
+                       /* We are allowed to use this device - is there space?
+                        * We need a->info.component_size sectors */
+                       ex = get_extents(ddf, dl);
+                       if (!ex) {
+                               printf("cannot get extents\n");
+                               continue;
+                       }
+                       j = 0; pos = 0;
+                       esize = 0;
+
+                       /* Walk the extent list with its own index 'j';
+                        * 'i' is the array slot and must not be touched.
+                        * The list ends with an entry of size 0.
+                        */
+                       do {
+                               esize = ex[j].start - pos;
+                               if (esize >= a->info.component_size)
+                                       break;
+                               pos = ex[j].start + ex[j].size;
+                               j++;
+                       } while (ex[j-1].size);
+
+                       free(ex);
+                       if (esize < a->info.component_size) {
+                               printf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+                                      esize, a->info.component_size);
+                               /* No room */
+                               continue;
+                       }
+
+                       /* Cool, we have a device with some space at pos */
+                       di = malloc(sizeof(*di));
+                       if (!di)
+                               /* out of memory: leave this slot unfilled */
+                               break;
+                       memset(di, 0, sizeof(*di));
+                       di->disk.number = i;
+                       di->disk.raid_disk = i;
+                       di->disk.major = dl->major;
+                       di->disk.minor = dl->minor;
+                       di->disk.state = 0;
+                       di->data_offset = pos;
+                       di->component_size = a->info.component_size;
+                       /* remember which phys entry this is for the
+                        * refnum lookup below */
+                       di->container_member = dl->pdnum;
+                       di->next = rv;
+                       rv = di;
+                       printf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+                              i, pos);
+
+                       break;
+               }
+               if (!dl && ! global_ok) {
+                       /* not enough dedicated spares, try global */
+                       global_ok = 1;
+                       dl = ddf->dlist;
+                       goto again;
+               }
+       }
+
+       if (!rv)
+               /* No spares found */
+               return rv;
+       /* Now 'rv' has a list of devices to return.
+        * Create a metadata_update record to update the
+        * phys_refnum and lba_offset values
+        */
+       vc = find_vdcr(ddf, a->info.container_member);
+       mu = malloc(sizeof(*mu));
+       if (mu)
+               mu->buf = malloc(ddf->conf_rec_len * 512);
+       if (!vc || !mu || !mu->buf) {
+               /* Cannot describe the change, so do not announce it */
+               free(mu);
+               while (rv) {
+                       di = rv->next;
+                       free(rv);
+                       rv = di;
+               }
+               return NULL;
+       }
+       mu->space = malloc(sizeof(struct vcl));
+       /* ddf_process_update only accepts a VD_CONF whose length is the
+        * full record size in bytes */
+       mu->len = ddf->conf_rec_len * 512;
+       mu->next = *updates;
+       memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
+
+       vc = (struct vd_config*)mu->buf;
+       lba = (__u64*)&vc->phys_refnum[ddf->mppe];
+       for (di = rv ; di ; di = di->next) {
+               /* 'dl' is stale (or NULL) by now; each di carries the
+                * index of its own phys entry */
+               vc->phys_refnum[di->disk.raid_disk] =
+                       ddf->phys->entries[di->container_member].refnum;
+               /* NOTE(review): stored in host byte order - confirm
+                * whether lba_offset is big-endian like other fields */
+               lba[di->disk.raid_disk] = di->data_offset;
+       }
+       *updates = mu;
+       return rv;
 }
 
 struct superswitch super_ddf = {
@@ -2485,6 +3118,7 @@ struct superswitch super_ddf = {
        .detail_super   = detail_super_ddf,
        .brief_detail_super = brief_detail_super_ddf,
        .validate_geometry = validate_geometry_ddf,
+       .write_init_super = write_init_super_ddf,
 #endif
        .match_home     = match_home_ddf,
        .uuid_from_super= uuid_from_super_ddf,
@@ -2496,91 +3130,22 @@ struct superswitch super_ddf = {
        .compare_super  = compare_super_ddf,
 
        .load_super     = load_super_ddf,
-       .init_super     = init_zero_ddf,
+       .init_super     = init_super_ddf,
        .store_super    = store_zero_ddf,
        .free_super     = free_super_ddf,
        .match_metadata_desc = match_metadata_desc_ddf,
-       .getinfo_super_n  = getinfo_super_n_container,
-
+       .add_to_super   = add_to_super_ddf,
+       .container_content = container_content_ddf,
 
-       .major          = 1000,
-       .swapuuid       = 0,
        .external       = 1,
-       .text_version   = "ddf",
 
 /* for mdmon */
        .open_new       = ddf_open_new,
-       .load_super     = load_super_ddf,
-       .mark_clean     = ddf_mark_clean,
-       .mark_dirty     = ddf_mark_dirty,
-       .mark_sync      = ddf_mark_sync,
+       .set_array_state= ddf_set_array_state,
        .set_disk       = ddf_set_disk,
        .sync_metadata  = ddf_sync_metadata,
+       .process_update = ddf_process_update,
+       .prepare_update = ddf_prepare_update,
+       .activate_spare = ddf_activate_spare,
 
-
-};
-
-/* Super_ddf_container is set by validate_geometry_ddf when given a
- * device that is not part of any array
- */
-struct superswitch super_ddf_container = {
-#ifndef MDASSEMBLE
-       .validate_geometry = validate_geometry_ddf_container,
-       .write_init_super = write_init_super_ddf,
-#endif
-
-       .init_super     = init_super_ddf,
-       .add_to_super   = add_to_super_ddf,
-
-       .free_super     = free_super_ddf,
-
-       .container_content = container_content_ddf,
-
-       .major          = 1000,
-       .swapuuid       = 0,
-       .external       = 1,
-       .text_version   = "ddf",
-};
-
-struct superswitch super_ddf_bvd = {
-#ifndef        MDASSEMBLE
-//     .detail_super   = detail_super_ddf_bvd,
-//     .brief_detail_super = brief_detail_super_ddf_bvd,
-       .validate_geometry = validate_geometry_ddf_bvd,
-       .write_init_super = write_init_super_ddf,
-#endif
-       .update_super   = update_super_ddf,
-       .init_super     = init_super_ddf_bvd,
-       .add_to_super   = add_to_super_ddf_bvd,
-       .getinfo_super  = getinfo_super_ddf_bvd,
-       .getinfo_super_n  = getinfo_super_n_bvd,
-
-       .load_super     = load_super_ddf,
-       .free_super     = free_super_ddf,
-       .match_metadata_desc = match_metadata_desc_ddf_bvd,
-
-
-       .major          = 1001,
-       .swapuuid       = 0,
-       .external       = 2,
-       .text_version   = "ddf",
-};
-
-struct superswitch super_ddf_svd = {
-#ifndef        MDASSEMBLE
-//     .detail_super   = detail_super_ddf_svd,
-//     .brief_detail_super = brief_detail_super_ddf_svd,
-       .validate_geometry = validate_geometry_ddf_svd,
-#endif
-       .update_super   = update_super_ddf,
-       .init_super     = init_super_ddf,
-
-       .load_super     = load_super_ddf,
-       .free_super     = free_super_ddf,
-       .match_metadata_desc = match_metadata_desc_ddf_svd,
-
-       .major          = 1002,
-       .swapuuid       = 0,
-       .external       = 2,
-       .text_version   = "ddf",
 };