]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-ddf.c
Report uuid in --detail --brief for ddf and intel
[thirdparty/mdadm.git] / super-ddf.c
index 69ca320142033696b222472b3ea1c89d2239a99f..ad21e74a3c82fee0ded5074d1b737ac921b330db 100644 (file)
 
 #define HAVE_STDINT_H 1
 #include "mdadm.h"
+#include "mdmon.h"
 #include "sha1.h"
 #include <values.h>
 
-static inline int ROUND_UP(int a, int base)
-{
-       return ((a+base-1)/base)*base;
-}
-
 /* a non-official T10 name for creation GUIDs */
 static char T10[] = "Linux-MD";
 
@@ -70,7 +66,7 @@ unsigned long crc32(
 #define        DDF_CONCAT      0x1f
 #define        DDF_RAID5E      0x15
 #define        DDF_RAID5EE     0x25
-#define        DDF_RAID6       0x16    /* Vendor unique layout */
+#define        DDF_RAID6       0x06
 
 /* Raid Level Qualifier (RLQ) */
 #define        DDF_RAID0_SIMPLE        0x00
@@ -82,6 +78,7 @@ unsigned long crc32(
 #define        DDF_RAID4_N             0x01 /* parity in last extent */
 /* these apply to raid5e and raid5ee as well */
 #define        DDF_RAID5_0_RESTART     0x00 /* same as 'right asymmetric' - layout 1 */
+#define        DDF_RAID6_0_RESTART     0x01 /* raid6 different from raid5 here!!! */
 #define        DDF_RAID5_N_RESTART     0x02 /* same as 'left asymmetric' - layout 0 */
 #define        DDF_RAID5_N_CONTINUE    0x03 /* same as 'left symmetric' - layout 2 */
 
@@ -107,13 +104,14 @@ unsigned long crc32(
 #define        DDF_BBM_LOG_MAGIC       __cpu_to_be32(0xABADB10C)
 
 #define        DDF_GUID_LEN    24
-#define DDF_REVISION   "01.00.00"
+#define DDF_REVISION_0 "01.00.00"
+#define DDF_REVISION_2 "01.02.00"
 
 struct ddf_header {
-       __u32   magic;
+       __u32   magic;          /* DDF_HEADER_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
-       char    revision[8];    /* 01.00.00 */
+       char    revision[8];    /* 01.02.00 */
        __u32   seq;            /* starts at '1' */
        __u32   timestamp;
        __u8    openflag;
@@ -166,7 +164,7 @@ struct ddf_header {
 
 /* The content of the 'controller section' - global scope */
 struct ddf_controller_data {
-       __u32   magic;
+       __u32   magic;                  /* DDF_CONTROLLER_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
        struct controller_type {
@@ -182,7 +180,7 @@ struct ddf_controller_data {
 
 /* The content of phys_section - global scope */
 struct phys_disk {
-       __u32   magic;
+       __u32   magic;          /* DDF_PHYS_RECORDS_MAGIC */
        __u32   crc;
        __u16   used_pdes;
        __u16   max_pdes;
@@ -201,7 +199,7 @@ struct phys_disk {
 /* phys_disk_entry.type is a bitmap - bigendian remember */
 #define        DDF_Forced_PD_GUID              1
 #define        DDF_Active_in_VD                2
-#define        DDF_Global_Spare                4
+#define        DDF_Global_Spare                4 /* VD_CONF records are ignored */
 #define        DDF_Spare                       8 /* overrides Global_spare */
 #define        DDF_Foreign                     16
 #define        DDF_Legacy                      32 /* no DDF on this device */
@@ -223,7 +221,7 @@ struct phys_disk {
 
 /* The content of the virt_section global scope */
 struct virtual_disk {
-       __u32   magic;
+       __u32   magic;          /* DDF_VIRT_RECORDS_MAGIC */
        __u32   crc;
        __u16   populated_vdes;
        __u16   max_vdes;
@@ -254,6 +252,7 @@ struct virtual_disk {
 #define        DDF_state_deleted       0x2
 #define        DDF_state_missing       0x3
 #define        DDF_state_failed        0x4
+#define        DDF_state_part_optimal  0x5
 
 #define        DDF_state_morphing      0x8
 #define        DDF_state_inconsistent  0x10
@@ -261,7 +260,8 @@ struct virtual_disk {
 /* virtual_entry.init_state is a bigendian bitmap */
 #define        DDF_initstate_mask      0x03
 #define        DDF_init_not            0x00
-#define        DDF_init_quick          0x01
+#define        DDF_init_quick          0x01 /* initialisation in progress.
+                                     * i.e. 'state_inconsistent' */
 #define        DDF_init_full           0x02
 
 #define        DDF_access_mask         0xc0
@@ -275,7 +275,7 @@ struct virtual_disk {
  */
 
 struct vd_config {
-       __u32   magic;
+       __u32   magic;          /* DDF_VD_CONF_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
        __u32   timestamp;
@@ -319,7 +319,7 @@ struct vd_config {
 #define        DDF_cache_rallowed      64      /* enable read caching */
 
 struct spare_assign {
-       __u32   magic;
+       __u32   magic;          /* DDF_SPARE_ASSIGN_MAGIC */
        __u32   crc;
        __u32   timestamp;
        __u8    reserved[7];
@@ -341,7 +341,7 @@ struct spare_assign {
 
 /* The data_section contents - local scope */
 struct disk_data {
-       __u32   magic;
+       __u32   magic;          /* DDF_PHYS_DATA_MAGIC */
        __u32   crc;
        char    guid[DDF_GUID_LEN];
        __u32   refnum;         /* crc of some magic drive data ... */
@@ -390,33 +390,51 @@ struct bad_block_log {
  * built in Create or Assemble to describe the whole array.
  */
 struct ddf_super {
-       struct ddf_header anchor, primary, secondary, *active;
+       struct ddf_header anchor, primary, secondary;
        struct ddf_controller_data controller;
+       struct ddf_header *active;
        struct phys_disk        *phys;
        struct virtual_disk     *virt;
        int pdsize, vdsize;
-       int max_part;
+       int max_part, mppe, conf_rec_len;
+       int currentdev;
+       int updates_pending;
        struct vcl {
-               struct vcl      *next;
-               __u64           *lba_offset; /* location in 'conf' of
-                                             * the lba table */
+               union {
+                       char space[512];
+                       struct {
+                               struct vcl      *next;
+                               __u64           *lba_offset; /* location in 'conf' of
+                                                             * the lba table */
+                               int     vcnum; /* index into ->virt */
+                               __u64           *block_sizes; /* NULL if all the same */
+                       };
+               };
                struct vd_config conf;
-       } *conflist, *newconf;
+       } *conflist, *currentconf;
        struct dl {
-               struct dl       *next;
+               union {
+                       char space[512];
+                       struct {
+                               struct dl       *next;
+                               int major, minor;
+                               char *devname;
+                               int fd;
+                               unsigned long long size; /* sectors */
+                               int pdnum;      /* index in ->phys */
+                               struct spare_assign *spare;
+                       };
+               };
                struct disk_data disk;
-               int major, minor;
-               char *devname;
-               int fd;
-               struct vcl *vlist[0]; /* max_part+1 in size */
-       } *dlist;
+               void *mdupdate; /* hold metadata update */
+               struct vcl *vlist[0]; /* max_part in size */
+       } *dlist, *add_list;
 };
 
 #ifndef offsetof
 #define offsetof(t,f) ((size_t)&(((t*)0)->f))
 #endif
 
-extern struct superswitch super_ddf_container, super_ddf_bvd;
 
 static int calc_crc(void *buf, int len)
 {
@@ -486,8 +504,10 @@ static void *load_section(int fd, struct ddf_super *super, void *buf,
                /* All pre-allocated sections are a single block */
                if (len != 1)
                        return NULL;
-       } else
-               buf = malloc(len<<9);
+       } else {
+               posix_memalign(&buf, 512, len<<9);
+       }
+
        if (!buf)
                return NULL;
 
@@ -541,11 +561,12 @@ static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
                                devname);
                return 2;
        }
-       if (memcmp(super->anchor.revision, DDF_REVISION, 8) != 0) {
+       if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
+           memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
                if (devname)
                        fprintf(stderr, Name ": can only support super revision"
-                               " %.8s, not %.8s on %s\n",
-                               DDF_REVISION, super->anchor.revision, devname);
+                               " %.8s and earlier, not %.8s on %s\n",
+                               DDF_REVISION_2, super->anchor.revision,devname);
                return 2;
        }
        if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
@@ -596,10 +617,16 @@ static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
            !super->virt) {
                free(super->phys);
                free(super->virt);
+               super->phys = NULL;
+               super->virt = NULL;
                return 2;
        }
        super->conflist = NULL;
        super->dlist = NULL;
+
+       super->max_part = __be16_to_cpu(super->active->max_partitions);
+       super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
+       super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
        return 0;
 }
 
@@ -610,13 +637,14 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        struct stat stb;
        char *conf;
        int i;
-       int conflen;
-       int mppe;
+       int vnum;
+       int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
+       unsigned long long dsize;
 
        /* First the local disk info */
-       super->max_part = __be16_to_cpu(super->active->max_partitions);
-       dl = malloc(sizeof(*dl) +
-                   (super->max_part+1) * sizeof(dl->vlist[0]));
+       posix_memalign((void**)&dl, 512,
+                      sizeof(*dl) +
+                      (super->max_part) * sizeof(dl->vlist[0]));
 
        load_section(fd, super, &dl->disk,
                     super->active->data_section_offset,
@@ -629,29 +657,47 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        dl->minor = minor(stb.st_rdev);
        dl->next = super->dlist;
        dl->fd = keep ? fd : -1;
-       for (i=0 ; i < super->max_part + 1 ; i++)
+
+       dl->size = 0;
+       if (get_dev_size(fd, devname, &dsize))
+               dl->size = dsize >> 9;
+       dl->spare = NULL;
+       for (i=0 ; i < super->max_part ; i++)
                dl->vlist[i] = NULL;
        super->dlist = dl;
+       dl->pdnum = -1;
+       for (i=0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
+               if (memcmp(super->phys->entries[i].guid,
+                          dl->disk.guid, DDF_GUID_LEN) == 0)
+                       dl->pdnum = i;
 
        /* Now the config list. */
        /* 'conf' is an array of config entries, some of which are
         * probably invalid.  Those which are good need to be copied into
         * the conflist
         */
-       conflen =  __be16_to_cpu(super->active->config_record_len);
 
        conf = load_section(fd, super, NULL,
                            super->active->config_section_offset,
                            super->active->config_section_length,
                            0);
 
+       vnum = 0;
        for (i = 0;
             i < __be32_to_cpu(super->active->config_section_length);
-            i += conflen) {
+            i += super->conf_rec_len) {
                struct vd_config *vd =
                        (struct vd_config *)((char*)conf + i*512);
                struct vcl *vcl;
 
+               if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
+                       if (dl->spare)
+                               continue;
+                       posix_memalign((void**)&dl->spare, 512,
+                                      super->conf_rec_len*512);
+                       memcpy(dl->spare, vd, super->conf_rec_len*512);
+                       continue;
+               }
                if (vd->magic != DDF_VD_CONF_MAGIC)
                        continue;
                for (vcl = super->conflist; vcl; vcl = vcl->next) {
@@ -661,20 +707,29 @@ static int load_ddf_local(int fd, struct ddf_super *super,
                }
 
                if (vcl) {
-                       dl->vlist[i/conflen] = vcl;
+                       dl->vlist[vnum++] = vcl;
                        if (__be32_to_cpu(vd->seqnum) <=
                            __be32_to_cpu(vcl->conf.seqnum))
                                continue;
-               } else {
-                       vcl = malloc(conflen*512 + offsetof(struct vcl, conf));
+               } else {
+                       posix_memalign((void**)&vcl, 512,
+                                      (super->conf_rec_len*512 +
+                                       offsetof(struct vcl, conf)));
                        vcl->next = super->conflist;
+                       vcl->block_sizes = NULL; /* FIXME not for CONCAT */
                        super->conflist = vcl;
+                       dl->vlist[vnum++] = vcl;
                }
-               memcpy(&vcl->conf, vd, conflen*512);
-               mppe = __be16_to_cpu(super->anchor.max_primary_element_entries);
+               memcpy(&vcl->conf, vd, super->conf_rec_len*512);
                vcl->lba_offset = (__u64*)
-                       &vcl->conf.phys_refnum[mppe];
-               dl->vlist[i/conflen] = vcl;
+                       &vcl->conf.phys_refnum[super->mppe];
+
+               for (i=0; i < max_virt_disks ; i++)
+                       if (memcmp(super->virt->entries[i].guid,
+                                  vcl->conf.guid, DDF_GUID_LEN)==0)
+                               break;
+               if (i < max_virt_disks)
+                       vcl->vcnum = i;
        }
        free(conf);
 
@@ -693,9 +748,12 @@ static int load_super_ddf(struct supertype *st, int fd,
        int rv;
 
 #ifndef MDASSEMBLE
-       if (load_super_ddf_all(st, fd, &st->sb, devname, 0) == 0)
+       /* if 'fd' is a container, load metadata from all the devices */
+       if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0)
                return 0;
 #endif
+       if (st->subarray[0])
+               return 1; /* FIXME Is this correct */
 
        if (get_dev_size(fd, devname, &dsize) == 0)
                return 1;
@@ -720,12 +778,12 @@ static int load_super_ddf(struct supertype *st, int fd,
                }
        }
 
-       super = malloc(sizeof(*super));
-       if (!super) {
+       if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
                fprintf(stderr, Name ": malloc of %zu failed.\n",
                        sizeof(*super));
                return 1;
        }
+       memset(super, 0, sizeof(*super));
 
        rv = load_ddf_headers(fd, super, devname);
        if (rv) {
@@ -756,6 +814,7 @@ static int load_super_ddf(struct supertype *st, int fd,
                st->minor_version = 0;
                st->max_devs = 512;
        }
+       st->loaded_container = 0;
        return 0;
 
 }
@@ -770,6 +829,8 @@ static void free_super_ddf(struct supertype *st)
        while (ddf->conflist) {
                struct vcl *v = ddf->conflist;
                ddf->conflist = v->next;
+               if (v->block_sizes)
+                       free(v->block_sizes);
                free(v);
        }
        while (ddf->dlist) {
@@ -777,6 +838,8 @@ static void free_super_ddf(struct supertype *st)
                ddf->dlist = d->next;
                if (d->fd >= 0)
                        close(d->fd);
+               if (d->spare)
+                       free(d->spare);
                free(d);
        }
        free(ddf);
@@ -793,6 +856,7 @@ static struct supertype *match_metadata_desc_ddf(char *arg)
                return NULL;
 
        st = malloc(sizeof(*st));
+       memset(st, 0, sizeof(*st));
        st->ss = &super_ddf;
        st->max_devs = 512;
        st->minor_version = 0;
@@ -800,38 +864,6 @@ static struct supertype *match_metadata_desc_ddf(char *arg)
        return st;
 }
 
-static struct supertype *match_metadata_desc_ddf_bvd(char *arg)
-{
-       struct supertype *st;
-       if (strcmp(arg, "ddf/bvd") != 0 &&
-           strcmp(arg, "bvd") != 0 &&
-           strcmp(arg, "default") != 0
-               )
-               return NULL;
-
-       st = malloc(sizeof(*st));
-       st->ss = &super_ddf_bvd;
-       st->max_devs = 512;
-       st->minor_version = 0;
-       st->sb = NULL;
-       return st;
-}
-static struct supertype *match_metadata_desc_ddf_svd(char *arg)
-{
-       struct supertype *st;
-       if (strcmp(arg, "ddf/svd") != 0 &&
-           strcmp(arg, "svd") != 0 &&
-           strcmp(arg, "default") != 0
-               )
-               return NULL;
-
-       st = malloc(sizeof(*st));
-       st->ss = &super_ddf_svd;
-       st->max_devs = 512;
-       st->minor_version = 0;
-       st->sb = NULL;
-       return st;
-}
 
 #ifndef MDASSEMBLE
 
@@ -892,7 +924,8 @@ static struct num_mapping ddf_level_num[] = {
        { DDF_RAID0, 0 },
        { DDF_RAID1, 1 },
        { DDF_RAID3, LEVEL_UNSUPPORTED },
-       { DDF_RAID5, 4 },
+       { DDF_RAID4, 4 },
+       { DDF_RAID5, 5 },
        { DDF_RAID1E, LEVEL_UNSUPPORTED },
        { DDF_JBOD, LEVEL_UNSUPPORTED },
        { DDF_CONCAT, LEVEL_LINEAR },
@@ -916,34 +949,41 @@ static void print_guid(char *guid, int tstamp)
 {
        /* A GUIDs are part (or all) ASCII and part binary.
         * They tend to be space padded.
-        * We ignore trailing spaces and print numbers
-        * <0x20 and >=0x7f as \xXX
-        * Some GUIDs have a time stamp in bytes 16-19.
-        * We print that if appropriate
+        * We print the GUID in HEX, then in parentheses add
+        * any initial ASCII sequence, and a possible
+        * time stamp from bytes 16-19
         */
        int l = DDF_GUID_LEN;
        int i;
+
+       for (i=0 ; i<DDF_GUID_LEN ; i++) {
+               if ((i&3)==0 && i != 0) printf(":");
+               printf("%02X", guid[i]&255);
+       }
+
+       printf(" (");
        while (l && guid[l-1] == ' ')
                l--;
        for (i=0 ; i<l ; i++) {
                if (guid[i] >= 0x20 && guid[i] < 0x7f)
                        fputc(guid[i], stdout);
                else
-                       fprintf(stdout, "\\x%02x", guid[i]&255);
+                       break;
        }
        if (tstamp) {
                time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
                char tbuf[100];
                struct tm *tm;
                tm = localtime(&then);
-               strftime(tbuf, 100, " (%D %T)",tm);
+               strftime(tbuf, 100, " %D %T",tm);
                fputs(tbuf, stdout);
        }
+       printf(")");
 }
 
 static void examine_vd(int n, struct ddf_super *sb, char *guid)
 {
-       int crl = __be16_to_cpu(sb->anchor.config_record_len);
+       int crl = sb->conf_rec_len;
        struct vcl *vcl;
 
        for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
@@ -1060,20 +1100,18 @@ static void examine_super_ddf(struct supertype *st, char *homehost)
        examine_pds(sb);
 }
 
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info);
+
+
 static void brief_examine_super_ddf(struct supertype *st)
 {
        /* We just write a generic DDF ARRAY entry
-        * The uuid is all hex, 6 groups of 4 bytes
         */
-       struct ddf_super *ddf = st->sb;
-       int i;
-       printf("ARRAY /dev/ddf UUID=");
-       for (i = 0; i < DDF_GUID_LEN; i++) {
-               printf("%02x", ddf->anchor.guid[i]);
-               if ((i&3) == 0 && i != 0)
-                       printf(":");
-       }
-       printf("\n");
+       struct mdinfo info;
+       char nbuf[64];
+       getinfo_super_ddf(st, &info);
+       fname_from_uuid(st, &info, nbuf, ':');
+       printf("ARRAY /dev/ddf metadata=ddf UUID=%s\n", nbuf + 5);
 }
 
 static void detail_super_ddf(struct supertype *st, char *homehost)
@@ -1092,9 +1130,12 @@ static void brief_detail_super_ddf(struct supertype *st)
         * Can that be stored in ddf_super??
         */
 //     struct ddf_super *ddf = st->sb;
+       struct mdinfo info;
+       char nbuf[64];
+       getinfo_super_ddf(st, &info);
+       fname_from_uuid(st, &info, nbuf,':');
+       printf(" UUID=%s", nbuf + 5);
 }
-
-
 #endif
 
 static int match_home_ddf(struct supertype *st, char *homehost)
@@ -1112,10 +1153,28 @@ static int match_home_ddf(struct supertype *st, char *homehost)
                ddf->controller.vendor_data[len] == 0);
 }
 
-static struct vd_config *find_vdcr(struct ddf_super *ddf)
+#ifndef MDASSEMBLE
+static struct vd_config *find_vdcr(struct ddf_super *ddf, int inst)
+{
+       struct vcl *v;
+
+       for (v = ddf->conflist; v; v = v->next)
+               if (inst == v->vcnum)
+                       return &v->conf;
+       return NULL;
+}
+#endif
+
+static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
 {
-       /* FIXME this just picks off the first one */
-       return &ddf->conflist->conf;
+       /* Find the entry in phys_disk which has the given refnum
+        * and return its index
+        */
+       int i;
+       for (i=0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
+               if (ddf->phys->entries[i].refnum == phys_refnum)
+                       return i;
+       return -1;
 }
 
 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
@@ -1124,11 +1183,14 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
         *  uuid to put into bitmap file (Create, Grow)
         *  uuid for backup header when saving critical section (Grow)
         *  comparing uuids when re-adding a device into an array
+        *    In these cases the uuid required is that of the data-array,
+        *    not the device-set.
+        *  uuid to recognise same set when adding a missing device back
+        *    to an array.   This is a uuid for the device-set.
+        *  
         * For each of these we can make do with a truncated
         * or hashed uuid rather than the original, as long as
         * everyone agrees.
-        * In each case the uuid required is that of the data-array,
-        * not the device-set.
         * In the case of SVD we assume the BVD is of interest,
         * though that might be the case if a bitmap were made for
         * a mirrored SVD - worry about that later.
@@ -1137,30 +1199,35 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
         * The first 16 bytes of the sha1 of these is used.
         */
        struct ddf_super *ddf = st->sb;
-       struct vd_config *vd = find_vdcr(ddf);
-
-       if (!vd)
-               memset(uuid, 0, sizeof (uuid));
-       else {
-               char buf[20];
-               struct sha1_ctx ctx;
-               sha1_init_ctx(&ctx);
-               sha1_process_bytes(&vd->guid, DDF_GUID_LEN, &ctx);
-               if (vd->sec_elmnt_count > 1)
-                       sha1_process_bytes(&vd->sec_elmnt_seq, 1, &ctx);
-               sha1_finish_ctx(&ctx, buf);
-               memcpy(uuid, buf, sizeof(uuid));
-       }
+       struct vcl *vcl = ddf->currentconf;
+       char *guid;
+       char buf[20];
+       struct sha1_ctx ctx;
+
+       if (vcl)
+               guid = vcl->conf.guid;
+       else
+               guid = ddf->anchor.guid;
+
+       sha1_init_ctx(&ctx);
+       sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
+       if (vcl && vcl->conf.sec_elmnt_count > 1)
+               sha1_process_bytes(&vcl->conf.sec_elmnt_seq, 1, &ctx);
+       sha1_finish_ctx(&ctx, buf);
+       memcpy(uuid, buf, 4*4);
 }
 
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info);
+
 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
 {
        struct ddf_super *ddf = st->sb;
-       int i;
 
-       info->array.major_version = 1000;
-       info->array.minor_version = 0; /* FIXME use ddf->revision somehow */
-       info->array.patch_version = 0;
+       if (ddf->currentconf) {
+               getinfo_super_ddf_bvd(st, info);
+               return;
+       }
+
        info->array.raid_disks    = __be16_to_cpu(ddf->phys->used_pdes);
        info->array.level         = LEVEL_CONTAINER;
        info->array.layout        = 0;
@@ -1170,36 +1237,34 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
        info->array.utime         = 0;
        info->array.chunk_size    = 0;
 
-//     info->data_offset         = ???;
-//     info->component_size      = ???;
 
        info->disk.major = 0;
        info->disk.minor = 0;
-       info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
-//     info->disk.raid_disk = find refnum in the table and use index;
-       info->disk.raid_disk = -1;
-       for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes) ; i++)
-               if (ddf->phys->entries[i].refnum == ddf->dlist->disk.refnum) {
-                       info->disk.raid_disk = i;
-                       break;
-               }
+       if (ddf->dlist) {
+               info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
+               info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
+
+               info->data_offset = __be64_to_cpu(ddf->phys->
+                                         entries[info->disk.raid_disk].
+                                         config_size);
+               info->component_size = ddf->dlist->size - info->data_offset;
+       } else {
+               info->disk.number = -1;
+//             info->disk.raid_disk = find refnum in the table and use index;
+       }
        info->disk.state = (1 << MD_DISK_SYNC);
 
-       info->reshape_active = 0;
 
-//     uuid_from_super_ddf(info->uuid, sbv);
+       info->reshape_active = 0;
+       info->name[0] = 0;
 
-//     info->name[] ?? ;
-}
+       info->array.major_version = -1;
+       info->array.minor_version = -2;
+       strcpy(info->text_version, "ddf");
+       info->safe_mode_delay = 0;
 
-static void getinfo_super_n_container(struct supertype *st, struct mdinfo *info)
-{
-       /* just need offset and size */
-       struct ddf_super *ddf = st->sb;
-       int n = info->disk.number;
+       uuid_from_super_ddf(st, info->uuid);
 
-       info->data_offset = __be64_to_cpu(ddf->phys->entries[n].config_size);
-       info->component_size = 32*1024*1024 / 512;
 }
 
 static int rlq_to_layout(int rlq, int prl, int raiddisks);
@@ -1207,24 +1272,28 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks);
 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 {
        struct ddf_super *ddf = st->sb;
-       struct vd_config *vd = find_vdcr(ddf);
+       struct vcl *vc = ddf->currentconf;
+       int cd = ddf->currentdev;
 
        /* FIXME this returns BVD info - what if we want SVD ?? */
 
-       info->array.major_version = 1000;
-       info->array.minor_version = 0; /* FIXME use ddf->revision somehow */
-       info->array.patch_version = 0;
-       info->array.raid_disks    = __be16_to_cpu(vd->prim_elmnt_count);
-       info->array.level         = map_num1(ddf_level_num, vd->prl);
-       info->array.layout        = rlq_to_layout(vd->rlq, vd->prl,
+       info->array.raid_disks    = __be16_to_cpu(vc->conf.prim_elmnt_count);
+       info->array.level         = map_num1(ddf_level_num, vc->conf.prl);
+       info->array.layout        = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
                                                  info->array.raid_disks);
        info->array.md_minor      = -1;
-       info->array.ctime         = DECADE + __be32_to_cpu(*(__u32*)(vd->guid+16));
-       info->array.utime         = DECADE + __be32_to_cpu(vd->timestamp);
-       info->array.chunk_size    = 512 << vd->chunk_shift;
-
-//     info->data_offset         = ???;
-//     info->component_size      = ???;
+       info->array.ctime         = DECADE +
+               __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+       info->array.utime         = DECADE + __be32_to_cpu(vc->conf.timestamp);
+       info->array.chunk_size    = 512 << vc->conf.chunk_shift;
+
+       if (cd >= 0 && cd < ddf->mppe) {
+               info->data_offset         = __be64_to_cpu(vc->lba_offset[cd]);
+               if (vc->block_sizes)
+                       info->component_size = vc->block_sizes[cd];
+               else
+                       info->component_size = __be64_to_cpu(vc->conf.blocks);
+       }
 
        info->disk.major = 0;
        info->disk.minor = 0;
@@ -1232,23 +1301,30 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 //     info->disk.raid_disk = find refnum in the table and use index;
 //     info->disk.state = ???;
 
+       info->container_member = ddf->currentconf->vcnum;
+
+       info->resync_start = 0;
+       if (!(ddf->virt->entries[info->container_member].state
+             & DDF_state_inconsistent)  &&
+           (ddf->virt->entries[info->container_member].init_state
+            & DDF_initstate_mask)
+           == DDF_init_full)
+               info->resync_start = ~0ULL;
+
        uuid_from_super_ddf(st, info->uuid);
 
-//     info->name[] ?? ;
-}
+       info->container_member = atoi(st->subarray);
+       info->array.major_version = -1;
+       info->array.minor_version = -2;
+       sprintf(info->text_version, "/%s/%s",
+               devnum2devname(st->container_dev),
+               st->subarray);
+       info->safe_mode_delay = 200;
 
-static void getinfo_super_n_bvd(struct supertype *st, struct mdinfo *info)
-{
-       /* Find the particular details for info->disk.raid_disk.
-        * This includes data_offset, component_size,
-        */
-       struct ddf_super *ddf = st->sb;
-       __u64 *lba_offset = ddf->newconf->lba_offset;
-       struct vd_config *conf = &ddf->newconf->conf;
-       info->data_offset = __be64_to_cpu(lba_offset[info->disk.raid_disk]);
-       info->component_size = __be64_to_cpu(conf->blocks);
+       info->name[0] = 0;
 }
 
+
 static int update_super_ddf(struct supertype *st, struct mdinfo *info,
                            char *update,
                            char *devname, int verbose,
@@ -1268,7 +1344,7 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info,
         *  grow:  Array has gained a new device - this is currently for
         *              linear only
         *  resync: mark as dirty so a resync will happen.
-        *  uuid:  Change the uuid of the array to match watch is given
+        *  uuid:  Change the uuid of the array to match what is given
         *  homehost:  update the recorded homehost
         *  name:  update the name - preserving the homehost
         *  _reshape_progress: record new reshape_progress position.
@@ -1280,10 +1356,9 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info,
         */
        int rv = 0;
 //     struct ddf_super *ddf = st->sb;
-//     struct vd_config *vd = find_vdcr(ddf);
+//     struct vd_config *vd = find_vdcr(ddf, info->container_member);
 //     struct virtual_entry *ve = find_ve(ddf);
 
-
        /* we don't need to handle "force-*" or "assemble" as
         * there is no need to 'trick' the kernel.  We the metadata is
         * first updated to activate the array, all the implied modifications
@@ -1344,6 +1419,13 @@ static void make_header_guid(char *guid)
        memcpy(guid+20, &stamp, 4);
        if (rfd >= 0) close(rfd);
 }
+
+static int init_super_ddf_bvd(struct supertype *st,
+                             mdu_array_info_t *info,
+                             unsigned long long size,
+                             char *name, char *homehost,
+                             int *uuid);
+
 static int init_super_ddf(struct supertype *st,
                          mdu_array_info_t *info,
                          unsigned long long size, char *name, char *homehost,
@@ -1383,7 +1465,16 @@ static int init_super_ddf(struct supertype *st,
        struct phys_disk *pd;
        struct virtual_disk *vd;
 
-       ddf = malloc(sizeof(*ddf));
+       if (!info) {
+               st->sb = NULL;
+               return 0;
+       }
+       if (st->sb)
+               return init_super_ddf_bvd(st, info, size, name, homehost,
+                                         uuid);
+
+       posix_memalign((void**)&ddf, 512, sizeof(*ddf));
+       memset(ddf, 0, sizeof(*ddf));
        ddf->dlist = NULL; /* no physical disks yet */
        ddf->conflist = NULL; /* No virtual disks yet */
 
@@ -1398,7 +1489,7 @@ static int init_super_ddf(struct supertype *st,
        ddf->anchor.magic = DDF_HEADER_MAGIC;
        make_header_guid(ddf->anchor.guid);
 
-       memcpy(ddf->anchor.revision, DDF_REVISION, 8);
+       memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
        ddf->anchor.seq = __cpu_to_be32(1);
        ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
        ddf->anchor.openflag = 0xFF;
@@ -1420,10 +1511,11 @@ static int init_super_ddf(struct supertype *st,
        ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
        ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
        ddf->max_part = 64;
-       ddf->anchor.config_record_len = __cpu_to_be16(1 + 256*12/512);
-       ddf->anchor.max_primary_element_entries = __cpu_to_be16(256);
+       ddf->mppe = 256;
+       ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
+       ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
+       ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
        memset(ddf->anchor.pad3, 0xff, 54);
-
        /* controller sections is one sector long immediately
         * after the ddf header */
        sector = 1;
@@ -1457,7 +1549,7 @@ static int init_super_ddf(struct supertype *st,
                __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
        sector += vdsize/512;
 
-       clen = (1 + 256*12/512) * (64+1);
+       clen = ddf->conf_rec_len * (ddf->max_part+1);
        ddf->anchor.config_section_offset = __cpu_to_be32(sector);
        ddf->anchor.config_section_length = __cpu_to_be32(clen);
        sector += clen;
@@ -1493,8 +1585,8 @@ static int init_super_ddf(struct supertype *st,
         * Remaining 16 are serial number.... maybe a hostname would do?
         */
        memcpy(ddf->controller.guid, T10, sizeof(T10));
-       gethostname(hostname, 17);
-       hostname[17] = 0;
+       gethostname(hostname, sizeof(hostname));
+       hostname[sizeof(hostname) - 1] = 0;
        hostlen = strlen(hostname);
        memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
        for (i = strlen(T10) ; i+hostlen < 24; i++)
@@ -1508,7 +1600,8 @@ static int init_super_ddf(struct supertype *st,
        memset(ddf->controller.pad, 0xff, 8);
        memset(ddf->controller.vendor_data, 0xff, 448);
 
-       pd = ddf->phys = malloc(pdsize);
+       posix_memalign((void**)&pd, 512, pdsize);
+       ddf->phys = pd;
        ddf->pdsize = pdsize;
 
        memset(pd, 0xff, pdsize);
@@ -1518,7 +1611,8 @@ static int init_super_ddf(struct supertype *st,
        pd->max_pdes = __cpu_to_be16(max_phys_disks);
        memset(pd->pad, 0xff, 52);
 
-       vd = ddf->virt = malloc(vdsize);
+       posix_memalign((void**)&vd, 512, vdsize);
+       ddf->virt = vd;
        ddf->vdsize = vdsize;
        memset(vd, 0, vdsize);
        vd->magic = DDF_VIRT_RECORDS_MAGIC;
@@ -1530,6 +1624,7 @@ static int init_super_ddf(struct supertype *st,
                memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
 
        st->sb = ddf;
+       ddf->updates_pending = 1;
        return 1;
 }
 
@@ -1580,7 +1675,10 @@ static int layout_to_rlq(int level, int layout, int raiddisks)
                case ALGORITHM_LEFT_ASYMMETRIC:
                        return DDF_RAID5_N_RESTART;
                case ALGORITHM_RIGHT_ASYMMETRIC:
-                       return DDF_RAID5_0_RESTART;
+                       if (level == 5)
+                               return DDF_RAID5_0_RESTART;
+                       else
+                               return DDF_RAID6_0_RESTART;
                case ALGORITHM_LEFT_SYMMETRIC:
                        return DDF_RAID5_N_CONTINUE;
                case ALGORITHM_RIGHT_SYMMETRIC:
@@ -1607,7 +1705,6 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks)
                        return -1; /* FIXME this isn't checked */
                }
        case DDF_RAID5:
-       case DDF_RAID6:
                switch(rlq) {
                case DDF_RAID5_N_RESTART:
                        return ALGORITHM_LEFT_ASYMMETRIC;
@@ -1618,10 +1715,74 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks)
                default:
                        return -1;
                }
+       case DDF_RAID6:
+               switch(rlq) {
+               case DDF_RAID5_N_RESTART:
+                       return ALGORITHM_LEFT_ASYMMETRIC;
+               case DDF_RAID6_0_RESTART:
+                       return ALGORITHM_RIGHT_ASYMMETRIC;
+               case DDF_RAID5_N_CONTINUE:
+                       return ALGORITHM_LEFT_SYMMETRIC;
+               default:
+                       return -1;
+               }
        }
        return -1;
 }
 
+#ifndef MDASSEMBLE
+struct extent {
+       unsigned long long start, size;
+};
+static int cmp_extent(const void *av, const void *bv)
+{
+       const struct extent *a = av;
+       const struct extent *b = bv;
+       if (a->start < b->start)
+               return -1;
+       if (a->start > b->start)
+               return 1;
+       return 0;
+}
+
+static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
+{
+       /* Find the list of used extents on the given physical device
+        * (dl) of the given ddf.
+        * Return a malloced array of 'struct extent', sorted by start and
+        * ended by an entry of size 0 that starts at the device's
+        * config_size; NULL if malloc fails.
+        * FIXME ignore DDF_Legacy devices?
+        */
+       struct extent *rv;
+       int n = 0;
+       int i, j;
+
+       rv = malloc(sizeof(struct extent) * (ddf->max_part + 2));
+       if (!rv)
+               return NULL;
+
+       for (i = 0; i < ddf->max_part; i++) {
+               struct vcl *v = dl->vlist[i];
+               if (v == NULL)
+                       continue;
+               for (j=0; j < __be16_to_cpu(v->conf.prim_elmnt_count); j++)
+                       if (v->conf.phys_refnum[j] == dl->disk.refnum) {
+                               /* This device plays role 'j' in  'v'. */
+                               rv[n].start = __be64_to_cpu(v->lba_offset[j]);
+                               rv[n].size = __be64_to_cpu(v->conf.blocks);
+                               n++;
+                               break;
+                       }
+       }
+       qsort(rv, n, sizeof(*rv), cmp_extent);
+
+       rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
+       rv[n].size = 0;
+       return rv;
+}
+#endif
+
 static int init_super_ddf_bvd(struct supertype *st,
                              mdu_array_info_t *info,
                              unsigned long long size,
@@ -1637,8 +1798,6 @@ static int init_super_ddf_bvd(struct supertype *st,
        struct virtual_entry *ve;
        struct vcl *vcl;
        struct vd_config *vc;
-       int mppe;
-       int conflen;
 
        if (__be16_to_cpu(ddf->virt->populated_vdes)
            >= __be16_to_cpu(ddf->virt->max_vdes)) {
@@ -1657,7 +1816,6 @@ static int init_super_ddf_bvd(struct supertype *st,
                return 0;
        }
        ve = &ddf->virt->entries[venum];
-       st->container_member = venum;
 
        /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
         * timestamp, random number
@@ -1667,10 +1825,12 @@ static int init_super_ddf_bvd(struct supertype *st,
        ve->pad0 = 0xFFFF;
        ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
        ve->type = 0;
-       ve->state = 0;
-       ve->init_state = 0;
-       if (!(info->state & 1))
-               ve->init_state = DDF_state_inconsistent;
+       ve->state = DDF_state_degraded; /* Will be modified as devices are added */
+       if (info->state & 1) /* clean */
+               ve->init_state = DDF_init_full;
+       else
+               ve->init_state = DDF_init_not;
+
        memset(ve->pad1, 0xff, 14);
        memset(ve->name, ' ', 16);
        if (name)
@@ -1679,10 +1839,12 @@ static int init_super_ddf_bvd(struct supertype *st,
                __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
 
        /* Now create a new vd_config */
-       conflen =  __be16_to_cpu(ddf->active->config_record_len);
-       vcl = malloc(offsetof(struct vcl, conf) + conflen * 512);
-       mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[mppe];
+       posix_memalign((void**)&vcl, 512,
+                      (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512));
+       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
+       vcl->vcnum = venum;
+       sprintf(st->subarray, "%d", venum);
+       vcl->block_sizes = NULL; /* FIXME not for CONCAT */
 
        vc = &vcl->conf;
 
@@ -1722,15 +1884,17 @@ static int init_super_ddf_bvd(struct supertype *st,
        memset(vc->v3, 0xff, 16);
        memset(vc->vendor, 0xff, 32);
 
-       memset(vc->phys_refnum, 0xff, 4*mppe);
-       memset(vc->phys_refnum+mppe, 0x00, 8*mppe);
+       memset(vc->phys_refnum, 0xff, 4*ddf->mppe); /* no members yet */
+       memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe); /* lba_offset[] */
 
        vcl->next = ddf->conflist;
        ddf->conflist = vcl;
-       ddf->newconf = vcl;
+       ddf->currentconf = vcl;
+       ddf->updates_pending = 1;
        return 1;
 }
 
+#ifndef MDASSEMBLE
 static void add_to_super_ddf_bvd(struct supertype *st,
                                 mdu_disk_info_t *dk, int fd, char *devname)
 {
@@ -1739,13 +1903,16 @@ static void add_to_super_ddf_bvd(struct supertype *st,
         * We need to find suitable free space in that device and update
         * the phys_refnum and lba_offset for the newly created vd_config.
         * We might also want to update the type in the phys_disk
-        * section. FIXME
+        * section.
         */
        struct dl *dl;
        struct ddf_super *ddf = st->sb;
        struct vd_config *vc;
        __u64 *lba_offset;
-       int mppe;
+       int working;
+       int i;
+       unsigned long long blocks, pos, esize;
+       struct extent *ex;
 
        for (dl = ddf->dlist; dl ; dl = dl->next)
                if (dl->major == dk->major &&
@@ -1754,16 +1921,69 @@ static void add_to_super_ddf_bvd(struct supertype *st,
        if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
                return;
 
-       vc = &ddf->newconf->conf;
+       vc = &ddf->currentconf->conf;
+       lba_offset = ddf->currentconf->lba_offset;
+
+       ex = get_extents(ddf, dl);
+       if (!ex)
+               return;
+
+       i = 0; pos = 0;
+       blocks = __be64_to_cpu(vc->blocks);
+       if (ddf->currentconf->block_sizes)
+               blocks = ddf->currentconf->block_sizes[dk->raid_disk];
+
+       do {
+               esize = ex[i].start - pos;
+               if (esize >= blocks)
+                       break;
+               pos = ex[i].start + ex[i].size;
+               i++;
+       } while (ex[i-1].size);
+
+       free(ex);
+       if (esize < blocks)
+               return;
+
+       ddf->currentdev = dk->raid_disk;
        vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
-       mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-       lba_offset = (__u64*)(vc->phys_refnum + mppe);
-       lba_offset[dk->raid_disk] = 0; /* FIXME */
+       lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
 
-       dl->vlist[0] =ddf->newconf; /* FIXME */
+       for (i=0; i < ddf->max_part ; i++)
+               if (dl->vlist[i] == NULL)
+                       break;
+       if (i == ddf->max_part)
+               return;
+       dl->vlist[i] = ddf->currentconf;
 
        dl->fd = fd;
        dl->devname = devname;
+
+       /* Check how many working raid_disks, and if we can mark
+        * array as optimal yet
+        */
+       working = 0;
+
+       for (i=0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
+               if (vc->phys_refnum[i] != 0xffffffff)
+                       working++;
+
+       /* Find which virtual_entry */
+       i = ddf->currentconf->vcnum;
+       if (working == __be16_to_cpu(vc->prim_elmnt_count))
+               ddf->virt->entries[i].state =
+                       (ddf->virt->entries[i].state & ~DDF_state_mask)
+                       | DDF_state_optimal;
+
+       if (vc->prl == DDF_RAID6 &&
+           working+1 == __be16_to_cpu(vc->prim_elmnt_count))
+               ddf->virt->entries[i].state =
+                       (ddf->virt->entries[i].state & ~DDF_state_mask)
+                       | DDF_state_part_optimal;
+
+       ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
+       ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
+       ddf->updates_pending = 1;
 }
 
 /* add a device to a container, either while creating it or while
@@ -1781,16 +2001,22 @@ static void add_to_super_ddf(struct supertype *st,
        int n, i;
        struct stat stb;
 
+       if (ddf->currentconf) {
+               add_to_super_ddf_bvd(st, dk, fd, devname);
+               return;
+       }
+
        /* This is device numbered dk->number.  We need to create
         * a phys_disk entry and a more detailed disk_data entry.
         */
        fstat(fd, &stb);
-       dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * (ddf->max_part+1));
+       posix_memalign((void**)&dd, 512,
+                      sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part);
        dd->major = major(stb.st_rdev);
        dd->minor = minor(stb.st_rdev);
        dd->devname = devname;
-       dd->next = ddf->dlist;
        dd->fd = fd;
+       dd->spare = NULL;
 
        dd->disk.magic = DDF_PHYS_DATA_MAGIC;
        now = time(0);
@@ -1800,23 +2026,45 @@ static void add_to_super_ddf(struct supertype *st,
        *(__u32*)(dd->disk.guid + 16) = random();
        *(__u32*)(dd->disk.guid + 20) = random();
 
-       dd->disk.refnum = random(); /* and hope for the best FIXME check this is unique!!*/
+       do {
+               /* Cannot be bothered finding a CRC of some irrelevant details*/
+               dd->disk.refnum = random();
+               for (i = __be16_to_cpu(ddf->active->max_pd_entries) - 1;
+                    i >= 0; i--)
+                       if (ddf->phys->entries[i].refnum == dd->disk.refnum)
+                               break;
+       } while (i >= 0);
+
        dd->disk.forced_ref = 1;
        dd->disk.forced_guid = 1;
        memset(dd->disk.vendor, ' ', 32);
        memcpy(dd->disk.vendor, "Linux", 5);
        memset(dd->disk.pad, 0xff, 442);
-       for (i = 0; i < ddf->max_part+1 ; i++)
+       for (i = 0; i < ddf->max_part ; i++)
                dd->vlist[i] = NULL;
 
        n = __be16_to_cpu(ddf->phys->used_pdes);
        pde = &ddf->phys->entries[n];
-       n++;
-       ddf->phys->used_pdes = __cpu_to_be16(n);
+       dd->pdnum = n;
+
+       if (st->update_tail) {
+               int len = (sizeof(struct phys_disk) +
+                          sizeof(struct phys_disk_entry));
+               struct phys_disk *pd;
+
+               pd = malloc(len);
+               pd->magic = DDF_PHYS_RECORDS_MAGIC;
+               pd->used_pdes = __cpu_to_be16(n);
+               pde = &pd->entries[0];
+               dd->mdupdate = pd;
+       } else {
+               n++;
+               ddf->phys->used_pdes = __cpu_to_be16(n);
+       }
 
        memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
        pde->refnum = dd->disk.refnum;
-       pde->type = __cpu_to_be16(DDF_Forced_PD_GUID |DDF_Global_Spare);
+       pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
        pde->state = __cpu_to_be16(DDF_Online);
        get_dev_size(fd, NULL, &size);
        /* We are required to reserve 32Meg, and record the size in sectors */
@@ -1824,7 +2072,15 @@ static void add_to_super_ddf(struct supertype *st,
        sprintf(pde->path, "%17.17s","Information: nil") ;
        memset(pde->pad, 0xff, 6);
 
-       ddf->dlist = dd;
+       dd->size = size >> 9;
+       if (st->update_tail) {
+               dd->next = ddf->add_list;
+               ddf->add_list = dd;
+       } else {
+               dd->next = ddf->dlist;
+               ddf->dlist = dd;
+               ddf->updates_pending = 1;
+       }
 }
 
 /*
@@ -1833,8 +2089,9 @@ static void add_to_super_ddf(struct supertype *st,
  * container.
  */
 
-#ifndef MDASSEMBLE
-static int write_init_super_ddf(struct supertype *st)
+static unsigned char null_conf[4096+512];
+
+static int __write_init_super_ddf(struct supertype *st, int do_close)
 {
 
        struct ddf_super *ddf = st->sb;
@@ -1893,18 +2150,26 @@ static int write_init_super_ddf(struct supertype *st)
                write(fd, ddf->virt, ddf->vdsize);
 
                /* Now write lots of config records. */
-               n_config = __be16_to_cpu(ddf->active->max_partitions);
-               conf_size = __be16_to_cpu(ddf->active->config_record_len) * 512;
+               n_config = ddf->max_part;
+               conf_size = ddf->conf_rec_len * 512;
                for (i = 0 ; i <= n_config ; i++) {
                        struct vcl *c = d->vlist[i];
+                       if (i == n_config)
+                               c = (struct vcl*)d->spare;
 
                        if (c) {
                                c->conf.crc = calc_crc(&c->conf, conf_size);
                                write(fd, &c->conf, conf_size);
                        } else {
-                               __u32 sig = 0xffffffff;
-                               write(fd, &sig, 4);
-                               lseek64(fd, conf_size-4, SEEK_CUR);
+                               char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL);
+                               if (null_conf[0] != 0xff)
+                                       memset(null_conf, 0xff, sizeof(null_conf));
+                               int togo = conf_size;
+                               while (togo > sizeof(null_conf)-512) {
+                                       write(fd, null_aligned, sizeof(null_conf)-512);
+                                       togo -= sizeof(null_conf)-512;
+                               }
+                               write(fd, null_aligned, togo);
                        }
                }
                d->disk.crc = calc_crc(&d->disk, 512);
@@ -1914,10 +2179,59 @@ static int write_init_super_ddf(struct supertype *st)
 
                lseek64(fd, (size-1)*512, SEEK_SET);
                write(fd, &ddf->anchor, 512);
-               close(fd);
+               if (do_close) {
+                       close(fd);
+                       d->fd = -1;
+               }
        }
        return 1;
 }
+
+static int write_init_super_ddf(struct supertype *st)
+{
+
+       if (st->update_tail) {
+               /* queue the virtual_disk and vd_config as metadata updates */
+               struct virtual_disk *vd;
+               struct vd_config *vc;
+               struct ddf_super *ddf = st->sb;
+               int len;
+
+               if (!ddf->currentconf) {
+                       int len = (sizeof(struct phys_disk) +
+                                  sizeof(struct phys_disk_entry));
+
+                       /* adding a disk to the container. */
+                       if (!ddf->add_list)
+                               return 0;
+
+                       append_metadata_update(st, ddf->add_list->mdupdate, len);
+                       ddf->add_list->mdupdate = NULL;
+                       return 0;
+               }
+
+               /* Newly created VD */
+
+               /* First the virtual disk.  We have a slightly fake header */
+               len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
+               vd = malloc(len);
+               *vd = *ddf->virt;
+               vd->entries[0] = ddf->virt->entries[ddf->currentconf->vcnum];
+               vd->populated_vdes = __cpu_to_be16(ddf->currentconf->vcnum);
+               append_metadata_update(st, vd, len);
+
+               /* Then the vd_config */
+               len = ddf->conf_rec_len * 512;
+               vc = malloc(len);
+               memcpy(vc, &ddf->currentconf->conf, len);
+               append_metadata_update(st, vc, len);
+
+               /* FIXME I need to close the fds! */
+               return 0;
+       } else 
+               return __write_init_super_ddf(st, 1);
+}
+
 #endif
 
 static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
@@ -1929,10 +2243,24 @@ static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
 }
 
 #ifndef MDASSEMBLE
-int validate_geometry_ddf(struct supertype *st,
-                         int level, int layout, int raiddisks,
-                         int chunk, unsigned long long size,
-                         char *dev, unsigned long long *freesize)
+static int
+validate_geometry_ddf_container(struct supertype *st,
+                               int level, int layout, int raiddisks,
+                               int chunk, unsigned long long size,
+                               char *dev, unsigned long long *freesize,
+                               int verbose);
+
+static int validate_geometry_ddf_bvd(struct supertype *st,
+                                    int level, int layout, int raiddisks,
+                                    int chunk, unsigned long long size,
+                                    char *dev, unsigned long long *freesize,
+                                    int verbose);
+
+static int validate_geometry_ddf(struct supertype *st,
+                                int level, int layout, int raiddisks,
+                                int chunk, unsigned long long size,
+                                char *dev, unsigned long long *freesize,
+                                int verbose)
 {
        int fd;
        struct mdinfo *sra;
@@ -1946,71 +2274,74 @@ int validate_geometry_ddf(struct supertype *st,
         */
 
        if (level == LEVEL_CONTAINER) {
-               st->ss = &super_ddf_container;
-               if (dev) {
-                       int rv =st->ss->validate_geometry(st, level, layout,
-                                                         raiddisks, chunk,
-                                                         size,
-                                                         NULL, freesize);
-                       if (rv)
-                               return rv;
-               }
-               return st->ss->validate_geometry(st, level, layout, raiddisks,
-                                                chunk, size, dev, freesize);
+               /* Must be a fresh device to add to a container */
+               return validate_geometry_ddf_container(st, level, layout,
+                                                      raiddisks, chunk,
+                                                      size, dev, freesize,
+                                                      verbose);
        }
 
        if (st->sb) {
-               /* creating in a given container */
-               st->ss = &super_ddf_bvd;
-               if (dev) {
-                       int rv =st->ss->validate_geometry(st, level, layout,
-                                                         raiddisks, chunk,
-                                                         size,
-                                                         NULL, freesize);
-                       if (rv)
-                               return rv;
-               }
-               return st->ss->validate_geometry(st, level, layout, raiddisks,
-                                                chunk, size, dev, freesize);
+               /* A container has already been opened, so we are
+                * creating in there.  Maybe a BVD, maybe an SVD.
+                * Should make a distinction one day.
+                */
+               return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
+                                                chunk, size, dev, freesize,
+                                                verbose);
        }
-       /* FIXME should exclude MULTIPATH, or more appropriately, allow
-        * only known levels.
-        */
-       if (!dev)
+       if (!dev) {
+               /* Initial sanity check.  Exclude illegal levels. */
+               int i;
+               for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
+                       if (ddf_level_num[i].num2 == level)
+                               break;
+               if (ddf_level_num[i].num1 == MAXINT)
+                       return 0;
+               /* Should check layout? etc */
                return 1;
+       }
 
-       /* This device needs to be either a device in a 'ddf' container,
-        * or it needs to be a 'ddf-bvd' array.
+       /* This is the first device for the array.
+        * If it is a container, we read it in and do automagic allocations,
+        * no other devices should be given.
+        * Otherwise it must be a member device of a container, and we
+        * do manual allocation.
+        * Later we should check for a BVD and make an SVD.
         */
-
        fd = open(dev, O_RDONLY|O_EXCL, 0);
        if (fd >= 0) {
                sra = sysfs_read(fd, 0, GET_VERSION);
                close(fd);
                if (sra && sra->array.major_version == -1 &&
-                   strcmp(sra->text_version, "ddf-bvd") == 0) {
-                       st->ss = &super_ddf_svd;
-                       return st->ss->validate_geometry(st, level, layout,
-                                                        raiddisks, chunk, size,
-                                                        dev, freesize);
+                   strcmp(sra->text_version, "ddf") == 0) {
+
+                       /* load super */
+                       /* find space for 'n' devices. */
+                       /* remember the devices */
+                       /* Somehow return the fact that we have enough */
                }
 
-               fprintf(stderr,
-                       Name ": Cannot create this array on device %s\n",
-                       dev);
+               if (verbose)
+                       fprintf(stderr,
+                               Name ": ddf: Cannot create this array "
+                               "on device %s\n",
+                               dev);
                return 0;
        }
        if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
-               fprintf(stderr, Name ": Cannot open %s: %s\n",
-                       dev, strerror(errno));
+               if (verbose)
+                       fprintf(stderr, Name ": ddf: Cannot open %s: %s\n",
+                               dev, strerror(errno));
                return 0;
        }
        /* Well, it is in use by someone, maybe a 'ddf' container. */
        cfd = open_container(fd);
        if (cfd < 0) {
                close(fd);
-               fprintf(stderr, Name ": Cannot use %s: It is busy\n",
-                       dev);
+               if (verbose)
+                       fprintf(stderr, Name ": ddf: Cannot use %s: %s\n",
+                               dev, strerror(EBUSY));
                return 0;
        }
        sra = sysfs_read(cfd, 0, GET_VERSION);
@@ -2021,27 +2352,28 @@ int validate_geometry_ddf(struct supertype *st,
                 * and try to create a bvd
                 */
                struct ddf_super *ddf;
-               st->ss = &super_ddf_bvd;
                if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
                        st->sb = ddf;
                        st->container_dev = fd2devnum(cfd);
-                       st->container_member = 27; // FIXME
                        close(cfd);
-                       return st->ss->validate_geometry(st, level, layout,
+                       return validate_geometry_ddf_bvd(st, level, layout,
                                                         raiddisks, chunk, size,
-                                                        dev, freesize);
+                                                        dev, freesize,
+                                                        verbose);
                }
                close(cfd);
-       }
-       fprintf(stderr, Name ": Cannot use %s: Already in use\n",
-               dev);
+       } else /* device may belong to a different container */
+               return 0;
+
        return 1;
 }
 
-int validate_geometry_ddf_container(struct supertype *st,
-                                   int level, int layout, int raiddisks,
-                                   int chunk, unsigned long long size,
-                                   char *dev, unsigned long long *freesize)
+static int
+validate_geometry_ddf_container(struct supertype *st,
+                               int level, int layout, int raiddisks,
+                               int chunk, unsigned long long size,
+                               char *dev, unsigned long long *freesize,
+                               int verbose)
 {
        int fd;
        unsigned long long ldsize;
@@ -2053,8 +2385,9 @@ int validate_geometry_ddf_container(struct supertype *st,
 
        fd = open(dev, O_RDONLY|O_EXCL, 0);
        if (fd < 0) {
-               fprintf(stderr, Name ": Cannot open %s: %s\n",
-                       dev, strerror(errno));
+               if (verbose)
+                       fprintf(stderr, Name ": ddf: Cannot open %s: %s\n",
+                               dev, strerror(errno));
                return 0;
        }
        if (!get_dev_size(fd, dev, &ldsize)) {
@@ -2063,76 +2396,16 @@ int validate_geometry_ddf_container(struct supertype *st,
        }
        close(fd);
 
-       *freesize = avail_size_ddf(st, ldsize);
+       *freesize = avail_size_ddf(st, ldsize >> 9);
 
        return 1;
 }
 
-struct extent {
-       unsigned long long start, size;
-};
-int cmp_extent(const void *av, const void *bv)
-{
-       const struct extent *a = av;
-       const struct extent *b = bv;
-       if (a->start < b->start)
-               return -1;
-       if (a->start > b->start)
-               return 1;
-       return 0;
-}
-
-struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
-{
-       /* find a list of used extents on the give physical device
-        * (dnum) or the given ddf.
-        * Return a malloced array of 'struct extent'
-
-FIXME ignore DDF_Legacy devices?
-
-        */
-       struct extent *rv;
-       int n = 0;
-       int dnum;
-       int i, j;
-
-       for (dnum = 0; dnum < ddf->phys->used_pdes; dnum++)
-               if (memcmp(dl->disk.guid,
-                          ddf->phys->entries[dnum].guid,
-                          DDF_GUID_LEN) == 0)
-                       break;
-
-       if (dnum == ddf->phys->used_pdes)
-               return NULL;
-
-       rv = malloc(sizeof(struct extent) * (ddf->max_part + 2));
-       if (!rv)
-               return NULL;
-
-       for (i = 0; i < ddf->max_part+1; i++) {
-               struct vcl *v = dl->vlist[i];
-               if (v == NULL)
-                       continue;
-               for (j=0; j < v->conf.prim_elmnt_count; j++)
-                       if (v->conf.phys_refnum[j] == dl->disk.refnum) {
-                               /* This device plays role 'j' in  'v'. */
-                               rv[n].start = __be64_to_cpu(v->lba_offset[j]);
-                               rv[n].size = __be64_to_cpu(v->conf.blocks);
-                               n++;
-                               break;
-                       }
-       }
-       qsort(rv, n, sizeof(*rv), cmp_extent);
-
-       rv[n].start = __be64_to_cpu(ddf->phys->entries[dnum].config_size);
-       rv[n].size = 0;
-       return rv;
-}
-
-int validate_geometry_ddf_bvd(struct supertype *st,
-                             int level, int layout, int raiddisks,
-                             int chunk, unsigned long long size,
-                             char *dev, unsigned long long *freesize)
+static int validate_geometry_ddf_bvd(struct supertype *st,
+                                    int level, int layout, int raiddisks,
+                                    int chunk, unsigned long long size,
+                                    char *dev, unsigned long long *freesize,
+                                    int verbose)
 {
        struct stat stb;
        struct ddf_super *ddf = st->sb;
@@ -2159,6 +2432,7 @@ int validate_geometry_ddf_bvd(struct supertype *st,
                for (dl = ddf->dlist; dl ; dl = dl->next)
                {
                        int found = 0;
+                       pos = 0;
 
                        i = 0;
                        e = get_extents(ddf, dl);
@@ -2176,9 +2450,11 @@ int validate_geometry_ddf_bvd(struct supertype *st,
                        free(e);
                }
                if (dcnt < raiddisks) {
-                       fprintf(stderr, Name ": Not enough devices with space "
-                               "for this array (%d < %d)\n",
-                               dcnt, raiddisks);
+                       if (verbose)
+                               fprintf(stderr,
+                                       Name ": ddf: Not enough devices with "
+                                       "space for this array (%d < %d)\n",
+                                       dcnt, raiddisks);
                        return 0;
                }
                return 1;
@@ -2194,8 +2470,10 @@ int validate_geometry_ddf_bvd(struct supertype *st,
                        break;
        }
        if (!dl) {
-               fprintf(stderr, Name ": %s is not in the same DDF set\n",
-                       dev);
+               if (verbose)
+                       fprintf(stderr, Name ": ddf: %s is not in the "
+                               "same DDF set\n",
+                               dev);
                return 0;
        }
        e = get_extents(ddf, dl);
@@ -2214,19 +2492,6 @@ int validate_geometry_ddf_bvd(struct supertype *st,
 
        return 1;
 }
-int validate_geometry_ddf_svd(struct supertype *st,
-                             int level, int layout, int raiddisks,
-                             int chunk, unsigned long long size,
-                             char *dev, unsigned long long *freesize)
-{
-       /* dd/svd only supports striped, mirrored, concat, spanned... */
-       if (level != LEVEL_LINEAR &&
-           level != 0 &&
-           level != 1)
-               return 0;
-       return 1;
-}
-
 
 static int load_super_ddf_all(struct supertype *st, int fd,
                              void **sbp, char *devname, int keep_fd)
@@ -2247,19 +2512,19 @@ static int load_super_ddf_all(struct supertype *st, int fd,
            strcmp(sra->text_version, "ddf") != 0)
                return 1;
 
-       super = malloc(sizeof(*super));
-       if (!super)
+       if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
                return 1;
+       memset(super, 0, sizeof(*super));
 
        /* first, try each device, and choose the best ddf */
        for (sd = sra->devs ; sd ; sd = sd->next) {
                int rv;
                sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
-               dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
-               if (!dfd)
+               dfd = dev_open(nm, O_RDONLY);
+               if (dfd < 0)
                        return 2;
                rv = load_ddf_headers(dfd, super, NULL);
-               if (!keep_fd) close(dfd);
+               close(dfd);
                if (rv == 0) {
                        seq = __be32_to_cpu(super->active->seq);
                        if (super->active->openflag)
@@ -2275,7 +2540,7 @@ static int load_super_ddf_all(struct supertype *st, int fd,
        /* OK, load this ddf */
        sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
        dfd = dev_open(nm, O_RDONLY);
-       if (!dfd)
+       if (dfd < 0)
                return 1;
        load_ddf_headers(dfd, super, NULL);
        load_ddf_global(dfd, super, NULL);
@@ -2284,22 +2549,32 @@ static int load_super_ddf_all(struct supertype *st, int fd,
        for (sd = sra->devs ; sd ; sd = sd->next) {
                sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
                dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
-               if (!dfd)
+               if (dfd < 0)
                        return 2;
+               load_ddf_headers(dfd, super, NULL);
                seq = load_ddf_local(dfd, super, NULL, keep_fd);
                if (!keep_fd) close(dfd);
        }
+       if (st->subarray[0]) {
+               struct vcl *v;
+
+               for (v = super->conflist; v; v = v->next)
+                       if (v->vcnum == atoi(st->subarray))
+                               super->currentconf = v;
+               if (!super->currentconf)
+                       return 1;
+       }
        *sbp = super;
        if (st->ss == NULL) {
-               st->ss = &super_ddf_container;
+               st->ss = &super_ddf;
                st->minor_version = 0;
                st->max_devs = 512;
+               st->container_dev = fd2devnum(fd);
        }
+       st->loaded_container = 1;
        return 0;
 }
-#endif
-
-
+#endif /* MDASSEMBLE */
 
 static struct mdinfo *container_content_ddf(struct supertype *st)
 {
@@ -2317,7 +2592,6 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
 
        for (vc = ddf->conflist ; vc ; vc=vc->next)
        {
-               int mppe;
                int i;
                struct mdinfo *this;
                this = malloc(sizeof(*this));
@@ -2325,38 +2599,47 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                this->next = rest;
                rest = this;
 
-               this->array.major_version = 1000;
-               this->array.minor_version = 0;
-               this->array.patch_version = 0;
                this->array.level = map_num1(ddf_level_num, vc->conf.prl);
                this->array.raid_disks =
                        __be16_to_cpu(vc->conf.prim_elmnt_count);
-               /* FIXME this should be mapped */
-               this->array.layout = vc->conf.rlq;
+               this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
+                                                  this->array.raid_disks);
                this->array.md_minor      = -1;
+               this->array.major_version = -1;
+               this->array.minor_version = -2;
                this->array.ctime         = DECADE +
                        __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
                this->array.utime         = DECADE +
                        __be32_to_cpu(vc->conf.timestamp);
                this->array.chunk_size    = 512 << vc->conf.chunk_shift;
 
-               for (i=0; i < __be16_to_cpu(ddf->virt->populated_vdes); i++)
-                       if (memcmp(ddf->virt->entries[i].guid,
-                                  vc->conf.guid, DDF_GUID_LEN) == 0)
-                               break;
-               if (ddf->virt->entries[i].state & DDF_state_inconsistent)
+               i = vc->vcnum;
+               if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
+                   (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
+                   DDF_init_full) {
                        this->array.state = 0;
-               else
+                       this->resync_start = 0;
+               } else {
                        this->array.state = 1;
+                       this->resync_start = ~0ULL;
+               }
                memcpy(this->name, ddf->virt->entries[i].name, 32);
-               this->name[33]=0;
+               this->name[32]=0;
 
                memset(this->uuid, 0, sizeof(this->uuid));
                this->component_size = __be64_to_cpu(vc->conf.blocks);
                this->array.size = this->component_size / 2;
+               this->container_member = i;
 
-               mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-               for (i=0 ; i < mppe ; i++) {
+               ddf->currentconf = vc;
+               uuid_from_super_ddf(st, this->uuid);
+               ddf->currentconf = NULL;
+
+               sprintf(this->text_version, "/%s/%d",
+                       devnum2devname(st->container_dev),
+                       this->container_member);
+
+               for (i=0 ; i < ddf->mppe ; i++) {
                        struct mdinfo *dev;
                        struct dl *d;
 
@@ -2382,8 +2665,8 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                        dev->disk.raid_disk = i;
                        dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
 
-                       dev->events = __le32_to_cpu(ddf->primary.seq);
-                       dev->data_offset = vc->lba_offset[i];
+                       dev->events = __be32_to_cpu(ddf->primary.seq);
+                       dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
                        dev->component_size = __be64_to_cpu(vc->conf.blocks);
                        if (d->devname)
                                strcpy(dev->name, d->devname);
@@ -2392,27 +2675,20 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
        return rest;
 }
 
-static int init_zero_ddf(struct supertype *st,
-                        mdu_array_info_t *info,
-                        unsigned long long size, char *name,
-                        char *homehost, int *uuid)
-{
-       st->sb = NULL;
-       return 0;
-}
-
 static int store_zero_ddf(struct supertype *st, int fd)
 {
        unsigned long long dsize;
-       char buf[512];
-       memset(buf, 0, 512);
-
+       void *buf;
 
        if (!get_dev_size(fd, NULL, &dsize))
                return 1;
 
+       posix_memalign(&buf, 512, 512);
+       memset(buf, 0, 512);
+
        lseek64(fd, dsize-512, 0);
        write(fd, buf, 512);
+       free(buf);
        return 0;
 }
 
@@ -2441,6 +2717,558 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
        return 0;
 }
 
+#ifndef MDASSEMBLE
+/*
+ * A new array 'a' has been started which claims to be instance 'inst'
+ * within container 'c'.  We ought to confirm that the array matches the
+ * metadata in 'c' so that we don't corrupt it; for now nothing is checked:
+ * 'inst' is parsed as a decimal member number, recorded, and 0 is returned.
+ */
+static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
+{
+       dprintf("ddf: open_new %s\n", inst);
+       a->info.container_member = atoi(inst);
+       return 0; /* always succeeds; 'c' is not consulted */
+}
+
+/*
+ * Record in the metadata whether array 'a' is clean ('consistent' non-zero)
+ * or dirty (0).  'consistent' == 2 asks us to decide: the array counts as
+ * clean only when '->resync_start' is ~(unsigned long long)0, i.e. it is not
+ * merely clean up to some point (in sectors).
+ * For DDF this sets or clears the DDF_state_inconsistent bit and rewrites
+ * the init_state field in the !global! virtual_disk.virtual_entry structure.
+ * Returns the value of 'consistent' that was finally applied.
+ */
+static int ddf_set_array_state(struct active_array *a, int consistent)
+{
+       struct ddf_super *ddf = a->container->sb;
+       int inst = a->info.container_member;
+       int old = ddf->virt->entries[inst].state; /* to detect a change */
+       if (consistent == 2) {
+               /* Should check if a recovery should be started FIXME */
+               consistent = 1;
+               if (a->resync_start != ~0ULL)
+                       consistent = 0;
+       }
+       if (consistent)
+               ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
+       else
+               ddf->virt->entries[inst].state |= DDF_state_inconsistent;
+       if (old != ddf->virt->entries[inst].state)
+               ddf->updates_pending = 1; /* read by ddf_sync_metadata */
+
+       old = ddf->virt->entries[inst].init_state;
+       ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask; /* re-set below */
+       if (a->resync_start == ~0ULL)
+               ddf->virt->entries[inst].init_state |= DDF_init_full;
+       else if (a->resync_start == 0)
+               ddf->virt->entries[inst].init_state |= DDF_init_not;
+       else
+               ddf->virt->entries[inst].init_state |= DDF_init_quick;
+       if (old != ddf->virt->entries[inst].init_state)
+               ddf->updates_pending = 1;
+
+       dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
+               a->resync_start);
+       return consistent;
+}
+
+/*
+ * The state of each disk is stored in the global phys_disk structure
+ * in phys_disk.entries[n].state, which makes various combinations awkward.
+ * - When a device fails in any array, it must be failed in all arrays
+ *   that include a part of this device.
+ * - When a component is rebuilding, we cannot include it officially in the
+ *   array unless this is the only array that uses the device.
+ * 'n' is the slot of array 'a' being reported; 'state' carries DS_* flags.
+ * The virtual disk's own state is recomputed from all members afterwards.
+ * So: when transitioning:
+ *   Online -> failed,  just set failed flag.  monitor will propagate
+ *   spare -> online,   the device might need to be added to the array.
+ *   spare -> failed,   just set failed.  Don't worry if in array or not.
+ */
+static void ddf_set_disk(struct active_array *a, int n, int state)
+{
+       struct ddf_super *ddf = a->container->sb;
+       int inst = a->info.container_member;
+       struct vd_config *vc = find_vdcr(ddf, inst);
+       int pd = vc ? find_phys(ddf, vc->phys_refnum[n]) : -1; /* vc may be NULL */
+       int i, st, working;
+
+       if (vc == NULL) {
+               dprintf("ddf: cannot find instance %d!!\n", inst);
+               return;
+       }
+       if (pd < 0) {
+               /* disk doesn't currently exist. If it is now in_sync,
+                * insert it. */
+               if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
+                       /* Find dev 'n' in a->info->devs, determine the
+                        * ddf refnum, and set vc->phys_refnum and update
+                        * phys->entries[]
+                        */
+                       /* FIXME */
+               }
+       } else {
+               int old = ddf->phys->entries[pd].state;
+               if (state & DS_FAULTY)
+                       ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Failed);
+               if (state & DS_INSYNC) {
+                       ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Online);
+                       ddf->phys->entries[pd].state  &= __cpu_to_be16(~DDF_Rebuilding);
+               }
+               if (old != ddf->phys->entries[pd].state)
+                       ddf->updates_pending = 1;
+       }
+
+       dprintf("ddf: set_disk %d to %x\n", n, state);
+
+       /* Now we need to check the state of the array and update
+        * virtual_disk.entries[inst].state ('state' is reused for it).
+        * It needs to be one of "optimal", "degraded", "failed".
+        * I don't understand 'deleted' or 'missing'.
+        */
+       working = 0;
+       for (i=0; i < a->info.array.raid_disks; i++) {
+               pd = find_phys(ddf, vc->phys_refnum[i]);
+               if (pd < 0)
+                       continue;
+               st = __be16_to_cpu(ddf->phys->entries[pd].state);
+               if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+                   == DDF_Online)
+                       working++; /* online, not failed, not rebuilding */
+       }
+       state = DDF_state_degraded;
+       if (working == a->info.array.raid_disks)
+               state = DDF_state_optimal;
+       else switch(vc->prl) {
+       case DDF_RAID0:
+       case DDF_CONCAT:
+       case DDF_JBOD:
+               state = DDF_state_failed;
+               break;
+       case DDF_RAID1:
+               if (working == 0)
+                       state = DDF_state_failed;
+               break;
+       case DDF_RAID4:
+       case DDF_RAID5:
+               if (working < a->info.array.raid_disks-1)
+                       state = DDF_state_failed;
+               break;
+       case DDF_RAID6:
+               if (working < a->info.array.raid_disks-2)
+                       state = DDF_state_failed;
+               else if (working == a->info.array.raid_disks-1)
+                       state = DDF_state_part_optimal;
+               break;
+       }
+
+       if (ddf->virt->entries[inst].state !=
+           ((ddf->virt->entries[inst].state & ~DDF_state_mask)
+            | state)) {
+
+               ddf->virt->entries[inst].state =
+                       (ddf->virt->entries[inst].state & ~DDF_state_mask)
+                       | state;
+               ddf->updates_pending = 1;
+       }
+
+}
+
+static void ddf_sync_metadata(struct supertype *st)
+{
+
+       /*
+        * Write all data to all devices, but only when an update is pending.
+        * Later, we might be able to track whether only local changes
+        * have been made, or whether any global data has been changed,
+        * but ddf is sufficiently weird that it probably always
+        * changes global data ....
+        */
+       struct ddf_super *ddf = st->sb;
+       if (!ddf->updates_pending)
+               return; /* nothing has been flagged since the last sync */
+       ddf->updates_pending = 0; /* cleared before the write is issued */
+       __write_init_super_ddf(st, 0); /* NOTE(review): any result is ignored */
+       dprintf("ddf: sync_metadata\n");
+}
+
+static void ddf_process_update(struct supertype *st,
+                              struct metadata_update *update)
+{
+       /* Apply this update to the metadata.
+        * The first 4 bytes are a DDF_*_MAGIC which guides
+        * our actions.  Malformed updates are dropped without any report.
+        * Possible updates are:
+        *  DDF_PHYS_RECORDS_MAGIC
+        *    Add a new physical device.  Changes to this record
+        *    only happen implicitly.
+        *    used_pdes is the device number.
+        *  DDF_VIRT_RECORDS_MAGIC
+        *    Add a new VD.  Possibly also change the 'access' bits.
+        *    populated_vdes is the entry number.
+        *  DDF_VD_CONF_MAGIC
+        *    New or updated VD.  the VIRT_RECORD must already
+        *    exist.  For an update, phys_refnum and lba_offset
+        *    (at least) are updated, and the VD_CONF must
+        *    be written to precisely those devices listed with
+        *    a phys_refnum.
+        *  DDF_SPARE_ASSIGN_MAGIC
+        *    replacement Spare Assignment Record... but for which device?
+        *    (Not handled yet: this case just falls through to 'break'.)
+        * So, e.g.:
+        *  - to create a new array, we send a VIRT_RECORD and
+        *    a VD_CONF.  Then assemble and start the array.
+        *  - to activate a spare we send a VD_CONF to add the phys_refnum
+        *    and offset.  This will also mark the spare as active with
+        *    a spare-assignment record.
+        */
+       struct ddf_super *ddf = st->sb;
+       __u32 *magic = (__u32*)update->buf;
+       struct phys_disk *pd;
+       struct virtual_disk *vd;
+       struct vd_config *vc;
+       struct vcl *vcl;
+       struct dl *dl;
+       int mppe;
+       int ent;
+
+       dprintf("Process update %x\n", *magic);
+
+       switch (*magic) {
+       case DDF_PHYS_RECORDS_MAGIC:
+
+               if (update->len != (sizeof(struct phys_disk) +
+                                   sizeof(struct phys_disk_entry)))
+                       return; /* must be a header plus exactly one entry */
+               pd = (struct phys_disk*)update->buf;
+
+               ent = __be16_to_cpu(pd->used_pdes);
+               if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
+                       return; /* target slot is beyond the table */
+               if (!all_ff(ddf->phys->entries[ent].guid))
+                       return; /* presumably: slot already in use - TODO confirm */
+               ddf->phys->entries[ent] = pd->entries[0];
+               ddf->phys->used_pdes = __cpu_to_be16(1 +
+                                          __be16_to_cpu(ddf->phys->used_pdes));
+               ddf->updates_pending = 1;
+               if (ddf->add_list) {
+                       struct active_array *a;
+                       struct dl *al = ddf->add_list;
+                       ddf->add_list = al->next; /* move head of add_list ... */
+
+                       al->next = ddf->dlist; /* ... to the front of dlist */
+                       ddf->dlist = al;
+
+                       /* As a device has been added, we should check
+                        * for any degraded devices that might make
+                        * use of this spare */
+                       for (a = st->arrays ; a; a=a->next)
+                               a->check_degraded = 1;
+               }
+               break;
+
+       case DDF_VIRT_RECORDS_MAGIC:
+
+               if (update->len != (sizeof(struct virtual_disk) +
+                                   sizeof(struct virtual_entry)))
+                       return; /* must be a header plus exactly one entry */
+               vd = (struct virtual_disk*)update->buf;
+
+               ent = __be16_to_cpu(vd->populated_vdes);
+               if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
+                       return; /* target slot is beyond the table */
+               if (!all_ff(ddf->virt->entries[ent].guid))
+                       return; /* presumably: slot already in use - TODO confirm */
+               ddf->virt->entries[ent] = vd->entries[0];
+               ddf->virt->populated_vdes = __cpu_to_be16(1 +
+                             __be16_to_cpu(ddf->virt->populated_vdes));
+               ddf->updates_pending = 1;
+               break;
+
+       case DDF_VD_CONF_MAGIC:
+               dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
+
+               mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+               if (update->len != ddf->conf_rec_len * 512)
+                       return; /* len is in bytes, conf_rec_len is scaled by 512 */
+               vc = (struct vd_config*)update->buf;
+               for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+                       if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
+                               break;
+               dprintf("vcl = %p\n", vcl);
+               if (vcl) {
+                       /* An update, just copy the phys_refnum and lba_offset
+                        * fields (the offsets start at &phys_refnum[mppe])
+                        */
+                       memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
+                              mppe * (sizeof(__u32) + sizeof(__u64)));
+               } else {
+                       /* A new VD_CONF */
+                       vcl = update->space; /* NOTE(review): not NULL-checked */
+                       update->space = NULL; /* ownership moves to conflist */
+                       vcl->next = ddf->conflist;
+                       memcpy(&vcl->conf, vc, update->len);
+                       vcl->lba_offset = (__u64*)
+                               &vcl->conf.phys_refnum[mppe];
+                       ddf->conflist = vcl;
+               }
+               /* Now make sure vlist is correct for each dl. */
+               for (dl = ddf->dlist; dl; dl = dl->next) {
+                       int dn;
+                       int vn = 0;
+                       for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+                               for (dn=0; dn < ddf->mppe ; dn++)
+                                       if (vcl->conf.phys_refnum[dn] ==
+                                           dl->disk.refnum) {
+                                               dprintf("dev %d has %p at %d\n",
+                                                       dl->pdnum, vcl, vn);
+                                               dl->vlist[vn++] = vcl; /* NOTE(review): vn unbounded */
+                                               break;
+                                       }
+                       while (vn < ddf->max_part)
+                               dl->vlist[vn++] = NULL;
+                       if (dl->vlist[0]) {
+                               ddf->phys->entries[dl->pdnum].type &=
+                                       ~__cpu_to_be16(DDF_Global_Spare);
+                               ddf->phys->entries[dl->pdnum].type |=
+                                       __cpu_to_be16(DDF_Active_in_VD);
+                       }
+                       if (dl->spare) {
+                               ddf->phys->entries[dl->pdnum].type &=
+                                       ~__cpu_to_be16(DDF_Global_Spare);
+                               ddf->phys->entries[dl->pdnum].type |=
+                                       __cpu_to_be16(DDF_Spare);
+                       }
+                       if (!dl->vlist[0] && !dl->spare) {
+                               ddf->phys->entries[dl->pdnum].type |=
+                                       __cpu_to_be16(DDF_Global_Spare);
+                               ddf->phys->entries[dl->pdnum].type &=
+                                       ~__cpu_to_be16(DDF_Spare |
+                                                      DDF_Active_in_VD);
+                       }
+               }
+               ddf->updates_pending = 1;
+               break;
+       case DDF_SPARE_ASSIGN_MAGIC:
+       default: break;
+       }
+}
+
+static void ddf_prepare_update(struct supertype *st,
+                              struct metadata_update *update)
+{
+       /* This update arrived at managemon and is about to be passed to
+        * monitor.  If a malloc is needed, do it here: a DDF_VD_CONF_MAGIC
+        * update gets room for a struct vcl holding a whole config record.
+        */
+       struct ddf_super *ddf = st->sb;
+       __u32 *magic = (__u32*)update->buf;
+       if (*magic == DDF_VD_CONF_MAGIC &&
+           posix_memalign(&update->space, 512, offsetof(struct vcl, conf)
+                          + ddf->conf_rec_len * 512) != 0)
+               update->space = NULL; /* failure must not leave junk here */
+}
+
+/*
+ * Check if the array 'a' is degraded but not failed.
+ * If it is, find as many spares as are available and needed and
+ * arrange for their inclusion.
+ * We only choose devices which are not already in the array,
+ * and prefer those with a spare-assignment to this array.
+ * otherwise we choose global spares - assuming always that
+ * there is enough room.
+ * For each spare that we assign, we return an 'mdinfo' which
+ * describes the position for the device in the array.
+ * We also add to 'updates' a DDF_VD_CONF_MAGIC update carrying the new
+ * phys_refnum and lba_offset values; NULL is returned if nothing was found.
+ *
+ * Only worry about BVDs at the moment.
+ */
+static struct mdinfo *ddf_activate_spare(struct active_array *a,
+                                        struct metadata_update **updates)
+{
+       int working = 0;
+       struct mdinfo *d;
+       struct ddf_super *ddf = a->container->sb;
+       int global_ok = 0;
+       struct mdinfo *rv = NULL;
+       struct mdinfo *di;
+       struct metadata_update *mu;
+       struct dl *dl;
+       int i;
+       struct vd_config *vc;
+       __u64 *lba;
+
+       for (d = a->info.devs ; d ; d = d->next) {
+               if ((d->curr_state & DS_FAULTY) &&
+                       d->state_fd >= 0)
+                       /* wait for Removal to happen */
+                       return NULL;
+               if (d->state_fd >= 0)
+                       working ++;
+       }
+
+       dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
+               a->info.array.level);
+       if (working == a->info.array.raid_disks)
+               return NULL; /* array not degraded */
+       switch (a->info.array.level) {
+       case 1:
+               if (working == 0)
+                       return NULL; /* failed */
+               break;
+       case 4:
+       case 5:
+               if (working < a->info.array.raid_disks - 1)
+                       return NULL; /* failed */
+               break;
+       case 6:
+               if (working < a->info.array.raid_disks - 2)
+                       return NULL; /* failed */
+               break;
+       default: /* concat or stripe */
+               return NULL; /* failed */
+       }
+
+       /* For each slot, if it is not working, find a spare */
+       dl = ddf->dlist;
+       for (i = 0; i < a->info.array.raid_disks; i++) {
+               for (d = a->info.devs ; d ; d = d->next)
+                       if (d->disk.raid_disk == i)
+                               break;
+               dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+               if (d && (d->state_fd >= 0))
+                       continue;
+
+               /* OK, this device needs recovery.  Find a spare */
+       again:
+               for ( ; dl ; dl = dl->next) {
+                       unsigned long long esize;
+                       unsigned long long pos;
+                       struct mdinfo *d2;
+                       int is_global = 0;
+                       int is_dedicated = 0;
+                       struct extent *ex;
+                       int j;
+                       /* If in this array, skip */
+                       for (d2 = a->info.devs ; d2 ; d2 = d2->next)
+                               if (d2->disk.major == dl->major &&
+                                   d2->disk.minor == dl->minor) {
+                                       dprintf("%x:%x already in array\n", dl->major, dl->minor);
+                                       break;
+                               }
+                       if (d2)
+                               continue;
+                       if (ddf->phys->entries[dl->pdnum].type &
+                           __cpu_to_be16(DDF_Spare)) {
+                               /* Check spare assign record */
+                               if (dl->spare) {
+                                       if (dl->spare->type & DDF_spare_dedicated) {
+                                               /* check spare_ents for guid */
+                                               for (j = 0 ;
+                                                    j < __be16_to_cpu(dl->spare->populated);
+                                                    j++) {
+                                                       if (memcmp(dl->spare->spare_ents[j].guid,
+                                                                  ddf->virt->entries[a->info.container_member].guid,
+                                                                  DDF_GUID_LEN) == 0)
+                                                               is_dedicated = 1;
+                                               }
+                                       } else
+                                               is_global = 1;
+                               }
+                       } else if (ddf->phys->entries[dl->pdnum].type &
+                                  __cpu_to_be16(DDF_Global_Spare)) {
+                               is_global = 1;
+                       }
+                       if ( ! (is_dedicated ||
+                               (is_global && global_ok))) {
+                               dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
+                                      is_dedicated, is_global);
+                               continue;
+                       }
+
+                       /* We are allowed to use this device - is there space?
+                        * We need a->info.component_size sectors */
+                       ex = get_extents(ddf, dl);
+                       if (!ex) {
+                               dprintf("cannot get extents\n");
+                               continue;
+                       }
+                       j = 0; pos = 0;
+                       esize = 0;
+
+                       do { /* walk extents with 'j'; 'i' is the array slot */
+                               esize = ex[j].start - pos;
+                               if (esize >= a->info.component_size)
+                                       break;
+                               pos = ex[j].start + ex[j].size;
+                               j++;
+                       } while (ex[j-1].size); /* size 0 marks the final entry */
+
+                       free(ex);
+                       if (esize < a->info.component_size) {
+                               dprintf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+                                       esize, a->info.component_size);
+                               /* No room */
+                               continue;
+                       }
+
+                       /* Cool, we have a device with some space at pos */
+                       di = malloc(sizeof(*di)); /* NOTE(review): unchecked */
+                       memset(di, 0, sizeof(*di));
+                       di->disk.number = i;
+                       di->disk.raid_disk = i;
+                       di->disk.major = dl->major;
+                       di->disk.minor = dl->minor;
+                       di->disk.state = 0;
+                       di->data_offset = pos;
+                       di->component_size = a->info.component_size;
+                       di->container_member = dl->pdnum; /* read back below */
+                       di->next = rv;
+                       rv = di;
+                       dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+                               i, pos);
+
+                       break;
+               }
+               if (!dl && ! global_ok) {
+                       /* not enough dedicated spares, try global */
+                       global_ok = 1;
+                       dl = ddf->dlist;
+                       goto again;
+               }
+       }
+
+       if (!rv)
+               /* No spares found */
+               return rv;
+       /* Now 'rv' has a list of devices to return.  Create a
+        * metadata_update record (a copy of the whole config record, so len
+        * is in bytes) with the new phys_refnum and big-endian lba_offset.
+        */
+       mu = malloc(sizeof(*mu)); /* NOTE(review): allocations are unchecked */
+       mu->buf = malloc(ddf->conf_rec_len * 512);
+       posix_memalign(&mu->space, 512, sizeof(struct vcl));
+       mu->len = ddf->conf_rec_len * 512; /* ddf_process_update wants bytes */
+       mu->next = *updates;
+       vc = find_vdcr(ddf, a->info.container_member);
+       memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
+
+       vc = (struct vd_config*)mu->buf;
+       lba = (__u64*)&vc->phys_refnum[ddf->mppe];
+       for (di = rv ; di ; di = di->next) { /* 'dl' is stale or NULL by now */
+               vc->phys_refnum[di->disk.raid_disk] =
+                       ddf->phys->entries[di->container_member].refnum;
+               lba[di->disk.raid_disk] = __cpu_to_be64(di->data_offset);
+       }
+       *updates = mu;
+       return rv;
+}
+#endif /* MDASSEMBLE */
+
 struct superswitch super_ddf = {
 #ifndef        MDASSEMBLE
        .examine_super  = examine_super_ddf,
@@ -2448,6 +3276,8 @@ struct superswitch super_ddf = {
        .detail_super   = detail_super_ddf,
        .brief_detail_super = brief_detail_super_ddf,
        .validate_geometry = validate_geometry_ddf,
+       .write_init_super = write_init_super_ddf,
+       .add_to_super   = add_to_super_ddf,
 #endif
        .match_home     = match_home_ddf,
        .uuid_from_super= uuid_from_super_ddf,
@@ -2459,80 +3289,22 @@ struct superswitch super_ddf = {
        .compare_super  = compare_super_ddf,
 
        .load_super     = load_super_ddf,
-       .init_super     = init_zero_ddf,
+       .init_super     = init_super_ddf,
        .store_super    = store_zero_ddf,
        .free_super     = free_super_ddf,
        .match_metadata_desc = match_metadata_desc_ddf,
-       .getinfo_super_n  = getinfo_super_n_container,
-
-
-       .major          = 1000,
-       .swapuuid       = 0,
-       .external       = 1,
-       .text_version   = "ddf",
-};
-
-/* Super_ddf_container is set by validate_geometry_ddf when given a
- * device that is not part of any array
- */
-struct superswitch super_ddf_container = {
-#ifndef MDASSEMBLE
-       .validate_geometry = validate_geometry_ddf_container,
-       .write_init_super = write_init_super_ddf,
-#endif
-
-       .init_super     = init_super_ddf,
-       .add_to_super   = add_to_super_ddf,
-
-       .free_super     = free_super_ddf,
-
        .container_content = container_content_ddf,
 
-       .major          = 1000,
-       .swapuuid       = 0,
        .external       = 1,
-       .text_version   = "ddf",
-};
-
-struct superswitch super_ddf_bvd = {
-#ifndef        MDASSEMBLE
-//     .detail_super   = detail_super_ddf_bvd,
-//     .brief_detail_super = brief_detail_super_ddf_bvd,
-       .validate_geometry = validate_geometry_ddf_bvd,
-       .write_init_super = write_init_super_ddf,
-#endif
-       .update_super   = update_super_ddf,
-       .init_super     = init_super_ddf_bvd,
-       .add_to_super   = add_to_super_ddf_bvd,
-       .getinfo_super  = getinfo_super_ddf_bvd,
-       .getinfo_super_n  = getinfo_super_n_bvd,
-
-       .load_super     = load_super_ddf,
-       .free_super     = free_super_ddf,
-       .match_metadata_desc = match_metadata_desc_ddf_bvd,
 
-
-       .major          = 1001,
-       .swapuuid       = 0,
-       .external       = 2,
-       .text_version   = "ddf",
-};
-
-struct superswitch super_ddf_svd = {
-#ifndef        MDASSEMBLE
-//     .detail_super   = detail_super_ddf_svd,
-//     .brief_detail_super = brief_detail_super_ddf_svd,
-       .validate_geometry = validate_geometry_ddf_svd,
+#ifndef MDASSEMBLE
+/* for mdmon: array-state, metadata-update and spare-activation hooks; left out when MDASSEMBLE is defined */
+       .open_new       = ddf_open_new,
+       .set_array_state= ddf_set_array_state,
+       .set_disk       = ddf_set_disk,
+       .sync_metadata  = ddf_sync_metadata,
+       .process_update = ddf_process_update,
+       .prepare_update = ddf_prepare_update,
+       .activate_spare = ddf_activate_spare,
 #endif
-       .update_super   = update_super_ddf,
-       .init_super     = init_super_ddf,
-
-       .load_super     = load_super_ddf,
-       .free_super     = free_super_ddf,
-       .match_metadata_desc = match_metadata_desc_ddf_svd,
-
-       .major          = 1002,
-       .swapuuid       = 0,
-       .external       = 2,
-       .text_version   = "ddf",
 };