]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-ddf.c
Merge https://github.com/makelinux/mdadm
[thirdparty/mdadm.git] / super-ddf.c
index 6f162b9aab1744109bf65dd3fadaad48f84c6789..faaf0a7ca9e0ba5eed7e8fdd961fcb43d158ee7d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2006-2014 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -21,7 +21,7 @@
  *    Author: Neil Brown
  *    Email: <neil@brown.name>
  *
- * Specifications for DDF takes from Common RAID DDF Specification Revision 1.2
+ * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
  * (July 28 2006).  Reused by permission of SNIA.
  */
 
@@ -30,6 +30,7 @@
 #include "mdmon.h"
 #include "sha1.h"
 #include <values.h>
+#include <stddef.h>
 
 /* a non-official T10 name for creation GUIDs */
 static char T10[] = "Linux-MD";
@@ -44,6 +45,13 @@ unsigned long crc32(
        const unsigned char *buf,
        unsigned len);
 
+#define DDF_NOTFOUND (~0U)
+#define DDF_CONTAINER (DDF_NOTFOUND-1)
+
+/* Default for safe_mode_delay. Same value as for IMSM.
+ */
+static const int DDF_SAFE_MODE_DELAY = 4000;
+
 /* The DDF metadata handling.
  * DDF metadata lives at the end of the device.
  * The last 512 byte block provides an 'anchor' which is used to locate
@@ -55,6 +63,46 @@ unsigned long crc32(
  *
  */
 
+typedef struct __be16 {
+       __u16 _v16;
+} be16;
+#define be16_eq(x, y) ((x)._v16 == (y)._v16)
+#define be16_and(x, y) ((x)._v16 & (y)._v16)
+#define be16_or(x, y) ((x)._v16 | (y)._v16)
+#define be16_clear(x, y) ((x)._v16 &= ~(y)._v16)
+#define be16_set(x, y) ((x)._v16 |= (y)._v16)
+
+typedef struct __be32 {
+       __u32 _v32;
+} be32;
+#define be32_eq(x, y) ((x)._v32 == (y)._v32)
+
+typedef struct __be64 {
+       __u64 _v64;
+} be64;
+#define be64_eq(x, y) ((x)._v64 == (y)._v64)
+
+#define be16_to_cpu(be) __be16_to_cpu((be)._v16)
+static inline be16 cpu_to_be16(__u16 x)
+{
+       be16 be = { ._v16 = __cpu_to_be16(x) };
+       return be;
+}
+
+#define be32_to_cpu(be) __be32_to_cpu((be)._v32)
+static inline be32 cpu_to_be32(__u32 x)
+{
+       be32 be = { ._v32 = __cpu_to_be32(x) };
+       return be;
+}
+
+#define be64_to_cpu(be) __be64_to_cpu((be)._v64)
+static inline be64 cpu_to_be64(__u64 x)
+{
+       be64 be = { ._v64 = __cpu_to_be64(x) };
+       return be;
+}
+
 /* Primary Raid Level (PRL) */
 #define        DDF_RAID0       0x00
 #define        DDF_RAID1       0x01
@@ -92,28 +140,28 @@ unsigned long crc32(
 #define        DDF_2SPANNED    0x03    /* This is also weird - be careful */
 
 /* Magic numbers */
-#define        DDF_HEADER_MAGIC        __cpu_to_be32(0xDE11DE11)
-#define        DDF_CONTROLLER_MAGIC    __cpu_to_be32(0xAD111111)
-#define        DDF_PHYS_RECORDS_MAGIC  __cpu_to_be32(0x22222222)
-#define        DDF_PHYS_DATA_MAGIC     __cpu_to_be32(0x33333333)
-#define        DDF_VIRT_RECORDS_MAGIC  __cpu_to_be32(0xDDDDDDDD)
-#define        DDF_VD_CONF_MAGIC       __cpu_to_be32(0xEEEEEEEE)
-#define        DDF_SPARE_ASSIGN_MAGIC  __cpu_to_be32(0x55555555)
-#define        DDF_VU_CONF_MAGIC       __cpu_to_be32(0x88888888)
-#define        DDF_VENDOR_LOG_MAGIC    __cpu_to_be32(0x01dBEEF0)
-#define        DDF_BBM_LOG_MAGIC       __cpu_to_be32(0xABADB10C)
+#define        DDF_HEADER_MAGIC        cpu_to_be32(0xDE11DE11)
+#define        DDF_CONTROLLER_MAGIC    cpu_to_be32(0xAD111111)
+#define        DDF_PHYS_RECORDS_MAGIC  cpu_to_be32(0x22222222)
+#define        DDF_PHYS_DATA_MAGIC     cpu_to_be32(0x33333333)
+#define        DDF_VIRT_RECORDS_MAGIC  cpu_to_be32(0xDDDDDDDD)
+#define        DDF_VD_CONF_MAGIC       cpu_to_be32(0xEEEEEEEE)
+#define        DDF_SPARE_ASSIGN_MAGIC  cpu_to_be32(0x55555555)
+#define        DDF_VU_CONF_MAGIC       cpu_to_be32(0x88888888)
+#define        DDF_VENDOR_LOG_MAGIC    cpu_to_be32(0x01dBEEF0)
+#define        DDF_BBM_LOG_MAGIC       cpu_to_be32(0xABADB10C)
 
 #define        DDF_GUID_LEN    24
 #define DDF_REVISION_0 "01.00.00"
 #define DDF_REVISION_2 "01.02.00"
 
 struct ddf_header {
-       __u32   magic;          /* DDF_HEADER_MAGIC */
-       __u32   crc;
+       be32    magic;          /* DDF_HEADER_MAGIC */
+       be32    crc;
        char    guid[DDF_GUID_LEN];
        char    revision[8];    /* 01.02.00 */
-       __u32   seq;            /* starts at '1' */
-       __u32   timestamp;
+       be32    seq;            /* starts at '1' */
+       be32    timestamp;
        __u8    openflag;
        __u8    foreignflag;
        __u8    enforcegroups;
@@ -121,38 +169,38 @@ struct ddf_header {
        __u8    pad1[12];       /* 12 * 0xff */
        /* 64 bytes so far */
        __u8    header_ext[32]; /* reserved: fill with 0xff */
-       __u64   primary_lba;
-       __u64   secondary_lba;
+       be64    primary_lba;
+       be64    secondary_lba;
        __u8    type;
        __u8    pad2[3];        /* 0xff */
-       __u32   workspace_len;  /* sectors for vendor space -
+       be32    workspace_len;  /* sectors for vendor space -
                                 * at least 32768(sectors) */
-       __u64   workspace_lba;
-       __u16   max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
-       __u16   max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
-       __u16   max_partitions; /* i.e. max num of configuration
+       be64    workspace_lba;
+       be16    max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
+       be16    max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
+       be16    max_partitions; /* i.e. max num of configuration
                                   record entries per disk */
-       __u16   config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
+       be16    config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
                                                 *12/512) */
-       __u16   max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
+       be16    max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
        __u8    pad3[54];       /* 0xff */
        /* 192 bytes so far */
-       __u32   controller_section_offset;
-       __u32   controller_section_length;
-       __u32   phys_section_offset;
-       __u32   phys_section_length;
-       __u32   virt_section_offset;
-       __u32   virt_section_length;
-       __u32   config_section_offset;
-       __u32   config_section_length;
-       __u32   data_section_offset;
-       __u32   data_section_length;
-       __u32   bbm_section_offset;
-       __u32   bbm_section_length;
-       __u32   diag_space_offset;
-       __u32   diag_space_length;
-       __u32   vendor_offset;
-       __u32   vendor_length;
+       be32    controller_section_offset;
+       be32    controller_section_length;
+       be32    phys_section_offset;
+       be32    phys_section_length;
+       be32    virt_section_offset;
+       be32    virt_section_length;
+       be32    config_section_offset;
+       be32    config_section_length;
+       be32    data_section_offset;
+       be32    data_section_length;
+       be32    bbm_section_offset;
+       be32    bbm_section_length;
+       be32    diag_space_offset;
+       be32    diag_space_length;
+       be32    vendor_offset;
+       be32    vendor_length;
        /* 256 bytes so far */
        __u8    pad4[256];      /* 0xff */
 };
@@ -164,14 +212,14 @@ struct ddf_header {
 
 /* The content of the 'controller section' - global scope */
 struct ddf_controller_data {
-       __u32   magic;                  /* DDF_CONTROLLER_MAGIC */
-       __u32   crc;
+       be32    magic;                  /* DDF_CONTROLLER_MAGIC */
+       be32    crc;
        char    guid[DDF_GUID_LEN];
        struct controller_type {
-               __u16 vendor_id;
-               __u16 device_id;
-               __u16 sub_vendor_id;
-               __u16 sub_device_id;
+               be16 vendor_id;
+               be16 device_id;
+               be16 sub_vendor_id;
+               be16 sub_device_id;
        } type;
        char    product_id[16];
        __u8    pad[8]; /* 0xff */
@@ -180,18 +228,21 @@ struct ddf_controller_data {
 
 /* The content of phys_section - global scope */
 struct phys_disk {
-       __u32   magic;          /* DDF_PHYS_RECORDS_MAGIC */
-       __u32   crc;
-       __u16   used_pdes;
-       __u16   max_pdes;
+       be32    magic;          /* DDF_PHYS_RECORDS_MAGIC */
+       be32    crc;
+       be16    used_pdes;      /* This is a counter, not a max - the list
+                                * of used entries may not be dense */
+       be16    max_pdes;
        __u8    pad[52];
        struct phys_disk_entry {
                char    guid[DDF_GUID_LEN];
-               __u32   refnum;
-               __u16   type;
-               __u16   state;
-               __u64   config_size; /* DDF structures must be after here */
-               char    path[18];       /* another horrible structure really */
+               be32    refnum;
+               be16    type;
+               be16    state;
+               be64    config_size;    /* DDF structures must be after here */
+               char    path[18];       /* Another horrible structure really
+                                        * but is "used for information
+                                        * purposes only" */
                __u8    pad[6];
        } entries[0];
 };
@@ -221,17 +272,17 @@ struct phys_disk {
 
 /* The content of the virt_section global scope */
 struct virtual_disk {
-       __u32   magic;          /* DDF_VIRT_RECORDS_MAGIC */
-       __u32   crc;
-       __u16   populated_vdes;
-       __u16   max_vdes;
+       be32    magic;          /* DDF_VIRT_RECORDS_MAGIC */
+       be32    crc;
+       be16    populated_vdes;
+       be16    max_vdes;
        __u8    pad[52];
        struct virtual_entry {
                char    guid[DDF_GUID_LEN];
-               __u16   unit;
+               be16    unit;
                __u16   pad0;   /* 0xffff */
-               __u16   guid_crc;
-               __u16   type;
+               be16    guid_crc;
+               be16    type;
                __u8    state;
                __u8    init_state;
                __u8    pad1[14];
@@ -275,25 +326,28 @@ struct virtual_disk {
  */
 
 struct vd_config {
-       __u32   magic;          /* DDF_VD_CONF_MAGIC */
-       __u32   crc;
+       be32    magic;          /* DDF_VD_CONF_MAGIC */
+       be32    crc;
        char    guid[DDF_GUID_LEN];
-       __u32   timestamp;
-       __u32   seqnum;
+       be32    timestamp;
+       be32    seqnum;
        __u8    pad0[24];
-       __u16   prim_elmnt_count;
+       be16    prim_elmnt_count;
        __u8    chunk_shift;    /* 0 == 512, 1==1024 etc */
        __u8    prl;
        __u8    rlq;
        __u8    sec_elmnt_count;
        __u8    sec_elmnt_seq;
        __u8    srl;
-       __u64   blocks;         /* blocks per component could be different
+       be64    blocks;         /* blocks per component could be different
                                 * on different component devices...(only
                                 * for concat I hope) */
-       __u64   array_blocks;   /* blocks in array */
+       be64    array_blocks;   /* blocks in array */
        __u8    pad1[8];
-       __u32   spare_refs[8];
+       be32    spare_refs[8];  /* This is used to detect missing spares.
+                                * As we don't have an interface for that
+                                * the values are ignored.
+                                */
        __u8    cache_pol[8];
        __u8    bg_rate;
        __u8    pad2[3];
@@ -304,10 +358,11 @@ struct vd_config {
        __u8    v2[16]; /* reserved- 0xff */
        __u8    v3[16]; /* reserved- 0xff */
        __u8    vendor[32];
-       __u32   phys_refnum[0]; /* refnum of each disk in sequence */
+       be32    phys_refnum[0]; /* refnum of each disk in sequence */
       /*__u64  lba_offset[0];  LBA offset in each phys.  Note extents in a
                                bvd are always the same size */
 };
+#define LBA_OFFSET(ddf, vd) ((be64 *) &(vd)->phys_refnum[(ddf)->mppe])
 
 /* vd_config.cache_pol[7] is a bitmap */
 #define        DDF_cache_writeback     1       /* else writethrough */
@@ -319,17 +374,17 @@ struct vd_config {
 #define        DDF_cache_rallowed      64      /* enable read caching */
 
 struct spare_assign {
-       __u32   magic;          /* DDF_SPARE_ASSIGN_MAGIC */
-       __u32   crc;
-       __u32   timestamp;
+       be32    magic;          /* DDF_SPARE_ASSIGN_MAGIC */
+       be32    crc;
+       be32    timestamp;
        __u8    reserved[7];
        __u8    type;
-       __u16   populated;      /* SAEs used */
-       __u16   max;            /* max SAEs */
+       be16    populated;      /* SAEs used */
+       be16    max;            /* max SAEs */
        __u8    pad[8];
        struct spare_assign_entry {
                char    guid[DDF_GUID_LEN];
-               __u16   secondary_element;
+               be16    secondary_element;
                __u8    pad[6];
        } spare_ents[0];
 };
@@ -341,10 +396,10 @@ struct spare_assign {
 
 /* The data_section contents - local scope */
 struct disk_data {
-       __u32   magic;          /* DDF_PHYS_DATA_MAGIC */
-       __u32   crc;
+       be32    magic;          /* DDF_PHYS_DATA_MAGIC */
+       be32    crc;
        char    guid[DDF_GUID_LEN];
-       __u32   refnum;         /* crc of some magic drive data ... */
+       be32    refnum;         /* crc of some magic drive data ... */
        __u8    forced_ref;     /* set when above was not result of magic */
        __u8    forced_guid;    /* set if guid was forced rather than magic */
        __u8    vendor[32];
@@ -353,16 +408,16 @@ struct disk_data {
 
 /* bbm_section content */
 struct bad_block_log {
-       __u32   magic;
-       __u32   crc;
-       __u16   entry_count;
-       __u32   spare_count;
+       be32    magic;
+       be32    crc;
+       be16    entry_count;
+       be32    spare_count;
        __u8    pad[10];
-       __u64   first_spare;
+       be64    first_spare;
        struct mapped_block {
-               __u64   defective_start;
-               __u32   replacement_start;
-               __u16   remap_count;
+               be64    defective_start;
+               be32    replacement_start;
+               be16    remap_count;
                __u8    pad[2];
        } entries[0];
 };
@@ -384,29 +439,35 @@ struct bad_block_log {
  * and reconstructed for writing.  This means that we only need
  * to make config changes once and they are automatically
  * propagated to all devices.
- * Note that the ddf_super has space of the conf and disk data
- * for this disk and also for a list of all such data.
- * The list is only used for the superblock that is being
- * built in Create or Assemble to describe the whole array.
+ * The global (config and disk data) records are each in a list
+ * of separate data structures.  When writing we find the entry
+ * or entries applicable to the particular device.
  */
 struct ddf_super {
-       struct ddf_header anchor, primary, secondary;
+       struct ddf_header       anchor, primary, secondary;
        struct ddf_controller_data controller;
-       struct ddf_header *active;
+       struct ddf_header       *active;
        struct phys_disk        *phys;
        struct virtual_disk     *virt;
-       int pdsize, vdsize;
-       unsigned int max_part, mppe, conf_rec_len;
-       int currentdev;
-       int updates_pending;
+       char                    *conf;
+       int                     pdsize, vdsize;
+       unsigned int            max_part, mppe, conf_rec_len;
+       int                     currentdev;
+       int                     updates_pending;
        struct vcl {
                union {
                        char space[512];
                        struct {
                                struct vcl      *next;
-                               __u64           *lba_offset; /* location in 'conf' of
-                                                             * the lba table */
                                unsigned int    vcnum; /* index into ->virt */
+                               /* For an array with a secondary level there are
+                                * multiple vd_config structures, all with the same
+                                * guid but with different sec_elmnt_seq.
+                                * One of these structures is in 'conf' below.
+                                * The others are in other_bvds, not in any
+                                * particular order.
+                                */
+                               struct vd_config **other_bvds;
                                __u64           *block_sizes; /* NULL if all the same */
                        };
                };
@@ -421,6 +482,9 @@ struct ddf_super {
                                char *devname;
                                int fd;
                                unsigned long long size; /* sectors */
+                               be64 primary_lba; /* sectors */
+                               be64 secondary_lba; /* sectors */
+                               be64 workspace_lba; /* sectors */
                                int pdnum;      /* index in ->phys */
                                struct spare_assign *spare;
                                void *mdupdate; /* hold metadata update */
@@ -428,6 +492,7 @@ struct ddf_super {
                                /* These fields used by auto-layout */
                                int raiddisk; /* slot to fill in autolayout */
                                __u64 esize;
+                               int displayed;
                        };
                };
                struct disk_data disk;
@@ -435,25 +500,308 @@ struct ddf_super {
        } *dlist, *add_list;
 };
 
-#ifndef offsetof
-#define offsetof(t,f) ((size_t)&(((t*)0)->f))
+#ifndef MDASSEMBLE
+static int load_super_ddf_all(struct supertype *st, int fd,
+                             void **sbp, char *devname);
+static int get_svd_state(const struct ddf_super *, const struct vcl *);
+static int
+validate_geometry_ddf_container(struct supertype *st,
+                               int level, int layout, int raiddisks,
+                               int chunk, unsigned long long size,
+                               unsigned long long data_offset,
+                               char *dev, unsigned long long *freesize,
+                               int verbose);
+
+static int validate_geometry_ddf_bvd(struct supertype *st,
+                                    int level, int layout, int raiddisks,
+                                    int *chunk, unsigned long long size,
+                                    unsigned long long data_offset,
+                                    char *dev, unsigned long long *freesize,
+                                    int verbose);
+#endif
+
+static void free_super_ddf(struct supertype *st);
+static int all_ff(const char *guid);
+static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
+                                            be32 refnum, unsigned int nmax,
+                                            const struct vd_config **bvd,
+                                            unsigned int *idx);
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
+static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
+static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
+static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i);
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
+static int init_super_ddf_bvd(struct supertype *st,
+                             mdu_array_info_t *info,
+                             unsigned long long size,
+                             char *name, char *homehost,
+                             int *uuid, unsigned long long data_offset);
+
+#if DEBUG
+static void pr_state(struct ddf_super *ddf, const char *msg)
+{
+       unsigned int i;
+       dprintf("%s: ", msg);
+       for (i = 0; i < be16_to_cpu(ddf->active->max_vd_entries); i++) {
+               if (all_ff(ddf->virt->entries[i].guid))
+                       continue;
+               dprintf_cont("%u(s=%02x i=%02x) ", i,
+                       ddf->virt->entries[i].state,
+                       ddf->virt->entries[i].init_state);
+       }
+       dprintf_cont("\n");
+}
+#else
+static void pr_state(const struct ddf_super *ddf, const char *msg) {}
 #endif
 
+static void _ddf_set_updates_pending(struct ddf_super *ddf, struct vd_config *vc,
+                                    const char *func)
+{
+       if (vc) {
+               vc->timestamp = cpu_to_be32(time(0)-DECADE);
+               vc->seqnum = cpu_to_be32(be32_to_cpu(vc->seqnum) + 1);
+       }
+       if (ddf->updates_pending)
+               return;
+       ddf->updates_pending = 1;
+       ddf->active->seq = cpu_to_be32((be32_to_cpu(ddf->active->seq)+1));
+       pr_state(ddf, func);
+}
+
+#define ddf_set_updates_pending(x,v) _ddf_set_updates_pending((x), (v), __func__)
 
-static unsigned int calc_crc(void *buf, int len)
+static be32 calc_crc(void *buf, int len)
 {
        /* crcs are always at the same place as in the ddf_header */
        struct ddf_header *ddf = buf;
-       __u32 oldcrc = ddf->crc;
+       be32 oldcrc = ddf->crc;
        __u32 newcrc;
-       ddf->crc = 0xffffffff;
+       ddf->crc = cpu_to_be32(0xffffffff);
 
        newcrc = crc32(0, buf, len);
        ddf->crc = oldcrc;
-       /* The crc is store (like everything) bigendian, so convert
+       /* The crc is stored (like everything) bigendian, so convert
         * here for simplicity
         */
-       return __cpu_to_be32(newcrc);
+       return cpu_to_be32(newcrc);
+}
+
+#define DDF_INVALID_LEVEL 0xff
+#define DDF_NO_SECONDARY 0xff
+static int err_bad_md_layout(const mdu_array_info_t *array)
+{
+       pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
+              array->level, array->layout, array->raid_disks);
+       return -1;
+}
+
+static int layout_md2ddf(const mdu_array_info_t *array,
+                        struct vd_config *conf)
+{
+       be16 prim_elmnt_count = cpu_to_be16(array->raid_disks);
+       __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
+       __u8 sec_elmnt_count = 1;
+       __u8 srl = DDF_NO_SECONDARY;
+
+       switch (array->level) {
+       case LEVEL_LINEAR:
+               prl = DDF_CONCAT;
+               break;
+       case 0:
+               rlq = DDF_RAID0_SIMPLE;
+               prl = DDF_RAID0;
+               break;
+       case 1:
+               switch (array->raid_disks) {
+               case 2:
+                       rlq = DDF_RAID1_SIMPLE;
+                       break;
+               case 3:
+                       rlq = DDF_RAID1_MULTI;
+                       break;
+               default:
+                       return err_bad_md_layout(array);
+               }
+               prl = DDF_RAID1;
+               break;
+       case 4:
+               if (array->layout != 0)
+                       return err_bad_md_layout(array);
+               rlq = DDF_RAID4_N;
+               prl = DDF_RAID4;
+               break;
+       case 5:
+               switch (array->layout) {
+               case ALGORITHM_LEFT_ASYMMETRIC:
+                       rlq = DDF_RAID5_N_RESTART;
+                       break;
+               case ALGORITHM_RIGHT_ASYMMETRIC:
+                       rlq = DDF_RAID5_0_RESTART;
+                       break;
+               case ALGORITHM_LEFT_SYMMETRIC:
+                       rlq = DDF_RAID5_N_CONTINUE;
+                       break;
+               case ALGORITHM_RIGHT_SYMMETRIC:
+                       /* not mentioned in standard */
+               default:
+                       return err_bad_md_layout(array);
+               }
+               prl = DDF_RAID5;
+               break;
+       case 6:
+               switch (array->layout) {
+               case ALGORITHM_ROTATING_N_RESTART:
+                       rlq = DDF_RAID5_N_RESTART;
+                       break;
+               case ALGORITHM_ROTATING_ZERO_RESTART:
+                       rlq = DDF_RAID6_0_RESTART;
+                       break;
+               case ALGORITHM_ROTATING_N_CONTINUE:
+                       rlq = DDF_RAID5_N_CONTINUE;
+                       break;
+               default:
+                       return err_bad_md_layout(array);
+               }
+               prl = DDF_RAID6;
+               break;
+       case 10:
+               if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
+                       rlq = DDF_RAID1_SIMPLE;
+                       prim_elmnt_count =  cpu_to_be16(2);
+                       sec_elmnt_count = array->raid_disks / 2;
+                       srl = DDF_2SPANNED;
+                       prl = DDF_RAID1;
+               } else if (array->raid_disks % 3 == 0
+                          && array->layout == 0x103) {
+                       rlq = DDF_RAID1_MULTI;
+                       prim_elmnt_count =  cpu_to_be16(3);
+                       sec_elmnt_count = array->raid_disks / 3;
+                       srl = DDF_2SPANNED;
+                       prl = DDF_RAID1;
+               } else if (array->layout == 0x201) {
+                       prl = DDF_RAID1E;
+                       rlq = DDF_RAID1E_OFFSET;
+               } else if (array->layout == 0x102) {
+                       prl = DDF_RAID1E;
+                       rlq = DDF_RAID1E_ADJACENT;
+               } else
+                       return err_bad_md_layout(array);
+               break;
+       default:
+               return err_bad_md_layout(array);
+       }
+       conf->prl = prl;
+       conf->prim_elmnt_count = prim_elmnt_count;
+       conf->rlq = rlq;
+       conf->srl = srl;
+       conf->sec_elmnt_count = sec_elmnt_count;
+       return 0;
+}
+
+static int err_bad_ddf_layout(const struct vd_config *conf)
+{
+       pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
+              conf->prl, conf->rlq, be16_to_cpu(conf->prim_elmnt_count));
+       return -1;
+}
+
+static int layout_ddf2md(const struct vd_config *conf,
+                        mdu_array_info_t *array)
+{
+       int level = LEVEL_UNSUPPORTED;
+       int layout = 0;
+       int raiddisks = be16_to_cpu(conf->prim_elmnt_count);
+
+       if (conf->sec_elmnt_count > 1) {
+               /* see also check_secondary() */
+               if (conf->prl != DDF_RAID1 ||
+                   (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
+                       pr_err("Unsupported secondary RAID level %u/%u\n",
+                              conf->prl, conf->srl);
+                       return -1;
+               }
+               if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
+                       layout = 0x102;
+               else if  (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
+                       layout = 0x103;
+               else
+                       return err_bad_ddf_layout(conf);
+               raiddisks *= conf->sec_elmnt_count;
+               level = 10;
+               goto good;
+       }
+
+       switch (conf->prl) {
+       case DDF_CONCAT:
+               level = LEVEL_LINEAR;
+               break;
+       case DDF_RAID0:
+               if (conf->rlq != DDF_RAID0_SIMPLE)
+                       return err_bad_ddf_layout(conf);
+               level = 0;
+               break;
+       case DDF_RAID1:
+               if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
+                     (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
+                       return err_bad_ddf_layout(conf);
+               level = 1;
+               break;
+       case DDF_RAID1E:
+               if (conf->rlq == DDF_RAID1E_ADJACENT)
+                       layout = 0x102;
+               else if (conf->rlq == DDF_RAID1E_OFFSET)
+                       layout = 0x201;
+               else
+                       return err_bad_ddf_layout(conf);
+               level = 10;
+               break;
+       case DDF_RAID4:
+               if (conf->rlq != DDF_RAID4_N)
+                       return err_bad_ddf_layout(conf);
+               level = 4;
+               break;
+       case DDF_RAID5:
+               switch (conf->rlq) {
+               case DDF_RAID5_N_RESTART:
+                       layout = ALGORITHM_LEFT_ASYMMETRIC;
+                       break;
+               case DDF_RAID5_0_RESTART:
+                       layout = ALGORITHM_RIGHT_ASYMMETRIC;
+                       break;
+               case DDF_RAID5_N_CONTINUE:
+                       layout = ALGORITHM_LEFT_SYMMETRIC;
+                       break;
+               default:
+                       return err_bad_ddf_layout(conf);
+               }
+               level = 5;
+               break;
+       case DDF_RAID6:
+               switch (conf->rlq) {
+               case DDF_RAID5_N_RESTART:
+                       layout = ALGORITHM_ROTATING_N_RESTART;
+                       break;
+               case DDF_RAID6_0_RESTART:
+                       layout = ALGORITHM_ROTATING_ZERO_RESTART;
+                       break;
+               case DDF_RAID5_N_CONTINUE:
+                       layout = ALGORITHM_ROTATING_N_CONTINUE;
+                       break;
+               default:
+                       return err_bad_ddf_layout(conf);
+               }
+               level = 6;
+               break;
+       default:
+               return err_bad_ddf_layout(conf);
+       };
+
+good:
+       array->level = level;
+       array->layout = layout;
+       array->raid_disks = raiddisks;
+       return 0;
 }
 
 static int load_ddf_header(int fd, unsigned long long lba,
@@ -476,28 +824,34 @@ static int load_ddf_header(int fd, unsigned long long lba,
        if (read(fd, hdr, 512) != 512)
                return 0;
 
-       if (hdr->magic != DDF_HEADER_MAGIC)
+       if (!be32_eq(hdr->magic, DDF_HEADER_MAGIC)) {
+               pr_err("bad header magic\n");
                return 0;
-       if (calc_crc(hdr, 512) != hdr->crc)
+       }
+       if (!be32_eq(calc_crc(hdr, 512), hdr->crc)) {
+               pr_err("bad CRC\n");
                return 0;
+       }
        if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
            memcmp(anchor->revision, hdr->revision, 8) != 0 ||
-           anchor->primary_lba != hdr->primary_lba ||
-           anchor->secondary_lba != hdr->secondary_lba ||
+           !be64_eq(anchor->primary_lba, hdr->primary_lba) ||
+           !be64_eq(anchor->secondary_lba, hdr->secondary_lba) ||
            hdr->type != type ||
            memcmp(anchor->pad2, hdr->pad2, 512 -
-                  offsetof(struct ddf_header, pad2)) != 0)
+                  offsetof(struct ddf_header, pad2)) != 0) {
+               pr_err("header mismatch\n");
                return 0;
+       }
 
        /* Looks good enough to me... */
        return 1;
 }
 
 static void *load_section(int fd, struct ddf_super *super, void *buf,
-                         __u32 offset_be, __u32 len_be, int check)
+                         be32 offset_be, be32 len_be, int check)
 {
-       unsigned long long offset = __be32_to_cpu(offset_be);
-       unsigned long long len = __be32_to_cpu(len_be);
+       unsigned long long offset = be32_to_cpu(offset_be);
+       unsigned long long len = be32_to_cpu(len_be);
        int dofree = (buf == NULL);
 
        if (check)
@@ -507,20 +861,16 @@ static void *load_section(int fd, struct ddf_super *super, void *buf,
 
        if (len > 1024)
                return NULL;
-       if (buf) {
-               /* All pre-allocated sections are a single block */
-               if (len != 1)
-                       return NULL;
-       } else if (posix_memalign(&buf, 512, len<<9) != 0)
+       if (!buf && posix_memalign(&buf, 512, len<<9) != 0)
                buf = NULL;
 
        if (!buf)
                return NULL;
 
        if (super->active->type == 1)
-               offset += __be64_to_cpu(super->active->primary_lba);
+               offset += be64_to_cpu(super->active->primary_lba);
        else
-               offset += __be64_to_cpu(super->active->secondary_lba);
+               offset += be64_to_cpu(super->active->secondary_lba);
 
        if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
                if (dofree)
@@ -543,60 +893,62 @@ static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
 
        if (lseek64(fd, dsize-512, 0) < 0) {
                if (devname)
-                       fprintf(stderr,
-                               Name": Cannot seek to anchor block on %s: %s\n",
-                               devname, strerror(errno));
+                       pr_err("Cannot seek to anchor block on %s: %s\n",
+                              devname, strerror(errno));
                return 1;
        }
        if (read(fd, &super->anchor, 512) != 512) {
                if (devname)
-                       fprintf(stderr,
-                               Name ": Cannot read anchor block on %s: %s\n",
-                               devname, strerror(errno));
+                       pr_err("Cannot read anchor block on %s: %s\n",
+                              devname, strerror(errno));
                return 1;
        }
-       if (super->anchor.magic != DDF_HEADER_MAGIC) {
+       if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) {
                if (devname)
-                       fprintf(stderr, Name ": no DDF anchor found on %s\n",
+                       pr_err("no DDF anchor found on %s\n",
                                devname);
                return 2;
        }
-       if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
+       if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) {
                if (devname)
-                       fprintf(stderr, Name ": bad CRC on anchor on %s\n",
+                       pr_err("bad CRC on anchor on %s\n",
                                devname);
                return 2;
        }
        if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
            memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
                if (devname)
-                       fprintf(stderr, Name ": can only support super revision"
-                               " %.8s and earlier, not %.8s on %s\n",
+                       pr_err("can only support super revision %.8s and earlier, not %.8s on %s\n",
                                DDF_REVISION_2, super->anchor.revision,devname);
                return 2;
        }
-       if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
+       super->active = NULL;
+       if (load_ddf_header(fd, be64_to_cpu(super->anchor.primary_lba),
                            dsize >> 9,  1,
                            &super->primary, &super->anchor) == 0) {
                if (devname)
-                       fprintf(stderr,
-                               Name ": Failed to load primary DDF header "
-                               "on %s\n", devname);
-               return 2;
-       }
-       super->active = &super->primary;
-       if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
+                       pr_err("Failed to load primary DDF header on %s\n", devname);
+       } else
+               super->active = &super->primary;
+
+       if (load_ddf_header(fd, be64_to_cpu(super->anchor.secondary_lba),
                            dsize >> 9,  2,
                            &super->secondary, &super->anchor)) {
-               if ((__be32_to_cpu(super->primary.seq)
-                    < __be32_to_cpu(super->secondary.seq) &&
-                    !super->secondary.openflag)
-                   || (__be32_to_cpu(super->primary.seq)
-                       == __be32_to_cpu(super->secondary.seq) &&
+               if (super->active == NULL
+                   || (be32_to_cpu(super->primary.seq)
+                       < be32_to_cpu(super->secondary.seq) &&
+                       !super->secondary.openflag)
+                   || (be32_to_cpu(super->primary.seq)
+                       == be32_to_cpu(super->secondary.seq) &&
                        super->primary.openflag && !super->secondary.openflag)
                        )
                        super->active = &super->secondary;
-       }
+       } else if (devname &&
+                  be64_to_cpu(super->anchor.secondary_lba) != ~(__u64)0)
+               pr_err("Failed to load secondary DDF header on %s\n",
+                      devname);
+       if (super->active == NULL)
+               return 2;
        return 0;
 }
 
@@ -611,13 +963,13 @@ static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
                                   super->active->phys_section_offset,
                                   super->active->phys_section_length,
                                   1);
-       super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
+       super->pdsize = be32_to_cpu(super->active->phys_section_length) * 512;
 
        super->virt = load_section(fd, super, NULL,
                                   super->active->virt_section_offset,
                                   super->active->virt_section_length,
                                   1);
-       super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
+       super->vdsize = be32_to_cpu(super->active->virt_section_length) * 512;
        if (!ok ||
            !super->phys ||
            !super->virt) {
@@ -630,12 +982,60 @@ static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
        super->conflist = NULL;
        super->dlist = NULL;
 
-       super->max_part = __be16_to_cpu(super->active->max_partitions);
-       super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
-       super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
+       super->max_part = be16_to_cpu(super->active->max_partitions);
+       super->mppe = be16_to_cpu(super->active->max_primary_element_entries);
+       super->conf_rec_len = be16_to_cpu(super->active->config_record_len);
        return 0;
 }
 
+#define DDF_UNUSED_BVD 0xff
+static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
+{
+       unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
+       unsigned int i, vdsize;
+       void *p;
+       if (n_vds == 0) {
+               vcl->other_bvds = NULL;
+               return 0;
+       }
+       vdsize = ddf->conf_rec_len * 512;
+       if (posix_memalign(&p, 512, n_vds *
+                          (vdsize +  sizeof(struct vd_config *))) != 0)
+               return -1;
+       vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
+       for (i = 0; i < n_vds; i++) {
+               vcl->other_bvds[i] = p + i * vdsize;
+               memset(vcl->other_bvds[i], 0, vdsize);
+               vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
+       }
+       return 0;
+}
+
+static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
+                         unsigned int len)
+{
+       int i;
+       for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
+               if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
+                       break;
+
+       if (i < vcl->conf.sec_elmnt_count-1) {
+               if (be32_to_cpu(vd->seqnum) <=
+                   be32_to_cpu(vcl->other_bvds[i]->seqnum))
+                       return;
+       } else {
+               for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
+                       if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
+                               break;
+               if (i == vcl->conf.sec_elmnt_count-1) {
+                       pr_err("no space for sec level config %u, count is %u\n",
+                              vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
+                       return;
+               }
+       }
+       memcpy(vcl->other_bvds[i], vd, len);
+}
+
 static int load_ddf_local(int fd, struct ddf_super *super,
                          char *devname, int keep)
 {
@@ -645,15 +1045,15 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        unsigned int i;
        unsigned int confsec;
        int vnum;
-       unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
+       unsigned int max_virt_disks =
+               be16_to_cpu(super->active->max_vd_entries);
        unsigned long long dsize;
 
        /* First the local disk info */
        if (posix_memalign((void**)&dl, 512,
-                      sizeof(*dl) +
-                      (super->max_part) * sizeof(dl->vlist[0])) != 0) {
-               fprintf(stderr, Name ": %s could not allocate disk info buffer\n",
-                       __func__);
+                          sizeof(*dl) +
+                          (super->max_part) * sizeof(dl->vlist[0])) != 0) {
+               pr_err("could not allocate disk info buffer\n");
                return 1;
        }
 
@@ -661,7 +1061,7 @@ static int load_ddf_local(int fd, struct ddf_super *super,
                     super->active->data_section_offset,
                     super->active->data_section_length,
                     0);
-       dl->devname = devname ? strdup(devname) : NULL;
+       dl->devname = devname ? xstrdup(devname) : NULL;
 
        fstat(fd, &stb);
        dl->major = major(stb.st_rdev);
@@ -672,12 +1072,19 @@ static int load_ddf_local(int fd, struct ddf_super *super,
        dl->size = 0;
        if (get_dev_size(fd, devname, &dsize))
                dl->size = dsize >> 9;
+       /* If the disks have different sizes, the LBAs will differ
+        * between phys disks.
+        * At this point here, the values in super->active must be valid
+        * for this phys disk. */
+       dl->primary_lba = super->active->primary_lba;
+       dl->secondary_lba = super->active->secondary_lba;
+       dl->workspace_lba = super->active->workspace_lba;
        dl->spare = NULL;
        for (i = 0 ; i < super->max_part ; i++)
                dl->vlist[i] = NULL;
        super->dlist = dl;
        dl->pdnum = -1;
-       for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
+       for (i = 0; i < be16_to_cpu(super->active->max_pd_entries); i++)
                if (memcmp(super->phys->entries[i].guid,
                           dl->disk.guid, DDF_GUID_LEN) == 0)
                        dl->pdnum = i;
@@ -688,35 +1095,35 @@ static int load_ddf_local(int fd, struct ddf_super *super,
         * the conflist
         */
 
-       conf = load_section(fd, super, NULL,
+       conf = load_section(fd, super, super->conf,
                            super->active->config_section_offset,
                            super->active->config_section_length,
                            0);
-
+       super->conf = conf;
        vnum = 0;
        for (confsec = 0;
-            confsec < __be32_to_cpu(super->active->config_section_length);
+            confsec < be32_to_cpu(super->active->config_section_length);
             confsec += super->conf_rec_len) {
                struct vd_config *vd =
                        (struct vd_config *)((char*)conf + confsec*512);
                struct vcl *vcl;
 
-               if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
+               if (be32_eq(vd->magic, DDF_SPARE_ASSIGN_MAGIC)) {
                        if (dl->spare)
                                continue;
                        if (posix_memalign((void**)&dl->spare, 512,
-                                      super->conf_rec_len*512) != 0) {
-                               fprintf(stderr, Name
-                                       ": %s could not allocate spare info buf\n",
-                                       __func__);
+                                          super->conf_rec_len*512) != 0) {
+                               pr_err("could not allocate spare info buf\n");
                                return 1;
                        }
-                               
+
                        memcpy(dl->spare, vd, super->conf_rec_len*512);
                        continue;
                }
-               if (vd->magic != DDF_VD_CONF_MAGIC)
+               if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
+                       /* Must be vendor-unique - I cannot handle those */
                        continue;
+
                for (vcl = super->conflist; vcl; vcl = vcl->next) {
                        if (memcmp(vcl->conf.guid,
                                   vd->guid, DDF_GUID_LEN) == 0)
@@ -725,27 +1132,33 @@ static int load_ddf_local(int fd, struct ddf_super *super,
 
                if (vcl) {
                        dl->vlist[vnum++] = vcl;
-                       if (__be32_to_cpu(vd->seqnum) <=
-                           __be32_to_cpu(vcl->conf.seqnum))
+                       if (vcl->other_bvds != NULL &&
+                           vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
+                               add_other_bvd(vcl, vd, super->conf_rec_len*512);
+                               continue;
+                       }
+                       if (be32_to_cpu(vd->seqnum) <=
+                           be32_to_cpu(vcl->conf.seqnum))
                                continue;
                } else {
                        if (posix_memalign((void**)&vcl, 512,
-                                      (super->conf_rec_len*512 +
-                                       offsetof(struct vcl, conf))) != 0) {
-                               fprintf(stderr, Name
-                                       ": %s could not allocate vcl buf\n",
-                                       __func__);
+                                          (super->conf_rec_len*512 +
+                                           offsetof(struct vcl, conf))) != 0) {
+                               pr_err("could not allocate vcl buf\n");
                                return 1;
                        }
                        vcl->next = super->conflist;
                        vcl->block_sizes = NULL; /* FIXME not for CONCAT */
+                       vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
+                       if (alloc_other_bvds(super, vcl) != 0) {
+                               pr_err("could not allocate other bvds\n");
+                               free(vcl);
+                               return 1;
+                       };
                        super->conflist = vcl;
                        dl->vlist[vnum++] = vcl;
                }
                memcpy(&vcl->conf, vd, super->conf_rec_len*512);
-               vcl->lba_offset = (__u64*)
-                       &vcl->conf.phys_refnum[super->mppe];
-
                for (i=0; i < max_virt_disks ; i++)
                        if (memcmp(super->virt->entries[i].guid,
                                   vcl->conf.guid, DDF_GUID_LEN)==0)
@@ -753,18 +1166,10 @@ static int load_ddf_local(int fd, struct ddf_super *super,
                if (i < max_virt_disks)
                        vcl->vcnum = i;
        }
-       free(conf);
 
        return 0;
 }
 
-#ifndef MDASSEMBLE
-static int load_super_ddf_all(struct supertype *st, int fd,
-                             void **sbp, char *devname, int keep_fd);
-#endif
-
-static void free_super_ddf(struct supertype *st);
-
 static int load_super_ddf(struct supertype *st, int fd,
                          char *devname)
 {
@@ -772,14 +1177,6 @@ static int load_super_ddf(struct supertype *st, int fd,
        struct ddf_super *super;
        int rv;
 
-#ifndef MDASSEMBLE
-       /* if 'fd' is a container, load metadata from all the devices */
-       if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0)
-               return 0;
-#endif
-       if (st->subarray[0])
-               return 1; /* FIXME Is this correct */
-
        if (get_dev_size(fd, devname, &dsize) == 0)
                return 1;
 
@@ -790,25 +1187,21 @@ static int load_super_ddf(struct supertype *st, int fd,
        /* 32M is a lower bound */
        if (dsize <= 32*1024*1024) {
                if (devname)
-                       fprintf(stderr,
-                               Name ": %s is too small for ddf: "
-                               "size is %llu sectors.\n",
-                               devname, dsize>>9);
+                       pr_err("%s is too small for ddf: size is %llu sectors.\n",
+                              devname, dsize>>9);
                return 1;
        }
        if (dsize & 511) {
                if (devname)
-                       fprintf(stderr,
-                               Name ": %s is an odd size for ddf: "
-                               "size is %llu bytes.\n",
-                               devname, dsize);
+                       pr_err("%s is an odd size for ddf: size is %llu bytes.\n",
+                              devname, dsize);
                return 1;
        }
 
        free_super_ddf(st);
 
        if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
-               fprintf(stderr, Name ": malloc of %zu failed.\n",
+               pr_err("malloc of %zu failed.\n",
                        sizeof(*super));
                return 1;
        }
@@ -826,9 +1219,7 @@ static int load_super_ddf(struct supertype *st, int fd,
 
        if (rv) {
                if (devname)
-                       fprintf(stderr,
-                               Name ": Failed to load all information "
-                               "sections on %s\n", devname);
+                       pr_err("Failed to load all information sections on %s\n", devname);
                free(super);
                return rv;
        }
@@ -837,33 +1228,11 @@ static int load_super_ddf(struct supertype *st, int fd,
 
        if (rv) {
                if (devname)
-                       fprintf(stderr,
-                               Name ": Failed to load all information "
-                               "sections on %s\n", devname);
+                       pr_err("Failed to load all information sections on %s\n", devname);
                free(super);
                return rv;
        }
 
-       if (st->subarray[0]) {
-               unsigned long val;
-               struct vcl *v;
-               char *ep;
-
-               val = strtoul(st->subarray, &ep, 10);
-               if (*ep != '\0') {
-                       free(super);
-                       return 1;
-               }
-
-               for (v = super->conflist; v; v = v->next)
-                       if (v->vcnum == val)
-                               super->currentconf = v;
-               if (!super->currentconf) {
-                       free(super);
-                       return 1;
-               }
-       }
-
        /* Should possibly check the sections .... */
 
        st->sb = super;
@@ -872,7 +1241,6 @@ static int load_super_ddf(struct supertype *st, int fd,
                st->minor_version = 0;
                st->max_devs = 512;
        }
-       st->loaded_container = 0;
        return 0;
 
 }
@@ -884,11 +1252,18 @@ static void free_super_ddf(struct supertype *st)
                return;
        free(ddf->phys);
        free(ddf->virt);
+       free(ddf->conf);
        while (ddf->conflist) {
                struct vcl *v = ddf->conflist;
                ddf->conflist = v->next;
                if (v->block_sizes)
                        free(v->block_sizes);
+               if (v->other_bvds)
+                       /*
+                          v->other_bvds[0] points to beginning of buffer,
+                          see alloc_other_bvds()
+                       */
+                       free(v->other_bvds[0]);
                free(v);
        }
        while (ddf->dlist) {
@@ -915,15 +1290,14 @@ static void free_super_ddf(struct supertype *st)
 
 static struct supertype *match_metadata_desc_ddf(char *arg)
 {
-       /* 'ddf' only support containers */
+       /* 'ddf' only supports containers */
        struct supertype *st;
        if (strcmp(arg, "ddf") != 0 &&
            strcmp(arg, "default") != 0
                )
                return NULL;
 
-       st = malloc(sizeof(*st));
-       memset(st, 0, sizeof(*st));
+       st = xcalloc(1, sizeof(*st));
        st->ss = &super_ddf;
        st->max_devs = 512;
        st->minor_version = 0;
@@ -931,7 +1305,6 @@ static struct supertype *match_metadata_desc_ddf(char *arg)
        return st;
 }
 
-
 #ifndef MDASSEMBLE
 
 static mapping_t ddf_state[] = {
@@ -984,34 +1357,7 @@ static mapping_t ddf_sec_level[] = {
 };
 #endif
 
-struct num_mapping {
-       int num1, num2;
-};
-static struct num_mapping ddf_level_num[] = {
-       { DDF_RAID0, 0 },
-       { DDF_RAID1, 1 },
-       { DDF_RAID3, LEVEL_UNSUPPORTED },
-       { DDF_RAID4, 4 },
-       { DDF_RAID5, 5 },
-       { DDF_RAID1E, LEVEL_UNSUPPORTED },
-       { DDF_JBOD, LEVEL_UNSUPPORTED },
-       { DDF_CONCAT, LEVEL_LINEAR },
-       { DDF_RAID5E, LEVEL_UNSUPPORTED },
-       { DDF_RAID5EE, LEVEL_UNSUPPORTED },
-       { DDF_RAID6, 6},
-       { MAXINT, MAXINT }
-};
-
-static int map_num1(struct num_mapping *map, int num)
-{
-       int i;
-       for (i=0 ; map[i].num1 != MAXINT; i++)
-               if (map[i].num1 == num)
-                       break;
-       return map[i].num2;
-}
-
-static int all_ff(char *guid)
+static int all_ff(const char *guid)
 {
        int i;
        for (i = 0; i < DDF_GUID_LEN; i++)
@@ -1020,6 +1366,22 @@ static int all_ff(char *guid)
        return 1;
 }
 
+static const char *guid_str(const char *guid)
+{
+       static char buf[DDF_GUID_LEN*2+1];
+       int i;
+       char *p = buf;
+       for (i = 0; i < DDF_GUID_LEN; i++) {
+               unsigned char c = guid[i];
+               if (c >= 32 && c < 127)
+                       p += sprintf(p, "%c", c);
+               else
+                       p += sprintf(p, "%02x", c);
+       }
+       *p = '\0';
+       return (const char *) buf;
+}
+
 #ifndef MDASSEMBLE
 static void print_guid(char *guid, int tstamp)
 {
@@ -1066,30 +1428,32 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid)
                unsigned int i;
                struct vd_config *vc = &vcl->conf;
 
-               if (calc_crc(vc, crl*512) != vc->crc)
+               if (!be32_eq(calc_crc(vc, crl*512), vc->crc))
                        continue;
                if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
                        continue;
 
                /* Ok, we know about this VD, let's give more details */
                printf(" Raid Devices[%d] : %d (", n,
-                      __be16_to_cpu(vc->prim_elmnt_count));
-               for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
+                      be16_to_cpu(vc->prim_elmnt_count));
+               for (i = 0; i < be16_to_cpu(vc->prim_elmnt_count); i++) {
                        int j;
-                       int cnt = __be16_to_cpu(sb->phys->used_pdes);
+                       int cnt = be16_to_cpu(sb->phys->max_pdes);
                        for (j=0; j<cnt; j++)
-                               if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
+                               if (be32_eq(vc->phys_refnum[i],
+                                           sb->phys->entries[j].refnum))
                                        break;
                        if (i) printf(" ");
                        if (j < cnt)
                                printf("%d", j);
                        else
                                printf("--");
+                       printf("@%lluK", (unsigned long long) be64_to_cpu(LBA_OFFSET(sb, vc)[i])/2);
                }
                printf(")\n");
                if (vc->chunk_shift != 255)
-               printf("   Chunk Size[%d] : %d sectors\n", n,
-                      1 << vc->chunk_shift);
+                       printf("   Chunk Size[%d] : %d sectors\n", n,
+                              1 << vc->chunk_shift);
                printf("   Raid Level[%d] : %s\n", n,
                       map_num(ddf_level, vc->prl)?:"-unknown-");
                if (vc->sec_elmnt_count != 1) {
@@ -1099,32 +1463,34 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid)
                               map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
                }
                printf("  Device Size[%d] : %llu\n", n,
-                      (unsigned long long)__be64_to_cpu(vc->blocks)/2);
+                      be64_to_cpu(vc->blocks)/2);
                printf("   Array Size[%d] : %llu\n", n,
-                      (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
+                      be64_to_cpu(vc->array_blocks)/2);
        }
 }
 
 static void examine_vds(struct ddf_super *sb)
 {
-       int cnt = __be16_to_cpu(sb->virt->populated_vdes);
-       int i;
+       int cnt = be16_to_cpu(sb->virt->populated_vdes);
+       unsigned int i;
        printf("  Virtual Disks : %d\n", cnt);
 
-       for (i=0; i<cnt; i++) {
+       for (i = 0; i < be16_to_cpu(sb->virt->max_vdes); i++) {
                struct virtual_entry *ve = &sb->virt->entries[i];
+               if (all_ff(ve->guid))
+                       continue;
                printf("\n");
                printf("      VD GUID[%d] : ", i); print_guid(ve->guid, 1);
                printf("\n");
-               printf("         unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
+               printf("         unit[%d] : %d\n", i, be16_to_cpu(ve->unit));
                printf("        state[%d] : %s, %s%s\n", i,
                       map_num(ddf_state, ve->state & 7),
-                      (ve->state & 8) ? "Morphing, ": "",
-                      (ve->state & 16)? "Not Consistent" : "Consistent");
+                      (ve->state & DDF_state_morphing) ? "Morphing, ": "",
+                      (ve->state & DDF_state_inconsistent)? "Not Consistent" : "Consistent");
                printf("   init state[%d] : %s\n", i,
-                      map_num(ddf_init_state, ve->init_state&3));
+                      map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
                printf("       access[%d] : %s\n", i,
-                      map_num(ddf_access, (ve->init_state>>6) & 3));
+                      map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
                printf("         Name[%d] : %.16s\n", i, ve->name);
                examine_vd(i, sb, ve->guid);
        }
@@ -1133,25 +1499,32 @@ static void examine_vds(struct ddf_super *sb)
 
 static void examine_pds(struct ddf_super *sb)
 {
-       int cnt = __be16_to_cpu(sb->phys->used_pdes);
+       int cnt = be16_to_cpu(sb->phys->max_pdes);
        int i;
        struct dl *dl;
+       int unlisted = 0;
        printf(" Physical Disks : %d\n", cnt);
        printf("      Number    RefNo      Size       Device      Type/State\n");
 
+       for (dl = sb->dlist; dl; dl = dl->next)
+               dl->displayed = 0;
+
        for (i=0 ; i<cnt ; i++) {
                struct phys_disk_entry *pd = &sb->phys->entries[i];
-               int type = __be16_to_cpu(pd->type);
-               int state = __be16_to_cpu(pd->state);
+               int type = be16_to_cpu(pd->type);
+               int state = be16_to_cpu(pd->state);
 
+               if (be32_to_cpu(pd->refnum) == 0xffffffff)
+                       /* Not in use */
+                       continue;
                //printf("      PD GUID[%d] : ", i); print_guid(pd->guid, 0);
                //printf("\n");
                printf("       %3d    %08x  ", i,
-                      __be32_to_cpu(pd->refnum));
-               printf("%8lluK ", 
-                      (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
+                      be32_to_cpu(pd->refnum));
+               printf("%8lluK ",
+                      be64_to_cpu(pd->config_size)>>1);
                for (dl = sb->dlist; dl ; dl = dl->next) {
-                       if (dl->disk.refnum == pd->refnum) {
+                       if (be32_eq(dl->disk.refnum, pd->refnum)) {
                                char *dv = map_dev(dl->major, dl->minor, 0);
                                if (dv) {
                                        printf("%-15s", dv);
@@ -1161,6 +1534,8 @@ static void examine_pds(struct ddf_super *sb)
                }
                if (!dl)
                        printf("%15s","");
+               else
+                       dl->displayed = 1;
                printf(" %s%s%s%s%s",
                       (type&2) ? "active":"",
                       (type&4) ? "Global-Spare":"",
@@ -1180,28 +1555,69 @@ static void examine_pds(struct ddf_super *sb)
                       (state&64)? ", Missing" : "");
                printf("\n");
        }
+       for (dl = sb->dlist; dl; dl = dl->next) {
+               char *dv;
+               if (dl->displayed)
+                       continue;
+               if (!unlisted)
+                       printf(" Physical disks not in metadata!:\n");
+               unlisted = 1;
+               dv = map_dev(dl->major, dl->minor, 0);
+               printf("   %08x %s\n", be32_to_cpu(dl->disk.refnum),
+                      dv ? dv : "-unknown-");
+       }
+       if (unlisted)
+               printf("\n");
 }
 
 static void examine_super_ddf(struct supertype *st, char *homehost)
 {
        struct ddf_super *sb = st->sb;
 
-       printf("          Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
+       printf("          Magic : %08x\n", be32_to_cpu(sb->anchor.magic));
        printf("        Version : %.8s\n", sb->anchor.revision);
        printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
        printf("\n");
        printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
        printf("\n");
-       printf("            Seq : %08x\n", __be32_to_cpu(sb->active->seq));
-       printf("  Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
-              ?"yes" : "no");
+       printf("            Seq : %08x\n", be32_to_cpu(sb->active->seq));
+       printf("  Redundant hdr : %s\n", (be32_eq(sb->secondary.magic,
+                                                DDF_HEADER_MAGIC)
+                                         ?"yes" : "no"));
        examine_vds(sb);
        examine_pds(sb);
 }
 
-static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info);
+static unsigned int get_vd_num_of_subarray(struct supertype *st)
+{
+       /*
+        * Figure out the VD number for this supertype.
+        * Returns DDF_CONTAINER for the container itself,
+        * and DDF_NOTFOUND on error.
+        */
+       struct ddf_super *ddf = st->sb;
+       struct mdinfo *sra;
+       char *sub, *end;
+       unsigned int vcnum;
 
-static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
+       if (*st->container_devnm == '\0')
+               return DDF_CONTAINER;
+
+       sra = sysfs_read(-1, st->devnm, GET_VERSION);
+       if (!sra || sra->array.major_version != -1 ||
+           sra->array.minor_version != -2 ||
+           !is_subarray(sra->text_version))
+               return DDF_NOTFOUND;
+
+       sub = strchr(sra->text_version + 1, '/');
+       if (sub != NULL)
+               vcnum = strtoul(sub + 1, &end, 10);
+       if (sub == NULL || *sub == '\0' || *end != '\0' ||
+           vcnum >= be16_to_cpu(ddf->active->max_vd_entries))
+               return DDF_NOTFOUND;
+
+       return vcnum;
+}
 
 static void brief_examine_super_ddf(struct supertype *st, int verbose)
 {
@@ -1209,7 +1625,7 @@ static void brief_examine_super_ddf(struct supertype *st, int verbose)
         */
        struct mdinfo info;
        char nbuf[64];
-       getinfo_super_ddf(st, &info);
+       getinfo_super_ddf(st, &info, NULL);
        fname_from_uuid(st, &info, nbuf, ':');
 
        printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
@@ -1217,26 +1633,31 @@ static void brief_examine_super_ddf(struct supertype *st, int verbose)
 
 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
 {
-       /* We just write a generic DDF ARRAY entry
+       /* We write a DDF ARRAY member entry for each vd, identifying container
+        * by uuid and member by unit number and uuid.
         */
        struct ddf_super *ddf = st->sb;
        struct mdinfo info;
        unsigned int i;
        char nbuf[64];
-       getinfo_super_ddf(st, &info);
+       getinfo_super_ddf(st, &info, NULL);
        fname_from_uuid(st, &info, nbuf, ':');
 
-       for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
+       for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
                struct virtual_entry *ve = &ddf->virt->entries[i];
                struct vcl vcl;
                char nbuf1[64];
+               char namebuf[17];
                if (all_ff(ve->guid))
                        continue;
                memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
                ddf->currentconf =&vcl;
+               vcl.vcnum = i;
                uuid_from_super_ddf(st, info.uuid);
                fname_from_uuid(st, &info, nbuf1, ':');
-               printf("ARRAY container=%s member=%d UUID=%s\n",
+               _ddf_array_name(namebuf, ddf, i);
+               printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
+                      namebuf[0] == '\0' ? "" : " /dev/md/", namebuf,
                       nbuf+5, i, nbuf1+5);
        }
 }
@@ -1245,43 +1666,159 @@ static void export_examine_super_ddf(struct supertype *st)
 {
        struct mdinfo info;
        char nbuf[64];
-       getinfo_super_ddf(st, &info);
+       getinfo_super_ddf(st, &info, NULL);
        fname_from_uuid(st, &info, nbuf, ':');
        printf("MD_METADATA=ddf\n");
        printf("MD_LEVEL=container\n");
        printf("MD_UUID=%s\n", nbuf+5);
+       printf("MD_DEVICES=%u\n",
+               be16_to_cpu(((struct ddf_super *)st->sb)->phys->used_pdes));
 }
-       
 
-static void detail_super_ddf(struct supertype *st, char *homehost)
+static int copy_metadata_ddf(struct supertype *st, int from, int to)
 {
-       /* FIXME later
-        * Could print DDF GUID
-        * Need to find which array
-        *  If whole, briefly list all arrays
-        *  If one, give name
+       void *buf;
+       unsigned long long dsize, offset;
+       int bytes;
+       struct ddf_header *ddf;
+       int written = 0;
+
+       /* The meta consists of an anchor, a primary, and a secondary.
+        * This all lives at the end of the device.
+        * So it is easiest to find the earliest of primary and
+        * secondary, and copy everything from there.
+        *
+        * Anchor is 512 from end.  It contains primary_lba and secondary_lba
+        * we choose one of those
         */
+
+       if (posix_memalign(&buf, 4096, 4096) != 0)
+               return 1;
+
+       if (!get_dev_size(from, NULL, &dsize))
+               goto err;
+
+       if (lseek64(from, dsize-512, 0) < 0)
+               goto err;
+       if (read(from, buf, 512) != 512)
+               goto err;
+       ddf = buf;
+       if (!be32_eq(ddf->magic, DDF_HEADER_MAGIC) ||
+           !be32_eq(calc_crc(ddf, 512), ddf->crc) ||
+           (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
+            memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
+               goto err;
+
+       offset = dsize - 512;
+       if ((be64_to_cpu(ddf->primary_lba) << 9) < offset)
+               offset = be64_to_cpu(ddf->primary_lba) << 9;
+       if ((be64_to_cpu(ddf->secondary_lba) << 9) < offset)
+               offset = be64_to_cpu(ddf->secondary_lba) << 9;
+
+       bytes = dsize - offset;
+
+       if (lseek64(from, offset, 0) < 0 ||
+           lseek64(to, offset, 0) < 0)
+               goto err;
+       while (written < bytes) {
+               int n = bytes - written;
+               if (n > 4096)
+                       n = 4096;
+               if (read(from, buf, n) != n)
+                       goto err;
+               if (write(to, buf, n) != n)
+                       goto err;
+               written += n;
+       }
+       free(buf);
+       return 0;
+err:
+       free(buf);
+       return 1;
 }
 
-static void brief_detail_super_ddf(struct supertype *st)
+static void detail_super_ddf(struct supertype *st, char *homehost)
 {
-       /* FIXME I really need to know which array we are detailing.
-        * Can that be stored in ddf_super??
-        */
-//     struct ddf_super *ddf = st->sb;
-       struct mdinfo info;
-       char nbuf[64];
-       getinfo_super_ddf(st, &info);
-       fname_from_uuid(st, &info, nbuf,':');
-       printf(" UUID=%s", nbuf + 5);
+       struct ddf_super *sb = st->sb;
+       int cnt = be16_to_cpu(sb->virt->populated_vdes);
+
+       printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
+       printf("\n");
+       printf("            Seq : %08x\n", be32_to_cpu(sb->active->seq));
+       printf("  Virtual Disks : %d\n", cnt);
+       printf("\n");
 }
 #endif
 
-static int match_home_ddf(struct supertype *st, char *homehost)
+static const char *vendors_with_variable_volume_UUID[] = {
+       "LSI      ",
+};
+
+static int volume_id_is_reliable(const struct ddf_super *ddf)
+{
+       int n = ARRAY_SIZE(vendors_with_variable_volume_UUID);
+       int i;
+       for (i = 0; i < n; i++)
+               if (!memcmp(ddf->controller.guid,
+                       vendors_with_variable_volume_UUID[i], 8))
+               return 0;
+       return 1;
+}
+
+static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
+                                unsigned int vcnum, int uuid[4])
+{
+       char buf[DDF_GUID_LEN+18], sha[20], *p;
+       struct sha1_ctx ctx;
+       if (volume_id_is_reliable(ddf)) {
+               uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, uuid);
+               return;
+       }
+       /*
+        * Some fake RAID BIOSes (in particular, LSI ones) change the
+        * VD GUID at every boot. These GUIDs are not suitable for
+        * identifying an array. Luckily the header GUID appears to
+        * remain constant.
+        * We construct a pseudo-UUID from the header GUID and those
+        * properties of the subarray that we expect to remain constant.
+        */
+       memset(buf, 0, sizeof(buf));
+       p = buf;
+       memcpy(p, ddf->anchor.guid, DDF_GUID_LEN);
+       p += DDF_GUID_LEN;
+       memcpy(p, ddf->virt->entries[vcnum].name, 16);
+       p += 16;
+       *((__u16 *) p) = vcnum;
+       sha1_init_ctx(&ctx);
+       sha1_process_bytes(buf, sizeof(buf), &ctx);
+       sha1_finish_ctx(&ctx, sha);
+       memcpy(uuid, sha, 4*4);
+}
+
+#ifndef MDASSEMBLE
+static void brief_detail_super_ddf(struct supertype *st)
+{
+       struct mdinfo info;
+       char nbuf[64];
+       struct ddf_super *ddf = st->sb;
+       unsigned int vcnum = get_vd_num_of_subarray(st);
+       if (vcnum == DDF_CONTAINER)
+               uuid_from_super_ddf(st, info.uuid);
+       else if (vcnum == DDF_NOTFOUND)
+               return;
+       else
+               uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
+       fname_from_uuid(st, &info, nbuf,':');
+       printf(" UUID=%s", nbuf + 5);
+}
+#endif
+
+static int match_home_ddf(struct supertype *st, char *homehost)
 {
        /* It matches 'this' host if the controller is a
         * Linux-MD controller with vendor_data matching
-        * the hostname
+        * the hostname.  It would be nice if we could
+        * test against controller found in /sys or somewhere...
         */
        struct ddf_super *ddf = st->sb;
        unsigned int len;
@@ -1297,29 +1834,110 @@ static int match_home_ddf(struct supertype *st, char *homehost)
 }
 
 #ifndef MDASSEMBLE
-static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst)
+static int find_index_in_bvd(const struct ddf_super *ddf,
+                            const struct vd_config *conf, unsigned int n,
+                            unsigned int *n_bvd)
+{
+       /*
+        * Find the index of the n-th valid physical disk in this BVD.
+        * Unused entries can be sprinkled in with the used entries,
+        * but don't count.
+        */
+       unsigned int i, j;
+       for (i = 0, j = 0;
+            i < ddf->mppe && j < be16_to_cpu(conf->prim_elmnt_count);
+            i++) {
+               if (be32_to_cpu(conf->phys_refnum[i]) != 0xffffffff) {
+                       if (n == j) {
+                               *n_bvd = i;
+                               return 1;
+                       }
+                       j++;
+               }
+       }
+       dprintf("couldn't find BVD member %u (total %u)\n",
+               n, be16_to_cpu(conf->prim_elmnt_count));
+       return 0;
+}
+
+/* Given a member array instance number, and a raid disk within that instance,
+ * find the vd_config structure.  The offset of the given disk in the phys_refnum
+ * table is returned in n_bvd.
+ * For two-level members with a secondary raid level the vd_config for
+ * the appropriate BVD is returned.
+ * The return value is always &vlc->conf, where vlc is returned in last pointer.
+ */
+static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
+                                  unsigned int n,
+                                  unsigned int *n_bvd, struct vcl **vcl)
 {
        struct vcl *v;
 
-       for (v = ddf->conflist; v; v = v->next)
-               if (inst == v->vcnum)
-                       return &v->conf;
+       for (v = ddf->conflist; v; v = v->next) {
+               unsigned int nsec, ibvd = 0;
+               struct vd_config *conf;
+               if (inst != v->vcnum)
+                       continue;
+               conf = &v->conf;
+               if (conf->sec_elmnt_count == 1) {
+                       if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
+                               *vcl = v;
+                               return conf;
+                       } else
+                               goto bad;
+               }
+               if (v->other_bvds == NULL) {
+                       pr_err("BUG: other_bvds is NULL, nsec=%u\n",
+                              conf->sec_elmnt_count);
+                       goto bad;
+               }
+               nsec = n / be16_to_cpu(conf->prim_elmnt_count);
+               if (conf->sec_elmnt_seq != nsec) {
+                       for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
+                               if (v->other_bvds[ibvd-1]->sec_elmnt_seq
+                                   == nsec)
+                                       break;
+                       }
+                       if (ibvd == conf->sec_elmnt_count)
+                               goto bad;
+                       conf = v->other_bvds[ibvd-1];
+               }
+               if (!find_index_in_bvd(ddf, conf,
+                                      n - nsec*conf->sec_elmnt_count, n_bvd))
+                       goto bad;
+               dprintf("found disk %u as member %u in bvd %d of array %u\n",
+                       n, *n_bvd, ibvd, inst);
+               *vcl = v;
+               return conf;
+       }
+bad:
+       pr_err("Could't find disk %d in array %u\n", n, inst);
        return NULL;
 }
 #endif
 
-static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
+static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
 {
        /* Find the entry in phys_disk which has the given refnum
         * and return it's index
         */
        unsigned int i;
-       for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
-               if (ddf->phys->entries[i].refnum == phys_refnum)
+       for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++)
+               if (be32_eq(ddf->phys->entries[i].refnum, phys_refnum))
                        return i;
        return -1;
 }
 
+static void uuid_from_ddf_guid(const char *guid, int uuid[4])
+{
+       char buf[20];
+       struct sha1_ctx ctx;
+       sha1_init_ctx(&ctx);
+       sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
+       sha1_finish_ctx(&ctx, buf);
+       memcpy(uuid, buf, 4*4);
+}
+
 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
 {
        /* The uuid returned here is used for:
@@ -1330,7 +1948,7 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
         *    not the device-set.
         *  uuid to recognise same set when adding a missing device back
         *    to an array.   This is a uuid for the device-set.
-        *  
+        *
         * For each of these we can make do with a truncated
         * or hashed uuid rather than the original, as long as
         * everyone agrees.
@@ -1343,63 +1961,67 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
         */
        struct ddf_super *ddf = st->sb;
        struct vcl *vcl = ddf->currentconf;
-       char *guid;
-       char buf[20];
-       struct sha1_ctx ctx;
 
        if (vcl)
-               guid = vcl->conf.guid;
+               uuid_of_ddf_subarray(ddf, vcl->vcnum, uuid);
        else
-               guid = ddf->anchor.guid;
-
-       sha1_init_ctx(&ctx);
-       sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
-       sha1_finish_ctx(&ctx, buf);
-       memcpy(uuid, buf, 4*4);
+               uuid_from_ddf_guid(ddf->anchor.guid, uuid);
 }
 
-static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info);
-
-static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
 {
        struct ddf_super *ddf = st->sb;
+       int map_disks = info->array.raid_disks;
+       __u32 *cptr;
 
        if (ddf->currentconf) {
-               getinfo_super_ddf_bvd(st, info);
+               getinfo_super_ddf_bvd(st, info, map);
                return;
        }
+       memset(info, 0, sizeof(*info));
 
-       info->array.raid_disks    = __be16_to_cpu(ddf->phys->used_pdes);
+       info->array.raid_disks    = be16_to_cpu(ddf->phys->used_pdes);
        info->array.level         = LEVEL_CONTAINER;
        info->array.layout        = 0;
        info->array.md_minor      = -1;
-       info->array.ctime         = DECADE + __be32_to_cpu(*(__u32*)
-                                                        (ddf->anchor.guid+16));
-       info->array.utime         = 0;
+       cptr = (__u32 *)(ddf->anchor.guid + 16);
+       info->array.ctime         = DECADE + __be32_to_cpu(*cptr);
+
        info->array.chunk_size    = 0;
        info->container_enough    = 1;
 
-
-       info->disk.major = 0;
-       info->disk.minor = 0;
+       info->disk.major          = 0;
+       info->disk.minor          = 0;
        if (ddf->dlist) {
-               info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
+               struct phys_disk_entry *pde = NULL;
+               info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
                info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
 
-               info->data_offset = __be64_to_cpu(ddf->phys->
-                                         entries[info->disk.raid_disk].
-                                         config_size);
+               info->data_offset = be64_to_cpu(ddf->phys->
+                                                 entries[info->disk.raid_disk].
+                                                 config_size);
                info->component_size = ddf->dlist->size - info->data_offset;
+               if (info->disk.raid_disk >= 0)
+                       pde = ddf->phys->entries + info->disk.raid_disk;
+               if (pde &&
+                   !(be16_to_cpu(pde->state) & DDF_Failed) &&
+                   !(be16_to_cpu(pde->state) & DDF_Missing))
+                       info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
+               else
+                       info->disk.state = 1 << MD_DISK_FAULTY;
+
        } else {
+               /* There should always be a dlist, but just in case...*/
                info->disk.number = -1;
                info->disk.raid_disk = -1;
-//             info->disk.raid_disk = find refnum in the table and use index;
+               info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
        }
-       info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
-
+       info->events = be32_to_cpu(ddf->active->seq);
+       info->array.utime = DECADE + be32_to_cpu(ddf->active->timestamp);
 
        info->recovery_start = MaxSector;
        info->reshape_active = 0;
+       info->recovery_blocked = 0;
        info->name[0] = 0;
 
        info->array.major_version = -1;
@@ -1409,57 +2031,100 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
 
        uuid_from_super_ddf(st, info->uuid);
 
+       if (map) {
+               int i, e = 0;
+               int max = be16_to_cpu(ddf->phys->max_pdes);
+               for (i = e = 0 ; i < map_disks ; i++, e++) {
+                       while (e < max &&
+                              be32_to_cpu(ddf->phys->entries[e].refnum) == 0xffffffff)
+                               e++;
+                       if (i < info->array.raid_disks && e < max &&
+                           !(be16_to_cpu(ddf->phys->entries[e].state)
+                             & DDF_Failed))
+                               map[i] = 1;
+                       else
+                               map[i] = 0;
+               }
+       }
 }
 
-static int rlq_to_layout(int rlq, int prl, int raiddisks);
+/* size of name must be at least 17 bytes! */
+static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i)
+{
+       int j;
+       memcpy(name, ddf->virt->entries[i].name, 16);
+       name[16] = 0;
+       for(j = 0; j < 16; j++)
+               if (name[j] == ' ')
+                       name[j] = 0;
+}
 
-static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
 {
        struct ddf_super *ddf = st->sb;
        struct vcl *vc = ddf->currentconf;
        int cd = ddf->currentdev;
+       int n_prim;
        int j;
-       struct dl *dl;
-
-       /* FIXME this returns BVD info - what if we want SVD ?? */
+       struct dl *dl = NULL;
+       int map_disks = info->array.raid_disks;
+       __u32 *cptr;
+       struct vd_config *conf;
 
-       info->array.raid_disks    = __be16_to_cpu(vc->conf.prim_elmnt_count);
-       info->array.level         = map_num1(ddf_level_num, vc->conf.prl);
-       info->array.layout        = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
-                                                 info->array.raid_disks);
+       memset(info, 0, sizeof(*info));
+       if (layout_ddf2md(&vc->conf, &info->array) == -1)
+               return;
        info->array.md_minor      = -1;
-       info->array.ctime         = DECADE +
-               __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
-       info->array.utime         = DECADE + __be32_to_cpu(vc->conf.timestamp);
+       cptr = (__u32 *)(vc->conf.guid + 16);
+       info->array.ctime         = DECADE + __be32_to_cpu(*cptr);
+       info->array.utime         = DECADE + be32_to_cpu(vc->conf.timestamp);
        info->array.chunk_size    = 512 << vc->conf.chunk_shift;
-       info->custom_array_size   = 0;
+       info->custom_array_size   = be64_to_cpu(vc->conf.array_blocks);
+
+       conf = &vc->conf;
+       n_prim = be16_to_cpu(conf->prim_elmnt_count);
+       if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
+               int ibvd = cd / n_prim - 1;
+               cd %= n_prim;
+               conf = vc->other_bvds[ibvd];
+       }
 
        if (cd >= 0 && (unsigned)cd < ddf->mppe) {
-               info->data_offset         = __be64_to_cpu(vc->lba_offset[cd]);
+               info->data_offset =
+                       be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
                if (vc->block_sizes)
                        info->component_size = vc->block_sizes[cd];
                else
-                       info->component_size = __be64_to_cpu(vc->conf.blocks);
+                       info->component_size = be64_to_cpu(conf->blocks);
+
+               for (dl = ddf->dlist; dl ; dl = dl->next)
+                       if (be32_eq(dl->disk.refnum, conf->phys_refnum[cd]))
+                               break;
        }
 
-       for (dl = ddf->dlist; dl ; dl = dl->next)
-               if (dl->raiddisk == info->disk.raid_disk)
-                       break;
        info->disk.major = 0;
        info->disk.minor = 0;
-       if (dl) {
+       info->disk.state = 0;
+       if (dl && dl->pdnum >= 0) {
                info->disk.major = dl->major;
                info->disk.minor = dl->minor;
+               info->disk.raid_disk = cd + conf->sec_elmnt_seq
+                       * be16_to_cpu(conf->prim_elmnt_count);
+               info->disk.number = dl->pdnum;
+               info->disk.state = 0;
+               if (info->disk.number >= 0 &&
+                   (be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Online) &&
+                   !(be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Failed))
+                       info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+               info->events = be32_to_cpu(ddf->active->seq);
        }
-//     info->disk.number = __be32_to_cpu(ddf->disk.refnum);
-//     info->disk.raid_disk = find refnum in the table and use index;
-//     info->disk.state = ???;
 
        info->container_member = ddf->currentconf->vcnum;
 
        info->recovery_start = MaxSector;
        info->resync_start = 0;
        info->reshape_active = 0;
+       info->recovery_blocked = 0;
        if (!(ddf->virt->entries[info->container_member].state
              & DDF_state_inconsistent)  &&
            (ddf->virt->entries[info->container_member].init_state
@@ -1471,19 +2136,28 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
 
        info->array.major_version = -1;
        info->array.minor_version = -2;
-       sprintf(info->text_version, "/%s/%s",
-               devnum2devname(st->container_dev),
-               st->subarray);
-       info->safe_mode_delay = 200;
-
-       memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
-       info->name[16]=0;
-       for(j=0; j<16; j++)
-               if (info->name[j] == ' ')
-                       info->name[j] = 0;
+       sprintf(info->text_version, "/%s/%d",
+               st->container_devnm,
+               info->container_member);
+       info->safe_mode_delay = DDF_SAFE_MODE_DELAY;
+
+       _ddf_array_name(info->name, ddf, info->container_member);
+
+       if (map)
+               for (j = 0; j < map_disks; j++) {
+                       map[j] = 0;
+                       if (j <  info->array.raid_disks) {
+                               int i = find_phys(ddf, vc->conf.phys_refnum[j]);
+                               if (i >= 0 &&
+                                   (be16_to_cpu(ddf->phys->entries[i].state)
+                                    & DDF_Online) &&
+                                   !(be16_to_cpu(ddf->phys->entries[i].state)
+                                     & DDF_Failed))
+                                       map[i] = 1;
+                       }
+               }
 }
 
-
 static int update_super_ddf(struct supertype *st, struct mdinfo *info,
                            char *update,
                            char *devname, int verbose,
@@ -1519,35 +2193,34 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info,
 //     struct virtual_entry *ve = find_ve(ddf);
 
        /* we don't need to handle "force-*" or "assemble" as
-        * there is no need to 'trick' the kernel.  We the metadata is
+        * there is no need to 'trick' the kernel.  When the metadata is
         * first updated to activate the array, all the implied modifications
         * will just happen.
         */
 
        if (strcmp(update, "grow") == 0) {
                /* FIXME */
-       }
-       if (strcmp(update, "resync") == 0) {
+       } else if (strcmp(update, "resync") == 0) {
 //             info->resync_checkpoint = 0;
-       }
-       /* We ignore UUID updates as they make even less sense
-        * with DDF
-        */
-       if (strcmp(update, "homehost") == 0) {
+       } else if (strcmp(update, "homehost") == 0) {
                /* homehost is stored in controller->vendor_data,
                 * or it is when we are the vendor
                 */
 //             if (info->vendor_is_local)
 //                     strcpy(ddf->controller.vendor_data, homehost);
-       }
-       if (strcmp(update, "name") == 0) {
+               rv = -1;
+       } else if (strcmp(update, "name") == 0) {
                /* name is stored in virtual_entry->name */
 //             memset(ve->name, ' ', 16);
 //             strncpy(ve->name, info->name, 16);
-       }
-       if (strcmp(update, "_reshape_progress") == 0) {
+               rv = -1;
+       } else if (strcmp(update, "_reshape_progress") == 0) {
                /* We don't support reshape yet */
-       }
+       } else if (strcmp(update, "assemble") == 0 ) {
+               /* Do nothing, just succeed */
+               rv = 0;
+       } else
+               rv = -1;
 
 //     update_all_csum(ddf);
 
@@ -1556,7 +2229,7 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info,
 
 static void make_header_guid(char *guid)
 {
-       __u32 stamp;
+       be32 stamp;
        /* Create a DDF Header of Virtual Disk GUID */
 
        /* 24 bytes of fiction required.
@@ -1565,26 +2238,60 @@ static void make_header_guid(char *guid)
         * Remaining 8 random number plus timestamp
         */
        memcpy(guid, T10, sizeof(T10));
-       stamp = __cpu_to_be32(0xdeadbeef);
+       stamp = cpu_to_be32(0xdeadbeef);
        memcpy(guid+8, &stamp, 4);
-       stamp = __cpu_to_be32(0);
+       stamp = cpu_to_be32(0);
        memcpy(guid+12, &stamp, 4);
-       stamp = __cpu_to_be32(time(0) - DECADE);
+       stamp = cpu_to_be32(time(0) - DECADE);
        memcpy(guid+16, &stamp, 4);
-       stamp = random32();
+       stamp._v32 = random32();
        memcpy(guid+20, &stamp, 4);
 }
 
-static int init_super_ddf_bvd(struct supertype *st,
-                             mdu_array_info_t *info,
-                             unsigned long long size,
-                             char *name, char *homehost,
-                             int *uuid);
+static unsigned int find_unused_vde(const struct ddf_super *ddf)
+{
+       unsigned int i;
+       for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
+               if (all_ff(ddf->virt->entries[i].guid))
+                       return i;
+       }
+       return DDF_NOTFOUND;
+}
+
+static unsigned int find_vde_by_name(const struct ddf_super *ddf,
+                                    const char *name)
+{
+       unsigned int i;
+       if (name == NULL)
+               return DDF_NOTFOUND;
+       for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
+               if (all_ff(ddf->virt->entries[i].guid))
+                       continue;
+               if (!strncmp(name, ddf->virt->entries[i].name,
+                            sizeof(ddf->virt->entries[i].name)))
+                       return i;
+       }
+       return DDF_NOTFOUND;
+}
+
+#ifndef MDASSEMBLE
+static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
+                                    const char *guid)
+{
+       unsigned int i;
+       if (guid == NULL || all_ff(guid))
+               return DDF_NOTFOUND;
+       for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++)
+               if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
+                       return i;
+       return DDF_NOTFOUND;
+}
+#endif
 
 static int init_super_ddf(struct supertype *st,
                          mdu_array_info_t *info,
                          unsigned long long size, char *name, char *homehost,
-                         int *uuid)
+                         int *uuid, unsigned long long data_offset)
 {
        /* This is primarily called by Create when creating a new array.
         * We will then get add_to_super called for each component, and then
@@ -1597,12 +2304,12 @@ static int init_super_ddf(struct supertype *st,
         * We need to create the entire 'ddf' structure which includes:
         *  DDF headers - these are easy.
         *  Controller data - a Sector describing this controller .. not that
-        *                  this is a controller exactly.
+        *                    this is a controller exactly.
         *  Physical Disk Record - one entry per device, so
-        *                      leave plenty of space.
+        *                         leave plenty of space.
         *  Virtual Disk Records - again, just leave plenty of space.
-        *                   This just lists VDs, doesn't give details
-        *  Config records - describes the VDs that use this disk
+        *                         This just lists VDs, doesn't give details.
+        *  Config records - describe the VDs that use this disk
         *  DiskData  - describes 'this' device.
         *  BadBlockManagement - empty
         *  Diag Space - empty
@@ -1621,15 +2328,14 @@ static int init_super_ddf(struct supertype *st,
        struct virtual_disk *vd;
 
        if (st->sb)
-               return init_super_ddf_bvd(st, info, size, name, homehost, uuid);
+               return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
+                                         data_offset);
 
        if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
-               fprintf(stderr, Name ": %s could not allocate superblock\n", __func__);
+               pr_err("could not allocate superblock\n");
                return 0;
        }
        memset(ddf, 0, sizeof(*ddf));
-       ddf->dlist = NULL; /* no physical disks yet */
-       ddf->conflist = NULL; /* No virtual disks yet */
        st->sb = ddf;
 
        if (info == NULL) {
@@ -1641,45 +2347,44 @@ static int init_super_ddf(struct supertype *st,
         * start 32MB from the end, and put the primary header there.
         * Don't do secondary for now.
         * We don't know exactly where that will be yet as it could be
-        * different on each device.  To just set up the lengths.
-        *
+        * different on each device.  So just set up the lengths.
         */
 
        ddf->anchor.magic = DDF_HEADER_MAGIC;
        make_header_guid(ddf->anchor.guid);
 
        memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
-       ddf->anchor.seq = __cpu_to_be32(1);
-       ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
+       ddf->anchor.seq = cpu_to_be32(1);
+       ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
        ddf->anchor.openflag = 0xFF;
        ddf->anchor.foreignflag = 0;
        ddf->anchor.enforcegroups = 0; /* Is this best?? */
        ddf->anchor.pad0 = 0xff;
        memset(ddf->anchor.pad1, 0xff, 12);
        memset(ddf->anchor.header_ext, 0xff, 32);
-       ddf->anchor.primary_lba = ~(__u64)0;
-       ddf->anchor.secondary_lba = ~(__u64)0;
+       ddf->anchor.primary_lba = cpu_to_be64(~(__u64)0);
+       ddf->anchor.secondary_lba = cpu_to_be64(~(__u64)0);
        ddf->anchor.type = DDF_HEADER_ANCHOR;
        memset(ddf->anchor.pad2, 0xff, 3);
-       ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
-       ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
-                                                 of 32M reserved.. */
-       max_phys_disks = 1023;   /* Should be enough */
-       ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
-       max_virt_disks = 255;
-       ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
-       ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
+       ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
+       /* Put this at bottom of 32M reserved.. */
+       ddf->anchor.workspace_lba = cpu_to_be64(~(__u64)0);
+       max_phys_disks = 1023;   /* Should be enough, 4095 is also allowed */
+       ddf->anchor.max_pd_entries = cpu_to_be16(max_phys_disks);
+       max_virt_disks = 255; /* 15, 63, 255, 1024, 4095 are all allowed */
+       ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks);
        ddf->max_part = 64;
-       ddf->mppe = 256;
+       ddf->anchor.max_partitions = cpu_to_be16(ddf->max_part);
+       ddf->mppe = 256; /* 16, 64, 256, 1024, 4096 are all allowed */
        ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
-       ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
-       ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
+       ddf->anchor.config_record_len = cpu_to_be16(ddf->conf_rec_len);
+       ddf->anchor.max_primary_element_entries = cpu_to_be16(ddf->mppe);
        memset(ddf->anchor.pad3, 0xff, 54);
-       /* controller sections is one sector long immediately
+       /* Controller section is one sector long immediately
         * after the ddf header */
        sector = 1;
-       ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
-       ddf->anchor.controller_section_length = __cpu_to_be32(1);
+       ddf->anchor.controller_section_offset = cpu_to_be32(sector);
+       ddf->anchor.controller_section_length = cpu_to_be32(1);
        sector += 1;
 
        /* phys is 8 sectors after that */
@@ -1690,9 +2395,9 @@ static int init_super_ddf(struct supertype *st,
        case 2: case 8: case 32: case 128: case 512: break;
        default: abort();
        }
-       ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
+       ddf->anchor.phys_section_offset = cpu_to_be32(sector);
        ddf->anchor.phys_section_length =
-               __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
+               cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
        sector += pdsize/512;
 
        /* virt is another 32 sectors */
@@ -1703,26 +2408,26 @@ static int init_super_ddf(struct supertype *st,
        case 2: case 8: case 32: case 128: case 512: break;
        default: abort();
        }
-       ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
+       ddf->anchor.virt_section_offset = cpu_to_be32(sector);
        ddf->anchor.virt_section_length =
-               __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
+               cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
        sector += vdsize/512;
 
        clen = ddf->conf_rec_len * (ddf->max_part+1);
-       ddf->anchor.config_section_offset = __cpu_to_be32(sector);
-       ddf->anchor.config_section_length = __cpu_to_be32(clen);
+       ddf->anchor.config_section_offset = cpu_to_be32(sector);
+       ddf->anchor.config_section_length = cpu_to_be32(clen);
        sector += clen;
 
-       ddf->anchor.data_section_offset = __cpu_to_be32(sector);
-       ddf->anchor.data_section_length = __cpu_to_be32(1);
+       ddf->anchor.data_section_offset = cpu_to_be32(sector);
+       ddf->anchor.data_section_length = cpu_to_be32(1);
        sector += 1;
 
-       ddf->anchor.bbm_section_length = __cpu_to_be32(0);
-       ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
-       ddf->anchor.diag_space_length = __cpu_to_be32(0);
-       ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
-       ddf->anchor.vendor_length = __cpu_to_be32(0);
-       ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
+       ddf->anchor.bbm_section_length = cpu_to_be32(0);
+       ddf->anchor.bbm_section_offset = cpu_to_be32(0xFFFFFFFF);
+       ddf->anchor.diag_space_length = cpu_to_be32(0);
+       ddf->anchor.diag_space_offset = cpu_to_be32(0xFFFFFFFF);
+       ddf->anchor.vendor_length = cpu_to_be32(0);
+       ddf->anchor.vendor_offset = cpu_to_be32(0xFFFFFFFF);
 
        memset(ddf->anchor.pad4, 0xff, 256);
 
@@ -1751,10 +2456,10 @@ static int init_super_ddf(struct supertype *st,
        for (i = strlen(T10) ; i+hostlen < 24; i++)
                ddf->controller.guid[i] = ' ';
 
-       ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
-       ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
-       ddf->controller.type.sub_vendor_id = 0;
-       ddf->controller.type.sub_device_id = 0;
+       ddf->controller.type.vendor_id = cpu_to_be16(0xDEAD);
+       ddf->controller.type.device_id = cpu_to_be16(0xBEEF);
+       ddf->controller.type.sub_vendor_id = cpu_to_be16(0);
+       ddf->controller.type.sub_device_id = cpu_to_be16(0);
        memcpy(ddf->controller.product_id, "What Is My PID??", 16);
        memset(ddf->controller.pad, 0xff, 8);
        memset(ddf->controller.vendor_data, 0xff, 448);
@@ -1762,7 +2467,7 @@ static int init_super_ddf(struct supertype *st,
                strcpy((char*)ddf->controller.vendor_data, homehost);
 
        if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
-               fprintf(stderr, Name ": %s could not allocate pd\n", __func__);
+               pr_err("could not allocate pd\n");
                return 0;
        }
        ddf->phys = pd;
@@ -1771,27 +2476,29 @@ static int init_super_ddf(struct supertype *st,
        memset(pd, 0xff, pdsize);
        memset(pd, 0, sizeof(*pd));
        pd->magic = DDF_PHYS_RECORDS_MAGIC;
-       pd->used_pdes = __cpu_to_be16(0);
-       pd->max_pdes = __cpu_to_be16(max_phys_disks);
+       pd->used_pdes = cpu_to_be16(0);
+       pd->max_pdes = cpu_to_be16(max_phys_disks);
        memset(pd->pad, 0xff, 52);
+       for (i = 0; i < max_phys_disks; i++)
+               memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
 
        if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
-               fprintf(stderr, Name ": %s could not allocate vd\n", __func__);
+               pr_err("could not allocate vd\n");
                return 0;
        }
        ddf->virt = vd;
        ddf->vdsize = vdsize;
        memset(vd, 0, vdsize);
        vd->magic = DDF_VIRT_RECORDS_MAGIC;
-       vd->populated_vdes = __cpu_to_be16(0);
-       vd->max_vdes = __cpu_to_be16(max_virt_disks);
+       vd->populated_vdes = cpu_to_be16(0);
+       vd->max_vdes = cpu_to_be16(max_virt_disks);
        memset(vd->pad, 0xff, 52);
 
        for (i=0; i<max_virt_disks; i++)
                memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
 
        st->sb = ddf;
-       ddf->updates_pending = 1;
+       ddf_set_updates_pending(ddf, NULL);
        return 1;
 }
 
@@ -1800,100 +2507,6 @@ static int chunk_to_shift(int chunksize)
        return ffs(chunksize/512)-1;
 }
 
-static int level_to_prl(int level)
-{
-       switch (level) {
-       case LEVEL_LINEAR: return DDF_CONCAT;
-       case 0: return DDF_RAID0;
-       case 1: return DDF_RAID1;
-       case 4: return DDF_RAID4;
-       case 5: return DDF_RAID5;
-       case 6: return DDF_RAID6;
-       default: return -1;
-       }
-}
-static int layout_to_rlq(int level, int layout, int raiddisks)
-{
-       switch(level) {
-       case 0:
-               return DDF_RAID0_SIMPLE;
-       case 1:
-               switch(raiddisks) {
-               case 2: return DDF_RAID1_SIMPLE;
-               case 3: return DDF_RAID1_MULTI;
-               default: return -1;
-               }
-       case 4:
-               switch(layout) {
-               case 0: return DDF_RAID4_N;
-               }
-               break;
-       case 5:
-               switch(layout) {
-               case ALGORITHM_LEFT_ASYMMETRIC:
-                       return DDF_RAID5_N_RESTART;
-               case ALGORITHM_RIGHT_ASYMMETRIC:
-                       return DDF_RAID5_0_RESTART;
-               case ALGORITHM_LEFT_SYMMETRIC:
-                       return DDF_RAID5_N_CONTINUE;
-               case ALGORITHM_RIGHT_SYMMETRIC:
-                       return -1; /* not mentioned in standard */
-               }
-       case 6:
-               switch(layout) {
-               case ALGORITHM_ROTATING_N_RESTART:
-                       return DDF_RAID5_N_RESTART;
-               case ALGORITHM_ROTATING_ZERO_RESTART:
-                       return DDF_RAID6_0_RESTART;
-               case ALGORITHM_ROTATING_N_CONTINUE:
-                       return DDF_RAID5_N_CONTINUE;
-               }
-       }
-       return -1;
-}
-
-static int rlq_to_layout(int rlq, int prl, int raiddisks)
-{
-       switch(prl) {
-       case DDF_RAID0:
-               return 0; /* hopefully rlq == DDF_RAID0_SIMPLE */
-       case DDF_RAID1:
-               return 0; /* hopefully rlq == SIMPLE or MULTI depending
-                            on raiddisks*/
-       case DDF_RAID4:
-               switch(rlq) {
-               case DDF_RAID4_N:
-                       return 0;
-               default:
-                       /* not supported */
-                       return -1; /* FIXME this isn't checked */
-               }
-       case DDF_RAID5:
-               switch(rlq) {
-               case DDF_RAID5_N_RESTART:
-                       return ALGORITHM_LEFT_ASYMMETRIC;
-               case DDF_RAID5_0_RESTART:
-                       return ALGORITHM_RIGHT_ASYMMETRIC;
-               case DDF_RAID5_N_CONTINUE:
-                       return ALGORITHM_LEFT_SYMMETRIC;
-               default:
-                       return -1;
-               }
-       case DDF_RAID6:
-               switch(rlq) {
-               case DDF_RAID5_N_RESTART:
-                       return ALGORITHM_ROTATING_N_RESTART;
-               case DDF_RAID6_0_RESTART:
-                       return ALGORITHM_ROTATING_ZERO_RESTART;
-               case DDF_RAID5_N_CONTINUE:
-                       return ALGORITHM_ROTATING_N_CONTINUE;
-               default:
-                       return -1;
-               }
-       }
-       return -1;
-}
-
 #ifndef MDASSEMBLE
 struct extent {
        unsigned long long start, size;
@@ -1911,85 +2524,115 @@ static int cmp_extent(const void *av, const void *bv)
 
 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
 {
-       /* find a list of used extents on the give physical device
+       /* Find a list of used extents on the given physical device
         * (dnum) of the given ddf.
         * Return a malloced array of 'struct extent'
-
-FIXME ignore DDF_Legacy devices?
-
         */
        struct extent *rv;
        int n = 0;
-       unsigned int i, j;
+       unsigned int i;
+       __u16 state;
 
-       rv = malloc(sizeof(struct extent) * (ddf->max_part + 2));
-       if (!rv)
+       if (dl->pdnum < 0)
                return NULL;
+       state = be16_to_cpu(ddf->phys->entries[dl->pdnum].state);
+
+       if ((state & (DDF_Online|DDF_Failed|DDF_Missing)) != DDF_Online)
+               return NULL;
+
+       rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
 
        for (i = 0; i < ddf->max_part; i++) {
+               const struct vd_config *bvd;
+               unsigned int ibvd;
                struct vcl *v = dl->vlist[i];
-               if (v == NULL)
+               if (v == NULL ||
+                   get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
+                                            &bvd, &ibvd) == DDF_NOTFOUND)
                        continue;
-               for (j = 0; j < v->conf.prim_elmnt_count; j++)
-                       if (v->conf.phys_refnum[j] == dl->disk.refnum) {
-                               /* This device plays role 'j' in  'v'. */
-                               rv[n].start = __be64_to_cpu(v->lba_offset[j]);
-                               rv[n].size = __be64_to_cpu(v->conf.blocks);
-                               n++;
-                               break;
-                       }
+               rv[n].start = be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
+               rv[n].size = be64_to_cpu(bvd->blocks);
+               n++;
        }
        qsort(rv, n, sizeof(*rv), cmp_extent);
 
-       rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
+       rv[n].start = be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
        rv[n].size = 0;
        return rv;
 }
+
+static unsigned long long find_space(
+       struct ddf_super *ddf, struct dl *dl,
+       unsigned long long data_offset,
+       unsigned long long *size)
+{
+       /* Find if the requested amount of space is available.
+        * If it is, return start.
+        * If not, set *size to largest space.
+        * If data_offset != INVALID_SECTORS, then the space must start
+        * at this location.
+        */
+       struct extent *e = get_extents(ddf, dl);
+       int i = 0;
+       unsigned long long pos = 0;
+       unsigned long long max_size = 0;
+
+       if (!e) {
+               *size = 0;
+               return INVALID_SECTORS;
+       }
+       do {
+               unsigned long long esize = e[i].start - pos;
+               if (data_offset != INVALID_SECTORS &&
+                   pos <= data_offset &&
+                   e[i].start > data_offset) {
+                       pos = data_offset;
+                       esize = e[i].start - pos;
+               }
+               if (data_offset != INVALID_SECTORS &&
+                   pos != data_offset) {
+                       i++;
+                       continue;
+               }
+               if (esize >= *size) {
+                       /* Found! */
+                       free(e);
+                       return pos;
+               }
+               if (esize > max_size)
+                       max_size = esize;
+               pos = e[i].start + e[i].size;
+               i++;
+       } while (e[i-1].size);
+       *size = max_size;
+       free(e);
+       return INVALID_SECTORS;
+}
 #endif
 
 static int init_super_ddf_bvd(struct supertype *st,
                              mdu_array_info_t *info,
                              unsigned long long size,
                              char *name, char *homehost,
-                             int *uuid)
+                             int *uuid, unsigned long long data_offset)
 {
        /* We are creating a BVD inside a pre-existing container.
         * so st->sb is already set.
         * We need to create a new vd_config and a new virtual_entry
         */
        struct ddf_super *ddf = st->sb;
-       unsigned int venum;
+       unsigned int venum, i;
        struct virtual_entry *ve;
        struct vcl *vcl;
        struct vd_config *vc;
 
-       if (__be16_to_cpu(ddf->virt->populated_vdes)
-           >= __be16_to_cpu(ddf->virt->max_vdes)) {
-               fprintf(stderr, Name": This ddf already has the "
-                       "maximum of %d virtual devices\n",
-                       __be16_to_cpu(ddf->virt->max_vdes));
+       if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
+               pr_err("This ddf already has an array called %s\n", name);
                return 0;
        }
-
-       if (name)
-               for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
-                       if (!all_ff(ddf->virt->entries[venum].guid)) {
-                               char *n = ddf->virt->entries[venum].name;
-
-                               if (strncmp(name, n, 16) == 0) {
-                                       fprintf(stderr, Name ": This ddf already"
-                                               " has an array called %s\n",
-                                               name);
-                                       return 0;
-                               }
-                       }
-
-       for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
-               if (all_ff(ddf->virt->entries[venum].guid))
-                       break;
-       if (venum == __be16_to_cpu(ddf->virt->max_vdes)) {
-               fprintf(stderr, Name ": Cannot find spare slot for "
-                       "virtual disk - DDF is corrupt\n");
+       venum = find_unused_vde(ddf);
+       if (venum == DDF_NOTFOUND) {
+               pr_err("Cannot find spare slot for virtual disk\n");
                return 0;
        }
        ve = &ddf->virt->entries[venum];
@@ -1998,10 +2641,11 @@ static int init_super_ddf_bvd(struct supertype *st,
         * timestamp, random number
         */
        make_header_guid(ve->guid);
-       ve->unit = __cpu_to_be16(info->md_minor);
+       ve->unit = cpu_to_be16(info->md_minor);
        ve->pad0 = 0xFFFF;
-       ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
-       ve->type = 0;
+       ve->guid_crc._v16 = crc32(0, (unsigned char *)ddf->anchor.guid,
+                                 DDF_GUID_LEN);
+       ve->type = cpu_to_be16(0);
        ve->state = DDF_state_degraded; /* Will be modified as devices are added */
        if (info->state & 1) /* clean */
                ve->init_state = DDF_init_full;
@@ -2013,46 +2657,50 @@ static int init_super_ddf_bvd(struct supertype *st,
        if (name)
                strncpy(ve->name, name, 16);
        ddf->virt->populated_vdes =
-               __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
+               cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)+1);
 
        /* Now create a new vd_config */
        if (posix_memalign((void**)&vcl, 512,
                           (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
-               fprintf(stderr, Name ": %s could not allocate vd_config\n", __func__);
+               pr_err("could not allocate vd_config\n");
                return 0;
        }
-       vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
        vcl->vcnum = venum;
-       sprintf(st->subarray, "%d", venum);
        vcl->block_sizes = NULL; /* FIXME not for CONCAT */
-
        vc = &vcl->conf;
 
        vc->magic = DDF_VD_CONF_MAGIC;
        memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
-       vc->timestamp = __cpu_to_be32(time(0)-DECADE);
-       vc->seqnum = __cpu_to_be32(1);
+       vc->timestamp = cpu_to_be32(time(0)-DECADE);
+       vc->seqnum = cpu_to_be32(1);
        memset(vc->pad0, 0xff, 24);
-       vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
        vc->chunk_shift = chunk_to_shift(info->chunk_size);
-       vc->prl = level_to_prl(info->level);
-       vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
-       vc->sec_elmnt_count = 1;
+       if (layout_md2ddf(info, vc) == -1 ||
+               be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
+               pr_err("unsupported RAID level/layout %d/%d with %d disks\n",
+                      info->level, info->layout, info->raid_disks);
+               free(vcl);
+               return 0;
+       }
        vc->sec_elmnt_seq = 0;
-       vc->srl = 0;
-       vc->blocks = __cpu_to_be64(info->size * 2);
-       vc->array_blocks = __cpu_to_be64(
+       if (alloc_other_bvds(ddf, vcl) != 0) {
+               pr_err("could not allocate other bvds\n");
+               free(vcl);
+               return 0;
+       }
+       vc->blocks = cpu_to_be64(info->size * 2);
+       vc->array_blocks = cpu_to_be64(
                calc_array_size(info->level, info->raid_disks, info->layout,
                                info->chunk_size, info->size*2));
        memset(vc->pad1, 0xff, 8);
-       vc->spare_refs[0] = 0xffffffff;
-       vc->spare_refs[1] = 0xffffffff;
-       vc->spare_refs[2] = 0xffffffff;
-       vc->spare_refs[3] = 0xffffffff;
-       vc->spare_refs[4] = 0xffffffff;
-       vc->spare_refs[5] = 0xffffffff;
-       vc->spare_refs[6] = 0xffffffff;
-       vc->spare_refs[7] = 0xffffffff;
+       vc->spare_refs[0] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[1] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[2] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[3] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[4] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[5] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[6] = cpu_to_be32(0xffffffff);
+       vc->spare_refs[7] = cpu_to_be32(0xffffffff);
        memset(vc->cache_pol, 0, 8);
        vc->bg_rate = 0x80;
        memset(vc->pad2, 0xff, 3);
@@ -2067,18 +2715,24 @@ static int init_super_ddf_bvd(struct supertype *st,
        memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
        memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
 
+       for (i = 1; i < vc->sec_elmnt_count; i++) {
+               memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
+               vcl->other_bvds[i-1]->sec_elmnt_seq = i;
+       }
+
        vcl->next = ddf->conflist;
        ddf->conflist = vcl;
        ddf->currentconf = vcl;
-       ddf->updates_pending = 1;
+       ddf_set_updates_pending(ddf, NULL);
        return 1;
 }
 
 #ifndef MDASSEMBLE
 static void add_to_super_ddf_bvd(struct supertype *st,
-                                mdu_disk_info_t *dk, int fd, char *devname)
+                                mdu_disk_info_t *dk, int fd, char *devname,
+                                unsigned long long data_offset)
 {
-       /* fd and devname identify a device with-in the ddf container (st).
+       /* fd and devname identify a device within the ddf container (st).
         * dk identifies a location in the new BVD.
         * We need to find suitable free space in that device and update
         * the phys_refnum and lba_offset for the newly created vd_config.
@@ -2091,11 +2745,9 @@ static void add_to_super_ddf_bvd(struct supertype *st,
        struct dl *dl;
        struct ddf_super *ddf = st->sb;
        struct vd_config *vc;
-       __u64 *lba_offset;
-       unsigned int working;
        unsigned int i;
-       unsigned long long blocks, pos, esize;
-       struct extent *ex;
+       unsigned long long blocks, pos;
+       unsigned int raid_disk = dk->raid_disk;
 
        if (fd == -1) {
                for (dl = ddf->dlist; dl ; dl = dl->next)
@@ -2107,36 +2759,28 @@ static void add_to_super_ddf_bvd(struct supertype *st,
                            dl->minor == dk->minor)
                                break;
        }
-       if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
+       if (!dl || dl->pdnum < 0 || ! (dk->state & (1<<MD_DISK_SYNC)))
                return;
 
        vc = &ddf->currentconf->conf;
-       lba_offset = ddf->currentconf->lba_offset;
-
-       ex = get_extents(ddf, dl);
-       if (!ex)
-               return;
+       if (vc->sec_elmnt_count > 1) {
+               unsigned int n = be16_to_cpu(vc->prim_elmnt_count);
+               if (raid_disk >= n)
+                       vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
+               raid_disk %= n;
+       }
 
-       i = 0; pos = 0;
-       blocks = __be64_to_cpu(vc->blocks);
+       blocks = be64_to_cpu(vc->blocks);
        if (ddf->currentconf->block_sizes)
                blocks = ddf->currentconf->block_sizes[dk->raid_disk];
 
-       do {
-               esize = ex[i].start - pos;
-               if (esize >= blocks)
-                       break;
-               pos = ex[i].start + ex[i].size;
-               i++;
-       } while (ex[i-1].size);
-
-       free(ex);
-       if (esize < blocks)
+       pos = find_space(ddf, dl, data_offset, &blocks);
+       if (pos == INVALID_SECTORS)
                return;
 
        ddf->currentdev = dk->raid_disk;
-       vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
-       lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
+       vc->phys_refnum[raid_disk] = dl->disk.refnum;
+       LBA_OFFSET(ddf, vc)[raid_disk] = cpu_to_be64(pos);
 
        for (i = 0; i < ddf->max_part ; i++)
                if (dl->vlist[i] == NULL)
@@ -2150,38 +2794,64 @@ static void add_to_super_ddf_bvd(struct supertype *st,
        if (devname)
                dl->devname = devname;
 
-       /* Check how many working raid_disks, and if we can mark
-        * array as optimal yet
-        */
-       working = 0;
+       /* Check if we can mark array as optimal yet */
+       i = ddf->currentconf->vcnum;
+       ddf->virt->entries[i].state =
+               (ddf->virt->entries[i].state & ~DDF_state_mask)
+               | get_svd_state(ddf, ddf->currentconf);
+       be16_clear(ddf->phys->entries[dl->pdnum].type,
+                  cpu_to_be16(DDF_Global_Spare));
+       be16_set(ddf->phys->entries[dl->pdnum].type,
+                cpu_to_be16(DDF_Active_in_VD));
+       dprintf("added disk %d/%08x to VD %d/%s as disk %d\n",
+               dl->pdnum, be32_to_cpu(dl->disk.refnum),
+               ddf->currentconf->vcnum, guid_str(vc->guid),
+               dk->raid_disk);
+       ddf_set_updates_pending(ddf, vc);
+}
 
-       for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
-               if (vc->phys_refnum[i] != 0xffffffff)
-                       working++;
+static unsigned int find_unused_pde(const struct ddf_super *ddf)
+{
+       unsigned int i;
+       for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++) {
+               if (all_ff(ddf->phys->entries[i].guid))
+                       return i;
+       }
+       return DDF_NOTFOUND;
+}
 
-       /* Find which virtual_entry */
-       i = ddf->currentconf->vcnum;
-       if (working == __be16_to_cpu(vc->prim_elmnt_count))
-               ddf->virt->entries[i].state =
-                       (ddf->virt->entries[i].state & ~DDF_state_mask)
-                       | DDF_state_optimal;
-
-       if (vc->prl == DDF_RAID6 &&
-           working+1 == __be16_to_cpu(vc->prim_elmnt_count))
-               ddf->virt->entries[i].state =
-                       (ddf->virt->entries[i].state & ~DDF_state_mask)
-                       | DDF_state_part_optimal;
-
-       ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
-       ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
-       ddf->updates_pending = 1;
+static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl)
+{
+       __u64 cfs, t;
+       cfs = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba));
+       t = be64_to_cpu(dl->secondary_lba);
+       if (t != ~(__u64)0)
+               cfs = min(cfs, t);
+       /*
+        * Some vendor DDF structures interpret workspace_lba
+        * very differently than we do: Make a sanity check on the value.
+        */
+       t = be64_to_cpu(dl->workspace_lba);
+       if (t < cfs) {
+               __u64 wsp = cfs - t;
+               if (wsp > 1024*1024*2ULL && wsp > dl->size / 16) {
+                       pr_err("%x:%x: workspace size 0x%llx too big, ignoring\n",
+                              dl->major, dl->minor, (unsigned long long)wsp);
+               } else
+                       cfs = t;
+       }
+       pde->config_size = cpu_to_be64(cfs);
+       dprintf("%x:%x config_size %llx, DDF structure is %llx blocks\n",
+               dl->major, dl->minor,
+               (unsigned long long)cfs, (unsigned long long)(dl->size-cfs));
 }
 
-/* add a device to a container, either while creating it or while
+/* Add a device to a container, either while creating it or while
  * expanding a pre-existing container
  */
 static int add_to_super_ddf(struct supertype *st,
-                            mdu_disk_info_t *dk, int fd, char *devname)
+                           mdu_disk_info_t *dk, int fd, char *devname,
+                           unsigned long long data_offset)
 {
        struct ddf_super *ddf = st->sb;
        struct dl *dd;
@@ -2191,9 +2861,10 @@ static int add_to_super_ddf(struct supertype *st,
        struct phys_disk_entry *pde;
        unsigned int n, i;
        struct stat stb;
+       __u32 *tptr;
 
        if (ddf->currentconf) {
-               add_to_super_ddf_bvd(st, dk, fd, devname);
+               add_to_super_ddf_bvd(st, dk, fd, devname, data_offset);
                return 0;
        }
 
@@ -2201,11 +2872,22 @@ static int add_to_super_ddf(struct supertype *st,
         * a phys_disk entry and a more detailed disk_data entry.
         */
        fstat(fd, &stb);
+       n = find_unused_pde(ddf);
+       if (n == DDF_NOTFOUND) {
+               pr_err("No free slot in array, cannot add disk\n");
+               return 1;
+       }
+       pde = &ddf->phys->entries[n];
+       get_dev_size(fd, NULL, &size);
+       if (size <= 32*1024*1024) {
+               pr_err("device size must be at least 32MB\n");
+               return 1;
+       }
+       size >>= 9;
+
        if (posix_memalign((void**)&dd, 512,
                           sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
-               fprintf(stderr, Name
-                       ": %s could allocate buffer for new disk, aborting\n",
-                       __func__);
+               pr_err("could allocate buffer for new disk, aborting\n");
                return 1;
        }
        dd->major = major(stb.st_rdev);
@@ -2219,15 +2901,17 @@ static int add_to_super_ddf(struct supertype *st,
        tm = localtime(&now);
        sprintf(dd->disk.guid, "%8s%04d%02d%02d",
                T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
-       *(__u32*)(dd->disk.guid + 16) = random32();
-       *(__u32*)(dd->disk.guid + 20) = random32();
+       tptr = (__u32 *)(dd->disk.guid + 16);
+       *tptr++ = random32();
+       *tptr = random32();
 
        do {
                /* Cannot be bothered finding a CRC of some irrelevant details*/
-               dd->disk.refnum = random32();
-               for (i = __be16_to_cpu(ddf->active->max_pd_entries);
+               dd->disk.refnum._v32 = random32();
+               for (i = be16_to_cpu(ddf->active->max_pd_entries);
                     i > 0; i--)
-                       if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
+                       if (be32_eq(ddf->phys->entries[i-1].refnum,
+                                   dd->disk.refnum))
                                break;
        } while (i > 0);
 
@@ -2239,8 +2923,6 @@ static int add_to_super_ddf(struct supertype *st,
        for (i = 0; i < ddf->max_part ; i++)
                dd->vlist[i] = NULL;
 
-       n = __be16_to_cpu(ddf->phys->used_pdes);
-       pde = &ddf->phys->entries[n];
        dd->pdnum = n;
 
        if (st->update_tail) {
@@ -2248,160 +2930,270 @@ static int add_to_super_ddf(struct supertype *st,
                           sizeof(struct phys_disk_entry));
                struct phys_disk *pd;
 
-               pd = malloc(len);
+               pd = xmalloc(len);
                pd->magic = DDF_PHYS_RECORDS_MAGIC;
-               pd->used_pdes = __cpu_to_be16(n);
+               pd->used_pdes = cpu_to_be16(n);
                pde = &pd->entries[0];
                dd->mdupdate = pd;
-       } else {
-               n++;
-               ddf->phys->used_pdes = __cpu_to_be16(n);
-       }
+       } else
+               ddf->phys->used_pdes = cpu_to_be16(
+                       1 + be16_to_cpu(ddf->phys->used_pdes));
 
        memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
        pde->refnum = dd->disk.refnum;
-       pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
-       pde->state = __cpu_to_be16(DDF_Online);
-       get_dev_size(fd, NULL, &size);
-       /* We are required to reserve 32Meg, and record the size in sectors */
-       pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
+       pde->type = cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
+       pde->state = cpu_to_be16(DDF_Online);
+       dd->size = size;
+       /*
+        * If there is already a device in dlist, try to reserve the same
+        * amount of workspace. Otherwise, use 32MB.
+        * We checked disk size above already.
+        */
+#define __calc_lba(new, old, lba, mb) do { \
+               unsigned long long dif; \
+               if ((old) != NULL) \
+                       dif = (old)->size - be64_to_cpu((old)->lba); \
+               else \
+                       dif = (new)->size; \
+               if ((new)->size > dif) \
+                       (new)->lba = cpu_to_be64((new)->size - dif); \
+               else \
+                       (new)->lba = cpu_to_be64((new)->size - (mb*1024*2)); \
+       } while (0)
+       __calc_lba(dd, ddf->dlist, workspace_lba, 32);
+       __calc_lba(dd, ddf->dlist, primary_lba, 16);
+       if (ddf->dlist == NULL ||
+           be64_to_cpu(ddf->dlist->secondary_lba) != ~(__u64)0)
+               __calc_lba(dd, ddf->dlist, secondary_lba, 32);
+       _set_config_size(pde, dd);
+
        sprintf(pde->path, "%17.17s","Information: nil") ;
        memset(pde->pad, 0xff, 6);
 
-       dd->size = size >> 9;
        if (st->update_tail) {
                dd->next = ddf->add_list;
                ddf->add_list = dd;
        } else {
                dd->next = ddf->dlist;
                ddf->dlist = dd;
-               ddf->updates_pending = 1;
+               ddf_set_updates_pending(ddf, NULL);
        }
 
        return 0;
 }
 
-/*
+static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
+{
+       struct ddf_super *ddf = st->sb;
+       struct dl *dl;
+
+       /* mdmon has noticed that this disk (dk->major/dk->minor) has
+        * disappeared from the container.
+        * We need to arrange that it disappears from the metadata and
+        * internal data structures too.
+        * Most of the work is done by ddf_process_update which edits
+        * the metadata and closes the file handle and attaches the memory
+        * where free_updates will free it.
+        */
+       for (dl = ddf->dlist; dl ; dl = dl->next)
+               if (dl->major == dk->major &&
+                   dl->minor == dk->minor)
+                       break;
+       if (!dl || dl->pdnum < 0)
+               return -1;
+
+       if (st->update_tail) {
+               int len = (sizeof(struct phys_disk) +
+                          sizeof(struct phys_disk_entry));
+               struct phys_disk *pd;
+
+               pd = xmalloc(len);
+               pd->magic = DDF_PHYS_RECORDS_MAGIC;
+               pd->used_pdes = cpu_to_be16(dl->pdnum);
+               pd->entries[0].state = cpu_to_be16(DDF_Missing);
+               append_metadata_update(st, pd, len);
+       }
+       return 0;
+}
+#endif
+
+/*
  * This is the write_init_super method for a ddf container.  It is
  * called when creating a container or adding another device to a
  * container.
  */
 
-static unsigned char null_conf[4096+512];
+static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
+{
+       unsigned long long sector;
+       struct ddf_header *header;
+       int fd, i, n_config, conf_size, buf_size;
+       int ret = 0;
+       char *conf;
+
+       fd = d->fd;
+
+       switch (type) {
+       case DDF_HEADER_PRIMARY:
+               header = &ddf->primary;
+               sector = be64_to_cpu(header->primary_lba);
+               break;
+       case DDF_HEADER_SECONDARY:
+               header = &ddf->secondary;
+               sector = be64_to_cpu(header->secondary_lba);
+               break;
+       default:
+               return 0;
+       }
+       if (sector == ~(__u64)0)
+               return 0;
+
+       header->type = type;
+       header->openflag = 1;
+       header->crc = calc_crc(header, 512);
+
+       lseek64(fd, sector<<9, 0);
+       if (write(fd, header, 512) < 0)
+               goto out;
+
+       ddf->controller.crc = calc_crc(&ddf->controller, 512);
+       if (write(fd, &ddf->controller, 512) < 0)
+               goto out;
+
+       ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
+       if (write(fd, ddf->phys, ddf->pdsize) < 0)
+               goto out;
+       ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
+       if (write(fd, ddf->virt, ddf->vdsize) < 0)
+               goto out;
+
+       /* Now write lots of config records. */
+       n_config = ddf->max_part;
+       conf_size = ddf->conf_rec_len * 512;
+       conf = ddf->conf;
+       buf_size = conf_size * (n_config + 1);
+       if (!conf) {
+               if (posix_memalign((void**)&conf, 512, buf_size) != 0)
+                       goto out;
+               ddf->conf = conf;
+       }
+       for (i = 0 ; i <= n_config ; i++) {
+               struct vcl *c;
+               struct vd_config *vdc = NULL;
+               if (i == n_config) {
+                       c = (struct vcl *)d->spare;
+                       if (c)
+                               vdc = &c->conf;
+               } else {
+                       unsigned int dummy;
+                       c = d->vlist[i];
+                       if (c)
+                               get_pd_index_from_refnum(
+                                       c, d->disk.refnum,
+                                       ddf->mppe,
+                                       (const struct vd_config **)&vdc,
+                                       &dummy);
+               }
+               if (vdc) {
+                       dprintf("writing conf record %i on disk %08x for %s/%u\n",
+                               i, be32_to_cpu(d->disk.refnum),
+                               guid_str(vdc->guid),
+                               vdc->sec_elmnt_seq);
+                       vdc->crc = calc_crc(vdc, conf_size);
+                       memcpy(conf + i*conf_size, vdc, conf_size);
+               } else
+                       memset(conf + i*conf_size, 0xff, conf_size);
+       }
+       if (write(fd, conf, buf_size) != buf_size)
+               goto out;
+
+       d->disk.crc = calc_crc(&d->disk, 512);
+       if (write(fd, &d->disk, 512) < 0)
+               goto out;
+
+       ret = 1;
+out:
+       header->openflag = 0;
+       header->crc = calc_crc(header, 512);
+
+       lseek64(fd, sector<<9, 0);
+       if (write(fd, header, 512) < 0)
+               ret = 0;
 
-static int __write_init_super_ddf(struct supertype *st, int do_close)
+       return ret;
+}
+
+static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
 {
+       unsigned long long size;
+       int fd = d->fd;
+       if (fd < 0)
+               return 0;
+
+       /* We need to fill in the primary, (secondary) and workspace
+        * lba's in the headers, set their checksums,
+        * Also checksum phys, virt....
+        *
+        * Then write everything out, finally the anchor is written.
+        */
+       get_dev_size(fd, NULL, &size);
+       size /= 512;
+       memcpy(&ddf->anchor, ddf->active, 512);
+       if (be64_to_cpu(d->workspace_lba) != 0ULL)
+               ddf->anchor.workspace_lba = d->workspace_lba;
+       else
+               ddf->anchor.workspace_lba =
+                       cpu_to_be64(size - 32*1024*2);
+       if (be64_to_cpu(d->primary_lba) != 0ULL)
+               ddf->anchor.primary_lba = d->primary_lba;
+       else
+               ddf->anchor.primary_lba =
+                       cpu_to_be64(size - 16*1024*2);
+       if (be64_to_cpu(d->secondary_lba) != 0ULL)
+               ddf->anchor.secondary_lba = d->secondary_lba;
+       else
+               ddf->anchor.secondary_lba =
+                       cpu_to_be64(size - 32*1024*2);
+       ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
+       memcpy(&ddf->primary, &ddf->anchor, 512);
+       memcpy(&ddf->secondary, &ddf->anchor, 512);
+
+       ddf->anchor.type = DDF_HEADER_ANCHOR;
+       ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
+       ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
+       ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
+
+       if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
+               return 0;
+
+       if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
+               return 0;
+
+       lseek64(fd, (size-1)*512, SEEK_SET);
+       if (write(fd, &ddf->anchor, 512) < 0)
+               return 0;
+
+       return 1;
+}
 
+#ifndef MDASSEMBLE
+static int __write_init_super_ddf(struct supertype *st)
+{
        struct ddf_super *ddf = st->sb;
-       int i;
        struct dl *d;
-       int n_config;
-       int conf_size;
        int attempts = 0;
        int successes = 0;
-       unsigned long long size, sector;
+
+       pr_state(ddf, __func__);
 
        /* try to write updated metadata,
         * if we catch a failure move on to the next disk
         */
        for (d = ddf->dlist; d; d=d->next) {
-               int fd = d->fd;
-
-               if (fd < 0)
-                       continue;
-
                attempts++;
-               /* We need to fill in the primary, (secondary) and workspace
-                * lba's in the headers, set their checksums,
-                * Also checksum phys, virt....
-                *
-                * Then write everything out, finally the anchor is written.
-                */
-               get_dev_size(fd, NULL, &size);
-               size /= 512;
-               ddf->anchor.workspace_lba = __cpu_to_be64(size - 32*1024*2);
-               ddf->anchor.primary_lba = __cpu_to_be64(size - 16*1024*2);
-               ddf->anchor.seq = __cpu_to_be32(1);
-               memcpy(&ddf->primary, &ddf->anchor, 512);
-               memcpy(&ddf->secondary, &ddf->anchor, 512);
-
-               ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
-               ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
-               ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
-
-               ddf->primary.openflag = 0;
-               ddf->primary.type = DDF_HEADER_PRIMARY;
-
-               ddf->secondary.openflag = 0;
-               ddf->secondary.type = DDF_HEADER_SECONDARY;
-
-               ddf->primary.crc = calc_crc(&ddf->primary, 512);
-               ddf->secondary.crc = calc_crc(&ddf->secondary, 512);
-
-               sector = size - 16*1024*2;
-               lseek64(fd, sector<<9, 0);
-               if (write(fd, &ddf->primary, 512) < 0)
-                       continue;
-
-               ddf->controller.crc = calc_crc(&ddf->controller, 512);
-               if (write(fd, &ddf->controller, 512) < 0)
-                       continue;
-
-               ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
-
-               if (write(fd, ddf->phys, ddf->pdsize) < 0)
-                       continue;
-
-               ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
-               if (write(fd, ddf->virt, ddf->vdsize) < 0)
-                       continue;
-
-               /* Now write lots of config records. */
-               n_config = ddf->max_part;
-               conf_size = ddf->conf_rec_len * 512;
-               for (i = 0 ; i <= n_config ; i++) {
-                       struct vcl *c = d->vlist[i];
-                       if (i == n_config)
-                               c = (struct vcl*)d->spare;
-
-                       if (c) {
-                               c->conf.crc = calc_crc(&c->conf, conf_size);
-                               if (write(fd, &c->conf, conf_size) < 0)
-                                       break;
-                       } else {
-                               char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL);
-                               if (null_conf[0] != 0xff)
-                                       memset(null_conf, 0xff, sizeof(null_conf));
-                               unsigned int togo = conf_size;
-                               while (togo > sizeof(null_conf)-512) {
-                                       if (write(fd, null_aligned, sizeof(null_conf)-512) < 0)
-                                               break;
-                                       togo -= sizeof(null_conf)-512;
-                               }
-                               if (write(fd, null_aligned, togo) < 0)
-                                       break;
-                       }
-               }
-               if (i <= n_config)
-                       continue;
-               d->disk.crc = calc_crc(&d->disk, 512);
-               if (write(fd, &d->disk, 512) < 0)
-                       continue;
-
-               /* Maybe do the same for secondary */
-
-               lseek64(fd, (size-1)*512, SEEK_SET);
-               if (write(fd, &ddf->anchor, 512) < 0)
-                       continue;
-               successes++;
+               successes += _write_super_to_disk(ddf, d);
        }
 
-       if (do_close)
-               for (d = ddf->dlist; d; d=d->next) {
-                       close(d->fd);
-                       d->fd = -1;
-               }
-
        return attempts != successes;
 }
 
@@ -2410,16 +3202,18 @@ static int write_init_super_ddf(struct supertype *st)
        struct ddf_super *ddf = st->sb;
        struct vcl *currentconf = ddf->currentconf;
 
-       /* we are done with currentconf reset it to point st at the container */
+       /* We are done with currentconf - reset it so st refers to the container */
        ddf->currentconf = NULL;
 
        if (st->update_tail) {
                /* queue the virtual_disk and vd_config as metadata updates */
                struct virtual_disk *vd;
                struct vd_config *vc;
-               int len;
+               int len, tlen;
+               unsigned int i;
 
                if (!currentconf) {
+                       /* Must be adding a physical disk to the container */
                        int len = (sizeof(struct phys_disk) +
                                   sizeof(struct phys_disk_entry));
 
@@ -2436,31 +3230,39 @@ static int write_init_super_ddf(struct supertype *st)
 
                /* First the virtual disk.  We have a slightly fake header */
                len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
-               vd = malloc(len);
+               vd = xmalloc(len);
                *vd = *ddf->virt;
                vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
-               vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
+               vd->populated_vdes = cpu_to_be16(currentconf->vcnum);
                append_metadata_update(st, vd, len);
 
                /* Then the vd_config */
                len = ddf->conf_rec_len * 512;
-               vc = malloc(len);
+               tlen = len * currentconf->conf.sec_elmnt_count;
+               vc = xmalloc(tlen);
                memcpy(vc, &currentconf->conf, len);
-               append_metadata_update(st, vc, len);
+               for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
+                       memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
+                              len);
+               append_metadata_update(st, vc, tlen);
 
-               /* FIXME I need to close the fds! */
                return 0;
-       } else {        
+       } else {
                struct dl *d;
-               for (d = ddf->dlist; d; d=d->next)
-                       while (Kill(d->devname, NULL, 0, 1, 1) == 0);
-               return __write_init_super_ddf(st, 1);
+               if (!currentconf)
+                       for (d = ddf->dlist; d; d=d->next)
+                               while (Kill(d->devname, NULL, 0, -1, 1) == 0);
+               /* Note: we don't close the fd's now, but a subsequent
+                * ->free_super() will
+                */
+               return __write_init_super_ddf(st);
        }
 }
 
 #endif
 
-static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
+static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
+                           unsigned long long data_offset)
 {
        /* We must reserve the last 32Meg */
        if (devsize <= 32*1024*2)
@@ -2472,52 +3274,35 @@ static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
 
 static int reserve_space(struct supertype *st, int raiddisks,
                         unsigned long long size, int chunk,
+                        unsigned long long data_offset,
                         unsigned long long *freesize)
 {
        /* Find 'raiddisks' spare extents at least 'size' big (but
         * only caring about multiples of 'chunk') and remember
-        * them.
-        * If the cannot be found, fail.
+        * them.   If size==0, find the largest size possible.
+        * Report available size in *freesize
+        * If space cannot be found, fail.
         */
        struct dl *dl;
        struct ddf_super *ddf = st->sb;
        int cnt = 0;
 
        for (dl = ddf->dlist; dl ; dl=dl->next) {
-               dl->raiddisk = -1;      
+               dl->raiddisk = -1;
                dl->esize = 0;
        }
        /* Now find largest extent on each device */
        for (dl = ddf->dlist ; dl ; dl=dl->next) {
-               struct extent *e = get_extents(ddf, dl);
-               unsigned long long pos = 0;
-               int i = 0;
-               int found = 0;
-               unsigned long long minsize = size;
+               unsigned long long minsize = ULLONG_MAX;
 
-               if (size == 0)
-                       minsize = chunk;
-
-               if (!e)
-                       continue;
-               do {
-                       unsigned long long esize;
-                       esize = e[i].start - pos;
-                       if (esize >= minsize) {
-                               found = 1;
-                               minsize = esize;
-                       }
-                       pos = e[i].start + e[i].size;
-                       i++;
-               } while (e[i-1].size);
-               if (found) {
+               find_space(ddf, dl, data_offset, &minsize);
+               if (minsize >= size && minsize >= (unsigned)chunk) {
                        cnt++;
                        dl->esize = minsize;
                }
-               free(e);
        }
        if (cnt < raiddisks) {
-               fprintf(stderr, Name ": not enough devices with space to create array.\n");
+               pr_err("not enough devices with space to create array.\n");
                return 0; /* No enough free spaces large enough */
        }
        if (size == 0) {
@@ -2528,7 +3313,7 @@ static int reserve_space(struct supertype *st, int raiddisks,
                                continue;
                        /* This is bigger than 'size', see if there are enough */
                        cnt = 0;
-                       for (dl2 = dl; dl2 ; dl2=dl2->next)
+                       for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
                                if (dl2->esize >= dl->esize)
                                        cnt++;
                        if (cnt >= raiddisks)
@@ -2540,7 +3325,7 @@ static int reserve_space(struct supertype *st, int raiddisks,
                }
                *freesize = size;
                if (size < 32) {
-                       fprintf(stderr, Name ": not enough spare devices to create array.\n");
+                       pr_err("not enough spare devices to create array.\n");
                        return 0;
                }
        }
@@ -2550,31 +3335,17 @@ static int reserve_space(struct supertype *st, int raiddisks,
        for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
                if (dl->esize < size)
                        continue;
-               
+
                dl->raiddisk = cnt;
                cnt++;
        }
        return 1;
 }
 
-
-
-static int
-validate_geometry_ddf_container(struct supertype *st,
-                               int level, int layout, int raiddisks,
-                               int chunk, unsigned long long size,
-                               char *dev, unsigned long long *freesize,
-                               int verbose);
-
-static int validate_geometry_ddf_bvd(struct supertype *st,
-                                    int level, int layout, int raiddisks,
-                                    int chunk, unsigned long long size,
-                                    char *dev, unsigned long long *freesize,
-                                    int verbose);
-
 static int validate_geometry_ddf(struct supertype *st,
                                 int level, int layout, int raiddisks,
-                                int chunk, unsigned long long size,
+                                int *chunk, unsigned long long size,
+                                unsigned long long data_offset,
                                 char *dev, unsigned long long *freesize,
                                 int verbose)
 {
@@ -2589,24 +3360,31 @@ static int validate_geometry_ddf(struct supertype *st,
         * If given BVDs, we make an SVD, changing all the GUIDs in the process.
         */
 
+       if (*chunk == UnSet)
+               *chunk = DEFAULT_CHUNK;
+
+       if (level == LEVEL_NONE)
+               level = LEVEL_CONTAINER;
        if (level == LEVEL_CONTAINER) {
                /* Must be a fresh device to add to a container */
                return validate_geometry_ddf_container(st, level, layout,
-                                                      raiddisks, chunk,
-                                                      size, dev, freesize,
+                                                      raiddisks, *chunk,
+                                                      size, data_offset, dev,
+                                                      freesize,
                                                       verbose);
        }
 
        if (!dev) {
-               /* Initial sanity check.  Exclude illegal levels. */
-               int i;
-               for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
-                       if (ddf_level_num[i].num2 == level)
-                               break;
-               if (ddf_level_num[i].num1 == MAXINT) {
+               mdu_array_info_t array = {
+                       .level = level,
+                       .layout = layout,
+                       .raid_disks = raiddisks
+               };
+               struct vd_config conf;
+               if (layout_md2ddf(&array, &conf) == -1) {
                        if (verbose)
-                               fprintf(stderr, Name ": DDF does not support level %d arrays\n",
-                                       level);
+                               pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
+                                      level, layout, raiddisks);
                        return 0;
                }
                /* Should check layout? etc */
@@ -2619,7 +3397,8 @@ static int validate_geometry_ddf(struct supertype *st,
                         * chosen so that add_to_super/getinfo_super
                         * can return them.
                         */
-                       return reserve_space(st, raiddisks, size, chunk, freesize);
+                       return reserve_space(st, raiddisks, size, *chunk,
+                                            data_offset, freesize);
                }
                return 1;
        }
@@ -2630,7 +3409,8 @@ static int validate_geometry_ddf(struct supertype *st,
                 * Should make a distinction one day.
                 */
                return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
-                                                chunk, size, dev, freesize,
+                                                chunk, size, data_offset, dev,
+                                                freesize,
                                                 verbose);
        }
        /* This is the first device for the array.
@@ -2642,28 +3422,17 @@ static int validate_geometry_ddf(struct supertype *st,
         */
        fd = open(dev, O_RDONLY|O_EXCL, 0);
        if (fd >= 0) {
-               sra = sysfs_read(fd, 0, GET_VERSION);
                close(fd);
-               if (sra && sra->array.major_version == -1 &&
-                   strcmp(sra->text_version, "ddf") == 0) {
-
-                       /* load super */
-                       /* find space for 'n' devices. */
-                       /* remember the devices */
-                       /* Somehow return the fact that we have enough */
-               }
-
+               /* Just a bare device, no good to us */
                if (verbose)
-                       fprintf(stderr,
-                               Name ": ddf: Cannot create this array "
-                               "on device %s - a container is required.\n",
-                               dev);
+                       pr_err("ddf: Cannot create this array on device %s - a container is required.\n",
+                              dev);
                return 0;
        }
        if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
                if (verbose)
-                       fprintf(stderr, Name ": ddf: Cannot open %s: %s\n",
-                               dev, strerror(errno));
+                       pr_err("ddf: Cannot open %s: %s\n",
+                              dev, strerror(errno));
                return 0;
        }
        /* Well, it is in use by someone, maybe a 'ddf' container. */
@@ -2671,11 +3440,11 @@ static int validate_geometry_ddf(struct supertype *st,
        if (cfd < 0) {
                close(fd);
                if (verbose)
-                       fprintf(stderr, Name ": ddf: Cannot use %s: %s\n",
-                               dev, strerror(EBUSY));
+                       pr_err("ddf: Cannot use %s: %s\n",
+                              dev, strerror(EBUSY));
                return 0;
        }
-       sra = sysfs_read(cfd, 0, GET_VERSION);
+       sra = sysfs_read(cfd, NULL, GET_VERSION);
        close(fd);
        if (sra && sra->array.major_version == -1 &&
            strcmp(sra->text_version, "ddf") == 0) {
@@ -2683,12 +3452,13 @@ static int validate_geometry_ddf(struct supertype *st,
                 * and try to create a bvd
                 */
                struct ddf_super *ddf;
-               if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
+               if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
                        st->sb = ddf;
-                       st->container_dev = fd2devnum(cfd);
+                       strcpy(st->container_devnm, fd2devnm(cfd));
                        close(cfd);
                        return validate_geometry_ddf_bvd(st, level, layout,
                                                         raiddisks, chunk, size,
+                                                        data_offset,
                                                         dev, freesize,
                                                         verbose);
                }
@@ -2703,6 +3473,7 @@ static int
 validate_geometry_ddf_container(struct supertype *st,
                                int level, int layout, int raiddisks,
                                int chunk, unsigned long long size,
+                               unsigned long long data_offset,
                                char *dev, unsigned long long *freesize,
                                int verbose)
 {
@@ -2717,8 +3488,8 @@ validate_geometry_ddf_container(struct supertype *st,
        fd = open(dev, O_RDONLY|O_EXCL, 0);
        if (fd < 0) {
                if (verbose)
-                       fprintf(stderr, Name ": ddf: Cannot open %s: %s\n",
-                               dev, strerror(errno));
+                       pr_err("ddf: Cannot open %s: %s\n",
+                              dev, strerror(errno));
                return 0;
        }
        if (!get_dev_size(fd, dev, &ldsize)) {
@@ -2727,7 +3498,7 @@ validate_geometry_ddf_container(struct supertype *st,
        }
        close(fd);
 
-       *freesize = avail_size_ddf(st, ldsize >> 9);
+       *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
        if (*freesize == 0)
                return 0;
 
@@ -2736,21 +3507,19 @@ validate_geometry_ddf_container(struct supertype *st,
 
 static int validate_geometry_ddf_bvd(struct supertype *st,
                                     int level, int layout, int raiddisks,
-                                    int chunk, unsigned long long size,
+                                    int *chunk, unsigned long long size,
+                                    unsigned long long data_offset,
                                     char *dev, unsigned long long *freesize,
                                     int verbose)
 {
        struct stat stb;
        struct ddf_super *ddf = st->sb;
        struct dl *dl;
-       unsigned long long pos = 0;
        unsigned long long maxsize;
-       struct extent *e;
-       int i;
        /* ddf/bvd supports lots of things, but not containers */
        if (level == LEVEL_CONTAINER) {
                if (verbose)
-                       fprintf(stderr, Name ": DDF cannot create a container within an container\n");
+                       pr_err("DDF cannot create a container within an container\n");
                return 0;
        }
        /* We must have the container info already read in. */
@@ -2765,32 +3534,15 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
                int dcnt = 0;
                if (minsize == 0)
                        minsize = 8;
-               for (dl = ddf->dlist; dl ; dl = dl->next)
-               {
-                       int found = 0;
-                       pos = 0;
-
-                       i = 0;
-                       e = get_extents(ddf, dl);
-                       if (!e) continue;
-                       do {
-                               unsigned long long esize;
-                               esize = e[i].start - pos;
-                               if (esize >= minsize)
-                                       found = 1;
-                               pos = e[i].start + e[i].size;
-                               i++;
-                       } while (e[i-1].size);
-                       if (found)
+               for (dl = ddf->dlist; dl ; dl = dl->next) {
+                       if (find_space(ddf, dl, data_offset, &minsize)
+                           != INVALID_SECTORS)
                                dcnt++;
-                       free(e);
                }
                if (dcnt < raiddisks) {
                        if (verbose)
-                               fprintf(stderr,
-                                       Name ": ddf: Not enough devices with "
-                                       "space for this array (%d < %d)\n",
-                                       dcnt, raiddisks);
+                               pr_err("ddf: Not enough devices with space for this array (%d < %d)\n",
+                                      dcnt, raiddisks);
                        return 0;
                }
                return 1;
@@ -2807,30 +3559,19 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
        }
        if (!dl) {
                if (verbose)
-                       fprintf(stderr, Name ": ddf: %s is not in the "
-                               "same DDF set\n",
-                               dev);
+                       pr_err("ddf: %s is not in the same DDF set\n",
+                              dev);
                return 0;
        }
-       e = get_extents(ddf, dl);
-       maxsize = 0;
-       i = 0;
-       if (e) do {
-               unsigned long long esize;
-               esize = e[i].start - pos;
-               if (esize >= maxsize)
-                       maxsize = esize;
-               pos = e[i].start + e[i].size;
-               i++;
-       } while (e[i-1].size);
+       maxsize = ULLONG_MAX;
+       find_space(ddf, dl, data_offset, &maxsize);
        *freesize = maxsize;
-       // FIXME here I am
 
        return 1;
 }
 
 static int load_super_ddf_all(struct supertype *st, int fd,
-                             void **sbp, char *devname, int keep_fd)
+                             void **sbp, char *devname)
 {
        struct mdinfo *sra;
        struct ddf_super *super;
@@ -2862,7 +3603,7 @@ static int load_super_ddf_all(struct supertype *st, int fd,
                rv = load_ddf_headers(dfd, super, NULL);
                close(dfd);
                if (rv == 0) {
-                       seq = __be32_to_cpu(super->active->seq);
+                       seq = be32_to_cpu(super->active->seq);
                        if (super->active->openflag)
                                seq--;
                        if (!best || seq > bestseq) {
@@ -2886,49 +3627,144 @@ static int load_super_ddf_all(struct supertype *st, int fd,
                int rv;
 
                sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
-               dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
+               dfd = dev_open(nm, O_RDWR);
                if (dfd < 0)
                        return 2;
                rv = load_ddf_headers(dfd, super, NULL);
                if (rv == 0)
-                       rv = load_ddf_local(dfd, super, NULL, keep_fd);
-               if (!keep_fd) close(dfd);
+                       rv = load_ddf_local(dfd, super, NULL, 1);
                if (rv)
                        return 1;
        }
-       if (st->subarray[0]) {
-               unsigned long val;
-               struct vcl *v;
-               char *ep;
-
-               val = strtoul(st->subarray, &ep, 10);
-               if (*ep != '\0') {
-                       free(super);
-                       return 1;
-               }
-
-               for (v = super->conflist; v; v = v->next)
-                       if (v->vcnum == val)
-                               super->currentconf = v;
-               if (!super->currentconf) {
-                       free(super);
-                       return 1;
-               }
-       }
 
        *sbp = super;
        if (st->ss == NULL) {
                st->ss = &super_ddf;
                st->minor_version = 0;
                st->max_devs = 512;
-               st->container_dev = fd2devnum(fd);
        }
-       st->loaded_container = 1;
+       strcpy(st->container_devnm, fd2devnm(fd));
        return 0;
 }
+
+static int load_container_ddf(struct supertype *st, int fd,
+                             char *devname)
+{
+       return load_super_ddf_all(st, fd, &st->sb, devname);
+}
+
 #endif /* MDASSEMBLE */
 
-static struct mdinfo *container_content_ddf(struct supertype *st)
+static int check_secondary(const struct vcl *vc)
+{
+       const struct vd_config *conf = &vc->conf;
+       int i;
+
+       /* The only DDF secondary RAID level md can support is
+        * RAID 10, if the stripe sizes and Basic volume sizes
+        * are all equal.
+        * Other configurations could in theory be supported by exposing
+        * the BVDs to user space and using device mapper for the secondary
+        * mapping. So far we don't support that.
+        */
+
+       __u64 sec_elements[4] = {0, 0, 0, 0};
+#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
+#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
+
+       if (vc->other_bvds == NULL) {
+               pr_err("No BVDs for secondary RAID found\n");
+               return -1;
+       }
+       if (conf->prl != DDF_RAID1) {
+               pr_err("Secondary RAID level only supported for mirrored BVD\n");
+               return -1;
+       }
+       if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
+               pr_err("Secondary RAID level %d is unsupported\n",
+                      conf->srl);
+               return -1;
+       }
+       __set_sec_seen(conf->sec_elmnt_seq);
+       for (i = 0; i < conf->sec_elmnt_count-1; i++) {
+               const struct vd_config *bvd = vc->other_bvds[i];
+               if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
+                       continue;
+               if (bvd->srl != conf->srl) {
+                       pr_err("Inconsistent secondary RAID level across BVDs\n");
+                       return -1;
+               }
+               if (bvd->prl != conf->prl) {
+                       pr_err("Different RAID levels for BVDs are unsupported\n");
+                       return -1;
+               }
+               if (!be16_eq(bvd->prim_elmnt_count, conf->prim_elmnt_count)) {
+                       pr_err("All BVDs must have the same number of primary elements\n");
+                       return -1;
+               }
+               if (bvd->chunk_shift != conf->chunk_shift) {
+                       pr_err("Different strip sizes for BVDs are unsupported\n");
+                       return -1;
+               }
+               if (!be64_eq(bvd->array_blocks, conf->array_blocks)) {
+                       pr_err("Different BVD sizes are unsupported\n");
+                       return -1;
+               }
+               __set_sec_seen(bvd->sec_elmnt_seq);
+       }
+       for (i = 0; i < conf->sec_elmnt_count; i++) {
+               if (!__was_sec_seen(i)) {
+                       /* pr_err("BVD %d is missing\n", i); */
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
+                                            be32 refnum, unsigned int nmax,
+                                            const struct vd_config **bvd,
+                                            unsigned int *idx)
+{
+       unsigned int i, j, n, sec, cnt;
+
+       cnt = be16_to_cpu(vc->conf.prim_elmnt_count);
+       sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
+
+       for (i = 0, j = 0 ; i < nmax ; i++) {
+               /* j counts valid entries for this BVD */
+               if (be32_eq(vc->conf.phys_refnum[i], refnum)) {
+                       *bvd = &vc->conf;
+                       *idx = i;
+                       return sec * cnt + j;
+               }
+               if (be32_to_cpu(vc->conf.phys_refnum[i]) != 0xffffffff)
+                       j++;
+       }
+       if (vc->other_bvds == NULL)
+               goto bad;
+
+       for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
+               struct vd_config *vd = vc->other_bvds[n-1];
+               sec = vd->sec_elmnt_seq;
+               if (sec == DDF_UNUSED_BVD)
+                       continue;
+               for (i = 0, j = 0 ; i < nmax ; i++) {
+                       if (be32_eq(vd->phys_refnum[i], refnum)) {
+                               *bvd = vd;
+                               *idx = i;
+                               return sec * cnt + j;
+                       }
+                       if (be32_to_cpu(vd->phys_refnum[i]) != 0xffffffff)
+                               j++;
+               }
+       }
+bad:
+       *bvd = NULL;
+       return DDF_NOTFOUND;
+}
+
+static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
 {
        /* Given a container loaded by load_super_ddf_all,
         * extract information about all the arrays into
@@ -2942,28 +3778,37 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
        struct mdinfo *rest = NULL;
        struct vcl *vc;
 
-       for (vc = ddf->conflist ; vc ; vc=vc->next)
-       {
+       for (vc = ddf->conflist ; vc ; vc=vc->next) {
                unsigned int i;
-               unsigned int j;
                struct mdinfo *this;
-               this = malloc(sizeof(*this));
-               memset(this, 0, sizeof(*this));
+               char *ep;
+               __u32 *cptr;
+               unsigned int pd;
+
+               if (subarray &&
+                   (strtoul(subarray, &ep, 10) != vc->vcnum ||
+                    *ep != '\0'))
+                       continue;
+
+               if (vc->conf.sec_elmnt_count > 1) {
+                       if (check_secondary(vc) != 0)
+                               continue;
+               }
+
+               this = xcalloc(1, sizeof(*this));
                this->next = rest;
                rest = this;
 
-               this->array.level = map_num1(ddf_level_num, vc->conf.prl);
-               this->array.raid_disks =
-                       __be16_to_cpu(vc->conf.prim_elmnt_count);
-               this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
-                                                  this->array.raid_disks);
+               if (layout_ddf2md(&vc->conf, &this->array))
+                       continue;
                this->array.md_minor      = -1;
                this->array.major_version = -1;
                this->array.minor_version = -2;
-               this->array.ctime         = DECADE +
-                       __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+               this->safe_mode_delay     = DDF_SAFE_MODE_DELAY;
+               cptr = (__u32 *)(vc->conf.guid + 16);
+               this->array.ctime         = DECADE + __be32_to_cpu(*cptr);
                this->array.utime         = DECADE +
-                       __be32_to_cpu(vc->conf.timestamp);
+                       be32_to_cpu(vc->conf.timestamp);
                this->array.chunk_size    = 512 << vc->conf.chunk_shift;
 
                i = vc->vcnum;
@@ -2976,61 +3821,67 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                        this->array.state = 1;
                        this->resync_start = MaxSector;
                }
-               memcpy(this->name, ddf->virt->entries[i].name, 16);
-               this->name[16]=0;
-               for(j=0; j<16; j++)
-                       if (this->name[j] == ' ')
-                               this->name[j] = 0;
-
+               _ddf_array_name(this->name, ddf, i);
                memset(this->uuid, 0, sizeof(this->uuid));
-               this->component_size = __be64_to_cpu(vc->conf.blocks);
-               this->array.size = this->component_size / 2;
-               this->container_member = i;
+               this->component_size      = be64_to_cpu(vc->conf.blocks);
+               this->array.size          = this->component_size / 2;
+               this->container_member    = i;
 
                ddf->currentconf = vc;
                uuid_from_super_ddf(st, this->uuid);
-               ddf->currentconf = NULL;
+               if (!subarray)
+                       ddf->currentconf = NULL;
 
                sprintf(this->text_version, "/%s/%d",
-                       devnum2devname(st->container_dev),
-                       this->container_member);
+                       st->container_devnm, this->container_member);
 
-               for (i = 0 ; i < ddf->mppe ; i++) {
+               for (pd = 0; pd < be16_to_cpu(ddf->phys->max_pdes); pd++) {
                        struct mdinfo *dev;
                        struct dl *d;
+                       const struct vd_config *bvd;
+                       unsigned int iphys;
                        int stt;
 
-                       if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
+                       if (be32_to_cpu(ddf->phys->entries[pd].refnum)
+                           == 0xFFFFFFFF)
                                continue;
 
-                       for (d = ddf->dlist; d ; d=d->next)
-                               if (d->disk.refnum == vc->conf.phys_refnum[i])
-                                       break;
-                       if (d == NULL)
-                               /* Haven't found that one yet, maybe there are others */
-                               continue;
-                       stt = __be16_to_cpu(ddf->phys->entries[d->pdnum].state);
+                       stt = be16_to_cpu(ddf->phys->entries[pd].state);
                        if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
                            != DDF_Online)
                                continue;
 
+                       i = get_pd_index_from_refnum(
+                               vc, ddf->phys->entries[pd].refnum,
+                               ddf->mppe, &bvd, &iphys);
+                       if (i == DDF_NOTFOUND)
+                               continue;
+
                        this->array.working_disks++;
 
-                       dev = malloc(sizeof(*dev));
-                       memset(dev, 0, sizeof(*dev));
-                       dev->next = this->devs;
-                       this->devs = dev;
+                       for (d = ddf->dlist; d ; d=d->next)
+                               if (be32_eq(d->disk.refnum,
+                                           ddf->phys->entries[pd].refnum))
+                                       break;
+                       if (d == NULL)
+                               /* Haven't found that one yet, maybe there are others */
+                               continue;
+
+                       dev = xcalloc(1, sizeof(*dev));
+                       dev->next        = this->devs;
+                       this->devs       = dev;
 
-                       dev->disk.number = __be32_to_cpu(d->disk.refnum);
-                       dev->disk.major = d->major;
-                       dev->disk.minor = d->minor;
+                       dev->disk.number = be32_to_cpu(d->disk.refnum);
+                       dev->disk.major  = d->major;
+                       dev->disk.minor  = d->minor;
                        dev->disk.raid_disk = i;
-                       dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+                       dev->disk.state  = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
                        dev->recovery_start = MaxSector;
 
-                       dev->events = __be32_to_cpu(ddf->primary.seq);
-                       dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
-                       dev->component_size = __be64_to_cpu(vc->conf.blocks);
+                       dev->events      = be32_to_cpu(ddf->active->seq);
+                       dev->data_offset =
+                               be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
+                       dev->component_size = be64_to_cpu(bvd->blocks);
                        if (d->devname)
                                strcpy(dev->name, d->devname);
                }
@@ -3048,15 +3899,35 @@ static int store_super_ddf(struct supertype *st, int fd)
        if (!ddf)
                return 1;
 
-       /* ->dlist and ->conflist will be set for updates, currently not
-        * supported
-        */
-       if (ddf->dlist || ddf->conflist)
-               return 1;
-
        if (!get_dev_size(fd, NULL, &dsize))
                return 1;
 
+       if (ddf->dlist || ddf->conflist) {
+               struct stat sta;
+               struct dl *dl;
+               int ofd, ret;
+
+               if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
+                       pr_err("file descriptor for invalid device\n");
+                       return 1;
+               }
+               for (dl = ddf->dlist; dl; dl = dl->next)
+                       if (dl->major == (int)major(sta.st_rdev) &&
+                           dl->minor == (int)minor(sta.st_rdev))
+                               break;
+               if (!dl) {
+                       pr_err("couldn't find disk %d/%d\n",
+                              (int)major(sta.st_rdev),
+                              (int)minor(sta.st_rdev));
+                       return 1;
+               }
+               ofd = dl->fd;
+               dl->fd = fd;
+               ret = (_write_super_to_disk(ddf, dl) != 1);
+               dl->fd = ofd;
+               return ret;
+       }
+
        if (posix_memalign(&buf, 512, 512) != 0)
                return 1;
        memset(buf, 0, 512);
@@ -3074,12 +3945,15 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
        /*
         * return:
         *  0 same, or first was empty, and second was copied
-        *  1 second had wrong number
+        *  1 second had wrong magic number - but that isn't possible
         *  2 wrong uuid
         *  3 wrong other info
         */
        struct ddf_super *first = st->sb;
        struct ddf_super *second = tst->sb;
+       struct dl *dl1, *dl2;
+       struct vcl *vl1, *vl2;
+       unsigned int max_vds, max_pds, pd, vd;
 
        if (!first) {
                st->sb = tst->sb;
@@ -3090,7 +3964,114 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
        if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
                return 2;
 
-       /* FIXME should I look at anything else? */
+       /* It is only OK to compare info in the anchor.  Anything else
+        * could be changing due to a reconfig so must be ignored.
+        * guid really should be enough anyway.
+        */
+
+       if (!be32_eq(first->active->seq, second->active->seq)) {
+               dprintf("sequence number mismatch %u<->%u\n",
+                       be32_to_cpu(first->active->seq),
+                       be32_to_cpu(second->active->seq));
+               return 0;
+       }
+
+       /*
+        * At this point we are fairly sure that the meta data matches.
+        * But the new disk may contain additional local data.
+        * Add it to the super block.
+        */
+       max_vds = be16_to_cpu(first->active->max_vd_entries);
+       max_pds = be16_to_cpu(first->phys->max_pdes);
+       for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
+               for (vl1 = first->conflist; vl1; vl1 = vl1->next)
+                       if (!memcmp(vl1->conf.guid, vl2->conf.guid,
+                                   DDF_GUID_LEN))
+                               break;
+               if (vl1) {
+                       if (vl1->other_bvds != NULL &&
+                           vl1->conf.sec_elmnt_seq !=
+                           vl2->conf.sec_elmnt_seq) {
+                               dprintf("adding BVD %u\n",
+                                       vl2->conf.sec_elmnt_seq);
+                               add_other_bvd(vl1, &vl2->conf,
+                                             first->conf_rec_len*512);
+                       }
+                       continue;
+               }
+
+               if (posix_memalign((void **)&vl1, 512,
+                                  (first->conf_rec_len*512 +
+                                   offsetof(struct vcl, conf))) != 0) {
+                       pr_err("could not allocate vcl buf\n");
+                       return 3;
+               }
+
+               vl1->next = first->conflist;
+               vl1->block_sizes = NULL;
+               memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
+               if (alloc_other_bvds(first, vl1) != 0) {
+                       pr_err("could not allocate other bvds\n");
+                       free(vl1);
+                       return 3;
+               }
+               for (vd = 0; vd < max_vds; vd++)
+                       if (!memcmp(first->virt->entries[vd].guid,
+                                   vl1->conf.guid, DDF_GUID_LEN))
+                               break;
+               vl1->vcnum = vd;
+               dprintf("added config for VD %u\n", vl1->vcnum);
+               first->conflist = vl1;
+       }
+
+       for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
+               for (dl1 = first->dlist; dl1; dl1 = dl1->next)
+                       if (be32_eq(dl1->disk.refnum, dl2->disk.refnum))
+                               break;
+               if (dl1)
+                       continue;
+
+               if (posix_memalign((void **)&dl1, 512,
+                      sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
+                   != 0) {
+                       pr_err("could not allocate disk info buffer\n");
+                       return 3;
+               }
+               memcpy(dl1, dl2, sizeof(*dl1));
+               dl1->mdupdate = NULL;
+               dl1->next = first->dlist;
+               dl1->fd = -1;
+               for (pd = 0; pd < max_pds; pd++)
+                       if (be32_eq(first->phys->entries[pd].refnum,
+                                   dl1->disk.refnum))
+                               break;
+               dl1->pdnum = pd < max_pds ? (int)pd : -1;
+               if (dl2->spare) {
+                       if (posix_memalign((void **)&dl1->spare, 512,
+                                      first->conf_rec_len*512) != 0) {
+                               pr_err("could not allocate spare info buf\n");
+                               return 3;
+                       }
+                       memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
+               }
+               for (vd = 0 ; vd < first->max_part ; vd++) {
+                       if (!dl2->vlist[vd]) {
+                               dl1->vlist[vd] = NULL;
+                               continue;
+                       }
+                       for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
+                               if (!memcmp(vl1->conf.guid,
+                                           dl2->vlist[vd]->conf.guid,
+                                           DDF_GUID_LEN))
+                                       break;
+                               dl1->vlist[vd] = vl1;
+                       }
+               }
+               first->dlist = dl1;
+               dprintf("added disk %d: %08x\n", dl1->pdnum,
+                       be32_to_cpu(dl1->disk.refnum));
+       }
+
        return 0;
 }
 
@@ -3103,10 +4084,92 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
  */
 static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
 {
-       dprintf("ddf: open_new %s\n", inst);
-       a->info.container_member = atoi(inst);
-       return 0;
-}
+       struct ddf_super *ddf = c->sb;
+       int n = atoi(inst);
+       struct mdinfo *dev;
+       struct dl *dl;
+       static const char faulty[] = "faulty";
+
+       if (all_ff(ddf->virt->entries[n].guid)) {
+               pr_err("subarray %d doesn't exist\n", n);
+               return -ENODEV;
+       }
+       dprintf("new subarray %d, GUID: %s\n", n,
+               guid_str(ddf->virt->entries[n].guid));
+       for (dev = a->info.devs; dev; dev = dev->next) {
+               for (dl = ddf->dlist; dl; dl = dl->next)
+                       if (dl->major == dev->disk.major &&
+                           dl->minor == dev->disk.minor)
+                               break;
+               if (!dl || dl->pdnum < 0) {
+                       pr_err("device %d/%d of subarray %d not found in meta data\n",
+                               dev->disk.major, dev->disk.minor, n);
+                       return -1;
+               }
+               if ((be16_to_cpu(ddf->phys->entries[dl->pdnum].state) &
+                       (DDF_Online|DDF_Missing|DDF_Failed)) != DDF_Online) {
+                       pr_err("new subarray %d contains broken device %d/%d (%02x)\n",
+                              n, dl->major, dl->minor,
+                              be16_to_cpu(ddf->phys->entries[dl->pdnum].state));
+                       if (write(dev->state_fd, faulty, sizeof(faulty)-1) !=
+                           sizeof(faulty) - 1)
+                               pr_err("Write to state_fd failed\n");
+                       dev->curr_state = DS_FAULTY;
+               }
+       }
+       a->info.container_member = n;
+       return 0;
+}
+
+static void handle_missing(struct ddf_super *ddf, struct active_array *a, int inst)
+{
+       /* This member array is being activated.  If any devices
+        * are missing they must now be marked as failed.
+        */
+       struct vd_config *vc;
+       unsigned int n_bvd;
+       struct vcl *vcl;
+       struct dl *dl;
+       int pd;
+       int n;
+       int state;
+
+       for (n = 0; ; n++) {
+               vc = find_vdcr(ddf, inst, n, &n_bvd, &vcl);
+               if (!vc)
+                       break;
+               for (dl = ddf->dlist; dl; dl = dl->next)
+                       if (be32_eq(dl->disk.refnum, vc->phys_refnum[n_bvd]))
+                               break;
+               if (dl)
+                       /* Found this disk, so not missing */
+                       continue;
+
+               /* Mark the device as failed/missing. */
+               pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
+               if (pd >= 0 && be16_and(ddf->phys->entries[pd].state,
+                                       cpu_to_be16(DDF_Online))) {
+                       be16_clear(ddf->phys->entries[pd].state,
+                                  cpu_to_be16(DDF_Online));
+                       be16_set(ddf->phys->entries[pd].state,
+                                cpu_to_be16(DDF_Failed|DDF_Missing));
+                       vc->phys_refnum[n_bvd] = cpu_to_be32(0);
+                       ddf_set_updates_pending(ddf, vc);
+               }
+
+               /* Mark the array as Degraded */
+               state = get_svd_state(ddf, vcl);
+               if (ddf->virt->entries[inst].state !=
+                   ((ddf->virt->entries[inst].state & ~DDF_state_mask)
+                    | state)) {
+                       ddf->virt->entries[inst].state =
+                               (ddf->virt->entries[inst].state & ~DDF_state_mask)
+                               | state;
+                       a->check_degraded = 1;
+                       ddf_set_updates_pending(ddf, vc);
+               }
+       }
+}
 
 /*
  * The array 'a' is to be marked clean in the metadata.
@@ -3123,7 +4186,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
        int inst = a->info.container_member;
        int old = ddf->virt->entries[inst].state;
        if (consistent == 2) {
-               /* Should check if a recovery should be started FIXME */
+               handle_missing(ddf, a, inst);
                consistent = 1;
                if (!is_resync_complete(&a->info))
                        consistent = 0;
@@ -3133,7 +4196,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
        else
                ddf->virt->entries[inst].state |= DDF_state_inconsistent;
        if (old != ddf->virt->entries[inst].state)
-               ddf->updates_pending = 1;
+               ddf_set_updates_pending(ddf, NULL);
 
        old = ddf->virt->entries[inst].init_state;
        ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
@@ -3144,16 +4207,107 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
        else
                ddf->virt->entries[inst].init_state |= DDF_init_quick;
        if (old != ddf->virt->entries[inst].init_state)
-               ddf->updates_pending = 1;
+               ddf_set_updates_pending(ddf, NULL);
 
-       dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
+       dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
+               guid_str(ddf->virt->entries[inst].guid), a->curr_state,
+               consistent?"clean":"dirty",
                a->info.resync_start);
        return consistent;
 }
 
-#define container_of(ptr, type, member) ({                      \
-        const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
-        (type *)( (char *)__mptr - offsetof(type,member) );})
+static int get_bvd_state(const struct ddf_super *ddf,
+                        const struct vd_config *vc)
+{
+       unsigned int i, n_bvd, working = 0;
+       unsigned int n_prim = be16_to_cpu(vc->prim_elmnt_count);
+       int pd, st, state;
+       char *avail = xcalloc(1, n_prim);
+       mdu_array_info_t array;
+
+       layout_ddf2md(vc, &array);
+
+       for (i = 0; i < n_prim; i++) {
+               if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
+                       continue;
+               pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
+               if (pd < 0)
+                       continue;
+               st = be16_to_cpu(ddf->phys->entries[pd].state);
+               if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+                   == DDF_Online) {
+                       working++;
+                       avail[i] = 1;
+               }
+       }
+
+       state = DDF_state_degraded;
+       if (working == n_prim)
+               state = DDF_state_optimal;
+       else
+               switch (vc->prl) {
+               case DDF_RAID0:
+               case DDF_CONCAT:
+               case DDF_JBOD:
+                       state = DDF_state_failed;
+                       break;
+               case DDF_RAID1:
+                       if (working == 0)
+                               state = DDF_state_failed;
+                       else if (working >= 2)
+                               state = DDF_state_part_optimal;
+                       break;
+               case DDF_RAID1E:
+                       if (!enough(10, n_prim, array.layout, 1, avail))
+                               state = DDF_state_failed;
+                       break;
+               case DDF_RAID4:
+               case DDF_RAID5:
+                       if (working < n_prim - 1)
+                               state = DDF_state_failed;
+                       break;
+               case DDF_RAID6:
+                       if (working < n_prim - 2)
+                               state = DDF_state_failed;
+                       else if (working == n_prim - 1)
+                               state = DDF_state_part_optimal;
+                       break;
+               }
+       return state;
+}
+
+static int secondary_state(int state, int other, int seclevel)
+{
+       if (state == DDF_state_optimal && other == DDF_state_optimal)
+               return DDF_state_optimal;
+       if (seclevel == DDF_2MIRRORED) {
+               if (state == DDF_state_optimal || other == DDF_state_optimal)
+                       return DDF_state_part_optimal;
+               if (state == DDF_state_failed && other == DDF_state_failed)
+                       return DDF_state_failed;
+               return DDF_state_degraded;
+       } else {
+               if (state == DDF_state_failed || other == DDF_state_failed)
+                       return DDF_state_failed;
+               if (state == DDF_state_degraded || other == DDF_state_degraded)
+                       return DDF_state_degraded;
+               return DDF_state_part_optimal;
+       }
+}
+
+static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
+{
+       int state = get_bvd_state(ddf, &vcl->conf);
+       unsigned int i;
+       for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
+               state = secondary_state(
+                       state,
+                       get_bvd_state(ddf, vcl->other_bvds[i-1]),
+                       vcl->conf.srl);
+       }
+       return state;
+}
+
 /*
  * The state of each disk is stored in the global phys_disk structure
  * in phys_disk.entries[n].state.
@@ -3171,13 +4325,16 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
 static void ddf_set_disk(struct active_array *a, int n, int state)
 {
        struct ddf_super *ddf = a->container->sb;
-       unsigned int inst = a->info.container_member;
-       struct vd_config *vc = find_vdcr(ddf, inst);
-       int pd = find_phys(ddf, vc->phys_refnum[n]);
-       int i, st, working;
+       unsigned int inst = a->info.container_member, n_bvd;
+       struct vcl *vcl;
+       struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
+                                        &n_bvd, &vcl);
+       int pd;
        struct mdinfo *mdi;
        struct dl *dl;
+       int update = 0;
 
+       dprintf("%d to %x\n", n, state);
        if (vc == NULL) {
                dprintf("ddf: cannot find instance %d!!\n", inst);
                return;
@@ -3186,8 +4343,10 @@ static void ddf_set_disk(struct active_array *a, int n, int state)
        for (mdi = a->info.devs; mdi; mdi = mdi->next)
                if (mdi->disk.raid_disk == n)
                        break;
-       if (!mdi)
+       if (!mdi) {
+               pr_err("cannot find raid disk %d\n", n);
                return;
+       }
 
        /* and find the 'dl' entry corresponding to that. */
        for (dl = ddf->dlist; dl; dl = dl->next)
@@ -3195,94 +4354,74 @@ static void ddf_set_disk(struct active_array *a, int n, int state)
                    mdi->disk.major == dl->major &&
                    mdi->disk.minor == dl->minor)
                        break;
-       if (!dl)
+       if (!dl) {
+               pr_err("cannot find raid disk %d (%d/%d)\n",
+                      n, mdi->disk.major, mdi->disk.minor);
                return;
+       }
 
+       pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
        if (pd < 0 || pd != dl->pdnum) {
                /* disk doesn't currently exist or has changed.
                 * If it is now in_sync, insert it. */
-               if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
-                       struct vcl *vcl;
+               dprintf("phys disk not found for %d: %d/%d ref %08x\n",
+                       dl->pdnum, dl->major, dl->minor,
+                       be32_to_cpu(dl->disk.refnum));
+               dprintf("array %u disk %u ref %08x pd %d\n",
+                       inst, n_bvd,
+                       be32_to_cpu(vc->phys_refnum[n_bvd]), pd);
+               if ((state & DS_INSYNC) && ! (state & DS_FAULTY) &&
+                   dl->pdnum >= 0) {
                        pd = dl->pdnum;
-                       vc->phys_refnum[n] = dl->disk.refnum;
-                       vcl = container_of(vc, struct vcl, conf);
-                       vcl->lba_offset[n] = mdi->data_offset;
-                       ddf->phys->entries[pd].type &=
-                               ~__cpu_to_be16(DDF_Global_Spare);
-                       ddf->phys->entries[pd].type |=
-                               __cpu_to_be16(DDF_Active_in_VD);
-                       ddf->updates_pending = 1;
+                       vc->phys_refnum[n_bvd] = dl->disk.refnum;
+                       LBA_OFFSET(ddf, vc)[n_bvd] =
+                               cpu_to_be64(mdi->data_offset);
+                       be16_clear(ddf->phys->entries[pd].type,
+                                  cpu_to_be16(DDF_Global_Spare));
+                       be16_set(ddf->phys->entries[pd].type,
+                                cpu_to_be16(DDF_Active_in_VD));
+                       update = 1;
                }
        } else {
-               int old = ddf->phys->entries[pd].state;
+               be16 old = ddf->phys->entries[pd].state;
                if (state & DS_FAULTY)
-                       ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Failed);
+                       be16_set(ddf->phys->entries[pd].state,
+                                cpu_to_be16(DDF_Failed));
                if (state & DS_INSYNC) {
-                       ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Online);
-                       ddf->phys->entries[pd].state  &= __cpu_to_be16(~DDF_Rebuilding);
+                       be16_set(ddf->phys->entries[pd].state,
+                                cpu_to_be16(DDF_Online));
+                       be16_clear(ddf->phys->entries[pd].state,
+                                  cpu_to_be16(DDF_Rebuilding));
                }
-               if (old != ddf->phys->entries[pd].state)
-                       ddf->updates_pending = 1;
+               if (!be16_eq(old, ddf->phys->entries[pd].state))
+                       update = 1;
        }
 
-       dprintf("ddf: set_disk %d to %x\n", n, state);
+       dprintf("ddf: set_disk %d (%08x) to %x->%02x\n", n,
+               be32_to_cpu(dl->disk.refnum), state,
+               be16_to_cpu(ddf->phys->entries[pd].state));
 
        /* Now we need to check the state of the array and update
         * virtual_disk.entries[n].state.
         * It needs to be one of "optimal", "degraded", "failed".
         * I don't understand 'deleted' or 'missing'.
         */
-       working = 0;
-       for (i=0; i < a->info.array.raid_disks; i++) {
-               pd = find_phys(ddf, vc->phys_refnum[i]);
-               if (pd < 0)
-                       continue;
-               st = __be16_to_cpu(ddf->phys->entries[pd].state);
-               if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
-                   == DDF_Online)
-                       working++;
-       }
-       state = DDF_state_degraded;
-       if (working == a->info.array.raid_disks)
-               state = DDF_state_optimal;
-       else switch(vc->prl) {
-       case DDF_RAID0:
-       case DDF_CONCAT:
-       case DDF_JBOD:
-               state = DDF_state_failed;
-               break;
-       case DDF_RAID1:
-               if (working == 0)
-                       state = DDF_state_failed;
-               break;
-       case DDF_RAID4:
-       case DDF_RAID5:
-               if (working < a->info.array.raid_disks-1)
-                       state = DDF_state_failed;
-               break;
-       case DDF_RAID6:
-               if (working < a->info.array.raid_disks-2)
-                       state = DDF_state_failed;
-               else if (working == a->info.array.raid_disks-1)
-                       state = DDF_state_part_optimal;
-               break;
-       }
+       state = get_svd_state(ddf, vcl);
 
        if (ddf->virt->entries[inst].state !=
            ((ddf->virt->entries[inst].state & ~DDF_state_mask)
             | state)) {
-
                ddf->virt->entries[inst].state =
                        (ddf->virt->entries[inst].state & ~DDF_state_mask)
                        | state;
-               ddf->updates_pending = 1;
+               update = 1;
        }
-
+       if (update)
+               ddf_set_updates_pending(ddf, vc);
 }
 
 static void ddf_sync_metadata(struct supertype *st)
 {
-
        /*
         * Write all data to all devices.
         * Later, we might be able to track whether only local changes
@@ -3294,10 +4433,404 @@ static void ddf_sync_metadata(struct supertype *st)
        if (!ddf->updates_pending)
                return;
        ddf->updates_pending = 0;
-       __write_init_super_ddf(st, 0);
+       __write_init_super_ddf(st);
        dprintf("ddf: sync_metadata\n");
 }
 
+static int del_from_conflist(struct vcl **list, const char *guid)
+{
+       struct vcl **p;
+       int found = 0;
+       for (p = list; p && *p; p = &((*p)->next))
+               if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
+                       found = 1;
+                       *p = (*p)->next;
+               }
+       return found;
+}
+
+static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
+{
+       struct dl *dl;
+       unsigned int vdnum, i;
+       vdnum = find_vde_by_guid(ddf, guid);
+       if (vdnum == DDF_NOTFOUND) {
+               pr_err("could not find VD %s\n", guid_str(guid));
+               return -1;
+       }
+       if (del_from_conflist(&ddf->conflist, guid) == 0) {
+               pr_err("could not find conf %s\n", guid_str(guid));
+               return -1;
+       }
+       for (dl = ddf->dlist; dl; dl = dl->next)
+               for (i = 0; i < ddf->max_part; i++)
+                       if (dl->vlist[i] != NULL &&
+                           !memcmp(dl->vlist[i]->conf.guid, guid,
+                                   DDF_GUID_LEN))
+                               dl->vlist[i] = NULL;
+       memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
+       dprintf("deleted %s\n", guid_str(guid));
+       return 0;
+}
+
+static int kill_subarray_ddf(struct supertype *st)
+{
+       struct ddf_super *ddf = st->sb;
+       /*
+        *  currentconf is set in container_content_ddf,
+        *  called with subarray arg
+        */
+       struct vcl *victim = ddf->currentconf;
+       struct vd_config *conf;
+       unsigned int vdnum;
+
+       ddf->currentconf = NULL;
+       if (!victim) {
+               pr_err("nothing to kill\n");
+               return -1;
+       }
+       conf = &victim->conf;
+       vdnum = find_vde_by_guid(ddf, conf->guid);
+       if (vdnum == DDF_NOTFOUND) {
+               pr_err("could not find VD %s\n", guid_str(conf->guid));
+               return -1;
+       }
+       if (st->update_tail) {
+               struct virtual_disk *vd;
+               int len = sizeof(struct virtual_disk)
+                       + sizeof(struct virtual_entry);
+               vd = xmalloc(len);
+               if (vd == NULL) {
+                       pr_err("failed to allocate %d bytes\n", len);
+                       return -1;
+               }
+               memset(vd, 0 , len);
+               vd->magic = DDF_VIRT_RECORDS_MAGIC;
+               vd->populated_vdes = cpu_to_be16(0);
+               memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
+               /* we use DDF_state_deleted as marker */
+               vd->entries[0].state = DDF_state_deleted;
+               append_metadata_update(st, vd, len);
+       } else {
+               _kill_subarray_ddf(ddf, conf->guid);
+               ddf_set_updates_pending(ddf, NULL);
+               ddf_sync_metadata(st);
+       }
+       return 0;
+}
+
+static void copy_matching_bvd(struct ddf_super *ddf,
+                             struct vd_config *conf,
+                             const struct metadata_update *update)
+{
+       unsigned int mppe =
+               be16_to_cpu(ddf->anchor.max_primary_element_entries);
+       unsigned int len = ddf->conf_rec_len * 512;
+       char *p;
+       struct vd_config *vc;
+       for (p = update->buf; p < update->buf + update->len; p += len) {
+               vc = (struct vd_config *) p;
+               if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
+                       memcpy(conf->phys_refnum, vc->phys_refnum,
+                              mppe * (sizeof(__u32) + sizeof(__u64)));
+                       return;
+               }
+       }
+       pr_err("no match for BVD %d of %s in update\n",
+              conf->sec_elmnt_seq, guid_str(conf->guid));
+}
+
+static void ddf_process_phys_update(struct supertype *st,
+                                   struct metadata_update *update)
+{
+       struct ddf_super *ddf = st->sb;
+       struct phys_disk *pd;
+       unsigned int ent;
+
+       pd = (struct phys_disk*)update->buf;
+       ent = be16_to_cpu(pd->used_pdes);
+       if (ent >= be16_to_cpu(ddf->phys->max_pdes))
+               return;
+       if (be16_and(pd->entries[0].state, cpu_to_be16(DDF_Missing))) {
+               struct dl **dlp;
+               /* removing this disk. */
+               be16_set(ddf->phys->entries[ent].state,
+                        cpu_to_be16(DDF_Missing));
+               for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
+                       struct dl *dl = *dlp;
+                       if (dl->pdnum == (signed)ent) {
+                               close(dl->fd);
+                               dl->fd = -1;
+                               *dlp = dl->next;
+                               update->space = dl->devname;
+                               *(void**)dl = update->space_list;
+                               update->space_list = (void**)dl;
+                               break;
+                       }
+               }
+               ddf_set_updates_pending(ddf, NULL);
+               return;
+       }
+       if (!all_ff(ddf->phys->entries[ent].guid))
+               return;
+       ddf->phys->entries[ent] = pd->entries[0];
+       ddf->phys->used_pdes = cpu_to_be16
+               (1 + be16_to_cpu(ddf->phys->used_pdes));
+       ddf_set_updates_pending(ddf, NULL);
+       if (ddf->add_list) {
+               struct active_array *a;
+               struct dl *al = ddf->add_list;
+               ddf->add_list = al->next;
+
+               al->next = ddf->dlist;
+               ddf->dlist = al;
+
+               /* As a device has been added, we should check
+                * for any degraded devices that might make
+                * use of this spare */
+               for (a = st->arrays ; a; a=a->next)
+                       a->check_degraded = 1;
+       }
+}
+
+static void ddf_process_virt_update(struct supertype *st,
+                                   struct metadata_update *update)
+{
+       struct ddf_super *ddf = st->sb;
+       struct virtual_disk *vd;
+       unsigned int ent;
+
+       vd = (struct virtual_disk*)update->buf;
+
+       if (vd->entries[0].state == DDF_state_deleted) {
+               if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
+                       return;
+       } else {
+               ent = find_vde_by_guid(ddf, vd->entries[0].guid);
+               if (ent != DDF_NOTFOUND) {
+                       dprintf("VD %s exists already in slot %d\n",
+                               guid_str(vd->entries[0].guid),
+                               ent);
+                       return;
+               }
+               ent = find_unused_vde(ddf);
+               if (ent == DDF_NOTFOUND)
+                       return;
+               ddf->virt->entries[ent] = vd->entries[0];
+               ddf->virt->populated_vdes =
+                       cpu_to_be16(
+                               1 + be16_to_cpu(
+                                       ddf->virt->populated_vdes));
+               dprintf("added VD %s in slot %d(s=%02x i=%02x)\n",
+                       guid_str(vd->entries[0].guid), ent,
+                       ddf->virt->entries[ent].state,
+                       ddf->virt->entries[ent].init_state);
+       }
+       ddf_set_updates_pending(ddf, NULL);
+}
+
+static void ddf_remove_failed(struct ddf_super *ddf)
+{
+       /* Now remove any 'Failed' devices that are not part
+        * of any VD.  They will have the Transition flag set.
+        * Once done, we need to update all dl->pdnum numbers.
+        */
+       unsigned int pdnum;
+       unsigned int pd2 = 0;
+       struct dl *dl;
+
+       for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->max_pdes);
+            pdnum++) {
+               if (be32_to_cpu(ddf->phys->entries[pdnum].refnum) ==
+                   0xFFFFFFFF)
+                       continue;
+               if (be16_and(ddf->phys->entries[pdnum].state,
+                            cpu_to_be16(DDF_Failed))
+                   && be16_and(ddf->phys->entries[pdnum].state,
+                               cpu_to_be16(DDF_Transition))) {
+                       /* skip this one unless in dlist*/
+                       for (dl = ddf->dlist; dl; dl = dl->next)
+                               if (dl->pdnum == (int)pdnum)
+                                       break;
+                       if (!dl)
+                               continue;
+               }
+               if (pdnum == pd2)
+                       pd2++;
+               else {
+                       ddf->phys->entries[pd2] =
+                               ddf->phys->entries[pdnum];
+                       for (dl = ddf->dlist; dl; dl = dl->next)
+                               if (dl->pdnum == (int)pdnum)
+                                       dl->pdnum = pd2;
+                       pd2++;
+               }
+       }
+       ddf->phys->used_pdes = cpu_to_be16(pd2);
+       while (pd2 < pdnum) {
+               memset(ddf->phys->entries[pd2].guid, 0xff,
+                      DDF_GUID_LEN);
+               pd2++;
+       }
+}
+
+static void ddf_update_vlist(struct ddf_super *ddf, struct dl *dl)
+{
+       struct vcl *vcl;
+       unsigned int vn = 0;
+       int in_degraded = 0;
+
+       if (dl->pdnum < 0)
+               return;
+       for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
+               unsigned int dn, ibvd;
+               const struct vd_config *conf;
+               int vstate;
+               dn = get_pd_index_from_refnum(vcl,
+                                             dl->disk.refnum,
+                                             ddf->mppe,
+                                             &conf, &ibvd);
+               if (dn == DDF_NOTFOUND)
+                       continue;
+               dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
+                       dl->pdnum,
+                       be32_to_cpu(dl->disk.refnum),
+                       guid_str(conf->guid),
+                       conf->sec_elmnt_seq, vn);
+               /* Clear the Transition flag */
+               if (be16_and
+                   (ddf->phys->entries[dl->pdnum].state,
+                    cpu_to_be16(DDF_Failed)))
+                       be16_clear(ddf->phys
+                                  ->entries[dl->pdnum].state,
+                                  cpu_to_be16(DDF_Transition));
+               dl->vlist[vn++] = vcl;
+               vstate = ddf->virt->entries[vcl->vcnum].state
+                       & DDF_state_mask;
+               if (vstate == DDF_state_degraded ||
+                   vstate == DDF_state_part_optimal)
+                       in_degraded = 1;
+       }
+       while (vn < ddf->max_part)
+               dl->vlist[vn++] = NULL;
+       if (dl->vlist[0]) {
+               be16_clear(ddf->phys->entries[dl->pdnum].type,
+                          cpu_to_be16(DDF_Global_Spare));
+               if (!be16_and(ddf->phys
+                             ->entries[dl->pdnum].type,
+                             cpu_to_be16(DDF_Active_in_VD))) {
+                       be16_set(ddf->phys
+                                ->entries[dl->pdnum].type,
+                                cpu_to_be16(DDF_Active_in_VD));
+                       if (in_degraded)
+                               be16_set(ddf->phys
+                                        ->entries[dl->pdnum]
+                                        .state,
+                                        cpu_to_be16
+                                        (DDF_Rebuilding));
+               }
+       }
+       if (dl->spare) {
+               be16_clear(ddf->phys->entries[dl->pdnum].type,
+                          cpu_to_be16(DDF_Global_Spare));
+               be16_set(ddf->phys->entries[dl->pdnum].type,
+                        cpu_to_be16(DDF_Spare));
+       }
+       if (!dl->vlist[0] && !dl->spare) {
+               be16_set(ddf->phys->entries[dl->pdnum].type,
+                        cpu_to_be16(DDF_Global_Spare));
+               be16_clear(ddf->phys->entries[dl->pdnum].type,
+                          cpu_to_be16(DDF_Spare));
+               be16_clear(ddf->phys->entries[dl->pdnum].type,
+                          cpu_to_be16(DDF_Active_in_VD));
+       }
+}
+
+static void ddf_process_conf_update(struct supertype *st,
+                                   struct metadata_update *update)
+{
+       struct ddf_super *ddf = st->sb;
+       struct vd_config *vc;
+       struct vcl *vcl;
+       struct dl *dl;
+       unsigned int ent;
+       unsigned int pdnum, len;
+
+       vc = (struct vd_config*)update->buf;
+       len = ddf->conf_rec_len * 512;
+       if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
+               pr_err("%s: insufficient data (%d) for %u BVDs\n",
+                      guid_str(vc->guid), update->len,
+                      vc->sec_elmnt_count);
+               return;
+       }
+       for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+               if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
+                       break;
+       dprintf("conf update for %s (%s)\n",
+               guid_str(vc->guid), (vcl ? "old" : "new"));
+       if (vcl) {
+               /* An update, just copy the phys_refnum and lba_offset
+                * fields
+                */
+               unsigned int i;
+               unsigned int k;
+               copy_matching_bvd(ddf, &vcl->conf, update);
+               for (k = 0; k < be16_to_cpu(vc->prim_elmnt_count); k++)
+                       dprintf("BVD %u has %08x at %llu\n", 0,
+                               be32_to_cpu(vcl->conf.phys_refnum[k]),
+                               be64_to_cpu(LBA_OFFSET(ddf,
+                                                      &vcl->conf)[k]));
+               for (i = 1; i < vc->sec_elmnt_count; i++) {
+                       copy_matching_bvd(ddf, vcl->other_bvds[i-1],
+                                         update);
+                       for (k = 0; k < be16_to_cpu(
+                                    vc->prim_elmnt_count); k++)
+                               dprintf("BVD %u has %08x at %llu\n", i,
+                                       be32_to_cpu
+                                       (vcl->other_bvds[i-1]->
+                                        phys_refnum[k]),
+                                       be64_to_cpu
+                                       (LBA_OFFSET
+                                        (ddf,
+                                         vcl->other_bvds[i-1])[k]));
+               }
+       } else {
+               /* A new VD_CONF */
+               unsigned int i;
+               if (!update->space)
+                       return;
+               vcl = update->space;
+               update->space = NULL;
+               vcl->next = ddf->conflist;
+               memcpy(&vcl->conf, vc, len);
+               ent = find_vde_by_guid(ddf, vc->guid);
+               if (ent == DDF_NOTFOUND)
+                       return;
+               vcl->vcnum = ent;
+               ddf->conflist = vcl;
+               for (i = 1; i < vc->sec_elmnt_count; i++)
+                       memcpy(vcl->other_bvds[i-1],
+                              update->buf + len * i, len);
+       }
+       /* Set DDF_Transition on all Failed devices - to help
+        * us detect those that are no longer in use
+        */
+       for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->max_pdes);
+            pdnum++)
+               if (be16_and(ddf->phys->entries[pdnum].state,
+                            cpu_to_be16(DDF_Failed)))
+                       be16_set(ddf->phys->entries[pdnum].state,
+                                cpu_to_be16(DDF_Transition));
+
+       /* Now make sure vlist is correct for each dl. */
+       for (dl = ddf->dlist; dl; dl = dl->next)
+               ddf_update_vlist(ddf, dl);
+       ddf_remove_failed(ddf);
+
+       ddf_set_updates_pending(ddf, vc);
+}
+
 static void ddf_process_update(struct supertype *st,
                               struct metadata_update *update)
 {
@@ -3306,8 +4839,8 @@ static void ddf_process_update(struct supertype *st,
         * our actions.
         * Possible update are:
         *  DDF_PHYS_RECORDS_MAGIC
-        *    Add a new physical device.  Changes to this record
-        *    only happen implicitly.
+        *    Add a new physical device or remove an old one.
+        *    Changes to this record only happen implicitly.
         *    used_pdes is the device number.
         *  DDF_VIRT_RECORDS_MAGIC
         *    Add a new VD.  Possibly also change the 'access' bits.
@@ -3328,217 +4861,99 @@ static void ddf_process_update(struct supertype *st,
         *    and offset.  This will also mark the spare as active with
         *    a spare-assignment record.
         */
-       struct ddf_super *ddf = st->sb;
-       __u32 *magic = (__u32*)update->buf;
-       struct phys_disk *pd;
-       struct virtual_disk *vd;
-       struct vd_config *vc;
-       struct vcl *vcl;
-       struct dl *dl;
-       unsigned int mppe;
-       unsigned int ent;
-       unsigned int pdnum, pd2;
+       be32 *magic = (be32 *)update->buf;
 
-       dprintf("Process update %x\n", *magic);
+       dprintf("Process update %x\n", be32_to_cpu(*magic));
 
-       switch (*magic) {
-       case DDF_PHYS_RECORDS_MAGIC:
-
-               if (update->len != (sizeof(struct phys_disk) +
+       if (be32_eq(*magic, DDF_PHYS_RECORDS_MAGIC)) {
+               if (update->len == (sizeof(struct phys_disk) +
                                    sizeof(struct phys_disk_entry)))
-                       return;
-               pd = (struct phys_disk*)update->buf;
-
-               ent = __be16_to_cpu(pd->used_pdes);
-               if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
-                       return;
-               if (!all_ff(ddf->phys->entries[ent].guid))
-                       return;
-               ddf->phys->entries[ent] = pd->entries[0];
-               ddf->phys->used_pdes = __cpu_to_be16(1 +
-                                          __be16_to_cpu(ddf->phys->used_pdes));
-               ddf->updates_pending = 1;
-               if (ddf->add_list) {
-                       struct active_array *a;
-                       struct dl *al = ddf->add_list;
-                       ddf->add_list = al->next;
-
-                       al->next = ddf->dlist;
-                       ddf->dlist = al;
-
-                       /* As a device has been added, we should check
-                        * for any degraded devices that might make
-                        * use of this spare */
-                       for (a = st->arrays ; a; a=a->next)
-                               a->check_degraded = 1;
-               }
-               break;
-
-       case DDF_VIRT_RECORDS_MAGIC:
-
-               if (update->len != (sizeof(struct virtual_disk) +
+                       ddf_process_phys_update(st, update);
+       } else if (be32_eq(*magic, DDF_VIRT_RECORDS_MAGIC)) {
+               if (update->len == (sizeof(struct virtual_disk) +
                                    sizeof(struct virtual_entry)))
-                       return;
-               vd = (struct virtual_disk*)update->buf;
-
-               ent = __be16_to_cpu(vd->populated_vdes);
-               if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
-                       return;
-               if (!all_ff(ddf->virt->entries[ent].guid))
-                       return;
-               ddf->virt->entries[ent] = vd->entries[0];
-               ddf->virt->populated_vdes = __cpu_to_be16(1 +
-                             __be16_to_cpu(ddf->virt->populated_vdes));
-               ddf->updates_pending = 1;
-               break;
-
-       case DDF_VD_CONF_MAGIC:
-               dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
-
-               mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
-               if ((unsigned)update->len != ddf->conf_rec_len * 512)
-                       return;
-               vc = (struct vd_config*)update->buf;
-               for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
-                       if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
-                               break;
-               dprintf("vcl = %p\n", vcl);
-               if (vcl) {
-                       /* An update, just copy the phys_refnum and lba_offset
-                        * fields
-                        */
-                       memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
-                              mppe * (sizeof(__u32) + sizeof(__u64)));
-               } else {
-                       /* A new VD_CONF */
-                       if (!update->space)
-                               return;
-                       vcl = update->space;
-                       update->space = NULL;
-                       vcl->next = ddf->conflist;
-                       memcpy(&vcl->conf, vc, update->len);
-                       vcl->lba_offset = (__u64*)
-                               &vcl->conf.phys_refnum[mppe];
-                       for (ent = 0;
-                            ent < __be16_to_cpu(ddf->virt->populated_vdes);
-                            ent++)
-                               if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
-                                          DDF_GUID_LEN) == 0) {
-                                       vcl->vcnum = ent;
-                                       break;
-                               }
-                       ddf->conflist = vcl;
-               }
-               /* Set DDF_Transition on all Failed devices - to help
-                * us detect those that are no longer in use
-                */
-               for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
-                       if (ddf->phys->entries[pdnum].state
-                           & __be16_to_cpu(DDF_Failed))
-                               ddf->phys->entries[pdnum].state
-                                       |= __be16_to_cpu(DDF_Transition);
-               /* Now make sure vlist is correct for each dl. */
-               for (dl = ddf->dlist; dl; dl = dl->next) {
-                       unsigned int dn;
-                       unsigned int vn = 0;
-                       int in_degraded = 0;
-                       for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
-                               for (dn=0; dn < ddf->mppe ; dn++)
-                                       if (vcl->conf.phys_refnum[dn] ==
-                                           dl->disk.refnum) {
-                                               int vstate;
-                                               dprintf("dev %d has %p at %d\n",
-                                                       dl->pdnum, vcl, vn);
-                                               /* Clear the Transition flag */
-                                               if (ddf->phys->entries[dl->pdnum].state
-                                                   & __be16_to_cpu(DDF_Failed))
-                                                       ddf->phys->entries[dl->pdnum].state &=
-                                                               ~__be16_to_cpu(DDF_Transition);
-
-                                               dl->vlist[vn++] = vcl;
-                                               vstate = ddf->virt->entries[vcl->vcnum].state
-                                                       & DDF_state_mask;
-                                               if (vstate == DDF_state_degraded ||
-                                                   vstate == DDF_state_part_optimal)
-                                                       in_degraded = 1;
-                                               break;
-                                       }
-                       while (vn < ddf->max_part)
-                               dl->vlist[vn++] = NULL;
-                       if (dl->vlist[0]) {
-                               ddf->phys->entries[dl->pdnum].type &=
-                                       ~__cpu_to_be16(DDF_Global_Spare);
-                               if (!(ddf->phys->entries[dl->pdnum].type &
-                                     __cpu_to_be16(DDF_Active_in_VD))) {
-                                           ddf->phys->entries[dl->pdnum].type |=
-                                                   __cpu_to_be16(DDF_Active_in_VD);
-                                           if (in_degraded)
-                                                   ddf->phys->entries[dl->pdnum].state |=
-                                                           __cpu_to_be16(DDF_Rebuilding);
-                                   }
-                       }
-                       if (dl->spare) {
-                               ddf->phys->entries[dl->pdnum].type &=
-                                       ~__cpu_to_be16(DDF_Global_Spare);
-                               ddf->phys->entries[dl->pdnum].type |=
-                                       __cpu_to_be16(DDF_Spare);
-                       }
-                       if (!dl->vlist[0] && !dl->spare) {
-                               ddf->phys->entries[dl->pdnum].type |=
-                                       __cpu_to_be16(DDF_Global_Spare);
-                               ddf->phys->entries[dl->pdnum].type &=
-                                       ~__cpu_to_be16(DDF_Spare |
-                                                      DDF_Active_in_VD);
-                       }
-               }
-
-               /* Now remove any 'Failed' devices that are not part
-                * of any VD.  They will have the Transition flag set.
-                * Once done, we need to update all dl->pdnum numbers.
-                */
-               pd2 = 0;
-               for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
-                       if ((ddf->phys->entries[pdnum].state
-                            & __be16_to_cpu(DDF_Failed))
-                           && (ddf->phys->entries[pdnum].state
-                               & __be16_to_cpu(DDF_Transition)))
-                               /* skip this one */;
-                       else if (pdnum == pd2)
-                               pd2++;
-                       else {
-                               ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
-                               for (dl = ddf->dlist; dl; dl = dl->next)
-                                       if (dl->pdnum == (int)pdnum)
-                                               dl->pdnum = pd2;
-                               pd2++;
-                       }
-               ddf->phys->used_pdes = __cpu_to_be16(pd2);
-               while (pd2 < pdnum) {
-                       memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
-                       pd2++;
-               }
-
-               ddf->updates_pending = 1;
-               break;
-       case DDF_SPARE_ASSIGN_MAGIC:
-       default: break;
+                       ddf_process_virt_update(st, update);
+       } else if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
+               ddf_process_conf_update(st, update);
        }
+       /* case DDF_SPARE_ASSIGN_MAGIC */
 }
 
-static void ddf_prepare_update(struct supertype *st,
-                              struct metadata_update *update)
+static int ddf_prepare_update(struct supertype *st,
+                             struct metadata_update *update)
 {
        /* This update arrived at managemon.
         * We are about to pass it to monitor.
         * If a malloc is needed, do it here.
         */
        struct ddf_super *ddf = st->sb;
-       __u32 *magic = (__u32*)update->buf;
-       if (*magic == DDF_VD_CONF_MAGIC)
+       be32 *magic;
+       if (update->len < 4)
+               return 0;
+       magic = (be32 *)update->buf;
+       if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
+               struct vcl *vcl;
+               struct vd_config *conf;
+               if (update->len < (int)sizeof(*conf))
+                       return 0;
+               conf = (struct vd_config *) update->buf;
                if (posix_memalign(&update->space, 512,
-                              offsetof(struct vcl, conf)
-                              + ddf->conf_rec_len * 512) != 0)
+                                  offsetof(struct vcl, conf)
+                                  + ddf->conf_rec_len * 512) != 0) {
                        update->space = NULL;
+                       return 0;
+               }
+               vcl = update->space;
+               vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
+               if (alloc_other_bvds(ddf, vcl) != 0) {
+                       free(update->space);
+                       update->space = NULL;
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/*
+ * Check degraded state of a RAID10.
+ * returns 2 for good, 1 for degraded, 0 for failed, and -1 for error
+ */
+static int raid10_degraded(struct mdinfo *info)
+{
+       int n_prim, n_bvds;
+       int i;
+       struct mdinfo *d;
+       char *found;
+       int ret = -1;
+
+       n_prim = info->array.layout & ~0x100;
+       n_bvds = info->array.raid_disks / n_prim;
+       found = xmalloc(n_bvds);
+       if (found == NULL)
+               return ret;
+       memset(found, 0, n_bvds);
+       for (d = info->devs; d; d = d->next) {
+               i = d->disk.raid_disk / n_prim;
+               if (i >= n_bvds) {
+                       pr_err("BUG: invalid raid disk\n");
+                       goto out;
+               }
+               if (d->state_fd > 0)
+                       found[i]++;
+       }
+       ret = 2;
+       for (i = 0; i < n_bvds; i++)
+               if (!found[i]) {
+                       dprintf("BVD %d/%d failed\n", i, n_bvds);
+                       ret = 0;
+                       goto out;
+               } else if (found[i] < n_prim) {
+                       dprintf("BVD %d/%d degraded\n", i, n_bvds);
+                       ret = 1;
+               }
+out:
+       free(found);
+       return ret;
 }
 
 /*
@@ -3547,7 +4962,7 @@ static void ddf_prepare_update(struct supertype *st,
  * arrange for their inclusion.
  * We only choose devices which are not already in the array,
  * and prefer those with a spare-assignment to this array.
- * otherwise we choose global spares - assuming always that
+ * Otherwise we choose global spares - assuming always that
  * there is enough room.
  * For each spare that we assign, we return an 'mdinfo' which
  * describes the position for the device in the array.
@@ -3568,19 +4983,22 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
        struct metadata_update *mu;
        struct dl *dl;
        int i;
+       unsigned int j;
+       struct vcl *vcl;
        struct vd_config *vc;
-       __u64 *lba;
+       unsigned int n_bvd;
 
        for (d = a->info.devs ; d ; d = d->next) {
                if ((d->curr_state & DS_FAULTY) &&
-                       d->state_fd >= 0)
+                   d->state_fd >= 0)
                        /* wait for Removal to happen */
                        return NULL;
                if (d->state_fd >= 0)
                        working ++;
        }
 
-       dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
+       dprintf("working=%d (%d) level=%d\n", working,
+               a->info.array.raid_disks,
                a->info.array.level);
        if (working == a->info.array.raid_disks)
                return NULL; /* array not degraded */
@@ -3598,6 +5016,10 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                if (working < a->info.array.raid_disks - 2)
                        return NULL; /* failed */
                break;
+       case 10:
+               if (raid10_degraded(&a->info) < 1)
+                       return NULL;
+               break;
        default: /* concat or stripe */
                return NULL; /* failed */
        }
@@ -3620,26 +5042,40 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                        struct mdinfo *d2;
                        int is_global = 0;
                        int is_dedicated = 0;
-                       struct extent *ex;
-                       unsigned int j;
+                       be16 state;
+
+                       if (dl->pdnum < 0)
+                               continue;
+                       state = ddf->phys->entries[dl->pdnum].state;
+                       if (be16_and(state,
+                                    cpu_to_be16(DDF_Failed|DDF_Missing)) ||
+                           !be16_and(state,
+                                     cpu_to_be16(DDF_Online)))
+                               continue;
+
                        /* If in this array, skip */
                        for (d2 = a->info.devs ; d2 ; d2 = d2->next)
                                if (d2->state_fd >= 0 &&
                                    d2->disk.major == dl->major &&
                                    d2->disk.minor == dl->minor) {
-                                       dprintf("%x:%x already in array\n", dl->major, dl->minor);
+                                       dprintf("%x:%x (%08x) already in array\n",
+                                               dl->major, dl->minor,
+                                               be32_to_cpu(dl->disk.refnum));
                                        break;
                                }
                        if (d2)
                                continue;
-                       if (ddf->phys->entries[dl->pdnum].type &
-                           __cpu_to_be16(DDF_Spare)) {
+                       if (be16_and(ddf->phys->entries[dl->pdnum].type,
+                                    cpu_to_be16(DDF_Spare))) {
                                /* Check spare assign record */
                                if (dl->spare) {
                                        if (dl->spare->type & DDF_spare_dedicated) {
                                                /* check spare_ents for guid */
+                                               unsigned int j;
                                                for (j = 0 ;
-                                                    j < __be16_to_cpu(dl->spare->populated);
+                                                    j < be16_to_cpu
+                                                            (dl->spare
+                                                             ->populated);
                                                     j++) {
                                                        if (memcmp(dl->spare->spare_ents[j].guid,
                                                                   ddf->virt->entries[a->info.container_member].guid,
@@ -3649,36 +5085,27 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                                        } else
                                                is_global = 1;
                                }
-                       } else if (ddf->phys->entries[dl->pdnum].type &
-                                  __cpu_to_be16(DDF_Global_Spare)) {
+                       } else if (be16_and(ddf->phys->entries[dl->pdnum].type,
+                                           cpu_to_be16(DDF_Global_Spare))) {
+                               is_global = 1;
+                       } else if (!be16_and(ddf->phys
+                                            ->entries[dl->pdnum].state,
+                                            cpu_to_be16(DDF_Failed))) {
+                               /* we can possibly use some of this */
                                is_global = 1;
                        }
                        if ( ! (is_dedicated ||
                                (is_global && global_ok))) {
                                dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
-                                      is_dedicated, is_global);
+                                       is_dedicated, is_global);
                                continue;
                        }
 
                        /* We are allowed to use this device - is there space?
                         * We need a->info.component_size sectors */
-                       ex = get_extents(ddf, dl);
-                       if (!ex) {
-                               dprintf("cannot get extents\n");
-                               continue;
-                       }
-                       j = 0; pos = 0;
-                       esize = 0;
-
-                       do {
-                               esize = ex[j].start - pos;
-                               if (esize >= a->info.component_size)
-                                       break;
-                               pos = ex[j].start + ex[j].size;
-                               j++;
-                       } while (ex[j-1].size);
+                       esize = a->info.component_size;
+                       pos = find_space(ddf, dl, INVALID_SECTORS, &esize);
 
-                       free(ex);
                        if (esize < a->info.component_size) {
                                dprintf("%x:%x has no room: %llu %llu\n",
                                        dl->major, dl->minor,
@@ -3688,10 +5115,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                        }
 
                        /* Cool, we have a device with some space at pos */
-                       di = malloc(sizeof(*di));
-                       if (!di)
-                               continue;
-                       memset(di, 0, sizeof(*di));
+                       di = xcalloc(1, sizeof(*di));
                        di->disk.number = i;
                        di->disk.raid_disk = i;
                        di->disk.major = dl->major;
@@ -3700,11 +5124,11 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                        di->recovery_start = 0;
                        di->data_offset = pos;
                        di->component_size = a->info.component_size;
-                       di->container_member = dl->pdnum;
                        di->next = rv;
                        rv = di;
-                       dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
-                               i, pos);
+                       dprintf("%x:%x (%08x) to be %d at %llu\n",
+                               dl->major, dl->minor,
+                               be32_to_cpu(dl->disk.refnum), i, pos);
 
                        break;
                }
@@ -3723,34 +5147,51 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
         * Create a metadata_update record to update the
         * phys_refnum and lba_offset values
         */
-       mu = malloc(sizeof(*mu));
-       if (mu && posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
+       vc = find_vdcr(ddf, a->info.container_member, rv->disk.raid_disk,
+                      &n_bvd, &vcl);
+       if (vc == NULL)
+               return NULL;
+
+       mu = xmalloc(sizeof(*mu));
+       if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
                free(mu);
                mu = NULL;
        }
-       if (!mu) {
-               while (rv) {
-                       struct mdinfo *n = rv->next;
 
-                       free(rv);
-                       rv = n;
-               }
-               return NULL;
-       }
-               
-       mu->buf = malloc(ddf->conf_rec_len * 512);
-       mu->len = ddf->conf_rec_len * 512;
+       mu->len = ddf->conf_rec_len * 512 * vcl->conf.sec_elmnt_count;
+       mu->buf = xmalloc(mu->len);
        mu->space = NULL;
+       mu->space_list = NULL;
        mu->next = *updates;
-       vc = find_vdcr(ddf, a->info.container_member);
-       memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
+       memcpy(mu->buf, &vcl->conf, ddf->conf_rec_len * 512);
+       for (j = 1; j < vcl->conf.sec_elmnt_count; j++)
+               memcpy(mu->buf + j * ddf->conf_rec_len * 512,
+                      vcl->other_bvds[j-1], ddf->conf_rec_len * 512);
 
        vc = (struct vd_config*)mu->buf;
-       lba = (__u64*)&vc->phys_refnum[ddf->mppe];
        for (di = rv ; di ; di = di->next) {
-               vc->phys_refnum[di->disk.raid_disk] =
-                       ddf->phys->entries[dl->pdnum].refnum;
-               lba[di->disk.raid_disk] = di->data_offset;
+               unsigned int i_sec, i_prim;
+               i_sec = di->disk.raid_disk
+                       / be16_to_cpu(vcl->conf.prim_elmnt_count);
+               i_prim = di->disk.raid_disk
+                       % be16_to_cpu(vcl->conf.prim_elmnt_count);
+               vc = (struct vd_config *)(mu->buf
+                                         + i_sec * ddf->conf_rec_len * 512);
+               for (dl = ddf->dlist; dl; dl = dl->next)
+                       if (dl->major == di->disk.major
+                           && dl->minor == di->disk.minor)
+                               break;
+               if (!dl || dl->pdnum < 0) {
+                       pr_err("BUG: can't find disk %d (%d/%d)\n",
+                              di->disk.raid_disk,
+                              di->disk.major, di->disk.minor);
+                       return NULL;
+               }
+               vc->phys_refnum[i_prim] = ddf->phys->entries[dl->pdnum].refnum;
+               LBA_OFFSET(ddf, vc)[i_prim] = cpu_to_be64(di->data_offset);
+               dprintf("BVD %u gets %u: %08x at %llu\n", i_sec, i_prim,
+                       be32_to_cpu(vc->phys_refnum[i_prim]),
+                       be64_to_cpu(LBA_OFFSET(ddf, vc)[i_prim]));
        }
        *updates = mu;
        return rv;
@@ -3774,6 +5215,15 @@ static int ddf_level_to_layout(int level)
        }
 }
 
+static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
+{
+       if (level && *level == UnSet)
+               *level = LEVEL_CONTAINER;
+
+       if (level && layout && *layout == UnSet)
+               *layout = ddf_level_to_layout(*level);
+}
+
 struct superswitch super_ddf = {
 #ifndef        MDASSEMBLE
        .examine_super  = examine_super_ddf,
@@ -3785,6 +5235,10 @@ struct superswitch super_ddf = {
        .validate_geometry = validate_geometry_ddf,
        .write_init_super = write_init_super_ddf,
        .add_to_super   = add_to_super_ddf,
+       .remove_from_super = remove_from_super_ddf,
+       .load_container = load_container_ddf,
+       .copy_metadata = copy_metadata_ddf,
+       .kill_subarray  = kill_subarray_ddf,
 #endif
        .match_home     = match_home_ddf,
        .uuid_from_super= uuid_from_super_ddf,
@@ -3801,7 +5255,7 @@ struct superswitch super_ddf = {
        .free_super     = free_super_ddf,
        .match_metadata_desc = match_metadata_desc_ddf,
        .container_content = container_content_ddf,
-       .default_layout = ddf_level_to_layout,
+       .default_geometry = default_geometry_ddf,
 
        .external       = 1,