Consistently print program Name and __func__ in debug messages.
[thirdparty/mdadm.git] / super-ddf.c
1 /*
2  * mdadm - manage Linux "md" devices aka RAID arrays.
3  *
4  * Copyright (C) 2006-2014 Neil Brown <neilb@suse.de>
5  *
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2 of the License, or
10  *    (at your option) any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  *    Author: Neil Brown
22  *    Email: <neil@brown.name>
23  *
24  * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25  * (July 28 2006).  Reused by permission of SNIA.
26  */
27
28 #define HAVE_STDINT_H 1
29 #include "mdadm.h"
30 #include "mdmon.h"
31 #include "sha1.h"
32 #include <values.h>
33 #include <stddef.h>
34
/* a non-official T10 name for creation GUIDs */
static char T10[] = "Linux-MD";

/* DDF timestamps are 1980 based, so we need to add the number of
 * seconds in the decade of the seventies to convert them to Linux
 * timestamps (and subtract it to go the other way, as done when a
 * timestamp is stored with time(0)-DECADE).
 * 10 years with 2 leap years.
 */
#define DECADE (3600*24*(365*10+2))
/* CRC routine provided outside this file; calc_crc() below calls it
 * with an initial crc of 0.
 */
unsigned long crc32(
        unsigned long crc,
        const unsigned char *buf,
        unsigned len);

/* DDF_NOTFOUND is the all-ones unsigned value; DDF_CONTAINER is the
 * value immediately below it.
 * NOTE(review): presumably both are sentinel index values - their users
 * are not visible in this part of the file.
 */
#define DDF_NOTFOUND (~0U)
#define DDF_CONTAINER (DDF_NOTFOUND-1)

/* Default for safe_mode_delay. Same value as for IMSM.
 */
static const int DDF_SAFE_MODE_DELAY = 4000;
54
55 /* The DDF metadata handling.
56  * DDF metadata lives at the end of the device.
57  * The last 512 byte block provides an 'anchor' which is used to locate
58  * the rest of the metadata which usually lives immediately behind the anchor.
59  *
60  * Note:
61  *  - all multibyte numeric fields are bigendian.
62  *  - all strings are space padded.
63  *
64  */
65
/* Big-endian 16/32/64 bit values as stored in the metadata.
 * Each value is wrapped in a single-member struct, so the compiler
 * rejects direct assignment or arithmetic between these types and
 * plain integers; conversion has to go through cpu_to_beNN() and
 * beNN_to_cpu().
 *
 * The helper macros below operate on the raw (still big-endian) member:
 *   beNN_eq(x, y)    - true when both hold the same stored value
 *   be16_and(x, y)   - bitwise AND of the stored values (plain integer)
 *   be16_or(x, y)    - bitwise OR of the stored values (plain integer)
 *   be16_clear(x, y) - clear in x every bit that is set in y (modifies x)
 *   be16_set(x, y)   - set in x every bit that is set in y (modifies x)
 */
typedef struct __be16 {
        __u16 _v16;
} be16;
#define be16_eq(x, y) ((x)._v16 == (y)._v16)
#define be16_and(x, y) ((x)._v16 & (y)._v16)
#define be16_or(x, y) ((x)._v16 | (y)._v16)
#define be16_clear(x, y) ((x)._v16 &= ~(y)._v16)
#define be16_set(x, y) ((x)._v16 |= (y)._v16)

typedef struct __be32 {
        __u32 _v32;
} be32;
#define be32_eq(x, y) ((x)._v32 == (y)._v32)

typedef struct __be64 {
        __u64 _v64;
} be64;
#define be64_eq(x, y) ((x)._v64 == (y)._v64)
84
85 #define be16_to_cpu(be) __be16_to_cpu((be)._v16)
86 static inline be16 cpu_to_be16(__u16 x)
87 {
88         be16 be = { ._v16 = __cpu_to_be16(x) };
89         return be;
90 }
91
92 #define be32_to_cpu(be) __be32_to_cpu((be)._v32)
93 static inline be32 cpu_to_be32(__u32 x)
94 {
95         be32 be = { ._v32 = __cpu_to_be32(x) };
96         return be;
97 }
98
99 #define be64_to_cpu(be) __be64_to_cpu((be)._v64)
100 static inline be64 cpu_to_be64(__u64 x)
101 {
102         be64 be = { ._v64 = __cpu_to_be64(x) };
103         return be;
104 }
105
/* Primary Raid Level (PRL) - stored in vd_config.prl */
#define DDF_RAID0       0x00
#define DDF_RAID1       0x01
#define DDF_RAID3       0x03
#define DDF_RAID4       0x04
#define DDF_RAID5       0x05
#define DDF_RAID1E      0x11
#define DDF_JBOD        0x0f
#define DDF_CONCAT      0x1f
#define DDF_RAID5E      0x15
#define DDF_RAID5EE     0x25
#define DDF_RAID6       0x06

/* Raid Level Qualifier (RLQ) - stored in vd_config.rlq.
 * The meaning of a value depends on the PRL it is combined with, which
 * is why the same number appears under several names.
 */
#define DDF_RAID0_SIMPLE        0x00
#define DDF_RAID1_SIMPLE        0x00 /* just 2 devices in this plex */
#define DDF_RAID1_MULTI         0x01 /* exactly 3 devices in this plex */
#define DDF_RAID3_0             0x00 /* parity in first extent */
#define DDF_RAID3_N             0x01 /* parity in last extent */
#define DDF_RAID4_0             0x00 /* parity in first extent */
#define DDF_RAID4_N             0x01 /* parity in last extent */
/* these apply to raid5e and raid5ee as well */
#define DDF_RAID5_0_RESTART     0x00 /* same as 'right asymmetric' - layout 1 */
#define DDF_RAID6_0_RESTART     0x01 /* raid6 different from raid5 here!!! */
#define DDF_RAID5_N_RESTART     0x02 /* same as 'left asymmetric' - layout 0 */
#define DDF_RAID5_N_CONTINUE    0x03 /* same as 'left symmetric' - layout 2 */

#define DDF_RAID1E_ADJACENT     0x00 /* raid10 nearcopies==2 */
#define DDF_RAID1E_OFFSET       0x01 /* raid10 offsetcopies==2 */

/* Secondary RAID Level (SRL) - stored in vd_config.srl */
#define DDF_2STRIPED    0x00    /* This is weirder than RAID0 !! */
#define DDF_2MIRRORED   0x01
#define DDF_2CONCAT     0x02
#define DDF_2SPANNED    0x03    /* This is also weird - be careful */
141
/* Magic numbers.
 * Each one is already a be32 (built with cpu_to_be32()), so it can be
 * compared against the 'magic' field of the matching structure with
 * be32_eq().
 */
#define DDF_HEADER_MAGIC        cpu_to_be32(0xDE11DE11)
#define DDF_CONTROLLER_MAGIC    cpu_to_be32(0xAD111111)
#define DDF_PHYS_RECORDS_MAGIC  cpu_to_be32(0x22222222)
#define DDF_PHYS_DATA_MAGIC     cpu_to_be32(0x33333333)
#define DDF_VIRT_RECORDS_MAGIC  cpu_to_be32(0xDDDDDDDD)
#define DDF_VD_CONF_MAGIC       cpu_to_be32(0xEEEEEEEE)
#define DDF_SPARE_ASSIGN_MAGIC  cpu_to_be32(0x55555555)
#define DDF_VU_CONF_MAGIC       cpu_to_be32(0x88888888)
#define DDF_VENDOR_LOG_MAGIC    cpu_to_be32(0x01dBEEF0)
#define DDF_BBM_LOG_MAGIC       cpu_to_be32(0xABADB10C)

/* Length in bytes of every 'guid' field in the structures below */
#define DDF_GUID_LEN    24
/* Revision strings; exactly 8 characters, matching ddf_header.revision */
#define DDF_REVISION_0  "01.00.00"
#define DDF_REVISION_2  "01.02.00"
157
/* DDF header.  The same 512 byte layout is used for the anchor, the
 * primary and the secondary header; the 'type' field tells them apart
 * (DDF_HEADER_* below).
 */
struct ddf_header {
        be32    magic;          /* DDF_HEADER_MAGIC */
        be32    crc;            /* calc_crc() assumes every checksummed
                                 * structure keeps its crc at this offset */
        char    guid[DDF_GUID_LEN];
        char    revision[8];    /* 01.02.00 */
        be32    seq;            /* starts at '1' */
        be32    timestamp;
        __u8    openflag;
        __u8    foreignflag;
        __u8    enforcegroups;
        __u8    pad0;           /* 0xff */
        __u8    pad1[12];       /* 12 * 0xff */
        /* 64 bytes so far */
        __u8    header_ext[32]; /* reserved: fill with 0xff */
        be64    primary_lba;
        be64    secondary_lba;
        __u8    type;           /* one of DDF_HEADER_ANCHOR/PRIMARY/SECONDARY */
        __u8    pad2[3];        /* 0xff */
        be32    workspace_len;  /* sectors for vendor space -
                                 * at least 32768(sectors) */
        be64    workspace_lba;
        be16    max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
        be16    max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
        be16    max_partitions; /* i.e. max num of configuration
                                   record entries per disk */
        be16    config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
                                                 *12/512) */
        be16    max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
        __u8    pad3[54];       /* 0xff */
        /* 192 bytes so far */
        /* offset/length pairs locating each metadata section */
        be32    controller_section_offset;
        be32    controller_section_length;
        be32    phys_section_offset;
        be32    phys_section_length;
        be32    virt_section_offset;
        be32    virt_section_length;
        be32    config_section_offset;
        be32    config_section_length;
        be32    data_section_offset;
        be32    data_section_length;
        be32    bbm_section_offset;
        be32    bbm_section_length;
        be32    diag_space_offset;
        be32    diag_space_length;
        be32    vendor_offset;
        be32    vendor_length;
        /* 256 bytes so far */
        __u8    pad4[256];      /* 0xff */
};

/* type field */
#define DDF_HEADER_ANCHOR       0x00
#define DDF_HEADER_PRIMARY      0x01
#define DDF_HEADER_SECONDARY    0x02
212
/* The content of the 'controller section' - global scope.
 * The fields add up to 512 bytes.
 */
struct ddf_controller_data {
        be32    magic;                  /* DDF_CONTROLLER_MAGIC */
        be32    crc;
        char    guid[DDF_GUID_LEN];
        struct controller_type {
                be16 vendor_id;
                be16 device_id;
                be16 sub_vendor_id;
                be16 sub_device_id;
        } type;
        char    product_id[16];
        __u8    pad[8]; /* 0xff */
        __u8    vendor_data[448];
};
228
/* The content of phys_section - global scope.
 * A 64 byte header followed by a table of 64 byte entries, one per
 * physical disk.
 */
struct phys_disk {
        be32    magic;          /* DDF_PHYS_RECORDS_MAGIC */
        be32    crc;
        be16    used_pdes;      /* This is a counter, not a max - the list
                                 * of used entries may not be dense */
        be16    max_pdes;
        __u8    pad[52];
        struct phys_disk_entry {
                char    guid[DDF_GUID_LEN];
                be32    refnum;
                be16    type;           /* bitmap, see below */
                be16    state;          /* bitmap, see below */
                be64    config_size;    /* DDF structures must be after here */
                char    path[18];       /* Another horrible structure really
                                         * but is "used for information
                                         * purposes only" */
                __u8    pad[6];
        } entries[0];
};

/* phys_disk_entry.type is a bitmap - bigendian remember */
#define DDF_Forced_PD_GUID              1
#define DDF_Active_in_VD                2
#define DDF_Global_Spare                4 /* VD_CONF records are ignored */
#define DDF_Spare                       8 /* overrides Global_spare */
#define DDF_Foreign                     16
#define DDF_Legacy                      32 /* no DDF on this device */

/* Interface kind: one DDF_Interface_* value in the bits selected by
 * DDF_Interface_mask
 */
#define DDF_Interface_mask              0xf00
#define DDF_Interface_SCSI              0x100
#define DDF_Interface_SAS               0x200
#define DDF_Interface_SATA              0x300
#define DDF_Interface_FC                0x400

/* phys_disk_entry.state is a bigendian bitmap */
#define DDF_Online                      1
#define DDF_Failed                      2 /* overrides  1,4,8 */
#define DDF_Rebuilding                  4
#define DDF_Transition                  8
#define DDF_SMART                       16
#define DDF_ReadErrors                  32
#define DDF_Missing                     64
272
/* The content of the virt_section - global scope.
 * A 64 byte header followed by a table of 64 byte entries, one per
 * virtual disk.
 */
struct virtual_disk {
        be32    magic;          /* DDF_VIRT_RECORDS_MAGIC */
        be32    crc;
        be16    populated_vdes;
        be16    max_vdes;
        __u8    pad[52];
        struct virtual_entry {
                char    guid[DDF_GUID_LEN];
                be16    unit;
                __u16   pad0;   /* 0xffff */
                be16    guid_crc;
                be16    type;           /* bitmap, see below */
                __u8    state;          /* see DDF_state_* below */
                __u8    init_state;     /* see DDF_init_* / DDF_access_* below */
                __u8    pad1[14];
                char    name[16];
        } entries[0];
};

/* virtual_entry.type is a bitmap - bigendian */
#define DDF_Shared              1
#define DDF_Enforce_Groups      2
#define DDF_Unicode             4
#define DDF_Owner_Valid         8

/* virtual_entry.state is a single byte (no byte order to worry about).
 * The bits selected by DDF_state_mask hold one of the following values:
 */
#define DDF_state_mask          0x7
#define DDF_state_optimal       0x0
#define DDF_state_degraded      0x1
#define DDF_state_deleted       0x2
#define DDF_state_missing       0x3
#define DDF_state_failed        0x4
#define DDF_state_part_optimal  0x5

/* ... and these are separate flag bits outside DDF_state_mask */
#define DDF_state_morphing      0x8
#define DDF_state_inconsistent  0x10

/* virtual_entry.init_state is a single byte as well.
 * Bits selected by DDF_initstate_mask: initialisation state.
 */
#define DDF_initstate_mask      0x03
#define DDF_init_not            0x00
#define DDF_init_quick          0x01 /* initialisation is in progress.
                                      * i.e. 'state_inconsistent' */
#define DDF_init_full           0x02

/* Bits selected by DDF_access_mask: access policy */
#define DDF_access_mask         0xc0
#define DDF_access_rw           0x00
#define DDF_access_ro           0x80
#define DDF_access_blocked      0xc0
322
/* The content of the config_section - local scope
 * It has multiple records each config_record_len sectors
 * They can be vd_config or spare_assign
 */

/* Virtual disk configuration record.
 * The fixed part below is 512 bytes; it is followed by the phys_refnum
 * array and then by an array of be64 LBA offsets (see LBA_OFFSET()).
 */
struct vd_config {
        be32    magic;          /* DDF_VD_CONF_MAGIC */
        be32    crc;
        char    guid[DDF_GUID_LEN];
        be32    timestamp;
        be32    seqnum;
        __u8    pad0[24];
        be16    prim_elmnt_count;
        __u8    chunk_shift;    /* 0 == 512, 1==1024 etc */
        __u8    prl;            /* Primary Raid Level, DDF_RAID* etc */
        __u8    rlq;            /* Raid Level Qualifier */
        __u8    sec_elmnt_count;
        __u8    sec_elmnt_seq;
        __u8    srl;            /* Secondary Raid Level, DDF_2* */
        be64    blocks;         /* blocks per component could be different
                                 * on different component devices...(only
                                 * for concat I hope) */
        be64    array_blocks;   /* blocks in array */
        __u8    pad1[8];
        be32    spare_refs[8];  /* This is used to detect missing spares.
                                 * As we don't have an interface for that
                                 * the values are ignored.
                                 */
        __u8    cache_pol[8];
        __u8    bg_rate;
        __u8    pad2[3];
        __u8    pad3[52];
        __u8    pad4[192];
        __u8    v0[32]; /* reserved- 0xff */
        __u8    v1[32]; /* reserved- 0xff */
        __u8    v2[16]; /* reserved- 0xff */
        __u8    v3[16]; /* reserved- 0xff */
        __u8    vendor[32];
        be32    phys_refnum[0]; /* refnum of each disk in sequence */
      /*__u64   lba_offset[0];  LBA offset in each phys.  Note extents in a
                                bvd are always the same size */
};
/* Address of the LBA offset array of 'vd': it starts right after
 * (ddf)->mppe entries of phys_refnum[].
 */
#define LBA_OFFSET(ddf, vd) ((be64 *) &(vd)->phys_refnum[(ddf)->mppe])

/* vd_config.cache_pol[7] is a bitmap */
#define DDF_cache_writeback     1       /* else writethrough */
#define DDF_cache_wadaptive     2       /* only applies if writeback */
#define DDF_cache_readahead     4
#define DDF_cache_radaptive     8       /* only if doing read-ahead */
#define DDF_cache_ifnobatt      16      /* even to write cache if battery is poor */
#define DDF_cache_wallowed      32      /* enable write caching */
#define DDF_cache_rallowed      64      /* enable read caching */
375
/* Spare assignment record (the other record kind that can appear in
 * the config section).  A 32 byte header followed by 32 byte entries.
 */
struct spare_assign {
        be32    magic;          /* DDF_SPARE_ASSIGN_MAGIC */
        be32    crc;
        be32    timestamp;
        __u8    reserved[7];
        __u8    type;           /* bitmap, see below */
        be16    populated;      /* SAEs used */
        be16    max;            /* max SAEs */
        __u8    pad[8];
        struct spare_assign_entry {
                char    guid[DDF_GUID_LEN];
                be16    secondary_element;
                __u8    pad[6];
        } spare_ents[0];
};
/* spare_assign.type is a bitmap */
#define DDF_spare_dedicated     0x1     /* else global */
#define DDF_spare_revertible    0x2     /* else committable */
#define DDF_spare_active        0x4     /* else not active */
#define DDF_spare_affinity      0x8     /* enclosure affinity */
396
/* The data_section contents - local scope.
 * The fields add up to 512 bytes.
 */
struct disk_data {
        be32    magic;          /* DDF_PHYS_DATA_MAGIC */
        be32    crc;
        char    guid[DDF_GUID_LEN];
        be32    refnum;         /* crc of some magic drive data ... */
        __u8    forced_ref;     /* set when above was not result of magic */
        __u8    forced_guid;    /* set if guid was forced rather than magic */
        __u8    vendor[32];
        __u8    pad[442];
};
408
/* bbm_section content: a header followed by a table of remapped blocks.
 * NOTE(review): entry_count (be16) is directly followed by a be32 and
 * the struct is not declared packed, so the compiler will most likely
 * insert padding after entry_count (and possibly before first_spare).
 * Confirm the resulting layout against the specification before using
 * this struct for I/O.
 */
struct bad_block_log {
        be32    magic;
        be32    crc;
        be16    entry_count;
        be32    spare_count;
        __u8    pad[10];
        be64    first_spare;
        struct mapped_block {
                be64    defective_start;
                be32    replacement_start;
                be16    remap_count;
                __u8    pad[2];
        } entries[0];
};
424
/* Struct for internally holding ddf structures */
/* The DDF structure stored on each device is potentially
 * quite different, as some data is global and some is local.
 * The global data is:
 *   - ddf header
 *   - controller_data
 *   - Physical disk records
 *   - Virtual disk records
 * The local data is:
 *   - Configuration records
 *   - Physical Disk data section
 *  (  and Bad block and vendor which I don't care about yet).
 *
 * The local data is parsed into separate lists as it is read
 * and reconstructed for writing.  This means that we only need
 * to make config changes once and they are automatically
 * propagated to all devices.
 * The local (config and disk data) records are each in a list
 * of separate data structures.  When writing we find the entry
 * or entries applicable to the particular device.
 *
 * Both list element types (struct vcl and struct dl) keep their
 * bookkeeping fields in an anonymous union with a 512 byte array, so
 * the metadata structure that follows ('conf' / 'disk') starts exactly
 * 512 bytes into the element.
 * NOTE(review): presumably this keeps the metadata sector-aligned for
 * I/O - confirm against the allocation code.
 */
struct ddf_super {
        struct ddf_header       anchor, primary, secondary;
        struct ddf_controller_data controller;
        struct ddf_header       *active;
        struct phys_disk        *phys;
        struct virtual_disk     *virt;
        char                    *conf;
        int                     pdsize, vdsize;
        unsigned int            max_part, mppe, conf_rec_len;
        int                     currentdev;
        int                     updates_pending; /* set by ddf_set_updates_pending() */
        /* one element per virtual disk configuration */
        struct vcl {
                union {
                        char space[512];
                        struct {
                                struct vcl      *next;
                                unsigned int    vcnum; /* index into ->virt */
                                /* For an array with a secondary level there are
                                 * multiple vd_config structures, all with the same
                                 * guid but with different sec_elmnt_seq.
                                 * One of these structures is in 'conf' below.
                                 * The others are in other_bvds, not in any
                                 * particular order.
                                 */
                                struct vd_config **other_bvds;
                                __u64           *block_sizes; /* NULL if all the same */
                        };
                };
                struct vd_config conf;
        } *conflist, *currentconf;
        /* one element per device */
        struct dl {
                union {
                        char space[512];
                        struct {
                                struct dl       *next;
                                int major, minor;
                                char *devname;
                                int fd;
                                unsigned long long size; /* sectors */
                                be64 primary_lba; /* sectors */
                                be64 secondary_lba; /* sectors */
                                be64 workspace_lba; /* sectors */
                                int pdnum;      /* index in ->phys */
                                struct spare_assign *spare;
                                void *mdupdate; /* hold metadata update */

                                /* These fields used by auto-layout */
                                int raiddisk; /* slot to fill in autolayout */
                                __u64 esize;
                                int displayed;
                        };
                };
                struct disk_data disk;
                struct vcl *vlist[0]; /* max_part in size */
        } *dlist, *add_list;
};
502
/* Forward declarations of file-local functions that are referenced
 * before their definitions.  The first group is only declared when
 * MDASSEMBLE is not defined.
 */
#ifndef MDASSEMBLE
static int load_super_ddf_all(struct supertype *st, int fd,
                              void **sbp, char *devname);
static int get_svd_state(const struct ddf_super *, const struct vcl *);
static int
validate_geometry_ddf_container(struct supertype *st,
                                int level, int layout, int raiddisks,
                                int chunk, unsigned long long size,
                                unsigned long long data_offset,
                                char *dev, unsigned long long *freesize,
                                int verbose);

static int validate_geometry_ddf_bvd(struct supertype *st,
                                     int level, int layout, int raiddisks,
                                     int *chunk, unsigned long long size,
                                     unsigned long long data_offset,
                                     char *dev, unsigned long long *freesize,
                                     int verbose);
#endif

static void free_super_ddf(struct supertype *st);
static int all_ff(const char *guid);
static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
                                             be32 refnum, unsigned int nmax,
                                             const struct vd_config **bvd,
                                             unsigned int *idx);
static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i);
static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
static int init_super_ddf_bvd(struct supertype *st,
                              mdu_array_info_t *info,
                              unsigned long long size,
                              char *name, char *homehost,
                              int *uuid, unsigned long long data_offset);
539
#if DEBUG
/* Debug aid: print 'msg' followed by one "index(s=state i=init_state)"
 * item for every virtual disk entry, then a newline.
 * Entries for which all_ff() reports true on the guid are skipped
 * (presumably unused slots - all_ff() is defined elsewhere in the file).
 *
 * 'ddf' is only read, so it is taken as const here exactly as in the
 * non-DEBUG stub below; both variants now share one signature.
 */
static void pr_state(const struct ddf_super *ddf, const char *msg)
{
        /* the limit does not change while printing; convert it once */
        const unsigned int max_vd = be16_to_cpu(ddf->active->max_vd_entries);
        unsigned int i;

        dprintf("%s: ", msg);
        for (i = 0; i < max_vd; i++) {
                const struct virtual_entry *ve = &ddf->virt->entries[i];

                if (all_ff(ve->guid))
                        continue;
                dprintf_cont("%u(s=%02x i=%02x) ", i,
                        ve->state,
                        ve->init_state);
        }
        dprintf_cont("\n");
}
#else
/* Without DEBUG this is a no-op */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
557
558 static void _ddf_set_updates_pending(struct ddf_super *ddf, struct vd_config *vc,
559                                      const char *func)
560 {
561         if (vc) {
562                 vc->timestamp = cpu_to_be32(time(0)-DECADE);
563                 vc->seqnum = cpu_to_be32(be32_to_cpu(vc->seqnum) + 1);
564         }
565         if (ddf->updates_pending)
566                 return;
567         ddf->updates_pending = 1;
568         ddf->active->seq = cpu_to_be32((be32_to_cpu(ddf->active->seq)+1));
569         pr_state(ddf, func);
570 }
571
572 #define ddf_set_updates_pending(x,v) _ddf_set_updates_pending((x), (v), __func__)
573
574 static be32 calc_crc(void *buf, int len)
575 {
576         /* crcs are always at the same place as in the ddf_header */
577         struct ddf_header *ddf = buf;
578         be32 oldcrc = ddf->crc;
579         __u32 newcrc;
580         ddf->crc = cpu_to_be32(0xffffffff);
581
582         newcrc = crc32(0, buf, len);
583         ddf->crc = oldcrc;
584         /* The crc is stored (like everything) bigendian, so convert
585          * here for simplicity
586          */
587         return cpu_to_be32(newcrc);
588 }
589
590 #define DDF_INVALID_LEVEL 0xff
591 #define DDF_NO_SECONDARY 0xff
592 static int err_bad_md_layout(const mdu_array_info_t *array)
593 {
594         pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
595                array->level, array->layout, array->raid_disks);
596         return -1;
597 }
598
599 static int layout_md2ddf(const mdu_array_info_t *array,
600                          struct vd_config *conf)
601 {
602         be16 prim_elmnt_count = cpu_to_be16(array->raid_disks);
603         __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
604         __u8 sec_elmnt_count = 1;
605         __u8 srl = DDF_NO_SECONDARY;
606
607         switch (array->level) {
608         case LEVEL_LINEAR:
609                 prl = DDF_CONCAT;
610                 break;
611         case 0:
612                 rlq = DDF_RAID0_SIMPLE;
613                 prl = DDF_RAID0;
614                 break;
615         case 1:
616                 switch (array->raid_disks) {
617                 case 2:
618                         rlq = DDF_RAID1_SIMPLE;
619                         break;
620                 case 3:
621                         rlq = DDF_RAID1_MULTI;
622                         break;
623                 default:
624                         return err_bad_md_layout(array);
625                 }
626                 prl = DDF_RAID1;
627                 break;
628         case 4:
629                 if (array->layout != 0)
630                         return err_bad_md_layout(array);
631                 rlq = DDF_RAID4_N;
632                 prl = DDF_RAID4;
633                 break;
634         case 5:
635                 switch (array->layout) {
636                 case ALGORITHM_LEFT_ASYMMETRIC:
637                         rlq = DDF_RAID5_N_RESTART;
638                         break;
639                 case ALGORITHM_RIGHT_ASYMMETRIC:
640                         rlq = DDF_RAID5_0_RESTART;
641                         break;
642                 case ALGORITHM_LEFT_SYMMETRIC:
643                         rlq = DDF_RAID5_N_CONTINUE;
644                         break;
645                 case ALGORITHM_RIGHT_SYMMETRIC:
646                         /* not mentioned in standard */
647                 default:
648                         return err_bad_md_layout(array);
649                 }
650                 prl = DDF_RAID5;
651                 break;
652         case 6:
653                 switch (array->layout) {
654                 case ALGORITHM_ROTATING_N_RESTART:
655                         rlq = DDF_RAID5_N_RESTART;
656                         break;
657                 case ALGORITHM_ROTATING_ZERO_RESTART:
658                         rlq = DDF_RAID6_0_RESTART;
659                         break;
660                 case ALGORITHM_ROTATING_N_CONTINUE:
661                         rlq = DDF_RAID5_N_CONTINUE;
662                         break;
663                 default:
664                         return err_bad_md_layout(array);
665                 }
666                 prl = DDF_RAID6;
667                 break;
668         case 10:
669                 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
670                         rlq = DDF_RAID1_SIMPLE;
671                         prim_elmnt_count =  cpu_to_be16(2);
672                         sec_elmnt_count = array->raid_disks / 2;
673                         srl = DDF_2SPANNED;
674                         prl = DDF_RAID1;
675                 } else if (array->raid_disks % 3 == 0
676                            && array->layout == 0x103) {
677                         rlq = DDF_RAID1_MULTI;
678                         prim_elmnt_count =  cpu_to_be16(3);
679                         sec_elmnt_count = array->raid_disks / 3;
680                         srl = DDF_2SPANNED;
681                         prl = DDF_RAID1;
682                 } else if (array->layout == 0x201) {
683                         prl = DDF_RAID1E;
684                         rlq = DDF_RAID1E_OFFSET;
685                 } else if (array->layout == 0x102) {
686                         prl = DDF_RAID1E;
687                         rlq = DDF_RAID1E_ADJACENT;
688                 } else
689                         return err_bad_md_layout(array);
690                 break;
691         default:
692                 return err_bad_md_layout(array);
693         }
694         conf->prl = prl;
695         conf->prim_elmnt_count = prim_elmnt_count;
696         conf->rlq = rlq;
697         conf->srl = srl;
698         conf->sec_elmnt_count = sec_elmnt_count;
699         return 0;
700 }
701
702 static int err_bad_ddf_layout(const struct vd_config *conf)
703 {
704         pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
705                conf->prl, conf->rlq, be16_to_cpu(conf->prim_elmnt_count));
706         return -1;
707 }
708
709 static int layout_ddf2md(const struct vd_config *conf,
710                          mdu_array_info_t *array)
711 {
712         int level = LEVEL_UNSUPPORTED;
713         int layout = 0;
714         int raiddisks = be16_to_cpu(conf->prim_elmnt_count);
715
716         if (conf->sec_elmnt_count > 1) {
717                 /* see also check_secondary() */
718                 if (conf->prl != DDF_RAID1 ||
719                     (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
720                         pr_err("Unsupported secondary RAID level %u/%u\n",
721                                conf->prl, conf->srl);
722                         return -1;
723                 }
724                 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
725                         layout = 0x102;
726                 else if  (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
727                         layout = 0x103;
728                 else
729                         return err_bad_ddf_layout(conf);
730                 raiddisks *= conf->sec_elmnt_count;
731                 level = 10;
732                 goto good;
733         }
734
735         switch (conf->prl) {
736         case DDF_CONCAT:
737                 level = LEVEL_LINEAR;
738                 break;
739         case DDF_RAID0:
740                 if (conf->rlq != DDF_RAID0_SIMPLE)
741                         return err_bad_ddf_layout(conf);
742                 level = 0;
743                 break;
744         case DDF_RAID1:
745                 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
746                       (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
747                         return err_bad_ddf_layout(conf);
748                 level = 1;
749                 break;
750         case DDF_RAID1E:
751                 if (conf->rlq == DDF_RAID1E_ADJACENT)
752                         layout = 0x102;
753                 else if (conf->rlq == DDF_RAID1E_OFFSET)
754                         layout = 0x201;
755                 else
756                         return err_bad_ddf_layout(conf);
757                 level = 10;
758                 break;
759         case DDF_RAID4:
760                 if (conf->rlq != DDF_RAID4_N)
761                         return err_bad_ddf_layout(conf);
762                 level = 4;
763                 break;
764         case DDF_RAID5:
765                 switch (conf->rlq) {
766                 case DDF_RAID5_N_RESTART:
767                         layout = ALGORITHM_LEFT_ASYMMETRIC;
768                         break;
769                 case DDF_RAID5_0_RESTART:
770                         layout = ALGORITHM_RIGHT_ASYMMETRIC;
771                         break;
772                 case DDF_RAID5_N_CONTINUE:
773                         layout = ALGORITHM_LEFT_SYMMETRIC;
774                         break;
775                 default:
776                         return err_bad_ddf_layout(conf);
777                 }
778                 level = 5;
779                 break;
780         case DDF_RAID6:
781                 switch (conf->rlq) {
782                 case DDF_RAID5_N_RESTART:
783                         layout = ALGORITHM_ROTATING_N_RESTART;
784                         break;
785                 case DDF_RAID6_0_RESTART:
786                         layout = ALGORITHM_ROTATING_ZERO_RESTART;
787                         break;
788                 case DDF_RAID5_N_CONTINUE:
789                         layout = ALGORITHM_ROTATING_N_CONTINUE;
790                         break;
791                 default:
792                         return err_bad_ddf_layout(conf);
793                 }
794                 level = 6;
795                 break;
796         default:
797                 return err_bad_ddf_layout(conf);
798         };
799
800 good:
801         array->level = level;
802         array->layout = layout;
803         array->raid_disks = raiddisks;
804         return 0;
805 }
806
807 static int load_ddf_header(int fd, unsigned long long lba,
808                            unsigned long long size,
809                            int type,
810                            struct ddf_header *hdr, struct ddf_header *anchor)
811 {
812         /* read a ddf header (primary or secondary) from fd/lba
813          * and check that it is consistent with anchor
814          * Need to check:
815          *   magic, crc, guid, rev, and LBA's header_type, and
816          *  everything after header_type must be the same
817          */
818         if (lba >= size-1)
819                 return 0;
820
821         if (lseek64(fd, lba<<9, 0) < 0)
822                 return 0;
823
824         if (read(fd, hdr, 512) != 512)
825                 return 0;
826
827         if (!be32_eq(hdr->magic, DDF_HEADER_MAGIC)) {
828                 pr_err("bad header magic\n");
829                 return 0;
830         }
831         if (!be32_eq(calc_crc(hdr, 512), hdr->crc)) {
832                 pr_err("bad CRC\n");
833                 return 0;
834         }
835         if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
836             memcmp(anchor->revision, hdr->revision, 8) != 0 ||
837             !be64_eq(anchor->primary_lba, hdr->primary_lba) ||
838             !be64_eq(anchor->secondary_lba, hdr->secondary_lba) ||
839             hdr->type != type ||
840             memcmp(anchor->pad2, hdr->pad2, 512 -
841                    offsetof(struct ddf_header, pad2)) != 0) {
842                 pr_err("header mismatch\n");
843                 return 0;
844         }
845
846         /* Looks good enough to me... */
847         return 1;
848 }
849
850 static void *load_section(int fd, struct ddf_super *super, void *buf,
851                           be32 offset_be, be32 len_be, int check)
852 {
853         unsigned long long offset = be32_to_cpu(offset_be);
854         unsigned long long len = be32_to_cpu(len_be);
855         int dofree = (buf == NULL);
856
857         if (check)
858                 if (len != 2 && len != 8 && len != 32
859                     && len != 128 && len != 512)
860                         return NULL;
861
862         if (len > 1024)
863                 return NULL;
864         if (!buf && posix_memalign(&buf, 512, len<<9) != 0)
865                 buf = NULL;
866
867         if (!buf)
868                 return NULL;
869
870         if (super->active->type == 1)
871                 offset += be64_to_cpu(super->active->primary_lba);
872         else
873                 offset += be64_to_cpu(super->active->secondary_lba);
874
875         if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
876                 if (dofree)
877                         free(buf);
878                 return NULL;
879         }
880         if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
881                 if (dofree)
882                         free(buf);
883                 return NULL;
884         }
885         return buf;
886 }
887
888 static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
889 {
890         unsigned long long dsize;
891
892         get_dev_size(fd, NULL, &dsize);
893
894         if (lseek64(fd, dsize-512, 0) < 0) {
895                 if (devname)
896                         pr_err("Cannot seek to anchor block on %s: %s\n",
897                                devname, strerror(errno));
898                 return 1;
899         }
900         if (read(fd, &super->anchor, 512) != 512) {
901                 if (devname)
902                         pr_err("Cannot read anchor block on %s: %s\n",
903                                devname, strerror(errno));
904                 return 1;
905         }
906         if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) {
907                 if (devname)
908                         pr_err("no DDF anchor found on %s\n",
909                                 devname);
910                 return 2;
911         }
912         if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) {
913                 if (devname)
914                         pr_err("bad CRC on anchor on %s\n",
915                                 devname);
916                 return 2;
917         }
918         if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
919             memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
920                 if (devname)
921                         pr_err("can only support super revision"
922                                 " %.8s and earlier, not %.8s on %s\n",
923                                 DDF_REVISION_2, super->anchor.revision,devname);
924                 return 2;
925         }
926         super->active = NULL;
927         if (load_ddf_header(fd, be64_to_cpu(super->anchor.primary_lba),
928                             dsize >> 9,  1,
929                             &super->primary, &super->anchor) == 0) {
930                 if (devname)
931                         pr_err("Failed to load primary DDF header "
932                                "on %s\n", devname);
933         } else
934                 super->active = &super->primary;
935
936         if (load_ddf_header(fd, be64_to_cpu(super->anchor.secondary_lba),
937                             dsize >> 9,  2,
938                             &super->secondary, &super->anchor)) {
939                 if (super->active == NULL
940                     || (be32_to_cpu(super->primary.seq)
941                         < be32_to_cpu(super->secondary.seq) &&
942                         !super->secondary.openflag)
943                     || (be32_to_cpu(super->primary.seq)
944                         == be32_to_cpu(super->secondary.seq) &&
945                         super->primary.openflag && !super->secondary.openflag)
946                         )
947                         super->active = &super->secondary;
948         } else if (devname &&
949                    be64_to_cpu(super->anchor.secondary_lba) != ~(__u64)0)
950                 pr_err("Failed to load secondary DDF header on %s\n",
951                        devname);
952         if (super->active == NULL)
953                 return 2;
954         return 0;
955 }
956
957 static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
958 {
959         void *ok;
960         ok = load_section(fd, super, &super->controller,
961                           super->active->controller_section_offset,
962                           super->active->controller_section_length,
963                           0);
964         super->phys = load_section(fd, super, NULL,
965                                    super->active->phys_section_offset,
966                                    super->active->phys_section_length,
967                                    1);
968         super->pdsize = be32_to_cpu(super->active->phys_section_length) * 512;
969
970         super->virt = load_section(fd, super, NULL,
971                                    super->active->virt_section_offset,
972                                    super->active->virt_section_length,
973                                    1);
974         super->vdsize = be32_to_cpu(super->active->virt_section_length) * 512;
975         if (!ok ||
976             !super->phys ||
977             !super->virt) {
978                 free(super->phys);
979                 free(super->virt);
980                 super->phys = NULL;
981                 super->virt = NULL;
982                 return 2;
983         }
984         super->conflist = NULL;
985         super->dlist = NULL;
986
987         super->max_part = be16_to_cpu(super->active->max_partitions);
988         super->mppe = be16_to_cpu(super->active->max_primary_element_entries);
989         super->conf_rec_len = be16_to_cpu(super->active->config_record_len);
990         return 0;
991 }
992
993 #define DDF_UNUSED_BVD 0xff
994 static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
995 {
996         unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
997         unsigned int i, vdsize;
998         void *p;
999         if (n_vds == 0) {
1000                 vcl->other_bvds = NULL;
1001                 return 0;
1002         }
1003         vdsize = ddf->conf_rec_len * 512;
1004         if (posix_memalign(&p, 512, n_vds *
1005                            (vdsize +  sizeof(struct vd_config *))) != 0)
1006                 return -1;
1007         vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
1008         for (i = 0; i < n_vds; i++) {
1009                 vcl->other_bvds[i] = p + i * vdsize;
1010                 memset(vcl->other_bvds[i], 0, vdsize);
1011                 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
1012         }
1013         return 0;
1014 }
1015
1016 static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
1017                           unsigned int len)
1018 {
1019         int i;
1020         for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
1021                 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
1022                         break;
1023
1024         if (i < vcl->conf.sec_elmnt_count-1) {
1025                 if (be32_to_cpu(vd->seqnum) <=
1026                     be32_to_cpu(vcl->other_bvds[i]->seqnum))
1027                         return;
1028         } else {
1029                 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
1030                         if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
1031                                 break;
1032                 if (i == vcl->conf.sec_elmnt_count-1) {
1033                         pr_err("no space for sec level config %u, count is %u\n",
1034                                vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
1035                         return;
1036                 }
1037         }
1038         memcpy(vcl->other_bvds[i], vd, len);
1039 }
1040
1041 static int load_ddf_local(int fd, struct ddf_super *super,
1042                           char *devname, int keep)
1043 {
1044         struct dl *dl;
1045         struct stat stb;
1046         char *conf;
1047         unsigned int i;
1048         unsigned int confsec;
1049         int vnum;
1050         unsigned int max_virt_disks =
1051                 be16_to_cpu(super->active->max_vd_entries);
1052         unsigned long long dsize;
1053
1054         /* First the local disk info */
1055         if (posix_memalign((void**)&dl, 512,
1056                            sizeof(*dl) +
1057                            (super->max_part) * sizeof(dl->vlist[0])) != 0) {
1058                 pr_err("could not allocate disk info buffer\n");
1059                 return 1;
1060         }
1061
1062         load_section(fd, super, &dl->disk,
1063                      super->active->data_section_offset,
1064                      super->active->data_section_length,
1065                      0);
1066         dl->devname = devname ? xstrdup(devname) : NULL;
1067
1068         fstat(fd, &stb);
1069         dl->major = major(stb.st_rdev);
1070         dl->minor = minor(stb.st_rdev);
1071         dl->next = super->dlist;
1072         dl->fd = keep ? fd : -1;
1073
1074         dl->size = 0;
1075         if (get_dev_size(fd, devname, &dsize))
1076                 dl->size = dsize >> 9;
1077         /* If the disks have different sizes, the LBAs will differ
1078          * between phys disks.
1079          * At this point here, the values in super->active must be valid
1080          * for this phys disk. */
1081         dl->primary_lba = super->active->primary_lba;
1082         dl->secondary_lba = super->active->secondary_lba;
1083         dl->workspace_lba = super->active->workspace_lba;
1084         dl->spare = NULL;
1085         for (i = 0 ; i < super->max_part ; i++)
1086                 dl->vlist[i] = NULL;
1087         super->dlist = dl;
1088         dl->pdnum = -1;
1089         for (i = 0; i < be16_to_cpu(super->active->max_pd_entries); i++)
1090                 if (memcmp(super->phys->entries[i].guid,
1091                            dl->disk.guid, DDF_GUID_LEN) == 0)
1092                         dl->pdnum = i;
1093
1094         /* Now the config list. */
1095         /* 'conf' is an array of config entries, some of which are
1096          * probably invalid.  Those which are good need to be copied into
1097          * the conflist
1098          */
1099
1100         conf = load_section(fd, super, super->conf,
1101                             super->active->config_section_offset,
1102                             super->active->config_section_length,
1103                             0);
1104         super->conf = conf;
1105         vnum = 0;
1106         for (confsec = 0;
1107              confsec < be32_to_cpu(super->active->config_section_length);
1108              confsec += super->conf_rec_len) {
1109                 struct vd_config *vd =
1110                         (struct vd_config *)((char*)conf + confsec*512);
1111                 struct vcl *vcl;
1112
1113                 if (be32_eq(vd->magic, DDF_SPARE_ASSIGN_MAGIC)) {
1114                         if (dl->spare)
1115                                 continue;
1116                         if (posix_memalign((void**)&dl->spare, 512,
1117                                            super->conf_rec_len*512) != 0) {
1118                                 pr_err("could not allocate spare info buf\n");
1119                                 return 1;
1120                         }
1121
1122                         memcpy(dl->spare, vd, super->conf_rec_len*512);
1123                         continue;
1124                 }
1125                 if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
1126                         /* Must be vendor-unique - I cannot handle those */
1127                         continue;
1128
1129                 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1130                         if (memcmp(vcl->conf.guid,
1131                                    vd->guid, DDF_GUID_LEN) == 0)
1132                                 break;
1133                 }
1134
1135                 if (vcl) {
1136                         dl->vlist[vnum++] = vcl;
1137                         if (vcl->other_bvds != NULL &&
1138                             vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1139                                 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1140                                 continue;
1141                         }
1142                         if (be32_to_cpu(vd->seqnum) <=
1143                             be32_to_cpu(vcl->conf.seqnum))
1144                                 continue;
1145                 } else {
1146                         if (posix_memalign((void**)&vcl, 512,
1147                                            (super->conf_rec_len*512 +
1148                                             offsetof(struct vcl, conf))) != 0) {
1149                                 pr_err("could not allocate vcl buf\n");
1150                                 return 1;
1151                         }
1152                         vcl->next = super->conflist;
1153                         vcl->block_sizes = NULL; /* FIXME not for CONCAT */
1154                         vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1155                         if (alloc_other_bvds(super, vcl) != 0) {
1156                                 pr_err("could not allocate other bvds\n");
1157                                 free(vcl);
1158                                 return 1;
1159                         };
1160                         super->conflist = vcl;
1161                         dl->vlist[vnum++] = vcl;
1162                 }
1163                 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
1164                 for (i=0; i < max_virt_disks ; i++)
1165                         if (memcmp(super->virt->entries[i].guid,
1166                                    vcl->conf.guid, DDF_GUID_LEN)==0)
1167                                 break;
1168                 if (i < max_virt_disks)
1169                         vcl->vcnum = i;
1170         }
1171
1172         return 0;
1173 }
1174
1175 static int load_super_ddf(struct supertype *st, int fd,
1176                           char *devname)
1177 {
1178         unsigned long long dsize;
1179         struct ddf_super *super;
1180         int rv;
1181
1182         if (get_dev_size(fd, devname, &dsize) == 0)
1183                 return 1;
1184
1185         if (test_partition(fd))
1186                 /* DDF is not allowed on partitions */
1187                 return 1;
1188
1189         /* 32M is a lower bound */
1190         if (dsize <= 32*1024*1024) {
1191                 if (devname)
1192                         pr_err("%s is too small for ddf: "
1193                                "size is %llu sectors.\n",
1194                                devname, dsize>>9);
1195                 return 1;
1196         }
1197         if (dsize & 511) {
1198                 if (devname)
1199                         pr_err("%s is an odd size for ddf: "
1200                                "size is %llu bytes.\n",
1201                                devname, dsize);
1202                 return 1;
1203         }
1204
1205         free_super_ddf(st);
1206
1207         if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
1208                 pr_err("malloc of %zu failed.\n",
1209                         sizeof(*super));
1210                 return 1;
1211         }
1212         memset(super, 0, sizeof(*super));
1213
1214         rv = load_ddf_headers(fd, super, devname);
1215         if (rv) {
1216                 free(super);
1217                 return rv;
1218         }
1219
1220         /* Have valid headers and have chosen the best. Let's read in the rest*/
1221
1222         rv = load_ddf_global(fd, super, devname);
1223
1224         if (rv) {
1225                 if (devname)
1226                         pr_err("Failed to load all information "
1227                                "sections on %s\n", devname);
1228                 free(super);
1229                 return rv;
1230         }
1231
1232         rv = load_ddf_local(fd, super, devname, 0);
1233
1234         if (rv) {
1235                 if (devname)
1236                         pr_err("Failed to load all information "
1237                                "sections on %s\n", devname);
1238                 free(super);
1239                 return rv;
1240         }
1241
1242         /* Should possibly check the sections .... */
1243
1244         st->sb = super;
1245         if (st->ss == NULL) {
1246                 st->ss = &super_ddf;
1247                 st->minor_version = 0;
1248                 st->max_devs = 512;
1249         }
1250         return 0;
1251
1252 }
1253
1254 static void free_super_ddf(struct supertype *st)
1255 {
1256         struct ddf_super *ddf = st->sb;
1257         if (ddf == NULL)
1258                 return;
1259         free(ddf->phys);
1260         free(ddf->virt);
1261         free(ddf->conf);
1262         while (ddf->conflist) {
1263                 struct vcl *v = ddf->conflist;
1264                 ddf->conflist = v->next;
1265                 if (v->block_sizes)
1266                         free(v->block_sizes);
1267                 if (v->other_bvds)
1268                         /*
1269                            v->other_bvds[0] points to beginning of buffer,
1270                            see alloc_other_bvds()
1271                         */
1272                         free(v->other_bvds[0]);
1273                 free(v);
1274         }
1275         while (ddf->dlist) {
1276                 struct dl *d = ddf->dlist;
1277                 ddf->dlist = d->next;
1278                 if (d->fd >= 0)
1279                         close(d->fd);
1280                 if (d->spare)
1281                         free(d->spare);
1282                 free(d);
1283         }
1284         while (ddf->add_list) {
1285                 struct dl *d = ddf->add_list;
1286                 ddf->add_list = d->next;
1287                 if (d->fd >= 0)
1288                         close(d->fd);
1289                 if (d->spare)
1290                         free(d->spare);
1291                 free(d);
1292         }
1293         free(ddf);
1294         st->sb = NULL;
1295 }
1296
1297 static struct supertype *match_metadata_desc_ddf(char *arg)
1298 {
1299         /* 'ddf' only supports containers */
1300         struct supertype *st;
1301         if (strcmp(arg, "ddf") != 0 &&
1302             strcmp(arg, "default") != 0
1303                 )
1304                 return NULL;
1305
1306         st = xcalloc(1, sizeof(*st));
1307         st->ss = &super_ddf;
1308         st->max_devs = 512;
1309         st->minor_version = 0;
1310         st->sb = NULL;
1311         return st;
1312 }
1313
1314 #ifndef MDASSEMBLE
1315
1316 static mapping_t ddf_state[] = {
1317         { "Optimal", 0},
1318         { "Degraded", 1},
1319         { "Deleted", 2},
1320         { "Missing", 3},
1321         { "Failed", 4},
1322         { "Partially Optimal", 5},
1323         { "-reserved-", 6},
1324         { "-reserved-", 7},
1325         { NULL, 0}
1326 };
1327
1328 static mapping_t ddf_init_state[] = {
1329         { "Not Initialised", 0},
1330         { "QuickInit in Progress", 1},
1331         { "Fully Initialised", 2},
1332         { "*UNKNOWN*", 3},
1333         { NULL, 0}
1334 };
1335 static mapping_t ddf_access[] = {
1336         { "Read/Write", 0},
1337         { "Reserved", 1},
1338         { "Read Only", 2},
1339         { "Blocked (no access)", 3},
1340         { NULL ,0}
1341 };
1342
1343 static mapping_t ddf_level[] = {
1344         { "RAID0", DDF_RAID0},
1345         { "RAID1", DDF_RAID1},
1346         { "RAID3", DDF_RAID3},
1347         { "RAID4", DDF_RAID4},
1348         { "RAID5", DDF_RAID5},
1349         { "RAID1E",DDF_RAID1E},
1350         { "JBOD",  DDF_JBOD},
1351         { "CONCAT",DDF_CONCAT},
1352         { "RAID5E",DDF_RAID5E},
1353         { "RAID5EE",DDF_RAID5EE},
1354         { "RAID6", DDF_RAID6},
1355         { NULL, 0}
1356 };
1357 static mapping_t ddf_sec_level[] = {
1358         { "Striped", DDF_2STRIPED},
1359         { "Mirrored", DDF_2MIRRORED},
1360         { "Concat", DDF_2CONCAT},
1361         { "Spanned", DDF_2SPANNED},
1362         { NULL, 0}
1363 };
1364 #endif
1365
1366 static int all_ff(const char *guid)
1367 {
1368         int i;
1369         for (i = 0; i < DDF_GUID_LEN; i++)
1370                 if (guid[i] != (char)0xff)
1371                         return 0;
1372         return 1;
1373 }
1374
1375 static const char *guid_str(const char *guid)
1376 {
1377         static char buf[DDF_GUID_LEN*2+1];
1378         int i;
1379         char *p = buf;
1380         for (i = 0; i < DDF_GUID_LEN; i++) {
1381                 unsigned char c = guid[i];
1382                 if (c >= 32 && c < 127)
1383                         p += sprintf(p, "%c", c);
1384                 else
1385                         p += sprintf(p, "%02x", c);
1386         }
1387         *p = '\0';
1388         return (const char *) buf;
1389 }
1390
1391 #ifndef MDASSEMBLE
1392 static void print_guid(char *guid, int tstamp)
1393 {
1394         /* A GUIDs are part (or all) ASCII and part binary.
1395          * They tend to be space padded.
1396          * We print the GUID in HEX, then in parentheses add
1397          * any initial ASCII sequence, and a possible
1398          * time stamp from bytes 16-19
1399          */
1400         int l = DDF_GUID_LEN;
1401         int i;
1402
1403         for (i=0 ; i<DDF_GUID_LEN ; i++) {
1404                 if ((i&3)==0 && i != 0) printf(":");
1405                 printf("%02X", guid[i]&255);
1406         }
1407
1408         printf("\n                  (");
1409         while (l && guid[l-1] == ' ')
1410                 l--;
1411         for (i=0 ; i<l ; i++) {
1412                 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1413                         fputc(guid[i], stdout);
1414                 else
1415                         break;
1416         }
1417         if (tstamp) {
1418                 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1419                 char tbuf[100];
1420                 struct tm *tm;
1421                 tm = localtime(&then);
1422                 strftime(tbuf, 100, " %D %T",tm);
1423                 fputs(tbuf, stdout);
1424         }
1425         printf(")");
1426 }
1427
1428 static void examine_vd(int n, struct ddf_super *sb, char *guid)
1429 {
1430         int crl = sb->conf_rec_len;
1431         struct vcl *vcl;
1432
1433         for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
1434                 unsigned int i;
1435                 struct vd_config *vc = &vcl->conf;
1436
1437                 if (!be32_eq(calc_crc(vc, crl*512), vc->crc))
1438                         continue;
1439                 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1440                         continue;
1441
1442                 /* Ok, we know about this VD, let's give more details */
1443                 printf(" Raid Devices[%d] : %d (", n,
1444                        be16_to_cpu(vc->prim_elmnt_count));
1445                 for (i = 0; i < be16_to_cpu(vc->prim_elmnt_count); i++) {
1446                         int j;
1447                         int cnt = be16_to_cpu(sb->phys->max_pdes);
1448                         for (j=0; j<cnt; j++)
1449                                 if (be32_eq(vc->phys_refnum[i],
1450                                             sb->phys->entries[j].refnum))
1451                                         break;
1452                         if (i) printf(" ");
1453                         if (j < cnt)
1454                                 printf("%d", j);
1455                         else
1456                                 printf("--");
1457                         printf("@%lluK", (unsigned long long) be64_to_cpu(LBA_OFFSET(sb, vc)[i])/2);
1458                 }
1459                 printf(")\n");
1460                 if (vc->chunk_shift != 255)
1461                         printf("   Chunk Size[%d] : %d sectors\n", n,
1462                                1 << vc->chunk_shift);
1463                 printf("   Raid Level[%d] : %s\n", n,
1464                        map_num(ddf_level, vc->prl)?:"-unknown-");
1465                 if (vc->sec_elmnt_count != 1) {
1466                         printf("  Secondary Position[%d] : %d of %d\n", n,
1467                                vc->sec_elmnt_seq, vc->sec_elmnt_count);
1468                         printf("  Secondary Level[%d] : %s\n", n,
1469                                map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1470                 }
1471                 printf("  Device Size[%d] : %llu\n", n,
1472                        be64_to_cpu(vc->blocks)/2);
1473                 printf("   Array Size[%d] : %llu\n", n,
1474                        be64_to_cpu(vc->array_blocks)/2);
1475         }
1476 }
1477
1478 static void examine_vds(struct ddf_super *sb)
1479 {
1480         int cnt = be16_to_cpu(sb->virt->populated_vdes);
1481         unsigned int i;
1482         printf("  Virtual Disks : %d\n", cnt);
1483
1484         for (i = 0; i < be16_to_cpu(sb->virt->max_vdes); i++) {
1485                 struct virtual_entry *ve = &sb->virt->entries[i];
1486                 if (all_ff(ve->guid))
1487                         continue;
1488                 printf("\n");
1489                 printf("      VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1490                 printf("\n");
1491                 printf("         unit[%d] : %d\n", i, be16_to_cpu(ve->unit));
1492                 printf("        state[%d] : %s, %s%s\n", i,
1493                        map_num(ddf_state, ve->state & 7),
1494                        (ve->state & DDF_state_morphing) ? "Morphing, ": "",
1495                        (ve->state & DDF_state_inconsistent)? "Not Consistent" : "Consistent");
1496                 printf("   init state[%d] : %s\n", i,
1497                        map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
1498                 printf("       access[%d] : %s\n", i,
1499                        map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
1500                 printf("         Name[%d] : %.16s\n", i, ve->name);
1501                 examine_vd(i, sb, ve->guid);
1502         }
1503         if (cnt) printf("\n");
1504 }
1505
1506 static void examine_pds(struct ddf_super *sb)
1507 {
1508         int cnt = be16_to_cpu(sb->phys->max_pdes);
1509         int i;
1510         struct dl *dl;
1511         int unlisted = 0;
1512         printf(" Physical Disks : %d\n", cnt);
1513         printf("      Number    RefNo      Size       Device      Type/State\n");
1514
1515         for (dl = sb->dlist; dl; dl = dl->next)
1516                 dl->displayed = 0;
1517
1518         for (i=0 ; i<cnt ; i++) {
1519                 struct phys_disk_entry *pd = &sb->phys->entries[i];
1520                 int type = be16_to_cpu(pd->type);
1521                 int state = be16_to_cpu(pd->state);
1522
1523                 if (be32_to_cpu(pd->refnum) == 0xffffffff)
1524                         /* Not in use */
1525                         continue;
1526                 //printf("      PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1527                 //printf("\n");
1528                 printf("       %3d    %08x  ", i,
1529                        be32_to_cpu(pd->refnum));
1530                 printf("%8lluK ",
1531                        be64_to_cpu(pd->config_size)>>1);
1532                 for (dl = sb->dlist; dl ; dl = dl->next) {
1533                         if (be32_eq(dl->disk.refnum, pd->refnum)) {
1534                                 char *dv = map_dev(dl->major, dl->minor, 0);
1535                                 if (dv) {
1536                                         printf("%-15s", dv);
1537                                         break;
1538                                 }
1539                         }
1540                 }
1541                 if (!dl)
1542                         printf("%15s","");
1543                 else
1544                         dl->displayed = 1;
1545                 printf(" %s%s%s%s%s",
1546                        (type&2) ? "active":"",
1547                        (type&4) ? "Global-Spare":"",
1548                        (type&8) ? "spare" : "",
1549                        (type&16)? ", foreign" : "",
1550                        (type&32)? "pass-through" : "");
1551                 if (state & DDF_Failed)
1552                         /* This over-rides these three */
1553                         state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
1554                 printf("/%s%s%s%s%s%s%s",
1555                        (state&1)? "Online": "Offline",
1556                        (state&2)? ", Failed": "",
1557                        (state&4)? ", Rebuilding": "",
1558                        (state&8)? ", in-transition": "",
1559                        (state&16)? ", SMART-errors": "",
1560                        (state&32)? ", Unrecovered-Read-Errors": "",
1561                        (state&64)? ", Missing" : "");
1562                 printf("\n");
1563         }
1564         for (dl = sb->dlist; dl; dl = dl->next) {
1565                 char *dv;
1566                 if (dl->displayed)
1567                         continue;
1568                 if (!unlisted)
1569                         printf(" Physical disks not in metadata!:\n");
1570                 unlisted = 1;
1571                 dv = map_dev(dl->major, dl->minor, 0);
1572                 printf("   %08x %s\n", be32_to_cpu(dl->disk.refnum),
1573                        dv ? dv : "-unknown-");
1574         }
1575         if (unlisted)
1576                 printf("\n");
1577 }
1578
1579 static void examine_super_ddf(struct supertype *st, char *homehost)
1580 {
1581         struct ddf_super *sb = st->sb;
1582
1583         printf("          Magic : %08x\n", be32_to_cpu(sb->anchor.magic));
1584         printf("        Version : %.8s\n", sb->anchor.revision);
1585         printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1586         printf("\n");
1587         printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
1588         printf("\n");
1589         printf("            Seq : %08x\n", be32_to_cpu(sb->active->seq));
1590         printf("  Redundant hdr : %s\n", (be32_eq(sb->secondary.magic,
1591                                                  DDF_HEADER_MAGIC)
1592                                           ?"yes" : "no"));
1593         examine_vds(sb);
1594         examine_pds(sb);
1595 }
1596
1597 static unsigned int get_vd_num_of_subarray(struct supertype *st)
1598 {
1599         /*
1600          * Figure out the VD number for this supertype.
1601          * Returns DDF_CONTAINER for the container itself,
1602          * and DDF_NOTFOUND on error.
1603          */
1604         struct ddf_super *ddf = st->sb;
1605         struct mdinfo *sra;
1606         char *sub, *end;
1607         unsigned int vcnum;
1608
1609         if (*st->container_devnm == '\0')
1610                 return DDF_CONTAINER;
1611
1612         sra = sysfs_read(-1, st->devnm, GET_VERSION);
1613         if (!sra || sra->array.major_version != -1 ||
1614             sra->array.minor_version != -2 ||
1615             !is_subarray(sra->text_version))
1616                 return DDF_NOTFOUND;
1617
1618         sub = strchr(sra->text_version + 1, '/');
1619         if (sub != NULL)
1620                 vcnum = strtoul(sub + 1, &end, 10);
1621         if (sub == NULL || *sub == '\0' || *end != '\0' ||
1622             vcnum >= be16_to_cpu(ddf->active->max_vd_entries))
1623                 return DDF_NOTFOUND;
1624
1625         return vcnum;
1626 }
1627
1628 static void brief_examine_super_ddf(struct supertype *st, int verbose)
1629 {
1630         /* We just write a generic DDF ARRAY entry
1631          */
1632         struct mdinfo info;
1633         char nbuf[64];
1634         getinfo_super_ddf(st, &info, NULL);
1635         fname_from_uuid(st, &info, nbuf, ':');
1636
1637         printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1638 }
1639
1640 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
1641 {
1642         /* We write a DDF ARRAY member entry for each vd, identifying container
1643          * by uuid and member by unit number and uuid.
1644          */
1645         struct ddf_super *ddf = st->sb;
1646         struct mdinfo info;
1647         unsigned int i;
1648         char nbuf[64];
1649         getinfo_super_ddf(st, &info, NULL);
1650         fname_from_uuid(st, &info, nbuf, ':');
1651
1652         for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
1653                 struct virtual_entry *ve = &ddf->virt->entries[i];
1654                 struct vcl vcl;
1655                 char nbuf1[64];
1656                 char namebuf[17];
1657                 if (all_ff(ve->guid))
1658                         continue;
1659                 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1660                 ddf->currentconf =&vcl;
1661                 vcl.vcnum = i;
1662                 uuid_from_super_ddf(st, info.uuid);
1663                 fname_from_uuid(st, &info, nbuf1, ':');
1664                 _ddf_array_name(namebuf, ddf, i);
1665                 printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
1666                        namebuf[0] == '\0' ? "" : " /dev/md/", namebuf,
1667                        nbuf+5, i, nbuf1+5);
1668         }
1669 }
1670
1671 static void export_examine_super_ddf(struct supertype *st)
1672 {
1673         struct mdinfo info;
1674         char nbuf[64];
1675         getinfo_super_ddf(st, &info, NULL);
1676         fname_from_uuid(st, &info, nbuf, ':');
1677         printf("MD_METADATA=ddf\n");
1678         printf("MD_LEVEL=container\n");
1679         printf("MD_UUID=%s\n", nbuf+5);
1680         printf("MD_DEVICES=%u\n",
1681                 be16_to_cpu(((struct ddf_super *)st->sb)->phys->used_pdes));
1682 }
1683
1684 static int copy_metadata_ddf(struct supertype *st, int from, int to)
1685 {
1686         void *buf;
1687         unsigned long long dsize, offset;
1688         int bytes;
1689         struct ddf_header *ddf;
1690         int written = 0;
1691
1692         /* The meta consists of an anchor, a primary, and a secondary.
1693          * This all lives at the end of the device.
1694          * So it is easiest to find the earliest of primary and
1695          * secondary, and copy everything from there.
1696          *
1697          * Anchor is 512 from end.  It contains primary_lba and secondary_lba
1698          * we choose one of those
1699          */
1700
1701         if (posix_memalign(&buf, 4096, 4096) != 0)
1702                 return 1;
1703
1704         if (!get_dev_size(from, NULL, &dsize))
1705                 goto err;
1706
1707         if (lseek64(from, dsize-512, 0) < 0)
1708                 goto err;
1709         if (read(from, buf, 512) != 512)
1710                 goto err;
1711         ddf = buf;
1712         if (!be32_eq(ddf->magic, DDF_HEADER_MAGIC) ||
1713             !be32_eq(calc_crc(ddf, 512), ddf->crc) ||
1714             (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1715              memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1716                 goto err;
1717
1718         offset = dsize - 512;
1719         if ((be64_to_cpu(ddf->primary_lba) << 9) < offset)
1720                 offset = be64_to_cpu(ddf->primary_lba) << 9;
1721         if ((be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1722                 offset = be64_to_cpu(ddf->secondary_lba) << 9;
1723
1724         bytes = dsize - offset;
1725
1726         if (lseek64(from, offset, 0) < 0 ||
1727             lseek64(to, offset, 0) < 0)
1728                 goto err;
1729         while (written < bytes) {
1730                 int n = bytes - written;
1731                 if (n > 4096)
1732                         n = 4096;
1733                 if (read(from, buf, n) != n)
1734                         goto err;
1735                 if (write(to, buf, n) != n)
1736                         goto err;
1737                 written += n;
1738         }
1739         free(buf);
1740         return 0;
1741 err:
1742         free(buf);
1743         return 1;
1744 }
1745
1746 static void detail_super_ddf(struct supertype *st, char *homehost)
1747 {
1748         struct ddf_super *sb = st->sb;
1749         int cnt = be16_to_cpu(sb->virt->populated_vdes);
1750
1751         printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
1752         printf("\n");
1753         printf("            Seq : %08x\n", be32_to_cpu(sb->active->seq));
1754         printf("  Virtual Disks : %d\n", cnt);
1755         printf("\n");
1756 }
1757 #endif
1758
1759 static const char *vendors_with_variable_volume_UUID[] = {
1760         "LSI      ",
1761 };
1762
1763 static int volume_id_is_reliable(const struct ddf_super *ddf)
1764 {
1765         int n = ARRAY_SIZE(vendors_with_variable_volume_UUID);
1766         int i;
1767         for (i = 0; i < n; i++)
1768                 if (!memcmp(ddf->controller.guid,
1769                         vendors_with_variable_volume_UUID[i], 8))
1770                 return 0;
1771         return 1;
1772 }
1773
1774 static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
1775                                  unsigned int vcnum, int uuid[4])
1776 {
1777         char buf[DDF_GUID_LEN+18], sha[20], *p;
1778         struct sha1_ctx ctx;
1779         if (volume_id_is_reliable(ddf)) {
1780                 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, uuid);
1781                 return;
1782         }
1783         /*
1784          * Some fake RAID BIOSes (in particular, LSI ones) change the
1785          * VD GUID at every boot. These GUIDs are not suitable for
1786          * identifying an array. Luckily the header GUID appears to
1787          * remain constant.
1788          * We construct a pseudo-UUID from the header GUID and those
1789          * properties of the subarray that we expect to remain constant.
1790          */
1791         memset(buf, 0, sizeof(buf));
1792         p = buf;
1793         memcpy(p, ddf->anchor.guid, DDF_GUID_LEN);
1794         p += DDF_GUID_LEN;
1795         memcpy(p, ddf->virt->entries[vcnum].name, 16);
1796         p += 16;
1797         *((__u16 *) p) = vcnum;
1798         sha1_init_ctx(&ctx);
1799         sha1_process_bytes(buf, sizeof(buf), &ctx);
1800         sha1_finish_ctx(&ctx, sha);
1801         memcpy(uuid, sha, 4*4);
1802 }
1803
1804 #ifndef MDASSEMBLE
1805 static void brief_detail_super_ddf(struct supertype *st)
1806 {
1807         struct mdinfo info;
1808         char nbuf[64];
1809         struct ddf_super *ddf = st->sb;
1810         unsigned int vcnum = get_vd_num_of_subarray(st);
1811         if (vcnum == DDF_CONTAINER)
1812                 uuid_from_super_ddf(st, info.uuid);
1813         else if (vcnum == DDF_NOTFOUND)
1814                 return;
1815         else
1816                 uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
1817         fname_from_uuid(st, &info, nbuf,':');
1818         printf(" UUID=%s", nbuf + 5);
1819 }
1820 #endif
1821
1822 static int match_home_ddf(struct supertype *st, char *homehost)
1823 {
1824         /* It matches 'this' host if the controller is a
1825          * Linux-MD controller with vendor_data matching
1826          * the hostname.  It would be nice if we could
1827          * test against controller found in /sys or somewhere...
1828          */
1829         struct ddf_super *ddf = st->sb;
1830         unsigned int len;
1831
1832         if (!homehost)
1833                 return 0;
1834         len = strlen(homehost);
1835
1836         return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1837                 len < sizeof(ddf->controller.vendor_data) &&
1838                 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1839                 ddf->controller.vendor_data[len] == 0);
1840 }
1841
1842 #ifndef MDASSEMBLE
1843 static int find_index_in_bvd(const struct ddf_super *ddf,
1844                              const struct vd_config *conf, unsigned int n,
1845                              unsigned int *n_bvd)
1846 {
1847         /*
1848          * Find the index of the n-th valid physical disk in this BVD.
1849          * Unused entries can be sprinkled in with the used entries,
1850          * but don't count.
1851          */
1852         unsigned int i, j;
1853         for (i = 0, j = 0;
1854              i < ddf->mppe && j < be16_to_cpu(conf->prim_elmnt_count);
1855              i++) {
1856                 if (be32_to_cpu(conf->phys_refnum[i]) != 0xffffffff) {
1857                         if (n == j) {
1858                                 *n_bvd = i;
1859                                 return 1;
1860                         }
1861                         j++;
1862                 }
1863         }
1864         dprintf("couldn't find BVD member %u (total %u)\n",
1865                 n, be16_to_cpu(conf->prim_elmnt_count));
1866         return 0;
1867 }
1868
1869 /* Given a member array instance number, and a raid disk within that instance,
1870  * find the vd_config structure.  The offset of the given disk in the phys_refnum
1871  * table is returned in n_bvd.
1872  * For two-level members with a secondary raid level the vd_config for
1873  * the appropriate BVD is returned.
1874  * The return value is always &vlc->conf, where vlc is returned in last pointer.
1875  */
1876 static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1877                                    unsigned int n,
1878                                    unsigned int *n_bvd, struct vcl **vcl)
1879 {
1880         struct vcl *v;
1881
1882         for (v = ddf->conflist; v; v = v->next) {
1883                 unsigned int nsec, ibvd = 0;
1884                 struct vd_config *conf;
1885                 if (inst != v->vcnum)
1886                         continue;
1887                 conf = &v->conf;
1888                 if (conf->sec_elmnt_count == 1) {
1889                         if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1890                                 *vcl = v;
1891                                 return conf;
1892                         } else
1893                                 goto bad;
1894                 }
1895                 if (v->other_bvds == NULL) {
1896                         pr_err("BUG: other_bvds is NULL, nsec=%u\n",
1897                                conf->sec_elmnt_count);
1898                         goto bad;
1899                 }
1900                 nsec = n / be16_to_cpu(conf->prim_elmnt_count);
1901                 if (conf->sec_elmnt_seq != nsec) {
1902                         for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1903                                 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1904                                     == nsec)
1905                                         break;
1906                         }
1907                         if (ibvd == conf->sec_elmnt_count)
1908                                 goto bad;
1909                         conf = v->other_bvds[ibvd-1];
1910                 }
1911                 if (!find_index_in_bvd(ddf, conf,
1912                                        n - nsec*conf->sec_elmnt_count, n_bvd))
1913                         goto bad;
1914                 dprintf("found disk %u as member %u in bvd %d of array %u\n",
1915                         n, *n_bvd, ibvd, inst);
1916                 *vcl = v;
1917                 return conf;
1918         }
1919 bad:
1920         pr_err("Could't find disk %d in array %u\n", n, inst);
1921         return NULL;
1922 }
1923 #endif
1924
1925 static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
1926 {
1927         /* Find the entry in phys_disk which has the given refnum
1928          * and return it's index
1929          */
1930         unsigned int i;
1931         for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++)
1932                 if (be32_eq(ddf->phys->entries[i].refnum, phys_refnum))
1933                         return i;
1934         return -1;
1935 }
1936
1937 static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1938 {
1939         char buf[20];
1940         struct sha1_ctx ctx;
1941         sha1_init_ctx(&ctx);
1942         sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1943         sha1_finish_ctx(&ctx, buf);
1944         memcpy(uuid, buf, 4*4);
1945 }
1946
1947 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1948 {
1949         /* The uuid returned here is used for:
1950          *  uuid to put into bitmap file (Create, Grow)
1951          *  uuid for backup header when saving critical section (Grow)
1952          *  comparing uuids when re-adding a device into an array
1953          *    In these cases the uuid required is that of the data-array,
1954          *    not the device-set.
1955          *  uuid to recognise same set when adding a missing device back
1956          *    to an array.   This is a uuid for the device-set.
1957          *
1958          * For each of these we can make do with a truncated
1959          * or hashed uuid rather than the original, as long as
1960          * everyone agrees.
1961          * In the case of SVD we assume the BVD is of interest,
1962          * though that might be the case if a bitmap were made for
1963          * a mirrored SVD - worry about that later.
1964          * So we need to find the VD configuration record for the
1965          * relevant BVD and extract the GUID and Secondary_Element_Seq.
1966          * The first 16 bytes of the sha1 of these is used.
1967          */
1968         struct ddf_super *ddf = st->sb;
1969         struct vcl *vcl = ddf->currentconf;
1970
1971         if (vcl)
1972                 uuid_of_ddf_subarray(ddf, vcl->vcnum, uuid);
1973         else
1974                 uuid_from_ddf_guid(ddf->anchor.guid, uuid);
1975 }
1976
1977 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
1978 {
1979         struct ddf_super *ddf = st->sb;
1980         int map_disks = info->array.raid_disks;
1981         __u32 *cptr;
1982
1983         if (ddf->currentconf) {
1984                 getinfo_super_ddf_bvd(st, info, map);
1985                 return;
1986         }
1987         memset(info, 0, sizeof(*info));
1988
1989         info->array.raid_disks    = be16_to_cpu(ddf->phys->used_pdes);
1990         info->array.level         = LEVEL_CONTAINER;
1991         info->array.layout        = 0;
1992         info->array.md_minor      = -1;
1993         cptr = (__u32 *)(ddf->anchor.guid + 16);
1994         info->array.ctime         = DECADE + __be32_to_cpu(*cptr);
1995
1996         info->array.chunk_size    = 0;
1997         info->container_enough    = 1;
1998
1999         info->disk.major          = 0;
2000         info->disk.minor          = 0;
2001         if (ddf->dlist) {
2002                 struct phys_disk_entry *pde = NULL;
2003                 info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
2004                 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
2005
2006                 info->data_offset = be64_to_cpu(ddf->phys->
2007                                                   entries[info->disk.raid_disk].
2008                                                   config_size);
2009                 info->component_size = ddf->dlist->size - info->data_offset;
2010                 if (info->disk.raid_disk >= 0)
2011                         pde = ddf->phys->entries + info->disk.raid_disk;
2012                 if (pde &&
2013                     !(be16_to_cpu(pde->state) & DDF_Failed) &&
2014                     !(be16_to_cpu(pde->state) & DDF_Missing))
2015                         info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
2016                 else
2017                         info->disk.state = 1 << MD_DISK_FAULTY;
2018
2019         } else {
2020                 /* There should always be a dlist, but just in case...*/
2021                 info->disk.number = -1;
2022                 info->disk.raid_disk = -1;
2023                 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
2024         }
2025         info->events = be32_to_cpu(ddf->active->seq);
2026         info->array.utime = DECADE + be32_to_cpu(ddf->active->timestamp);
2027
2028         info->recovery_start = MaxSector;
2029         info->reshape_active = 0;
2030         info->recovery_blocked = 0;
2031         info->name[0] = 0;
2032
2033         info->array.major_version = -1;
2034         info->array.minor_version = -2;
2035         strcpy(info->text_version, "ddf");
2036         info->safe_mode_delay = 0;
2037
2038         uuid_from_super_ddf(st, info->uuid);
2039
2040         if (map) {
2041                 int i, e = 0;
2042                 int max = be16_to_cpu(ddf->phys->max_pdes);
2043                 for (i = e = 0 ; i < map_disks ; i++, e++) {
2044                         while (e < max &&
2045                                be32_to_cpu(ddf->phys->entries[e].refnum) == 0xffffffff)
2046                                 e++;
2047                         if (i < info->array.raid_disks && e < max &&
2048                             !(be16_to_cpu(ddf->phys->entries[e].state)
2049                               & DDF_Failed))
2050                                 map[i] = 1;
2051                         else
2052                                 map[i] = 0;
2053                 }
2054         }
2055 }
2056
2057 /* size of name must be at least 17 bytes! */
2058 static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i)
2059 {
2060         int j;
2061         memcpy(name, ddf->virt->entries[i].name, 16);
2062         name[16] = 0;
2063         for(j = 0; j < 16; j++)
2064                 if (name[j] == ' ')
2065                         name[j] = 0;
2066 }
2067
2068 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
2069 {
2070         struct ddf_super *ddf = st->sb;
2071         struct vcl *vc = ddf->currentconf;
2072         int cd = ddf->currentdev;
2073         int n_prim;
2074         int j;
2075         struct dl *dl = NULL;
2076         int map_disks = info->array.raid_disks;
2077         __u32 *cptr;
2078         struct vd_config *conf;
2079
2080         memset(info, 0, sizeof(*info));
2081         if (layout_ddf2md(&vc->conf, &info->array) == -1)
2082                 return;
2083         info->array.md_minor      = -1;
2084         cptr = (__u32 *)(vc->conf.guid + 16);
2085         info->array.ctime         = DECADE + __be32_to_cpu(*cptr);
2086         info->array.utime         = DECADE + be32_to_cpu(vc->conf.timestamp);
2087         info->array.chunk_size    = 512 << vc->conf.chunk_shift;
2088         info->custom_array_size   = be64_to_cpu(vc->conf.array_blocks);
2089
2090         conf = &vc->conf;
2091         n_prim = be16_to_cpu(conf->prim_elmnt_count);
2092         if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
2093                 int ibvd = cd / n_prim - 1;
2094                 cd %= n_prim;
2095                 conf = vc->other_bvds[ibvd];
2096         }
2097
2098         if (cd >= 0 && (unsigned)cd < ddf->mppe) {
2099                 info->data_offset =
2100                         be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
2101                 if (vc->block_sizes)
2102                         info->component_size = vc->block_sizes[cd];
2103                 else
2104                         info->component_size = be64_to_cpu(conf->blocks);
2105
2106                 for (dl = ddf->dlist; dl ; dl = dl->next)
2107                         if (be32_eq(dl->disk.refnum, conf->phys_refnum[cd]))
2108                                 break;
2109         }
2110
2111         info->disk.major = 0;
2112         info->disk.minor = 0;
2113         info->disk.state = 0;
2114         if (dl && dl->pdnum >= 0) {
2115                 info->disk.major = dl->major;
2116                 info->disk.minor = dl->minor;
2117                 info->disk.raid_disk = cd + conf->sec_elmnt_seq
2118                         * be16_to_cpu(conf->prim_elmnt_count);
2119                 info->disk.number = dl->pdnum;
2120                 info->disk.state = 0;
2121                 if (info->disk.number >= 0 &&
2122                     (be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Online) &&
2123                     !(be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Failed))
2124                         info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
2125                 info->events = be32_to_cpu(ddf->active->seq);
2126         }
2127
2128         info->container_member = ddf->currentconf->vcnum;
2129
2130         info->recovery_start = MaxSector;
2131         info->resync_start = 0;
2132         info->reshape_active = 0;
2133         info->recovery_blocked = 0;
2134         if (!(ddf->virt->entries[info->container_member].state
2135               & DDF_state_inconsistent)  &&
2136             (ddf->virt->entries[info->container_member].init_state
2137              & DDF_initstate_mask)
2138             == DDF_init_full)
2139                 info->resync_start = MaxSector;
2140
2141         uuid_from_super_ddf(st, info->uuid);
2142
2143         info->array.major_version = -1;
2144         info->array.minor_version = -2;
2145         sprintf(info->text_version, "/%s/%d",
2146                 st->container_devnm,
2147                 info->container_member);
2148         info->safe_mode_delay = DDF_SAFE_MODE_DELAY;
2149
2150         _ddf_array_name(info->name, ddf, info->container_member);
2151
2152         if (map)
2153                 for (j = 0; j < map_disks; j++) {
2154                         map[j] = 0;
2155                         if (j <  info->array.raid_disks) {
2156                                 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
2157                                 if (i >= 0 &&
2158                                     (be16_to_cpu(ddf->phys->entries[i].state)
2159                                      & DDF_Online) &&
2160                                     !(be16_to_cpu(ddf->phys->entries[i].state)
2161                                       & DDF_Failed))
2162                                         map[i] = 1;
2163                         }
2164                 }
2165 }
2166
/*
 * Apply a named metadata update.
 *
 * For 'assemble' and 'force' callers need a non-zero return if any
 * change was made.  For others, the return value is ignored.
 * Update options known to callers are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *              be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *         if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *              linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * Nothing is actually modified here.  We don't need to handle
 * "force-*" or "assemble" as there is no need to 'trick' the kernel:
 * when the metadata is first updated to activate the array, all the
 * implied modifications will just happen.
 *
 * Returns 0 for "grow" (FIXME: not implemented), "resync",
 * "_reshape_progress" (reshape is not supported yet) and "assemble";
 * returns -1 for everything else, including "homehost" (which would
 * live in controller->vendor_data) and "name" (which would live in
 * virtual_entry->name), neither of which is implemented.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
                            char *update,
                            char *devname, int verbose,
                            int uuid_set, char *homehost)
{
        /* updates that are accepted without doing anything */
        static const char *const accepted[] = {
                "grow",
                "resync",
                "_reshape_progress",
                "assemble",
        };
        unsigned int k;

        for (k = 0; k < ARRAY_SIZE(accepted); k++) {
                if (strcmp(update, accepted[k]) == 0)
                        return 0;
        }
        return -1;
}
2235
2236 static void make_header_guid(char *guid)
2237 {
2238         be32 stamp;
2239         /* Create a DDF Header of Virtual Disk GUID */
2240
2241         /* 24 bytes of fiction required.
2242          * first 8 are a 'vendor-id'  - "Linux-MD"
2243          * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2244          * Remaining 8 random number plus timestamp
2245          */
2246         memcpy(guid, T10, sizeof(T10));
2247         stamp = cpu_to_be32(0xdeadbeef);
2248         memcpy(guid+8, &stamp, 4);
2249         stamp = cpu_to_be32(0);
2250         memcpy(guid+12, &stamp, 4);
2251         stamp = cpu_to_be32(time(0) - DECADE);
2252         memcpy(guid+16, &stamp, 4);
2253         stamp._v32 = random32();
2254         memcpy(guid+20, &stamp, 4);
2255 }
2256
2257 static unsigned int find_unused_vde(const struct ddf_super *ddf)
2258 {
2259         unsigned int i;
2260         for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
2261                 if (all_ff(ddf->virt->entries[i].guid))
2262                         return i;
2263         }
2264         return DDF_NOTFOUND;
2265 }
2266
2267 static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2268                                      const char *name)
2269 {
2270         unsigned int i;
2271         if (name == NULL)
2272                 return DDF_NOTFOUND;
2273         for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
2274                 if (all_ff(ddf->virt->entries[i].guid))
2275                         continue;
2276                 if (!strncmp(name, ddf->virt->entries[i].name,
2277                              sizeof(ddf->virt->entries[i].name)))
2278                         return i;
2279         }
2280         return DDF_NOTFOUND;
2281 }
2282
2283 #ifndef MDASSEMBLE
2284 static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2285                                      const char *guid)
2286 {
2287         unsigned int i;
2288         if (guid == NULL || all_ff(guid))
2289                 return DDF_NOTFOUND;
2290         for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++)
2291                 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2292                         return i;
2293         return DDF_NOTFOUND;
2294 }
2295 #endif
2296
2297 static int init_super_ddf(struct supertype *st,
2298                           mdu_array_info_t *info,
2299                           unsigned long long size, char *name, char *homehost,
2300                           int *uuid, unsigned long long data_offset)
2301 {
2302         /* This is primarily called by Create when creating a new array.
2303          * We will then get add_to_super called for each component, and then
2304          * write_init_super called to write it out to each device.
2305          * For DDF, Create can create on fresh devices or on a pre-existing
2306          * array.
2307          * To create on a pre-existing array a different method will be called.
2308          * This one is just for fresh drives.
2309          *
2310          * We need to create the entire 'ddf' structure which includes:
2311          *  DDF headers - these are easy.
2312          *  Controller data - a Sector describing this controller .. not that
2313          *                    this is a controller exactly.
2314          *  Physical Disk Record - one entry per device, so
2315          *                         leave plenty of space.
2316          *  Virtual Disk Records - again, just leave plenty of space.
2317          *                         This just lists VDs, doesn't give details.
2318          *  Config records - describe the VDs that use this disk
2319          *  DiskData  - describes 'this' device.
2320          *  BadBlockManagement - empty
2321          *  Diag Space - empty
2322          *  Vendor Logs - Could we put bitmaps here?
2323          *
2324          */
2325         struct ddf_super *ddf;
2326         char hostname[17];
2327         int hostlen;
2328         int max_phys_disks, max_virt_disks;
2329         unsigned long long sector;
2330         int clen;
2331         int i;
2332         int pdsize, vdsize;
2333         struct phys_disk *pd;
2334         struct virtual_disk *vd;
2335
2336         if (st->sb)
2337                 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2338                                           data_offset);
2339
2340         if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
2341                 pr_err("could not allocate superblock\n");
2342                 return 0;
2343         }
2344         memset(ddf, 0, sizeof(*ddf));
2345         st->sb = ddf;
2346
2347         if (info == NULL) {
2348                 /* zeroing superblock */
2349                 return 0;
2350         }
2351
2352         /* At least 32MB *must* be reserved for the ddf.  So let's just
2353          * start 32MB from the end, and put the primary header there.
2354          * Don't do secondary for now.
2355          * We don't know exactly where that will be yet as it could be
2356          * different on each device.  So just set up the lengths.
2357          */
2358
2359         ddf->anchor.magic = DDF_HEADER_MAGIC;
2360         make_header_guid(ddf->anchor.guid);
2361
2362         memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
2363         ddf->anchor.seq = cpu_to_be32(1);
2364         ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
2365         ddf->anchor.openflag = 0xFF;
2366         ddf->anchor.foreignflag = 0;
2367         ddf->anchor.enforcegroups = 0; /* Is this best?? */
2368         ddf->anchor.pad0 = 0xff;
2369         memset(ddf->anchor.pad1, 0xff, 12);
2370         memset(ddf->anchor.header_ext, 0xff, 32);
2371         ddf->anchor.primary_lba = cpu_to_be64(~(__u64)0);
2372         ddf->anchor.secondary_lba = cpu_to_be64(~(__u64)0);
2373         ddf->anchor.type = DDF_HEADER_ANCHOR;
2374         memset(ddf->anchor.pad2, 0xff, 3);
2375         ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
2376         /* Put this at bottom of 32M reserved.. */
2377         ddf->anchor.workspace_lba = cpu_to_be64(~(__u64)0);
2378         max_phys_disks = 1023;   /* Should be enough, 4095 is also allowed */
2379         ddf->anchor.max_pd_entries = cpu_to_be16(max_phys_disks);
2380         max_virt_disks = 255; /* 15, 63, 255, 1024, 4095 are all allowed */
2381         ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks);
2382         ddf->max_part = 64;
2383         ddf->anchor.max_partitions = cpu_to_be16(ddf->max_part);
2384         ddf->mppe = 256; /* 16, 64, 256, 1024, 4096 are all allowed */
2385         ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2386         ddf->anchor.config_record_len = cpu_to_be16(ddf->conf_rec_len);
2387         ddf->anchor.max_primary_element_entries = cpu_to_be16(ddf->mppe);
2388         memset(ddf->anchor.pad3, 0xff, 54);
2389         /* Controller section is one sector long immediately
2390          * after the ddf header */
2391         sector = 1;
2392         ddf->anchor.controller_section_offset = cpu_to_be32(sector);
2393         ddf->anchor.controller_section_length = cpu_to_be32(1);
2394         sector += 1;
2395
2396         /* phys is 8 sectors after that */
2397         pdsize = ROUND_UP(sizeof(struct phys_disk) +
2398                           sizeof(struct phys_disk_entry)*max_phys_disks,
2399                           512);
2400         switch(pdsize/512) {
2401         case 2: case 8: case 32: case 128: case 512: break;
2402         default: abort();
2403         }
2404         ddf->anchor.phys_section_offset = cpu_to_be32(sector);
2405         ddf->anchor.phys_section_length =
2406                 cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2407         sector += pdsize/512;
2408
2409         /* virt is another 32 sectors */
2410         vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2411                           sizeof(struct virtual_entry) * max_virt_disks,
2412                           512);
2413         switch(vdsize/512) {
2414         case 2: case 8: case 32: case 128: case 512: break;
2415         default: abort();
2416         }
2417         ddf->anchor.virt_section_offset = cpu_to_be32(sector);
2418         ddf->anchor.virt_section_length =
2419                 cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2420         sector += vdsize/512;
2421
2422         clen = ddf->conf_rec_len * (ddf->max_part+1);
2423         ddf->anchor.config_section_offset = cpu_to_be32(sector);
2424         ddf->anchor.config_section_length = cpu_to_be32(clen);
2425         sector += clen;
2426
2427         ddf->anchor.data_section_offset = cpu_to_be32(sector);
2428         ddf->anchor.data_section_length = cpu_to_be32(1);
2429         sector += 1;
2430
2431         ddf->anchor.bbm_section_length = cpu_to_be32(0);
2432         ddf->anchor.bbm_section_offset = cpu_to_be32(0xFFFFFFFF);
2433         ddf->anchor.diag_space_length = cpu_to_be32(0);
2434         ddf->anchor.diag_space_offset = cpu_to_be32(0xFFFFFFFF);
2435         ddf->anchor.vendor_length = cpu_to_be32(0);
2436         ddf->anchor.vendor_offset = cpu_to_be32(0xFFFFFFFF);
2437
2438         memset(ddf->anchor.pad4, 0xff, 256);
2439
2440         memcpy(&ddf->primary, &ddf->anchor, 512);
2441         memcpy(&ddf->secondary, &ddf->anchor, 512);
2442
2443         ddf->primary.openflag = 1; /* I guess.. */
2444         ddf->primary.type = DDF_HEADER_PRIMARY;
2445
2446         ddf->secondary.openflag = 1; /* I guess.. */
2447         ddf->secondary.type = DDF_HEADER_SECONDARY;
2448
2449         ddf->active = &ddf->primary;
2450
2451         ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2452
2453         /* 24 more bytes of fiction required.
2454          * first 8 are a 'vendor-id'  - "Linux-MD"
2455          * Remaining 16 are serial number.... maybe a hostname would do?
2456          */
2457         memcpy(ddf->controller.guid, T10, sizeof(T10));
2458         gethostname(hostname, sizeof(hostname));
2459         hostname[sizeof(hostname) - 1] = 0;
2460         hostlen = strlen(hostname);
2461         memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2462         for (i = strlen(T10) ; i+hostlen < 24; i++)
2463                 ddf->controller.guid[i] = ' ';
2464
2465         ddf->controller.type.vendor_id = cpu_to_be16(0xDEAD);
2466         ddf->controller.type.device_id = cpu_to_be16(0xBEEF);
2467         ddf->controller.type.sub_vendor_id = cpu_to_be16(0);
2468         ddf->controller.type.sub_device_id = cpu_to_be16(0);
2469         memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2470         memset(ddf->controller.pad, 0xff, 8);
2471         memset(ddf->controller.vendor_data, 0xff, 448);
2472         if (homehost && strlen(homehost) < 440)
2473                 strcpy((char*)ddf->controller.vendor_data, homehost);
2474
2475         if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
2476                 pr_err("could not allocate pd\n");
2477                 return 0;
2478         }
2479         ddf->phys = pd;
2480         ddf->pdsize = pdsize;
2481
2482         memset(pd, 0xff, pdsize);
2483         memset(pd, 0, sizeof(*pd));
2484         pd->magic = DDF_PHYS_RECORDS_MAGIC;
2485         pd->used_pdes = cpu_to_be16(0);
2486         pd->max_pdes = cpu_to_be16(max_phys_disks);
2487         memset(pd->pad, 0xff, 52);
2488         for (i = 0; i < max_phys_disks; i++)
2489                 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
2490
2491         if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
2492                 pr_err("could not allocate vd\n");
2493                 return 0;
2494         }
2495         ddf->virt = vd;
2496         ddf->vdsize = vdsize;
2497         memset(vd, 0, vdsize);
2498         vd->magic = DDF_VIRT_RECORDS_MAGIC;
2499         vd->populated_vdes = cpu_to_be16(0);
2500         vd->max_vdes = cpu_to_be16(max_virt_disks);
2501         memset(vd->pad, 0xff, 52);
2502
2503         for (i=0; i<max_virt_disks; i++)
2504                 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2505
2506         st->sb = ddf;
2507         ddf_set_updates_pending(ddf, NULL);
2508         return 1;
2509 }
2510
/* Convert a chunk size in bytes to a shift count relative to 512-byte
 * units: the index of the lowest set bit of chunksize/512.
 * Yields -1 when chunksize/512 is 0.
 */
static int chunk_to_shift(int chunksize)
{
	int units = chunksize / 512;

	return ffs(units) - 1;
}
2515
2516 #ifndef MDASSEMBLE
/* A region on a physical disk: where it starts and how long it is. */
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator: orders extents by ascending 'start'.
 * Returns -1, 0 or 1.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const unsigned long long lhs = ((const struct extent *)av)->start;
	const unsigned long long rhs = ((const struct extent *)bv)->start;

	return (lhs > rhs) - (lhs < rhs);
}
2530
2531 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
2532 {
2533         /* Find a list of used extents on the given physical device
2534          * (dnum) of the given ddf.
2535          * Return a malloced array of 'struct extent'
2536          */
2537         struct extent *rv;
2538         int n = 0;
2539         unsigned int i;
2540         __u16 state;
2541
2542         if (dl->pdnum < 0)
2543                 return NULL;
2544         state = be16_to_cpu(ddf->phys->entries[dl->pdnum].state);
2545
2546         if ((state & (DDF_Online|DDF_Failed|DDF_Missing)) != DDF_Online)
2547                 return NULL;
2548
2549         rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
2550
2551         for (i = 0; i < ddf->max_part; i++) {
2552                 const struct vd_config *bvd;
2553                 unsigned int ibvd;
2554                 struct vcl *v = dl->vlist[i];
2555                 if (v == NULL ||
2556                     get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2557                                              &bvd, &ibvd) == DDF_NOTFOUND)
2558                         continue;
2559                 rv[n].start = be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2560                 rv[n].size = be64_to_cpu(bvd->blocks);
2561                 n++;
2562         }
2563         qsort(rv, n, sizeof(*rv), cmp_extent);
2564
2565         rv[n].start = be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2566         rv[n].size = 0;
2567         return rv;
2568 }
2569
2570 static unsigned long long find_space(
2571         struct ddf_super *ddf, struct dl *dl,
2572         unsigned long long data_offset,
2573         unsigned long long *size)
2574 {
2575         /* Find if the requested amount of space is available.
2576          * If it is, return start.
2577          * If not, set *size to largest space.
2578          * If data_offset != INVALID_SECTORS, then the space must start
2579          * at this location.
2580          */
2581         struct extent *e = get_extents(ddf, dl);
2582         int i = 0;
2583         unsigned long long pos = 0;
2584         unsigned long long max_size = 0;
2585
2586         if (!e) {
2587                 *size = 0;
2588                 return INVALID_SECTORS;
2589         }
2590         do {
2591                 unsigned long long esize = e[i].start - pos;
2592                 if (data_offset != INVALID_SECTORS &&
2593                     pos <= data_offset &&
2594                     e[i].start > data_offset) {
2595                         pos = data_offset;
2596                         esize = e[i].start - pos;
2597                 }
2598                 if (data_offset != INVALID_SECTORS &&
2599                     pos != data_offset) {
2600                         i++;
2601                         continue;
2602                 }
2603                 if (esize >= *size) {
2604                         /* Found! */
2605                         free(e);
2606                         return pos;
2607                 }
2608                 if (esize > max_size)
2609                         max_size = esize;
2610                 pos = e[i].start + e[i].size;
2611                 i++;
2612         } while (e[i-1].size);
2613         *size = max_size;
2614         free(e);
2615         return INVALID_SECTORS;
2616 }
2617 #endif
2618
2619 static int init_super_ddf_bvd(struct supertype *st,
2620                               mdu_array_info_t *info,
2621                               unsigned long long size,
2622                               char *name, char *homehost,
2623                               int *uuid, unsigned long long data_offset)
2624 {
2625         /* We are creating a BVD inside a pre-existing container.
2626          * so st->sb is already set.
2627          * We need to create a new vd_config and a new virtual_entry
2628          */
2629         struct ddf_super *ddf = st->sb;
2630         unsigned int venum, i;
2631         struct virtual_entry *ve;
2632         struct vcl *vcl;
2633         struct vd_config *vc;
2634
2635         if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2636                 pr_err("This ddf already has an array called %s\n", name);
2637                 return 0;
2638         }
2639         venum = find_unused_vde(ddf);
2640         if (venum == DDF_NOTFOUND) {
2641                 pr_err("Cannot find spare slot for virtual disk\n");
2642                 return 0;
2643         }
2644         ve = &ddf->virt->entries[venum];
2645
2646         /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2647          * timestamp, random number
2648          */
2649         make_header_guid(ve->guid);
2650         ve->unit = cpu_to_be16(info->md_minor);
2651         ve->pad0 = 0xFFFF;
2652         ve->guid_crc._v16 = crc32(0, (unsigned char *)ddf->anchor.guid,
2653                                   DDF_GUID_LEN);
2654         ve->type = cpu_to_be16(0);
2655         ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2656         if (info->state & 1) /* clean */
2657                 ve->init_state = DDF_init_full;
2658         else
2659                 ve->init_state = DDF_init_not;
2660
2661         memset(ve->pad1, 0xff, 14);
2662         memset(ve->name, ' ', 16);
2663         if (name)
2664                 strncpy(ve->name, name, 16);
2665         ddf->virt->populated_vdes =
2666                 cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)+1);
2667
2668         /* Now create a new vd_config */
2669         if (posix_memalign((void**)&vcl, 512,
2670                            (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
2671                 pr_err("could not allocate vd_config\n");
2672                 return 0;
2673         }
2674         vcl->vcnum = venum;
2675         vcl->block_sizes = NULL; /* FIXME not for CONCAT */
2676         vc = &vcl->conf;
2677
2678         vc->magic = DDF_VD_CONF_MAGIC;
2679         memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2680         vc->timestamp = cpu_to_be32(time(0)-DECADE);
2681         vc->seqnum = cpu_to_be32(1);
2682         memset(vc->pad0, 0xff, 24);
2683         vc->chunk_shift = chunk_to_shift(info->chunk_size);
2684         if (layout_md2ddf(info, vc) == -1 ||
2685                 be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2686                 pr_err("unsupported RAID level/layout %d/%d with %d disks\n",
2687                        info->level, info->layout, info->raid_disks);
2688                 free(vcl);
2689                 return 0;
2690         }
2691         vc->sec_elmnt_seq = 0;
2692         if (alloc_other_bvds(ddf, vcl) != 0) {
2693                 pr_err("could not allocate other bvds\n");
2694                 free(vcl);
2695                 return 0;
2696         }
2697         vc->blocks = cpu_to_be64(info->size * 2);
2698         vc->array_blocks = cpu_to_be64(
2699                 calc_array_size(info->level, info->raid_disks, info->layout,
2700                                 info->chunk_size, info->size*2));
2701         memset(vc->pad1, 0xff, 8);
2702         vc->spare_refs[0] = cpu_to_be32(0xffffffff);
2703         vc->spare_refs[1] = cpu_to_be32(0xffffffff);
2704         vc->spare_refs[2] = cpu_to_be32(0xffffffff);
2705         vc->spare_refs[3] = cpu_to_be32(0xffffffff);
2706         vc->spare_refs[4] = cpu_to_be32(0xffffffff);
2707         vc->spare_refs[5] = cpu_to_be32(0xffffffff);
2708         vc->spare_refs[6] = cpu_to_be32(0xffffffff);
2709         vc->spare_refs[7] = cpu_to_be32(0xffffffff);
2710         memset(vc->cache_pol, 0, 8);
2711         vc->bg_rate = 0x80;
2712         memset(vc->pad2, 0xff, 3);
2713         memset(vc->pad3, 0xff, 52);
2714         memset(vc->pad4, 0xff, 192);
2715         memset(vc->v0, 0xff, 32);
2716         memset(vc->v1, 0xff, 32);
2717         memset(vc->v2, 0xff, 16);
2718         memset(vc->v3, 0xff, 16);
2719         memset(vc->vendor, 0xff, 32);
2720
2721         memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
2722         memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
2723
2724         for (i = 1; i < vc->sec_elmnt_count; i++) {
2725                 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2726                 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2727         }
2728
2729         vcl->next = ddf->conflist;
2730         ddf->conflist = vcl;
2731         ddf->currentconf = vcl;
2732         ddf_set_updates_pending(ddf, NULL);
2733         return 1;
2734 }
2735
2736 #ifndef MDASSEMBLE
2737 static void add_to_super_ddf_bvd(struct supertype *st,
2738                                  mdu_disk_info_t *dk, int fd, char *devname,
2739                                  unsigned long long data_offset)
2740 {
2741         /* fd and devname identify a device within the ddf container (st).
2742          * dk identifies a location in the new BVD.
2743          * We need to find suitable free space in that device and update
2744          * the phys_refnum and lba_offset for the newly created vd_config.
2745          * We might also want to update the type in the phys_disk
2746          * section.
2747          *
2748          * Alternately: fd == -1 and we have already chosen which device to
2749          * use and recorded in dlist->raid_disk;
2750          */
2751         struct dl *dl;
2752         struct ddf_super *ddf = st->sb;
2753         struct vd_config *vc;
2754         unsigned int i;
2755         unsigned long long blocks, pos;
2756         unsigned int raid_disk = dk->raid_disk;
2757
2758         if (fd == -1) {
2759                 for (dl = ddf->dlist; dl ; dl = dl->next)
2760                         if (dl->raiddisk == dk->raid_disk)
2761                                 break;
2762         } else {
2763                 for (dl = ddf->dlist; dl ; dl = dl->next)
2764                         if (dl->major == dk->major &&
2765                             dl->minor == dk->minor)
2766                                 break;
2767         }
2768         if (!dl || dl->pdnum < 0 || ! (dk->state & (1<<MD_DISK_SYNC)))
2769                 return;
2770
2771         vc = &ddf->currentconf->conf;
2772         if (vc->sec_elmnt_count > 1) {
2773                 unsigned int n = be16_to_cpu(vc->prim_elmnt_count);
2774                 if (raid_disk >= n)
2775                         vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2776                 raid_disk %= n;
2777         }
2778
2779         blocks = be64_to_cpu(vc->blocks);
2780         if (ddf->currentconf->block_sizes)
2781                 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
2782
2783         pos = find_space(ddf, dl, data_offset, &blocks);
2784         if (pos == INVALID_SECTORS)
2785                 return;
2786
2787         ddf->currentdev = dk->raid_disk;
2788         vc->phys_refnum[raid_disk] = dl->disk.refnum;
2789         LBA_OFFSET(ddf, vc)[raid_disk] = cpu_to_be64(pos);
2790
2791         for (i = 0; i < ddf->max_part ; i++)
2792                 if (dl->vlist[i] == NULL)
2793                         break;
2794         if (i == ddf->max_part)
2795                 return;
2796         dl->vlist[i] = ddf->currentconf;
2797
2798         if (fd >= 0)
2799                 dl->fd = fd;
2800         if (devname)
2801                 dl->devname = devname;
2802
2803         /* Check if we can mark array as optimal yet */
2804         i = ddf->currentconf->vcnum;
2805         ddf->virt->entries[i].state =
2806                 (ddf->virt->entries[i].state & ~DDF_state_mask)
2807                 | get_svd_state(ddf, ddf->currentconf);
2808         be16_clear(ddf->phys->entries[dl->pdnum].type,
2809                    cpu_to_be16(DDF_Global_Spare));
2810         be16_set(ddf->phys->entries[dl->pdnum].type,
2811                  cpu_to_be16(DDF_Active_in_VD));
2812         dprintf("added disk %d/%08x to VD %d/%s as disk %d\n",
2813                 dl->pdnum, be32_to_cpu(dl->disk.refnum),
2814                 ddf->currentconf->vcnum, guid_str(vc->guid),
2815                 dk->raid_disk);
2816         ddf_set_updates_pending(ddf, vc);
2817 }
2818
2819 static unsigned int find_unused_pde(const struct ddf_super *ddf)
2820 {
2821         unsigned int i;
2822         for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++) {
2823                 if (all_ff(ddf->phys->entries[i].guid))
2824                         return i;
2825         }
2826         return DDF_NOTFOUND;
2827 }
2828
2829 static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl)
2830 {
2831         __u64 cfs, t;
2832         cfs = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba));
2833         t = be64_to_cpu(dl->secondary_lba);
2834         if (t != ~(__u64)0)
2835                 cfs = min(cfs, t);
2836         /*
2837          * Some vendor DDF structures interpret workspace_lba
2838          * very differently than we do: Make a sanity check on the value.
2839          */
2840         t = be64_to_cpu(dl->workspace_lba);
2841         if (t < cfs) {
2842                 __u64 wsp = cfs - t;
2843                 if (wsp > 1024*1024*2ULL && wsp > dl->size / 16) {
2844                         pr_err("%x:%x: workspace size 0x%llx too big, ignoring\n",
2845                                dl->major, dl->minor, (unsigned long long)wsp);
2846                 } else
2847                         cfs = t;
2848         }
2849         pde->config_size = cpu_to_be64(cfs);
2850         dprintf("%x:%x config_size %llx, DDF structure is %llx blocks\n",
2851                 dl->major, dl->minor,
2852                 (unsigned long long)cfs, (unsigned long long)(dl->size-cfs));
2853 }
2854
2855 /* Add a device to a container, either while creating it or while
2856  * expanding a pre-existing container
2857  */
2858 static int add_to_super_ddf(struct supertype *st,
2859                             mdu_disk_info_t *dk, int fd, char *devname,
2860                             unsigned long long data_offset)
2861 {
2862         struct ddf_super *ddf = st->sb;
2863         struct dl *dd;
2864         time_t now;
2865         struct tm *tm;
2866         unsigned long long size;
2867         struct phys_disk_entry *pde;
2868         unsigned int n, i;
2869         struct stat stb;
2870         __u32 *tptr;
2871
2872         if (ddf->currentconf) {
2873                 add_to_super_ddf_bvd(st, dk, fd, devname, data_offset);
2874                 return 0;
2875         }
2876
2877         /* This is device numbered dk->number.  We need to create
2878          * a phys_disk entry and a more detailed disk_data entry.
2879          */
2880         fstat(fd, &stb);
2881         n = find_unused_pde(ddf);
2882         if (n == DDF_NOTFOUND) {
2883                 pr_err("No free slot in array, cannot add disk\n");
2884                 return 1;
2885         }
2886         pde = &ddf->phys->entries[n];
2887         get_dev_size(fd, NULL, &size);
2888         if (size <= 32*1024*1024) {
2889                 pr_err("device size must be at least 32MB\n");
2890                 return 1;
2891         }
2892         size >>= 9;
2893
2894         if (posix_memalign((void**)&dd, 512,
2895                            sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
2896                 pr_err("could allocate buffer for new disk, aborting\n");
2897                 return 1;
2898         }
2899         dd->major = major(stb.st_rdev);
2900         dd->minor = minor(stb.st_rdev);
2901         dd->devname = devname;
2902         dd->fd = fd;
2903         dd->spare = NULL;
2904
2905         dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2906         now = time(0);
2907         tm = localtime(&now);
2908         sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2909                 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
2910         tptr = (__u32 *)(dd->disk.guid + 16);
2911         *tptr++ = random32();
2912         *tptr = random32();
2913
2914         do {
2915                 /* Cannot be bothered finding a CRC of some irrelevant details*/
2916                 dd->disk.refnum._v32 = random32();
2917                 for (i = be16_to_cpu(ddf->active->max_pd_entries);
2918                      i > 0; i--)
2919                         if (be32_eq(ddf->phys->entries[i-1].refnum,
2920                                     dd->disk.refnum))
2921                                 break;
2922         } while (i > 0);
2923
2924         dd->disk.forced_ref = 1;
2925         dd->disk.forced_guid = 1;
2926         memset(dd->disk.vendor, ' ', 32);
2927         memcpy(dd->disk.vendor, "Linux", 5);
2928         memset(dd->disk.pad, 0xff, 442);
2929         for (i = 0; i < ddf->max_part ; i++)
2930                 dd->vlist[i] = NULL;
2931
2932         dd->pdnum = n;
2933
2934         if (st->update_tail) {
2935                 int len = (sizeof(struct phys_disk) +
2936                            sizeof(struct phys_disk_entry));
2937                 struct phys_disk *pd;
2938
2939                 pd = xmalloc(len);
2940                 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2941                 pd->used_pdes = cpu_to_be16(n);
2942                 pde = &pd->entries[0];
2943                 dd->mdupdate = pd;
2944         } else
2945                 ddf->phys->used_pdes = cpu_to_be16(
2946                         1 + be16_to_cpu(ddf->phys->used_pdes));
2947
2948         memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2949         pde->refnum = dd->disk.refnum;
2950         pde->type = cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
2951         pde->state = cpu_to_be16(DDF_Online);
2952         dd->size = size;
2953         /*
2954          * If there is already a device in dlist, try to reserve the same
2955          * amount of workspace. Otherwise, use 32MB.
2956          * We checked disk size above already.
2957          */
2958 #define __calc_lba(new, old, lba, mb) do { \
2959                 unsigned long long dif; \
2960                 if ((old) != NULL) \
2961                         dif = (old)->size - be64_to_cpu((old)->lba); \
2962                 else \
2963                         dif = (new)->size; \
2964                 if ((new)->size > dif) \
2965                         (new)->lba = cpu_to_be64((new)->size - dif); \
2966                 else \
2967                         (new)->lba = cpu_to_be64((new)->size - (mb*1024*2)); \
2968         } while (0)
2969         __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2970         __calc_lba(dd, ddf->dlist, primary_lba, 16);
2971         if (ddf->dlist == NULL ||
2972             be64_to_cpu(ddf->dlist->secondary_lba) != ~(__u64)0)
2973                 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2974         _set_config_size(pde, dd);
2975
2976         sprintf(pde->path, "%17.17s","Information: nil") ;
2977         memset(pde->pad, 0xff, 6);
2978
2979         if (st->update_tail) {
2980                 dd->next = ddf->add_list;
2981                 ddf->add_list = dd;
2982         } else {
2983                 dd->next = ddf->dlist;
2984                 ddf->dlist = dd;
2985                 ddf_set_updates_pending(ddf, NULL);
2986         }
2987
2988         return 0;
2989 }
2990
2991 static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2992 {
2993         struct ddf_super *ddf = st->sb;
2994         struct dl *dl;
2995
2996         /* mdmon has noticed that this disk (dk->major/dk->minor) has
2997          * disappeared from the container.
2998          * We need to arrange that it disappears from the metadata and
2999          * internal data structures too.
3000          * Most of the work is done by ddf_process_update which edits
3001          * the metadata and closes the file handle and attaches the memory
3002          * where free_updates will free it.
3003          */
3004         for (dl = ddf->dlist; dl ; dl = dl->next)
3005                 if (dl->major == dk->major &&
3006                     dl->minor == dk->minor)
3007                         break;
3008         if (!dl || dl->pdnum < 0)
3009                 return -1;
3010
3011         if (st->update_tail) {
3012                 int len = (sizeof(struct phys_disk) +
3013                            sizeof(struct phys_disk_entry));
3014                 struct phys_disk *pd;
3015
3016                 pd = xmalloc(len);
3017                 pd->magic = DDF_PHYS_RECORDS_MAGIC;
3018                 pd->used_pdes = cpu_to_be16(dl->pdnum);
3019                 pd->entries[0].state = cpu_to_be16(DDF_Missing);
3020                 append_metadata_update(st, pd, len);
3021         }
3022         return 0;
3023 }
3024 #endif
3025
3026 /*
3027  * This is the write_init_super method for a ddf container.  It is
3028  * called when creating a container or adding another device to a
3029  * container.
3030  */
3031
3032 static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
3033 {
3034         unsigned long long sector;
3035         struct ddf_header *header;
3036         int fd, i, n_config, conf_size, buf_size;
3037         int ret = 0;
3038         char *conf;
3039
3040         fd = d->fd;
3041
3042         switch (type) {
3043         case DDF_HEADER_PRIMARY:
3044                 header = &ddf->primary;
3045                 sector = be64_to_cpu(header->primary_lba);
3046                 break;
3047         case DDF_HEADER_SECONDARY:
3048                 header = &ddf->secondary;
3049                 sector = be64_to_cpu(header->secondary_lba);
3050                 break;
3051         default:
3052                 return 0;
3053         }
3054         if (sector == ~(__u64)0)
3055                 return 0;
3056
3057         header->type = type;
3058         header->openflag = 1;
3059         header->crc = calc_crc(header, 512);
3060
3061         lseek64(fd, sector<<9, 0);
3062         if (write(fd, header, 512) < 0)
3063                 goto out;
3064
3065         ddf->controller.crc = calc_crc(&ddf->controller, 512);
3066         if (write(fd, &ddf->controller, 512) < 0)
3067                 goto out;
3068
3069         ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
3070         if (write(fd, ddf->phys, ddf->pdsize) < 0)
3071                 goto out;
3072         ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
3073         if (write(fd, ddf->virt, ddf->vdsize) < 0)
3074                 goto out;
3075
3076         /* Now write lots of config records. */
3077         n_config = ddf->max_part;
3078         conf_size = ddf->conf_rec_len * 512;
3079         conf = ddf->conf;
3080         buf_size = conf_size * (n_config + 1);
3081         if (!conf) {
3082                 if (posix_memalign((void**)&conf, 512, buf_size) != 0)
3083                         goto out;
3084                 ddf->conf = conf;
3085         }
3086         for (i = 0 ; i <= n_config ; i++) {
3087                 struct vcl *c;
3088                 struct vd_config *vdc = NULL;
3089                 if (i == n_config) {
3090                         c = (struct vcl *)d->spare;
3091                         if (c)
3092                                 vdc = &c->conf;
3093                 } else {
3094                         unsigned int dummy;
3095                         c = d->vlist[i];
3096                         if (c)
3097                                 get_pd_index_from_refnum(
3098                                         c, d->disk.refnum,
3099                                         ddf->mppe,
3100                                         (const struct vd_config **)&vdc,
3101                                         &dummy);
3102                 }
3103                 if (vdc) {
3104                         dprintf("writing conf record %i on disk %08x for %s/%u\n",
3105                                 i, be32_to_cpu(d->disk.refnum),
3106                                 guid_str(vdc->guid),
3107                                 vdc->sec_elmnt_seq);
3108                         vdc->crc = calc_crc(vdc, conf_size);
3109                         memcpy(conf + i*conf_size, vdc, conf_size);
3110                 } else
3111                         memset(conf + i*conf_size, 0xff, conf_size);
3112         }
3113         if (write(fd, conf, buf_size) != buf_size)
3114                 goto out;
3115
3116         d->disk.crc = calc_crc(&d->disk, 512);
3117         if (write(fd, &d->disk, 512) < 0)
3118                 goto out;
3119
3120         ret = 1;
3121 out:
3122         header->openflag = 0;
3123         header->crc = calc_crc(header, 512);
3124
3125         lseek64(fd, sector<<9, 0);
3126         if (write(fd, header, 512) < 0)
3127                 ret = 0;
3128
3129         return ret;
3130 }
3131
3132 static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
3133 {
3134         unsigned long long size;
3135         int fd = d->fd;
3136         if (fd < 0)
3137                 return 0;
3138
3139         /* We need to fill in the primary, (secondary) and workspace
3140          * lba's in the headers, set their checksums,
3141          * Also checksum phys, virt....
3142          *
3143          * Then write everything out, finally the anchor is written.
3144          */
3145         get_dev_size(fd, NULL, &size);
3146         size /= 512;
3147         memcpy(&ddf->anchor, ddf->active, 512);
3148         if (be64_to_cpu(d->workspace_lba) != 0ULL)
3149                 ddf->anchor.workspace_lba = d->workspace_lba;
3150         else
3151                 ddf->anchor.workspace_lba =
3152                         cpu_to_be64(size - 32*1024*2);
3153         if (be64_to_cpu(d->primary_lba) != 0ULL)
3154                 ddf->anchor.primary_lba = d->primary_lba;
3155         else
3156                 ddf->anchor.primary_lba =
3157                         cpu_to_be64(size - 16*1024*2);
3158         if (be64_to_cpu(d->secondary_lba) != 0ULL)
3159                 ddf->anchor.secondary_lba = d->secondary_lba;
3160         else
3161                 ddf->anchor.secondary_lba =
3162                         cpu_to_be64(size - 32*1024*2);
3163         ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
3164         memcpy(&ddf->primary, &ddf->anchor, 512);
3165         memcpy(&ddf->secondary, &ddf->anchor, 512);
3166
3167         ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
3168         ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
3169         ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
3170
3171         if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
3172                 return 0;
3173
3174         if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
3175                 return 0;
3176
3177         lseek64(fd, (size-1)*512, SEEK_SET);
3178         if (write(fd, &ddf->anchor, 512) < 0)
3179                 return 0;
3180
3181         return 1;
3182 }
3183
3184 #ifndef MDASSEMBLE
3185 static int __write_init_super_ddf(struct supertype *st)
3186 {
3187         struct ddf_super *ddf = st->sb;
3188         struct dl *d;
3189         int attempts = 0;
3190         int successes = 0;
3191
3192         pr_state(ddf, __func__);
3193
3194         /* try to write updated metadata,
3195          * if we catch a failure move on to the next disk
3196          */
3197         for (d = ddf->dlist; d; d=d->next) {
3198                 attempts++;
3199                 successes += _write_super_to_disk(ddf, d);
3200         }
3201
3202         return attempts != successes;
3203 }
3204
3205 static int write_init_super_ddf(struct supertype *st)
3206 {
3207         struct ddf_super *ddf = st->sb;
3208         struct vcl *currentconf = ddf->currentconf;
3209
3210         /* We are done with currentconf - reset it so st refers to the container */
3211         ddf->currentconf = NULL;
3212
3213         if (st->update_tail) {
3214                 /* queue the virtual_disk and vd_config as metadata updates */
3215                 struct virtual_disk *vd;
3216                 struct vd_config *vc;
3217                 int len, tlen;
3218                 unsigned int i;
3219
3220                 if (!currentconf) {
3221                         /* Must be adding a physical disk to the container */
3222                         int len = (sizeof(struct phys_disk) +
3223                                    sizeof(struct phys_disk_entry));
3224
3225                         /* adding a disk to the container. */
3226                         if (!ddf->add_list)
3227                                 return 0;
3228
3229                         append_metadata_update(st, ddf->add_list->mdupdate, len);
3230                         ddf->add_list->mdupdate = NULL;
3231                         return 0;
3232                 }
3233
3234                 /* Newly created VD */
3235
3236                 /* First the virtual disk.  We have a slightly fake header */
3237                 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
3238                 vd = xmalloc(len);
3239                 *vd = *ddf->virt;
3240                 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
3241                 vd->populated_vdes = cpu_to_be16(currentconf->vcnum);
3242                 append_metadata_update(st, vd, len);
3243
3244                 /* Then the vd_config */
3245                 len = ddf->conf_rec_len * 512;
3246                 tlen = len * currentconf->conf.sec_elmnt_count;
3247                 vc = xmalloc(tlen);
3248                 memcpy(vc, &currentconf->conf, len);
3249                 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
3250                         memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
3251                                len);
3252                 append_metadata_update(st, vc, tlen);
3253
3254                 return 0;
3255         } else {
3256                 struct dl *d;
3257                 if (!currentconf)
3258                         for (d = ddf->dlist; d; d=d->next)
3259                                 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
3260                 /* Note: we don't close the fd's now, but a subsequent
3261                  * ->free_super() will
3262                  */
3263                 return __write_init_super_ddf(st);
3264         }
3265 }
3266
3267 #endif
3268
/*
 * Report how much of a device of 'devsize' is available for data once
 * the trailing 32Meg (32*1024*2 units of 'devsize') is reserved.
 *
 * 'st' and 'data_offset' are not used.
 *
 * Returns 0 when the device is no bigger than the reserved area,
 * otherwise devsize minus the reserved area.
 */
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
                            unsigned long long data_offset)
{
        const __u64 reserved = 32*1024*2;

        return devsize > reserved ? devsize - reserved : 0;
}
3277
3278 #ifndef MDASSEMBLE
3279
3280 static int reserve_space(struct supertype *st, int raiddisks,
3281                          unsigned long long size, int chunk,
3282                          unsigned long long data_offset,
3283                          unsigned long long *freesize)
3284 {
3285         /* Find 'raiddisks' spare extents at least 'size' big (but
3286          * only caring about multiples of 'chunk') and remember
3287          * them.   If size==0, find the largest size possible.
3288          * Report available size in *freesize
3289          * If space cannot be found, fail.
3290          */
3291         struct dl *dl;
3292         struct ddf_super *ddf = st->sb;
3293         int cnt = 0;
3294
3295         for (dl = ddf->dlist; dl ; dl=dl->next) {
3296                 dl->raiddisk = -1;
3297                 dl->esize = 0;
3298         }
3299         /* Now find largest extent on each device */
3300         for (dl = ddf->dlist ; dl ; dl=dl->next) {
3301                 unsigned long long minsize = ULLONG_MAX;
3302
3303                 find_space(ddf, dl, data_offset, &minsize);
3304                 if (minsize >= size && minsize >= (unsigned)chunk) {
3305                         cnt++;
3306                         dl->esize = minsize;
3307                 }
3308         }
3309         if (cnt < raiddisks) {
3310                 pr_err("not enough devices with space to create array.\n");
3311                 return 0; /* No enough free spaces large enough */
3312         }
3313         if (size == 0) {
3314                 /* choose the largest size of which