]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: __write_init_super_ddf: just use seq number of active header
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
47/* The DDF metadata handling.
48 * DDF metadata lives at the end of the device.
49 * The last 512 byte block provides an 'anchor' which is used to locate
50 * the rest of the metadata which usually lives immediately behind the anchor.
51 *
52 * Note:
53 * - all multibyte numeric fields are bigendian.
54 * - all strings are space padded.
55 *
56 */
57
58/* Primary Raid Level (PRL) */
59#define DDF_RAID0 0x00
60#define DDF_RAID1 0x01
61#define DDF_RAID3 0x03
62#define DDF_RAID4 0x04
63#define DDF_RAID5 0x05
64#define DDF_RAID1E 0x11
65#define DDF_JBOD 0x0f
66#define DDF_CONCAT 0x1f
67#define DDF_RAID5E 0x15
68#define DDF_RAID5EE 0x25
59e36268 69#define DDF_RAID6 0x06
a322f70c
DW
70
71/* Raid Level Qualifier (RLQ) */
72#define DDF_RAID0_SIMPLE 0x00
73#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
74#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
75#define DDF_RAID3_0 0x00 /* parity in first extent */
76#define DDF_RAID3_N 0x01 /* parity in last extent */
77#define DDF_RAID4_0 0x00 /* parity in first extent */
78#define DDF_RAID4_N 0x01 /* parity in last extent */
79/* these apply to raid5e and raid5ee as well */
80#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 81#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
82#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
83#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
84
85#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
86#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
87
88/* Secondary RAID Level (SRL) */
89#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
90#define DDF_2MIRRORED 0x01
91#define DDF_2CONCAT 0x02
92#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
93
94/* Magic numbers */
95#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
96#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
97#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
98#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
99#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
100#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
101#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
102#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
103#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
104#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
105
106#define DDF_GUID_LEN 24
59e36268
NB
107#define DDF_REVISION_0 "01.00.00"
108#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
109
110struct ddf_header {
88c164f4 111 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
112 __u32 crc;
113 char guid[DDF_GUID_LEN];
59e36268 114 char revision[8]; /* 01.02.00 */
a322f70c
DW
115 __u32 seq; /* starts at '1' */
116 __u32 timestamp;
117 __u8 openflag;
118 __u8 foreignflag;
119 __u8 enforcegroups;
120 __u8 pad0; /* 0xff */
121 __u8 pad1[12]; /* 12 * 0xff */
122 /* 64 bytes so far */
123 __u8 header_ext[32]; /* reserved: fill with 0xff */
124 __u64 primary_lba;
125 __u64 secondary_lba;
126 __u8 type;
127 __u8 pad2[3]; /* 0xff */
128 __u32 workspace_len; /* sectors for vendor space -
129 * at least 32768(sectors) */
130 __u64 workspace_lba;
131 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
132 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
133 __u16 max_partitions; /* i.e. max num of configuration
134 record entries per disk */
135 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
136 *12/512) */
137 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
138 __u8 pad3[54]; /* 0xff */
139 /* 192 bytes so far */
140 __u32 controller_section_offset;
141 __u32 controller_section_length;
142 __u32 phys_section_offset;
143 __u32 phys_section_length;
144 __u32 virt_section_offset;
145 __u32 virt_section_length;
146 __u32 config_section_offset;
147 __u32 config_section_length;
148 __u32 data_section_offset;
149 __u32 data_section_length;
150 __u32 bbm_section_offset;
151 __u32 bbm_section_length;
152 __u32 diag_space_offset;
153 __u32 diag_space_length;
154 __u32 vendor_offset;
155 __u32 vendor_length;
156 /* 256 bytes so far */
157 __u8 pad4[256]; /* 0xff */
158};
159
160/* type field */
161#define DDF_HEADER_ANCHOR 0x00
162#define DDF_HEADER_PRIMARY 0x01
163#define DDF_HEADER_SECONDARY 0x02
164
165/* The content of the 'controller section' - global scope */
166struct ddf_controller_data {
88c164f4 167 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
168 __u32 crc;
169 char guid[DDF_GUID_LEN];
170 struct controller_type {
171 __u16 vendor_id;
172 __u16 device_id;
173 __u16 sub_vendor_id;
174 __u16 sub_device_id;
175 } type;
176 char product_id[16];
177 __u8 pad[8]; /* 0xff */
178 __u8 vendor_data[448];
179};
180
181/* The content of phys_section - global scope */
182struct phys_disk {
88c164f4 183 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
184 __u32 crc;
185 __u16 used_pdes;
186 __u16 max_pdes;
187 __u8 pad[52];
188 struct phys_disk_entry {
189 char guid[DDF_GUID_LEN];
190 __u32 refnum;
191 __u16 type;
192 __u16 state;
193 __u64 config_size; /* DDF structures must be after here */
194 char path[18]; /* another horrible structure really */
195 __u8 pad[6];
196 } entries[0];
197};
198
199/* phys_disk_entry.type is a bitmap - bigendian remember */
200#define DDF_Forced_PD_GUID 1
201#define DDF_Active_in_VD 2
88c164f4 202#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
203#define DDF_Spare 8 /* overrides Global_spare */
204#define DDF_Foreign 16
205#define DDF_Legacy 32 /* no DDF on this device */
206
207#define DDF_Interface_mask 0xf00
208#define DDF_Interface_SCSI 0x100
209#define DDF_Interface_SAS 0x200
210#define DDF_Interface_SATA 0x300
211#define DDF_Interface_FC 0x400
212
213/* phys_disk_entry.state is a bigendian bitmap */
214#define DDF_Online 1
215#define DDF_Failed 2 /* overrides 1,4,8 */
216#define DDF_Rebuilding 4
217#define DDF_Transition 8
218#define DDF_SMART 16
219#define DDF_ReadErrors 32
220#define DDF_Missing 64
221
222/* The content of the virt_section global scope */
223struct virtual_disk {
88c164f4 224 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
225 __u32 crc;
226 __u16 populated_vdes;
227 __u16 max_vdes;
228 __u8 pad[52];
229 struct virtual_entry {
230 char guid[DDF_GUID_LEN];
231 __u16 unit;
232 __u16 pad0; /* 0xffff */
233 __u16 guid_crc;
234 __u16 type;
235 __u8 state;
236 __u8 init_state;
237 __u8 pad1[14];
238 char name[16];
239 } entries[0];
240};
241
242/* virtual_entry.type is a bitmap - bigendian */
243#define DDF_Shared 1
244#define DDF_Enforce_Groups 2
245#define DDF_Unicode 4
246#define DDF_Owner_Valid 8
247
248/* virtual_entry.state is a bigendian bitmap */
249#define DDF_state_mask 0x7
250#define DDF_state_optimal 0x0
251#define DDF_state_degraded 0x1
252#define DDF_state_deleted 0x2
253#define DDF_state_missing 0x3
254#define DDF_state_failed 0x4
7a7cc504 255#define DDF_state_part_optimal 0x5
a322f70c
DW
256
257#define DDF_state_morphing 0x8
258#define DDF_state_inconsistent 0x10
259
260/* virtual_entry.init_state is a bigendian bitmap */
261#define DDF_initstate_mask 0x03
262#define DDF_init_not 0x00
7a7cc504
NB
263#define DDF_init_quick 0x01 /* initialisation is progress.
264 * i.e. 'state_inconsistent' */
a322f70c
DW
265#define DDF_init_full 0x02
266
267#define DDF_access_mask 0xc0
268#define DDF_access_rw 0x00
269#define DDF_access_ro 0x80
270#define DDF_access_blocked 0xc0
271
272/* The content of the config_section - local scope
273 * It has multiple records each config_record_len sectors
274 * They can be vd_config or spare_assign
275 */
276
277struct vd_config {
88c164f4 278 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
279 __u32 crc;
280 char guid[DDF_GUID_LEN];
281 __u32 timestamp;
282 __u32 seqnum;
283 __u8 pad0[24];
284 __u16 prim_elmnt_count;
285 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
286 __u8 prl;
287 __u8 rlq;
288 __u8 sec_elmnt_count;
289 __u8 sec_elmnt_seq;
290 __u8 srl;
598f0d58
NB
291 __u64 blocks; /* blocks per component could be different
292 * on different component devices...(only
293 * for concat I hope) */
294 __u64 array_blocks; /* blocks in array */
a322f70c
DW
295 __u8 pad1[8];
296 __u32 spare_refs[8];
297 __u8 cache_pol[8];
298 __u8 bg_rate;
299 __u8 pad2[3];
300 __u8 pad3[52];
301 __u8 pad4[192];
302 __u8 v0[32]; /* reserved- 0xff */
303 __u8 v1[32]; /* reserved- 0xff */
304 __u8 v2[16]; /* reserved- 0xff */
305 __u8 v3[16]; /* reserved- 0xff */
306 __u8 vendor[32];
307 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
308 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
309 bvd are always the same size */
310};
311
312/* vd_config.cache_pol[7] is a bitmap */
313#define DDF_cache_writeback 1 /* else writethrough */
314#define DDF_cache_wadaptive 2 /* only applies if writeback */
315#define DDF_cache_readahead 4
316#define DDF_cache_radaptive 8 /* only if doing read-ahead */
317#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
318#define DDF_cache_wallowed 32 /* enable write caching */
319#define DDF_cache_rallowed 64 /* enable read caching */
320
321struct spare_assign {
88c164f4 322 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
323 __u32 crc;
324 __u32 timestamp;
325 __u8 reserved[7];
326 __u8 type;
327 __u16 populated; /* SAEs used */
328 __u16 max; /* max SAEs */
329 __u8 pad[8];
330 struct spare_assign_entry {
331 char guid[DDF_GUID_LEN];
332 __u16 secondary_element;
333 __u8 pad[6];
334 } spare_ents[0];
335};
336/* spare_assign.type is a bitmap */
337#define DDF_spare_dedicated 0x1 /* else global */
338#define DDF_spare_revertible 0x2 /* else committable */
339#define DDF_spare_active 0x4 /* else not active */
340#define DDF_spare_affinity 0x8 /* enclosure affinity */
341
342/* The data_section contents - local scope */
343struct disk_data {
88c164f4 344 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
345 __u32 crc;
346 char guid[DDF_GUID_LEN];
347 __u32 refnum; /* crc of some magic drive data ... */
348 __u8 forced_ref; /* set when above was not result of magic */
349 __u8 forced_guid; /* set if guid was forced rather than magic */
350 __u8 vendor[32];
351 __u8 pad[442];
352};
353
354/* bbm_section content */
355struct bad_block_log {
356 __u32 magic;
357 __u32 crc;
358 __u16 entry_count;
359 __u32 spare_count;
360 __u8 pad[10];
361 __u64 first_spare;
362 struct mapped_block {
363 __u64 defective_start;
364 __u32 replacement_start;
365 __u16 remap_count;
366 __u8 pad[2];
367 } entries[0];
368};
369
370/* Struct for internally holding ddf structures */
371/* The DDF structure stored on each device is potentially
372 * quite different, as some data is global and some is local.
373 * The global data is:
374 * - ddf header
375 * - controller_data
376 * - Physical disk records
377 * - Virtual disk records
378 * The local data is:
379 * - Configuration records
380 * - Physical Disk data section
381 * ( and Bad block and vendor which I don't care about yet).
382 *
383 * The local data is parsed into separate lists as it is read
384 * and reconstructed for writing. This means that we only need
385 * to make config changes once and they are automatically
386 * propagated to all devices.
387 * Note that the ddf_super has space of the conf and disk data
388 * for this disk and also for a list of all such data.
389 * The list is only used for the superblock that is being
390 * built in Create or Assemble to describe the whole array.
391 */
392struct ddf_super {
6416d527 393 struct ddf_header anchor, primary, secondary;
a322f70c 394 struct ddf_controller_data controller;
6416d527 395 struct ddf_header *active;
a322f70c
DW
396 struct phys_disk *phys;
397 struct virtual_disk *virt;
398 int pdsize, vdsize;
f21e18ca 399 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 400 int currentdev;
18a2f463 401 int updates_pending;
a322f70c 402 struct vcl {
6416d527
NB
403 union {
404 char space[512];
405 struct {
406 struct vcl *next;
407 __u64 *lba_offset; /* location in 'conf' of
408 * the lba table */
f21e18ca 409 unsigned int vcnum; /* index into ->virt */
8ec5d685 410 struct vd_config **other_bvds;
6416d527
NB
411 __u64 *block_sizes; /* NULL if all the same */
412 };
413 };
a322f70c 414 struct vd_config conf;
d2ca6449 415 } *conflist, *currentconf;
a322f70c 416 struct dl {
6416d527
NB
417 union {
418 char space[512];
419 struct {
420 struct dl *next;
421 int major, minor;
422 char *devname;
423 int fd;
424 unsigned long long size; /* sectors */
097bcf00 425 unsigned long long primary_lba; /* sectors */
426 unsigned long long secondary_lba; /* sectors */
427 unsigned long long workspace_lba; /* sectors */
6416d527
NB
428 int pdnum; /* index in ->phys */
429 struct spare_assign *spare;
8592f29d
N
430 void *mdupdate; /* hold metadata update */
431
432 /* These fields used by auto-layout */
433 int raiddisk; /* slot to fill in autolayout */
434 __u64 esize;
6416d527
NB
435 };
436 };
a322f70c 437 struct disk_data disk;
b2280677 438 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 439 } *dlist, *add_list;
a322f70c
DW
440};
441
442#ifndef offsetof
443#define offsetof(t,f) ((size_t)&(((t*)0)->f))
444#endif
445
f21e18ca 446static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
447{
448 /* crcs are always at the same place as in the ddf_header */
449 struct ddf_header *ddf = buf;
450 __u32 oldcrc = ddf->crc;
451 __u32 newcrc;
452 ddf->crc = 0xffffffff;
453
454 newcrc = crc32(0, buf, len);
455 ddf->crc = oldcrc;
4abe6b70
N
456 /* The crc is store (like everything) bigendian, so convert
457 * here for simplicity
458 */
459 return __cpu_to_be32(newcrc);
a322f70c
DW
460}
461
462static int load_ddf_header(int fd, unsigned long long lba,
463 unsigned long long size,
464 int type,
465 struct ddf_header *hdr, struct ddf_header *anchor)
466{
467 /* read a ddf header (primary or secondary) from fd/lba
468 * and check that it is consistent with anchor
469 * Need to check:
470 * magic, crc, guid, rev, and LBA's header_type, and
471 * everything after header_type must be the same
472 */
473 if (lba >= size-1)
474 return 0;
475
476 if (lseek64(fd, lba<<9, 0) < 0)
477 return 0;
478
479 if (read(fd, hdr, 512) != 512)
480 return 0;
481
482 if (hdr->magic != DDF_HEADER_MAGIC)
483 return 0;
484 if (calc_crc(hdr, 512) != hdr->crc)
485 return 0;
486 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
487 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
488 anchor->primary_lba != hdr->primary_lba ||
489 anchor->secondary_lba != hdr->secondary_lba ||
490 hdr->type != type ||
491 memcmp(anchor->pad2, hdr->pad2, 512 -
492 offsetof(struct ddf_header, pad2)) != 0)
493 return 0;
494
495 /* Looks good enough to me... */
496 return 1;
497}
498
499static void *load_section(int fd, struct ddf_super *super, void *buf,
500 __u32 offset_be, __u32 len_be, int check)
501{
502 unsigned long long offset = __be32_to_cpu(offset_be);
503 unsigned long long len = __be32_to_cpu(len_be);
504 int dofree = (buf == NULL);
505
506 if (check)
507 if (len != 2 && len != 8 && len != 32
508 && len != 128 && len != 512)
509 return NULL;
510
511 if (len > 1024)
512 return NULL;
513 if (buf) {
514 /* All pre-allocated sections are a single block */
515 if (len != 1)
516 return NULL;
3d2c4fc7
DW
517 } else if (posix_memalign(&buf, 512, len<<9) != 0)
518 buf = NULL;
6416d527 519
a322f70c
DW
520 if (!buf)
521 return NULL;
522
523 if (super->active->type == 1)
524 offset += __be64_to_cpu(super->active->primary_lba);
525 else
526 offset += __be64_to_cpu(super->active->secondary_lba);
527
f21e18ca 528 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
529 if (dofree)
530 free(buf);
531 return NULL;
532 }
f21e18ca 533 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
534 if (dofree)
535 free(buf);
536 return NULL;
537 }
538 return buf;
539}
540
541static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
542{
543 unsigned long long dsize;
544
545 get_dev_size(fd, NULL, &dsize);
546
547 if (lseek64(fd, dsize-512, 0) < 0) {
548 if (devname)
e7b84f9d
N
549 pr_err("Cannot seek to anchor block on %s: %s\n",
550 devname, strerror(errno));
a322f70c
DW
551 return 1;
552 }
553 if (read(fd, &super->anchor, 512) != 512) {
554 if (devname)
e7b84f9d
N
555 pr_err("Cannot read anchor block on %s: %s\n",
556 devname, strerror(errno));
a322f70c
DW
557 return 1;
558 }
559 if (super->anchor.magic != DDF_HEADER_MAGIC) {
560 if (devname)
e7b84f9d 561 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
562 devname);
563 return 2;
564 }
565 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
566 if (devname)
e7b84f9d 567 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
568 devname);
569 return 2;
570 }
59e36268
NB
571 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
572 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 573 if (devname)
e7b84f9d 574 pr_err("can only support super revision"
59e36268
NB
575 " %.8s and earlier, not %.8s on %s\n",
576 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
577 return 2;
578 }
579 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
580 dsize >> 9, 1,
581 &super->primary, &super->anchor) == 0) {
582 if (devname)
e7b84f9d
N
583 pr_err("Failed to load primary DDF header "
584 "on %s\n", devname);
a322f70c
DW
585 return 2;
586 }
587 super->active = &super->primary;
588 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
589 dsize >> 9, 2,
590 &super->secondary, &super->anchor)) {
591 if ((__be32_to_cpu(super->primary.seq)
592 < __be32_to_cpu(super->secondary.seq) &&
593 !super->secondary.openflag)
594 || (__be32_to_cpu(super->primary.seq)
595 == __be32_to_cpu(super->secondary.seq) &&
596 super->primary.openflag && !super->secondary.openflag)
597 )
598 super->active = &super->secondary;
599 }
600 return 0;
601}
602
603static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
604{
605 void *ok;
606 ok = load_section(fd, super, &super->controller,
607 super->active->controller_section_offset,
608 super->active->controller_section_length,
609 0);
610 super->phys = load_section(fd, super, NULL,
611 super->active->phys_section_offset,
612 super->active->phys_section_length,
613 1);
614 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
615
616 super->virt = load_section(fd, super, NULL,
617 super->active->virt_section_offset,
618 super->active->virt_section_length,
619 1);
620 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
621 if (!ok ||
622 !super->phys ||
623 !super->virt) {
624 free(super->phys);
625 free(super->virt);
a2349791
NB
626 super->phys = NULL;
627 super->virt = NULL;
a322f70c
DW
628 return 2;
629 }
630 super->conflist = NULL;
631 super->dlist = NULL;
8c3b8c2c
NB
632
633 super->max_part = __be16_to_cpu(super->active->max_partitions);
634 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
635 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
636 return 0;
637}
638
3dc821b0 639static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
640 unsigned int len)
641{
642 int i;
643 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
644 if (vcl->other_bvds[i] != NULL &&
645 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
646 break;
647
648 if (i < vcl->conf.sec_elmnt_count-1) {
649 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
650 return;
651 } else {
652 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
653 if (vcl->other_bvds[i] == NULL)
654 break;
655 if (i == vcl->conf.sec_elmnt_count-1) {
656 pr_err("no space for sec level config %u, count is %u\n",
657 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
658 return;
659 }
660 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
661 != 0) {
662 pr_err("%s could not allocate vd buf\n", __func__);
663 return;
664 }
665 }
666 memcpy(vcl->other_bvds[i], vd, len);
667}
668
a322f70c
DW
669static int load_ddf_local(int fd, struct ddf_super *super,
670 char *devname, int keep)
671{
672 struct dl *dl;
673 struct stat stb;
674 char *conf;
f21e18ca
N
675 unsigned int i;
676 unsigned int confsec;
b2280677 677 int vnum;
f21e18ca 678 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 679 unsigned long long dsize;
a322f70c
DW
680
681 /* First the local disk info */
3d2c4fc7 682 if (posix_memalign((void**)&dl, 512,
6416d527 683 sizeof(*dl) +
3d2c4fc7 684 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 685 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
686 __func__);
687 return 1;
688 }
a322f70c
DW
689
690 load_section(fd, super, &dl->disk,
691 super->active->data_section_offset,
692 super->active->data_section_length,
693 0);
503975b9 694 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 695
a322f70c
DW
696 fstat(fd, &stb);
697 dl->major = major(stb.st_rdev);
698 dl->minor = minor(stb.st_rdev);
699 dl->next = super->dlist;
700 dl->fd = keep ? fd : -1;
d2ca6449
NB
701
702 dl->size = 0;
703 if (get_dev_size(fd, devname, &dsize))
704 dl->size = dsize >> 9;
097bcf00 705 /* If the disks have different sizes, the LBAs will differ
706 * between phys disks.
707 * At this point here, the values in super->active must be valid
708 * for this phys disk. */
709 dl->primary_lba = super->active->primary_lba;
710 dl->secondary_lba = super->active->secondary_lba;
711 dl->workspace_lba = super->active->workspace_lba;
b2280677 712 dl->spare = NULL;
f21e18ca 713 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
714 dl->vlist[i] = NULL;
715 super->dlist = dl;
59e36268 716 dl->pdnum = -1;
f21e18ca 717 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
718 if (memcmp(super->phys->entries[i].guid,
719 dl->disk.guid, DDF_GUID_LEN) == 0)
720 dl->pdnum = i;
721
a322f70c
DW
722 /* Now the config list. */
723 /* 'conf' is an array of config entries, some of which are
724 * probably invalid. Those which are good need to be copied into
725 * the conflist
726 */
a322f70c
DW
727
728 conf = load_section(fd, super, NULL,
729 super->active->config_section_offset,
730 super->active->config_section_length,
731 0);
732
b2280677 733 vnum = 0;
e223334f
N
734 for (confsec = 0;
735 confsec < __be32_to_cpu(super->active->config_section_length);
736 confsec += super->conf_rec_len) {
a322f70c 737 struct vd_config *vd =
e223334f 738 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
739 struct vcl *vcl;
740
b2280677
NB
741 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
742 if (dl->spare)
743 continue;
3d2c4fc7
DW
744 if (posix_memalign((void**)&dl->spare, 512,
745 super->conf_rec_len*512) != 0) {
e7b84f9d
N
746 pr_err("%s could not allocate spare info buf\n",
747 __func__);
3d2c4fc7
DW
748 return 1;
749 }
613b0d17 750
b2280677
NB
751 memcpy(dl->spare, vd, super->conf_rec_len*512);
752 continue;
753 }
a322f70c
DW
754 if (vd->magic != DDF_VD_CONF_MAGIC)
755 continue;
756 for (vcl = super->conflist; vcl; vcl = vcl->next) {
757 if (memcmp(vcl->conf.guid,
758 vd->guid, DDF_GUID_LEN) == 0)
759 break;
760 }
761
762 if (vcl) {
b2280677 763 dl->vlist[vnum++] = vcl;
3dc821b0 764 if (vcl->other_bvds != NULL &&
765 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
766 add_other_bvd(vcl, vd, super->conf_rec_len*512);
767 continue;
768 }
a322f70c
DW
769 if (__be32_to_cpu(vd->seqnum) <=
770 __be32_to_cpu(vcl->conf.seqnum))
771 continue;
59e36268 772 } else {
3d2c4fc7 773 if (posix_memalign((void**)&vcl, 512,
6416d527 774 (super->conf_rec_len*512 +
3d2c4fc7 775 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
776 pr_err("%s could not allocate vcl buf\n",
777 __func__);
3d2c4fc7
DW
778 return 1;
779 }
a322f70c 780 vcl->next = super->conflist;
59e36268 781 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 782 if (vd->sec_elmnt_count > 1)
783 vcl->other_bvds =
784 xcalloc(vd->sec_elmnt_count - 1,
785 sizeof(struct vd_config *));
786 else
787 vcl->other_bvds = NULL;
a322f70c 788 super->conflist = vcl;
b2280677 789 dl->vlist[vnum++] = vcl;
a322f70c 790 }
8c3b8c2c 791 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
a322f70c 792 vcl->lba_offset = (__u64*)
8c3b8c2c 793 &vcl->conf.phys_refnum[super->mppe];
59e36268
NB
794
795 for (i=0; i < max_virt_disks ; i++)
796 if (memcmp(super->virt->entries[i].guid,
797 vcl->conf.guid, DDF_GUID_LEN)==0)
798 break;
799 if (i < max_virt_disks)
800 vcl->vcnum = i;
a322f70c
DW
801 }
802 free(conf);
803
804 return 0;
805}
806
807#ifndef MDASSEMBLE
808static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 809 void **sbp, char *devname);
a322f70c 810#endif
37424f13
DW
811
812static void free_super_ddf(struct supertype *st);
813
a322f70c
DW
814static int load_super_ddf(struct supertype *st, int fd,
815 char *devname)
816{
817 unsigned long long dsize;
818 struct ddf_super *super;
819 int rv;
820
a322f70c
DW
821 if (get_dev_size(fd, devname, &dsize) == 0)
822 return 1;
823
691c6ee1
N
824 if (test_partition(fd))
825 /* DDF is not allowed on partitions */
826 return 1;
827
a322f70c
DW
828 /* 32M is a lower bound */
829 if (dsize <= 32*1024*1024) {
97320d7c 830 if (devname)
e7b84f9d
N
831 pr_err("%s is too small for ddf: "
832 "size is %llu sectors.\n",
833 devname, dsize>>9);
97320d7c 834 return 1;
a322f70c
DW
835 }
836 if (dsize & 511) {
97320d7c 837 if (devname)
e7b84f9d
N
838 pr_err("%s is an odd size for ddf: "
839 "size is %llu bytes.\n",
840 devname, dsize);
97320d7c 841 return 1;
a322f70c
DW
842 }
843
37424f13
DW
844 free_super_ddf(st);
845
6416d527 846 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 847 pr_err("malloc of %zu failed.\n",
a322f70c
DW
848 sizeof(*super));
849 return 1;
850 }
a2349791 851 memset(super, 0, sizeof(*super));
a322f70c
DW
852
853 rv = load_ddf_headers(fd, super, devname);
854 if (rv) {
855 free(super);
856 return rv;
857 }
858
859 /* Have valid headers and have chosen the best. Let's read in the rest*/
860
861 rv = load_ddf_global(fd, super, devname);
862
863 if (rv) {
864 if (devname)
e7b84f9d
N
865 pr_err("Failed to load all information "
866 "sections on %s\n", devname);
a322f70c
DW
867 free(super);
868 return rv;
869 }
870
3d2c4fc7
DW
871 rv = load_ddf_local(fd, super, devname, 0);
872
873 if (rv) {
874 if (devname)
e7b84f9d
N
875 pr_err("Failed to load all information "
876 "sections on %s\n", devname);
3d2c4fc7
DW
877 free(super);
878 return rv;
879 }
a322f70c
DW
880
881 /* Should possibly check the sections .... */
882
883 st->sb = super;
884 if (st->ss == NULL) {
885 st->ss = &super_ddf;
886 st->minor_version = 0;
887 st->max_devs = 512;
888 }
889 return 0;
890
891}
892
893static void free_super_ddf(struct supertype *st)
894{
895 struct ddf_super *ddf = st->sb;
896 if (ddf == NULL)
897 return;
898 free(ddf->phys);
899 free(ddf->virt);
900 while (ddf->conflist) {
901 struct vcl *v = ddf->conflist;
902 ddf->conflist = v->next;
59e36268
NB
903 if (v->block_sizes)
904 free(v->block_sizes);
3dc821b0 905 if (v->other_bvds) {
906 int i;
907 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
908 if (v->other_bvds[i] != NULL)
909 free(v->other_bvds[i]);
8ec5d685 910 free(v->other_bvds);
3dc821b0 911 }
a322f70c
DW
912 free(v);
913 }
914 while (ddf->dlist) {
915 struct dl *d = ddf->dlist;
916 ddf->dlist = d->next;
917 if (d->fd >= 0)
918 close(d->fd);
b2280677
NB
919 if (d->spare)
920 free(d->spare);
a322f70c
DW
921 free(d);
922 }
8a38cb04
N
923 while (ddf->add_list) {
924 struct dl *d = ddf->add_list;
925 ddf->add_list = d->next;
926 if (d->fd >= 0)
927 close(d->fd);
928 if (d->spare)
929 free(d->spare);
930 free(d);
931 }
a322f70c
DW
932 free(ddf);
933 st->sb = NULL;
934}
935
936static struct supertype *match_metadata_desc_ddf(char *arg)
937{
938 /* 'ddf' only support containers */
939 struct supertype *st;
940 if (strcmp(arg, "ddf") != 0 &&
941 strcmp(arg, "default") != 0
942 )
943 return NULL;
944
503975b9 945 st = xcalloc(1, sizeof(*st));
a322f70c
DW
946 st->ss = &super_ddf;
947 st->max_devs = 512;
948 st->minor_version = 0;
949 st->sb = NULL;
950 return st;
951}
952
a322f70c
DW
953#ifndef MDASSEMBLE
954
955static mapping_t ddf_state[] = {
956 { "Optimal", 0},
957 { "Degraded", 1},
958 { "Deleted", 2},
959 { "Missing", 3},
960 { "Failed", 4},
961 { "Partially Optimal", 5},
962 { "-reserved-", 6},
963 { "-reserved-", 7},
964 { NULL, 0}
965};
966
967static mapping_t ddf_init_state[] = {
968 { "Not Initialised", 0},
969 { "QuickInit in Progress", 1},
970 { "Fully Initialised", 2},
971 { "*UNKNOWN*", 3},
972 { NULL, 0}
973};
974static mapping_t ddf_access[] = {
975 { "Read/Write", 0},
976 { "Reserved", 1},
977 { "Read Only", 2},
978 { "Blocked (no access)", 3},
979 { NULL ,0}
980};
981
982static mapping_t ddf_level[] = {
983 { "RAID0", DDF_RAID0},
984 { "RAID1", DDF_RAID1},
985 { "RAID3", DDF_RAID3},
986 { "RAID4", DDF_RAID4},
987 { "RAID5", DDF_RAID5},
988 { "RAID1E",DDF_RAID1E},
989 { "JBOD", DDF_JBOD},
990 { "CONCAT",DDF_CONCAT},
991 { "RAID5E",DDF_RAID5E},
992 { "RAID5EE",DDF_RAID5EE},
993 { "RAID6", DDF_RAID6},
994 { NULL, 0}
995};
996static mapping_t ddf_sec_level[] = {
997 { "Striped", DDF_2STRIPED},
998 { "Mirrored", DDF_2MIRRORED},
999 { "Concat", DDF_2CONCAT},
1000 { "Spanned", DDF_2SPANNED},
1001 { NULL, 0}
1002};
1003#endif
1004
1005struct num_mapping {
1006 int num1, num2;
1007};
1008static struct num_mapping ddf_level_num[] = {
1009 { DDF_RAID0, 0 },
1010 { DDF_RAID1, 1 },
1011 { DDF_RAID3, LEVEL_UNSUPPORTED },
60f18132
NB
1012 { DDF_RAID4, 4 },
1013 { DDF_RAID5, 5 },
a322f70c
DW
1014 { DDF_RAID1E, LEVEL_UNSUPPORTED },
1015 { DDF_JBOD, LEVEL_UNSUPPORTED },
1016 { DDF_CONCAT, LEVEL_LINEAR },
1017 { DDF_RAID5E, LEVEL_UNSUPPORTED },
1018 { DDF_RAID5EE, LEVEL_UNSUPPORTED },
1019 { DDF_RAID6, 6},
1020 { MAXINT, MAXINT }
1021};
1022
1023static int map_num1(struct num_mapping *map, int num)
1024{
1025 int i;
1026 for (i=0 ; map[i].num1 != MAXINT; i++)
1027 if (map[i].num1 == num)
1028 break;
1029 return map[i].num2;
1030}
1031
42dc2744
N
1032static int all_ff(char *guid)
1033{
1034 int i;
1035 for (i = 0; i < DDF_GUID_LEN; i++)
1036 if (guid[i] != (char)0xff)
1037 return 0;
1038 return 1;
1039}
1040
a322f70c
DW
1041#ifndef MDASSEMBLE
1042static void print_guid(char *guid, int tstamp)
1043{
1044 /* A GUIDs are part (or all) ASCII and part binary.
1045 * They tend to be space padded.
59e36268
NB
1046 * We print the GUID in HEX, then in parentheses add
1047 * any initial ASCII sequence, and a possible
1048 * time stamp from bytes 16-19
a322f70c
DW
1049 */
1050 int l = DDF_GUID_LEN;
1051 int i;
59e36268
NB
1052
1053 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1054 if ((i&3)==0 && i != 0) printf(":");
1055 printf("%02X", guid[i]&255);
1056 }
1057
cfccea8c 1058 printf("\n (");
a322f70c
DW
1059 while (l && guid[l-1] == ' ')
1060 l--;
1061 for (i=0 ; i<l ; i++) {
1062 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1063 fputc(guid[i], stdout);
1064 else
59e36268 1065 break;
a322f70c
DW
1066 }
1067 if (tstamp) {
1068 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1069 char tbuf[100];
1070 struct tm *tm;
1071 tm = localtime(&then);
59e36268 1072 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1073 fputs(tbuf, stdout);
1074 }
59e36268 1075 printf(")");
a322f70c
DW
1076}
1077
1078static void examine_vd(int n, struct ddf_super *sb, char *guid)
1079{
8c3b8c2c 1080 int crl = sb->conf_rec_len;
a322f70c
DW
1081 struct vcl *vcl;
1082
1083 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1084 unsigned int i;
a322f70c
DW
1085 struct vd_config *vc = &vcl->conf;
1086
1087 if (calc_crc(vc, crl*512) != vc->crc)
1088 continue;
1089 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1090 continue;
1091
1092 /* Ok, we know about this VD, let's give more details */
b06e3095 1093 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1094 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1095 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1096 int j;
1097 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1098 for (j=0; j<cnt; j++)
1099 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1100 break;
1101 if (i) printf(" ");
1102 if (j < cnt)
1103 printf("%d", j);
1104 else
1105 printf("--");
1106 }
1107 printf(")\n");
1108 if (vc->chunk_shift != 255)
613b0d17
N
1109 printf(" Chunk Size[%d] : %d sectors\n", n,
1110 1 << vc->chunk_shift);
a322f70c
DW
1111 printf(" Raid Level[%d] : %s\n", n,
1112 map_num(ddf_level, vc->prl)?:"-unknown-");
1113 if (vc->sec_elmnt_count != 1) {
1114 printf(" Secondary Position[%d] : %d of %d\n", n,
1115 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1116 printf(" Secondary Level[%d] : %s\n", n,
1117 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1118 }
1119 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1120 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1121 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1122 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1123 }
1124}
1125
1126static void examine_vds(struct ddf_super *sb)
1127{
1128 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
1129 int i;
1130 printf(" Virtual Disks : %d\n", cnt);
1131
1132 for (i=0; i<cnt; i++) {
1133 struct virtual_entry *ve = &sb->virt->entries[i];
b06e3095 1134 printf("\n");
a322f70c
DW
1135 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1136 printf("\n");
1137 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1138 printf(" state[%d] : %s, %s%s\n", i,
1139 map_num(ddf_state, ve->state & 7),
1140 (ve->state & 8) ? "Morphing, ": "",
1141 (ve->state & 16)? "Not Consistent" : "Consistent");
1142 printf(" init state[%d] : %s\n", i,
1143 map_num(ddf_init_state, ve->init_state&3));
1144 printf(" access[%d] : %s\n", i,
1145 map_num(ddf_access, (ve->init_state>>6) & 3));
1146 printf(" Name[%d] : %.16s\n", i, ve->name);
1147 examine_vd(i, sb, ve->guid);
1148 }
1149 if (cnt) printf("\n");
1150}
1151
1152static void examine_pds(struct ddf_super *sb)
1153{
1154 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1155 int i;
1156 struct dl *dl;
1157 printf(" Physical Disks : %d\n", cnt);
962371a5 1158 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1159
1160 for (i=0 ; i<cnt ; i++) {
1161 struct phys_disk_entry *pd = &sb->phys->entries[i];
1162 int type = __be16_to_cpu(pd->type);
1163 int state = __be16_to_cpu(pd->state);
1164
b06e3095
N
1165 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1166 //printf("\n");
1167 printf(" %3d %08x ", i,
a322f70c 1168 __be32_to_cpu(pd->refnum));
613b0d17 1169 printf("%8lluK ",
c9b6907b 1170 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1171 for (dl = sb->dlist; dl ; dl = dl->next) {
1172 if (dl->disk.refnum == pd->refnum) {
1173 char *dv = map_dev(dl->major, dl->minor, 0);
1174 if (dv) {
962371a5 1175 printf("%-15s", dv);
b06e3095
N
1176 break;
1177 }
1178 }
1179 }
1180 if (!dl)
962371a5 1181 printf("%15s","");
b06e3095 1182 printf(" %s%s%s%s%s",
a322f70c 1183 (type&2) ? "active":"",
b06e3095 1184 (type&4) ? "Global-Spare":"",
a322f70c
DW
1185 (type&8) ? "spare" : "",
1186 (type&16)? ", foreign" : "",
1187 (type&32)? "pass-through" : "");
18cb4496
N
1188 if (state & DDF_Failed)
1189 /* This over-rides these three */
1190 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1191 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1192 (state&1)? "Online": "Offline",
1193 (state&2)? ", Failed": "",
1194 (state&4)? ", Rebuilding": "",
1195 (state&8)? ", in-transition": "",
b06e3095
N
1196 (state&16)? ", SMART-errors": "",
1197 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1198 (state&64)? ", Missing" : "");
a322f70c
DW
1199 printf("\n");
1200 }
1201}
1202
1203static void examine_super_ddf(struct supertype *st, char *homehost)
1204{
1205 struct ddf_super *sb = st->sb;
1206
1207 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1208 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1209 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1210 printf("\n");
1211 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1212 printf("\n");
1213 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1214 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1215 ?"yes" : "no");
1216 examine_vds(sb);
1217 examine_pds(sb);
1218}
1219
a5d85af7 1220static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1221
42dc2744 1222static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1223
061f2c6a 1224static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1225{
1226 /* We just write a generic DDF ARRAY entry
1227 */
1228 struct mdinfo info;
1229 char nbuf[64];
a5d85af7 1230 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1231 fname_from_uuid(st, &info, nbuf, ':');
1232
1233 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1234}
1235
1236static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1237{
1238 /* We just write a generic DDF ARRAY entry
a322f70c 1239 */
42dc2744 1240 struct ddf_super *ddf = st->sb;
ff54de6e 1241 struct mdinfo info;
f21e18ca 1242 unsigned int i;
ff54de6e 1243 char nbuf[64];
a5d85af7 1244 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1245 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1246
f21e18ca 1247 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1248 struct virtual_entry *ve = &ddf->virt->entries[i];
1249 struct vcl vcl;
1250 char nbuf1[64];
1251 if (all_ff(ve->guid))
1252 continue;
1253 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1254 ddf->currentconf =&vcl;
1255 uuid_from_super_ddf(st, info.uuid);
1256 fname_from_uuid(st, &info, nbuf1, ':');
1257 printf("ARRAY container=%s member=%d UUID=%s\n",
1258 nbuf+5, i, nbuf1+5);
1259 }
a322f70c
DW
1260}
1261
bceedeec
N
1262static void export_examine_super_ddf(struct supertype *st)
1263{
1264 struct mdinfo info;
1265 char nbuf[64];
a5d85af7 1266 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1267 fname_from_uuid(st, &info, nbuf, ':');
1268 printf("MD_METADATA=ddf\n");
1269 printf("MD_LEVEL=container\n");
1270 printf("MD_UUID=%s\n", nbuf+5);
1271}
bceedeec 1272
a322f70c
DW
/* Placeholder: currently prints nothing and uses neither 'st' nor
 * 'homehost'.  The notes below record what it is meant to do eventually.
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 * Could print DDF GUID
	 * Need to find which array
	 * If whole, briefly list all arrays
	 * If one, give name
	 */
}
1282
1283static void brief_detail_super_ddf(struct supertype *st)
1284{
1285 /* FIXME I really need to know which array we are detailing.
1286 * Can that be stored in ddf_super??
1287 */
1288// struct ddf_super *ddf = st->sb;
ff54de6e
N
1289 struct mdinfo info;
1290 char nbuf[64];
a5d85af7 1291 getinfo_super_ddf(st, &info, NULL);
ff54de6e
N
1292 fname_from_uuid(st, &info, nbuf,':');
1293 printf(" UUID=%s", nbuf + 5);
a322f70c 1294}
a322f70c
DW
1295#endif
1296
1297static int match_home_ddf(struct supertype *st, char *homehost)
1298{
1299 /* It matches 'this' host if the controller is a
1300 * Linux-MD controller with vendor_data matching
1301 * the hostname
1302 */
1303 struct ddf_super *ddf = st->sb;
f21e18ca 1304 unsigned int len;
d1d3482b
N
1305
1306 if (!homehost)
1307 return 0;
1308 len = strlen(homehost);
a322f70c
DW
1309
1310 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1311 len < sizeof(ddf->controller.vendor_data) &&
1312 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1313 ddf->controller.vendor_data[len] == 0);
1314}
1315
0e600426 1316#ifndef MDASSEMBLE
f21e18ca 1317static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst)
a322f70c 1318{
7a7cc504 1319 struct vcl *v;
59e36268 1320
7a7cc504 1321 for (v = ddf->conflist; v; v = v->next)
59e36268 1322 if (inst == v->vcnum)
7a7cc504
NB
1323 return &v->conf;
1324 return NULL;
1325}
0e600426 1326#endif
7a7cc504
NB
1327
1328static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
1329{
1330 /* Find the entry in phys_disk which has the given refnum
1331 * and return it's index
1332 */
f21e18ca
N
1333 unsigned int i;
1334 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1335 if (ddf->phys->entries[i].refnum == phys_refnum)
1336 return i;
1337 return -1;
a322f70c
DW
1338}
1339
1340static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1341{
1342 /* The uuid returned here is used for:
1343 * uuid to put into bitmap file (Create, Grow)
1344 * uuid for backup header when saving critical section (Grow)
1345 * comparing uuids when re-adding a device into an array
51006d85
N
1346 * In these cases the uuid required is that of the data-array,
1347 * not the device-set.
1348 * uuid to recognise same set when adding a missing device back
1349 * to an array. This is a uuid for the device-set.
613b0d17 1350 *
a322f70c
DW
1351 * For each of these we can make do with a truncated
1352 * or hashed uuid rather than the original, as long as
1353 * everyone agrees.
a322f70c
DW
1354 * In the case of SVD we assume the BVD is of interest,
1355 * though that might be the case if a bitmap were made for
1356 * a mirrored SVD - worry about that later.
1357 * So we need to find the VD configuration record for the
1358 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1359 * The first 16 bytes of the sha1 of these is used.
1360 */
1361 struct ddf_super *ddf = st->sb;
d2ca6449 1362 struct vcl *vcl = ddf->currentconf;
c5afc314
N
1363 char *guid;
1364 char buf[20];
1365 struct sha1_ctx ctx;
a322f70c 1366
c5afc314
N
1367 if (vcl)
1368 guid = vcl->conf.guid;
1369 else
1370 guid = ddf->anchor.guid;
1371
1372 sha1_init_ctx(&ctx);
1373 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
c5afc314
N
1374 sha1_finish_ctx(&ctx, buf);
1375 memcpy(uuid, buf, 4*4);
a322f70c
DW
1376}
1377
a5d85af7 1378static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1379
a5d85af7 1380static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1381{
1382 struct ddf_super *ddf = st->sb;
a5d85af7 1383 int map_disks = info->array.raid_disks;
90fa1a29 1384 __u32 *cptr;
a322f70c 1385
78e44928 1386 if (ddf->currentconf) {
a5d85af7 1387 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1388 return;
1389 }
95eeceeb 1390 memset(info, 0, sizeof(*info));
78e44928 1391
a322f70c
DW
1392 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1393 info->array.level = LEVEL_CONTAINER;
1394 info->array.layout = 0;
1395 info->array.md_minor = -1;
90fa1a29
JS
1396 cptr = (__u32 *)(ddf->anchor.guid + 16);
1397 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1398
a322f70c
DW
1399 info->array.utime = 0;
1400 info->array.chunk_size = 0;
510242aa 1401 info->container_enough = 1;
a322f70c 1402
a322f70c
DW
1403 info->disk.major = 0;
1404 info->disk.minor = 0;
cba0191b
NB
1405 if (ddf->dlist) {
1406 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1407 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1408
1409 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1410 entries[info->disk.raid_disk].
1411 config_size);
d2ca6449 1412 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1413 } else {
1414 info->disk.number = -1;
661dce36 1415 info->disk.raid_disk = -1;
cba0191b
NB
1416// info->disk.raid_disk = find refnum in the table and use index;
1417 }
f22385f9 1418 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1419
921d9e16 1420 info->recovery_start = MaxSector;
a19c88b8 1421 info->reshape_active = 0;
6e75048b 1422 info->recovery_blocked = 0;
c5afc314 1423 info->name[0] = 0;
a322f70c 1424
f35f2525
N
1425 info->array.major_version = -1;
1426 info->array.minor_version = -2;
159c3a1a 1427 strcpy(info->text_version, "ddf");
a67dd8cc 1428 info->safe_mode_delay = 0;
159c3a1a 1429
c5afc314 1430 uuid_from_super_ddf(st, info->uuid);
a322f70c 1431
a5d85af7
N
1432 if (map) {
1433 int i;
1434 for (i = 0 ; i < map_disks; i++) {
1435 if (i < info->array.raid_disks &&
1436 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1437 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1438 map[i] = 1;
1439 else
1440 map[i] = 0;
1441 }
1442 }
a322f70c
DW
1443}
1444
598f0d58
NB
1445static int rlq_to_layout(int rlq, int prl, int raiddisks);
1446
a5d85af7 1447static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1448{
1449 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1450 struct vcl *vc = ddf->currentconf;
1451 int cd = ddf->currentdev;
db42fa9b 1452 int j;
8592f29d 1453 struct dl *dl;
a5d85af7 1454 int map_disks = info->array.raid_disks;
90fa1a29 1455 __u32 *cptr;
a322f70c 1456
95eeceeb 1457 memset(info, 0, sizeof(*info));
a322f70c
DW
1458 /* FIXME this returns BVD info - what if we want SVD ?? */
1459
d2ca6449
NB
1460 info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count);
1461 info->array.level = map_num1(ddf_level_num, vc->conf.prl);
1462 info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
598f0d58 1463 info->array.raid_disks);
a322f70c 1464 info->array.md_minor = -1;
90fa1a29
JS
1465 cptr = (__u32 *)(vc->conf.guid + 16);
1466 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1467 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1468 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1469 info->custom_array_size = 0;
d2ca6449 1470
f21e18ca 1471 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
d2ca6449
NB
1472 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1473 if (vc->block_sizes)
1474 info->component_size = vc->block_sizes[cd];
1475 else
1476 info->component_size = __be64_to_cpu(vc->conf.blocks);
1477 }
a322f70c 1478
fb204fb2
N
1479 for (dl = ddf->dlist; dl ; dl = dl->next)
1480 if (dl->raiddisk == ddf->currentdev)
1481 break;
1482
a322f70c
DW
1483 info->disk.major = 0;
1484 info->disk.minor = 0;
fb204fb2 1485 info->disk.state = 0;
8592f29d
N
1486 if (dl) {
1487 info->disk.major = dl->major;
1488 info->disk.minor = dl->minor;
fb204fb2
N
1489 info->disk.raid_disk = dl->raiddisk;
1490 info->disk.number = dl->pdnum;
1491 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1492 }
a322f70c 1493
103f2410
NB
1494 info->container_member = ddf->currentconf->vcnum;
1495
921d9e16 1496 info->recovery_start = MaxSector;
80d26cb2 1497 info->resync_start = 0;
624c5ad4 1498 info->reshape_active = 0;
6e75048b 1499 info->recovery_blocked = 0;
80d26cb2
NB
1500 if (!(ddf->virt->entries[info->container_member].state
1501 & DDF_state_inconsistent) &&
1502 (ddf->virt->entries[info->container_member].init_state
1503 & DDF_initstate_mask)
1504 == DDF_init_full)
b7528a20 1505 info->resync_start = MaxSector;
80d26cb2 1506
a322f70c
DW
1507 uuid_from_super_ddf(st, info->uuid);
1508
f35f2525
N
1509 info->array.major_version = -1;
1510 info->array.minor_version = -2;
9b63e648 1511 sprintf(info->text_version, "/%s/%d",
4dd2df09 1512 st->container_devnm,
9b63e648 1513 info->container_member);
a67dd8cc 1514 info->safe_mode_delay = 200;
159c3a1a 1515
db42fa9b
N
1516 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1517 info->name[16]=0;
1518 for(j=0; j<16; j++)
1519 if (info->name[j] == ' ')
1520 info->name[j] = 0;
a5d85af7
N
1521
1522 if (map)
1523 for (j = 0; j < map_disks; j++) {
1524 map[j] = 0;
1525 if (j < info->array.raid_disks) {
1526 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1527 if (i >= 0 &&
a5d85af7
N
1528 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1529 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1530 map[i] = 1;
1531 }
1532 }
a322f70c
DW
1533}
1534
/* Apply the metadata update named by 'update'.
 *
 * For 'assemble' and 'force' callers need a non-zero return if any change
 * was made; for other updates the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *        be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *        if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *        linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * "force-*" and "assemble" need no handling here as there is no need to
 * 'trick' the kernel: when the metadata is first updated to activate the
 * array, all the implied modifications will just happen.
 *
 * Nothing is modified by this implementation: only 'update' is examined.
 * Returns 0 for "grow", "resync", "_reshape_progress" and "assemble",
 * and -1 for "homehost", "name" and every other string.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	static const struct {
		const char *name;
		int result;
	} known[] = {
		{ "grow", 0 },			/* FIXME: not implemented */
		{ "resync", 0 },		/* nothing recorded yet */
		{ "homehost", -1 },		/* lives in controller vendor_data */
		{ "name", -1 },			/* lives in virtual_entry name */
		{ "_reshape_progress", 0 },	/* We don't support reshape yet */
		{ "assemble", 0 },		/* Do nothing, just succeed */
	};
	unsigned int k;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
		if (strcmp(update, known[k].name) == 0)
			return known[k].result;

	return -1;
}
1603
5f8097be
NB
1604static void make_header_guid(char *guid)
1605{
1606 __u32 stamp;
5f8097be
NB
1607 /* Create a DDF Header of Virtual Disk GUID */
1608
1609 /* 24 bytes of fiction required.
1610 * first 8 are a 'vendor-id' - "Linux-MD"
1611 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1612 * Remaining 8 random number plus timestamp
1613 */
1614 memcpy(guid, T10, sizeof(T10));
1615 stamp = __cpu_to_be32(0xdeadbeef);
1616 memcpy(guid+8, &stamp, 4);
1617 stamp = __cpu_to_be32(0);
1618 memcpy(guid+12, &stamp, 4);
1619 stamp = __cpu_to_be32(time(0) - DECADE);
1620 memcpy(guid+16, &stamp, 4);
bfb7ea78 1621 stamp = random32();
5f8097be 1622 memcpy(guid+20, &stamp, 4);
5f8097be 1623}
59e36268 1624
78e44928
NB
1625static int init_super_ddf_bvd(struct supertype *st,
1626 mdu_array_info_t *info,
1627 unsigned long long size,
1628 char *name, char *homehost,
83cd1e97 1629 int *uuid, unsigned long long data_offset);
78e44928 1630
a322f70c
DW
1631static int init_super_ddf(struct supertype *st,
1632 mdu_array_info_t *info,
1633 unsigned long long size, char *name, char *homehost,
83cd1e97 1634 int *uuid, unsigned long long data_offset)
a322f70c
DW
1635{
1636 /* This is primarily called by Create when creating a new array.
1637 * We will then get add_to_super called for each component, and then
1638 * write_init_super called to write it out to each device.
1639 * For DDF, Create can create on fresh devices or on a pre-existing
1640 * array.
1641 * To create on a pre-existing array a different method will be called.
1642 * This one is just for fresh drives.
1643 *
1644 * We need to create the entire 'ddf' structure which includes:
1645 * DDF headers - these are easy.
1646 * Controller data - a Sector describing this controller .. not that
1647 * this is a controller exactly.
1648 * Physical Disk Record - one entry per device, so
1649 * leave plenty of space.
1650 * Virtual Disk Records - again, just leave plenty of space.
1651 * This just lists VDs, doesn't give details
1652 * Config records - describes the VDs that use this disk
1653 * DiskData - describes 'this' device.
1654 * BadBlockManagement - empty
1655 * Diag Space - empty
1656 * Vendor Logs - Could we put bitmaps here?
1657 *
1658 */
1659 struct ddf_super *ddf;
1660 char hostname[17];
1661 int hostlen;
a322f70c
DW
1662 int max_phys_disks, max_virt_disks;
1663 unsigned long long sector;
1664 int clen;
1665 int i;
1666 int pdsize, vdsize;
1667 struct phys_disk *pd;
1668 struct virtual_disk *vd;
1669
83cd1e97
N
1670 if (data_offset != INVALID_SECTORS) {
1671 fprintf(stderr, Name ": data-offset not supported by DDF\n");
1672 return 0;
1673 }
1674
78e44928 1675 if (st->sb)
83cd1e97
N
1676 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
1677 data_offset);
ba7eb04f 1678
3d2c4fc7 1679 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 1680 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
1681 return 0;
1682 }
6264b437 1683 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
1684 ddf->dlist = NULL; /* no physical disks yet */
1685 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
1686 st->sb = ddf;
1687
1688 if (info == NULL) {
1689 /* zeroing superblock */
1690 return 0;
1691 }
a322f70c
DW
1692
1693 /* At least 32MB *must* be reserved for the ddf. So let's just
1694 * start 32MB from the end, and put the primary header there.
1695 * Don't do secondary for now.
1696 * We don't know exactly where that will be yet as it could be
1697 * different on each device. To just set up the lengths.
1698 *
1699 */
1700
1701 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 1702 make_header_guid(ddf->anchor.guid);
a322f70c 1703
59e36268 1704 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
1705 ddf->anchor.seq = __cpu_to_be32(1);
1706 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
1707 ddf->anchor.openflag = 0xFF;
1708 ddf->anchor.foreignflag = 0;
1709 ddf->anchor.enforcegroups = 0; /* Is this best?? */
1710 ddf->anchor.pad0 = 0xff;
1711 memset(ddf->anchor.pad1, 0xff, 12);
1712 memset(ddf->anchor.header_ext, 0xff, 32);
1713 ddf->anchor.primary_lba = ~(__u64)0;
1714 ddf->anchor.secondary_lba = ~(__u64)0;
1715 ddf->anchor.type = DDF_HEADER_ANCHOR;
1716 memset(ddf->anchor.pad2, 0xff, 3);
1717 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
1718 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
1719 of 32M reserved.. */
1720 max_phys_disks = 1023; /* Should be enough */
1721 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
1722 max_virt_disks = 255;
1723 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
1724 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
1725 ddf->max_part = 64;
8c3b8c2c 1726 ddf->mppe = 256;
59e36268
NB
1727 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
1728 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
1729 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 1730 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
1731 /* controller sections is one sector long immediately
1732 * after the ddf header */
1733 sector = 1;
1734 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
1735 ddf->anchor.controller_section_length = __cpu_to_be32(1);
1736 sector += 1;
1737
1738 /* phys is 8 sectors after that */
1739 pdsize = ROUND_UP(sizeof(struct phys_disk) +
1740 sizeof(struct phys_disk_entry)*max_phys_disks,
1741 512);
1742 switch(pdsize/512) {
1743 case 2: case 8: case 32: case 128: case 512: break;
1744 default: abort();
1745 }
1746 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
1747 ddf->anchor.phys_section_length =
1748 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
1749 sector += pdsize/512;
1750
1751 /* virt is another 32 sectors */
1752 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
1753 sizeof(struct virtual_entry) * max_virt_disks,
1754 512);
1755 switch(vdsize/512) {
1756 case 2: case 8: case 32: case 128: case 512: break;
1757 default: abort();
1758 }
1759 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
1760 ddf->anchor.virt_section_length =
1761 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
1762 sector += vdsize/512;
1763
59e36268 1764 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
1765 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
1766 ddf->anchor.config_section_length = __cpu_to_be32(clen);
1767 sector += clen;
1768
1769 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
1770 ddf->anchor.data_section_length = __cpu_to_be32(1);
1771 sector += 1;
1772
1773 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
1774 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
1775 ddf->anchor.diag_space_length = __cpu_to_be32(0);
1776 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
1777 ddf->anchor.vendor_length = __cpu_to_be32(0);
1778 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
1779
1780 memset(ddf->anchor.pad4, 0xff, 256);
1781
1782 memcpy(&ddf->primary, &ddf->anchor, 512);
1783 memcpy(&ddf->secondary, &ddf->anchor, 512);
1784
1785 ddf->primary.openflag = 1; /* I guess.. */
1786 ddf->primary.type = DDF_HEADER_PRIMARY;
1787
1788 ddf->secondary.openflag = 1; /* I guess.. */
1789 ddf->secondary.type = DDF_HEADER_SECONDARY;
1790
1791 ddf->active = &ddf->primary;
1792
1793 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
1794
1795 /* 24 more bytes of fiction required.
1796 * first 8 are a 'vendor-id' - "Linux-MD"
1797 * Remaining 16 are serial number.... maybe a hostname would do?
1798 */
1799 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
1800 gethostname(hostname, sizeof(hostname));
1801 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
1802 hostlen = strlen(hostname);
1803 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
1804 for (i = strlen(T10) ; i+hostlen < 24; i++)
1805 ddf->controller.guid[i] = ' ';
1806
1807 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
1808 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
1809 ddf->controller.type.sub_vendor_id = 0;
1810 ddf->controller.type.sub_device_id = 0;
1811 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
1812 memset(ddf->controller.pad, 0xff, 8);
1813 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
1814 if (homehost && strlen(homehost) < 440)
1815 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 1816
3d2c4fc7 1817 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 1818 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
1819 return 0;
1820 }
6416d527 1821 ddf->phys = pd;
a322f70c
DW
1822 ddf->pdsize = pdsize;
1823
1824 memset(pd, 0xff, pdsize);
1825 memset(pd, 0, sizeof(*pd));
076515ba 1826 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
1827 pd->used_pdes = __cpu_to_be16(0);
1828 pd->max_pdes = __cpu_to_be16(max_phys_disks);
1829 memset(pd->pad, 0xff, 52);
1830
3d2c4fc7 1831 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 1832 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
1833 return 0;
1834 }
6416d527 1835 ddf->virt = vd;
a322f70c
DW
1836 ddf->vdsize = vdsize;
1837 memset(vd, 0, vdsize);
1838 vd->magic = DDF_VIRT_RECORDS_MAGIC;
1839 vd->populated_vdes = __cpu_to_be16(0);
1840 vd->max_vdes = __cpu_to_be16(max_virt_disks);
1841 memset(vd->pad, 0xff, 52);
1842
5f8097be
NB
1843 for (i=0; i<max_virt_disks; i++)
1844 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
1845
a322f70c 1846 st->sb = ddf;
18a2f463 1847 ddf->updates_pending = 1;
a322f70c
DW
1848 return 1;
1849}
1850
5f8097be
NB
/* Convert a chunk size in bytes into a shift count relative to 512-byte
 * sectors: the index of the lowest set bit of (chunksize / 512).
 * Returns -1 when chunksize / 512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);	/* 1-based, 0 if no bit is set */

	return lowest_bit - 1;
}
1855
1856static int level_to_prl(int level)
1857{
1858 switch (level) {
1859 case LEVEL_LINEAR: return DDF_CONCAT;
1860 case 0: return DDF_RAID0;
1861 case 1: return DDF_RAID1;
1862 case 4: return DDF_RAID4;
1863 case 5: return DDF_RAID5;
1864 case 6: return DDF_RAID6;
1865 default: return -1;
1866 }
1867}
613b0d17 1868
5f8097be
NB
1869static int layout_to_rlq(int level, int layout, int raiddisks)
1870{
1871 switch(level) {
1872 case 0:
1873 return DDF_RAID0_SIMPLE;
1874 case 1:
1875 switch(raiddisks) {
1876 case 2: return DDF_RAID1_SIMPLE;
1877 case 3: return DDF_RAID1_MULTI;
1878 default: return -1;
1879 }
1880 case 4:
1881 switch(layout) {
1882 case 0: return DDF_RAID4_N;
1883 }
1884 break;
1885 case 5:
5f8097be
NB
1886 switch(layout) {
1887 case ALGORITHM_LEFT_ASYMMETRIC:
1888 return DDF_RAID5_N_RESTART;
1889 case ALGORITHM_RIGHT_ASYMMETRIC:
b640a252 1890 return DDF_RAID5_0_RESTART;
5f8097be
NB
1891 case ALGORITHM_LEFT_SYMMETRIC:
1892 return DDF_RAID5_N_CONTINUE;
1893 case ALGORITHM_RIGHT_SYMMETRIC:
1894 return -1; /* not mentioned in standard */
1895 }
b640a252
N
1896 case 6:
1897 switch(layout) {
1898 case ALGORITHM_ROTATING_N_RESTART:
1899 return DDF_RAID5_N_RESTART;
1900 case ALGORITHM_ROTATING_ZERO_RESTART:
1901 return DDF_RAID6_0_RESTART;
1902 case ALGORITHM_ROTATING_N_CONTINUE:
1903 return DDF_RAID5_N_CONTINUE;
1904 }
5f8097be
NB
1905 }
1906 return -1;
1907}
1908
598f0d58
NB
1909static int rlq_to_layout(int rlq, int prl, int raiddisks)
1910{
1911 switch(prl) {
1912 case DDF_RAID0:
1913 return 0; /* hopefully rlq == DDF_RAID0_SIMPLE */
1914 case DDF_RAID1:
1915 return 0; /* hopefully rlq == SIMPLE or MULTI depending
1916 on raiddisks*/
1917 case DDF_RAID4:
1918 switch(rlq) {
1919 case DDF_RAID4_N:
1920 return 0;
1921 default:
1922 /* not supported */
1923 return -1; /* FIXME this isn't checked */
1924 }
1925 case DDF_RAID5:
598f0d58
NB
1926 switch(rlq) {
1927 case DDF_RAID5_N_RESTART:
1928 return ALGORITHM_LEFT_ASYMMETRIC;
1929 case DDF_RAID5_0_RESTART:
1930 return ALGORITHM_RIGHT_ASYMMETRIC;
1931 case DDF_RAID5_N_CONTINUE:
1932 return ALGORITHM_LEFT_SYMMETRIC;
1933 default:
1934 return -1;
1935 }
59e36268
NB
1936 case DDF_RAID6:
1937 switch(rlq) {
1938 case DDF_RAID5_N_RESTART:
b640a252 1939 return ALGORITHM_ROTATING_N_RESTART;
59e36268 1940 case DDF_RAID6_0_RESTART:
b640a252 1941 return ALGORITHM_ROTATING_ZERO_RESTART;
59e36268 1942 case DDF_RAID5_N_CONTINUE:
b640a252 1943 return ALGORITHM_ROTATING_N_CONTINUE;
59e36268
NB
1944 default:
1945 return -1;
1946 }
598f0d58
NB
1947 }
1948 return -1;
1949}
1950
0e600426 1951#ifndef MDASSEMBLE
59e36268
NB
/* A contiguous region described by its 'start' and its 'size'. */
struct extent {
	unsigned long long start, size;
};

/*
 * qsort() comparison callback: orders two 'struct extent' by ascending
 * 'start'.  Returns -1, 0 or 1.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
1965
78e44928 1966static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
1967{
1968 /* find a list of used extents on the give physical device
1969 * (dnum) of the given ddf.
1970 * Return a malloced array of 'struct extent'
1971
613b0d17 1972 * FIXME ignore DDF_Legacy devices?
59e36268
NB
1973
1974 */
1975 struct extent *rv;
1976 int n = 0;
f21e18ca 1977 unsigned int i, j;
59e36268 1978
503975b9 1979 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
1980
1981 for (i = 0; i < ddf->max_part; i++) {
1982 struct vcl *v = dl->vlist[i];
1983 if (v == NULL)
1984 continue;
f21e18ca 1985 for (j = 0; j < v->conf.prim_elmnt_count; j++)
59e36268
NB
1986 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
1987 /* This device plays role 'j' in 'v'. */
1988 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
1989 rv[n].size = __be64_to_cpu(v->conf.blocks);
1990 n++;
1991 break;
1992 }
1993 }
1994 qsort(rv, n, sizeof(*rv), cmp_extent);
1995
1996 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
1997 rv[n].size = 0;
1998 return rv;
1999}
0e600426 2000#endif
59e36268 2001
5f8097be
NB
2002static int init_super_ddf_bvd(struct supertype *st,
2003 mdu_array_info_t *info,
2004 unsigned long long size,
2005 char *name, char *homehost,
83cd1e97 2006 int *uuid, unsigned long long data_offset)
5f8097be
NB
2007{
2008 /* We are creating a BVD inside a pre-existing container.
2009 * so st->sb is already set.
2010 * We need to create a new vd_config and a new virtual_entry
2011 */
2012 struct ddf_super *ddf = st->sb;
f21e18ca 2013 unsigned int venum;
5f8097be
NB
2014 struct virtual_entry *ve;
2015 struct vcl *vcl;
2016 struct vd_config *vc;
5f8097be
NB
2017
2018 if (__be16_to_cpu(ddf->virt->populated_vdes)
2019 >= __be16_to_cpu(ddf->virt->max_vdes)) {
e7b84f9d
N
2020 pr_err("This ddf already has the "
2021 "maximum of %d virtual devices\n",
2022 __be16_to_cpu(ddf->virt->max_vdes));
5f8097be
NB
2023 return 0;
2024 }
2025
97c9c100
N
2026 if (name)
2027 for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
2028 if (!all_ff(ddf->virt->entries[venum].guid)) {
2029 char *n = ddf->virt->entries[venum].name;
2030
2031 if (strncmp(name, n, 16) == 0) {
e7b84f9d
N
2032 pr_err("This ddf already"
2033 " has an array called %s\n",
2034 name);
97c9c100
N
2035 return 0;
2036 }
2037 }
2038
5f8097be
NB
2039 for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
2040 if (all_ff(ddf->virt->entries[venum].guid))
2041 break;
2042 if (venum == __be16_to_cpu(ddf->virt->max_vdes)) {
e7b84f9d 2043 pr_err("Cannot find spare slot for "
613b0d17 2044 "virtual disk - DDF is corrupt\n");
5f8097be
NB
2045 return 0;
2046 }
2047 ve = &ddf->virt->entries[venum];
2048
2049 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2050 * timestamp, random number
2051 */
2052 make_header_guid(ve->guid);
2053 ve->unit = __cpu_to_be16(info->md_minor);
2054 ve->pad0 = 0xFFFF;
2055 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2056 ve->type = 0;
7a7cc504
NB
2057 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2058 if (info->state & 1) /* clean */
2059 ve->init_state = DDF_init_full;
2060 else
2061 ve->init_state = DDF_init_not;
2062
5f8097be
NB
2063 memset(ve->pad1, 0xff, 14);
2064 memset(ve->name, ' ', 16);
2065 if (name)
2066 strncpy(ve->name, name, 16);
2067 ddf->virt->populated_vdes =
2068 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2069
2070 /* Now create a new vd_config */
3d2c4fc7
DW
2071 if (posix_memalign((void**)&vcl, 512,
2072 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2073 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2074 return 0;
2075 }
8c3b8c2c 2076 vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
59e36268
NB
2077 vcl->vcnum = venum;
2078 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 2079 vcl->other_bvds = NULL;
5f8097be
NB
2080
2081 vc = &vcl->conf;
2082
2083 vc->magic = DDF_VD_CONF_MAGIC;
2084 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2085 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2086 vc->seqnum = __cpu_to_be32(1);
2087 memset(vc->pad0, 0xff, 24);
2088 vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
2089 vc->chunk_shift = chunk_to_shift(info->chunk_size);
2090 vc->prl = level_to_prl(info->level);
2091 vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
2092 vc->sec_elmnt_count = 1;
2093 vc->sec_elmnt_seq = 0;
2094 vc->srl = 0;
2095 vc->blocks = __cpu_to_be64(info->size * 2);
2096 vc->array_blocks = __cpu_to_be64(
2097 calc_array_size(info->level, info->raid_disks, info->layout,
2098 info->chunk_size, info->size*2));
2099 memset(vc->pad1, 0xff, 8);
2100 vc->spare_refs[0] = 0xffffffff;
2101 vc->spare_refs[1] = 0xffffffff;
2102 vc->spare_refs[2] = 0xffffffff;
2103 vc->spare_refs[3] = 0xffffffff;
2104 vc->spare_refs[4] = 0xffffffff;
2105 vc->spare_refs[5] = 0xffffffff;
2106 vc->spare_refs[6] = 0xffffffff;
2107 vc->spare_refs[7] = 0xffffffff;
2108 memset(vc->cache_pol, 0, 8);
2109 vc->bg_rate = 0x80;
2110 memset(vc->pad2, 0xff, 3);
2111 memset(vc->pad3, 0xff, 52);
2112 memset(vc->pad4, 0xff, 192);
2113 memset(vc->v0, 0xff, 32);
2114 memset(vc->v1, 0xff, 32);
2115 memset(vc->v2, 0xff, 16);
2116 memset(vc->v3, 0xff, 16);
2117 memset(vc->vendor, 0xff, 32);
598f0d58 2118
8c3b8c2c 2119 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2120 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be
NB
2121
2122 vcl->next = ddf->conflist;
2123 ddf->conflist = vcl;
d2ca6449 2124 ddf->currentconf = vcl;
18a2f463 2125 ddf->updates_pending = 1;
5f8097be
NB
2126 return 1;
2127}
2128
0e600426 2129#ifndef MDASSEMBLE
5f8097be
NB
2130static void add_to_super_ddf_bvd(struct supertype *st,
2131 mdu_disk_info_t *dk, int fd, char *devname)
2132{
2133 /* fd and devname identify a device with-in the ddf container (st).
2134 * dk identifies a location in the new BVD.
2135 * We need to find suitable free space in that device and update
2136 * the phys_refnum and lba_offset for the newly created vd_config.
2137 * We might also want to update the type in the phys_disk
5575e7d9 2138 * section.
8592f29d
N
2139 *
2140 * Alternately: fd == -1 and we have already chosen which device to
2141 * use and recorded in dlist->raid_disk;
5f8097be
NB
2142 */
2143 struct dl *dl;
2144 struct ddf_super *ddf = st->sb;
2145 struct vd_config *vc;
2146 __u64 *lba_offset;
f21e18ca
N
2147 unsigned int working;
2148 unsigned int i;
59e36268
NB
2149 unsigned long long blocks, pos, esize;
2150 struct extent *ex;
5f8097be 2151
8592f29d
N
2152 if (fd == -1) {
2153 for (dl = ddf->dlist; dl ; dl = dl->next)
2154 if (dl->raiddisk == dk->raid_disk)
2155 break;
2156 } else {
2157 for (dl = ddf->dlist; dl ; dl = dl->next)
2158 if (dl->major == dk->major &&
2159 dl->minor == dk->minor)
2160 break;
2161 }
5f8097be
NB
2162 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2163 return;
2164
d2ca6449
NB
2165 vc = &ddf->currentconf->conf;
2166 lba_offset = ddf->currentconf->lba_offset;
59e36268
NB
2167
2168 ex = get_extents(ddf, dl);
2169 if (!ex)
2170 return;
2171
2172 i = 0; pos = 0;
2173 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2174 if (ddf->currentconf->block_sizes)
2175 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2176
2177 do {
2178 esize = ex[i].start - pos;
2179 if (esize >= blocks)
2180 break;
2181 pos = ex[i].start + ex[i].size;
2182 i++;
2183 } while (ex[i-1].size);
2184
2185 free(ex);
2186 if (esize < blocks)
2187 return;
2188
d2ca6449 2189 ddf->currentdev = dk->raid_disk;
5f8097be 2190 vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
59e36268 2191 lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
5f8097be 2192
f21e18ca 2193 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2194 if (dl->vlist[i] == NULL)
2195 break;
2196 if (i == ddf->max_part)
2197 return;
d2ca6449 2198 dl->vlist[i] = ddf->currentconf;
5f8097be 2199
8592f29d
N
2200 if (fd >= 0)
2201 dl->fd = fd;
2202 if (devname)
2203 dl->devname = devname;
7a7cc504
NB
2204
2205 /* Check how many working raid_disks, and if we can mark
2206 * array as optimal yet
2207 */
2208 working = 0;
5575e7d9 2209
f21e18ca 2210 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
7a7cc504
NB
2211 if (vc->phys_refnum[i] != 0xffffffff)
2212 working++;
59e36268 2213
5575e7d9 2214 /* Find which virtual_entry */
d2ca6449 2215 i = ddf->currentconf->vcnum;
7a7cc504 2216 if (working == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2217 ddf->virt->entries[i].state =
2218 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504
NB
2219 | DDF_state_optimal;
2220
2221 if (vc->prl == DDF_RAID6 &&
2222 working+1 == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2223 ddf->virt->entries[i].state =
2224 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504 2225 | DDF_state_part_optimal;
5575e7d9
NB
2226
2227 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2228 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
18a2f463 2229 ddf->updates_pending = 1;
5f8097be
NB
2230}
2231
a322f70c
DW
2232/* add a device to a container, either while creating it or while
2233 * expanding a pre-existing container
2234 */
f20c3968 2235static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2236 mdu_disk_info_t *dk, int fd, char *devname,
2237 unsigned long long data_offset)
a322f70c
DW
2238{
2239 struct ddf_super *ddf = st->sb;
2240 struct dl *dd;
2241 time_t now;
2242 struct tm *tm;
2243 unsigned long long size;
2244 struct phys_disk_entry *pde;
f21e18ca 2245 unsigned int n, i;
a322f70c 2246 struct stat stb;
90fa1a29 2247 __u32 *tptr;
a322f70c 2248
78e44928
NB
2249 if (ddf->currentconf) {
2250 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2251 return 0;
78e44928
NB
2252 }
2253
a322f70c
DW
2254 /* This is device numbered dk->number. We need to create
2255 * a phys_disk entry and a more detailed disk_data entry.
2256 */
2257 fstat(fd, &stb);
3d2c4fc7
DW
2258 if (posix_memalign((void**)&dd, 512,
2259 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2260 pr_err("%s could allocate buffer for new disk, aborting\n",
2261 __func__);
f20c3968 2262 return 1;
3d2c4fc7 2263 }
a322f70c
DW
2264 dd->major = major(stb.st_rdev);
2265 dd->minor = minor(stb.st_rdev);
2266 dd->devname = devname;
a322f70c 2267 dd->fd = fd;
b2280677 2268 dd->spare = NULL;
a322f70c
DW
2269
2270 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2271 now = time(0);
2272 tm = localtime(&now);
2273 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2274 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2275 tptr = (__u32 *)(dd->disk.guid + 16);
2276 *tptr++ = random32();
2277 *tptr = random32();
a322f70c 2278
59e36268
NB
2279 do {
2280 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2281 dd->disk.refnum = random32();
f21e18ca
N
2282 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2283 i > 0; i--)
2284 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2285 break;
f21e18ca 2286 } while (i > 0);
59e36268 2287
a322f70c
DW
2288 dd->disk.forced_ref = 1;
2289 dd->disk.forced_guid = 1;
2290 memset(dd->disk.vendor, ' ', 32);
2291 memcpy(dd->disk.vendor, "Linux", 5);
2292 memset(dd->disk.pad, 0xff, 442);
b2280677 2293 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2294 dd->vlist[i] = NULL;
2295
2296 n = __be16_to_cpu(ddf->phys->used_pdes);
2297 pde = &ddf->phys->entries[n];
5575e7d9
NB
2298 dd->pdnum = n;
2299
2cc2983d
N
2300 if (st->update_tail) {
2301 int len = (sizeof(struct phys_disk) +
2302 sizeof(struct phys_disk_entry));
2303 struct phys_disk *pd;
2304
503975b9 2305 pd = xmalloc(len);
2cc2983d
N
2306 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2307 pd->used_pdes = __cpu_to_be16(n);
2308 pde = &pd->entries[0];
2309 dd->mdupdate = pd;
2310 } else {
2311 n++;
2312 ddf->phys->used_pdes = __cpu_to_be16(n);
2313 }
a322f70c
DW
2314
2315 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2316 pde->refnum = dd->disk.refnum;
5575e7d9 2317 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c
DW
2318 pde->state = __cpu_to_be16(DDF_Online);
2319 get_dev_size(fd, NULL, &size);
2320 /* We are required to reserve 32Meg, and record the size in sectors */
2321 pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
2322 sprintf(pde->path, "%17.17s","Information: nil") ;
2323 memset(pde->pad, 0xff, 6);
2324
d2ca6449 2325 dd->size = size >> 9;
2cc2983d
N
2326 if (st->update_tail) {
2327 dd->next = ddf->add_list;
2328 ddf->add_list = dd;
2329 } else {
2330 dd->next = ddf->dlist;
2331 ddf->dlist = dd;
2332 ddf->updates_pending = 1;
2333 }
f20c3968
DW
2334
2335 return 0;
a322f70c
DW
2336}
2337
4dd968cc
N
2338static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2339{
2340 struct ddf_super *ddf = st->sb;
2341 struct dl *dl;
2342
2343 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2344 * disappeared from the container.
2345 * We need to arrange that it disappears from the metadata and
2346 * internal data structures too.
2347 * Most of the work is done by ddf_process_update which edits
2348 * the metadata and closes the file handle and attaches the memory
2349 * where free_updates will free it.
2350 */
2351 for (dl = ddf->dlist; dl ; dl = dl->next)
2352 if (dl->major == dk->major &&
2353 dl->minor == dk->minor)
2354 break;
2355 if (!dl)
2356 return -1;
2357
2358 if (st->update_tail) {
2359 int len = (sizeof(struct phys_disk) +
2360 sizeof(struct phys_disk_entry));
2361 struct phys_disk *pd;
2362
503975b9 2363 pd = xmalloc(len);
4dd968cc
N
2364 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2365 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2366 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2367 append_metadata_update(st, pd, len);
2368 }
2369 return 0;
2370}
2371
a322f70c
DW
2372/*
2373 * This is the write_init_super method for a ddf container. It is
2374 * called when creating a container or adding another device to a
2375 * container.
2376 */
42d5dfd9 2377#define NULL_CONF_SZ 4096
18a2f463 2378
e3c2a365 2379static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2380 __u32 refnum, unsigned int nmax,
2381 const struct vd_config **bvd,
2382 unsigned int *idx);
2383
7f798aca 2384static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2385 char *null_aligned)
a322f70c 2386{
7f798aca 2387 unsigned long long sector;
2388 struct ddf_header *header;
2389 int fd, i, n_config, conf_size;
2390
2391 fd = d->fd;
2392
2393 switch (type) {
2394 case DDF_HEADER_PRIMARY:
2395 header = &ddf->primary;
2396 sector = __be64_to_cpu(header->primary_lba);
2397 break;
2398 case DDF_HEADER_SECONDARY:
2399 header = &ddf->secondary;
2400 sector = __be64_to_cpu(header->secondary_lba);
2401 break;
2402 default:
2403 return 0;
2404 }
2405
2406 header->type = type;
2407 header->openflag = 0;
2408 header->crc = calc_crc(header, 512);
2409
2410 lseek64(fd, sector<<9, 0);
2411 if (write(fd, header, 512) < 0)
2412 return 0;
2413
2414 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2415 if (write(fd, &ddf->controller, 512) < 0)
2416 return 0;
a322f70c 2417
7f798aca 2418 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2419 if (write(fd, ddf->phys, ddf->pdsize) < 0)
2420 return 0;
2421 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2422 if (write(fd, ddf->virt, ddf->vdsize) < 0)
2423 return 0;
2424
2425 /* Now write lots of config records. */
2426 n_config = ddf->max_part;
2427 conf_size = ddf->conf_rec_len * 512;
2428 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2429 struct vcl *c;
2430 struct vd_config *vdc = NULL;
2431 if (i == n_config) {
7f798aca 2432 c = (struct vcl *)d->spare;
e3c2a365 2433 if (c)
2434 vdc = &c->conf;
2435 } else {
2436 unsigned int dummy;
2437 c = d->vlist[i];
2438 if (c)
2439 get_pd_index_from_refnum(
2440 c, d->disk.refnum,
2441 ddf->mppe,
2442 (const struct vd_config **)&vdc,
2443 &dummy);
2444 }
7f798aca 2445 if (c) {
dacf3dc5 2446 vdc->seqnum = header->seq;
e3c2a365 2447 vdc->crc = calc_crc(vdc, conf_size);
2448 if (write(fd, vdc, conf_size) < 0)
7f798aca 2449 break;
2450 } else {
2451 unsigned int togo = conf_size;
2452 while (togo > NULL_CONF_SZ) {
2453 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2454 break;
2455 togo -= NULL_CONF_SZ;
2456 }
2457 if (write(fd, null_aligned, togo) < 0)
2458 break;
2459 }
2460 }
2461 if (i <= n_config)
2462 return 0;
2463
2464 d->disk.crc = calc_crc(&d->disk, 512);
2465 if (write(fd, &d->disk, 512) < 0)
2466 return 0;
2467
2468 return 1;
2469}
2470
2471static int __write_init_super_ddf(struct supertype *st)
2472{
a322f70c 2473 struct ddf_super *ddf = st->sb;
a322f70c 2474 struct dl *d;
175593bf
DW
2475 int attempts = 0;
2476 int successes = 0;
7f798aca 2477 unsigned long long size;
42d5dfd9 2478 char *null_aligned;
0175cbf6 2479 __u32 seq;
42d5dfd9
JS
2480
2481 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2482 return -ENOMEM;
2483 }
2484 memset(null_aligned, 0xff, NULL_CONF_SZ);
a322f70c 2485
dc9e279c 2486 seq = ddf->active->seq + 1;
0175cbf6 2487
175593bf
DW
2488 /* try to write updated metadata,
2489 * if we catch a failure move on to the next disk
2490 */
a322f70c
DW
2491 for (d = ddf->dlist; d; d=d->next) {
2492 int fd = d->fd;
2493
2494 if (fd < 0)
2495 continue;
2496
175593bf 2497 attempts++;
a322f70c
DW
2498 /* We need to fill in the primary, (secondary) and workspace
2499 * lba's in the headers, set their checksums,
2500 * Also checksum phys, virt....
2501 *
2502 * Then write everything out, finally the anchor is written.
2503 */
2504 get_dev_size(fd, NULL, &size);
2505 size /= 512;
097bcf00 2506 if (d->workspace_lba != 0)
2507 ddf->anchor.workspace_lba = d->workspace_lba;
2508 else
2509 ddf->anchor.workspace_lba =
2510 __cpu_to_be64(size - 32*1024*2);
2511 if (d->primary_lba != 0)
2512 ddf->anchor.primary_lba = d->primary_lba;
2513 else
2514 ddf->anchor.primary_lba =
2515 __cpu_to_be64(size - 16*1024*2);
2516 if (d->secondary_lba != 0)
2517 ddf->anchor.secondary_lba = d->secondary_lba;
2518 else
2519 ddf->anchor.secondary_lba =
2520 __cpu_to_be64(size - 32*1024*2);
0175cbf6 2521 ddf->anchor.seq = seq;
a322f70c
DW
2522 memcpy(&ddf->primary, &ddf->anchor, 512);
2523 memcpy(&ddf->secondary, &ddf->anchor, 512);
2524
2525 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2526 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2527 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2528
7f798aca 2529 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2530 null_aligned))
175593bf 2531 continue;
a322f70c 2532
7f798aca 2533 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2534 null_aligned))
175593bf 2535 continue;
a322f70c 2536
a322f70c 2537 lseek64(fd, (size-1)*512, SEEK_SET);
175593bf
DW
2538 if (write(fd, &ddf->anchor, 512) < 0)
2539 continue;
2540 successes++;
2541 }
42d5dfd9 2542 free(null_aligned);
175593bf 2543
175593bf 2544 return attempts != successes;
a322f70c 2545}
7a7cc504
NB
2546
2547static int write_init_super_ddf(struct supertype *st)
2548{
9b1fb677
DW
2549 struct ddf_super *ddf = st->sb;
2550 struct vcl *currentconf = ddf->currentconf;
2551
2552 /* we are done with currentconf reset it to point st at the container */
2553 ddf->currentconf = NULL;
edd8d13c
NB
2554
2555 if (st->update_tail) {
2556 /* queue the virtual_disk and vd_config as metadata updates */
2557 struct virtual_disk *vd;
2558 struct vd_config *vc;
edd8d13c
NB
2559 int len;
2560
9b1fb677 2561 if (!currentconf) {
2cc2983d
N
2562 int len = (sizeof(struct phys_disk) +
2563 sizeof(struct phys_disk_entry));
2564
2565 /* adding a disk to the container. */
2566 if (!ddf->add_list)
2567 return 0;
2568
2569 append_metadata_update(st, ddf->add_list->mdupdate, len);
2570 ddf->add_list->mdupdate = NULL;
2571 return 0;
2572 }
2573
2574 /* Newly created VD */
2575
edd8d13c
NB
2576 /* First the virtual disk. We have a slightly fake header */
2577 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2578 vd = xmalloc(len);
edd8d13c 2579 *vd = *ddf->virt;
9b1fb677
DW
2580 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2581 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2582 append_metadata_update(st, vd, len);
2583
2584 /* Then the vd_config */
2585 len = ddf->conf_rec_len * 512;
503975b9 2586 vc = xmalloc(len);
9b1fb677 2587 memcpy(vc, &currentconf->conf, len);
edd8d13c
NB
2588 append_metadata_update(st, vc, len);
2589
2590 /* FIXME I need to close the fds! */
2591 return 0;
613b0d17 2592 } else {
d682f344
N
2593 struct dl *d;
2594 for (d = ddf->dlist; d; d=d->next)
ba728be7 2595 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2596 return __write_init_super_ddf(st);
d682f344 2597 }
7a7cc504
NB
2598}
2599
a322f70c
DW
2600#endif
2601
387fcd59
N
2602static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2603 unsigned long long data_offset)
a322f70c
DW
2604{
2605 /* We must reserve the last 32Meg */
2606 if (devsize <= 32*1024*2)
2607 return 0;
2608 return devsize - 32*1024*2;
2609}
2610
2611#ifndef MDASSEMBLE
8592f29d
N
2612
2613static int reserve_space(struct supertype *st, int raiddisks,
2614 unsigned long long size, int chunk,
2615 unsigned long long *freesize)
2616{
2617 /* Find 'raiddisks' spare extents at least 'size' big (but
2618 * only caring about multiples of 'chunk') and remember
2619 * them.
2620 * If the cannot be found, fail.
2621 */
2622 struct dl *dl;
2623 struct ddf_super *ddf = st->sb;
2624 int cnt = 0;
2625
2626 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 2627 dl->raiddisk = -1;
8592f29d
N
2628 dl->esize = 0;
2629 }
2630 /* Now find largest extent on each device */
2631 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2632 struct extent *e = get_extents(ddf, dl);
2633 unsigned long long pos = 0;
2634 int i = 0;
2635 int found = 0;
2636 unsigned long long minsize = size;
2637
2638 if (size == 0)
2639 minsize = chunk;
2640
2641 if (!e)
2642 continue;
2643 do {
2644 unsigned long long esize;
2645 esize = e[i].start - pos;
2646 if (esize >= minsize) {
2647 found = 1;
2648 minsize = esize;
2649 }
2650 pos = e[i].start + e[i].size;
2651 i++;
2652 } while (e[i-1].size);
2653 if (found) {
2654 cnt++;
2655 dl->esize = minsize;
2656 }
2657 free(e);
2658 }
2659 if (cnt < raiddisks) {
e7b84f9d 2660 pr_err("not enough devices with space to create array.\n");
8592f29d
N
2661 return 0; /* No enough free spaces large enough */
2662 }
2663 if (size == 0) {
2664 /* choose the largest size of which there are at least 'raiddisk' */
2665 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2666 struct dl *dl2;
2667 if (dl->esize <= size)
2668 continue;
2669 /* This is bigger than 'size', see if there are enough */
2670 cnt = 0;
7b80ad6a 2671 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
2672 if (dl2->esize >= dl->esize)
2673 cnt++;
2674 if (cnt >= raiddisks)
2675 size = dl->esize;
2676 }
2677 if (chunk) {
2678 size = size / chunk;
2679 size *= chunk;
2680 }
2681 *freesize = size;
2682 if (size < 32) {
e7b84f9d 2683 pr_err("not enough spare devices to create array.\n");
8592f29d
N
2684 return 0;
2685 }
2686 }
2687 /* We have a 'size' of which there are enough spaces.
2688 * We simply do a first-fit */
2689 cnt = 0;
2690 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
2691 if (dl->esize < size)
2692 continue;
613b0d17 2693
8592f29d
N
2694 dl->raiddisk = cnt;
2695 cnt++;
2696 }
2697 return 1;
2698}
2699
2c514b71
NB
2700static int
2701validate_geometry_ddf_container(struct supertype *st,
2702 int level, int layout, int raiddisks,
2703 int chunk, unsigned long long size,
af4348dd 2704 unsigned long long data_offset,
2c514b71
NB
2705 char *dev, unsigned long long *freesize,
2706 int verbose);
78e44928
NB
2707
2708static int validate_geometry_ddf_bvd(struct supertype *st,
2709 int level, int layout, int raiddisks,
c21e737b 2710 int *chunk, unsigned long long size,
af4348dd 2711 unsigned long long data_offset,
2c514b71
NB
2712 char *dev, unsigned long long *freesize,
2713 int verbose);
78e44928
NB
2714
2715static int validate_geometry_ddf(struct supertype *st,
2c514b71 2716 int level, int layout, int raiddisks,
c21e737b 2717 int *chunk, unsigned long long size,
af4348dd 2718 unsigned long long data_offset,
2c514b71
NB
2719 char *dev, unsigned long long *freesize,
2720 int verbose)
a322f70c
DW
2721{
2722 int fd;
2723 struct mdinfo *sra;
2724 int cfd;
2725
2726 /* ddf potentially supports lots of things, but it depends on
2727 * what devices are offered (and maybe kernel version?)
2728 * If given unused devices, we will make a container.
2729 * If given devices in a container, we will make a BVD.
2730 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
2731 */
2732
bb7295f1
N
2733 if (chunk && *chunk == UnSet)
2734 *chunk = DEFAULT_CHUNK;
2735
542ef4ec 2736 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 2737 if (level == LEVEL_CONTAINER) {
78e44928
NB
2738 /* Must be a fresh device to add to a container */
2739 return validate_geometry_ddf_container(st, level, layout,
c21e737b 2740 raiddisks, chunk?*chunk:0,
af4348dd
N
2741 size, data_offset, dev,
2742 freesize,
2c514b71 2743 verbose);
5f8097be
NB
2744 }
2745
78e44928
NB
2746 if (!dev) {
2747 /* Initial sanity check. Exclude illegal levels. */
2748 int i;
2749 for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
2750 if (ddf_level_num[i].num2 == level)
2751 break;
b42f577a
N
2752 if (ddf_level_num[i].num1 == MAXINT) {
2753 if (verbose)
e7b84f9d 2754 pr_err("DDF does not support level %d arrays\n",
613b0d17 2755 level);
78e44928 2756 return 0;
b42f577a 2757 }
78e44928 2758 /* Should check layout? etc */
8592f29d
N
2759
2760 if (st->sb && freesize) {
2761 /* --create was given a container to create in.
2762 * So we need to check that there are enough
2763 * free spaces and return the amount of space.
2764 * We may as well remember which drives were
2765 * chosen so that add_to_super/getinfo_super
2766 * can return them.
2767 */
c21e737b 2768 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 2769 }
a322f70c 2770 return 1;
78e44928 2771 }
a322f70c 2772
8592f29d
N
2773 if (st->sb) {
2774 /* A container has already been opened, so we are
2775 * creating in there. Maybe a BVD, maybe an SVD.
2776 * Should make a distinction one day.
2777 */
2778 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
2779 chunk, size, data_offset, dev,
2780 freesize,
8592f29d
N
2781 verbose);
2782 }
78e44928
NB
2783 /* This is the first device for the array.
2784 * If it is a container, we read it in and do automagic allocations,
2785 * no other devices should be given.
2786 * Otherwise it must be a member device of a container, and we
2787 * do manual allocation.
2788 * Later we should check for a BVD and make an SVD.
a322f70c 2789 */
a322f70c
DW
2790 fd = open(dev, O_RDONLY|O_EXCL, 0);
2791 if (fd >= 0) {
4dd2df09 2792 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
2793 close(fd);
2794 if (sra && sra->array.major_version == -1 &&
78e44928
NB
2795 strcmp(sra->text_version, "ddf") == 0) {
2796
2797 /* load super */
2798 /* find space for 'n' devices. */
2799 /* remember the devices */
2800 /* Somehow return the fact that we have enough */
a322f70c
DW
2801 }
2802
2c514b71 2803 if (verbose)
e7b84f9d
N
2804 pr_err("ddf: Cannot create this array "
2805 "on device %s - a container is required.\n",
2806 dev);
a322f70c
DW
2807 return 0;
2808 }
2809 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 2810 if (verbose)
e7b84f9d 2811 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 2812 dev, strerror(errno));
a322f70c
DW
2813 return 0;
2814 }
2815 /* Well, it is in use by someone, maybe a 'ddf' container. */
2816 cfd = open_container(fd);
2817 if (cfd < 0) {
2818 close(fd);
2c514b71 2819 if (verbose)
e7b84f9d 2820 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 2821 dev, strerror(EBUSY));
a322f70c
DW
2822 return 0;
2823 }
4dd2df09 2824 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
2825 close(fd);
2826 if (sra && sra->array.major_version == -1 &&
2827 strcmp(sra->text_version, "ddf") == 0) {
2828 /* This is a member of a ddf container. Load the container
2829 * and try to create a bvd
2830 */
2831 struct ddf_super *ddf;
e1902a7b 2832 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 2833 st->sb = ddf;
4dd2df09 2834 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 2835 close(cfd);
78e44928 2836 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 2837 raiddisks, chunk, size,
af4348dd 2838 data_offset,
2c514b71
NB
2839 dev, freesize,
2840 verbose);
a322f70c
DW
2841 }
2842 close(cfd);
c42ec1ed
DW
2843 } else /* device may belong to a different container */
2844 return 0;
2845
a322f70c
DW
2846 return 1;
2847}
2848
2c514b71
NB
2849static int
2850validate_geometry_ddf_container(struct supertype *st,
2851 int level, int layout, int raiddisks,
2852 int chunk, unsigned long long size,
af4348dd 2853 unsigned long long data_offset,
2c514b71
NB
2854 char *dev, unsigned long long *freesize,
2855 int verbose)
a322f70c
DW
2856{
2857 int fd;
2858 unsigned long long ldsize;
2859
2860 if (level != LEVEL_CONTAINER)
2861 return 0;
2862 if (!dev)
2863 return 1;
2864
2865 fd = open(dev, O_RDONLY|O_EXCL, 0);
2866 if (fd < 0) {
2c514b71 2867 if (verbose)
e7b84f9d 2868 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 2869 dev, strerror(errno));
a322f70c
DW
2870 return 0;
2871 }
2872 if (!get_dev_size(fd, dev, &ldsize)) {
2873 close(fd);
2874 return 0;
2875 }
2876 close(fd);
2877
387fcd59 2878 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
2879 if (*freesize == 0)
2880 return 0;
a322f70c
DW
2881
2882 return 1;
2883}
2884
78e44928
NB
2885static int validate_geometry_ddf_bvd(struct supertype *st,
2886 int level, int layout, int raiddisks,
c21e737b 2887 int *chunk, unsigned long long size,
af4348dd 2888 unsigned long long data_offset,
2c514b71
NB
2889 char *dev, unsigned long long *freesize,
2890 int verbose)
a322f70c
DW
2891{
2892 struct stat stb;
2893 struct ddf_super *ddf = st->sb;
2894 struct dl *dl;
5f8097be
NB
2895 unsigned long long pos = 0;
2896 unsigned long long maxsize;
2897 struct extent *e;
2898 int i;
a322f70c 2899 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
2900 if (level == LEVEL_CONTAINER) {
2901 if (verbose)
e7b84f9d 2902 pr_err("DDF cannot create a container within an container\n");
a322f70c 2903 return 0;
b42f577a 2904 }
a322f70c
DW
2905 /* We must have the container info already read in. */
2906 if (!ddf)
2907 return 0;
2908
5f8097be
NB
2909 if (!dev) {
2910 /* General test: make sure there is space for
2911 * 'raiddisks' device extents of size 'size'.
2912 */
2913 unsigned long long minsize = size;
2914 int dcnt = 0;
2915 if (minsize == 0)
2916 minsize = 8;
2917 for (dl = ddf->dlist; dl ; dl = dl->next)
2918 {
2919 int found = 0;
7e1432fb 2920 pos = 0;
5f8097be
NB
2921
2922 i = 0;
2923 e = get_extents(ddf, dl);
2924 if (!e) continue;
2925 do {
2926 unsigned long long esize;
2927 esize = e[i].start - pos;
2928 if (esize >= minsize)
2929 found = 1;
2930 pos = e[i].start + e[i].size;
2931 i++;
2932 } while (e[i-1].size);
2933 if (found)
2934 dcnt++;
2935 free(e);
2936 }
2937 if (dcnt < raiddisks) {
2c514b71 2938 if (verbose)
e7b84f9d
N
2939 pr_err("ddf: Not enough devices with "
2940 "space for this array (%d < %d)\n",
2941 dcnt, raiddisks);
5f8097be
NB
2942 return 0;
2943 }
2944 return 1;
2945 }
a322f70c
DW
2946 /* This device must be a member of the set */
2947 if (stat(dev, &stb) < 0)
2948 return 0;
2949 if ((S_IFMT & stb.st_mode) != S_IFBLK)
2950 return 0;
2951 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
2952 if (dl->major == (int)major(stb.st_rdev) &&
2953 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
2954 break;
2955 }
5f8097be 2956 if (!dl) {
2c514b71 2957 if (verbose)
e7b84f9d 2958 pr_err("ddf: %s is not in the "
613b0d17
N
2959 "same DDF set\n",
2960 dev);
5f8097be
NB
2961 return 0;
2962 }
2963 e = get_extents(ddf, dl);
2964 maxsize = 0;
2965 i = 0;
2966 if (e) do {
613b0d17
N
2967 unsigned long long esize;
2968 esize = e[i].start - pos;
2969 if (esize >= maxsize)
2970 maxsize = esize;
2971 pos = e[i].start + e[i].size;
2972 i++;
2973 } while (e[i-1].size);
5f8097be 2974 *freesize = maxsize;
a322f70c
DW
2975 // FIXME here I am
2976
2977 return 1;
2978}
59e36268 2979
a322f70c 2980static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 2981 void **sbp, char *devname)
a322f70c
DW
2982{
2983 struct mdinfo *sra;
2984 struct ddf_super *super;
2985 struct mdinfo *sd, *best = NULL;
2986 int bestseq = 0;
2987 int seq;
2988 char nm[20];
2989 int dfd;
2990
b526e52d 2991 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
2992 if (!sra)
2993 return 1;
2994 if (sra->array.major_version != -1 ||
2995 sra->array.minor_version != -2 ||
2996 strcmp(sra->text_version, "ddf") != 0)
2997 return 1;
2998
6416d527 2999 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3000 return 1;
a2349791 3001 memset(super, 0, sizeof(*super));
a322f70c
DW
3002
3003 /* first, try each device, and choose the best ddf */
3004 for (sd = sra->devs ; sd ; sd = sd->next) {
3005 int rv;
3006 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3007 dfd = dev_open(nm, O_RDONLY);
3008 if (dfd < 0)
a322f70c
DW
3009 return 2;
3010 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3011 close(dfd);
a322f70c
DW
3012 if (rv == 0) {
3013 seq = __be32_to_cpu(super->active->seq);
3014 if (super->active->openflag)
3015 seq--;
3016 if (!best || seq > bestseq) {
3017 bestseq = seq;
3018 best = sd;
3019 }
3020 }
3021 }
3022 if (!best)
3023 return 1;
3024 /* OK, load this ddf */
3025 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3026 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3027 if (dfd < 0)
a322f70c
DW
3028 return 1;
3029 load_ddf_headers(dfd, super, NULL);
3030 load_ddf_global(dfd, super, NULL);
3031 close(dfd);
3032 /* Now we need the device-local bits */
3033 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3034 int rv;
3035
a322f70c 3036 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3037 dfd = dev_open(nm, O_RDWR);
7a7cc504 3038 if (dfd < 0)
a322f70c 3039 return 2;
3d2c4fc7
DW
3040 rv = load_ddf_headers(dfd, super, NULL);
3041 if (rv == 0)
e1902a7b 3042 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3043 if (rv)
3044 return 1;
a322f70c 3045 }
33414a01 3046
a322f70c
DW
3047 *sbp = super;
3048 if (st->ss == NULL) {
78e44928 3049 st->ss = &super_ddf;
a322f70c
DW
3050 st->minor_version = 0;
3051 st->max_devs = 512;
3052 }
4dd2df09 3053 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3054 return 0;
3055}
2b959fbf
N
3056
3057static int load_container_ddf(struct supertype *st, int fd,
3058 char *devname)
3059{
3060 return load_super_ddf_all(st, fd, &st->sb, devname);
3061}
3062
0e600426 3063#endif /* MDASSEMBLE */
a322f70c 3064
a5c7adb3 3065static int check_secondary(const struct vcl *vc)
3066{
3067 const struct vd_config *conf = &vc->conf;
3068 int i;
3069
3070 /* The only DDF secondary RAID level md can support is
3071 * RAID 10, if the stripe sizes and Basic volume sizes
3072 * are all equal.
3073 * Other configurations could in theory be supported by exposing
3074 * the BVDs to user space and using device mapper for the secondary
3075 * mapping. So far we don't support that.
3076 */
3077
3078 __u64 sec_elements[4] = {0, 0, 0, 0};
3079#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3080#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3081
3082 if (vc->other_bvds == NULL) {
3083 pr_err("No BVDs for secondary RAID found\n");
3084 return -1;
3085 }
3086 if (conf->prl != DDF_RAID1) {
3087 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3088 return -1;
3089 }
3090 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3091 pr_err("Secondary RAID level %d is unsupported\n",
3092 conf->srl);
3093 return -1;
3094 }
3095 __set_sec_seen(conf->sec_elmnt_seq);
3096 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3097 const struct vd_config *bvd = vc->other_bvds[i];
3098 if (bvd == NULL) {
e3c2a365 3099 pr_err("BVD %d is missing\n", i+1);
a5c7adb3 3100 return -1;
3101 }
3102 if (bvd->srl != conf->srl) {
3103 pr_err("Inconsistent secondary RAID level across BVDs\n");
3104 return -1;
3105 }
3106 if (bvd->prl != conf->prl) {
3107 pr_err("Different RAID levels for BVDs are unsupported\n");
3108 return -1;
3109 }
3110 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3111 pr_err("All BVDs must have the same number of primary elements\n");
3112 return -1;
3113 }
3114 if (bvd->chunk_shift != conf->chunk_shift) {
3115 pr_err("Different strip sizes for BVDs are unsupported\n");
3116 return -1;
3117 }
3118 if (bvd->array_blocks != conf->array_blocks) {
3119 pr_err("Different BVD sizes are unsupported\n");
3120 return -1;
3121 }
3122 __set_sec_seen(bvd->sec_elmnt_seq);
3123 }
3124 for (i = 0; i < conf->sec_elmnt_count; i++) {
3125 if (!__was_sec_seen(i)) {
3126 pr_err("BVD %d is missing\n", i);
3127 return -1;
3128 }
3129 }
3130 return 0;
3131}
3132
8a38db86 3133#define NO_SUCH_REFNUM (0xFFFFFFFF)
3134static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3135 __u32 refnum, unsigned int nmax,
3136 const struct vd_config **bvd,
3137 unsigned int *idx)
8a38db86 3138{
4e587018 3139 unsigned int i, j, n, sec, cnt;
3140
3141 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3142 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3143
3144 for (i = 0, j = 0 ; i < nmax ; i++) {
3145 /* j counts valid entries for this BVD */
3146 if (vc->conf.phys_refnum[i] != 0xffffffff)
3147 j++;
3148 if (vc->conf.phys_refnum[i] == refnum) {
3149 *bvd = &vc->conf;
3150 *idx = i;
3151 return sec * cnt + j - 1;
3152 }
3153 }
3154 if (vc->other_bvds == NULL)
3155 goto bad;
3156
3157 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3158 struct vd_config *vd = vc->other_bvds[n-1];
3159 if (vd == NULL)
3160 continue;
3161 sec = vd->sec_elmnt_seq;
3162 for (i = 0, j = 0 ; i < nmax ; i++) {
3163 if (vd->phys_refnum[i] != 0xffffffff)
3164 j++;
3165 if (vd->phys_refnum[i] == refnum) {
3166 *bvd = vd;
3167 *idx = i;
3168 return sec * cnt + j - 1;
3169 }
3170 }
3171 }
3172bad:
3173 *bvd = NULL;
8a38db86 3174 return NO_SUCH_REFNUM;
3175}
3176
00bbdbda 3177static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3178{
3179 /* Given a container loaded by load_super_ddf_all,
3180 * extract information about all the arrays into
3181 * an mdinfo tree.
3182 *
3183 * For each vcl in conflist: create an mdinfo, fill it in,
3184 * then look for matching devices (phys_refnum) in dlist
3185 * and create appropriate device mdinfo.
3186 */
3187 struct ddf_super *ddf = st->sb;
3188 struct mdinfo *rest = NULL;
3189 struct vcl *vc;
3190
3191 for (vc = ddf->conflist ; vc ; vc=vc->next)
3192 {
f21e18ca
N
3193 unsigned int i;
3194 unsigned int j;
598f0d58 3195 struct mdinfo *this;
00bbdbda 3196 char *ep;
90fa1a29 3197 __u32 *cptr;
8a38db86 3198 unsigned int pd;
00bbdbda
N
3199
3200 if (subarray &&
3201 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3202 *ep != '\0'))
3203 continue;
3204
a5c7adb3 3205 if (vc->conf.sec_elmnt_count > 1) {
3206 if (check_secondary(vc) != 0)
3207 continue;
3208 }
3209
503975b9 3210 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3211 this->next = rest;
3212 rest = this;
3213
4e587018 3214 if (vc->conf.sec_elmnt_count == 1) {
3215 this->array.level = map_num1(ddf_level_num,
3216 vc->conf.prl);
3217 this->array.raid_disks =
3218 __be16_to_cpu(vc->conf.prim_elmnt_count);
3219 this->array.layout =
3220 rlq_to_layout(vc->conf.rlq, vc->conf.prl,
3221 this->array.raid_disks);
3222 } else {
3223 /* The only supported layout is RAID 10.
3224 * Compatibility has been checked in check_secondary()
3225 * above.
3226 */
3227 this->array.level = 10;
3228 this->array.raid_disks =
3229 __be16_to_cpu(vc->conf.prim_elmnt_count)
3230 * vc->conf.sec_elmnt_count;
3231 this->array.layout = 0x100 |
3232 __be16_to_cpu(vc->conf.prim_elmnt_count);
3233 }
598f0d58 3234 this->array.md_minor = -1;
f35f2525
N
3235 this->array.major_version = -1;
3236 this->array.minor_version = -2;
90fa1a29
JS
3237 cptr = (__u32 *)(vc->conf.guid + 16);
3238 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3239 this->array.utime = DECADE +
3240 __be32_to_cpu(vc->conf.timestamp);
3241 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3242
59e36268 3243 i = vc->vcnum;
7a7cc504
NB
3244 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3245 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3246 DDF_init_full) {
598f0d58 3247 this->array.state = 0;
ed9d66aa
NB
3248 this->resync_start = 0;
3249 } else {
598f0d58 3250 this->array.state = 1;
b7528a20 3251 this->resync_start = MaxSector;
ed9d66aa 3252 }
db42fa9b
N
3253 memcpy(this->name, ddf->virt->entries[i].name, 16);
3254 this->name[16]=0;
3255 for(j=0; j<16; j++)
3256 if (this->name[j] == ' ')
3257 this->name[j] = 0;
598f0d58
NB
3258
3259 memset(this->uuid, 0, sizeof(this->uuid));
3260 this->component_size = __be64_to_cpu(vc->conf.blocks);
3261 this->array.size = this->component_size / 2;
5f2aace8 3262 this->container_member = i;
598f0d58 3263
c5afc314
N
3264 ddf->currentconf = vc;
3265 uuid_from_super_ddf(st, this->uuid);
3266 ddf->currentconf = NULL;
3267
60f18132 3268 sprintf(this->text_version, "/%s/%d",
4dd2df09 3269 st->container_devnm, this->container_member);
60f18132 3270
8a38db86 3271 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3272 struct mdinfo *dev;
3273 struct dl *d;
4e587018 3274 const struct vd_config *bvd;
3275 unsigned int iphys;
3276 __u64 *lba_offset;
fa033bec 3277 int stt;
598f0d58 3278
8a38db86 3279 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3280 continue;
0cf5ef67
N
3281
3282 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3283 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3284 != DDF_Online)
3285 continue;
3286
8a38db86 3287 i = get_pd_index_from_refnum(
4e587018 3288 vc, ddf->phys->entries[pd].refnum,
3289 ddf->mppe, &bvd, &iphys);
8a38db86 3290 if (i == NO_SUCH_REFNUM)
3291 continue;
3292
fa033bec 3293 this->array.working_disks++;
bc17324f 3294
0cf5ef67 3295 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3296 if (d->disk.refnum ==
3297 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3298 break;
3299 if (d == NULL)
3300 /* Haven't found that one yet, maybe there are others */
3301 continue;
3302
503975b9 3303 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3304 dev->next = this->devs;
3305 this->devs = dev;
3306
3307 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3308 dev->disk.major = d->major;
3309 dev->disk.minor = d->minor;
3310 dev->disk.raid_disk = i;
3311 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3312 dev->recovery_start = MaxSector;
598f0d58 3313
120f7677 3314 dev->events = __be32_to_cpu(ddf->primary.seq);
4e587018 3315 lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
3316 dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
3317 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3318 if (d->devname)
3319 strcpy(dev->name, d->devname);
3320 }
3321 }
3322 return rest;
3323}
3324
955e9ea1 3325static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3326{
955e9ea1 3327 struct ddf_super *ddf = st->sb;
a322f70c 3328 unsigned long long dsize;
6416d527 3329 void *buf;
3d2c4fc7 3330 int rc;
a322f70c 3331
955e9ea1
DW
3332 if (!ddf)
3333 return 1;
3334
3335 /* ->dlist and ->conflist will be set for updates, currently not
3336 * supported
3337 */
3338 if (ddf->dlist || ddf->conflist)
3339 return 1;
3340
a322f70c
DW
3341 if (!get_dev_size(fd, NULL, &dsize))
3342 return 1;
3343
3d2c4fc7
DW
3344 if (posix_memalign(&buf, 512, 512) != 0)
3345 return 1;
6416d527
NB
3346 memset(buf, 0, 512);
3347
a322f70c 3348 lseek64(fd, dsize-512, 0);
3d2c4fc7 3349 rc = write(fd, buf, 512);
6416d527 3350 free(buf);
3d2c4fc7
DW
3351 if (rc < 0)
3352 return 1;
a322f70c
DW
3353 return 0;
3354}
3355
a19c88b8
NB
3356static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3357{
3358 /*
3359 * return:
3360 * 0 same, or first was empty, and second was copied
3361 * 1 second had wrong number
3362 * 2 wrong uuid
3363 * 3 wrong other info
3364 */
3365 struct ddf_super *first = st->sb;
3366 struct ddf_super *second = tst->sb;
4eefd651 3367 struct dl *dl1, *dl2;
3368 struct vcl *vl1, *vl2;
2d210697 3369 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3370
3371 if (!first) {
3372 st->sb = tst->sb;
3373 tst->sb = NULL;
3374 return 0;
3375 }
3376
3377 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3378 return 2;
3379
2d210697 3380 if (first->anchor.seq != second->anchor.seq) {
3381 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3382 __be32_to_cpu(first->anchor.seq),
3383 __be32_to_cpu(second->anchor.seq));
3384 return 3;
3385 }
3386 if (first->max_part != second->max_part ||
3387 first->phys->used_pdes != second->phys->used_pdes ||
3388 first->virt->populated_vdes != second->virt->populated_vdes) {
3389 dprintf("%s: PD/VD number mismatch\n", __func__);
3390 return 3;
3391 }
3392
3393 max_pds = __be16_to_cpu(first->phys->used_pdes);
3394 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3395 for (pd = 0; pd < max_pds; pd++)
3396 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3397 break;
3398 if (pd == max_pds) {
3399 dprintf("%s: no match for disk %08x\n", __func__,
3400 __be32_to_cpu(dl2->disk.refnum));
3401 return 3;
3402 }
3403 }
3404
3405 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3406 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3407 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3408 continue;
3409 for (vd = 0; vd < max_vds; vd++)
3410 if (!memcmp(first->virt->entries[vd].guid,
3411 vl2->conf.guid, DDF_GUID_LEN))
3412 break;
3413 if (vd == max_vds) {
3414 dprintf("%s: no match for VD config\n", __func__);
3415 return 3;
3416 }
3417 }
a19c88b8 3418 /* FIXME should I look at anything else? */
2d210697 3419
4eefd651 3420 /*
3421 At this point we are fairly sure that the meta data matches.
3422 But the new disk may contain additional local data.
3423 Add it to the super block.
3424 */
3425 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3426 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3427 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3428 DDF_GUID_LEN))
3429 break;
3430 if (vl1) {
3431 if (vl1->other_bvds != NULL &&
3432 vl1->conf.sec_elmnt_seq !=
3433 vl2->conf.sec_elmnt_seq) {
3434 dprintf("%s: adding BVD %u\n", __func__,
3435 vl2->conf.sec_elmnt_seq);
3436 add_other_bvd(vl1, &vl2->conf,
3437 first->conf_rec_len*512);
3438 }
3439 continue;
3440 }
3441
3442 if (posix_memalign((void **)&vl1, 512,
3443 (first->conf_rec_len*512 +
3444 offsetof(struct vcl, conf))) != 0) {
3445 pr_err("%s could not allocate vcl buf\n",
3446 __func__);
3447 return 3;
3448 }
3449
3450 vl1->next = first->conflist;
3451 vl1->block_sizes = NULL;
3452 if (vl1->conf.sec_elmnt_count > 1) {
3453 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3454 sizeof(struct vd_config *));
3455 } else
3456 vl1->other_bvds = NULL;
3457 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3458 vl1->lba_offset = (__u64 *)
3459 &vl1->conf.phys_refnum[first->mppe];
3460 for (vd = 0; vd < max_vds; vd++)
3461 if (!memcmp(first->virt->entries[vd].guid,
3462 vl1->conf.guid, DDF_GUID_LEN))
3463 break;
3464 vl1->vcnum = vd;
3465 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3466 first->conflist = vl1;
3467 }
3468
3469 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3470 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3471 if (dl1->disk.refnum == dl2->disk.refnum)
3472 break;
3473 if (dl1)
3474 continue;
3475
3476 if (posix_memalign((void **)&dl1, 512,
3477 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3478 != 0) {
3479 pr_err("%s could not allocate disk info buffer\n",
3480 __func__);
3481 return 3;
3482 }
3483 memcpy(dl1, dl2, sizeof(*dl1));
3484 dl1->mdupdate = NULL;
3485 dl1->next = first->dlist;
3486 dl1->fd = -1;
3487 for (pd = 0; pd < max_pds; pd++)
3488 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3489 break;
3490 dl1->pdnum = pd;
3491 if (dl2->spare) {
3492 if (posix_memalign((void **)&dl1->spare, 512,
3493 first->conf_rec_len*512) != 0) {
3494 pr_err("%s could not allocate spare info buf\n",
3495 __func__);
3496 return 3;
3497 }
3498 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3499 }
3500 for (vd = 0 ; vd < first->max_part ; vd++) {
3501 if (!dl2->vlist[vd]) {
3502 dl1->vlist[vd] = NULL;
3503 continue;
3504 }
3505 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3506 if (!memcmp(vl1->conf.guid,
3507 dl2->vlist[vd]->conf.guid,
3508 DDF_GUID_LEN))
3509 break;
3510 dl1->vlist[vd] = vl1;
3511 }
3512 }
3513 first->dlist = dl1;
3514 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3515 dl1->disk.refnum);
3516 }
3517
a19c88b8
NB
3518 return 0;
3519}
3520
0e600426 3521#ifndef MDASSEMBLE
4e5528c6
NB
3522/*
3523 * A new array 'a' has been started which claims to be instance 'inst'
3524 * within container 'c'.
3525 * We need to confirm that the array matches the metadata in 'c' so
3526 * that we don't corrupt any metadata.
3527 */
cba0191b 3528static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3529{
2c514b71 3530 dprintf("ddf: open_new %s\n", inst);
cba0191b 3531 a->info.container_member = atoi(inst);
549e9569
NB
3532 return 0;
3533}
3534
4e5528c6
NB
3535/*
3536 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3537 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3538 * clean up to the point (in sectors). If that cannot be recorded in the
3539 * metadata, then leave it as dirty.
3540 *
3541 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3542 * !global! virtual_disk.virtual_entry structure.
3543 */
01f157d7 3544static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3545{
4e5528c6
NB
3546 struct ddf_super *ddf = a->container->sb;
3547 int inst = a->info.container_member;
18a2f463 3548 int old = ddf->virt->entries[inst].state;
01f157d7
N
3549 if (consistent == 2) {
3550 /* Should check if a recovery should be started FIXME */
3551 consistent = 1;
b7941fd6 3552 if (!is_resync_complete(&a->info))
01f157d7
N
3553 consistent = 0;
3554 }
ed9d66aa
NB
3555 if (consistent)
3556 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3557 else
4e5528c6 3558 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463
NB
3559 if (old != ddf->virt->entries[inst].state)
3560 ddf->updates_pending = 1;
3561
3562 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3563 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3564 if (is_resync_complete(&a->info))
ed9d66aa 3565 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3566 else if (a->info.resync_start == 0)
ed9d66aa 3567 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3568 else
ed9d66aa 3569 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463
NB
3570 if (old != ddf->virt->entries[inst].init_state)
3571 ddf->updates_pending = 1;
ed9d66aa 3572
2c514b71 3573 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
b7941fd6 3574 a->info.resync_start);
01f157d7 3575 return consistent;
fd7cde1b
DW
3576}
3577
e1316fab 3578#define container_of(ptr, type, member) ({ \
5d500228
N
3579 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
3580 (type *)( (char *)__mptr - offsetof(type,member) );})
7a7cc504
NB
3581/*
3582 * The state of each disk is stored in the global phys_disk structure
3583 * in phys_disk.entries[n].state.
3584 * This makes various combinations awkward.
3585 * - When a device fails in any array, it must be failed in all arrays
3586 * that include a part of this device.
3587 * - When a component is rebuilding, we cannot include it officially in the
3588 * array unless this is the only array that uses the device.
3589 *
3590 * So: when transitioning:
3591 * Online -> failed, just set failed flag. monitor will propagate
3592 * spare -> online, the device might need to be added to the array.
3593 * spare -> failed, just set failed. Don't worry if in array or not.
3594 */
8d45d196 3595static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 3596{
7a7cc504 3597 struct ddf_super *ddf = a->container->sb;
f21e18ca 3598 unsigned int inst = a->info.container_member;
7a7cc504
NB
3599 struct vd_config *vc = find_vdcr(ddf, inst);
3600 int pd = find_phys(ddf, vc->phys_refnum[n]);
3601 int i, st, working;
e1316fab
N
3602 struct mdinfo *mdi;
3603 struct dl *dl;
7a7cc504
NB
3604
3605 if (vc == NULL) {
2c514b71 3606 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
3607 return;
3608 }
e1316fab
N
3609 /* Find the matching slot in 'info'. */
3610 for (mdi = a->info.devs; mdi; mdi = mdi->next)
3611 if (mdi->disk.raid_disk == n)
3612 break;
3613 if (!mdi)
3614 return;
3615
3616 /* and find the 'dl' entry corresponding to that. */
3617 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
3618 if (mdi->state_fd >= 0 &&
3619 mdi->disk.major == dl->major &&
e1316fab
N
3620 mdi->disk.minor == dl->minor)
3621 break;
3622 if (!dl)
3623 return;
3624
3625 if (pd < 0 || pd != dl->pdnum) {
3626 /* disk doesn't currently exist or has changed.
3627 * If it is now in_sync, insert it. */
7a7cc504 3628 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
e1316fab
N
3629 struct vcl *vcl;
3630 pd = dl->pdnum;
3631 vc->phys_refnum[n] = dl->disk.refnum;
3632 vcl = container_of(vc, struct vcl, conf);
3633 vcl->lba_offset[n] = mdi->data_offset;
3634 ddf->phys->entries[pd].type &=
3635 ~__cpu_to_be16(DDF_Global_Spare);
3636 ddf->phys->entries[pd].type |=
3637 __cpu_to_be16(DDF_Active_in_VD);
3638 ddf->updates_pending = 1;
7a7cc504
NB
3639 }
3640 } else {
18a2f463 3641 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
3642 if (state & DS_FAULTY)
3643 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
3644 if (state & DS_INSYNC) {
3645 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
3646 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
3647 }
18a2f463
NB
3648 if (old != ddf->phys->entries[pd].state)
3649 ddf->updates_pending = 1;
7a7cc504
NB
3650 }
3651
2c514b71 3652 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 3653
7a7cc504
NB
3654 /* Now we need to check the state of the array and update
3655 * virtual_disk.entries[n].state.
3656 * It needs to be one of "optimal", "degraded", "failed".
3657 * I don't understand 'deleted' or 'missing'.
3658 */
3659 working = 0;
3660 for (i=0; i < a->info.array.raid_disks; i++) {
3661 pd = find_phys(ddf, vc->phys_refnum[i]);
3662 if (pd < 0)
3663 continue;
57632f4a
NB
3664 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3665 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
7a7cc504
NB
3666 == DDF_Online)
3667 working++;
3668 }
3669 state = DDF_state_degraded;
3670 if (working == a->info.array.raid_disks)
3671 state = DDF_state_optimal;
3672 else switch(vc->prl) {
613b0d17
N
3673 case DDF_RAID0:
3674 case DDF_CONCAT:
3675 case DDF_JBOD:
7a7cc504 3676 state = DDF_state_failed;
613b0d17
N
3677 break;
3678 case DDF_RAID1:
3679 if (working == 0)
3680 state = DDF_state_failed;
3681 else if (working == 2 && state == DDF_state_degraded)
3682 state = DDF_state_part_optimal;
3683 break;
3684 case DDF_RAID4:
3685 case DDF_RAID5:
3686 if (working < a->info.array.raid_disks-1)
3687 state = DDF_state_failed;
3688 break;
3689 case DDF_RAID6:
3690 if (working < a->info.array.raid_disks-2)
3691 state = DDF_state_failed;
3692 else if (working == a->info.array.raid_disks-1)
3693 state = DDF_state_part_optimal;
3694 break;
3695 }
7a7cc504 3696
18a2f463
NB
3697 if (ddf->virt->entries[inst].state !=
3698 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
3699 | state)) {
3700
3701 ddf->virt->entries[inst].state =
3702 (ddf->virt->entries[inst].state & ~DDF_state_mask)
3703 | state;
3704 ddf->updates_pending = 1;
3705 }
7a7cc504 3706
549e9569
NB
3707}
3708
2e735d19 3709static void ddf_sync_metadata(struct supertype *st)
549e9569 3710{
7a7cc504
NB
3711
3712 /*
3713 * Write all data to all devices.
3714 * Later, we might be able to track whether only local changes
3715 * have been made, or whether any global data has been changed,
3716 * but ddf is sufficiently weird that it probably always
3717 * changes global data ....
3718 */
18a2f463
NB
3719 struct ddf_super *ddf = st->sb;
3720 if (!ddf->updates_pending)
3721 return;
3722 ddf->updates_pending = 0;
1cc7f4fe 3723 __write_init_super_ddf(st);
2c514b71 3724 dprintf("ddf: sync_metadata\n");
549e9569
NB
3725}
3726
88c164f4
NB
3727static void ddf_process_update(struct supertype *st,
3728 struct metadata_update *update)
3729{
3730 /* Apply this update to the metadata.
3731 * The first 4 bytes are a DDF_*_MAGIC which guides
3732 * our actions.
3733 * Possible update are:
3734 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
3735 * Add a new physical device or remove an old one.
3736 * Changes to this record only happen implicitly.
88c164f4
NB
3737 * used_pdes is the device number.
3738 * DDF_VIRT_RECORDS_MAGIC
3739 * Add a new VD. Possibly also change the 'access' bits.
3740 * populated_vdes is the entry number.
3741 * DDF_VD_CONF_MAGIC
3742 * New or updated VD. the VIRT_RECORD must already
3743 * exist. For an update, phys_refnum and lba_offset
3744 * (at least) are updated, and the VD_CONF must
3745 * be written to precisely those devices listed with
3746 * a phys_refnum.
3747 * DDF_SPARE_ASSIGN_MAGIC
3748 * replacement Spare Assignment Record... but for which device?
3749 *
3750 * So, e.g.:
3751 * - to create a new array, we send a VIRT_RECORD and
3752 * a VD_CONF. Then assemble and start the array.
3753 * - to activate a spare we send a VD_CONF to add the phys_refnum
3754 * and offset. This will also mark the spare as active with
3755 * a spare-assignment record.
3756 */
3757 struct ddf_super *ddf = st->sb;
3758 __u32 *magic = (__u32*)update->buf;
3759 struct phys_disk *pd;
3760 struct virtual_disk *vd;
3761 struct vd_config *vc;
3762 struct vcl *vcl;
3763 struct dl *dl;
f21e18ca
N
3764 unsigned int mppe;
3765 unsigned int ent;
c7079c84 3766 unsigned int pdnum, pd2;
88c164f4 3767
2c514b71 3768 dprintf("Process update %x\n", *magic);
7e1432fb 3769
88c164f4
NB
3770 switch (*magic) {
3771 case DDF_PHYS_RECORDS_MAGIC:
3772
3773 if (update->len != (sizeof(struct phys_disk) +
3774 sizeof(struct phys_disk_entry)))
3775 return;
3776 pd = (struct phys_disk*)update->buf;
3777
3778 ent = __be16_to_cpu(pd->used_pdes);
3779 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
3780 return;
4dd968cc
N
3781 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
3782 struct dl **dlp;
3783 /* removing this disk. */
3784 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
3785 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
3786 struct dl *dl = *dlp;
3787 if (dl->pdnum == (signed)ent) {
3788 close(dl->fd);
3789 dl->fd = -1;
3790 /* FIXME this doesn't free
3791 * dl->devname */
3792 update->space = dl;
3793 *dlp = dl->next;
3794 break;
3795 }
3796 }
3797 ddf->updates_pending = 1;
3798 return;
3799 }
88c164f4
NB
3800 if (!all_ff(ddf->phys->entries[ent].guid))
3801 return;
3802 ddf->phys->entries[ent] = pd->entries[0];
3803 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 3804 __be16_to_cpu(ddf->phys->used_pdes));
18a2f463 3805 ddf->updates_pending = 1;
2cc2983d
N
3806 if (ddf->add_list) {
3807 struct active_array *a;
3808 struct dl *al = ddf->add_list;
3809 ddf->add_list = al->next;
3810
3811 al->next = ddf->dlist;
3812 ddf->dlist = al;
3813
3814 /* As a device has been added, we should check
3815 * for any degraded devices that might make
3816 * use of this spare */
3817 for (a = st->arrays ; a; a=a->next)
3818 a->check_degraded = 1;
3819 }
88c164f4
NB
3820 break;
3821
3822 case DDF_VIRT_RECORDS_MAGIC:
3823
3824 if (update->len != (sizeof(struct virtual_disk) +
3825 sizeof(struct virtual_entry)))
3826 return;
3827 vd = (struct virtual_disk*)update->buf;
3828
3829 ent = __be16_to_cpu(vd->populated_vdes);
3830 if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
3831 return;
3832 if (!all_ff(ddf->virt->entries[ent].guid))
3833 return;
3834 ddf->virt->entries[ent] = vd->entries[0];
3835 ddf->virt->populated_vdes = __cpu_to_be16(1 +
613b0d17 3836 __be16_to_cpu(ddf->virt->populated_vdes));
18a2f463 3837 ddf->updates_pending = 1;
88c164f4
NB
3838 break;
3839
3840 case DDF_VD_CONF_MAGIC:
2c514b71 3841 dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
88c164f4
NB
3842
3843 mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
f21e18ca 3844 if ((unsigned)update->len != ddf->conf_rec_len * 512)
88c164f4
NB
3845 return;
3846 vc = (struct vd_config*)update->buf;
3847 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3848 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
3849 break;
2c514b71 3850 dprintf("vcl = %p\n", vcl);
88c164f4
NB
3851 if (vcl) {
3852 /* An update, just copy the phys_refnum and lba_offset
3853 * fields
3854 */
3855 memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
3856 mppe * (sizeof(__u32) + sizeof(__u64)));
3857 } else {
3858 /* A new VD_CONF */
e6b9548d
DW
3859 if (!update->space)
3860 return;
88c164f4
NB
3861 vcl = update->space;
3862 update->space = NULL;
3863 vcl->next = ddf->conflist;
edd8d13c 3864 memcpy(&vcl->conf, vc, update->len);
88c164f4
NB
3865 vcl->lba_offset = (__u64*)
3866 &vcl->conf.phys_refnum[mppe];
1502a43a
N
3867 for (ent = 0;
3868 ent < __be16_to_cpu(ddf->virt->populated_vdes);
3869 ent++)
3870 if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
3871 DDF_GUID_LEN) == 0) {
3872 vcl->vcnum = ent;
3873 break;
3874 }
88c164f4
NB
3875 ddf->conflist = vcl;
3876 }
c7079c84
N
3877 /* Set DDF_Transition on all Failed devices - to help
3878 * us detect those that are no longer in use
3879 */
3880 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3881 if (ddf->phys->entries[pdnum].state
3882 & __be16_to_cpu(DDF_Failed))
3883 ddf->phys->entries[pdnum].state
3884 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
3885 /* Now make sure vlist is correct for each dl. */
3886 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca
N
3887 unsigned int dn;
3888 unsigned int vn = 0;
8401644c 3889 int in_degraded = 0;
88c164f4
NB
3890 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3891 for (dn=0; dn < ddf->mppe ; dn++)
3892 if (vcl->conf.phys_refnum[dn] ==
3893 dl->disk.refnum) {
8401644c 3894 int vstate;
2c514b71
NB
3895 dprintf("dev %d has %p at %d\n",
3896 dl->pdnum, vcl, vn);
c7079c84
N
3897 /* Clear the Transition flag */
3898 if (ddf->phys->entries[dl->pdnum].state
3899 & __be16_to_cpu(DDF_Failed))
3900 ddf->phys->entries[dl->pdnum].state &=
3901 ~__be16_to_cpu(DDF_Transition);
3902
88c164f4 3903 dl->vlist[vn++] = vcl;
8401644c
N
3904 vstate = ddf->virt->entries[vcl->vcnum].state
3905 & DDF_state_mask;
3906 if (vstate == DDF_state_degraded ||
3907 vstate == DDF_state_part_optimal)
3908 in_degraded = 1;
88c164f4
NB
3909 break;
3910 }
3911 while (vn < ddf->max_part)
3912 dl->vlist[vn++] = NULL;
7e1432fb
NB
3913 if (dl->vlist[0]) {
3914 ddf->phys->entries[dl->pdnum].type &=
3915 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
3916 if (!(ddf->phys->entries[dl->pdnum].type &
3917 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
3918 ddf->phys->entries[dl->pdnum].type |=
3919 __cpu_to_be16(DDF_Active_in_VD);
3920 if (in_degraded)
3921 ddf->phys->entries[dl->pdnum].state |=
3922 __cpu_to_be16(DDF_Rebuilding);
3923 }
7e1432fb
NB
3924 }
3925 if (dl->spare) {
3926 ddf->phys->entries[dl->pdnum].type &=
3927 ~__cpu_to_be16(DDF_Global_Spare);
3928 ddf->phys->entries[dl->pdnum].type |=
3929 __cpu_to_be16(DDF_Spare);
3930 }
3931 if (!dl->vlist[0] && !dl->spare) {
3932 ddf->phys->entries[dl->pdnum].type |=
3933 __cpu_to_be16(DDF_Global_Spare);
3934 ddf->phys->entries[dl->pdnum].type &=
3935 ~__cpu_to_be16(DDF_Spare |
3936 DDF_Active_in_VD);
3937 }
88c164f4 3938 }
c7079c84
N
3939
3940 /* Now remove any 'Failed' devices that are not part
3941 * of any VD. They will have the Transition flag set.
3942 * Once done, we need to update all dl->pdnum numbers.
3943 */
3944 pd2 = 0;
3945 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3946 if ((ddf->phys->entries[pdnum].state
3947 & __be16_to_cpu(DDF_Failed))
3948 && (ddf->phys->entries[pdnum].state
3949 & __be16_to_cpu(DDF_Transition)))
3950 /* skip this one */;
3951 else if (pdnum == pd2)
3952 pd2++;
3953 else {
3954 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
3955 for (dl = ddf->dlist; dl; dl = dl->next)
3956 if (dl->pdnum == (int)pdnum)
3957 dl->pdnum = pd2;
3958 pd2++;
3959 }
3960 ddf->phys->used_pdes = __cpu_to_be16(pd2);
3961 while (pd2 < pdnum) {
3962 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
3963 pd2++;
3964 }
3965
18a2f463 3966 ddf->updates_pending = 1;
88c164f4
NB
3967 break;
3968 case DDF_SPARE_ASSIGN_MAGIC:
3969 default: break;
3970 }
3971}
3972
edd8d13c
NB
3973static void ddf_prepare_update(struct supertype *st,
3974 struct metadata_update *update)
3975{
3976 /* This update arrived at managemon.
3977 * We are about to pass it to monitor.
3978 * If a malloc is needed, do it here.
3979 */
3980 struct ddf_super *ddf = st->sb;
3981 __u32 *magic = (__u32*)update->buf;
3982 if (*magic == DDF_VD_CONF_MAGIC)
e6b9548d 3983 if (posix_memalign(&update->space, 512,
613b0d17
N
3984 offsetof(struct vcl, conf)
3985 + ddf->conf_rec_len * 512) != 0)
e6b9548d 3986 update->space = NULL;
edd8d13c
NB
3987}
3988
7e1432fb
NB
3989/*
3990 * Check if the array 'a' is degraded but not failed.
3991 * If it is, find as many spares as are available and needed and
3992 * arrange for their inclusion.
3993 * We only choose devices which are not already in the array,
3994 * and prefer those with a spare-assignment to this array.
3995 * otherwise we choose global spares - assuming always that
3996 * there is enough room.
3997 * For each spare that we assign, we return an 'mdinfo' which
3998 * describes the position for the device in the array.
3999 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4000 * the new phys_refnum and lba_offset values.
4001 *
4002 * Only worry about BVDs at the moment.
4003 */
4004static struct mdinfo *ddf_activate_spare(struct active_array *a,
4005 struct metadata_update **updates)
4006{
4007 int working = 0;
4008 struct mdinfo *d;
4009 struct ddf_super *ddf = a->container->sb;
4010 int global_ok = 0;
4011 struct mdinfo *rv = NULL;
4012 struct mdinfo *di;
4013 struct metadata_update *mu;
4014 struct dl *dl;
4015 int i;
4016 struct vd_config *vc;
4017 __u64 *lba;
4018
7e1432fb
NB
4019 for (d = a->info.devs ; d ; d = d->next) {
4020 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4021 d->state_fd >= 0)
7e1432fb
NB
4022 /* wait for Removal to happen */
4023 return NULL;
4024 if (d->state_fd >= 0)
4025 working ++;
4026 }
4027
2c514b71
NB
4028 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4029 a->info.array.level);
7e1432fb
NB
4030 if (working == a->info.array.raid_disks)
4031 return NULL; /* array not degraded */
4032 switch (a->info.array.level) {
4033 case 1:
4034 if (working == 0)
4035 return NULL; /* failed */
4036 break;
4037 case 4:
4038 case 5:
4039 if (working < a->info.array.raid_disks - 1)
4040 return NULL; /* failed */
4041 break;
4042 case 6:
4043 if (working < a->info.array.raid_disks - 2)
4044 return NULL; /* failed */
4045 break;
4046 default: /* concat or stripe */
4047 return NULL; /* failed */
4048 }
4049
4050 /* For each slot, if it is not working, find a spare */
4051 dl = ddf->dlist;
4052 for (i = 0; i < a->info.array.raid_disks; i++) {
4053 for (d = a->info.devs ; d ; d = d->next)
4054 if (d->disk.raid_disk == i)
4055 break;
2c514b71 4056 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4057 if (d && (d->state_fd >= 0))
4058 continue;
4059
4060 /* OK, this device needs recovery. Find a spare */
4061 again:
4062 for ( ; dl ; dl = dl->next) {
4063 unsigned long long esize;
4064 unsigned long long pos;
4065 struct mdinfo *d2;
4066 int is_global = 0;
4067 int is_dedicated = 0;
4068 struct extent *ex;
f21e18ca 4069 unsigned int j;
7e1432fb
NB
4070 /* If in this array, skip */
4071 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4072 if (d2->state_fd >= 0 &&
4073 d2->disk.major == dl->major &&
7e1432fb 4074 d2->disk.minor == dl->minor) {
2c514b71 4075 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4076 break;
4077 }
4078 if (d2)
4079 continue;
4080 if (ddf->phys->entries[dl->pdnum].type &
4081 __cpu_to_be16(DDF_Spare)) {
4082 /* Check spare assign record */
4083 if (dl->spare) {
4084 if (dl->spare->type & DDF_spare_dedicated) {
4085 /* check spare_ents for guid */
4086 for (j = 0 ;
4087 j < __be16_to_cpu(dl->spare->populated);
4088 j++) {
4089 if (memcmp(dl->spare->spare_ents[j].guid,
4090 ddf->virt->entries[a->info.container_member].guid,
4091 DDF_GUID_LEN) == 0)
4092 is_dedicated = 1;
4093 }
4094 } else
4095 is_global = 1;
4096 }
4097 } else if (ddf->phys->entries[dl->pdnum].type &
4098 __cpu_to_be16(DDF_Global_Spare)) {
4099 is_global = 1;
e0e7aeaa
N
4100 } else if (!(ddf->phys->entries[dl->pdnum].state &
4101 __cpu_to_be16(DDF_Failed))) {
4102 /* we can possibly use some of this */
4103 is_global = 1;
7e1432fb
NB
4104 }
4105 if ( ! (is_dedicated ||
4106 (is_global && global_ok))) {
2c514b71 4107 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4108 is_dedicated, is_global);
7e1432fb
NB
4109 continue;
4110 }
4111
4112 /* We are allowed to use this device - is there space?
4113 * We need a->info.component_size sectors */
4114 ex = get_extents(ddf, dl);
4115 if (!ex) {
2c514b71 4116 dprintf("cannot get extents\n");
7e1432fb
NB
4117 continue;
4118 }
4119 j = 0; pos = 0;
4120 esize = 0;
4121
4122 do {
4123 esize = ex[j].start - pos;
4124 if (esize >= a->info.component_size)
4125 break;
e5cc7d46
N
4126 pos = ex[j].start + ex[j].size;
4127 j++;
4128 } while (ex[j-1].size);
7e1432fb
NB
4129
4130 free(ex);
4131 if (esize < a->info.component_size) {
e5cc7d46
N
4132 dprintf("%x:%x has no room: %llu %llu\n",
4133 dl->major, dl->minor,
2c514b71 4134 esize, a->info.component_size);
7e1432fb
NB
4135 /* No room */
4136 continue;
4137 }
4138
4139 /* Cool, we have a device with some space at pos */
503975b9 4140 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4141 di->disk.number = i;
4142 di->disk.raid_disk = i;
4143 di->disk.major = dl->major;
4144 di->disk.minor = dl->minor;
4145 di->disk.state = 0;
d23534e4 4146 di->recovery_start = 0;
7e1432fb
NB
4147 di->data_offset = pos;
4148 di->component_size = a->info.component_size;
4149 di->container_member = dl->pdnum;
4150 di->next = rv;
4151 rv = di;
2c514b71
NB
4152 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4153 i, pos);
7e1432fb
NB
4154
4155 break;
4156 }
4157 if (!dl && ! global_ok) {
4158 /* not enough dedicated spares, try global */
4159 global_ok = 1;
4160 dl = ddf->dlist;
4161 goto again;
4162 }
4163 }
4164
4165 if (!rv)
4166 /* No spares found */
4167 return rv;
4168 /* Now 'rv' has a list of devices to return.
4169 * Create a metadata_update record to update the
4170 * phys_refnum and lba_offset values
4171 */
503975b9
N
4172 mu = xmalloc(sizeof(*mu));
4173 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4174 free(mu);
4175 mu = NULL;
4176 }
503975b9 4177 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4178 mu->len = ddf->conf_rec_len * 512;
4179 mu->space = NULL;
f50ae22e 4180 mu->space_list = NULL;
7e1432fb
NB
4181 mu->next = *updates;
4182 vc = find_vdcr(ddf, a->info.container_member);
4183 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4184
4185 vc = (struct vd_config*)mu->buf;
4186 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4187 for (di = rv ; di ; di = di->next) {
4188 vc->phys_refnum[di->disk.raid_disk] =
4189 ddf->phys->entries[dl->pdnum].refnum;
4190 lba[di->disk.raid_disk] = di->data_offset;
4191 }
4192 *updates = mu;
4193 return rv;
4194}
0e600426 4195#endif /* MDASSEMBLE */
7e1432fb 4196
b640a252
N
4197static int ddf_level_to_layout(int level)
4198{
4199 switch(level) {
4200 case 0:
4201 case 1:
4202 return 0;
4203 case 5:
4204 return ALGORITHM_LEFT_SYMMETRIC;
4205 case 6:
4206 return ALGORITHM_ROTATING_N_CONTINUE;
4207 case 10:
4208 return 0x102;
4209 default:
4210 return UnSet;
4211 }
4212}
4213
30f58b22
DW
4214static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4215{
4216 if (level && *level == UnSet)
4217 *level = LEVEL_CONTAINER;
4218
4219 if (level && layout && *layout == UnSet)
4220 *layout = ddf_level_to_layout(*level);
4221}
4222
a322f70c
DW
4223struct superswitch super_ddf = {
4224#ifndef MDASSEMBLE
4225 .examine_super = examine_super_ddf,
4226 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4227 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4228 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4229 .detail_super = detail_super_ddf,
4230 .brief_detail_super = brief_detail_super_ddf,
4231 .validate_geometry = validate_geometry_ddf,
78e44928 4232 .write_init_super = write_init_super_ddf,
0e600426 4233 .add_to_super = add_to_super_ddf,
4dd968cc 4234 .remove_from_super = remove_from_super_ddf,
2b959fbf 4235 .load_container = load_container_ddf,
a322f70c
DW
4236#endif
4237 .match_home = match_home_ddf,
4238 .uuid_from_super= uuid_from_super_ddf,
4239 .getinfo_super = getinfo_super_ddf,
4240 .update_super = update_super_ddf,
4241
4242 .avail_size = avail_size_ddf,
4243
a19c88b8
NB
4244 .compare_super = compare_super_ddf,
4245
a322f70c 4246 .load_super = load_super_ddf,
ba7eb04f 4247 .init_super = init_super_ddf,
955e9ea1 4248 .store_super = store_super_ddf,
a322f70c
DW
4249 .free_super = free_super_ddf,
4250 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4251 .container_content = container_content_ddf,
30f58b22 4252 .default_geometry = default_geometry_ddf,
a322f70c 4253
a322f70c 4254 .external = 1,
549e9569 4255
0e600426 4256#ifndef MDASSEMBLE
549e9569
NB
4257/* for mdmon */
4258 .open_new = ddf_open_new,
ed9d66aa 4259 .set_array_state= ddf_set_array_state,
549e9569
NB
4260 .set_disk = ddf_set_disk,
4261 .sync_metadata = ddf_sync_metadata,
88c164f4 4262 .process_update = ddf_process_update,
edd8d13c 4263 .prepare_update = ddf_prepare_update,
7e1432fb 4264 .activate_spare = ddf_activate_spare,
0e600426 4265#endif
4cce4069 4266 .name = "ddf",
a322f70c 4267};