]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: compare_super_ddf: merge local info of other superblock
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * the number of seconds in the decade of the seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
/* CRC routine implemented elsewhere; calc_crc() below calls it with crc == 0 */
unsigned long crc32(unsigned long crc, const unsigned char *buf, unsigned len);
46
47/* The DDF metadata handling.
48 * DDF metadata lives at the end of the device.
49 * The last 512 byte block provides an 'anchor' which is used to locate
50 * the rest of the metadata which usually lives immediately behind the anchor.
51 *
52 * Note:
53 * - all multibyte numeric fields are bigendian.
54 * - all strings are space padded.
55 *
56 */
57
58/* Primary Raid Level (PRL) */
59#define DDF_RAID0 0x00
60#define DDF_RAID1 0x01
61#define DDF_RAID3 0x03
62#define DDF_RAID4 0x04
63#define DDF_RAID5 0x05
64#define DDF_RAID1E 0x11
65#define DDF_JBOD 0x0f
66#define DDF_CONCAT 0x1f
67#define DDF_RAID5E 0x15
68#define DDF_RAID5EE 0x25
59e36268 69#define DDF_RAID6 0x06
a322f70c
DW
70
71/* Raid Level Qualifier (RLQ) */
72#define DDF_RAID0_SIMPLE 0x00
73#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
74#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
75#define DDF_RAID3_0 0x00 /* parity in first extent */
76#define DDF_RAID3_N 0x01 /* parity in last extent */
77#define DDF_RAID4_0 0x00 /* parity in first extent */
78#define DDF_RAID4_N 0x01 /* parity in last extent */
79/* these apply to raid5e and raid5ee as well */
80#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 81#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
82#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
83#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
84
85#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
86#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
87
88/* Secondary RAID Level (SRL) */
89#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
90#define DDF_2MIRRORED 0x01
91#define DDF_2CONCAT 0x02
92#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
93
94/* Magic numbers */
95#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
96#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
97#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
98#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
99#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
100#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
101#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
102#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
103#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
104#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
105
106#define DDF_GUID_LEN 24
59e36268
NB
107#define DDF_REVISION_0 "01.00.00"
108#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
109
110struct ddf_header {
88c164f4 111 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
112 __u32 crc;
113 char guid[DDF_GUID_LEN];
59e36268 114 char revision[8]; /* 01.02.00 */
a322f70c
DW
115 __u32 seq; /* starts at '1' */
116 __u32 timestamp;
117 __u8 openflag;
118 __u8 foreignflag;
119 __u8 enforcegroups;
120 __u8 pad0; /* 0xff */
121 __u8 pad1[12]; /* 12 * 0xff */
122 /* 64 bytes so far */
123 __u8 header_ext[32]; /* reserved: fill with 0xff */
124 __u64 primary_lba;
125 __u64 secondary_lba;
126 __u8 type;
127 __u8 pad2[3]; /* 0xff */
128 __u32 workspace_len; /* sectors for vendor space -
129 * at least 32768(sectors) */
130 __u64 workspace_lba;
131 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
132 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
133 __u16 max_partitions; /* i.e. max num of configuration
134 record entries per disk */
135 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
136 *12/512) */
137 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
138 __u8 pad3[54]; /* 0xff */
139 /* 192 bytes so far */
140 __u32 controller_section_offset;
141 __u32 controller_section_length;
142 __u32 phys_section_offset;
143 __u32 phys_section_length;
144 __u32 virt_section_offset;
145 __u32 virt_section_length;
146 __u32 config_section_offset;
147 __u32 config_section_length;
148 __u32 data_section_offset;
149 __u32 data_section_length;
150 __u32 bbm_section_offset;
151 __u32 bbm_section_length;
152 __u32 diag_space_offset;
153 __u32 diag_space_length;
154 __u32 vendor_offset;
155 __u32 vendor_length;
156 /* 256 bytes so far */
157 __u8 pad4[256]; /* 0xff */
158};
159
160/* type field */
161#define DDF_HEADER_ANCHOR 0x00
162#define DDF_HEADER_PRIMARY 0x01
163#define DDF_HEADER_SECONDARY 0x02
164
165/* The content of the 'controller section' - global scope */
166struct ddf_controller_data {
88c164f4 167 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
168 __u32 crc;
169 char guid[DDF_GUID_LEN];
170 struct controller_type {
171 __u16 vendor_id;
172 __u16 device_id;
173 __u16 sub_vendor_id;
174 __u16 sub_device_id;
175 } type;
176 char product_id[16];
177 __u8 pad[8]; /* 0xff */
178 __u8 vendor_data[448];
179};
180
181/* The content of phys_section - global scope */
182struct phys_disk {
88c164f4 183 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
184 __u32 crc;
185 __u16 used_pdes;
186 __u16 max_pdes;
187 __u8 pad[52];
188 struct phys_disk_entry {
189 char guid[DDF_GUID_LEN];
190 __u32 refnum;
191 __u16 type;
192 __u16 state;
193 __u64 config_size; /* DDF structures must be after here */
194 char path[18]; /* another horrible structure really */
195 __u8 pad[6];
196 } entries[0];
197};
198
199/* phys_disk_entry.type is a bitmap - bigendian remember */
200#define DDF_Forced_PD_GUID 1
201#define DDF_Active_in_VD 2
88c164f4 202#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
203#define DDF_Spare 8 /* overrides Global_spare */
204#define DDF_Foreign 16
205#define DDF_Legacy 32 /* no DDF on this device */
206
207#define DDF_Interface_mask 0xf00
208#define DDF_Interface_SCSI 0x100
209#define DDF_Interface_SAS 0x200
210#define DDF_Interface_SATA 0x300
211#define DDF_Interface_FC 0x400
212
213/* phys_disk_entry.state is a bigendian bitmap */
214#define DDF_Online 1
215#define DDF_Failed 2 /* overrides 1,4,8 */
216#define DDF_Rebuilding 4
217#define DDF_Transition 8
218#define DDF_SMART 16
219#define DDF_ReadErrors 32
220#define DDF_Missing 64
221
222/* The content of the virt_section global scope */
223struct virtual_disk {
88c164f4 224 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
225 __u32 crc;
226 __u16 populated_vdes;
227 __u16 max_vdes;
228 __u8 pad[52];
229 struct virtual_entry {
230 char guid[DDF_GUID_LEN];
231 __u16 unit;
232 __u16 pad0; /* 0xffff */
233 __u16 guid_crc;
234 __u16 type;
235 __u8 state;
236 __u8 init_state;
237 __u8 pad1[14];
238 char name[16];
239 } entries[0];
240};
241
242/* virtual_entry.type is a bitmap - bigendian */
243#define DDF_Shared 1
244#define DDF_Enforce_Groups 2
245#define DDF_Unicode 4
246#define DDF_Owner_Valid 8
247
248/* virtual_entry.state is a bigendian bitmap */
249#define DDF_state_mask 0x7
250#define DDF_state_optimal 0x0
251#define DDF_state_degraded 0x1
252#define DDF_state_deleted 0x2
253#define DDF_state_missing 0x3
254#define DDF_state_failed 0x4
7a7cc504 255#define DDF_state_part_optimal 0x5
a322f70c
DW
256
257#define DDF_state_morphing 0x8
258#define DDF_state_inconsistent 0x10
259
260/* virtual_entry.init_state is a bigendian bitmap */
261#define DDF_initstate_mask 0x03
262#define DDF_init_not 0x00
7a7cc504
NB
263#define DDF_init_quick 0x01 /* initialisation is progress.
264 * i.e. 'state_inconsistent' */
a322f70c
DW
265#define DDF_init_full 0x02
266
267#define DDF_access_mask 0xc0
268#define DDF_access_rw 0x00
269#define DDF_access_ro 0x80
270#define DDF_access_blocked 0xc0
271
272/* The content of the config_section - local scope
273 * It has multiple records each config_record_len sectors
274 * They can be vd_config or spare_assign
275 */
276
277struct vd_config {
88c164f4 278 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
279 __u32 crc;
280 char guid[DDF_GUID_LEN];
281 __u32 timestamp;
282 __u32 seqnum;
283 __u8 pad0[24];
284 __u16 prim_elmnt_count;
285 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
286 __u8 prl;
287 __u8 rlq;
288 __u8 sec_elmnt_count;
289 __u8 sec_elmnt_seq;
290 __u8 srl;
598f0d58
NB
291 __u64 blocks; /* blocks per component could be different
292 * on different component devices...(only
293 * for concat I hope) */
294 __u64 array_blocks; /* blocks in array */
a322f70c
DW
295 __u8 pad1[8];
296 __u32 spare_refs[8];
297 __u8 cache_pol[8];
298 __u8 bg_rate;
299 __u8 pad2[3];
300 __u8 pad3[52];
301 __u8 pad4[192];
302 __u8 v0[32]; /* reserved- 0xff */
303 __u8 v1[32]; /* reserved- 0xff */
304 __u8 v2[16]; /* reserved- 0xff */
305 __u8 v3[16]; /* reserved- 0xff */
306 __u8 vendor[32];
307 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
308 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
309 bvd are always the same size */
310};
311
312/* vd_config.cache_pol[7] is a bitmap */
313#define DDF_cache_writeback 1 /* else writethrough */
314#define DDF_cache_wadaptive 2 /* only applies if writeback */
315#define DDF_cache_readahead 4
316#define DDF_cache_radaptive 8 /* only if doing read-ahead */
317#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
318#define DDF_cache_wallowed 32 /* enable write caching */
319#define DDF_cache_rallowed 64 /* enable read caching */
320
321struct spare_assign {
88c164f4 322 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
323 __u32 crc;
324 __u32 timestamp;
325 __u8 reserved[7];
326 __u8 type;
327 __u16 populated; /* SAEs used */
328 __u16 max; /* max SAEs */
329 __u8 pad[8];
330 struct spare_assign_entry {
331 char guid[DDF_GUID_LEN];
332 __u16 secondary_element;
333 __u8 pad[6];
334 } spare_ents[0];
335};
336/* spare_assign.type is a bitmap */
337#define DDF_spare_dedicated 0x1 /* else global */
338#define DDF_spare_revertible 0x2 /* else committable */
339#define DDF_spare_active 0x4 /* else not active */
340#define DDF_spare_affinity 0x8 /* enclosure affinity */
341
342/* The data_section contents - local scope */
343struct disk_data {
88c164f4 344 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
345 __u32 crc;
346 char guid[DDF_GUID_LEN];
347 __u32 refnum; /* crc of some magic drive data ... */
348 __u8 forced_ref; /* set when above was not result of magic */
349 __u8 forced_guid; /* set if guid was forced rather than magic */
350 __u8 vendor[32];
351 __u8 pad[442];
352};
353
354/* bbm_section content */
355struct bad_block_log {
356 __u32 magic;
357 __u32 crc;
358 __u16 entry_count;
359 __u32 spare_count;
360 __u8 pad[10];
361 __u64 first_spare;
362 struct mapped_block {
363 __u64 defective_start;
364 __u32 replacement_start;
365 __u16 remap_count;
366 __u8 pad[2];
367 } entries[0];
368};
369
370/* Struct for internally holding ddf structures */
371/* The DDF structure stored on each device is potentially
372 * quite different, as some data is global and some is local.
373 * The global data is:
374 * - ddf header
375 * - controller_data
376 * - Physical disk records
377 * - Virtual disk records
378 * The local data is:
379 * - Configuration records
380 * - Physical Disk data section
381 * ( and Bad block and vendor which I don't care about yet).
382 *
383 * The local data is parsed into separate lists as it is read
384 * and reconstructed for writing. This means that we only need
385 * to make config changes once and they are automatically
386 * propagated to all devices.
387 * Note that the ddf_super has space for the conf and disk data
388 * for this disk and also for a list of all such data.
389 * The list is only used for the superblock that is being
390 * built in Create or Assemble to describe the whole array.
391 */
392struct ddf_super {
6416d527 393 struct ddf_header anchor, primary, secondary;
a322f70c 394 struct ddf_controller_data controller;
6416d527 395 struct ddf_header *active;
a322f70c
DW
396 struct phys_disk *phys;
397 struct virtual_disk *virt;
398 int pdsize, vdsize;
f21e18ca 399 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 400 int currentdev;
18a2f463 401 int updates_pending;
a322f70c 402 struct vcl {
6416d527
NB
403 union {
404 char space[512];
405 struct {
406 struct vcl *next;
407 __u64 *lba_offset; /* location in 'conf' of
408 * the lba table */
f21e18ca 409 unsigned int vcnum; /* index into ->virt */
8ec5d685 410 struct vd_config **other_bvds;
6416d527
NB
411 __u64 *block_sizes; /* NULL if all the same */
412 };
413 };
a322f70c 414 struct vd_config conf;
d2ca6449 415 } *conflist, *currentconf;
a322f70c 416 struct dl {
6416d527
NB
417 union {
418 char space[512];
419 struct {
420 struct dl *next;
421 int major, minor;
422 char *devname;
423 int fd;
424 unsigned long long size; /* sectors */
097bcf00 425 unsigned long long primary_lba; /* sectors */
426 unsigned long long secondary_lba; /* sectors */
427 unsigned long long workspace_lba; /* sectors */
6416d527
NB
428 int pdnum; /* index in ->phys */
429 struct spare_assign *spare;
8592f29d
N
430 void *mdupdate; /* hold metadata update */
431
432 /* These fields used by auto-layout */
433 int raiddisk; /* slot to fill in autolayout */
434 __u64 esize;
6416d527
NB
435 };
436 };
a322f70c 437 struct disk_data disk;
b2280677 438 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 439 } *dlist, *add_list;
a322f70c
DW
440};
441
442#ifndef offsetof
443#define offsetof(t,f) ((size_t)&(((t*)0)->f))
444#endif
445
f21e18ca 446static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
447{
448 /* crcs are always at the same place as in the ddf_header */
449 struct ddf_header *ddf = buf;
450 __u32 oldcrc = ddf->crc;
451 __u32 newcrc;
452 ddf->crc = 0xffffffff;
453
454 newcrc = crc32(0, buf, len);
455 ddf->crc = oldcrc;
4abe6b70
N
456 /* The crc is store (like everything) bigendian, so convert
457 * here for simplicity
458 */
459 return __cpu_to_be32(newcrc);
a322f70c
DW
460}
461
462static int load_ddf_header(int fd, unsigned long long lba,
463 unsigned long long size,
464 int type,
465 struct ddf_header *hdr, struct ddf_header *anchor)
466{
467 /* read a ddf header (primary or secondary) from fd/lba
468 * and check that it is consistent with anchor
469 * Need to check:
470 * magic, crc, guid, rev, and LBA's header_type, and
471 * everything after header_type must be the same
472 */
473 if (lba >= size-1)
474 return 0;
475
476 if (lseek64(fd, lba<<9, 0) < 0)
477 return 0;
478
479 if (read(fd, hdr, 512) != 512)
480 return 0;
481
482 if (hdr->magic != DDF_HEADER_MAGIC)
483 return 0;
484 if (calc_crc(hdr, 512) != hdr->crc)
485 return 0;
486 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
487 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
488 anchor->primary_lba != hdr->primary_lba ||
489 anchor->secondary_lba != hdr->secondary_lba ||
490 hdr->type != type ||
491 memcmp(anchor->pad2, hdr->pad2, 512 -
492 offsetof(struct ddf_header, pad2)) != 0)
493 return 0;
494
495 /* Looks good enough to me... */
496 return 1;
497}
498
499static void *load_section(int fd, struct ddf_super *super, void *buf,
500 __u32 offset_be, __u32 len_be, int check)
501{
502 unsigned long long offset = __be32_to_cpu(offset_be);
503 unsigned long long len = __be32_to_cpu(len_be);
504 int dofree = (buf == NULL);
505
506 if (check)
507 if (len != 2 && len != 8 && len != 32
508 && len != 128 && len != 512)
509 return NULL;
510
511 if (len > 1024)
512 return NULL;
513 if (buf) {
514 /* All pre-allocated sections are a single block */
515 if (len != 1)
516 return NULL;
3d2c4fc7
DW
517 } else if (posix_memalign(&buf, 512, len<<9) != 0)
518 buf = NULL;
6416d527 519
a322f70c
DW
520 if (!buf)
521 return NULL;
522
523 if (super->active->type == 1)
524 offset += __be64_to_cpu(super->active->primary_lba);
525 else
526 offset += __be64_to_cpu(super->active->secondary_lba);
527
f21e18ca 528 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
529 if (dofree)
530 free(buf);
531 return NULL;
532 }
f21e18ca 533 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
534 if (dofree)
535 free(buf);
536 return NULL;
537 }
538 return buf;
539}
540
541static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
542{
543 unsigned long long dsize;
544
545 get_dev_size(fd, NULL, &dsize);
546
547 if (lseek64(fd, dsize-512, 0) < 0) {
548 if (devname)
e7b84f9d
N
549 pr_err("Cannot seek to anchor block on %s: %s\n",
550 devname, strerror(errno));
a322f70c
DW
551 return 1;
552 }
553 if (read(fd, &super->anchor, 512) != 512) {
554 if (devname)
e7b84f9d
N
555 pr_err("Cannot read anchor block on %s: %s\n",
556 devname, strerror(errno));
a322f70c
DW
557 return 1;
558 }
559 if (super->anchor.magic != DDF_HEADER_MAGIC) {
560 if (devname)
e7b84f9d 561 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
562 devname);
563 return 2;
564 }
565 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
566 if (devname)
e7b84f9d 567 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
568 devname);
569 return 2;
570 }
59e36268
NB
571 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
572 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 573 if (devname)
e7b84f9d 574 pr_err("can only support super revision"
59e36268
NB
575 " %.8s and earlier, not %.8s on %s\n",
576 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
577 return 2;
578 }
579 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
580 dsize >> 9, 1,
581 &super->primary, &super->anchor) == 0) {
582 if (devname)
e7b84f9d
N
583 pr_err("Failed to load primary DDF header "
584 "on %s\n", devname);
a322f70c
DW
585 return 2;
586 }
587 super->active = &super->primary;
588 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
589 dsize >> 9, 2,
590 &super->secondary, &super->anchor)) {
591 if ((__be32_to_cpu(super->primary.seq)
592 < __be32_to_cpu(super->secondary.seq) &&
593 !super->secondary.openflag)
594 || (__be32_to_cpu(super->primary.seq)
595 == __be32_to_cpu(super->secondary.seq) &&
596 super->primary.openflag && !super->secondary.openflag)
597 )
598 super->active = &super->secondary;
599 }
600 return 0;
601}
602
603static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
604{
605 void *ok;
606 ok = load_section(fd, super, &super->controller,
607 super->active->controller_section_offset,
608 super->active->controller_section_length,
609 0);
610 super->phys = load_section(fd, super, NULL,
611 super->active->phys_section_offset,
612 super->active->phys_section_length,
613 1);
614 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
615
616 super->virt = load_section(fd, super, NULL,
617 super->active->virt_section_offset,
618 super->active->virt_section_length,
619 1);
620 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
621 if (!ok ||
622 !super->phys ||
623 !super->virt) {
624 free(super->phys);
625 free(super->virt);
a2349791
NB
626 super->phys = NULL;
627 super->virt = NULL;
a322f70c
DW
628 return 2;
629 }
630 super->conflist = NULL;
631 super->dlist = NULL;
8c3b8c2c
NB
632
633 super->max_part = __be16_to_cpu(super->active->max_partitions);
634 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
635 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
636 return 0;
637}
638
3dc821b0 639static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
640 unsigned int len)
641{
642 int i;
643 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
644 if (vcl->other_bvds[i] != NULL &&
645 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
646 break;
647
648 if (i < vcl->conf.sec_elmnt_count-1) {
649 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
650 return;
651 } else {
652 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
653 if (vcl->other_bvds[i] == NULL)
654 break;
655 if (i == vcl->conf.sec_elmnt_count-1) {
656 pr_err("no space for sec level config %u, count is %u\n",
657 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
658 return;
659 }
660 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
661 != 0) {
662 pr_err("%s could not allocate vd buf\n", __func__);
663 return;
664 }
665 }
666 memcpy(vcl->other_bvds[i], vd, len);
667}
668
a322f70c
DW
669static int load_ddf_local(int fd, struct ddf_super *super,
670 char *devname, int keep)
671{
672 struct dl *dl;
673 struct stat stb;
674 char *conf;
f21e18ca
N
675 unsigned int i;
676 unsigned int confsec;
b2280677 677 int vnum;
f21e18ca 678 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 679 unsigned long long dsize;
a322f70c
DW
680
681 /* First the local disk info */
3d2c4fc7 682 if (posix_memalign((void**)&dl, 512,
6416d527 683 sizeof(*dl) +
3d2c4fc7 684 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 685 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
686 __func__);
687 return 1;
688 }
a322f70c
DW
689
690 load_section(fd, super, &dl->disk,
691 super->active->data_section_offset,
692 super->active->data_section_length,
693 0);
503975b9 694 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 695
a322f70c
DW
696 fstat(fd, &stb);
697 dl->major = major(stb.st_rdev);
698 dl->minor = minor(stb.st_rdev);
699 dl->next = super->dlist;
700 dl->fd = keep ? fd : -1;
d2ca6449
NB
701
702 dl->size = 0;
703 if (get_dev_size(fd, devname, &dsize))
704 dl->size = dsize >> 9;
097bcf00 705 /* If the disks have different sizes, the LBAs will differ
706 * between phys disks.
707 * At this point here, the values in super->active must be valid
708 * for this phys disk. */
709 dl->primary_lba = super->active->primary_lba;
710 dl->secondary_lba = super->active->secondary_lba;
711 dl->workspace_lba = super->active->workspace_lba;
b2280677 712 dl->spare = NULL;
f21e18ca 713 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
714 dl->vlist[i] = NULL;
715 super->dlist = dl;
59e36268 716 dl->pdnum = -1;
f21e18ca 717 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
718 if (memcmp(super->phys->entries[i].guid,
719 dl->disk.guid, DDF_GUID_LEN) == 0)
720 dl->pdnum = i;
721
a322f70c
DW
722 /* Now the config list. */
723 /* 'conf' is an array of config entries, some of which are
724 * probably invalid. Those which are good need to be copied into
725 * the conflist
726 */
a322f70c
DW
727
728 conf = load_section(fd, super, NULL,
729 super->active->config_section_offset,
730 super->active->config_section_length,
731 0);
732
b2280677 733 vnum = 0;
e223334f
N
734 for (confsec = 0;
735 confsec < __be32_to_cpu(super->active->config_section_length);
736 confsec += super->conf_rec_len) {
a322f70c 737 struct vd_config *vd =
e223334f 738 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
739 struct vcl *vcl;
740
b2280677
NB
741 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
742 if (dl->spare)
743 continue;
3d2c4fc7
DW
744 if (posix_memalign((void**)&dl->spare, 512,
745 super->conf_rec_len*512) != 0) {
e7b84f9d
N
746 pr_err("%s could not allocate spare info buf\n",
747 __func__);
3d2c4fc7
DW
748 return 1;
749 }
613b0d17 750
b2280677
NB
751 memcpy(dl->spare, vd, super->conf_rec_len*512);
752 continue;
753 }
a322f70c
DW
754 if (vd->magic != DDF_VD_CONF_MAGIC)
755 continue;
756 for (vcl = super->conflist; vcl; vcl = vcl->next) {
757 if (memcmp(vcl->conf.guid,
758 vd->guid, DDF_GUID_LEN) == 0)
759 break;
760 }
761
762 if (vcl) {
b2280677 763 dl->vlist[vnum++] = vcl;
3dc821b0 764 if (vcl->other_bvds != NULL &&
765 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
766 add_other_bvd(vcl, vd, super->conf_rec_len*512);
767 continue;
768 }
a322f70c
DW
769 if (__be32_to_cpu(vd->seqnum) <=
770 __be32_to_cpu(vcl->conf.seqnum))
771 continue;
59e36268 772 } else {
3d2c4fc7 773 if (posix_memalign((void**)&vcl, 512,
6416d527 774 (super->conf_rec_len*512 +
3d2c4fc7 775 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
776 pr_err("%s could not allocate vcl buf\n",
777 __func__);
3d2c4fc7
DW
778 return 1;
779 }
a322f70c 780 vcl->next = super->conflist;
59e36268 781 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 782 if (vd->sec_elmnt_count > 1)
783 vcl->other_bvds =
784 xcalloc(vd->sec_elmnt_count - 1,
785 sizeof(struct vd_config *));
786 else
787 vcl->other_bvds = NULL;
a322f70c 788 super->conflist = vcl;
b2280677 789 dl->vlist[vnum++] = vcl;
a322f70c 790 }
8c3b8c2c 791 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
a322f70c 792 vcl->lba_offset = (__u64*)
8c3b8c2c 793 &vcl->conf.phys_refnum[super->mppe];
59e36268
NB
794
795 for (i=0; i < max_virt_disks ; i++)
796 if (memcmp(super->virt->entries[i].guid,
797 vcl->conf.guid, DDF_GUID_LEN)==0)
798 break;
799 if (i < max_virt_disks)
800 vcl->vcnum = i;
a322f70c
DW
801 }
802 free(conf);
803
804 return 0;
805}
806
807#ifndef MDASSEMBLE
808static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 809 void **sbp, char *devname);
a322f70c 810#endif
37424f13
DW
811
812static void free_super_ddf(struct supertype *st);
813
a322f70c
DW
814static int load_super_ddf(struct supertype *st, int fd,
815 char *devname)
816{
817 unsigned long long dsize;
818 struct ddf_super *super;
819 int rv;
820
a322f70c
DW
821 if (get_dev_size(fd, devname, &dsize) == 0)
822 return 1;
823
691c6ee1
N
824 if (test_partition(fd))
825 /* DDF is not allowed on partitions */
826 return 1;
827
a322f70c
DW
828 /* 32M is a lower bound */
829 if (dsize <= 32*1024*1024) {
97320d7c 830 if (devname)
e7b84f9d
N
831 pr_err("%s is too small for ddf: "
832 "size is %llu sectors.\n",
833 devname, dsize>>9);
97320d7c 834 return 1;
a322f70c
DW
835 }
836 if (dsize & 511) {
97320d7c 837 if (devname)
e7b84f9d
N
838 pr_err("%s is an odd size for ddf: "
839 "size is %llu bytes.\n",
840 devname, dsize);
97320d7c 841 return 1;
a322f70c
DW
842 }
843
37424f13
DW
844 free_super_ddf(st);
845
6416d527 846 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 847 pr_err("malloc of %zu failed.\n",
a322f70c
DW
848 sizeof(*super));
849 return 1;
850 }
a2349791 851 memset(super, 0, sizeof(*super));
a322f70c
DW
852
853 rv = load_ddf_headers(fd, super, devname);
854 if (rv) {
855 free(super);
856 return rv;
857 }
858
859 /* Have valid headers and have chosen the best. Let's read in the rest*/
860
861 rv = load_ddf_global(fd, super, devname);
862
863 if (rv) {
864 if (devname)
e7b84f9d
N
865 pr_err("Failed to load all information "
866 "sections on %s\n", devname);
a322f70c
DW
867 free(super);
868 return rv;
869 }
870
3d2c4fc7
DW
871 rv = load_ddf_local(fd, super, devname, 0);
872
873 if (rv) {
874 if (devname)
e7b84f9d
N
875 pr_err("Failed to load all information "
876 "sections on %s\n", devname);
3d2c4fc7
DW
877 free(super);
878 return rv;
879 }
a322f70c
DW
880
881 /* Should possibly check the sections .... */
882
883 st->sb = super;
884 if (st->ss == NULL) {
885 st->ss = &super_ddf;
886 st->minor_version = 0;
887 st->max_devs = 512;
888 }
889 return 0;
890
891}
892
893static void free_super_ddf(struct supertype *st)
894{
895 struct ddf_super *ddf = st->sb;
896 if (ddf == NULL)
897 return;
898 free(ddf->phys);
899 free(ddf->virt);
900 while (ddf->conflist) {
901 struct vcl *v = ddf->conflist;
902 ddf->conflist = v->next;
59e36268
NB
903 if (v->block_sizes)
904 free(v->block_sizes);
3dc821b0 905 if (v->other_bvds) {
906 int i;
907 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
908 if (v->other_bvds[i] != NULL)
909 free(v->other_bvds[i]);
8ec5d685 910 free(v->other_bvds);
3dc821b0 911 }
a322f70c
DW
912 free(v);
913 }
914 while (ddf->dlist) {
915 struct dl *d = ddf->dlist;
916 ddf->dlist = d->next;
917 if (d->fd >= 0)
918 close(d->fd);
b2280677
NB
919 if (d->spare)
920 free(d->spare);
a322f70c
DW
921 free(d);
922 }
8a38cb04
N
923 while (ddf->add_list) {
924 struct dl *d = ddf->add_list;
925 ddf->add_list = d->next;
926 if (d->fd >= 0)
927 close(d->fd);
928 if (d->spare)
929 free(d->spare);
930 free(d);
931 }
a322f70c
DW
932 free(ddf);
933 st->sb = NULL;
934}
935
936static struct supertype *match_metadata_desc_ddf(char *arg)
937{
938 /* 'ddf' only support containers */
939 struct supertype *st;
940 if (strcmp(arg, "ddf") != 0 &&
941 strcmp(arg, "default") != 0
942 )
943 return NULL;
944
503975b9 945 st = xcalloc(1, sizeof(*st));
a322f70c
DW
946 st->ss = &super_ddf;
947 st->max_devs = 512;
948 st->minor_version = 0;
949 st->sb = NULL;
950 return st;
951}
952
a322f70c
DW
953#ifndef MDASSEMBLE
954
955static mapping_t ddf_state[] = {
956 { "Optimal", 0},
957 { "Degraded", 1},
958 { "Deleted", 2},
959 { "Missing", 3},
960 { "Failed", 4},
961 { "Partially Optimal", 5},
962 { "-reserved-", 6},
963 { "-reserved-", 7},
964 { NULL, 0}
965};
966
967static mapping_t ddf_init_state[] = {
968 { "Not Initialised", 0},
969 { "QuickInit in Progress", 1},
970 { "Fully Initialised", 2},
971 { "*UNKNOWN*", 3},
972 { NULL, 0}
973};
974static mapping_t ddf_access[] = {
975 { "Read/Write", 0},
976 { "Reserved", 1},
977 { "Read Only", 2},
978 { "Blocked (no access)", 3},
979 { NULL ,0}
980};
981
982static mapping_t ddf_level[] = {
983 { "RAID0", DDF_RAID0},
984 { "RAID1", DDF_RAID1},
985 { "RAID3", DDF_RAID3},
986 { "RAID4", DDF_RAID4},
987 { "RAID5", DDF_RAID5},
988 { "RAID1E",DDF_RAID1E},
989 { "JBOD", DDF_JBOD},
990 { "CONCAT",DDF_CONCAT},
991 { "RAID5E",DDF_RAID5E},
992 { "RAID5EE",DDF_RAID5EE},
993 { "RAID6", DDF_RAID6},
994 { NULL, 0}
995};
996static mapping_t ddf_sec_level[] = {
997 { "Striped", DDF_2STRIPED},
998 { "Mirrored", DDF_2MIRRORED},
999 { "Concat", DDF_2CONCAT},
1000 { "Spanned", DDF_2SPANNED},
1001 { NULL, 0}
1002};
1003#endif
1004
/* One entry of a number-to-number table; map_num1() maps num1 to num2 */
struct num_mapping {
	int num1;
	int num2;
};
1008static struct num_mapping ddf_level_num[] = {
1009 { DDF_RAID0, 0 },
1010 { DDF_RAID1, 1 },
1011 { DDF_RAID3, LEVEL_UNSUPPORTED },
60f18132
NB
1012 { DDF_RAID4, 4 },
1013 { DDF_RAID5, 5 },
a322f70c
DW
1014 { DDF_RAID1E, LEVEL_UNSUPPORTED },
1015 { DDF_JBOD, LEVEL_UNSUPPORTED },
1016 { DDF_CONCAT, LEVEL_LINEAR },
1017 { DDF_RAID5E, LEVEL_UNSUPPORTED },
1018 { DDF_RAID5EE, LEVEL_UNSUPPORTED },
1019 { DDF_RAID6, 6},
1020 { MAXINT, MAXINT }
1021};
1022
1023static int map_num1(struct num_mapping *map, int num)
1024{
1025 int i;
1026 for (i=0 ; map[i].num1 != MAXINT; i++)
1027 if (map[i].num1 == num)
1028 break;
1029 return map[i].num2;
1030}
1031
42dc2744
N
1032static int all_ff(char *guid)
1033{
1034 int i;
1035 for (i = 0; i < DDF_GUID_LEN; i++)
1036 if (guid[i] != (char)0xff)
1037 return 0;
1038 return 1;
1039}
1040
a322f70c
DW
1041#ifndef MDASSEMBLE
1042static void print_guid(char *guid, int tstamp)
1043{
1044 /* A GUIDs are part (or all) ASCII and part binary.
1045 * They tend to be space padded.
59e36268
NB
1046 * We print the GUID in HEX, then in parentheses add
1047 * any initial ASCII sequence, and a possible
1048 * time stamp from bytes 16-19
a322f70c
DW
1049 */
1050 int l = DDF_GUID_LEN;
1051 int i;
59e36268
NB
1052
1053 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1054 if ((i&3)==0 && i != 0) printf(":");
1055 printf("%02X", guid[i]&255);
1056 }
1057
cfccea8c 1058 printf("\n (");
a322f70c
DW
1059 while (l && guid[l-1] == ' ')
1060 l--;
1061 for (i=0 ; i<l ; i++) {
1062 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1063 fputc(guid[i], stdout);
1064 else
59e36268 1065 break;
a322f70c
DW
1066 }
1067 if (tstamp) {
1068 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1069 char tbuf[100];
1070 struct tm *tm;
1071 tm = localtime(&then);
59e36268 1072 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1073 fputs(tbuf, stdout);
1074 }
59e36268 1075 printf(")");
a322f70c
DW
1076}
1077
1078static void examine_vd(int n, struct ddf_super *sb, char *guid)
1079{
8c3b8c2c 1080 int crl = sb->conf_rec_len;
a322f70c
DW
1081 struct vcl *vcl;
1082
1083 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1084 unsigned int i;
a322f70c
DW
1085 struct vd_config *vc = &vcl->conf;
1086
1087 if (calc_crc(vc, crl*512) != vc->crc)
1088 continue;
1089 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1090 continue;
1091
1092 /* Ok, we know about this VD, let's give more details */
b06e3095 1093 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1094 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1095 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1096 int j;
1097 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1098 for (j=0; j<cnt; j++)
1099 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1100 break;
1101 if (i) printf(" ");
1102 if (j < cnt)
1103 printf("%d", j);
1104 else
1105 printf("--");
1106 }
1107 printf(")\n");
1108 if (vc->chunk_shift != 255)
613b0d17
N
1109 printf(" Chunk Size[%d] : %d sectors\n", n,
1110 1 << vc->chunk_shift);
a322f70c
DW
1111 printf(" Raid Level[%d] : %s\n", n,
1112 map_num(ddf_level, vc->prl)?:"-unknown-");
1113 if (vc->sec_elmnt_count != 1) {
1114 printf(" Secondary Position[%d] : %d of %d\n", n,
1115 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1116 printf(" Secondary Level[%d] : %s\n", n,
1117 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1118 }
1119 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1120 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1121 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1122 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1123 }
1124}
1125
1126static void examine_vds(struct ddf_super *sb)
1127{
1128 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
1129 int i;
1130 printf(" Virtual Disks : %d\n", cnt);
1131
1132 for (i=0; i<cnt; i++) {
1133 struct virtual_entry *ve = &sb->virt->entries[i];
b06e3095 1134 printf("\n");
a322f70c
DW
1135 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1136 printf("\n");
1137 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1138 printf(" state[%d] : %s, %s%s\n", i,
1139 map_num(ddf_state, ve->state & 7),
1140 (ve->state & 8) ? "Morphing, ": "",
1141 (ve->state & 16)? "Not Consistent" : "Consistent");
1142 printf(" init state[%d] : %s\n", i,
1143 map_num(ddf_init_state, ve->init_state&3));
1144 printf(" access[%d] : %s\n", i,
1145 map_num(ddf_access, (ve->init_state>>6) & 3));
1146 printf(" Name[%d] : %.16s\n", i, ve->name);
1147 examine_vd(i, sb, ve->guid);
1148 }
1149 if (cnt) printf("\n");
1150}
1151
1152static void examine_pds(struct ddf_super *sb)
1153{
1154 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1155 int i;
1156 struct dl *dl;
1157 printf(" Physical Disks : %d\n", cnt);
962371a5 1158 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1159
1160 for (i=0 ; i<cnt ; i++) {
1161 struct phys_disk_entry *pd = &sb->phys->entries[i];
1162 int type = __be16_to_cpu(pd->type);
1163 int state = __be16_to_cpu(pd->state);
1164
b06e3095
N
1165 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1166 //printf("\n");
1167 printf(" %3d %08x ", i,
a322f70c 1168 __be32_to_cpu(pd->refnum));
613b0d17 1169 printf("%8lluK ",
c9b6907b 1170 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1171 for (dl = sb->dlist; dl ; dl = dl->next) {
1172 if (dl->disk.refnum == pd->refnum) {
1173 char *dv = map_dev(dl->major, dl->minor, 0);
1174 if (dv) {
962371a5 1175 printf("%-15s", dv);
b06e3095
N
1176 break;
1177 }
1178 }
1179 }
1180 if (!dl)
962371a5 1181 printf("%15s","");
b06e3095 1182 printf(" %s%s%s%s%s",
a322f70c 1183 (type&2) ? "active":"",
b06e3095 1184 (type&4) ? "Global-Spare":"",
a322f70c
DW
1185 (type&8) ? "spare" : "",
1186 (type&16)? ", foreign" : "",
1187 (type&32)? "pass-through" : "");
18cb4496
N
1188 if (state & DDF_Failed)
1189 /* This over-rides these three */
1190 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1191 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1192 (state&1)? "Online": "Offline",
1193 (state&2)? ", Failed": "",
1194 (state&4)? ", Rebuilding": "",
1195 (state&8)? ", in-transition": "",
b06e3095
N
1196 (state&16)? ", SMART-errors": "",
1197 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1198 (state&64)? ", Missing" : "");
a322f70c
DW
1199 printf("\n");
1200 }
1201}
1202
1203static void examine_super_ddf(struct supertype *st, char *homehost)
1204{
1205 struct ddf_super *sb = st->sb;
1206
1207 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1208 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1209 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1210 printf("\n");
1211 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1212 printf("\n");
1213 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1214 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1215 ?"yes" : "no");
1216 examine_vds(sb);
1217 examine_pds(sb);
1218}
1219
a5d85af7 1220static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1221
42dc2744 1222static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1223
061f2c6a 1224static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1225{
1226 /* We just write a generic DDF ARRAY entry
1227 */
1228 struct mdinfo info;
1229 char nbuf[64];
a5d85af7 1230 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1231 fname_from_uuid(st, &info, nbuf, ':');
1232
1233 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1234}
1235
1236static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1237{
1238 /* We just write a generic DDF ARRAY entry
a322f70c 1239 */
42dc2744 1240 struct ddf_super *ddf = st->sb;
ff54de6e 1241 struct mdinfo info;
f21e18ca 1242 unsigned int i;
ff54de6e 1243 char nbuf[64];
a5d85af7 1244 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1245 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1246
f21e18ca 1247 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1248 struct virtual_entry *ve = &ddf->virt->entries[i];
1249 struct vcl vcl;
1250 char nbuf1[64];
1251 if (all_ff(ve->guid))
1252 continue;
1253 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1254 ddf->currentconf =&vcl;
1255 uuid_from_super_ddf(st, info.uuid);
1256 fname_from_uuid(st, &info, nbuf1, ':');
1257 printf("ARRAY container=%s member=%d UUID=%s\n",
1258 nbuf+5, i, nbuf1+5);
1259 }
a322f70c
DW
1260}
1261
bceedeec
N
1262static void export_examine_super_ddf(struct supertype *st)
1263{
1264 struct mdinfo info;
1265 char nbuf[64];
a5d85af7 1266 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1267 fname_from_uuid(st, &info, nbuf, ':');
1268 printf("MD_METADATA=ddf\n");
1269 printf("MD_LEVEL=container\n");
1270 printf("MD_UUID=%s\n", nbuf+5);
1271}
bceedeec 1272
a322f70c
DW
/* Detail output for DDF: currently prints nothing.
 *
 * FIXME later:
 *  - could print the DDF GUID
 *  - need to find which array is meant
 *  - if the whole container, briefly list all arrays
 *  - if a single array, give its name
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
}
1282
1283static void brief_detail_super_ddf(struct supertype *st)
1284{
1285 /* FIXME I really need to know which array we are detailing.
1286 * Can that be stored in ddf_super??
1287 */
1288// struct ddf_super *ddf = st->sb;
ff54de6e
N
1289 struct mdinfo info;
1290 char nbuf[64];
a5d85af7 1291 getinfo_super_ddf(st, &info, NULL);
ff54de6e
N
1292 fname_from_uuid(st, &info, nbuf,':');
1293 printf(" UUID=%s", nbuf + 5);
a322f70c 1294}
a322f70c
DW
1295#endif
1296
1297static int match_home_ddf(struct supertype *st, char *homehost)
1298{
1299 /* It matches 'this' host if the controller is a
1300 * Linux-MD controller with vendor_data matching
1301 * the hostname
1302 */
1303 struct ddf_super *ddf = st->sb;
f21e18ca 1304 unsigned int len;
d1d3482b
N
1305
1306 if (!homehost)
1307 return 0;
1308 len = strlen(homehost);
a322f70c
DW
1309
1310 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1311 len < sizeof(ddf->controller.vendor_data) &&
1312 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1313 ddf->controller.vendor_data[len] == 0);
1314}
1315
0e600426 1316#ifndef MDASSEMBLE
f21e18ca 1317static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst)
a322f70c 1318{
7a7cc504 1319 struct vcl *v;
59e36268 1320
7a7cc504 1321 for (v = ddf->conflist; v; v = v->next)
59e36268 1322 if (inst == v->vcnum)
7a7cc504
NB
1323 return &v->conf;
1324 return NULL;
1325}
0e600426 1326#endif
7a7cc504
NB
1327
1328static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
1329{
1330 /* Find the entry in phys_disk which has the given refnum
1331 * and return it's index
1332 */
f21e18ca
N
1333 unsigned int i;
1334 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1335 if (ddf->phys->entries[i].refnum == phys_refnum)
1336 return i;
1337 return -1;
a322f70c
DW
1338}
1339
1340static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1341{
1342 /* The uuid returned here is used for:
1343 * uuid to put into bitmap file (Create, Grow)
1344 * uuid for backup header when saving critical section (Grow)
1345 * comparing uuids when re-adding a device into an array
51006d85
N
1346 * In these cases the uuid required is that of the data-array,
1347 * not the device-set.
1348 * uuid to recognise same set when adding a missing device back
1349 * to an array. This is a uuid for the device-set.
613b0d17 1350 *
a322f70c
DW
1351 * For each of these we can make do with a truncated
1352 * or hashed uuid rather than the original, as long as
1353 * everyone agrees.
a322f70c
DW
1354 * In the case of SVD we assume the BVD is of interest,
1355 * though that might be the case if a bitmap were made for
1356 * a mirrored SVD - worry about that later.
1357 * So we need to find the VD configuration record for the
1358 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1359 * The first 16 bytes of the sha1 of these is used.
1360 */
1361 struct ddf_super *ddf = st->sb;
d2ca6449 1362 struct vcl *vcl = ddf->currentconf;
c5afc314
N
1363 char *guid;
1364 char buf[20];
1365 struct sha1_ctx ctx;
a322f70c 1366
c5afc314
N
1367 if (vcl)
1368 guid = vcl->conf.guid;
1369 else
1370 guid = ddf->anchor.guid;
1371
1372 sha1_init_ctx(&ctx);
1373 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
c5afc314
N
1374 sha1_finish_ctx(&ctx, buf);
1375 memcpy(uuid, buf, 4*4);
a322f70c
DW
1376}
1377
a5d85af7 1378static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1379
a5d85af7 1380static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1381{
1382 struct ddf_super *ddf = st->sb;
a5d85af7 1383 int map_disks = info->array.raid_disks;
90fa1a29 1384 __u32 *cptr;
a322f70c 1385
78e44928 1386 if (ddf->currentconf) {
a5d85af7 1387 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1388 return;
1389 }
95eeceeb 1390 memset(info, 0, sizeof(*info));
78e44928 1391
a322f70c
DW
1392 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1393 info->array.level = LEVEL_CONTAINER;
1394 info->array.layout = 0;
1395 info->array.md_minor = -1;
90fa1a29
JS
1396 cptr = (__u32 *)(ddf->anchor.guid + 16);
1397 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1398
a322f70c
DW
1399 info->array.utime = 0;
1400 info->array.chunk_size = 0;
510242aa 1401 info->container_enough = 1;
a322f70c 1402
a322f70c
DW
1403 info->disk.major = 0;
1404 info->disk.minor = 0;
cba0191b
NB
1405 if (ddf->dlist) {
1406 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1407 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1408
1409 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1410 entries[info->disk.raid_disk].
1411 config_size);
d2ca6449 1412 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1413 } else {
1414 info->disk.number = -1;
661dce36 1415 info->disk.raid_disk = -1;
cba0191b
NB
1416// info->disk.raid_disk = find refnum in the table and use index;
1417 }
f22385f9 1418 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1419
921d9e16 1420 info->recovery_start = MaxSector;
a19c88b8 1421 info->reshape_active = 0;
6e75048b 1422 info->recovery_blocked = 0;
c5afc314 1423 info->name[0] = 0;
a322f70c 1424
f35f2525
N
1425 info->array.major_version = -1;
1426 info->array.minor_version = -2;
159c3a1a 1427 strcpy(info->text_version, "ddf");
a67dd8cc 1428 info->safe_mode_delay = 0;
159c3a1a 1429
c5afc314 1430 uuid_from_super_ddf(st, info->uuid);
a322f70c 1431
a5d85af7
N
1432 if (map) {
1433 int i;
1434 for (i = 0 ; i < map_disks; i++) {
1435 if (i < info->array.raid_disks &&
1436 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1437 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1438 map[i] = 1;
1439 else
1440 map[i] = 0;
1441 }
1442 }
a322f70c
DW
1443}
1444
598f0d58
NB
1445static int rlq_to_layout(int rlq, int prl, int raiddisks);
1446
a5d85af7 1447static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1448{
1449 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1450 struct vcl *vc = ddf->currentconf;
1451 int cd = ddf->currentdev;
db42fa9b 1452 int j;
8592f29d 1453 struct dl *dl;
a5d85af7 1454 int map_disks = info->array.raid_disks;
90fa1a29 1455 __u32 *cptr;
a322f70c 1456
95eeceeb 1457 memset(info, 0, sizeof(*info));
a322f70c
DW
1458 /* FIXME this returns BVD info - what if we want SVD ?? */
1459
d2ca6449
NB
1460 info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count);
1461 info->array.level = map_num1(ddf_level_num, vc->conf.prl);
1462 info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
598f0d58 1463 info->array.raid_disks);
a322f70c 1464 info->array.md_minor = -1;
90fa1a29
JS
1465 cptr = (__u32 *)(vc->conf.guid + 16);
1466 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1467 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1468 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1469 info->custom_array_size = 0;
d2ca6449 1470
f21e18ca 1471 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
d2ca6449
NB
1472 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1473 if (vc->block_sizes)
1474 info->component_size = vc->block_sizes[cd];
1475 else
1476 info->component_size = __be64_to_cpu(vc->conf.blocks);
1477 }
a322f70c 1478
fb204fb2
N
1479 for (dl = ddf->dlist; dl ; dl = dl->next)
1480 if (dl->raiddisk == ddf->currentdev)
1481 break;
1482
a322f70c
DW
1483 info->disk.major = 0;
1484 info->disk.minor = 0;
fb204fb2 1485 info->disk.state = 0;
8592f29d
N
1486 if (dl) {
1487 info->disk.major = dl->major;
1488 info->disk.minor = dl->minor;
fb204fb2
N
1489 info->disk.raid_disk = dl->raiddisk;
1490 info->disk.number = dl->pdnum;
1491 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1492 }
a322f70c 1493
103f2410
NB
1494 info->container_member = ddf->currentconf->vcnum;
1495
921d9e16 1496 info->recovery_start = MaxSector;
80d26cb2 1497 info->resync_start = 0;
624c5ad4 1498 info->reshape_active = 0;
6e75048b 1499 info->recovery_blocked = 0;
80d26cb2
NB
1500 if (!(ddf->virt->entries[info->container_member].state
1501 & DDF_state_inconsistent) &&
1502 (ddf->virt->entries[info->container_member].init_state
1503 & DDF_initstate_mask)
1504 == DDF_init_full)
b7528a20 1505 info->resync_start = MaxSector;
80d26cb2 1506
a322f70c
DW
1507 uuid_from_super_ddf(st, info->uuid);
1508
f35f2525
N
1509 info->array.major_version = -1;
1510 info->array.minor_version = -2;
9b63e648 1511 sprintf(info->text_version, "/%s/%d",
4dd2df09 1512 st->container_devnm,
9b63e648 1513 info->container_member);
a67dd8cc 1514 info->safe_mode_delay = 200;
159c3a1a 1515
db42fa9b
N
1516 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1517 info->name[16]=0;
1518 for(j=0; j<16; j++)
1519 if (info->name[j] == ' ')
1520 info->name[j] = 0;
a5d85af7
N
1521
1522 if (map)
1523 for (j = 0; j < map_disks; j++) {
1524 map[j] = 0;
1525 if (j < info->array.raid_disks) {
1526 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1527 if (i >= 0 &&
a5d85af7
N
1528 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1529 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1530 map[i] = 1;
1531 }
1532 }
a322f70c
DW
1533}
1534
/* Apply a named update to the metadata.
 *
 * For 'assemble' and 'force' a non-zero return means a change was
 * made.  For others, the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *        be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *        if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *        linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * "force-*" is not handled: there is no need to 'trick' the kernel.
 * When the metadata is first updated to activate the array, all the
 * implied modifications will just happen.
 *
 * Nothing is modified at present.  Returns 0 for "grow" (FIXME),
 * "resync", "_reshape_progress" (reshape is not supported yet) and
 * "assemble" (nothing to do, just succeed).  Returns -1 for "homehost"
 * (would live in controller->vendor_data), "name" (would live in
 * virtual_entry->name) and every other update string.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	/* "homehost", "name" and anything unrecognised */
	return -1;
}
1603
5f8097be
NB
1604static void make_header_guid(char *guid)
1605{
1606 __u32 stamp;
5f8097be
NB
1607 /* Create a DDF Header of Virtual Disk GUID */
1608
1609 /* 24 bytes of fiction required.
1610 * first 8 are a 'vendor-id' - "Linux-MD"
1611 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1612 * Remaining 8 random number plus timestamp
1613 */
1614 memcpy(guid, T10, sizeof(T10));
1615 stamp = __cpu_to_be32(0xdeadbeef);
1616 memcpy(guid+8, &stamp, 4);
1617 stamp = __cpu_to_be32(0);
1618 memcpy(guid+12, &stamp, 4);
1619 stamp = __cpu_to_be32(time(0) - DECADE);
1620 memcpy(guid+16, &stamp, 4);
bfb7ea78 1621 stamp = random32();
5f8097be 1622 memcpy(guid+20, &stamp, 4);
5f8097be 1623}
59e36268 1624
78e44928
NB
1625static int init_super_ddf_bvd(struct supertype *st,
1626 mdu_array_info_t *info,
1627 unsigned long long size,
1628 char *name, char *homehost,
83cd1e97 1629 int *uuid, unsigned long long data_offset);
78e44928 1630
a322f70c
DW
1631static int init_super_ddf(struct supertype *st,
1632 mdu_array_info_t *info,
1633 unsigned long long size, char *name, char *homehost,
83cd1e97 1634 int *uuid, unsigned long long data_offset)
a322f70c
DW
1635{
1636 /* This is primarily called by Create when creating a new array.
1637 * We will then get add_to_super called for each component, and then
1638 * write_init_super called to write it out to each device.
1639 * For DDF, Create can create on fresh devices or on a pre-existing
1640 * array.
1641 * To create on a pre-existing array a different method will be called.
1642 * This one is just for fresh drives.
1643 *
1644 * We need to create the entire 'ddf' structure which includes:
1645 * DDF headers - these are easy.
1646 * Controller data - a Sector describing this controller .. not that
1647 * this is a controller exactly.
1648 * Physical Disk Record - one entry per device, so
1649 * leave plenty of space.
1650 * Virtual Disk Records - again, just leave plenty of space.
1651 * This just lists VDs, doesn't give details
1652 * Config records - describes the VDs that use this disk
1653 * DiskData - describes 'this' device.
1654 * BadBlockManagement - empty
1655 * Diag Space - empty
1656 * Vendor Logs - Could we put bitmaps here?
1657 *
1658 */
1659 struct ddf_super *ddf;
1660 char hostname[17];
1661 int hostlen;
a322f70c
DW
1662 int max_phys_disks, max_virt_disks;
1663 unsigned long long sector;
1664 int clen;
1665 int i;
1666 int pdsize, vdsize;
1667 struct phys_disk *pd;
1668 struct virtual_disk *vd;
1669
83cd1e97
N
1670 if (data_offset != INVALID_SECTORS) {
1671 fprintf(stderr, Name ": data-offset not supported by DDF\n");
1672 return 0;
1673 }
1674
78e44928 1675 if (st->sb)
83cd1e97
N
1676 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
1677 data_offset);
ba7eb04f 1678
3d2c4fc7 1679 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 1680 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
1681 return 0;
1682 }
6264b437 1683 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
1684 ddf->dlist = NULL; /* no physical disks yet */
1685 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
1686 st->sb = ddf;
1687
1688 if (info == NULL) {
1689 /* zeroing superblock */
1690 return 0;
1691 }
a322f70c
DW
1692
1693 /* At least 32MB *must* be reserved for the ddf. So let's just
1694 * start 32MB from the end, and put the primary header there.
1695 * Don't do secondary for now.
1696 * We don't know exactly where that will be yet as it could be
1697 * different on each device. To just set up the lengths.
1698 *
1699 */
1700
1701 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 1702 make_header_guid(ddf->anchor.guid);
a322f70c 1703
59e36268 1704 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
1705 ddf->anchor.seq = __cpu_to_be32(1);
1706 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
1707 ddf->anchor.openflag = 0xFF;
1708 ddf->anchor.foreignflag = 0;
1709 ddf->anchor.enforcegroups = 0; /* Is this best?? */
1710 ddf->anchor.pad0 = 0xff;
1711 memset(ddf->anchor.pad1, 0xff, 12);
1712 memset(ddf->anchor.header_ext, 0xff, 32);
1713 ddf->anchor.primary_lba = ~(__u64)0;
1714 ddf->anchor.secondary_lba = ~(__u64)0;
1715 ddf->anchor.type = DDF_HEADER_ANCHOR;
1716 memset(ddf->anchor.pad2, 0xff, 3);
1717 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
1718 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
1719 of 32M reserved.. */
1720 max_phys_disks = 1023; /* Should be enough */
1721 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
1722 max_virt_disks = 255;
1723 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
1724 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
1725 ddf->max_part = 64;
8c3b8c2c 1726 ddf->mppe = 256;
59e36268
NB
1727 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
1728 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
1729 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 1730 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
1731 /* controller sections is one sector long immediately
1732 * after the ddf header */
1733 sector = 1;
1734 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
1735 ddf->anchor.controller_section_length = __cpu_to_be32(1);
1736 sector += 1;
1737
1738 /* phys is 8 sectors after that */
1739 pdsize = ROUND_UP(sizeof(struct phys_disk) +
1740 sizeof(struct phys_disk_entry)*max_phys_disks,
1741 512);
1742 switch(pdsize/512) {
1743 case 2: case 8: case 32: case 128: case 512: break;
1744 default: abort();
1745 }
1746 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
1747 ddf->anchor.phys_section_length =
1748 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
1749 sector += pdsize/512;
1750
1751 /* virt is another 32 sectors */
1752 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
1753 sizeof(struct virtual_entry) * max_virt_disks,
1754 512);
1755 switch(vdsize/512) {
1756 case 2: case 8: case 32: case 128: case 512: break;
1757 default: abort();
1758 }
1759 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
1760 ddf->anchor.virt_section_length =
1761 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
1762 sector += vdsize/512;
1763
59e36268 1764 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
1765 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
1766 ddf->anchor.config_section_length = __cpu_to_be32(clen);
1767 sector += clen;
1768
1769 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
1770 ddf->anchor.data_section_length = __cpu_to_be32(1);
1771 sector += 1;
1772
1773 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
1774 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
1775 ddf->anchor.diag_space_length = __cpu_to_be32(0);
1776 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
1777 ddf->anchor.vendor_length = __cpu_to_be32(0);
1778 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
1779
1780 memset(ddf->anchor.pad4, 0xff, 256);
1781
1782 memcpy(&ddf->primary, &ddf->anchor, 512);
1783 memcpy(&ddf->secondary, &ddf->anchor, 512);
1784
1785 ddf->primary.openflag = 1; /* I guess.. */
1786 ddf->primary.type = DDF_HEADER_PRIMARY;
1787
1788 ddf->secondary.openflag = 1; /* I guess.. */
1789 ddf->secondary.type = DDF_HEADER_SECONDARY;
1790
1791 ddf->active = &ddf->primary;
1792
1793 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
1794
1795 /* 24 more bytes of fiction required.
1796 * first 8 are a 'vendor-id' - "Linux-MD"
1797 * Remaining 16 are serial number.... maybe a hostname would do?
1798 */
1799 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
1800 gethostname(hostname, sizeof(hostname));
1801 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
1802 hostlen = strlen(hostname);
1803 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
1804 for (i = strlen(T10) ; i+hostlen < 24; i++)
1805 ddf->controller.guid[i] = ' ';
1806
1807 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
1808 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
1809 ddf->controller.type.sub_vendor_id = 0;
1810 ddf->controller.type.sub_device_id = 0;
1811 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
1812 memset(ddf->controller.pad, 0xff, 8);
1813 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
1814 if (homehost && strlen(homehost) < 440)
1815 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 1816
3d2c4fc7 1817 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 1818 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
1819 return 0;
1820 }
6416d527 1821 ddf->phys = pd;
a322f70c
DW
1822 ddf->pdsize = pdsize;
1823
1824 memset(pd, 0xff, pdsize);
1825 memset(pd, 0, sizeof(*pd));
076515ba 1826 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
1827 pd->used_pdes = __cpu_to_be16(0);
1828 pd->max_pdes = __cpu_to_be16(max_phys_disks);
1829 memset(pd->pad, 0xff, 52);
1830
3d2c4fc7 1831 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 1832 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
1833 return 0;
1834 }
6416d527 1835 ddf->virt = vd;
a322f70c
DW
1836 ddf->vdsize = vdsize;
1837 memset(vd, 0, vdsize);
1838 vd->magic = DDF_VIRT_RECORDS_MAGIC;
1839 vd->populated_vdes = __cpu_to_be16(0);
1840 vd->max_vdes = __cpu_to_be16(max_virt_disks);
1841 memset(vd->pad, 0xff, 52);
1842
5f8097be
NB
1843 for (i=0; i<max_virt_disks; i++)
1844 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
1845
a322f70c 1846 st->sb = ddf;
18a2f463 1847 ddf->updates_pending = 1;
a322f70c
DW
1848 return 1;
1849}
1850
5f8097be
NB
/* Convert a chunk size in bytes to a shift count in 512-byte sectors:
 * the position of the lowest set bit of chunksize/512.
 * Returns -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);	/* 1-based, 0 if no bit is set */

	return lowest_bit - 1;
}
1855
1856static int level_to_prl(int level)
1857{
1858 switch (level) {
1859 case LEVEL_LINEAR: return DDF_CONCAT;
1860 case 0: return DDF_RAID0;
1861 case 1: return DDF_RAID1;
1862 case 4: return DDF_RAID4;
1863 case 5: return DDF_RAID5;
1864 case 6: return DDF_RAID6;
1865 default: return -1;
1866 }
1867}
613b0d17 1868
5f8097be
NB
1869static int layout_to_rlq(int level, int layout, int raiddisks)
1870{
1871 switch(level) {
1872 case 0:
1873 return DDF_RAID0_SIMPLE;
1874 case 1:
1875 switch(raiddisks) {
1876 case 2: return DDF_RAID1_SIMPLE;
1877 case 3: return DDF_RAID1_MULTI;
1878 default: return -1;
1879 }
1880 case 4:
1881 switch(layout) {
1882 case 0: return DDF_RAID4_N;
1883 }
1884 break;
1885 case 5:
5f8097be
NB
1886 switch(layout) {
1887 case ALGORITHM_LEFT_ASYMMETRIC:
1888 return DDF_RAID5_N_RESTART;
1889 case ALGORITHM_RIGHT_ASYMMETRIC:
b640a252 1890 return DDF_RAID5_0_RESTART;
5f8097be
NB
1891 case ALGORITHM_LEFT_SYMMETRIC:
1892 return DDF_RAID5_N_CONTINUE;
1893 case ALGORITHM_RIGHT_SYMMETRIC:
1894 return -1; /* not mentioned in standard */
1895 }
b640a252
N
1896 case 6:
1897 switch(layout) {
1898 case ALGORITHM_ROTATING_N_RESTART:
1899 return DDF_RAID5_N_RESTART;
1900 case ALGORITHM_ROTATING_ZERO_RESTART:
1901 return DDF_RAID6_0_RESTART;
1902 case ALGORITHM_ROTATING_N_CONTINUE:
1903 return DDF_RAID5_N_CONTINUE;
1904 }
5f8097be
NB
1905 }
1906 return -1;
1907}
1908
598f0d58
NB
1909static int rlq_to_layout(int rlq, int prl, int raiddisks)
1910{
1911 switch(prl) {
1912 case DDF_RAID0:
1913 return 0; /* hopefully rlq == DDF_RAID0_SIMPLE */
1914 case DDF_RAID1:
1915 return 0; /* hopefully rlq == SIMPLE or MULTI depending
1916 on raiddisks*/
1917 case DDF_RAID4:
1918 switch(rlq) {
1919 case DDF_RAID4_N:
1920 return 0;
1921 default:
1922 /* not supported */
1923 return -1; /* FIXME this isn't checked */
1924 }
1925 case DDF_RAID5:
598f0d58
NB
1926 switch(rlq) {
1927 case DDF_RAID5_N_RESTART:
1928 return ALGORITHM_LEFT_ASYMMETRIC;
1929 case DDF_RAID5_0_RESTART:
1930 return ALGORITHM_RIGHT_ASYMMETRIC;
1931 case DDF_RAID5_N_CONTINUE:
1932 return ALGORITHM_LEFT_SYMMETRIC;
1933 default:
1934 return -1;
1935 }
59e36268
NB
1936 case DDF_RAID6:
1937 switch(rlq) {
1938 case DDF_RAID5_N_RESTART:
b640a252 1939 return ALGORITHM_ROTATING_N_RESTART;
59e36268 1940 case DDF_RAID6_0_RESTART:
b640a252 1941 return ALGORITHM_ROTATING_ZERO_RESTART;
59e36268 1942 case DDF_RAID5_N_CONTINUE:
b640a252 1943 return ALGORITHM_ROTATING_N_CONTINUE;
59e36268
NB
1944 default:
1945 return -1;
1946 }
598f0d58
NB
1947 }
1948 return -1;
1949}
1950
0e600426 1951#ifndef MDASSEMBLE
59e36268
NB
/* One allocated region on a physical device: where it starts and how
 * long it is.
 */
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator: orders extents by ascending 'start'.
 * Yields -1, 0 or 1.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
1965
78e44928 1966static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
1967{
1968 /* find a list of used extents on the give physical device
1969 * (dnum) of the given ddf.
1970 * Return a malloced array of 'struct extent'
1971
613b0d17 1972 * FIXME ignore DDF_Legacy devices?
59e36268
NB
1973
1974 */
1975 struct extent *rv;
1976 int n = 0;
f21e18ca 1977 unsigned int i, j;
59e36268 1978
503975b9 1979 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
1980
1981 for (i = 0; i < ddf->max_part; i++) {
1982 struct vcl *v = dl->vlist[i];
1983 if (v == NULL)
1984 continue;
f21e18ca 1985 for (j = 0; j < v->conf.prim_elmnt_count; j++)
59e36268
NB
1986 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
1987 /* This device plays role 'j' in 'v'. */
1988 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
1989 rv[n].size = __be64_to_cpu(v->conf.blocks);
1990 n++;
1991 break;
1992 }
1993 }
1994 qsort(rv, n, sizeof(*rv), cmp_extent);
1995
1996 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
1997 rv[n].size = 0;
1998 return rv;
1999}
0e600426 2000#endif
59e36268 2001
5f8097be
NB
2002static int init_super_ddf_bvd(struct supertype *st,
2003 mdu_array_info_t *info,
2004 unsigned long long size,
2005 char *name, char *homehost,
83cd1e97 2006 int *uuid, unsigned long long data_offset)
5f8097be
NB
2007{
2008 /* We are creating a BVD inside a pre-existing container.
2009 * so st->sb is already set.
2010 * We need to create a new vd_config and a new virtual_entry
2011 */
2012 struct ddf_super *ddf = st->sb;
f21e18ca 2013 unsigned int venum;
5f8097be
NB
2014 struct virtual_entry *ve;
2015 struct vcl *vcl;
2016 struct vd_config *vc;
5f8097be
NB
2017
2018 if (__be16_to_cpu(ddf->virt->populated_vdes)
2019 >= __be16_to_cpu(ddf->virt->max_vdes)) {
e7b84f9d
N
2020 pr_err("This ddf already has the "
2021 "maximum of %d virtual devices\n",
2022 __be16_to_cpu(ddf->virt->max_vdes));
5f8097be
NB
2023 return 0;
2024 }
2025
97c9c100
N
2026 if (name)
2027 for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
2028 if (!all_ff(ddf->virt->entries[venum].guid)) {
2029 char *n = ddf->virt->entries[venum].name;
2030
2031 if (strncmp(name, n, 16) == 0) {
e7b84f9d
N
2032 pr_err("This ddf already"
2033 " has an array called %s\n",
2034 name);
97c9c100
N
2035 return 0;
2036 }
2037 }
2038
5f8097be
NB
2039 for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
2040 if (all_ff(ddf->virt->entries[venum].guid))
2041 break;
2042 if (venum == __be16_to_cpu(ddf->virt->max_vdes)) {
e7b84f9d 2043 pr_err("Cannot find spare slot for "
613b0d17 2044 "virtual disk - DDF is corrupt\n");
5f8097be
NB
2045 return 0;
2046 }
2047 ve = &ddf->virt->entries[venum];
2048
2049 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2050 * timestamp, random number
2051 */
2052 make_header_guid(ve->guid);
2053 ve->unit = __cpu_to_be16(info->md_minor);
2054 ve->pad0 = 0xFFFF;
2055 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2056 ve->type = 0;
7a7cc504
NB
2057 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2058 if (info->state & 1) /* clean */
2059 ve->init_state = DDF_init_full;
2060 else
2061 ve->init_state = DDF_init_not;
2062
5f8097be
NB
2063 memset(ve->pad1, 0xff, 14);
2064 memset(ve->name, ' ', 16);
2065 if (name)
2066 strncpy(ve->name, name, 16);
2067 ddf->virt->populated_vdes =
2068 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2069
2070 /* Now create a new vd_config */
3d2c4fc7
DW
2071 if (posix_memalign((void**)&vcl, 512,
2072 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2073 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2074 return 0;
2075 }
8c3b8c2c 2076 vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
59e36268
NB
2077 vcl->vcnum = venum;
2078 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 2079 vcl->other_bvds = NULL;
5f8097be
NB
2080
2081 vc = &vcl->conf;
2082
2083 vc->magic = DDF_VD_CONF_MAGIC;
2084 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2085 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2086 vc->seqnum = __cpu_to_be32(1);
2087 memset(vc->pad0, 0xff, 24);
2088 vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
2089 vc->chunk_shift = chunk_to_shift(info->chunk_size);
2090 vc->prl = level_to_prl(info->level);
2091 vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
2092 vc->sec_elmnt_count = 1;
2093 vc->sec_elmnt_seq = 0;
2094 vc->srl = 0;
2095 vc->blocks = __cpu_to_be64(info->size * 2);
2096 vc->array_blocks = __cpu_to_be64(
2097 calc_array_size(info->level, info->raid_disks, info->layout,
2098 info->chunk_size, info->size*2));
2099 memset(vc->pad1, 0xff, 8);
2100 vc->spare_refs[0] = 0xffffffff;
2101 vc->spare_refs[1] = 0xffffffff;
2102 vc->spare_refs[2] = 0xffffffff;
2103 vc->spare_refs[3] = 0xffffffff;
2104 vc->spare_refs[4] = 0xffffffff;
2105 vc->spare_refs[5] = 0xffffffff;
2106 vc->spare_refs[6] = 0xffffffff;
2107 vc->spare_refs[7] = 0xffffffff;
2108 memset(vc->cache_pol, 0, 8);
2109 vc->bg_rate = 0x80;
2110 memset(vc->pad2, 0xff, 3);
2111 memset(vc->pad3, 0xff, 52);
2112 memset(vc->pad4, 0xff, 192);
2113 memset(vc->v0, 0xff, 32);
2114 memset(vc->v1, 0xff, 32);
2115 memset(vc->v2, 0xff, 16);
2116 memset(vc->v3, 0xff, 16);
2117 memset(vc->vendor, 0xff, 32);
598f0d58 2118
8c3b8c2c 2119 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2120 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be
NB
2121
2122 vcl->next = ddf->conflist;
2123 ddf->conflist = vcl;
d2ca6449 2124 ddf->currentconf = vcl;
18a2f463 2125 ddf->updates_pending = 1;
5f8097be
NB
2126 return 1;
2127}
2128
0e600426 2129#ifndef MDASSEMBLE
5f8097be
NB
2130static void add_to_super_ddf_bvd(struct supertype *st,
2131 mdu_disk_info_t *dk, int fd, char *devname)
2132{
2133 /* fd and devname identify a device with-in the ddf container (st).
2134 * dk identifies a location in the new BVD.
2135 * We need to find suitable free space in that device and update
2136 * the phys_refnum and lba_offset for the newly created vd_config.
2137 * We might also want to update the type in the phys_disk
5575e7d9 2138 * section.
8592f29d
N
2139 *
2140 * Alternately: fd == -1 and we have already chosen which device to
2141 * use and recorded in dlist->raid_disk;
5f8097be
NB
2142 */
2143 struct dl *dl;
2144 struct ddf_super *ddf = st->sb;
2145 struct vd_config *vc;
2146 __u64 *lba_offset;
f21e18ca
N
2147 unsigned int working;
2148 unsigned int i;
59e36268
NB
2149 unsigned long long blocks, pos, esize;
2150 struct extent *ex;
5f8097be 2151
8592f29d
N
2152 if (fd == -1) {
2153 for (dl = ddf->dlist; dl ; dl = dl->next)
2154 if (dl->raiddisk == dk->raid_disk)
2155 break;
2156 } else {
2157 for (dl = ddf->dlist; dl ; dl = dl->next)
2158 if (dl->major == dk->major &&
2159 dl->minor == dk->minor)
2160 break;
2161 }
5f8097be
NB
2162 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2163 return;
2164
d2ca6449
NB
2165 vc = &ddf->currentconf->conf;
2166 lba_offset = ddf->currentconf->lba_offset;
59e36268
NB
2167
2168 ex = get_extents(ddf, dl);
2169 if (!ex)
2170 return;
2171
2172 i = 0; pos = 0;
2173 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2174 if (ddf->currentconf->block_sizes)
2175 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2176
2177 do {
2178 esize = ex[i].start - pos;
2179 if (esize >= blocks)
2180 break;
2181 pos = ex[i].start + ex[i].size;
2182 i++;
2183 } while (ex[i-1].size);
2184
2185 free(ex);
2186 if (esize < blocks)
2187 return;
2188
d2ca6449 2189 ddf->currentdev = dk->raid_disk;
5f8097be 2190 vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
59e36268 2191 lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
5f8097be 2192
f21e18ca 2193 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2194 if (dl->vlist[i] == NULL)
2195 break;
2196 if (i == ddf->max_part)
2197 return;
d2ca6449 2198 dl->vlist[i] = ddf->currentconf;
5f8097be 2199
8592f29d
N
2200 if (fd >= 0)
2201 dl->fd = fd;
2202 if (devname)
2203 dl->devname = devname;
7a7cc504
NB
2204
2205 /* Check how many working raid_disks, and if we can mark
2206 * array as optimal yet
2207 */
2208 working = 0;
5575e7d9 2209
f21e18ca 2210 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
7a7cc504
NB
2211 if (vc->phys_refnum[i] != 0xffffffff)
2212 working++;
59e36268 2213
5575e7d9 2214 /* Find which virtual_entry */
d2ca6449 2215 i = ddf->currentconf->vcnum;
7a7cc504 2216 if (working == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2217 ddf->virt->entries[i].state =
2218 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504
NB
2219 | DDF_state_optimal;
2220
2221 if (vc->prl == DDF_RAID6 &&
2222 working+1 == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2223 ddf->virt->entries[i].state =
2224 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504 2225 | DDF_state_part_optimal;
5575e7d9
NB
2226
2227 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2228 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
18a2f463 2229 ddf->updates_pending = 1;
5f8097be
NB
2230}
2231
a322f70c
DW
2232/* add a device to a container, either while creating it or while
2233 * expanding a pre-existing container
2234 */
f20c3968 2235static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2236 mdu_disk_info_t *dk, int fd, char *devname,
2237 unsigned long long data_offset)
a322f70c
DW
2238{
2239 struct ddf_super *ddf = st->sb;
2240 struct dl *dd;
2241 time_t now;
2242 struct tm *tm;
2243 unsigned long long size;
2244 struct phys_disk_entry *pde;
f21e18ca 2245 unsigned int n, i;
a322f70c 2246 struct stat stb;
90fa1a29 2247 __u32 *tptr;
a322f70c 2248
78e44928
NB
2249 if (ddf->currentconf) {
2250 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2251 return 0;
78e44928
NB
2252 }
2253
a322f70c
DW
2254 /* This is device numbered dk->number. We need to create
2255 * a phys_disk entry and a more detailed disk_data entry.
2256 */
2257 fstat(fd, &stb);
3d2c4fc7
DW
2258 if (posix_memalign((void**)&dd, 512,
2259 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2260 pr_err("%s could allocate buffer for new disk, aborting\n",
2261 __func__);
f20c3968 2262 return 1;
3d2c4fc7 2263 }
a322f70c
DW
2264 dd->major = major(stb.st_rdev);
2265 dd->minor = minor(stb.st_rdev);
2266 dd->devname = devname;
a322f70c 2267 dd->fd = fd;
b2280677 2268 dd->spare = NULL;
a322f70c
DW
2269
2270 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2271 now = time(0);
2272 tm = localtime(&now);
2273 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2274 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2275 tptr = (__u32 *)(dd->disk.guid + 16);
2276 *tptr++ = random32();
2277 *tptr = random32();
a322f70c 2278
59e36268
NB
2279 do {
2280 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2281 dd->disk.refnum = random32();
f21e18ca
N
2282 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2283 i > 0; i--)
2284 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2285 break;
f21e18ca 2286 } while (i > 0);
59e36268 2287
a322f70c
DW
2288 dd->disk.forced_ref = 1;
2289 dd->disk.forced_guid = 1;
2290 memset(dd->disk.vendor, ' ', 32);
2291 memcpy(dd->disk.vendor, "Linux", 5);
2292 memset(dd->disk.pad, 0xff, 442);
b2280677 2293 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2294 dd->vlist[i] = NULL;
2295
2296 n = __be16_to_cpu(ddf->phys->used_pdes);
2297 pde = &ddf->phys->entries[n];
5575e7d9
NB
2298 dd->pdnum = n;
2299
2cc2983d
N
2300 if (st->update_tail) {
2301 int len = (sizeof(struct phys_disk) +
2302 sizeof(struct phys_disk_entry));
2303 struct phys_disk *pd;
2304
503975b9 2305 pd = xmalloc(len);
2cc2983d
N
2306 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2307 pd->used_pdes = __cpu_to_be16(n);
2308 pde = &pd->entries[0];
2309 dd->mdupdate = pd;
2310 } else {
2311 n++;
2312 ddf->phys->used_pdes = __cpu_to_be16(n);
2313 }
a322f70c
DW
2314
2315 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2316 pde->refnum = dd->disk.refnum;
5575e7d9 2317 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c
DW
2318 pde->state = __cpu_to_be16(DDF_Online);
2319 get_dev_size(fd, NULL, &size);
2320 /* We are required to reserve 32Meg, and record the size in sectors */
2321 pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
2322 sprintf(pde->path, "%17.17s","Information: nil") ;
2323 memset(pde->pad, 0xff, 6);
2324
d2ca6449 2325 dd->size = size >> 9;
2cc2983d
N
2326 if (st->update_tail) {
2327 dd->next = ddf->add_list;
2328 ddf->add_list = dd;
2329 } else {
2330 dd->next = ddf->dlist;
2331 ddf->dlist = dd;
2332 ddf->updates_pending = 1;
2333 }
f20c3968
DW
2334
2335 return 0;
a322f70c
DW
2336}
2337
4dd968cc
N
2338static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2339{
2340 struct ddf_super *ddf = st->sb;
2341 struct dl *dl;
2342
2343 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2344 * disappeared from the container.
2345 * We need to arrange that it disappears from the metadata and
2346 * internal data structures too.
2347 * Most of the work is done by ddf_process_update which edits
2348 * the metadata and closes the file handle and attaches the memory
2349 * where free_updates will free it.
2350 */
2351 for (dl = ddf->dlist; dl ; dl = dl->next)
2352 if (dl->major == dk->major &&
2353 dl->minor == dk->minor)
2354 break;
2355 if (!dl)
2356 return -1;
2357
2358 if (st->update_tail) {
2359 int len = (sizeof(struct phys_disk) +
2360 sizeof(struct phys_disk_entry));
2361 struct phys_disk *pd;
2362
503975b9 2363 pd = xmalloc(len);
4dd968cc
N
2364 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2365 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2366 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2367 append_metadata_update(st, pd, len);
2368 }
2369 return 0;
2370}
2371
a322f70c
DW
2372/*
2373 * This is the write_init_super method for a ddf container. It is
2374 * called when creating a container or adding another device to a
2375 * container.
2376 */
42d5dfd9 2377#define NULL_CONF_SZ 4096
18a2f463 2378
e3c2a365 2379static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2380 __u32 refnum, unsigned int nmax,
2381 const struct vd_config **bvd,
2382 unsigned int *idx);
2383
7f798aca 2384static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2385 char *null_aligned)
a322f70c 2386{
7f798aca 2387 unsigned long long sector;
2388 struct ddf_header *header;
2389 int fd, i, n_config, conf_size;
2390
2391 fd = d->fd;
2392
2393 switch (type) {
2394 case DDF_HEADER_PRIMARY:
2395 header = &ddf->primary;
2396 sector = __be64_to_cpu(header->primary_lba);
2397 break;
2398 case DDF_HEADER_SECONDARY:
2399 header = &ddf->secondary;
2400 sector = __be64_to_cpu(header->secondary_lba);
2401 break;
2402 default:
2403 return 0;
2404 }
2405
2406 header->type = type;
2407 header->openflag = 0;
2408 header->crc = calc_crc(header, 512);
2409
2410 lseek64(fd, sector<<9, 0);
2411 if (write(fd, header, 512) < 0)
2412 return 0;
2413
2414 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2415 if (write(fd, &ddf->controller, 512) < 0)
2416 return 0;
a322f70c 2417
7f798aca 2418 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2419 if (write(fd, ddf->phys, ddf->pdsize) < 0)
2420 return 0;
2421 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2422 if (write(fd, ddf->virt, ddf->vdsize) < 0)
2423 return 0;
2424
2425 /* Now write lots of config records. */
2426 n_config = ddf->max_part;
2427 conf_size = ddf->conf_rec_len * 512;
2428 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2429 struct vcl *c;
2430 struct vd_config *vdc = NULL;
2431 if (i == n_config) {
7f798aca 2432 c = (struct vcl *)d->spare;
e3c2a365 2433 if (c)
2434 vdc = &c->conf;
2435 } else {
2436 unsigned int dummy;
2437 c = d->vlist[i];
2438 if (c)
2439 get_pd_index_from_refnum(
2440 c, d->disk.refnum,
2441 ddf->mppe,
2442 (const struct vd_config **)&vdc,
2443 &dummy);
2444 }
7f798aca 2445 if (c) {
e3c2a365 2446 vdc->seqnum = ddf->primary.seq;
2447 vdc->crc = calc_crc(vdc, conf_size);
2448 if (write(fd, vdc, conf_size) < 0)
7f798aca 2449 break;
2450 } else {
2451 unsigned int togo = conf_size;
2452 while (togo > NULL_CONF_SZ) {
2453 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2454 break;
2455 togo -= NULL_CONF_SZ;
2456 }
2457 if (write(fd, null_aligned, togo) < 0)
2458 break;
2459 }
2460 }
2461 if (i <= n_config)
2462 return 0;
2463
2464 d->disk.crc = calc_crc(&d->disk, 512);
2465 if (write(fd, &d->disk, 512) < 0)
2466 return 0;
2467
2468 return 1;
2469}
2470
2471static int __write_init_super_ddf(struct supertype *st)
2472{
a322f70c 2473 struct ddf_super *ddf = st->sb;
a322f70c 2474 struct dl *d;
175593bf
DW
2475 int attempts = 0;
2476 int successes = 0;
7f798aca 2477 unsigned long long size;
42d5dfd9 2478 char *null_aligned;
0175cbf6 2479 __u32 seq;
42d5dfd9
JS
2480
2481 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2482 return -ENOMEM;
2483 }
2484 memset(null_aligned, 0xff, NULL_CONF_SZ);
a322f70c 2485
0175cbf6 2486 if (ddf->primary.seq != 0xffffffff)
2487 seq = __cpu_to_be32(__be32_to_cpu(ddf->primary.seq)+1);
2488 else if (ddf->secondary.seq != 0xffffffff)
2489 seq = __cpu_to_be32(__be32_to_cpu(ddf->secondary.seq)+1);
2490 else
2491 seq = __cpu_to_be32(1);
2492
175593bf
DW
2493 /* try to write updated metadata,
2494 * if we catch a failure move on to the next disk
2495 */
a322f70c
DW
2496 for (d = ddf->dlist; d; d=d->next) {
2497 int fd = d->fd;
2498
2499 if (fd < 0)
2500 continue;
2501
175593bf 2502 attempts++;
a322f70c
DW
2503 /* We need to fill in the primary, (secondary) and workspace
2504 * lba's in the headers, set their checksums,
2505 * Also checksum phys, virt....
2506 *
2507 * Then write everything out, finally the anchor is written.
2508 */
2509 get_dev_size(fd, NULL, &size);
2510 size /= 512;
097bcf00 2511 if (d->workspace_lba != 0)
2512 ddf->anchor.workspace_lba = d->workspace_lba;
2513 else
2514 ddf->anchor.workspace_lba =
2515 __cpu_to_be64(size - 32*1024*2);
2516 if (d->primary_lba != 0)
2517 ddf->anchor.primary_lba = d->primary_lba;
2518 else
2519 ddf->anchor.primary_lba =
2520 __cpu_to_be64(size - 16*1024*2);
2521 if (d->secondary_lba != 0)
2522 ddf->anchor.secondary_lba = d->secondary_lba;
2523 else
2524 ddf->anchor.secondary_lba =
2525 __cpu_to_be64(size - 32*1024*2);
0175cbf6 2526 ddf->anchor.seq = seq;
a322f70c
DW
2527 memcpy(&ddf->primary, &ddf->anchor, 512);
2528 memcpy(&ddf->secondary, &ddf->anchor, 512);
2529
2530 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2531 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2532 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2533
7f798aca 2534 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2535 null_aligned))
175593bf 2536 continue;
a322f70c 2537
7f798aca 2538 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2539 null_aligned))
175593bf 2540 continue;
a322f70c 2541
a322f70c 2542 lseek64(fd, (size-1)*512, SEEK_SET);
175593bf
DW
2543 if (write(fd, &ddf->anchor, 512) < 0)
2544 continue;
2545 successes++;
2546 }
42d5dfd9 2547 free(null_aligned);
175593bf 2548
175593bf 2549 return attempts != successes;
a322f70c 2550}
7a7cc504
NB
2551
2552static int write_init_super_ddf(struct supertype *st)
2553{
9b1fb677
DW
2554 struct ddf_super *ddf = st->sb;
2555 struct vcl *currentconf = ddf->currentconf;
2556
2557 /* we are done with currentconf reset it to point st at the container */
2558 ddf->currentconf = NULL;
edd8d13c
NB
2559
2560 if (st->update_tail) {
2561 /* queue the virtual_disk and vd_config as metadata updates */
2562 struct virtual_disk *vd;
2563 struct vd_config *vc;
edd8d13c
NB
2564 int len;
2565
9b1fb677 2566 if (!currentconf) {
2cc2983d
N
2567 int len = (sizeof(struct phys_disk) +
2568 sizeof(struct phys_disk_entry));
2569
2570 /* adding a disk to the container. */
2571 if (!ddf->add_list)
2572 return 0;
2573
2574 append_metadata_update(st, ddf->add_list->mdupdate, len);
2575 ddf->add_list->mdupdate = NULL;
2576 return 0;
2577 }
2578
2579 /* Newly created VD */
2580
edd8d13c
NB
2581 /* First the virtual disk. We have a slightly fake header */
2582 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2583 vd = xmalloc(len);
edd8d13c 2584 *vd = *ddf->virt;
9b1fb677
DW
2585 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2586 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2587 append_metadata_update(st, vd, len);
2588
2589 /* Then the vd_config */
2590 len = ddf->conf_rec_len * 512;
503975b9 2591 vc = xmalloc(len);
9b1fb677 2592 memcpy(vc, &currentconf->conf, len);
edd8d13c
NB
2593 append_metadata_update(st, vc, len);
2594
2595 /* FIXME I need to close the fds! */
2596 return 0;
613b0d17 2597 } else {
d682f344
N
2598 struct dl *d;
2599 for (d = ddf->dlist; d; d=d->next)
ba728be7 2600 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2601 return __write_init_super_ddf(st);
d682f344 2602 }
7a7cc504
NB
2603}
2604
a322f70c
DW
2605#endif
2606
387fcd59
N
2607static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2608 unsigned long long data_offset)
a322f70c
DW
2609{
2610 /* We must reserve the last 32Meg */
2611 if (devsize <= 32*1024*2)
2612 return 0;
2613 return devsize - 32*1024*2;
2614}
2615
2616#ifndef MDASSEMBLE
8592f29d
N
2617
2618static int reserve_space(struct supertype *st, int raiddisks,
2619 unsigned long long size, int chunk,
2620 unsigned long long *freesize)
2621{
2622 /* Find 'raiddisks' spare extents at least 'size' big (but
2623 * only caring about multiples of 'chunk') and remember
2624 * them.
2625 * If the cannot be found, fail.
2626 */
2627 struct dl *dl;
2628 struct ddf_super *ddf = st->sb;
2629 int cnt = 0;
2630
2631 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 2632 dl->raiddisk = -1;
8592f29d
N
2633 dl->esize = 0;
2634 }
2635 /* Now find largest extent on each device */
2636 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2637 struct extent *e = get_extents(ddf, dl);
2638 unsigned long long pos = 0;
2639 int i = 0;
2640 int found = 0;
2641 unsigned long long minsize = size;
2642
2643 if (size == 0)
2644 minsize = chunk;
2645
2646 if (!e)
2647 continue;
2648 do {
2649 unsigned long long esize;
2650 esize = e[i].start - pos;
2651 if (esize >= minsize) {
2652 found = 1;
2653 minsize = esize;
2654 }
2655 pos = e[i].start + e[i].size;
2656 i++;
2657 } while (e[i-1].size);
2658 if (found) {
2659 cnt++;
2660 dl->esize = minsize;
2661 }
2662 free(e);
2663 }
2664 if (cnt < raiddisks) {
e7b84f9d 2665 pr_err("not enough devices with space to create array.\n");
8592f29d
N
2666 return 0; /* No enough free spaces large enough */
2667 }
2668 if (size == 0) {
2669 /* choose the largest size of which there are at least 'raiddisk' */
2670 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2671 struct dl *dl2;
2672 if (dl->esize <= size)
2673 continue;
2674 /* This is bigger than 'size', see if there are enough */
2675 cnt = 0;
7b80ad6a 2676 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
2677 if (dl2->esize >= dl->esize)
2678 cnt++;
2679 if (cnt >= raiddisks)
2680 size = dl->esize;
2681 }
2682 if (chunk) {
2683 size = size / chunk;
2684 size *= chunk;
2685 }
2686 *freesize = size;
2687 if (size < 32) {
e7b84f9d 2688 pr_err("not enough spare devices to create array.\n");
8592f29d
N
2689 return 0;
2690 }
2691 }
2692 /* We have a 'size' of which there are enough spaces.
2693 * We simply do a first-fit */
2694 cnt = 0;
2695 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
2696 if (dl->esize < size)
2697 continue;
613b0d17 2698
8592f29d
N
2699 dl->raiddisk = cnt;
2700 cnt++;
2701 }
2702 return 1;
2703}
2704
2c514b71
NB
2705static int
2706validate_geometry_ddf_container(struct supertype *st,
2707 int level, int layout, int raiddisks,
2708 int chunk, unsigned long long size,
af4348dd 2709 unsigned long long data_offset,
2c514b71
NB
2710 char *dev, unsigned long long *freesize,
2711 int verbose);
78e44928
NB
2712
2713static int validate_geometry_ddf_bvd(struct supertype *st,
2714 int level, int layout, int raiddisks,
c21e737b 2715 int *chunk, unsigned long long size,
af4348dd 2716 unsigned long long data_offset,
2c514b71
NB
2717 char *dev, unsigned long long *freesize,
2718 int verbose);
78e44928
NB
2719
2720static int validate_geometry_ddf(struct supertype *st,
2c514b71 2721 int level, int layout, int raiddisks,
c21e737b 2722 int *chunk, unsigned long long size,
af4348dd 2723 unsigned long long data_offset,
2c514b71
NB
2724 char *dev, unsigned long long *freesize,
2725 int verbose)
a322f70c
DW
2726{
2727 int fd;
2728 struct mdinfo *sra;
2729 int cfd;
2730
2731 /* ddf potentially supports lots of things, but it depends on
2732 * what devices are offered (and maybe kernel version?)
2733 * If given unused devices, we will make a container.
2734 * If given devices in a container, we will make a BVD.
2735 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
2736 */
2737
bb7295f1
N
2738 if (chunk && *chunk == UnSet)
2739 *chunk = DEFAULT_CHUNK;
2740
542ef4ec 2741 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 2742 if (level == LEVEL_CONTAINER) {
78e44928
NB
2743 /* Must be a fresh device to add to a container */
2744 return validate_geometry_ddf_container(st, level, layout,
c21e737b 2745 raiddisks, chunk?*chunk:0,
af4348dd
N
2746 size, data_offset, dev,
2747 freesize,
2c514b71 2748 verbose);
5f8097be
NB
2749 }
2750
78e44928
NB
2751 if (!dev) {
2752 /* Initial sanity check. Exclude illegal levels. */
2753 int i;
2754 for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
2755 if (ddf_level_num[i].num2 == level)
2756 break;
b42f577a
N
2757 if (ddf_level_num[i].num1 == MAXINT) {
2758 if (verbose)
e7b84f9d 2759 pr_err("DDF does not support level %d arrays\n",
613b0d17 2760 level);
78e44928 2761 return 0;
b42f577a 2762 }
78e44928 2763 /* Should check layout? etc */
8592f29d
N
2764
2765 if (st->sb && freesize) {
2766 /* --create was given a container to create in.
2767 * So we need to check that there are enough
2768 * free spaces and return the amount of space.
2769 * We may as well remember which drives were
2770 * chosen so that add_to_super/getinfo_super
2771 * can return them.
2772 */
c21e737b 2773 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 2774 }
a322f70c 2775 return 1;
78e44928 2776 }
a322f70c 2777
8592f29d
N
2778 if (st->sb) {
2779 /* A container has already been opened, so we are
2780 * creating in there. Maybe a BVD, maybe an SVD.
2781 * Should make a distinction one day.
2782 */
2783 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
2784 chunk, size, data_offset, dev,
2785 freesize,
8592f29d
N
2786 verbose);
2787 }
78e44928
NB
2788 /* This is the first device for the array.
2789 * If it is a container, we read it in and do automagic allocations,
2790 * no other devices should be given.
2791 * Otherwise it must be a member device of a container, and we
2792 * do manual allocation.
2793 * Later we should check for a BVD and make an SVD.
a322f70c 2794 */
a322f70c
DW
2795 fd = open(dev, O_RDONLY|O_EXCL, 0);
2796 if (fd >= 0) {
4dd2df09 2797 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
2798 close(fd);
2799 if (sra && sra->array.major_version == -1 &&
78e44928
NB
2800 strcmp(sra->text_version, "ddf") == 0) {
2801
2802 /* load super */
2803 /* find space for 'n' devices. */
2804 /* remember the devices */
2805 /* Somehow return the fact that we have enough */
a322f70c
DW
2806 }
2807
2c514b71 2808 if (verbose)
e7b84f9d
N
2809 pr_err("ddf: Cannot create this array "
2810 "on device %s - a container is required.\n",
2811 dev);
a322f70c
DW
2812 return 0;
2813 }
2814 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 2815 if (verbose)
e7b84f9d 2816 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 2817 dev, strerror(errno));
a322f70c
DW
2818 return 0;
2819 }
2820 /* Well, it is in use by someone, maybe a 'ddf' container. */
2821 cfd = open_container(fd);
2822 if (cfd < 0) {
2823 close(fd);
2c514b71 2824 if (verbose)
e7b84f9d 2825 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 2826 dev, strerror(EBUSY));
a322f70c
DW
2827 return 0;
2828 }
4dd2df09 2829 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
2830 close(fd);
2831 if (sra && sra->array.major_version == -1 &&
2832 strcmp(sra->text_version, "ddf") == 0) {
2833 /* This is a member of a ddf container. Load the container
2834 * and try to create a bvd
2835 */
2836 struct ddf_super *ddf;
e1902a7b 2837 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 2838 st->sb = ddf;
4dd2df09 2839 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 2840 close(cfd);
78e44928 2841 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 2842 raiddisks, chunk, size,
af4348dd 2843 data_offset,
2c514b71
NB
2844 dev, freesize,
2845 verbose);
a322f70c
DW
2846 }
2847 close(cfd);
c42ec1ed
DW
2848 } else /* device may belong to a different container */
2849 return 0;
2850
a322f70c
DW
2851 return 1;
2852}
2853
2c514b71
NB
2854static int
2855validate_geometry_ddf_container(struct supertype *st,
2856 int level, int layout, int raiddisks,
2857 int chunk, unsigned long long size,
af4348dd 2858 unsigned long long data_offset,
2c514b71
NB
2859 char *dev, unsigned long long *freesize,
2860 int verbose)
a322f70c
DW
2861{
2862 int fd;
2863 unsigned long long ldsize;
2864
2865 if (level != LEVEL_CONTAINER)
2866 return 0;
2867 if (!dev)
2868 return 1;
2869
2870 fd = open(dev, O_RDONLY|O_EXCL, 0);
2871 if (fd < 0) {
2c514b71 2872 if (verbose)
e7b84f9d 2873 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 2874 dev, strerror(errno));
a322f70c
DW
2875 return 0;
2876 }
2877 if (!get_dev_size(fd, dev, &ldsize)) {
2878 close(fd);
2879 return 0;
2880 }
2881 close(fd);
2882
387fcd59 2883 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
2884 if (*freesize == 0)
2885 return 0;
a322f70c
DW
2886
2887 return 1;
2888}
2889
78e44928
NB
2890static int validate_geometry_ddf_bvd(struct supertype *st,
2891 int level, int layout, int raiddisks,
c21e737b 2892 int *chunk, unsigned long long size,
af4348dd 2893 unsigned long long data_offset,
2c514b71
NB
2894 char *dev, unsigned long long *freesize,
2895 int verbose)
a322f70c
DW
2896{
2897 struct stat stb;
2898 struct ddf_super *ddf = st->sb;
2899 struct dl *dl;
5f8097be
NB
2900 unsigned long long pos = 0;
2901 unsigned long long maxsize;
2902 struct extent *e;
2903 int i;
a322f70c 2904 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
2905 if (level == LEVEL_CONTAINER) {
2906 if (verbose)
e7b84f9d 2907 pr_err("DDF cannot create a container within an container\n");
a322f70c 2908 return 0;
b42f577a 2909 }
a322f70c
DW
2910 /* We must have the container info already read in. */
2911 if (!ddf)
2912 return 0;
2913
5f8097be
NB
2914 if (!dev) {
2915 /* General test: make sure there is space for
2916 * 'raiddisks' device extents of size 'size'.
2917 */
2918 unsigned long long minsize = size;
2919 int dcnt = 0;
2920 if (minsize == 0)
2921 minsize = 8;
2922 for (dl = ddf->dlist; dl ; dl = dl->next)
2923 {
2924 int found = 0;
7e1432fb 2925 pos = 0;
5f8097be
NB
2926
2927 i = 0;
2928 e = get_extents(ddf, dl);
2929 if (!e) continue;
2930 do {
2931 unsigned long long esize;
2932 esize = e[i].start - pos;
2933 if (esize >= minsize)
2934 found = 1;
2935 pos = e[i].start + e[i].size;
2936 i++;
2937 } while (e[i-1].size);
2938 if (found)
2939 dcnt++;
2940 free(e);
2941 }
2942 if (dcnt < raiddisks) {
2c514b71 2943 if (verbose)
e7b84f9d
N
2944 pr_err("ddf: Not enough devices with "
2945 "space for this array (%d < %d)\n",
2946 dcnt, raiddisks);
5f8097be
NB
2947 return 0;
2948 }
2949 return 1;
2950 }
a322f70c
DW
2951 /* This device must be a member of the set */
2952 if (stat(dev, &stb) < 0)
2953 return 0;
2954 if ((S_IFMT & stb.st_mode) != S_IFBLK)
2955 return 0;
2956 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
2957 if (dl->major == (int)major(stb.st_rdev) &&
2958 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
2959 break;
2960 }
5f8097be 2961 if (!dl) {
2c514b71 2962 if (verbose)
e7b84f9d 2963 pr_err("ddf: %s is not in the "
613b0d17
N
2964 "same DDF set\n",
2965 dev);
5f8097be
NB
2966 return 0;
2967 }
2968 e = get_extents(ddf, dl);
2969 maxsize = 0;
2970 i = 0;
2971 if (e) do {
613b0d17
N
2972 unsigned long long esize;
2973 esize = e[i].start - pos;
2974 if (esize >= maxsize)
2975 maxsize = esize;
2976 pos = e[i].start + e[i].size;
2977 i++;
2978 } while (e[i-1].size);
5f8097be 2979 *freesize = maxsize;
a322f70c
DW
2980 // FIXME here I am
2981
2982 return 1;
2983}
59e36268 2984
a322f70c 2985static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 2986 void **sbp, char *devname)
a322f70c
DW
2987{
2988 struct mdinfo *sra;
2989 struct ddf_super *super;
2990 struct mdinfo *sd, *best = NULL;
2991 int bestseq = 0;
2992 int seq;
2993 char nm[20];
2994 int dfd;
2995
b526e52d 2996 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
2997 if (!sra)
2998 return 1;
2999 if (sra->array.major_version != -1 ||
3000 sra->array.minor_version != -2 ||
3001 strcmp(sra->text_version, "ddf") != 0)
3002 return 1;
3003
6416d527 3004 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3005 return 1;
a2349791 3006 memset(super, 0, sizeof(*super));
a322f70c
DW
3007
3008 /* first, try each device, and choose the best ddf */
3009 for (sd = sra->devs ; sd ; sd = sd->next) {
3010 int rv;
3011 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3012 dfd = dev_open(nm, O_RDONLY);
3013 if (dfd < 0)
a322f70c
DW
3014 return 2;
3015 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3016 close(dfd);
a322f70c
DW
3017 if (rv == 0) {
3018 seq = __be32_to_cpu(super->active->seq);
3019 if (super->active->openflag)
3020 seq--;
3021 if (!best || seq > bestseq) {
3022 bestseq = seq;
3023 best = sd;
3024 }
3025 }
3026 }
3027 if (!best)
3028 return 1;
3029 /* OK, load this ddf */
3030 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3031 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3032 if (dfd < 0)
a322f70c
DW
3033 return 1;
3034 load_ddf_headers(dfd, super, NULL);
3035 load_ddf_global(dfd, super, NULL);
3036 close(dfd);
3037 /* Now we need the device-local bits */
3038 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3039 int rv;
3040
a322f70c 3041 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3042 dfd = dev_open(nm, O_RDWR);
7a7cc504 3043 if (dfd < 0)
a322f70c 3044 return 2;
3d2c4fc7
DW
3045 rv = load_ddf_headers(dfd, super, NULL);
3046 if (rv == 0)
e1902a7b 3047 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3048 if (rv)
3049 return 1;
a322f70c 3050 }
33414a01 3051
a322f70c
DW
3052 *sbp = super;
3053 if (st->ss == NULL) {
78e44928 3054 st->ss = &super_ddf;
a322f70c
DW
3055 st->minor_version = 0;
3056 st->max_devs = 512;
3057 }
4dd2df09 3058 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3059 return 0;
3060}
2b959fbf
N
3061
3062static int load_container_ddf(struct supertype *st, int fd,
3063 char *devname)
3064{
3065 return load_super_ddf_all(st, fd, &st->sb, devname);
3066}
3067
0e600426 3068#endif /* MDASSEMBLE */
a322f70c 3069
a5c7adb3 3070static int check_secondary(const struct vcl *vc)
3071{
3072 const struct vd_config *conf = &vc->conf;
3073 int i;
3074
3075 /* The only DDF secondary RAID level md can support is
3076 * RAID 10, if the stripe sizes and Basic volume sizes
3077 * are all equal.
3078 * Other configurations could in theory be supported by exposing
3079 * the BVDs to user space and using device mapper for the secondary
3080 * mapping. So far we don't support that.
3081 */
3082
3083 __u64 sec_elements[4] = {0, 0, 0, 0};
3084#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3085#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3086
3087 if (vc->other_bvds == NULL) {
3088 pr_err("No BVDs for secondary RAID found\n");
3089 return -1;
3090 }
3091 if (conf->prl != DDF_RAID1) {
3092 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3093 return -1;
3094 }
3095 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3096 pr_err("Secondary RAID level %d is unsupported\n",
3097 conf->srl);
3098 return -1;
3099 }
3100 __set_sec_seen(conf->sec_elmnt_seq);
3101 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3102 const struct vd_config *bvd = vc->other_bvds[i];
3103 if (bvd == NULL) {
e3c2a365 3104 pr_err("BVD %d is missing\n", i+1);
a5c7adb3 3105 return -1;
3106 }
3107 if (bvd->srl != conf->srl) {
3108 pr_err("Inconsistent secondary RAID level across BVDs\n");
3109 return -1;
3110 }
3111 if (bvd->prl != conf->prl) {
3112 pr_err("Different RAID levels for BVDs are unsupported\n");
3113 return -1;
3114 }
3115 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3116 pr_err("All BVDs must have the same number of primary elements\n");
3117 return -1;
3118 }
3119 if (bvd->chunk_shift != conf->chunk_shift) {
3120 pr_err("Different strip sizes for BVDs are unsupported\n");
3121 return -1;
3122 }
3123 if (bvd->array_blocks != conf->array_blocks) {
3124 pr_err("Different BVD sizes are unsupported\n");
3125 return -1;
3126 }
3127 __set_sec_seen(bvd->sec_elmnt_seq);
3128 }
3129 for (i = 0; i < conf->sec_elmnt_count; i++) {
3130 if (!__was_sec_seen(i)) {
3131 pr_err("BVD %d is missing\n", i);
3132 return -1;
3133 }
3134 }
3135 return 0;
3136}
3137
8a38db86 3138#define NO_SUCH_REFNUM (0xFFFFFFFF)
3139static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3140 __u32 refnum, unsigned int nmax,
3141 const struct vd_config **bvd,
3142 unsigned int *idx)
8a38db86 3143{
4e587018 3144 unsigned int i, j, n, sec, cnt;
3145
3146 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3147 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3148
3149 for (i = 0, j = 0 ; i < nmax ; i++) {
3150 /* j counts valid entries for this BVD */
3151 if (vc->conf.phys_refnum[i] != 0xffffffff)
3152 j++;
3153 if (vc->conf.phys_refnum[i] == refnum) {
3154 *bvd = &vc->conf;
3155 *idx = i;
3156 return sec * cnt + j - 1;
3157 }
3158 }
3159 if (vc->other_bvds == NULL)
3160 goto bad;
3161
3162 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3163 struct vd_config *vd = vc->other_bvds[n-1];
3164 if (vd == NULL)
3165 continue;
3166 sec = vd->sec_elmnt_seq;
3167 for (i = 0, j = 0 ; i < nmax ; i++) {
3168 if (vd->phys_refnum[i] != 0xffffffff)
3169 j++;
3170 if (vd->phys_refnum[i] == refnum) {
3171 *bvd = vd;
3172 *idx = i;
3173 return sec * cnt + j - 1;
3174 }
3175 }
3176 }
3177bad:
3178 *bvd = NULL;
8a38db86 3179 return NO_SUCH_REFNUM;
3180}
3181
00bbdbda 3182static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3183{
3184 /* Given a container loaded by load_super_ddf_all,
3185 * extract information about all the arrays into
3186 * an mdinfo tree.
3187 *
3188 * For each vcl in conflist: create an mdinfo, fill it in,
3189 * then look for matching devices (phys_refnum) in dlist
3190 * and create appropriate device mdinfo.
3191 */
3192 struct ddf_super *ddf = st->sb;
3193 struct mdinfo *rest = NULL;
3194 struct vcl *vc;
3195
3196 for (vc = ddf->conflist ; vc ; vc=vc->next)
3197 {
f21e18ca
N
3198 unsigned int i;
3199 unsigned int j;
598f0d58 3200 struct mdinfo *this;
00bbdbda 3201 char *ep;
90fa1a29 3202 __u32 *cptr;
8a38db86 3203 unsigned int pd;
00bbdbda
N
3204
3205 if (subarray &&
3206 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3207 *ep != '\0'))
3208 continue;
3209
a5c7adb3 3210 if (vc->conf.sec_elmnt_count > 1) {
3211 if (check_secondary(vc) != 0)
3212 continue;
3213 }
3214
503975b9 3215 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3216 this->next = rest;
3217 rest = this;
3218
4e587018 3219 if (vc->conf.sec_elmnt_count == 1) {
3220 this->array.level = map_num1(ddf_level_num,
3221 vc->conf.prl);
3222 this->array.raid_disks =
3223 __be16_to_cpu(vc->conf.prim_elmnt_count);
3224 this->array.layout =
3225 rlq_to_layout(vc->conf.rlq, vc->conf.prl,
3226 this->array.raid_disks);
3227 } else {
3228 /* The only supported layout is RAID 10.
3229 * Compatibility has been checked in check_secondary()
3230 * above.
3231 */
3232 this->array.level = 10;
3233 this->array.raid_disks =
3234 __be16_to_cpu(vc->conf.prim_elmnt_count)
3235 * vc->conf.sec_elmnt_count;
3236 this->array.layout = 0x100 |
3237 __be16_to_cpu(vc->conf.prim_elmnt_count);
3238 }
598f0d58 3239 this->array.md_minor = -1;
f35f2525
N
3240 this->array.major_version = -1;
3241 this->array.minor_version = -2;
90fa1a29
JS
3242 cptr = (__u32 *)(vc->conf.guid + 16);
3243 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3244 this->array.utime = DECADE +
3245 __be32_to_cpu(vc->conf.timestamp);
3246 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3247
59e36268 3248 i = vc->vcnum;
7a7cc504
NB
3249 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3250 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3251 DDF_init_full) {
598f0d58 3252 this->array.state = 0;
ed9d66aa
NB
3253 this->resync_start = 0;
3254 } else {
598f0d58 3255 this->array.state = 1;
b7528a20 3256 this->resync_start = MaxSector;
ed9d66aa 3257 }
db42fa9b
N
3258 memcpy(this->name, ddf->virt->entries[i].name, 16);
3259 this->name[16]=0;
3260 for(j=0; j<16; j++)
3261 if (this->name[j] == ' ')
3262 this->name[j] = 0;
598f0d58
NB
3263
3264 memset(this->uuid, 0, sizeof(this->uuid));
3265 this->component_size = __be64_to_cpu(vc->conf.blocks);
3266 this->array.size = this->component_size / 2;
5f2aace8 3267 this->container_member = i;
598f0d58 3268
c5afc314
N
3269 ddf->currentconf = vc;
3270 uuid_from_super_ddf(st, this->uuid);
3271 ddf->currentconf = NULL;
3272
60f18132 3273 sprintf(this->text_version, "/%s/%d",
4dd2df09 3274 st->container_devnm, this->container_member);
60f18132 3275
8a38db86 3276 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3277 struct mdinfo *dev;
3278 struct dl *d;
4e587018 3279 const struct vd_config *bvd;
3280 unsigned int iphys;
3281 __u64 *lba_offset;
fa033bec 3282 int stt;
598f0d58 3283
8a38db86 3284 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3285 continue;
0cf5ef67
N
3286
3287 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3288 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3289 != DDF_Online)
3290 continue;
3291
8a38db86 3292 i = get_pd_index_from_refnum(
4e587018 3293 vc, ddf->phys->entries[pd].refnum,
3294 ddf->mppe, &bvd, &iphys);
8a38db86 3295 if (i == NO_SUCH_REFNUM)
3296 continue;
3297
fa033bec 3298 this->array.working_disks++;
bc17324f 3299
0cf5ef67 3300 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3301 if (d->disk.refnum ==
3302 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3303 break;
3304 if (d == NULL)
3305 /* Haven't found that one yet, maybe there are others */
3306 continue;
3307
503975b9 3308 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3309 dev->next = this->devs;
3310 this->devs = dev;
3311
3312 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3313 dev->disk.major = d->major;
3314 dev->disk.minor = d->minor;
3315 dev->disk.raid_disk = i;
3316 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3317 dev->recovery_start = MaxSector;
598f0d58 3318
120f7677 3319 dev->events = __be32_to_cpu(ddf->primary.seq);
4e587018 3320 lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
3321 dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
3322 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3323 if (d->devname)
3324 strcpy(dev->name, d->devname);
3325 }
3326 }
3327 return rest;
3328}
3329
955e9ea1 3330static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3331{
955e9ea1 3332 struct ddf_super *ddf = st->sb;
a322f70c 3333 unsigned long long dsize;
6416d527 3334 void *buf;
3d2c4fc7 3335 int rc;
a322f70c 3336
955e9ea1
DW
3337 if (!ddf)
3338 return 1;
3339
3340 /* ->dlist and ->conflist will be set for updates, currently not
3341 * supported
3342 */
3343 if (ddf->dlist || ddf->conflist)
3344 return 1;
3345
a322f70c
DW
3346 if (!get_dev_size(fd, NULL, &dsize))
3347 return 1;
3348
3d2c4fc7
DW
3349 if (posix_memalign(&buf, 512, 512) != 0)
3350 return 1;
6416d527
NB
3351 memset(buf, 0, 512);
3352
a322f70c 3353 lseek64(fd, dsize-512, 0);
3d2c4fc7 3354 rc = write(fd, buf, 512);
6416d527 3355 free(buf);
3d2c4fc7
DW
3356 if (rc < 0)
3357 return 1;
a322f70c
DW
3358 return 0;
3359}
3360
a19c88b8
NB
3361static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3362{
3363 /*
3364 * return:
3365 * 0 same, or first was empty, and second was copied
3366 * 1 second had wrong number
3367 * 2 wrong uuid
3368 * 3 wrong other info
3369 */
3370 struct ddf_super *first = st->sb;
3371 struct ddf_super *second = tst->sb;
4eefd651 3372 struct dl *dl1, *dl2;
3373 struct vcl *vl1, *vl2;
2d210697 3374 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3375
3376 if (!first) {
3377 st->sb = tst->sb;
3378 tst->sb = NULL;
3379 return 0;
3380 }
3381
3382 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3383 return 2;
3384
2d210697 3385 if (first->anchor.seq != second->anchor.seq) {
3386 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3387 __be32_to_cpu(first->anchor.seq),
3388 __be32_to_cpu(second->anchor.seq));
3389 return 3;
3390 }
3391 if (first->max_part != second->max_part ||
3392 first->phys->used_pdes != second->phys->used_pdes ||
3393 first->virt->populated_vdes != second->virt->populated_vdes) {
3394 dprintf("%s: PD/VD number mismatch\n", __func__);
3395 return 3;
3396 }
3397
3398 max_pds = __be16_to_cpu(first->phys->used_pdes);
3399 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3400 for (pd = 0; pd < max_pds; pd++)
3401 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3402 break;
3403 if (pd == max_pds) {
3404 dprintf("%s: no match for disk %08x\n", __func__,
3405 __be32_to_cpu(dl2->disk.refnum));
3406 return 3;
3407 }
3408 }
3409
3410 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3411 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3412 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3413 continue;
3414 for (vd = 0; vd < max_vds; vd++)
3415 if (!memcmp(first->virt->entries[vd].guid,
3416 vl2->conf.guid, DDF_GUID_LEN))
3417 break;
3418 if (vd == max_vds) {
3419 dprintf("%s: no match for VD config\n", __func__);
3420 return 3;
3421 }
3422 }
a19c88b8 3423 /* FIXME should I look at anything else? */
2d210697 3424
4eefd651 3425 /*
3426 At this point we are fairly sure that the meta data matches.
3427 But the new disk may contain additional local data.
3428 Add it to the super block.
3429 */
3430 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3431 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3432 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3433 DDF_GUID_LEN))
3434 break;
3435 if (vl1) {
3436 if (vl1->other_bvds != NULL &&
3437 vl1->conf.sec_elmnt_seq !=
3438 vl2->conf.sec_elmnt_seq) {
3439 dprintf("%s: adding BVD %u\n", __func__,
3440 vl2->conf.sec_elmnt_seq);
3441 add_other_bvd(vl1, &vl2->conf,
3442 first->conf_rec_len*512);
3443 }
3444 continue;
3445 }
3446
3447 if (posix_memalign((void **)&vl1, 512,
3448 (first->conf_rec_len*512 +
3449 offsetof(struct vcl, conf))) != 0) {
3450 pr_err("%s could not allocate vcl buf\n",
3451 __func__);
3452 return 3;
3453 }
3454
3455 vl1->next = first->conflist;
3456 vl1->block_sizes = NULL;
3457 if (vl1->conf.sec_elmnt_count > 1) {
3458 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3459 sizeof(struct vd_config *));
3460 } else
3461 vl1->other_bvds = NULL;
3462 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3463 vl1->lba_offset = (__u64 *)
3464 &vl1->conf.phys_refnum[first->mppe];
3465 for (vd = 0; vd < max_vds; vd++)
3466 if (!memcmp(first->virt->entries[vd].guid,
3467 vl1->conf.guid, DDF_GUID_LEN))
3468 break;
3469 vl1->vcnum = vd;
3470 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3471 first->conflist = vl1;
3472 }
3473
3474 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3475 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3476 if (dl1->disk.refnum == dl2->disk.refnum)
3477 break;
3478 if (dl1)
3479 continue;
3480
3481 if (posix_memalign((void **)&dl1, 512,
3482 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3483 != 0) {
3484 pr_err("%s could not allocate disk info buffer\n",
3485 __func__);
3486 return 3;
3487 }
3488 memcpy(dl1, dl2, sizeof(*dl1));
3489 dl1->mdupdate = NULL;
3490 dl1->next = first->dlist;
3491 dl1->fd = -1;
3492 for (pd = 0; pd < max_pds; pd++)
3493 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3494 break;
3495 dl1->pdnum = pd;
3496 if (dl2->spare) {
3497 if (posix_memalign((void **)&dl1->spare, 512,
3498 first->conf_rec_len*512) != 0) {
3499 pr_err("%s could not allocate spare info buf\n",
3500 __func__);
3501 return 3;
3502 }
3503 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3504 }
3505 for (vd = 0 ; vd < first->max_part ; vd++) {
3506 if (!dl2->vlist[vd]) {
3507 dl1->vlist[vd] = NULL;
3508 continue;
3509 }
3510 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3511 if (!memcmp(vl1->conf.guid,
3512 dl2->vlist[vd]->conf.guid,
3513 DDF_GUID_LEN))
3514 break;
3515 dl1->vlist[vd] = vl1;
3516 }
3517 }
3518 first->dlist = dl1;
3519 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3520 dl1->disk.refnum);
3521 }
3522
a19c88b8
NB
3523 return 0;
3524}
3525
0e600426 3526#ifndef MDASSEMBLE
4e5528c6
NB
3527/*
3528 * A new array 'a' has been started which claims to be instance 'inst'
3529 * within container 'c'.
3530 * We need to confirm that the array matches the metadata in 'c' so
3531 * that we don't corrupt any metadata.
3532 */
cba0191b 3533static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3534{
2c514b71 3535 dprintf("ddf: open_new %s\n", inst);
cba0191b 3536 a->info.container_member = atoi(inst);
549e9569
NB
3537 return 0;
3538}
3539
4e5528c6
NB
3540/*
3541 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3542 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3543 * clean up to the point (in sectors). If that cannot be recorded in the
3544 * metadata, then leave it as dirty.
3545 *
3546 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3547 * !global! virtual_disk.virtual_entry structure.
3548 */
01f157d7 3549static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3550{
4e5528c6
NB
3551 struct ddf_super *ddf = a->container->sb;
3552 int inst = a->info.container_member;
18a2f463 3553 int old = ddf->virt->entries[inst].state;
01f157d7
N
3554 if (consistent == 2) {
3555 /* Should check if a recovery should be started FIXME */
3556 consistent = 1;
b7941fd6 3557 if (!is_resync_complete(&a->info))
01f157d7
N
3558 consistent = 0;
3559 }
ed9d66aa
NB
3560 if (consistent)
3561 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3562 else
4e5528c6 3563 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463
NB
3564 if (old != ddf->virt->entries[inst].state)
3565 ddf->updates_pending = 1;
3566
3567 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3568 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3569 if (is_resync_complete(&a->info))
ed9d66aa 3570 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3571 else if (a->info.resync_start == 0)
ed9d66aa 3572 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3573 else
ed9d66aa 3574 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463
NB
3575 if (old != ddf->virt->entries[inst].init_state)
3576 ddf->updates_pending = 1;
ed9d66aa 3577
2c514b71 3578 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
b7941fd6 3579 a->info.resync_start);
01f157d7 3580 return consistent;
fd7cde1b
DW
3581}
3582
/*
 * Given 'ptr', a pointer to the field 'member' inside an object of type
 * 'type', yield a pointer to that enclosing object.
 * __typeof__ is used rather than typeof so the macro also compiles when
 * the compiler runs in a strict ISO mode (-std=c99 / -std=c11).
 */
#define container_of(ptr, type, member) ({			\
	const __typeof__( ((type *)0)->member ) *__mptr = (ptr);	\
	(type *)( (char *)__mptr - offsetof(type,member) );})
7a7cc504
NB
3586/*
3587 * The state of each disk is stored in the global phys_disk structure
3588 * in phys_disk.entries[n].state.
3589 * This makes various combinations awkward.
3590 * - When a device fails in any array, it must be failed in all arrays
3591 * that include a part of this device.
3592 * - When a component is rebuilding, we cannot include it officially in the
3593 * array unless this is the only array that uses the device.
3594 *
3595 * So: when transitioning:
3596 * Online -> failed, just set failed flag. monitor will propagate
3597 * spare -> online, the device might need to be added to the array.
3598 * spare -> failed, just set failed. Don't worry if in array or not.
3599 */
8d45d196 3600static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 3601{
7a7cc504 3602 struct ddf_super *ddf = a->container->sb;
f21e18ca 3603 unsigned int inst = a->info.container_member;
7a7cc504
NB
3604 struct vd_config *vc = find_vdcr(ddf, inst);
3605 int pd = find_phys(ddf, vc->phys_refnum[n]);
3606 int i, st, working;
e1316fab
N
3607 struct mdinfo *mdi;
3608 struct dl *dl;
7a7cc504
NB
3609
3610 if (vc == NULL) {
2c514b71 3611 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
3612 return;
3613 }
e1316fab
N
3614 /* Find the matching slot in 'info'. */
3615 for (mdi = a->info.devs; mdi; mdi = mdi->next)
3616 if (mdi->disk.raid_disk == n)
3617 break;
3618 if (!mdi)
3619 return;
3620
3621 /* and find the 'dl' entry corresponding to that. */
3622 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
3623 if (mdi->state_fd >= 0 &&
3624 mdi->disk.major == dl->major &&
e1316fab
N
3625 mdi->disk.minor == dl->minor)
3626 break;
3627 if (!dl)
3628 return;
3629
3630 if (pd < 0 || pd != dl->pdnum) {
3631 /* disk doesn't currently exist or has changed.
3632 * If it is now in_sync, insert it. */
7a7cc504 3633 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
e1316fab
N
3634 struct vcl *vcl;
3635 pd = dl->pdnum;
3636 vc->phys_refnum[n] = dl->disk.refnum;
3637 vcl = container_of(vc, struct vcl, conf);
3638 vcl->lba_offset[n] = mdi->data_offset;
3639 ddf->phys->entries[pd].type &=
3640 ~__cpu_to_be16(DDF_Global_Spare);
3641 ddf->phys->entries[pd].type |=
3642 __cpu_to_be16(DDF_Active_in_VD);
3643 ddf->updates_pending = 1;
7a7cc504
NB
3644 }
3645 } else {
18a2f463 3646 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
3647 if (state & DS_FAULTY)
3648 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
3649 if (state & DS_INSYNC) {
3650 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
3651 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
3652 }
18a2f463
NB
3653 if (old != ddf->phys->entries[pd].state)
3654 ddf->updates_pending = 1;
7a7cc504
NB
3655 }
3656
2c514b71 3657 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 3658
7a7cc504
NB
3659 /* Now we need to check the state of the array and update
3660 * virtual_disk.entries[n].state.
3661 * It needs to be one of "optimal", "degraded", "failed".
3662 * I don't understand 'deleted' or 'missing'.
3663 */
3664 working = 0;
3665 for (i=0; i < a->info.array.raid_disks; i++) {
3666 pd = find_phys(ddf, vc->phys_refnum[i]);
3667 if (pd < 0)
3668 continue;
57632f4a
NB
3669 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3670 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
7a7cc504
NB
3671 == DDF_Online)
3672 working++;
3673 }
3674 state = DDF_state_degraded;
3675 if (working == a->info.array.raid_disks)
3676 state = DDF_state_optimal;
3677 else switch(vc->prl) {
613b0d17
N
3678 case DDF_RAID0:
3679 case DDF_CONCAT:
3680 case DDF_JBOD:
7a7cc504 3681 state = DDF_state_failed;
613b0d17
N
3682 break;
3683 case DDF_RAID1:
3684 if (working == 0)
3685 state = DDF_state_failed;
3686 else if (working == 2 && state == DDF_state_degraded)
3687 state = DDF_state_part_optimal;
3688 break;
3689 case DDF_RAID4:
3690 case DDF_RAID5:
3691 if (working < a->info.array.raid_disks-1)
3692 state = DDF_state_failed;
3693 break;
3694 case DDF_RAID6:
3695 if (working < a->info.array.raid_disks-2)
3696 state = DDF_state_failed;
3697 else if (working == a->info.array.raid_disks-1)
3698 state = DDF_state_part_optimal;
3699 break;
3700 }
7a7cc504 3701
18a2f463
NB
3702 if (ddf->virt->entries[inst].state !=
3703 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
3704 | state)) {
3705
3706 ddf->virt->entries[inst].state =
3707 (ddf->virt->entries[inst].state & ~DDF_state_mask)
3708 | state;
3709 ddf->updates_pending = 1;
3710 }
7a7cc504 3711
549e9569
NB
3712}
3713
2e735d19 3714static void ddf_sync_metadata(struct supertype *st)
549e9569 3715{
7a7cc504
NB
3716
3717 /*
3718 * Write all data to all devices.
3719 * Later, we might be able to track whether only local changes
3720 * have been made, or whether any global data has been changed,
3721 * but ddf is sufficiently weird that it probably always
3722 * changes global data ....
3723 */
18a2f463
NB
3724 struct ddf_super *ddf = st->sb;
3725 if (!ddf->updates_pending)
3726 return;
3727 ddf->updates_pending = 0;
1cc7f4fe 3728 __write_init_super_ddf(st);
2c514b71 3729 dprintf("ddf: sync_metadata\n");
549e9569
NB
3730}
3731
88c164f4
NB
3732static void ddf_process_update(struct supertype *st,
3733 struct metadata_update *update)
3734{
3735 /* Apply this update to the metadata.
3736 * The first 4 bytes are a DDF_*_MAGIC which guides
3737 * our actions.
3738 * Possible update are:
3739 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
3740 * Add a new physical device or remove an old one.
3741 * Changes to this record only happen implicitly.
88c164f4
NB
3742 * used_pdes is the device number.
3743 * DDF_VIRT_RECORDS_MAGIC
3744 * Add a new VD. Possibly also change the 'access' bits.
3745 * populated_vdes is the entry number.
3746 * DDF_VD_CONF_MAGIC
3747 * New or updated VD. the VIRT_RECORD must already
3748 * exist. For an update, phys_refnum and lba_offset
3749 * (at least) are updated, and the VD_CONF must
3750 * be written to precisely those devices listed with
3751 * a phys_refnum.
3752 * DDF_SPARE_ASSIGN_MAGIC
3753 * replacement Spare Assignment Record... but for which device?
3754 *
3755 * So, e.g.:
3756 * - to create a new array, we send a VIRT_RECORD and
3757 * a VD_CONF. Then assemble and start the array.
3758 * - to activate a spare we send a VD_CONF to add the phys_refnum
3759 * and offset. This will also mark the spare as active with
3760 * a spare-assignment record.
3761 */
3762 struct ddf_super *ddf = st->sb;
3763 __u32 *magic = (__u32*)update->buf;
3764 struct phys_disk *pd;
3765 struct virtual_disk *vd;
3766 struct vd_config *vc;
3767 struct vcl *vcl;
3768 struct dl *dl;
f21e18ca
N
3769 unsigned int mppe;
3770 unsigned int ent;
c7079c84 3771 unsigned int pdnum, pd2;
88c164f4 3772
2c514b71 3773 dprintf("Process update %x\n", *magic);
7e1432fb 3774
88c164f4
NB
3775 switch (*magic) {
3776 case DDF_PHYS_RECORDS_MAGIC:
3777
3778 if (update->len != (sizeof(struct phys_disk) +
3779 sizeof(struct phys_disk_entry)))
3780 return;
3781 pd = (struct phys_disk*)update->buf;
3782
3783 ent = __be16_to_cpu(pd->used_pdes);
3784 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
3785 return;
4dd968cc
N
3786 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
3787 struct dl **dlp;
3788 /* removing this disk. */
3789 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
3790 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
3791 struct dl *dl = *dlp;
3792 if (dl->pdnum == (signed)ent) {
3793 close(dl->fd);
3794 dl->fd = -1;
3795 /* FIXME this doesn't free
3796 * dl->devname */
3797 update->space = dl;
3798 *dlp = dl->next;
3799 break;
3800 }
3801 }
3802 ddf->updates_pending = 1;
3803 return;
3804 }
88c164f4
NB
3805 if (!all_ff(ddf->phys->entries[ent].guid))
3806 return;
3807 ddf->phys->entries[ent] = pd->entries[0];
3808 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 3809 __be16_to_cpu(ddf->phys->used_pdes));
18a2f463 3810 ddf->updates_pending = 1;
2cc2983d
N
3811 if (ddf->add_list) {
3812 struct active_array *a;
3813 struct dl *al = ddf->add_list;
3814 ddf->add_list = al->next;
3815
3816 al->next = ddf->dlist;
3817 ddf->dlist = al;
3818
3819 /* As a device has been added, we should check
3820 * for any degraded devices that might make
3821 * use of this spare */
3822 for (a = st->arrays ; a; a=a->next)
3823 a->check_degraded = 1;
3824 }
88c164f4
NB
3825 break;
3826
3827 case DDF_VIRT_RECORDS_MAGIC:
3828
3829 if (update->len != (sizeof(struct virtual_disk) +
3830 sizeof(struct virtual_entry)))
3831 return;
3832 vd = (struct virtual_disk*)update->buf;
3833
3834 ent = __be16_to_cpu(vd->populated_vdes);
3835 if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
3836 return;
3837 if (!all_ff(ddf->virt->entries[ent].guid))
3838 return;
3839 ddf->virt->entries[ent] = vd->entries[0];
3840 ddf->virt->populated_vdes = __cpu_to_be16(1 +
613b0d17 3841 __be16_to_cpu(ddf->virt->populated_vdes));
18a2f463 3842 ddf->updates_pending = 1;
88c164f4
NB
3843 break;
3844
3845 case DDF_VD_CONF_MAGIC:
2c514b71 3846 dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
88c164f4
NB
3847
3848 mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
f21e18ca 3849 if ((unsigned)update->len != ddf->conf_rec_len * 512)
88c164f4
NB
3850 return;
3851 vc = (struct vd_config*)update->buf;
3852 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3853 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
3854 break;
2c514b71 3855 dprintf("vcl = %p\n", vcl);
88c164f4
NB
3856 if (vcl) {
3857 /* An update, just copy the phys_refnum and lba_offset
3858 * fields
3859 */
3860 memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
3861 mppe * (sizeof(__u32) + sizeof(__u64)));
3862 } else {
3863 /* A new VD_CONF */
e6b9548d
DW
3864 if (!update->space)
3865 return;
88c164f4
NB
3866 vcl = update->space;
3867 update->space = NULL;
3868 vcl->next = ddf->conflist;
edd8d13c 3869 memcpy(&vcl->conf, vc, update->len);
88c164f4
NB
3870 vcl->lba_offset = (__u64*)
3871 &vcl->conf.phys_refnum[mppe];
1502a43a
N
3872 for (ent = 0;
3873 ent < __be16_to_cpu(ddf->virt->populated_vdes);
3874 ent++)
3875 if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
3876 DDF_GUID_LEN) == 0) {
3877 vcl->vcnum = ent;
3878 break;
3879 }
88c164f4
NB
3880 ddf->conflist = vcl;
3881 }
c7079c84
N
3882 /* Set DDF_Transition on all Failed devices - to help
3883 * us detect those that are no longer in use
3884 */
3885 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3886 if (ddf->phys->entries[pdnum].state
3887 & __be16_to_cpu(DDF_Failed))
3888 ddf->phys->entries[pdnum].state
3889 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
3890 /* Now make sure vlist is correct for each dl. */
3891 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca
N
3892 unsigned int dn;
3893 unsigned int vn = 0;
8401644c 3894 int in_degraded = 0;
88c164f4
NB
3895 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3896 for (dn=0; dn < ddf->mppe ; dn++)
3897 if (vcl->conf.phys_refnum[dn] ==
3898 dl->disk.refnum) {
8401644c 3899 int vstate;
2c514b71
NB
3900 dprintf("dev %d has %p at %d\n",
3901 dl->pdnum, vcl, vn);
c7079c84
N
3902 /* Clear the Transition flag */
3903 if (ddf->phys->entries[dl->pdnum].state
3904 & __be16_to_cpu(DDF_Failed))
3905 ddf->phys->entries[dl->pdnum].state &=
3906 ~__be16_to_cpu(DDF_Transition);
3907
88c164f4 3908 dl->vlist[vn++] = vcl;
8401644c
N
3909 vstate = ddf->virt->entries[vcl->vcnum].state
3910 & DDF_state_mask;
3911 if (vstate == DDF_state_degraded ||
3912 vstate == DDF_state_part_optimal)
3913 in_degraded = 1;
88c164f4
NB
3914 break;
3915 }
3916 while (vn < ddf->max_part)
3917 dl->vlist[vn++] = NULL;
7e1432fb
NB
3918 if (dl->vlist[0]) {
3919 ddf->phys->entries[dl->pdnum].type &=
3920 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
3921 if (!(ddf->phys->entries[dl->pdnum].type &
3922 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
3923 ddf->phys->entries[dl->pdnum].type |=
3924 __cpu_to_be16(DDF_Active_in_VD);
3925 if (in_degraded)
3926 ddf->phys->entries[dl->pdnum].state |=
3927 __cpu_to_be16(DDF_Rebuilding);
3928 }
7e1432fb
NB
3929 }
3930 if (dl->spare) {
3931 ddf->phys->entries[dl->pdnum].type &=
3932 ~__cpu_to_be16(DDF_Global_Spare);
3933 ddf->phys->entries[dl->pdnum].type |=
3934 __cpu_to_be16(DDF_Spare);
3935 }
3936 if (!dl->vlist[0] && !dl->spare) {
3937 ddf->phys->entries[dl->pdnum].type |=
3938 __cpu_to_be16(DDF_Global_Spare);
3939 ddf->phys->entries[dl->pdnum].type &=
3940 ~__cpu_to_be16(DDF_Spare |
3941 DDF_Active_in_VD);
3942 }
88c164f4 3943 }
c7079c84
N
3944
3945 /* Now remove any 'Failed' devices that are not part
3946 * of any VD. They will have the Transition flag set.
3947 * Once done, we need to update all dl->pdnum numbers.
3948 */
3949 pd2 = 0;
3950 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3951 if ((ddf->phys->entries[pdnum].state
3952 & __be16_to_cpu(DDF_Failed))
3953 && (ddf->phys->entries[pdnum].state
3954 & __be16_to_cpu(DDF_Transition)))
3955 /* skip this one */;
3956 else if (pdnum == pd2)
3957 pd2++;
3958 else {
3959 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
3960 for (dl = ddf->dlist; dl; dl = dl->next)
3961 if (dl->pdnum == (int)pdnum)
3962 dl->pdnum = pd2;
3963 pd2++;
3964 }
3965 ddf->phys->used_pdes = __cpu_to_be16(pd2);
3966 while (pd2 < pdnum) {
3967 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
3968 pd2++;
3969 }
3970
18a2f463 3971 ddf->updates_pending = 1;
88c164f4
NB
3972 break;
3973 case DDF_SPARE_ASSIGN_MAGIC:
3974 default: break;
3975 }
3976}
3977
edd8d13c
NB
3978static void ddf_prepare_update(struct supertype *st,
3979 struct metadata_update *update)
3980{
3981 /* This update arrived at managemon.
3982 * We are about to pass it to monitor.
3983 * If a malloc is needed, do it here.
3984 */
3985 struct ddf_super *ddf = st->sb;
3986 __u32 *magic = (__u32*)update->buf;
3987 if (*magic == DDF_VD_CONF_MAGIC)
e6b9548d 3988 if (posix_memalign(&update->space, 512,
613b0d17
N
3989 offsetof(struct vcl, conf)
3990 + ddf->conf_rec_len * 512) != 0)
e6b9548d 3991 update->space = NULL;
edd8d13c
NB
3992}
3993
7e1432fb
NB
3994/*
3995 * Check if the array 'a' is degraded but not failed.
3996 * If it is, find as many spares as are available and needed and
3997 * arrange for their inclusion.
3998 * We only choose devices which are not already in the array,
3999 * and prefer those with a spare-assignment to this array.
4000 * otherwise we choose global spares - assuming always that
4001 * there is enough room.
4002 * For each spare that we assign, we return an 'mdinfo' which
4003 * describes the position for the device in the array.
4004 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4005 * the new phys_refnum and lba_offset values.
4006 *
4007 * Only worry about BVDs at the moment.
4008 */
4009static struct mdinfo *ddf_activate_spare(struct active_array *a,
4010 struct metadata_update **updates)
4011{
4012 int working = 0;
4013 struct mdinfo *d;
4014 struct ddf_super *ddf = a->container->sb;
4015 int global_ok = 0;
4016 struct mdinfo *rv = NULL;
4017 struct mdinfo *di;
4018 struct metadata_update *mu;
4019 struct dl *dl;
4020 int i;
4021 struct vd_config *vc;
4022 __u64 *lba;
4023
7e1432fb
NB
4024 for (d = a->info.devs ; d ; d = d->next) {
4025 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4026 d->state_fd >= 0)
7e1432fb
NB
4027 /* wait for Removal to happen */
4028 return NULL;
4029 if (d->state_fd >= 0)
4030 working ++;
4031 }
4032
2c514b71
NB
4033 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4034 a->info.array.level);
7e1432fb
NB
4035 if (working == a->info.array.raid_disks)
4036 return NULL; /* array not degraded */
4037 switch (a->info.array.level) {
4038 case 1:
4039 if (working == 0)
4040 return NULL; /* failed */
4041 break;
4042 case 4:
4043 case 5:
4044 if (working < a->info.array.raid_disks - 1)
4045 return NULL; /* failed */
4046 break;
4047 case 6:
4048 if (working < a->info.array.raid_disks - 2)
4049 return NULL; /* failed */
4050 break;
4051 default: /* concat or stripe */
4052 return NULL; /* failed */
4053 }
4054
4055 /* For each slot, if it is not working, find a spare */
4056 dl = ddf->dlist;
4057 for (i = 0; i < a->info.array.raid_disks; i++) {
4058 for (d = a->info.devs ; d ; d = d->next)
4059 if (d->disk.raid_disk == i)
4060 break;
2c514b71 4061 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4062 if (d && (d->state_fd >= 0))
4063 continue;
4064
4065 /* OK, this device needs recovery. Find a spare */
4066 again:
4067 for ( ; dl ; dl = dl->next) {
4068 unsigned long long esize;
4069 unsigned long long pos;
4070 struct mdinfo *d2;
4071 int is_global = 0;
4072 int is_dedicated = 0;
4073 struct extent *ex;
f21e18ca 4074 unsigned int j;
7e1432fb
NB
4075 /* If in this array, skip */
4076 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4077 if (d2->state_fd >= 0 &&
4078 d2->disk.major == dl->major &&
7e1432fb 4079 d2->disk.minor == dl->minor) {
2c514b71 4080 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4081 break;
4082 }
4083 if (d2)
4084 continue;
4085 if (ddf->phys->entries[dl->pdnum].type &
4086 __cpu_to_be16(DDF_Spare)) {
4087 /* Check spare assign record */
4088 if (dl->spare) {
4089 if (dl->spare->type & DDF_spare_dedicated) {
4090 /* check spare_ents for guid */
4091 for (j = 0 ;
4092 j < __be16_to_cpu(dl->spare->populated);
4093 j++) {
4094 if (memcmp(dl->spare->spare_ents[j].guid,
4095 ddf->virt->entries[a->info.container_member].guid,
4096 DDF_GUID_LEN) == 0)
4097 is_dedicated = 1;
4098 }
4099 } else
4100 is_global = 1;
4101 }
4102 } else if (ddf->phys->entries[dl->pdnum].type &
4103 __cpu_to_be16(DDF_Global_Spare)) {
4104 is_global = 1;
e0e7aeaa
N
4105 } else if (!(ddf->phys->entries[dl->pdnum].state &
4106 __cpu_to_be16(DDF_Failed))) {
4107 /* we can possibly use some of this */
4108 is_global = 1;
7e1432fb
NB
4109 }
4110 if ( ! (is_dedicated ||
4111 (is_global && global_ok))) {
2c514b71 4112 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4113 is_dedicated, is_global);
7e1432fb
NB
4114 continue;
4115 }
4116
4117 /* We are allowed to use this device - is there space?
4118 * We need a->info.component_size sectors */
4119 ex = get_extents(ddf, dl);
4120 if (!ex) {
2c514b71 4121 dprintf("cannot get extents\n");
7e1432fb
NB
4122 continue;
4123 }
4124 j = 0; pos = 0;
4125 esize = 0;
4126
4127 do {
4128 esize = ex[j].start - pos;
4129 if (esize >= a->info.component_size)
4130 break;
e5cc7d46
N
4131 pos = ex[j].start + ex[j].size;
4132 j++;
4133 } while (ex[j-1].size);
7e1432fb
NB
4134
4135 free(ex);
4136 if (esize < a->info.component_size) {
e5cc7d46
N
4137 dprintf("%x:%x has no room: %llu %llu\n",
4138 dl->major, dl->minor,
2c514b71 4139 esize, a->info.component_size);
7e1432fb
NB
4140 /* No room */
4141 continue;
4142 }
4143
4144 /* Cool, we have a device with some space at pos */
503975b9 4145 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4146 di->disk.number = i;
4147 di->disk.raid_disk = i;
4148 di->disk.major = dl->major;
4149 di->disk.minor = dl->minor;
4150 di->disk.state = 0;
d23534e4 4151 di->recovery_start = 0;
7e1432fb
NB
4152 di->data_offset = pos;
4153 di->component_size = a->info.component_size;
4154 di->container_member = dl->pdnum;
4155 di->next = rv;
4156 rv = di;
2c514b71
NB
4157 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4158 i, pos);
7e1432fb
NB
4159
4160 break;
4161 }
4162 if (!dl && ! global_ok) {
4163 /* not enough dedicated spares, try global */
4164 global_ok = 1;
4165 dl = ddf->dlist;
4166 goto again;
4167 }
4168 }
4169
4170 if (!rv)
4171 /* No spares found */
4172 return rv;
4173 /* Now 'rv' has a list of devices to return.
4174 * Create a metadata_update record to update the
4175 * phys_refnum and lba_offset values
4176 */
503975b9
N
4177 mu = xmalloc(sizeof(*mu));
4178 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4179 free(mu);
4180 mu = NULL;
4181 }
503975b9 4182 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4183 mu->len = ddf->conf_rec_len * 512;
4184 mu->space = NULL;
f50ae22e 4185 mu->space_list = NULL;
7e1432fb
NB
4186 mu->next = *updates;
4187 vc = find_vdcr(ddf, a->info.container_member);
4188 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4189
4190 vc = (struct vd_config*)mu->buf;
4191 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4192 for (di = rv ; di ; di = di->next) {
4193 vc->phys_refnum[di->disk.raid_disk] =
4194 ddf->phys->entries[dl->pdnum].refnum;
4195 lba[di->disk.raid_disk] = di->data_offset;
4196 }
4197 *updates = mu;
4198 return rv;
4199}
0e600426 4200#endif /* MDASSEMBLE */
7e1432fb 4201
b640a252
N
4202static int ddf_level_to_layout(int level)
4203{
4204 switch(level) {
4205 case 0:
4206 case 1:
4207 return 0;
4208 case 5:
4209 return ALGORITHM_LEFT_SYMMETRIC;
4210 case 6:
4211 return ALGORITHM_ROTATING_N_CONTINUE;
4212 case 10:
4213 return 0x102;
4214 default:
4215 return UnSet;
4216 }
4217}
4218
30f58b22
DW
4219static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4220{
4221 if (level && *level == UnSet)
4222 *level = LEVEL_CONTAINER;
4223
4224 if (level && layout && *layout == UnSet)
4225 *layout = ddf_level_to_layout(*level);
4226}
4227
a322f70c
DW
4228struct superswitch super_ddf = {
4229#ifndef MDASSEMBLE
4230 .examine_super = examine_super_ddf,
4231 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4232 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4233 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4234 .detail_super = detail_super_ddf,
4235 .brief_detail_super = brief_detail_super_ddf,
4236 .validate_geometry = validate_geometry_ddf,
78e44928 4237 .write_init_super = write_init_super_ddf,
0e600426 4238 .add_to_super = add_to_super_ddf,
4dd968cc 4239 .remove_from_super = remove_from_super_ddf,
2b959fbf 4240 .load_container = load_container_ddf,
a322f70c
DW
4241#endif
4242 .match_home = match_home_ddf,
4243 .uuid_from_super= uuid_from_super_ddf,
4244 .getinfo_super = getinfo_super_ddf,
4245 .update_super = update_super_ddf,
4246
4247 .avail_size = avail_size_ddf,
4248
a19c88b8
NB
4249 .compare_super = compare_super_ddf,
4250
a322f70c 4251 .load_super = load_super_ddf,
ba7eb04f 4252 .init_super = init_super_ddf,
955e9ea1 4253 .store_super = store_super_ddf,
a322f70c
DW
4254 .free_super = free_super_ddf,
4255 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4256 .container_content = container_content_ddf,
30f58b22 4257 .default_geometry = default_geometry_ddf,
a322f70c 4258
a322f70c 4259 .external = 1,
549e9569 4260
0e600426 4261#ifndef MDASSEMBLE
549e9569
NB
4262/* for mdmon */
4263 .open_new = ddf_open_new,
ed9d66aa 4264 .set_array_state= ddf_set_array_state,
549e9569
NB
4265 .set_disk = ddf_set_disk,
4266 .sync_metadata = ddf_sync_metadata,
88c164f4 4267 .process_update = ddf_process_update,
edd8d13c 4268 .prepare_update = ddf_prepare_update,
7e1432fb 4269 .activate_spare = ddf_activate_spare,
0e600426 4270#endif
4cce4069 4271 .name = "ddf",
a322f70c 4272};