]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: brief_detail_super_ddf: print correct UUID for subarrays
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
bedbf68a 47#define DDF_NOTFOUND (~0U)
48#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
61/* Primary Raid Level (PRL) */
62#define DDF_RAID0 0x00
63#define DDF_RAID1 0x01
64#define DDF_RAID3 0x03
65#define DDF_RAID4 0x04
66#define DDF_RAID5 0x05
67#define DDF_RAID1E 0x11
68#define DDF_JBOD 0x0f
69#define DDF_CONCAT 0x1f
70#define DDF_RAID5E 0x15
71#define DDF_RAID5EE 0x25
59e36268 72#define DDF_RAID6 0x06
a322f70c
DW
73
74/* Raid Level Qualifier (RLQ) */
75#define DDF_RAID0_SIMPLE 0x00
76#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78#define DDF_RAID3_0 0x00 /* parity in first extent */
79#define DDF_RAID3_N 0x01 /* parity in last extent */
80#define DDF_RAID4_0 0x00 /* parity in first extent */
81#define DDF_RAID4_N 0x01 /* parity in last extent */
82/* these apply to raid5e and raid5ee as well */
83#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 84#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
85#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
87
88#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
90
91/* Secondary RAID Level (SRL) */
92#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93#define DDF_2MIRRORED 0x01
94#define DDF_2CONCAT 0x02
95#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
96
97/* Magic numbers */
98#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
108
109#define DDF_GUID_LEN 24
59e36268
NB
110#define DDF_REVISION_0 "01.00.00"
111#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
112
113struct ddf_header {
88c164f4 114 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
115 __u32 crc;
116 char guid[DDF_GUID_LEN];
59e36268 117 char revision[8]; /* 01.02.00 */
a322f70c
DW
118 __u32 seq; /* starts at '1' */
119 __u32 timestamp;
120 __u8 openflag;
121 __u8 foreignflag;
122 __u8 enforcegroups;
123 __u8 pad0; /* 0xff */
124 __u8 pad1[12]; /* 12 * 0xff */
125 /* 64 bytes so far */
126 __u8 header_ext[32]; /* reserved: fill with 0xff */
127 __u64 primary_lba;
128 __u64 secondary_lba;
129 __u8 type;
130 __u8 pad2[3]; /* 0xff */
131 __u32 workspace_len; /* sectors for vendor space -
132 * at least 32768(sectors) */
133 __u64 workspace_lba;
134 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
135 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
136 __u16 max_partitions; /* i.e. max num of configuration
137 record entries per disk */
138 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
139 *12/512) */
140 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
141 __u8 pad3[54]; /* 0xff */
142 /* 192 bytes so far */
143 __u32 controller_section_offset;
144 __u32 controller_section_length;
145 __u32 phys_section_offset;
146 __u32 phys_section_length;
147 __u32 virt_section_offset;
148 __u32 virt_section_length;
149 __u32 config_section_offset;
150 __u32 config_section_length;
151 __u32 data_section_offset;
152 __u32 data_section_length;
153 __u32 bbm_section_offset;
154 __u32 bbm_section_length;
155 __u32 diag_space_offset;
156 __u32 diag_space_length;
157 __u32 vendor_offset;
158 __u32 vendor_length;
159 /* 256 bytes so far */
160 __u8 pad4[256]; /* 0xff */
161};
162
163/* type field */
164#define DDF_HEADER_ANCHOR 0x00
165#define DDF_HEADER_PRIMARY 0x01
166#define DDF_HEADER_SECONDARY 0x02
167
168/* The content of the 'controller section' - global scope */
169struct ddf_controller_data {
88c164f4 170 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
171 __u32 crc;
172 char guid[DDF_GUID_LEN];
173 struct controller_type {
174 __u16 vendor_id;
175 __u16 device_id;
176 __u16 sub_vendor_id;
177 __u16 sub_device_id;
178 } type;
179 char product_id[16];
180 __u8 pad[8]; /* 0xff */
181 __u8 vendor_data[448];
182};
183
184/* The content of phys_section - global scope */
185struct phys_disk {
88c164f4 186 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
187 __u32 crc;
188 __u16 used_pdes;
189 __u16 max_pdes;
190 __u8 pad[52];
191 struct phys_disk_entry {
192 char guid[DDF_GUID_LEN];
193 __u32 refnum;
194 __u16 type;
195 __u16 state;
196 __u64 config_size; /* DDF structures must be after here */
197 char path[18]; /* another horrible structure really */
198 __u8 pad[6];
199 } entries[0];
200};
201
202/* phys_disk_entry.type is a bitmap - bigendian remember */
203#define DDF_Forced_PD_GUID 1
204#define DDF_Active_in_VD 2
88c164f4 205#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
206#define DDF_Spare 8 /* overrides Global_spare */
207#define DDF_Foreign 16
208#define DDF_Legacy 32 /* no DDF on this device */
209
210#define DDF_Interface_mask 0xf00
211#define DDF_Interface_SCSI 0x100
212#define DDF_Interface_SAS 0x200
213#define DDF_Interface_SATA 0x300
214#define DDF_Interface_FC 0x400
215
216/* phys_disk_entry.state is a bigendian bitmap */
217#define DDF_Online 1
218#define DDF_Failed 2 /* overrides 1,4,8 */
219#define DDF_Rebuilding 4
220#define DDF_Transition 8
221#define DDF_SMART 16
222#define DDF_ReadErrors 32
223#define DDF_Missing 64
224
225/* The content of the virt_section global scope */
226struct virtual_disk {
88c164f4 227 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
228 __u32 crc;
229 __u16 populated_vdes;
230 __u16 max_vdes;
231 __u8 pad[52];
232 struct virtual_entry {
233 char guid[DDF_GUID_LEN];
234 __u16 unit;
235 __u16 pad0; /* 0xffff */
236 __u16 guid_crc;
237 __u16 type;
238 __u8 state;
239 __u8 init_state;
240 __u8 pad1[14];
241 char name[16];
242 } entries[0];
243};
244
245/* virtual_entry.type is a bitmap - bigendian */
246#define DDF_Shared 1
247#define DDF_Enforce_Groups 2
248#define DDF_Unicode 4
249#define DDF_Owner_Valid 8
250
251/* virtual_entry.state is a bigendian bitmap */
252#define DDF_state_mask 0x7
253#define DDF_state_optimal 0x0
254#define DDF_state_degraded 0x1
255#define DDF_state_deleted 0x2
256#define DDF_state_missing 0x3
257#define DDF_state_failed 0x4
7a7cc504 258#define DDF_state_part_optimal 0x5
a322f70c
DW
259
260#define DDF_state_morphing 0x8
261#define DDF_state_inconsistent 0x10
262
263/* virtual_entry.init_state is a bigendian bitmap */
264#define DDF_initstate_mask 0x03
265#define DDF_init_not 0x00
7a7cc504
NB
266#define DDF_init_quick 0x01 /* initialisation is progress.
267 * i.e. 'state_inconsistent' */
a322f70c
DW
268#define DDF_init_full 0x02
269
270#define DDF_access_mask 0xc0
271#define DDF_access_rw 0x00
272#define DDF_access_ro 0x80
273#define DDF_access_blocked 0xc0
274
275/* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
280struct vd_config {
88c164f4 281 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
282 __u32 crc;
283 char guid[DDF_GUID_LEN];
284 __u32 timestamp;
285 __u32 seqnum;
286 __u8 pad0[24];
287 __u16 prim_elmnt_count;
288 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
289 __u8 prl;
290 __u8 rlq;
291 __u8 sec_elmnt_count;
292 __u8 sec_elmnt_seq;
293 __u8 srl;
598f0d58
NB
294 __u64 blocks; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks; /* blocks in array */
a322f70c
DW
298 __u8 pad1[8];
299 __u32 spare_refs[8];
300 __u8 cache_pol[8];
301 __u8 bg_rate;
302 __u8 pad2[3];
303 __u8 pad3[52];
304 __u8 pad4[192];
305 __u8 v0[32]; /* reserved- 0xff */
306 __u8 v1[32]; /* reserved- 0xff */
307 __u8 v2[16]; /* reserved- 0xff */
308 __u8 v3[16]; /* reserved- 0xff */
309 __u8 vendor[32];
310 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
313};
314
315/* vd_config.cache_pol[7] is a bitmap */
316#define DDF_cache_writeback 1 /* else writethrough */
317#define DDF_cache_wadaptive 2 /* only applies if writeback */
318#define DDF_cache_readahead 4
319#define DDF_cache_radaptive 8 /* only if doing read-ahead */
320#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
321#define DDF_cache_wallowed 32 /* enable write caching */
322#define DDF_cache_rallowed 64 /* enable read caching */
323
324struct spare_assign {
88c164f4 325 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
326 __u32 crc;
327 __u32 timestamp;
328 __u8 reserved[7];
329 __u8 type;
330 __u16 populated; /* SAEs used */
331 __u16 max; /* max SAEs */
332 __u8 pad[8];
333 struct spare_assign_entry {
334 char guid[DDF_GUID_LEN];
335 __u16 secondary_element;
336 __u8 pad[6];
337 } spare_ents[0];
338};
339/* spare_assign.type is a bitmap */
340#define DDF_spare_dedicated 0x1 /* else global */
341#define DDF_spare_revertible 0x2 /* else committable */
342#define DDF_spare_active 0x4 /* else not active */
343#define DDF_spare_affinity 0x8 /* enclosure affinity */
344
345/* The data_section contents - local scope */
346struct disk_data {
88c164f4 347 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
348 __u32 crc;
349 char guid[DDF_GUID_LEN];
350 __u32 refnum; /* crc of some magic drive data ... */
351 __u8 forced_ref; /* set when above was not result of magic */
352 __u8 forced_guid; /* set if guid was forced rather than magic */
353 __u8 vendor[32];
354 __u8 pad[442];
355};
356
357/* bbm_section content */
358struct bad_block_log {
359 __u32 magic;
360 __u32 crc;
361 __u16 entry_count;
362 __u32 spare_count;
363 __u8 pad[10];
364 __u64 first_spare;
365 struct mapped_block {
366 __u64 defective_start;
367 __u32 replacement_start;
368 __u16 remap_count;
369 __u8 pad[2];
370 } entries[0];
371};
372
373/* Struct for internally holding ddf structures */
374/* The DDF structure stored on each device is potentially
375 * quite different, as some data is global and some is local.
376 * The global data is:
377 * - ddf header
378 * - controller_data
379 * - Physical disk records
380 * - Virtual disk records
381 * The local data is:
382 * - Configuration records
383 * - Physical Disk data section
384 * ( and Bad block and vendor which I don't care about yet).
385 *
386 * The local data is parsed into separate lists as it is read
387 * and reconstructed for writing. This means that we only need
388 * to make config changes once and they are automatically
389 * propagated to all devices.
390 * Note that the ddf_super has space of the conf and disk data
391 * for this disk and also for a list of all such data.
392 * The list is only used for the superblock that is being
393 * built in Create or Assemble to describe the whole array.
394 */
395struct ddf_super {
6416d527 396 struct ddf_header anchor, primary, secondary;
a322f70c 397 struct ddf_controller_data controller;
6416d527 398 struct ddf_header *active;
a322f70c
DW
399 struct phys_disk *phys;
400 struct virtual_disk *virt;
401 int pdsize, vdsize;
f21e18ca 402 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 403 int currentdev;
18a2f463 404 int updates_pending;
a322f70c 405 struct vcl {
6416d527
NB
406 union {
407 char space[512];
408 struct {
409 struct vcl *next;
410 __u64 *lba_offset; /* location in 'conf' of
411 * the lba table */
f21e18ca 412 unsigned int vcnum; /* index into ->virt */
8ec5d685 413 struct vd_config **other_bvds;
6416d527
NB
414 __u64 *block_sizes; /* NULL if all the same */
415 };
416 };
a322f70c 417 struct vd_config conf;
d2ca6449 418 } *conflist, *currentconf;
a322f70c 419 struct dl {
6416d527
NB
420 union {
421 char space[512];
422 struct {
423 struct dl *next;
424 int major, minor;
425 char *devname;
426 int fd;
427 unsigned long long size; /* sectors */
097bcf00 428 unsigned long long primary_lba; /* sectors */
429 unsigned long long secondary_lba; /* sectors */
430 unsigned long long workspace_lba; /* sectors */
6416d527
NB
431 int pdnum; /* index in ->phys */
432 struct spare_assign *spare;
8592f29d
N
433 void *mdupdate; /* hold metadata update */
434
435 /* These fields used by auto-layout */
436 int raiddisk; /* slot to fill in autolayout */
437 __u64 esize;
6416d527
NB
438 };
439 };
a322f70c 440 struct disk_data disk;
b2280677 441 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 442 } *dlist, *add_list;
a322f70c
DW
443};
444
445#ifndef offsetof
446#define offsetof(t,f) ((size_t)&(((t*)0)->f))
447#endif
448
f21e18ca 449static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
450{
451 /* crcs are always at the same place as in the ddf_header */
452 struct ddf_header *ddf = buf;
453 __u32 oldcrc = ddf->crc;
454 __u32 newcrc;
455 ddf->crc = 0xffffffff;
456
457 newcrc = crc32(0, buf, len);
458 ddf->crc = oldcrc;
4abe6b70
N
459 /* The crc is store (like everything) bigendian, so convert
460 * here for simplicity
461 */
462 return __cpu_to_be32(newcrc);
a322f70c
DW
463}
464
465static int load_ddf_header(int fd, unsigned long long lba,
466 unsigned long long size,
467 int type,
468 struct ddf_header *hdr, struct ddf_header *anchor)
469{
470 /* read a ddf header (primary or secondary) from fd/lba
471 * and check that it is consistent with anchor
472 * Need to check:
473 * magic, crc, guid, rev, and LBA's header_type, and
474 * everything after header_type must be the same
475 */
476 if (lba >= size-1)
477 return 0;
478
479 if (lseek64(fd, lba<<9, 0) < 0)
480 return 0;
481
482 if (read(fd, hdr, 512) != 512)
483 return 0;
484
485 if (hdr->magic != DDF_HEADER_MAGIC)
486 return 0;
487 if (calc_crc(hdr, 512) != hdr->crc)
488 return 0;
489 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
490 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
491 anchor->primary_lba != hdr->primary_lba ||
492 anchor->secondary_lba != hdr->secondary_lba ||
493 hdr->type != type ||
494 memcmp(anchor->pad2, hdr->pad2, 512 -
495 offsetof(struct ddf_header, pad2)) != 0)
496 return 0;
497
498 /* Looks good enough to me... */
499 return 1;
500}
501
502static void *load_section(int fd, struct ddf_super *super, void *buf,
503 __u32 offset_be, __u32 len_be, int check)
504{
505 unsigned long long offset = __be32_to_cpu(offset_be);
506 unsigned long long len = __be32_to_cpu(len_be);
507 int dofree = (buf == NULL);
508
509 if (check)
510 if (len != 2 && len != 8 && len != 32
511 && len != 128 && len != 512)
512 return NULL;
513
514 if (len > 1024)
515 return NULL;
516 if (buf) {
517 /* All pre-allocated sections are a single block */
518 if (len != 1)
519 return NULL;
3d2c4fc7
DW
520 } else if (posix_memalign(&buf, 512, len<<9) != 0)
521 buf = NULL;
6416d527 522
a322f70c
DW
523 if (!buf)
524 return NULL;
525
526 if (super->active->type == 1)
527 offset += __be64_to_cpu(super->active->primary_lba);
528 else
529 offset += __be64_to_cpu(super->active->secondary_lba);
530
f21e18ca 531 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
532 if (dofree)
533 free(buf);
534 return NULL;
535 }
f21e18ca 536 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
537 if (dofree)
538 free(buf);
539 return NULL;
540 }
541 return buf;
542}
543
544static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
545{
546 unsigned long long dsize;
547
548 get_dev_size(fd, NULL, &dsize);
549
550 if (lseek64(fd, dsize-512, 0) < 0) {
551 if (devname)
e7b84f9d
N
552 pr_err("Cannot seek to anchor block on %s: %s\n",
553 devname, strerror(errno));
a322f70c
DW
554 return 1;
555 }
556 if (read(fd, &super->anchor, 512) != 512) {
557 if (devname)
e7b84f9d
N
558 pr_err("Cannot read anchor block on %s: %s\n",
559 devname, strerror(errno));
a322f70c
DW
560 return 1;
561 }
562 if (super->anchor.magic != DDF_HEADER_MAGIC) {
563 if (devname)
e7b84f9d 564 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
565 devname);
566 return 2;
567 }
568 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
569 if (devname)
e7b84f9d 570 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
571 devname);
572 return 2;
573 }
59e36268
NB
574 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
575 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 576 if (devname)
e7b84f9d 577 pr_err("can only support super revision"
59e36268
NB
578 " %.8s and earlier, not %.8s on %s\n",
579 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
580 return 2;
581 }
582 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
583 dsize >> 9, 1,
584 &super->primary, &super->anchor) == 0) {
585 if (devname)
e7b84f9d
N
586 pr_err("Failed to load primary DDF header "
587 "on %s\n", devname);
a322f70c
DW
588 return 2;
589 }
590 super->active = &super->primary;
591 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
592 dsize >> 9, 2,
593 &super->secondary, &super->anchor)) {
594 if ((__be32_to_cpu(super->primary.seq)
595 < __be32_to_cpu(super->secondary.seq) &&
596 !super->secondary.openflag)
597 || (__be32_to_cpu(super->primary.seq)
598 == __be32_to_cpu(super->secondary.seq) &&
599 super->primary.openflag && !super->secondary.openflag)
600 )
601 super->active = &super->secondary;
602 }
603 return 0;
604}
605
606static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
607{
608 void *ok;
609 ok = load_section(fd, super, &super->controller,
610 super->active->controller_section_offset,
611 super->active->controller_section_length,
612 0);
613 super->phys = load_section(fd, super, NULL,
614 super->active->phys_section_offset,
615 super->active->phys_section_length,
616 1);
617 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
618
619 super->virt = load_section(fd, super, NULL,
620 super->active->virt_section_offset,
621 super->active->virt_section_length,
622 1);
623 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
624 if (!ok ||
625 !super->phys ||
626 !super->virt) {
627 free(super->phys);
628 free(super->virt);
a2349791
NB
629 super->phys = NULL;
630 super->virt = NULL;
a322f70c
DW
631 return 2;
632 }
633 super->conflist = NULL;
634 super->dlist = NULL;
8c3b8c2c
NB
635
636 super->max_part = __be16_to_cpu(super->active->max_partitions);
637 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
638 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
639 return 0;
640}
641
3dc821b0 642static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
643 unsigned int len)
644{
645 int i;
646 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
647 if (vcl->other_bvds[i] != NULL &&
648 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
649 break;
650
651 if (i < vcl->conf.sec_elmnt_count-1) {
652 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
653 return;
654 } else {
655 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
656 if (vcl->other_bvds[i] == NULL)
657 break;
658 if (i == vcl->conf.sec_elmnt_count-1) {
659 pr_err("no space for sec level config %u, count is %u\n",
660 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
661 return;
662 }
663 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
664 != 0) {
665 pr_err("%s could not allocate vd buf\n", __func__);
666 return;
667 }
668 }
669 memcpy(vcl->other_bvds[i], vd, len);
670}
671
a322f70c
DW
672static int load_ddf_local(int fd, struct ddf_super *super,
673 char *devname, int keep)
674{
675 struct dl *dl;
676 struct stat stb;
677 char *conf;
f21e18ca
N
678 unsigned int i;
679 unsigned int confsec;
b2280677 680 int vnum;
f21e18ca 681 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 682 unsigned long long dsize;
a322f70c
DW
683
684 /* First the local disk info */
3d2c4fc7 685 if (posix_memalign((void**)&dl, 512,
6416d527 686 sizeof(*dl) +
3d2c4fc7 687 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 688 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
689 __func__);
690 return 1;
691 }
a322f70c
DW
692
693 load_section(fd, super, &dl->disk,
694 super->active->data_section_offset,
695 super->active->data_section_length,
696 0);
503975b9 697 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 698
a322f70c
DW
699 fstat(fd, &stb);
700 dl->major = major(stb.st_rdev);
701 dl->minor = minor(stb.st_rdev);
702 dl->next = super->dlist;
703 dl->fd = keep ? fd : -1;
d2ca6449
NB
704
705 dl->size = 0;
706 if (get_dev_size(fd, devname, &dsize))
707 dl->size = dsize >> 9;
097bcf00 708 /* If the disks have different sizes, the LBAs will differ
709 * between phys disks.
710 * At this point here, the values in super->active must be valid
711 * for this phys disk. */
712 dl->primary_lba = super->active->primary_lba;
713 dl->secondary_lba = super->active->secondary_lba;
714 dl->workspace_lba = super->active->workspace_lba;
b2280677 715 dl->spare = NULL;
f21e18ca 716 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
717 dl->vlist[i] = NULL;
718 super->dlist = dl;
59e36268 719 dl->pdnum = -1;
f21e18ca 720 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
721 if (memcmp(super->phys->entries[i].guid,
722 dl->disk.guid, DDF_GUID_LEN) == 0)
723 dl->pdnum = i;
724
a322f70c
DW
725 /* Now the config list. */
726 /* 'conf' is an array of config entries, some of which are
727 * probably invalid. Those which are good need to be copied into
728 * the conflist
729 */
a322f70c
DW
730
731 conf = load_section(fd, super, NULL,
732 super->active->config_section_offset,
733 super->active->config_section_length,
734 0);
735
b2280677 736 vnum = 0;
e223334f
N
737 for (confsec = 0;
738 confsec < __be32_to_cpu(super->active->config_section_length);
739 confsec += super->conf_rec_len) {
a322f70c 740 struct vd_config *vd =
e223334f 741 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
742 struct vcl *vcl;
743
b2280677
NB
744 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
745 if (dl->spare)
746 continue;
3d2c4fc7
DW
747 if (posix_memalign((void**)&dl->spare, 512,
748 super->conf_rec_len*512) != 0) {
e7b84f9d
N
749 pr_err("%s could not allocate spare info buf\n",
750 __func__);
3d2c4fc7
DW
751 return 1;
752 }
613b0d17 753
b2280677
NB
754 memcpy(dl->spare, vd, super->conf_rec_len*512);
755 continue;
756 }
a322f70c
DW
757 if (vd->magic != DDF_VD_CONF_MAGIC)
758 continue;
759 for (vcl = super->conflist; vcl; vcl = vcl->next) {
760 if (memcmp(vcl->conf.guid,
761 vd->guid, DDF_GUID_LEN) == 0)
762 break;
763 }
764
765 if (vcl) {
b2280677 766 dl->vlist[vnum++] = vcl;
3dc821b0 767 if (vcl->other_bvds != NULL &&
768 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
769 add_other_bvd(vcl, vd, super->conf_rec_len*512);
770 continue;
771 }
a322f70c
DW
772 if (__be32_to_cpu(vd->seqnum) <=
773 __be32_to_cpu(vcl->conf.seqnum))
774 continue;
59e36268 775 } else {
3d2c4fc7 776 if (posix_memalign((void**)&vcl, 512,
6416d527 777 (super->conf_rec_len*512 +
3d2c4fc7 778 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
779 pr_err("%s could not allocate vcl buf\n",
780 __func__);
3d2c4fc7
DW
781 return 1;
782 }
a322f70c 783 vcl->next = super->conflist;
59e36268 784 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 785 if (vd->sec_elmnt_count > 1)
786 vcl->other_bvds =
787 xcalloc(vd->sec_elmnt_count - 1,
788 sizeof(struct vd_config *));
789 else
790 vcl->other_bvds = NULL;
a322f70c 791 super->conflist = vcl;
b2280677 792 dl->vlist[vnum++] = vcl;
a322f70c 793 }
8c3b8c2c 794 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
a322f70c 795 vcl->lba_offset = (__u64*)
8c3b8c2c 796 &vcl->conf.phys_refnum[super->mppe];
59e36268
NB
797
798 for (i=0; i < max_virt_disks ; i++)
799 if (memcmp(super->virt->entries[i].guid,
800 vcl->conf.guid, DDF_GUID_LEN)==0)
801 break;
802 if (i < max_virt_disks)
803 vcl->vcnum = i;
a322f70c
DW
804 }
805 free(conf);
806
807 return 0;
808}
809
810#ifndef MDASSEMBLE
811static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 812 void **sbp, char *devname);
a322f70c 813#endif
37424f13
DW
814
815static void free_super_ddf(struct supertype *st);
816
a322f70c
DW
817static int load_super_ddf(struct supertype *st, int fd,
818 char *devname)
819{
820 unsigned long long dsize;
821 struct ddf_super *super;
822 int rv;
823
a322f70c
DW
824 if (get_dev_size(fd, devname, &dsize) == 0)
825 return 1;
826
691c6ee1
N
827 if (test_partition(fd))
828 /* DDF is not allowed on partitions */
829 return 1;
830
a322f70c
DW
831 /* 32M is a lower bound */
832 if (dsize <= 32*1024*1024) {
97320d7c 833 if (devname)
e7b84f9d
N
834 pr_err("%s is too small for ddf: "
835 "size is %llu sectors.\n",
836 devname, dsize>>9);
97320d7c 837 return 1;
a322f70c
DW
838 }
839 if (dsize & 511) {
97320d7c 840 if (devname)
e7b84f9d
N
841 pr_err("%s is an odd size for ddf: "
842 "size is %llu bytes.\n",
843 devname, dsize);
97320d7c 844 return 1;
a322f70c
DW
845 }
846
37424f13
DW
847 free_super_ddf(st);
848
6416d527 849 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 850 pr_err("malloc of %zu failed.\n",
a322f70c
DW
851 sizeof(*super));
852 return 1;
853 }
a2349791 854 memset(super, 0, sizeof(*super));
a322f70c
DW
855
856 rv = load_ddf_headers(fd, super, devname);
857 if (rv) {
858 free(super);
859 return rv;
860 }
861
862 /* Have valid headers and have chosen the best. Let's read in the rest*/
863
864 rv = load_ddf_global(fd, super, devname);
865
866 if (rv) {
867 if (devname)
e7b84f9d
N
868 pr_err("Failed to load all information "
869 "sections on %s\n", devname);
a322f70c
DW
870 free(super);
871 return rv;
872 }
873
3d2c4fc7
DW
874 rv = load_ddf_local(fd, super, devname, 0);
875
876 if (rv) {
877 if (devname)
e7b84f9d
N
878 pr_err("Failed to load all information "
879 "sections on %s\n", devname);
3d2c4fc7
DW
880 free(super);
881 return rv;
882 }
a322f70c
DW
883
884 /* Should possibly check the sections .... */
885
886 st->sb = super;
887 if (st->ss == NULL) {
888 st->ss = &super_ddf;
889 st->minor_version = 0;
890 st->max_devs = 512;
891 }
892 return 0;
893
894}
895
896static void free_super_ddf(struct supertype *st)
897{
898 struct ddf_super *ddf = st->sb;
899 if (ddf == NULL)
900 return;
901 free(ddf->phys);
902 free(ddf->virt);
903 while (ddf->conflist) {
904 struct vcl *v = ddf->conflist;
905 ddf->conflist = v->next;
59e36268
NB
906 if (v->block_sizes)
907 free(v->block_sizes);
3dc821b0 908 if (v->other_bvds) {
909 int i;
910 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
911 if (v->other_bvds[i] != NULL)
912 free(v->other_bvds[i]);
8ec5d685 913 free(v->other_bvds);
3dc821b0 914 }
a322f70c
DW
915 free(v);
916 }
917 while (ddf->dlist) {
918 struct dl *d = ddf->dlist;
919 ddf->dlist = d->next;
920 if (d->fd >= 0)
921 close(d->fd);
b2280677
NB
922 if (d->spare)
923 free(d->spare);
a322f70c
DW
924 free(d);
925 }
8a38cb04
N
926 while (ddf->add_list) {
927 struct dl *d = ddf->add_list;
928 ddf->add_list = d->next;
929 if (d->fd >= 0)
930 close(d->fd);
931 if (d->spare)
932 free(d->spare);
933 free(d);
934 }
a322f70c
DW
935 free(ddf);
936 st->sb = NULL;
937}
938
939static struct supertype *match_metadata_desc_ddf(char *arg)
940{
941 /* 'ddf' only support containers */
942 struct supertype *st;
943 if (strcmp(arg, "ddf") != 0 &&
944 strcmp(arg, "default") != 0
945 )
946 return NULL;
947
503975b9 948 st = xcalloc(1, sizeof(*st));
a322f70c
DW
949 st->ss = &super_ddf;
950 st->max_devs = 512;
951 st->minor_version = 0;
952 st->sb = NULL;
953 return st;
954}
955
a322f70c
DW
956#ifndef MDASSEMBLE
957
958static mapping_t ddf_state[] = {
959 { "Optimal", 0},
960 { "Degraded", 1},
961 { "Deleted", 2},
962 { "Missing", 3},
963 { "Failed", 4},
964 { "Partially Optimal", 5},
965 { "-reserved-", 6},
966 { "-reserved-", 7},
967 { NULL, 0}
968};
969
970static mapping_t ddf_init_state[] = {
971 { "Not Initialised", 0},
972 { "QuickInit in Progress", 1},
973 { "Fully Initialised", 2},
974 { "*UNKNOWN*", 3},
975 { NULL, 0}
976};
977static mapping_t ddf_access[] = {
978 { "Read/Write", 0},
979 { "Reserved", 1},
980 { "Read Only", 2},
981 { "Blocked (no access)", 3},
982 { NULL ,0}
983};
984
985static mapping_t ddf_level[] = {
986 { "RAID0", DDF_RAID0},
987 { "RAID1", DDF_RAID1},
988 { "RAID3", DDF_RAID3},
989 { "RAID4", DDF_RAID4},
990 { "RAID5", DDF_RAID5},
991 { "RAID1E",DDF_RAID1E},
992 { "JBOD", DDF_JBOD},
993 { "CONCAT",DDF_CONCAT},
994 { "RAID5E",DDF_RAID5E},
995 { "RAID5EE",DDF_RAID5EE},
996 { "RAID6", DDF_RAID6},
997 { NULL, 0}
998};
999static mapping_t ddf_sec_level[] = {
1000 { "Striped", DDF_2STRIPED},
1001 { "Mirrored", DDF_2MIRRORED},
1002 { "Concat", DDF_2CONCAT},
1003 { "Spanned", DDF_2SPANNED},
1004 { NULL, 0}
1005};
1006#endif
1007
1008struct num_mapping {
1009 int num1, num2;
1010};
1011static struct num_mapping ddf_level_num[] = {
1012 { DDF_RAID0, 0 },
1013 { DDF_RAID1, 1 },
1014 { DDF_RAID3, LEVEL_UNSUPPORTED },
60f18132
NB
1015 { DDF_RAID4, 4 },
1016 { DDF_RAID5, 5 },
a322f70c
DW
1017 { DDF_RAID1E, LEVEL_UNSUPPORTED },
1018 { DDF_JBOD, LEVEL_UNSUPPORTED },
1019 { DDF_CONCAT, LEVEL_LINEAR },
1020 { DDF_RAID5E, LEVEL_UNSUPPORTED },
1021 { DDF_RAID5EE, LEVEL_UNSUPPORTED },
1022 { DDF_RAID6, 6},
1023 { MAXINT, MAXINT }
1024};
1025
1026static int map_num1(struct num_mapping *map, int num)
1027{
1028 int i;
1029 for (i=0 ; map[i].num1 != MAXINT; i++)
1030 if (map[i].num1 == num)
1031 break;
1032 return map[i].num2;
1033}
1034
42dc2744
N
1035static int all_ff(char *guid)
1036{
1037 int i;
1038 for (i = 0; i < DDF_GUID_LEN; i++)
1039 if (guid[i] != (char)0xff)
1040 return 0;
1041 return 1;
1042}
1043
a322f70c
DW
1044#ifndef MDASSEMBLE
1045static void print_guid(char *guid, int tstamp)
1046{
1047 /* A GUIDs are part (or all) ASCII and part binary.
1048 * They tend to be space padded.
59e36268
NB
1049 * We print the GUID in HEX, then in parentheses add
1050 * any initial ASCII sequence, and a possible
1051 * time stamp from bytes 16-19
a322f70c
DW
1052 */
1053 int l = DDF_GUID_LEN;
1054 int i;
59e36268
NB
1055
1056 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1057 if ((i&3)==0 && i != 0) printf(":");
1058 printf("%02X", guid[i]&255);
1059 }
1060
cfccea8c 1061 printf("\n (");
a322f70c
DW
1062 while (l && guid[l-1] == ' ')
1063 l--;
1064 for (i=0 ; i<l ; i++) {
1065 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1066 fputc(guid[i], stdout);
1067 else
59e36268 1068 break;
a322f70c
DW
1069 }
1070 if (tstamp) {
1071 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1072 char tbuf[100];
1073 struct tm *tm;
1074 tm = localtime(&then);
59e36268 1075 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1076 fputs(tbuf, stdout);
1077 }
59e36268 1078 printf(")");
a322f70c
DW
1079}
1080
1081static void examine_vd(int n, struct ddf_super *sb, char *guid)
1082{
8c3b8c2c 1083 int crl = sb->conf_rec_len;
a322f70c
DW
1084 struct vcl *vcl;
1085
1086 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1087 unsigned int i;
a322f70c
DW
1088 struct vd_config *vc = &vcl->conf;
1089
1090 if (calc_crc(vc, crl*512) != vc->crc)
1091 continue;
1092 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1093 continue;
1094
1095 /* Ok, we know about this VD, let's give more details */
b06e3095 1096 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1097 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1098 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1099 int j;
1100 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1101 for (j=0; j<cnt; j++)
1102 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1103 break;
1104 if (i) printf(" ");
1105 if (j < cnt)
1106 printf("%d", j);
1107 else
1108 printf("--");
1109 }
1110 printf(")\n");
1111 if (vc->chunk_shift != 255)
613b0d17
N
1112 printf(" Chunk Size[%d] : %d sectors\n", n,
1113 1 << vc->chunk_shift);
a322f70c
DW
1114 printf(" Raid Level[%d] : %s\n", n,
1115 map_num(ddf_level, vc->prl)?:"-unknown-");
1116 if (vc->sec_elmnt_count != 1) {
1117 printf(" Secondary Position[%d] : %d of %d\n", n,
1118 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1119 printf(" Secondary Level[%d] : %s\n", n,
1120 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1121 }
1122 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1123 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1124 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1125 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1126 }
1127}
1128
1129static void examine_vds(struct ddf_super *sb)
1130{
1131 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
1132 int i;
1133 printf(" Virtual Disks : %d\n", cnt);
1134
1135 for (i=0; i<cnt; i++) {
1136 struct virtual_entry *ve = &sb->virt->entries[i];
b06e3095 1137 printf("\n");
a322f70c
DW
1138 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1139 printf("\n");
1140 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1141 printf(" state[%d] : %s, %s%s\n", i,
1142 map_num(ddf_state, ve->state & 7),
1143 (ve->state & 8) ? "Morphing, ": "",
1144 (ve->state & 16)? "Not Consistent" : "Consistent");
1145 printf(" init state[%d] : %s\n", i,
1146 map_num(ddf_init_state, ve->init_state&3));
1147 printf(" access[%d] : %s\n", i,
1148 map_num(ddf_access, (ve->init_state>>6) & 3));
1149 printf(" Name[%d] : %.16s\n", i, ve->name);
1150 examine_vd(i, sb, ve->guid);
1151 }
1152 if (cnt) printf("\n");
1153}
1154
1155static void examine_pds(struct ddf_super *sb)
1156{
1157 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1158 int i;
1159 struct dl *dl;
1160 printf(" Physical Disks : %d\n", cnt);
962371a5 1161 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1162
1163 for (i=0 ; i<cnt ; i++) {
1164 struct phys_disk_entry *pd = &sb->phys->entries[i];
1165 int type = __be16_to_cpu(pd->type);
1166 int state = __be16_to_cpu(pd->state);
1167
b06e3095
N
1168 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1169 //printf("\n");
1170 printf(" %3d %08x ", i,
a322f70c 1171 __be32_to_cpu(pd->refnum));
613b0d17 1172 printf("%8lluK ",
c9b6907b 1173 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1174 for (dl = sb->dlist; dl ; dl = dl->next) {
1175 if (dl->disk.refnum == pd->refnum) {
1176 char *dv = map_dev(dl->major, dl->minor, 0);
1177 if (dv) {
962371a5 1178 printf("%-15s", dv);
b06e3095
N
1179 break;
1180 }
1181 }
1182 }
1183 if (!dl)
962371a5 1184 printf("%15s","");
b06e3095 1185 printf(" %s%s%s%s%s",
a322f70c 1186 (type&2) ? "active":"",
b06e3095 1187 (type&4) ? "Global-Spare":"",
a322f70c
DW
1188 (type&8) ? "spare" : "",
1189 (type&16)? ", foreign" : "",
1190 (type&32)? "pass-through" : "");
18cb4496
N
1191 if (state & DDF_Failed)
1192 /* This over-rides these three */
1193 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1194 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1195 (state&1)? "Online": "Offline",
1196 (state&2)? ", Failed": "",
1197 (state&4)? ", Rebuilding": "",
1198 (state&8)? ", in-transition": "",
b06e3095
N
1199 (state&16)? ", SMART-errors": "",
1200 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1201 (state&64)? ", Missing" : "");
a322f70c
DW
1202 printf("\n");
1203 }
1204}
1205
1206static void examine_super_ddf(struct supertype *st, char *homehost)
1207{
1208 struct ddf_super *sb = st->sb;
1209
1210 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1211 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1212 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1213 printf("\n");
1214 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1215 printf("\n");
1216 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1217 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1218 ?"yes" : "no");
1219 examine_vds(sb);
1220 examine_pds(sb);
1221}
1222
a5d85af7 1223static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1224
bedbf68a 1225static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1226static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1227
bedbf68a 1228static unsigned int get_vd_num_of_subarray(struct supertype *st)
1229{
1230 /*
1231 * Figure out the VD number for this supertype.
1232 * Returns DDF_CONTAINER for the container itself,
1233 * and DDF_NOTFOUND on error.
1234 */
1235 struct ddf_super *ddf = st->sb;
1236 struct mdinfo *sra;
1237 char *sub, *end;
1238 unsigned int vcnum;
1239
1240 if (*st->container_devnm == '\0')
1241 return DDF_CONTAINER;
1242
1243 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1244 if (!sra || sra->array.major_version != -1 ||
1245 sra->array.minor_version != -2 ||
1246 !is_subarray(sra->text_version))
1247 return DDF_NOTFOUND;
1248
1249 sub = strchr(sra->text_version + 1, '/');
1250 if (sub != NULL)
1251 vcnum = strtoul(sub + 1, &end, 10);
1252 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1253 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1254 return DDF_NOTFOUND;
1255
1256 return vcnum;
1257}
1258
061f2c6a 1259static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1260{
1261 /* We just write a generic DDF ARRAY entry
1262 */
1263 struct mdinfo info;
1264 char nbuf[64];
a5d85af7 1265 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1266 fname_from_uuid(st, &info, nbuf, ':');
1267
1268 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1269}
1270
1271static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1272{
1273 /* We just write a generic DDF ARRAY entry
a322f70c 1274 */
42dc2744 1275 struct ddf_super *ddf = st->sb;
ff54de6e 1276 struct mdinfo info;
f21e18ca 1277 unsigned int i;
ff54de6e 1278 char nbuf[64];
a5d85af7 1279 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1280 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1281
f21e18ca 1282 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1283 struct virtual_entry *ve = &ddf->virt->entries[i];
1284 struct vcl vcl;
1285 char nbuf1[64];
1286 if (all_ff(ve->guid))
1287 continue;
1288 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1289 ddf->currentconf =&vcl;
1290 uuid_from_super_ddf(st, info.uuid);
1291 fname_from_uuid(st, &info, nbuf1, ':');
1292 printf("ARRAY container=%s member=%d UUID=%s\n",
1293 nbuf+5, i, nbuf1+5);
1294 }
a322f70c
DW
1295}
1296
bceedeec
N
1297static void export_examine_super_ddf(struct supertype *st)
1298{
1299 struct mdinfo info;
1300 char nbuf[64];
a5d85af7 1301 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1302 fname_from_uuid(st, &info, nbuf, ':');
1303 printf("MD_METADATA=ddf\n");
1304 printf("MD_LEVEL=container\n");
1305 printf("MD_UUID=%s\n", nbuf+5);
1306}
bceedeec 1307
a322f70c
DW
/*
 * Detail output for a DDF array: currently prints nothing.
 *
 * FIXME later:
 *  - could print the DDF GUID
 *  - need to find which array this is
 *  - for the whole container, briefly list all arrays
 *  - for a single array, give its name
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
}
1317
1318static void brief_detail_super_ddf(struct supertype *st)
1319{
ff54de6e
N
1320 struct mdinfo info;
1321 char nbuf[64];
bedbf68a 1322 struct ddf_super *ddf = st->sb;
1323 unsigned int vcnum = get_vd_num_of_subarray(st);
1324 if (vcnum == DDF_CONTAINER)
1325 uuid_from_super_ddf(st, info.uuid);
1326 else if (vcnum == DDF_NOTFOUND)
1327 return;
1328 else
1329 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1330 fname_from_uuid(st, &info, nbuf,':');
1331 printf(" UUID=%s", nbuf + 5);
a322f70c 1332}
a322f70c
DW
1333#endif
1334
1335static int match_home_ddf(struct supertype *st, char *homehost)
1336{
1337 /* It matches 'this' host if the controller is a
1338 * Linux-MD controller with vendor_data matching
1339 * the hostname
1340 */
1341 struct ddf_super *ddf = st->sb;
f21e18ca 1342 unsigned int len;
d1d3482b
N
1343
1344 if (!homehost)
1345 return 0;
1346 len = strlen(homehost);
a322f70c
DW
1347
1348 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1349 len < sizeof(ddf->controller.vendor_data) &&
1350 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1351 ddf->controller.vendor_data[len] == 0);
1352}
1353
0e600426 1354#ifndef MDASSEMBLE
f21e18ca 1355static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst)
a322f70c 1356{
7a7cc504 1357 struct vcl *v;
59e36268 1358
7a7cc504 1359 for (v = ddf->conflist; v; v = v->next)
59e36268 1360 if (inst == v->vcnum)
7a7cc504
NB
1361 return &v->conf;
1362 return NULL;
1363}
0e600426 1364#endif
7a7cc504
NB
1365
1366static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
1367{
1368 /* Find the entry in phys_disk which has the given refnum
1369 * and return it's index
1370 */
f21e18ca
N
1371 unsigned int i;
1372 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1373 if (ddf->phys->entries[i].refnum == phys_refnum)
1374 return i;
1375 return -1;
a322f70c
DW
1376}
1377
bedbf68a 1378static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1379{
1380 char buf[20];
1381 struct sha1_ctx ctx;
1382 sha1_init_ctx(&ctx);
1383 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1384 sha1_finish_ctx(&ctx, buf);
1385 memcpy(uuid, buf, 4*4);
1386}
1387
a322f70c
DW
1388static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1389{
1390 /* The uuid returned here is used for:
1391 * uuid to put into bitmap file (Create, Grow)
1392 * uuid for backup header when saving critical section (Grow)
1393 * comparing uuids when re-adding a device into an array
51006d85
N
1394 * In these cases the uuid required is that of the data-array,
1395 * not the device-set.
1396 * uuid to recognise same set when adding a missing device back
1397 * to an array. This is a uuid for the device-set.
613b0d17 1398 *
a322f70c
DW
1399 * For each of these we can make do with a truncated
1400 * or hashed uuid rather than the original, as long as
1401 * everyone agrees.
a322f70c
DW
1402 * In the case of SVD we assume the BVD is of interest,
1403 * though that might be the case if a bitmap were made for
1404 * a mirrored SVD - worry about that later.
1405 * So we need to find the VD configuration record for the
1406 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1407 * The first 16 bytes of the sha1 of these is used.
1408 */
1409 struct ddf_super *ddf = st->sb;
d2ca6449 1410 struct vcl *vcl = ddf->currentconf;
c5afc314 1411 char *guid;
a322f70c 1412
c5afc314
N
1413 if (vcl)
1414 guid = vcl->conf.guid;
1415 else
1416 guid = ddf->anchor.guid;
bedbf68a 1417 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1418}
1419
a5d85af7 1420static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1421
a5d85af7 1422static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1423{
1424 struct ddf_super *ddf = st->sb;
a5d85af7 1425 int map_disks = info->array.raid_disks;
90fa1a29 1426 __u32 *cptr;
a322f70c 1427
78e44928 1428 if (ddf->currentconf) {
a5d85af7 1429 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1430 return;
1431 }
95eeceeb 1432 memset(info, 0, sizeof(*info));
78e44928 1433
a322f70c
DW
1434 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1435 info->array.level = LEVEL_CONTAINER;
1436 info->array.layout = 0;
1437 info->array.md_minor = -1;
90fa1a29
JS
1438 cptr = (__u32 *)(ddf->anchor.guid + 16);
1439 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1440
a322f70c
DW
1441 info->array.utime = 0;
1442 info->array.chunk_size = 0;
510242aa 1443 info->container_enough = 1;
a322f70c 1444
a322f70c
DW
1445 info->disk.major = 0;
1446 info->disk.minor = 0;
cba0191b
NB
1447 if (ddf->dlist) {
1448 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1449 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1450
1451 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1452 entries[info->disk.raid_disk].
1453 config_size);
d2ca6449 1454 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1455 } else {
1456 info->disk.number = -1;
661dce36 1457 info->disk.raid_disk = -1;
cba0191b
NB
1458// info->disk.raid_disk = find refnum in the table and use index;
1459 }
f22385f9 1460 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1461
921d9e16 1462 info->recovery_start = MaxSector;
a19c88b8 1463 info->reshape_active = 0;
6e75048b 1464 info->recovery_blocked = 0;
c5afc314 1465 info->name[0] = 0;
a322f70c 1466
f35f2525
N
1467 info->array.major_version = -1;
1468 info->array.minor_version = -2;
159c3a1a 1469 strcpy(info->text_version, "ddf");
a67dd8cc 1470 info->safe_mode_delay = 0;
159c3a1a 1471
c5afc314 1472 uuid_from_super_ddf(st, info->uuid);
a322f70c 1473
a5d85af7
N
1474 if (map) {
1475 int i;
1476 for (i = 0 ; i < map_disks; i++) {
1477 if (i < info->array.raid_disks &&
1478 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1479 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1480 map[i] = 1;
1481 else
1482 map[i] = 0;
1483 }
1484 }
a322f70c
DW
1485}
1486
598f0d58
NB
1487static int rlq_to_layout(int rlq, int prl, int raiddisks);
1488
a5d85af7 1489static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1490{
1491 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1492 struct vcl *vc = ddf->currentconf;
1493 int cd = ddf->currentdev;
db42fa9b 1494 int j;
8592f29d 1495 struct dl *dl;
a5d85af7 1496 int map_disks = info->array.raid_disks;
90fa1a29 1497 __u32 *cptr;
a322f70c 1498
95eeceeb 1499 memset(info, 0, sizeof(*info));
a322f70c
DW
1500 /* FIXME this returns BVD info - what if we want SVD ?? */
1501
d2ca6449
NB
1502 info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count);
1503 info->array.level = map_num1(ddf_level_num, vc->conf.prl);
1504 info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
598f0d58 1505 info->array.raid_disks);
a322f70c 1506 info->array.md_minor = -1;
90fa1a29
JS
1507 cptr = (__u32 *)(vc->conf.guid + 16);
1508 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1509 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1510 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1511 info->custom_array_size = 0;
d2ca6449 1512
f21e18ca 1513 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
d2ca6449
NB
1514 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1515 if (vc->block_sizes)
1516 info->component_size = vc->block_sizes[cd];
1517 else
1518 info->component_size = __be64_to_cpu(vc->conf.blocks);
1519 }
a322f70c 1520
fb204fb2
N
1521 for (dl = ddf->dlist; dl ; dl = dl->next)
1522 if (dl->raiddisk == ddf->currentdev)
1523 break;
1524
a322f70c
DW
1525 info->disk.major = 0;
1526 info->disk.minor = 0;
fb204fb2 1527 info->disk.state = 0;
8592f29d
N
1528 if (dl) {
1529 info->disk.major = dl->major;
1530 info->disk.minor = dl->minor;
fb204fb2
N
1531 info->disk.raid_disk = dl->raiddisk;
1532 info->disk.number = dl->pdnum;
1533 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1534 }
a322f70c 1535
103f2410
NB
1536 info->container_member = ddf->currentconf->vcnum;
1537
921d9e16 1538 info->recovery_start = MaxSector;
80d26cb2 1539 info->resync_start = 0;
624c5ad4 1540 info->reshape_active = 0;
6e75048b 1541 info->recovery_blocked = 0;
80d26cb2
NB
1542 if (!(ddf->virt->entries[info->container_member].state
1543 & DDF_state_inconsistent) &&
1544 (ddf->virt->entries[info->container_member].init_state
1545 & DDF_initstate_mask)
1546 == DDF_init_full)
b7528a20 1547 info->resync_start = MaxSector;
80d26cb2 1548
a322f70c
DW
1549 uuid_from_super_ddf(st, info->uuid);
1550
f35f2525
N
1551 info->array.major_version = -1;
1552 info->array.minor_version = -2;
9b63e648 1553 sprintf(info->text_version, "/%s/%d",
4dd2df09 1554 st->container_devnm,
9b63e648 1555 info->container_member);
a67dd8cc 1556 info->safe_mode_delay = 200;
159c3a1a 1557
db42fa9b
N
1558 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1559 info->name[16]=0;
1560 for(j=0; j<16; j++)
1561 if (info->name[j] == ' ')
1562 info->name[j] = 0;
a5d85af7
N
1563
1564 if (map)
1565 for (j = 0; j < map_disks; j++) {
1566 map[j] = 0;
1567 if (j < info->array.raid_disks) {
1568 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1569 if (i >= 0 &&
a5d85af7
N
1570 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1571 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1572 map[i] = 1;
1573 }
1574 }
a322f70c
DW
1575}
1576
/*
 * Apply a named update to the metadata.
 *
 * For 'assemble' and 'force' callers need a non-zero return if any
 * change was made.  For others, the return value is ignored.
 * Update options known to mdadm are:
 *   force-one   : This device looks a bit old but needs to be included,
 *                 update age info appropriately.
 *   assemble    : clear any 'faulty' flag to allow this device to
 *                 be assembled.
 *   force-array : Array is degraded but being forced, mark it clean
 *                 if that will be needed to assemble it.
 *   newdev      : not used ????
 *   grow        : Array has gained a new device - this is currently for
 *                 linear only
 *   resync      : mark as dirty so a resync will happen.
 *   uuid        : Change the uuid of the array to match what is given
 *   homehost    : update the recorded homehost
 *   name        : update the name - preserving the homehost
 *   _reshape_progress : record new reshape_progress position.
 * Not relevant for this metadata version:
 *   sparc2.2, super-minor, summaries.
 *
 * Nothing is implemented yet: no metadata is modified.  "grow",
 * "resync", "_reshape_progress" and "assemble" are accepted and return
 * 0; everything else, including "homehost" and "name", returns -1.
 *
 * We don't need to handle "force-*" or "assemble" as there is no need
 * to 'trick' the kernel.  When the metadata is first updated to
 * activate the array, all the implied modifications will just happen.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	static const char *const accepted[] = {
		"grow",			/* FIXME */
		"resync",
		"_reshape_progress",	/* We don't support reshape yet */
		"assemble",		/* Do nothing, just succeed */
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	/* "homehost" would live in controller->vendor_data and "name" in
	 * virtual_entry->name; neither is written yet.
	 */
	return -1;
}
1645
5f8097be
NB
1646static void make_header_guid(char *guid)
1647{
1648 __u32 stamp;
5f8097be
NB
1649 /* Create a DDF Header of Virtual Disk GUID */
1650
1651 /* 24 bytes of fiction required.
1652 * first 8 are a 'vendor-id' - "Linux-MD"
1653 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1654 * Remaining 8 random number plus timestamp
1655 */
1656 memcpy(guid, T10, sizeof(T10));
1657 stamp = __cpu_to_be32(0xdeadbeef);
1658 memcpy(guid+8, &stamp, 4);
1659 stamp = __cpu_to_be32(0);
1660 memcpy(guid+12, &stamp, 4);
1661 stamp = __cpu_to_be32(time(0) - DECADE);
1662 memcpy(guid+16, &stamp, 4);
bfb7ea78 1663 stamp = random32();
5f8097be 1664 memcpy(guid+20, &stamp, 4);
5f8097be 1665}
59e36268 1666
78e44928
NB
1667static int init_super_ddf_bvd(struct supertype *st,
1668 mdu_array_info_t *info,
1669 unsigned long long size,
1670 char *name, char *homehost,
83cd1e97 1671 int *uuid, unsigned long long data_offset);
78e44928 1672
a322f70c
DW
1673static int init_super_ddf(struct supertype *st,
1674 mdu_array_info_t *info,
1675 unsigned long long size, char *name, char *homehost,
83cd1e97 1676 int *uuid, unsigned long long data_offset)
a322f70c
DW
1677{
1678 /* This is primarily called by Create when creating a new array.
1679 * We will then get add_to_super called for each component, and then
1680 * write_init_super called to write it out to each device.
1681 * For DDF, Create can create on fresh devices or on a pre-existing
1682 * array.
1683 * To create on a pre-existing array a different method will be called.
1684 * This one is just for fresh drives.
1685 *
1686 * We need to create the entire 'ddf' structure which includes:
1687 * DDF headers - these are easy.
1688 * Controller data - a Sector describing this controller .. not that
1689 * this is a controller exactly.
1690 * Physical Disk Record - one entry per device, so
1691 * leave plenty of space.
1692 * Virtual Disk Records - again, just leave plenty of space.
1693 * This just lists VDs, doesn't give details
1694 * Config records - describes the VDs that use this disk
1695 * DiskData - describes 'this' device.
1696 * BadBlockManagement - empty
1697 * Diag Space - empty
1698 * Vendor Logs - Could we put bitmaps here?
1699 *
1700 */
1701 struct ddf_super *ddf;
1702 char hostname[17];
1703 int hostlen;
a322f70c
DW
1704 int max_phys_disks, max_virt_disks;
1705 unsigned long long sector;
1706 int clen;
1707 int i;
1708 int pdsize, vdsize;
1709 struct phys_disk *pd;
1710 struct virtual_disk *vd;
1711
83cd1e97
N
1712 if (data_offset != INVALID_SECTORS) {
1713 fprintf(stderr, Name ": data-offset not supported by DDF\n");
1714 return 0;
1715 }
1716
78e44928 1717 if (st->sb)
83cd1e97
N
1718 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
1719 data_offset);
ba7eb04f 1720
3d2c4fc7 1721 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 1722 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
1723 return 0;
1724 }
6264b437 1725 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
1726 ddf->dlist = NULL; /* no physical disks yet */
1727 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
1728 st->sb = ddf;
1729
1730 if (info == NULL) {
1731 /* zeroing superblock */
1732 return 0;
1733 }
a322f70c
DW
1734
1735 /* At least 32MB *must* be reserved for the ddf. So let's just
1736 * start 32MB from the end, and put the primary header there.
1737 * Don't do secondary for now.
1738 * We don't know exactly where that will be yet as it could be
1739 * different on each device. To just set up the lengths.
1740 *
1741 */
1742
1743 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 1744 make_header_guid(ddf->anchor.guid);
a322f70c 1745
59e36268 1746 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
1747 ddf->anchor.seq = __cpu_to_be32(1);
1748 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
1749 ddf->anchor.openflag = 0xFF;
1750 ddf->anchor.foreignflag = 0;
1751 ddf->anchor.enforcegroups = 0; /* Is this best?? */
1752 ddf->anchor.pad0 = 0xff;
1753 memset(ddf->anchor.pad1, 0xff, 12);
1754 memset(ddf->anchor.header_ext, 0xff, 32);
1755 ddf->anchor.primary_lba = ~(__u64)0;
1756 ddf->anchor.secondary_lba = ~(__u64)0;
1757 ddf->anchor.type = DDF_HEADER_ANCHOR;
1758 memset(ddf->anchor.pad2, 0xff, 3);
1759 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
1760 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
1761 of 32M reserved.. */
1762 max_phys_disks = 1023; /* Should be enough */
1763 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
1764 max_virt_disks = 255;
1765 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
1766 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
1767 ddf->max_part = 64;
8c3b8c2c 1768 ddf->mppe = 256;
59e36268
NB
1769 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
1770 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
1771 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 1772 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
1773 /* controller sections is one sector long immediately
1774 * after the ddf header */
1775 sector = 1;
1776 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
1777 ddf->anchor.controller_section_length = __cpu_to_be32(1);
1778 sector += 1;
1779
1780 /* phys is 8 sectors after that */
1781 pdsize = ROUND_UP(sizeof(struct phys_disk) +
1782 sizeof(struct phys_disk_entry)*max_phys_disks,
1783 512);
1784 switch(pdsize/512) {
1785 case 2: case 8: case 32: case 128: case 512: break;
1786 default: abort();
1787 }
1788 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
1789 ddf->anchor.phys_section_length =
1790 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
1791 sector += pdsize/512;
1792
1793 /* virt is another 32 sectors */
1794 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
1795 sizeof(struct virtual_entry) * max_virt_disks,
1796 512);
1797 switch(vdsize/512) {
1798 case 2: case 8: case 32: case 128: case 512: break;
1799 default: abort();
1800 }
1801 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
1802 ddf->anchor.virt_section_length =
1803 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
1804 sector += vdsize/512;
1805
59e36268 1806 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
1807 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
1808 ddf->anchor.config_section_length = __cpu_to_be32(clen);
1809 sector += clen;
1810
1811 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
1812 ddf->anchor.data_section_length = __cpu_to_be32(1);
1813 sector += 1;
1814
1815 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
1816 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
1817 ddf->anchor.diag_space_length = __cpu_to_be32(0);
1818 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
1819 ddf->anchor.vendor_length = __cpu_to_be32(0);
1820 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
1821
1822 memset(ddf->anchor.pad4, 0xff, 256);
1823
1824 memcpy(&ddf->primary, &ddf->anchor, 512);
1825 memcpy(&ddf->secondary, &ddf->anchor, 512);
1826
1827 ddf->primary.openflag = 1; /* I guess.. */
1828 ddf->primary.type = DDF_HEADER_PRIMARY;
1829
1830 ddf->secondary.openflag = 1; /* I guess.. */
1831 ddf->secondary.type = DDF_HEADER_SECONDARY;
1832
1833 ddf->active = &ddf->primary;
1834
1835 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
1836
1837 /* 24 more bytes of fiction required.
1838 * first 8 are a 'vendor-id' - "Linux-MD"
1839 * Remaining 16 are serial number.... maybe a hostname would do?
1840 */
1841 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
1842 gethostname(hostname, sizeof(hostname));
1843 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
1844 hostlen = strlen(hostname);
1845 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
1846 for (i = strlen(T10) ; i+hostlen < 24; i++)
1847 ddf->controller.guid[i] = ' ';
1848
1849 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
1850 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
1851 ddf->controller.type.sub_vendor_id = 0;
1852 ddf->controller.type.sub_device_id = 0;
1853 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
1854 memset(ddf->controller.pad, 0xff, 8);
1855 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
1856 if (homehost && strlen(homehost) < 440)
1857 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 1858
3d2c4fc7 1859 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 1860 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
1861 return 0;
1862 }
6416d527 1863 ddf->phys = pd;
a322f70c
DW
1864 ddf->pdsize = pdsize;
1865
1866 memset(pd, 0xff, pdsize);
1867 memset(pd, 0, sizeof(*pd));
076515ba 1868 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
1869 pd->used_pdes = __cpu_to_be16(0);
1870 pd->max_pdes = __cpu_to_be16(max_phys_disks);
1871 memset(pd->pad, 0xff, 52);
1872
3d2c4fc7 1873 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 1874 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
1875 return 0;
1876 }
6416d527 1877 ddf->virt = vd;
a322f70c
DW
1878 ddf->vdsize = vdsize;
1879 memset(vd, 0, vdsize);
1880 vd->magic = DDF_VIRT_RECORDS_MAGIC;
1881 vd->populated_vdes = __cpu_to_be16(0);
1882 vd->max_vdes = __cpu_to_be16(max_virt_disks);
1883 memset(vd->pad, 0xff, 52);
1884
5f8097be
NB
1885 for (i=0; i<max_virt_disks; i++)
1886 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
1887
a322f70c 1888 st->sb = ddf;
18a2f463 1889 ddf->updates_pending = 1;
a322f70c
DW
1890 return 1;
1891}
1892
5f8097be
NB
/*
 * Convert a chunk size in bytes to a shift count in 512-byte sectors:
 * the 0-based position of the lowest set bit of chunksize/512,
 * or -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest = ffs(sectors);

	return lowest - 1;
}
1897
1898static int level_to_prl(int level)
1899{
1900 switch (level) {
1901 case LEVEL_LINEAR: return DDF_CONCAT;
1902 case 0: return DDF_RAID0;
1903 case 1: return DDF_RAID1;
1904 case 4: return DDF_RAID4;
1905 case 5: return DDF_RAID5;
1906 case 6: return DDF_RAID6;
1907 default: return -1;
1908 }
1909}
613b0d17 1910
5f8097be
NB
1911static int layout_to_rlq(int level, int layout, int raiddisks)
1912{
1913 switch(level) {
1914 case 0:
1915 return DDF_RAID0_SIMPLE;
1916 case 1:
1917 switch(raiddisks) {
1918 case 2: return DDF_RAID1_SIMPLE;
1919 case 3: return DDF_RAID1_MULTI;
1920 default: return -1;
1921 }
1922 case 4:
1923 switch(layout) {
1924 case 0: return DDF_RAID4_N;
1925 }
1926 break;
1927 case 5:
5f8097be
NB
1928 switch(layout) {
1929 case ALGORITHM_LEFT_ASYMMETRIC:
1930 return DDF_RAID5_N_RESTART;
1931 case ALGORITHM_RIGHT_ASYMMETRIC:
b640a252 1932 return DDF_RAID5_0_RESTART;
5f8097be
NB
1933 case ALGORITHM_LEFT_SYMMETRIC:
1934 return DDF_RAID5_N_CONTINUE;
1935 case ALGORITHM_RIGHT_SYMMETRIC:
1936 return -1; /* not mentioned in standard */
1937 }
b640a252
N
1938 case 6:
1939 switch(layout) {
1940 case ALGORITHM_ROTATING_N_RESTART:
1941 return DDF_RAID5_N_RESTART;
1942 case ALGORITHM_ROTATING_ZERO_RESTART:
1943 return DDF_RAID6_0_RESTART;
1944 case ALGORITHM_ROTATING_N_CONTINUE:
1945 return DDF_RAID5_N_CONTINUE;
1946 }
5f8097be
NB
1947 }
1948 return -1;
1949}
1950
598f0d58
NB
1951static int rlq_to_layout(int rlq, int prl, int raiddisks)
1952{
1953 switch(prl) {
1954 case DDF_RAID0:
1955 return 0; /* hopefully rlq == DDF_RAID0_SIMPLE */
1956 case DDF_RAID1:
1957 return 0; /* hopefully rlq == SIMPLE or MULTI depending
1958 on raiddisks*/
1959 case DDF_RAID4:
1960 switch(rlq) {
1961 case DDF_RAID4_N:
1962 return 0;
1963 default:
1964 /* not supported */
1965 return -1; /* FIXME this isn't checked */
1966 }
1967 case DDF_RAID5:
598f0d58
NB
1968 switch(rlq) {
1969 case DDF_RAID5_N_RESTART:
1970 return ALGORITHM_LEFT_ASYMMETRIC;
1971 case DDF_RAID5_0_RESTART:
1972 return ALGORITHM_RIGHT_ASYMMETRIC;
1973 case DDF_RAID5_N_CONTINUE:
1974 return ALGORITHM_LEFT_SYMMETRIC;
1975 default:
1976 return -1;
1977 }
59e36268
NB
1978 case DDF_RAID6:
1979 switch(rlq) {
1980 case DDF_RAID5_N_RESTART:
b640a252 1981 return ALGORITHM_ROTATING_N_RESTART;
59e36268 1982 case DDF_RAID6_0_RESTART:
b640a252 1983 return ALGORITHM_ROTATING_ZERO_RESTART;
59e36268 1984 case DDF_RAID5_N_CONTINUE:
b640a252 1985 return ALGORITHM_ROTATING_N_CONTINUE;
59e36268
NB
1986 default:
1987 return -1;
1988 }
598f0d58
NB
1989 }
1990 return -1;
1991}
1992
0e600426 1993#ifndef MDASSEMBLE
59e36268
NB
/* A range on a device, described by a start position and a length. */
struct extent {
	unsigned long long start;
	unsigned long long size;
};

/* qsort() comparator ordering extents by ascending 'start'.
 * Yields -1, 0 or 1; 'size' takes no part in the ordering.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2007
78e44928 2008static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2009{
2010 /* find a list of used extents on the give physical device
2011 * (dnum) of the given ddf.
2012 * Return a malloced array of 'struct extent'
2013
613b0d17 2014 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2015
2016 */
2017 struct extent *rv;
2018 int n = 0;
f21e18ca 2019 unsigned int i, j;
59e36268 2020
503975b9 2021 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2022
2023 for (i = 0; i < ddf->max_part; i++) {
2024 struct vcl *v = dl->vlist[i];
2025 if (v == NULL)
2026 continue;
f21e18ca 2027 for (j = 0; j < v->conf.prim_elmnt_count; j++)
59e36268
NB
2028 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
2029 /* This device plays role 'j' in 'v'. */
2030 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
2031 rv[n].size = __be64_to_cpu(v->conf.blocks);
2032 n++;
2033 break;
2034 }
2035 }
2036 qsort(rv, n, sizeof(*rv), cmp_extent);
2037
2038 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2039 rv[n].size = 0;
2040 return rv;
2041}
0e600426 2042#endif
59e36268 2043
5f8097be
NB
2044static int init_super_ddf_bvd(struct supertype *st,
2045 mdu_array_info_t *info,
2046 unsigned long long size,
2047 char *name, char *homehost,
83cd1e97 2048 int *uuid, unsigned long long data_offset)
5f8097be
NB
2049{
2050 /* We are creating a BVD inside a pre-existing container.
2051 * so st->sb is already set.
2052 * We need to create a new vd_config and a new virtual_entry
2053 */
2054 struct ddf_super *ddf = st->sb;
f21e18ca 2055 unsigned int venum;
5f8097be
NB
2056 struct virtual_entry *ve;
2057 struct vcl *vcl;
2058 struct vd_config *vc;
5f8097be
NB
2059
2060 if (__be16_to_cpu(ddf->virt->populated_vdes)
2061 >= __be16_to_cpu(ddf->virt->max_vdes)) {
e7b84f9d
N
2062 pr_err("This ddf already has the "
2063 "maximum of %d virtual devices\n",
2064 __be16_to_cpu(ddf->virt->max_vdes));
5f8097be
NB
2065 return 0;
2066 }
2067
97c9c100
N
2068 if (name)
2069 for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
2070 if (!all_ff(ddf->virt->entries[venum].guid)) {
2071 char *n = ddf->virt->entries[venum].name;
2072
2073 if (strncmp(name, n, 16) == 0) {
e7b84f9d
N
2074 pr_err("This ddf already"
2075 " has an array called %s\n",
2076 name);
97c9c100
N
2077 return 0;
2078 }
2079 }
2080
5f8097be
NB
2081 for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
2082 if (all_ff(ddf->virt->entries[venum].guid))
2083 break;
2084 if (venum == __be16_to_cpu(ddf->virt->max_vdes)) {
e7b84f9d 2085 pr_err("Cannot find spare slot for "
613b0d17 2086 "virtual disk - DDF is corrupt\n");
5f8097be
NB
2087 return 0;
2088 }
2089 ve = &ddf->virt->entries[venum];
2090
2091 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2092 * timestamp, random number
2093 */
2094 make_header_guid(ve->guid);
2095 ve->unit = __cpu_to_be16(info->md_minor);
2096 ve->pad0 = 0xFFFF;
2097 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2098 ve->type = 0;
7a7cc504
NB
2099 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2100 if (info->state & 1) /* clean */
2101 ve->init_state = DDF_init_full;
2102 else
2103 ve->init_state = DDF_init_not;
2104
5f8097be
NB
2105 memset(ve->pad1, 0xff, 14);
2106 memset(ve->name, ' ', 16);
2107 if (name)
2108 strncpy(ve->name, name, 16);
2109 ddf->virt->populated_vdes =
2110 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2111
2112 /* Now create a new vd_config */
3d2c4fc7
DW
2113 if (posix_memalign((void**)&vcl, 512,
2114 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2115 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2116 return 0;
2117 }
8c3b8c2c 2118 vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
59e36268
NB
2119 vcl->vcnum = venum;
2120 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 2121 vcl->other_bvds = NULL;
5f8097be
NB
2122
2123 vc = &vcl->conf;
2124
2125 vc->magic = DDF_VD_CONF_MAGIC;
2126 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2127 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2128 vc->seqnum = __cpu_to_be32(1);
2129 memset(vc->pad0, 0xff, 24);
2130 vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
2131 vc->chunk_shift = chunk_to_shift(info->chunk_size);
2132 vc->prl = level_to_prl(info->level);
2133 vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
2134 vc->sec_elmnt_count = 1;
2135 vc->sec_elmnt_seq = 0;
2136 vc->srl = 0;
2137 vc->blocks = __cpu_to_be64(info->size * 2);
2138 vc->array_blocks = __cpu_to_be64(
2139 calc_array_size(info->level, info->raid_disks, info->layout,
2140 info->chunk_size, info->size*2));
2141 memset(vc->pad1, 0xff, 8);
2142 vc->spare_refs[0] = 0xffffffff;
2143 vc->spare_refs[1] = 0xffffffff;
2144 vc->spare_refs[2] = 0xffffffff;
2145 vc->spare_refs[3] = 0xffffffff;
2146 vc->spare_refs[4] = 0xffffffff;
2147 vc->spare_refs[5] = 0xffffffff;
2148 vc->spare_refs[6] = 0xffffffff;
2149 vc->spare_refs[7] = 0xffffffff;
2150 memset(vc->cache_pol, 0, 8);
2151 vc->bg_rate = 0x80;
2152 memset(vc->pad2, 0xff, 3);
2153 memset(vc->pad3, 0xff, 52);
2154 memset(vc->pad4, 0xff, 192);
2155 memset(vc->v0, 0xff, 32);
2156 memset(vc->v1, 0xff, 32);
2157 memset(vc->v2, 0xff, 16);
2158 memset(vc->v3, 0xff, 16);
2159 memset(vc->vendor, 0xff, 32);
598f0d58 2160
8c3b8c2c 2161 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2162 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be
NB
2163
2164 vcl->next = ddf->conflist;
2165 ddf->conflist = vcl;
d2ca6449 2166 ddf->currentconf = vcl;
18a2f463 2167 ddf->updates_pending = 1;
5f8097be
NB
2168 return 1;
2169}
2170
0e600426 2171#ifndef MDASSEMBLE
5f8097be
NB
2172static void add_to_super_ddf_bvd(struct supertype *st,
2173 mdu_disk_info_t *dk, int fd, char *devname)
2174{
2175 /* fd and devname identify a device with-in the ddf container (st).
2176 * dk identifies a location in the new BVD.
2177 * We need to find suitable free space in that device and update
2178 * the phys_refnum and lba_offset for the newly created vd_config.
2179 * We might also want to update the type in the phys_disk
5575e7d9 2180 * section.
8592f29d
N
2181 *
2182 * Alternately: fd == -1 and we have already chosen which device to
2183 * use and recorded in dlist->raid_disk;
5f8097be
NB
2184 */
2185 struct dl *dl;
2186 struct ddf_super *ddf = st->sb;
2187 struct vd_config *vc;
2188 __u64 *lba_offset;
f21e18ca
N
2189 unsigned int working;
2190 unsigned int i;
59e36268
NB
2191 unsigned long long blocks, pos, esize;
2192 struct extent *ex;
5f8097be 2193
8592f29d
N
2194 if (fd == -1) {
2195 for (dl = ddf->dlist; dl ; dl = dl->next)
2196 if (dl->raiddisk == dk->raid_disk)
2197 break;
2198 } else {
2199 for (dl = ddf->dlist; dl ; dl = dl->next)
2200 if (dl->major == dk->major &&
2201 dl->minor == dk->minor)
2202 break;
2203 }
5f8097be
NB
2204 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2205 return;
2206
d2ca6449
NB
2207 vc = &ddf->currentconf->conf;
2208 lba_offset = ddf->currentconf->lba_offset;
59e36268
NB
2209
2210 ex = get_extents(ddf, dl);
2211 if (!ex)
2212 return;
2213
2214 i = 0; pos = 0;
2215 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2216 if (ddf->currentconf->block_sizes)
2217 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2218
2219 do {
2220 esize = ex[i].start - pos;
2221 if (esize >= blocks)
2222 break;
2223 pos = ex[i].start + ex[i].size;
2224 i++;
2225 } while (ex[i-1].size);
2226
2227 free(ex);
2228 if (esize < blocks)
2229 return;
2230
d2ca6449 2231 ddf->currentdev = dk->raid_disk;
5f8097be 2232 vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
59e36268 2233 lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
5f8097be 2234
f21e18ca 2235 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2236 if (dl->vlist[i] == NULL)
2237 break;
2238 if (i == ddf->max_part)
2239 return;
d2ca6449 2240 dl->vlist[i] = ddf->currentconf;
5f8097be 2241
8592f29d
N
2242 if (fd >= 0)
2243 dl->fd = fd;
2244 if (devname)
2245 dl->devname = devname;
7a7cc504
NB
2246
2247 /* Check how many working raid_disks, and if we can mark
2248 * array as optimal yet
2249 */
2250 working = 0;
5575e7d9 2251
f21e18ca 2252 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
7a7cc504
NB
2253 if (vc->phys_refnum[i] != 0xffffffff)
2254 working++;
59e36268 2255
5575e7d9 2256 /* Find which virtual_entry */
d2ca6449 2257 i = ddf->currentconf->vcnum;
7a7cc504 2258 if (working == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2259 ddf->virt->entries[i].state =
2260 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504
NB
2261 | DDF_state_optimal;
2262
2263 if (vc->prl == DDF_RAID6 &&
2264 working+1 == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2265 ddf->virt->entries[i].state =
2266 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504 2267 | DDF_state_part_optimal;
5575e7d9
NB
2268
2269 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2270 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
18a2f463 2271 ddf->updates_pending = 1;
5f8097be
NB
2272}
2273
a322f70c
DW
2274/* add a device to a container, either while creating it or while
2275 * expanding a pre-existing container
2276 */
f20c3968 2277static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2278 mdu_disk_info_t *dk, int fd, char *devname,
2279 unsigned long long data_offset)
a322f70c
DW
2280{
2281 struct ddf_super *ddf = st->sb;
2282 struct dl *dd;
2283 time_t now;
2284 struct tm *tm;
2285 unsigned long long size;
2286 struct phys_disk_entry *pde;
f21e18ca 2287 unsigned int n, i;
a322f70c 2288 struct stat stb;
90fa1a29 2289 __u32 *tptr;
a322f70c 2290
78e44928
NB
2291 if (ddf->currentconf) {
2292 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2293 return 0;
78e44928
NB
2294 }
2295
a322f70c
DW
2296 /* This is device numbered dk->number. We need to create
2297 * a phys_disk entry and a more detailed disk_data entry.
2298 */
2299 fstat(fd, &stb);
3d2c4fc7
DW
2300 if (posix_memalign((void**)&dd, 512,
2301 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2302 pr_err("%s could allocate buffer for new disk, aborting\n",
2303 __func__);
f20c3968 2304 return 1;
3d2c4fc7 2305 }
a322f70c
DW
2306 dd->major = major(stb.st_rdev);
2307 dd->minor = minor(stb.st_rdev);
2308 dd->devname = devname;
a322f70c 2309 dd->fd = fd;
b2280677 2310 dd->spare = NULL;
a322f70c
DW
2311
2312 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2313 now = time(0);
2314 tm = localtime(&now);
2315 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2316 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2317 tptr = (__u32 *)(dd->disk.guid + 16);
2318 *tptr++ = random32();
2319 *tptr = random32();
a322f70c 2320
59e36268
NB
2321 do {
2322 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2323 dd->disk.refnum = random32();
f21e18ca
N
2324 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2325 i > 0; i--)
2326 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2327 break;
f21e18ca 2328 } while (i > 0);
59e36268 2329
a322f70c
DW
2330 dd->disk.forced_ref = 1;
2331 dd->disk.forced_guid = 1;
2332 memset(dd->disk.vendor, ' ', 32);
2333 memcpy(dd->disk.vendor, "Linux", 5);
2334 memset(dd->disk.pad, 0xff, 442);
b2280677 2335 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2336 dd->vlist[i] = NULL;
2337
2338 n = __be16_to_cpu(ddf->phys->used_pdes);
2339 pde = &ddf->phys->entries[n];
5575e7d9
NB
2340 dd->pdnum = n;
2341
2cc2983d
N
2342 if (st->update_tail) {
2343 int len = (sizeof(struct phys_disk) +
2344 sizeof(struct phys_disk_entry));
2345 struct phys_disk *pd;
2346
503975b9 2347 pd = xmalloc(len);
2cc2983d
N
2348 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2349 pd->used_pdes = __cpu_to_be16(n);
2350 pde = &pd->entries[0];
2351 dd->mdupdate = pd;
2352 } else {
2353 n++;
2354 ddf->phys->used_pdes = __cpu_to_be16(n);
2355 }
a322f70c
DW
2356
2357 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2358 pde->refnum = dd->disk.refnum;
5575e7d9 2359 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c
DW
2360 pde->state = __cpu_to_be16(DDF_Online);
2361 get_dev_size(fd, NULL, &size);
2362 /* We are required to reserve 32Meg, and record the size in sectors */
2363 pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
2364 sprintf(pde->path, "%17.17s","Information: nil") ;
2365 memset(pde->pad, 0xff, 6);
2366
d2ca6449 2367 dd->size = size >> 9;
2cc2983d
N
2368 if (st->update_tail) {
2369 dd->next = ddf->add_list;
2370 ddf->add_list = dd;
2371 } else {
2372 dd->next = ddf->dlist;
2373 ddf->dlist = dd;
2374 ddf->updates_pending = 1;
2375 }
f20c3968
DW
2376
2377 return 0;
a322f70c
DW
2378}
2379
4dd968cc
N
2380static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2381{
2382 struct ddf_super *ddf = st->sb;
2383 struct dl *dl;
2384
2385 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2386 * disappeared from the container.
2387 * We need to arrange that it disappears from the metadata and
2388 * internal data structures too.
2389 * Most of the work is done by ddf_process_update which edits
2390 * the metadata and closes the file handle and attaches the memory
2391 * where free_updates will free it.
2392 */
2393 for (dl = ddf->dlist; dl ; dl = dl->next)
2394 if (dl->major == dk->major &&
2395 dl->minor == dk->minor)
2396 break;
2397 if (!dl)
2398 return -1;
2399
2400 if (st->update_tail) {
2401 int len = (sizeof(struct phys_disk) +
2402 sizeof(struct phys_disk_entry));
2403 struct phys_disk *pd;
2404
503975b9 2405 pd = xmalloc(len);
4dd968cc
N
2406 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2407 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2408 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2409 append_metadata_update(st, pd, len);
2410 }
2411 return 0;
2412}
2413
a322f70c
DW
2414/*
2415 * This is the write_init_super method for a ddf container. It is
2416 * called when creating a container or adding another device to a
2417 * container.
2418 */
42d5dfd9 2419#define NULL_CONF_SZ 4096
18a2f463 2420
e3c2a365 2421static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2422 __u32 refnum, unsigned int nmax,
2423 const struct vd_config **bvd,
2424 unsigned int *idx);
2425
7f798aca 2426static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2427 char *null_aligned)
a322f70c 2428{
7f798aca 2429 unsigned long long sector;
2430 struct ddf_header *header;
2431 int fd, i, n_config, conf_size;
2432
2433 fd = d->fd;
2434
2435 switch (type) {
2436 case DDF_HEADER_PRIMARY:
2437 header = &ddf->primary;
2438 sector = __be64_to_cpu(header->primary_lba);
2439 break;
2440 case DDF_HEADER_SECONDARY:
2441 header = &ddf->secondary;
2442 sector = __be64_to_cpu(header->secondary_lba);
2443 break;
2444 default:
2445 return 0;
2446 }
2447
2448 header->type = type;
2449 header->openflag = 0;
2450 header->crc = calc_crc(header, 512);
2451
2452 lseek64(fd, sector<<9, 0);
2453 if (write(fd, header, 512) < 0)
2454 return 0;
2455
2456 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2457 if (write(fd, &ddf->controller, 512) < 0)
2458 return 0;
a322f70c 2459
7f798aca 2460 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2461 if (write(fd, ddf->phys, ddf->pdsize) < 0)
2462 return 0;
2463 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2464 if (write(fd, ddf->virt, ddf->vdsize) < 0)
2465 return 0;
2466
2467 /* Now write lots of config records. */
2468 n_config = ddf->max_part;
2469 conf_size = ddf->conf_rec_len * 512;
2470 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2471 struct vcl *c;
2472 struct vd_config *vdc = NULL;
2473 if (i == n_config) {
7f798aca 2474 c = (struct vcl *)d->spare;
e3c2a365 2475 if (c)
2476 vdc = &c->conf;
2477 } else {
2478 unsigned int dummy;
2479 c = d->vlist[i];
2480 if (c)
2481 get_pd_index_from_refnum(
2482 c, d->disk.refnum,
2483 ddf->mppe,
2484 (const struct vd_config **)&vdc,
2485 &dummy);
2486 }
7f798aca 2487 if (c) {
dacf3dc5 2488 vdc->seqnum = header->seq;
e3c2a365 2489 vdc->crc = calc_crc(vdc, conf_size);
2490 if (write(fd, vdc, conf_size) < 0)
7f798aca 2491 break;
2492 } else {
2493 unsigned int togo = conf_size;
2494 while (togo > NULL_CONF_SZ) {
2495 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2496 break;
2497 togo -= NULL_CONF_SZ;
2498 }
2499 if (write(fd, null_aligned, togo) < 0)
2500 break;
2501 }
2502 }
2503 if (i <= n_config)
2504 return 0;
2505
2506 d->disk.crc = calc_crc(&d->disk, 512);
2507 if (write(fd, &d->disk, 512) < 0)
2508 return 0;
2509
2510 return 1;
2511}
2512
2513static int __write_init_super_ddf(struct supertype *st)
2514{
a322f70c 2515 struct ddf_super *ddf = st->sb;
a322f70c 2516 struct dl *d;
175593bf
DW
2517 int attempts = 0;
2518 int successes = 0;
7f798aca 2519 unsigned long long size;
42d5dfd9 2520 char *null_aligned;
0175cbf6 2521 __u32 seq;
42d5dfd9
JS
2522
2523 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2524 return -ENOMEM;
2525 }
2526 memset(null_aligned, 0xff, NULL_CONF_SZ);
a322f70c 2527
dc9e279c 2528 seq = ddf->active->seq + 1;
0175cbf6 2529
175593bf
DW
2530 /* try to write updated metadata,
2531 * if we catch a failure move on to the next disk
2532 */
a322f70c
DW
2533 for (d = ddf->dlist; d; d=d->next) {
2534 int fd = d->fd;
2535
2536 if (fd < 0)
2537 continue;
2538
175593bf 2539 attempts++;
a322f70c
DW
2540 /* We need to fill in the primary, (secondary) and workspace
2541 * lba's in the headers, set their checksums,
2542 * Also checksum phys, virt....
2543 *
2544 * Then write everything out, finally the anchor is written.
2545 */
2546 get_dev_size(fd, NULL, &size);
2547 size /= 512;
097bcf00 2548 if (d->workspace_lba != 0)
2549 ddf->anchor.workspace_lba = d->workspace_lba;
2550 else
2551 ddf->anchor.workspace_lba =
2552 __cpu_to_be64(size - 32*1024*2);
2553 if (d->primary_lba != 0)
2554 ddf->anchor.primary_lba = d->primary_lba;
2555 else
2556 ddf->anchor.primary_lba =
2557 __cpu_to_be64(size - 16*1024*2);
2558 if (d->secondary_lba != 0)
2559 ddf->anchor.secondary_lba = d->secondary_lba;
2560 else
2561 ddf->anchor.secondary_lba =
2562 __cpu_to_be64(size - 32*1024*2);
0175cbf6 2563 ddf->anchor.seq = seq;
a322f70c
DW
2564 memcpy(&ddf->primary, &ddf->anchor, 512);
2565 memcpy(&ddf->secondary, &ddf->anchor, 512);
2566
2567 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2568 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2569 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2570
7f798aca 2571 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2572 null_aligned))
175593bf 2573 continue;
a322f70c 2574
7f798aca 2575 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2576 null_aligned))
175593bf 2577 continue;
a322f70c 2578
a322f70c 2579 lseek64(fd, (size-1)*512, SEEK_SET);
175593bf
DW
2580 if (write(fd, &ddf->anchor, 512) < 0)
2581 continue;
2582 successes++;
2583 }
42d5dfd9 2584 free(null_aligned);
175593bf 2585
175593bf 2586 return attempts != successes;
a322f70c 2587}
7a7cc504
NB
2588
2589static int write_init_super_ddf(struct supertype *st)
2590{
9b1fb677
DW
2591 struct ddf_super *ddf = st->sb;
2592 struct vcl *currentconf = ddf->currentconf;
2593
2594 /* we are done with currentconf reset it to point st at the container */
2595 ddf->currentconf = NULL;
edd8d13c
NB
2596
2597 if (st->update_tail) {
2598 /* queue the virtual_disk and vd_config as metadata updates */
2599 struct virtual_disk *vd;
2600 struct vd_config *vc;
edd8d13c
NB
2601 int len;
2602
9b1fb677 2603 if (!currentconf) {
2cc2983d
N
2604 int len = (sizeof(struct phys_disk) +
2605 sizeof(struct phys_disk_entry));
2606
2607 /* adding a disk to the container. */
2608 if (!ddf->add_list)
2609 return 0;
2610
2611 append_metadata_update(st, ddf->add_list->mdupdate, len);
2612 ddf->add_list->mdupdate = NULL;
2613 return 0;
2614 }
2615
2616 /* Newly created VD */
2617
edd8d13c
NB
2618 /* First the virtual disk. We have a slightly fake header */
2619 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2620 vd = xmalloc(len);
edd8d13c 2621 *vd = *ddf->virt;
9b1fb677
DW
2622 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2623 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2624 append_metadata_update(st, vd, len);
2625
2626 /* Then the vd_config */
2627 len = ddf->conf_rec_len * 512;
503975b9 2628 vc = xmalloc(len);
9b1fb677 2629 memcpy(vc, &currentconf->conf, len);
edd8d13c
NB
2630 append_metadata_update(st, vc, len);
2631
2632 /* FIXME I need to close the fds! */
2633 return 0;
613b0d17 2634 } else {
d682f344
N
2635 struct dl *d;
2636 for (d = ddf->dlist; d; d=d->next)
ba728be7 2637 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2638 return __write_init_super_ddf(st);
d682f344 2639 }
7a7cc504
NB
2640}
2641
a322f70c
DW
2642#endif
2643
387fcd59
N
2644static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2645 unsigned long long data_offset)
a322f70c
DW
2646{
2647 /* We must reserve the last 32Meg */
2648 if (devsize <= 32*1024*2)
2649 return 0;
2650 return devsize - 32*1024*2;
2651}
2652
2653#ifndef MDASSEMBLE
8592f29d
N
2654
2655static int reserve_space(struct supertype *st, int raiddisks,
2656 unsigned long long size, int chunk,
2657 unsigned long long *freesize)
2658{
2659 /* Find 'raiddisks' spare extents at least 'size' big (but
2660 * only caring about multiples of 'chunk') and remember
2661 * them.
2662 * If the cannot be found, fail.
2663 */
2664 struct dl *dl;
2665 struct ddf_super *ddf = st->sb;
2666 int cnt = 0;
2667
2668 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 2669 dl->raiddisk = -1;
8592f29d
N
2670 dl->esize = 0;
2671 }
2672 /* Now find largest extent on each device */
2673 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2674 struct extent *e = get_extents(ddf, dl);
2675 unsigned long long pos = 0;
2676 int i = 0;
2677 int found = 0;
2678 unsigned long long minsize = size;
2679
2680 if (size == 0)
2681 minsize = chunk;
2682
2683 if (!e)
2684 continue;
2685 do {
2686 unsigned long long esize;
2687 esize = e[i].start - pos;
2688 if (esize >= minsize) {
2689 found = 1;
2690 minsize = esize;
2691 }
2692 pos = e[i].start + e[i].size;
2693 i++;
2694 } while (e[i-1].size);
2695 if (found) {
2696 cnt++;
2697 dl->esize = minsize;
2698 }
2699 free(e);
2700 }
2701 if (cnt < raiddisks) {
e7b84f9d 2702 pr_err("not enough devices with space to create array.\n");
8592f29d
N
2703 return 0; /* No enough free spaces large enough */
2704 }
2705 if (size == 0) {
2706 /* choose the largest size of which there are at least 'raiddisk' */
2707 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2708 struct dl *dl2;
2709 if (dl->esize <= size)
2710 continue;
2711 /* This is bigger than 'size', see if there are enough */
2712 cnt = 0;
7b80ad6a 2713 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
2714 if (dl2->esize >= dl->esize)
2715 cnt++;
2716 if (cnt >= raiddisks)
2717 size = dl->esize;
2718 }
2719 if (chunk) {
2720 size = size / chunk;
2721 size *= chunk;
2722 }
2723 *freesize = size;
2724 if (size < 32) {
e7b84f9d 2725 pr_err("not enough spare devices to create array.\n");
8592f29d
N
2726 return 0;
2727 }
2728 }
2729 /* We have a 'size' of which there are enough spaces.
2730 * We simply do a first-fit */
2731 cnt = 0;
2732 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
2733 if (dl->esize < size)
2734 continue;
613b0d17 2735
8592f29d
N
2736 dl->raiddisk = cnt;
2737 cnt++;
2738 }
2739 return 1;
2740}
2741
2c514b71
NB
2742static int
2743validate_geometry_ddf_container(struct supertype *st,
2744 int level, int layout, int raiddisks,
2745 int chunk, unsigned long long size,
af4348dd 2746 unsigned long long data_offset,
2c514b71
NB
2747 char *dev, unsigned long long *freesize,
2748 int verbose);
78e44928
NB
2749
2750static int validate_geometry_ddf_bvd(struct supertype *st,
2751 int level, int layout, int raiddisks,
c21e737b 2752 int *chunk, unsigned long long size,
af4348dd 2753 unsigned long long data_offset,
2c514b71
NB
2754 char *dev, unsigned long long *freesize,
2755 int verbose);
78e44928
NB
2756
2757static int validate_geometry_ddf(struct supertype *st,
2c514b71 2758 int level, int layout, int raiddisks,
c21e737b 2759 int *chunk, unsigned long long size,
af4348dd 2760 unsigned long long data_offset,
2c514b71
NB
2761 char *dev, unsigned long long *freesize,
2762 int verbose)
a322f70c
DW
2763{
2764 int fd;
2765 struct mdinfo *sra;
2766 int cfd;
2767
2768 /* ddf potentially supports lots of things, but it depends on
2769 * what devices are offered (and maybe kernel version?)
2770 * If given unused devices, we will make a container.
2771 * If given devices in a container, we will make a BVD.
2772 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
2773 */
2774
bb7295f1
N
2775 if (chunk && *chunk == UnSet)
2776 *chunk = DEFAULT_CHUNK;
2777
542ef4ec 2778 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 2779 if (level == LEVEL_CONTAINER) {
78e44928
NB
2780 /* Must be a fresh device to add to a container */
2781 return validate_geometry_ddf_container(st, level, layout,
c21e737b 2782 raiddisks, chunk?*chunk:0,
af4348dd
N
2783 size, data_offset, dev,
2784 freesize,
2c514b71 2785 verbose);
5f8097be
NB
2786 }
2787
78e44928
NB
2788 if (!dev) {
2789 /* Initial sanity check. Exclude illegal levels. */
2790 int i;
2791 for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
2792 if (ddf_level_num[i].num2 == level)
2793 break;
b42f577a
N
2794 if (ddf_level_num[i].num1 == MAXINT) {
2795 if (verbose)
e7b84f9d 2796 pr_err("DDF does not support level %d arrays\n",
613b0d17 2797 level);
78e44928 2798 return 0;
b42f577a 2799 }
78e44928 2800 /* Should check layout? etc */
8592f29d
N
2801
2802 if (st->sb && freesize) {
2803 /* --create was given a container to create in.
2804 * So we need to check that there are enough
2805 * free spaces and return the amount of space.
2806 * We may as well remember which drives were
2807 * chosen so that add_to_super/getinfo_super
2808 * can return them.
2809 */
c21e737b 2810 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 2811 }
a322f70c 2812 return 1;
78e44928 2813 }
a322f70c 2814
8592f29d
N
2815 if (st->sb) {
2816 /* A container has already been opened, so we are
2817 * creating in there. Maybe a BVD, maybe an SVD.
2818 * Should make a distinction one day.
2819 */
2820 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
2821 chunk, size, data_offset, dev,
2822 freesize,
8592f29d
N
2823 verbose);
2824 }
78e44928
NB
2825 /* This is the first device for the array.
2826 * If it is a container, we read it in and do automagic allocations,
2827 * no other devices should be given.
2828 * Otherwise it must be a member device of a container, and we
2829 * do manual allocation.
2830 * Later we should check for a BVD and make an SVD.
a322f70c 2831 */
a322f70c
DW
2832 fd = open(dev, O_RDONLY|O_EXCL, 0);
2833 if (fd >= 0) {
4dd2df09 2834 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
2835 close(fd);
2836 if (sra && sra->array.major_version == -1 &&
78e44928
NB
2837 strcmp(sra->text_version, "ddf") == 0) {
2838
2839 /* load super */
2840 /* find space for 'n' devices. */
2841 /* remember the devices */
2842 /* Somehow return the fact that we have enough */
a322f70c
DW
2843 }
2844
2c514b71 2845 if (verbose)
e7b84f9d
N
2846 pr_err("ddf: Cannot create this array "
2847 "on device %s - a container is required.\n",
2848 dev);
a322f70c
DW
2849 return 0;
2850 }
2851 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 2852 if (verbose)
e7b84f9d 2853 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 2854 dev, strerror(errno));
a322f70c
DW
2855 return 0;
2856 }
2857 /* Well, it is in use by someone, maybe a 'ddf' container. */
2858 cfd = open_container(fd);
2859 if (cfd < 0) {
2860 close(fd);
2c514b71 2861 if (verbose)
e7b84f9d 2862 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 2863 dev, strerror(EBUSY));
a322f70c
DW
2864 return 0;
2865 }
4dd2df09 2866 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
2867 close(fd);
2868 if (sra && sra->array.major_version == -1 &&
2869 strcmp(sra->text_version, "ddf") == 0) {
2870 /* This is a member of a ddf container. Load the container
2871 * and try to create a bvd
2872 */
2873 struct ddf_super *ddf;
e1902a7b 2874 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 2875 st->sb = ddf;
4dd2df09 2876 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 2877 close(cfd);
78e44928 2878 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 2879 raiddisks, chunk, size,
af4348dd 2880 data_offset,
2c514b71
NB
2881 dev, freesize,
2882 verbose);
a322f70c
DW
2883 }
2884 close(cfd);
c42ec1ed
DW
2885 } else /* device may belong to a different container */
2886 return 0;
2887
a322f70c
DW
2888 return 1;
2889}
2890
2c514b71
NB
2891static int
2892validate_geometry_ddf_container(struct supertype *st,
2893 int level, int layout, int raiddisks,
2894 int chunk, unsigned long long size,
af4348dd 2895 unsigned long long data_offset,
2c514b71
NB
2896 char *dev, unsigned long long *freesize,
2897 int verbose)
a322f70c
DW
2898{
2899 int fd;
2900 unsigned long long ldsize;
2901
2902 if (level != LEVEL_CONTAINER)
2903 return 0;
2904 if (!dev)
2905 return 1;
2906
2907 fd = open(dev, O_RDONLY|O_EXCL, 0);
2908 if (fd < 0) {
2c514b71 2909 if (verbose)
e7b84f9d 2910 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 2911 dev, strerror(errno));
a322f70c
DW
2912 return 0;
2913 }
2914 if (!get_dev_size(fd, dev, &ldsize)) {
2915 close(fd);
2916 return 0;
2917 }
2918 close(fd);
2919
387fcd59 2920 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
2921 if (*freesize == 0)
2922 return 0;
a322f70c
DW
2923
2924 return 1;
2925}
2926
78e44928
NB
2927static int validate_geometry_ddf_bvd(struct supertype *st,
2928 int level, int layout, int raiddisks,
c21e737b 2929 int *chunk, unsigned long long size,
af4348dd 2930 unsigned long long data_offset,
2c514b71
NB
2931 char *dev, unsigned long long *freesize,
2932 int verbose)
a322f70c
DW
2933{
2934 struct stat stb;
2935 struct ddf_super *ddf = st->sb;
2936 struct dl *dl;
5f8097be
NB
2937 unsigned long long pos = 0;
2938 unsigned long long maxsize;
2939 struct extent *e;
2940 int i;
a322f70c 2941 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
2942 if (level == LEVEL_CONTAINER) {
2943 if (verbose)
e7b84f9d 2944 pr_err("DDF cannot create a container within an container\n");
a322f70c 2945 return 0;
b42f577a 2946 }
a322f70c
DW
2947 /* We must have the container info already read in. */
2948 if (!ddf)
2949 return 0;
2950
5f8097be
NB
2951 if (!dev) {
2952 /* General test: make sure there is space for
2953 * 'raiddisks' device extents of size 'size'.
2954 */
2955 unsigned long long minsize = size;
2956 int dcnt = 0;
2957 if (minsize == 0)
2958 minsize = 8;
2959 for (dl = ddf->dlist; dl ; dl = dl->next)
2960 {
2961 int found = 0;
7e1432fb 2962 pos = 0;
5f8097be
NB
2963
2964 i = 0;
2965 e = get_extents(ddf, dl);
2966 if (!e) continue;
2967 do {
2968 unsigned long long esize;
2969 esize = e[i].start - pos;
2970 if (esize >= minsize)
2971 found = 1;
2972 pos = e[i].start + e[i].size;
2973 i++;
2974 } while (e[i-1].size);
2975 if (found)
2976 dcnt++;
2977 free(e);
2978 }
2979 if (dcnt < raiddisks) {
2c514b71 2980 if (verbose)
e7b84f9d
N
2981 pr_err("ddf: Not enough devices with "
2982 "space for this array (%d < %d)\n",
2983 dcnt, raiddisks);
5f8097be
NB
2984 return 0;
2985 }
2986 return 1;
2987 }
a322f70c
DW
2988 /* This device must be a member of the set */
2989 if (stat(dev, &stb) < 0)
2990 return 0;
2991 if ((S_IFMT & stb.st_mode) != S_IFBLK)
2992 return 0;
2993 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
2994 if (dl->major == (int)major(stb.st_rdev) &&
2995 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
2996 break;
2997 }
5f8097be 2998 if (!dl) {
2c514b71 2999 if (verbose)
e7b84f9d 3000 pr_err("ddf: %s is not in the "
613b0d17
N
3001 "same DDF set\n",
3002 dev);
5f8097be
NB
3003 return 0;
3004 }
3005 e = get_extents(ddf, dl);
3006 maxsize = 0;
3007 i = 0;
3008 if (e) do {
613b0d17
N
3009 unsigned long long esize;
3010 esize = e[i].start - pos;
3011 if (esize >= maxsize)
3012 maxsize = esize;
3013 pos = e[i].start + e[i].size;
3014 i++;
3015 } while (e[i-1].size);
5f8097be 3016 *freesize = maxsize;
a322f70c
DW
3017 // FIXME here I am
3018
3019 return 1;
3020}
59e36268 3021
a322f70c 3022static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3023 void **sbp, char *devname)
a322f70c
DW
3024{
3025 struct mdinfo *sra;
3026 struct ddf_super *super;
3027 struct mdinfo *sd, *best = NULL;
3028 int bestseq = 0;
3029 int seq;
3030 char nm[20];
3031 int dfd;
3032
b526e52d 3033 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3034 if (!sra)
3035 return 1;
3036 if (sra->array.major_version != -1 ||
3037 sra->array.minor_version != -2 ||
3038 strcmp(sra->text_version, "ddf") != 0)
3039 return 1;
3040
6416d527 3041 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3042 return 1;
a2349791 3043 memset(super, 0, sizeof(*super));
a322f70c
DW
3044
3045 /* first, try each device, and choose the best ddf */
3046 for (sd = sra->devs ; sd ; sd = sd->next) {
3047 int rv;
3048 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3049 dfd = dev_open(nm, O_RDONLY);
3050 if (dfd < 0)
a322f70c
DW
3051 return 2;
3052 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3053 close(dfd);
a322f70c
DW
3054 if (rv == 0) {
3055 seq = __be32_to_cpu(super->active->seq);
3056 if (super->active->openflag)
3057 seq--;
3058 if (!best || seq > bestseq) {
3059 bestseq = seq;
3060 best = sd;
3061 }
3062 }
3063 }
3064 if (!best)
3065 return 1;
3066 /* OK, load this ddf */
3067 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3068 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3069 if (dfd < 0)
a322f70c
DW
3070 return 1;
3071 load_ddf_headers(dfd, super, NULL);
3072 load_ddf_global(dfd, super, NULL);
3073 close(dfd);
3074 /* Now we need the device-local bits */
3075 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3076 int rv;
3077
a322f70c 3078 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3079 dfd = dev_open(nm, O_RDWR);
7a7cc504 3080 if (dfd < 0)
a322f70c 3081 return 2;
3d2c4fc7
DW
3082 rv = load_ddf_headers(dfd, super, NULL);
3083 if (rv == 0)
e1902a7b 3084 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3085 if (rv)
3086 return 1;
a322f70c 3087 }
33414a01 3088
a322f70c
DW
3089 *sbp = super;
3090 if (st->ss == NULL) {
78e44928 3091 st->ss = &super_ddf;
a322f70c
DW
3092 st->minor_version = 0;
3093 st->max_devs = 512;
3094 }
4dd2df09 3095 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3096 return 0;
3097}
2b959fbf
N
3098
3099static int load_container_ddf(struct supertype *st, int fd,
3100 char *devname)
3101{
3102 return load_super_ddf_all(st, fd, &st->sb, devname);
3103}
3104
0e600426 3105#endif /* MDASSEMBLE */
a322f70c 3106
a5c7adb3 3107static int check_secondary(const struct vcl *vc)
3108{
3109 const struct vd_config *conf = &vc->conf;
3110 int i;
3111
3112 /* The only DDF secondary RAID level md can support is
3113 * RAID 10, if the stripe sizes and Basic volume sizes
3114 * are all equal.
3115 * Other configurations could in theory be supported by exposing
3116 * the BVDs to user space and using device mapper for the secondary
3117 * mapping. So far we don't support that.
3118 */
3119
3120 __u64 sec_elements[4] = {0, 0, 0, 0};
3121#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3122#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3123
3124 if (vc->other_bvds == NULL) {
3125 pr_err("No BVDs for secondary RAID found\n");
3126 return -1;
3127 }
3128 if (conf->prl != DDF_RAID1) {
3129 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3130 return -1;
3131 }
3132 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3133 pr_err("Secondary RAID level %d is unsupported\n",
3134 conf->srl);
3135 return -1;
3136 }
3137 __set_sec_seen(conf->sec_elmnt_seq);
3138 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3139 const struct vd_config *bvd = vc->other_bvds[i];
3140 if (bvd == NULL) {
e3c2a365 3141 pr_err("BVD %d is missing\n", i+1);
a5c7adb3 3142 return -1;
3143 }
3144 if (bvd->srl != conf->srl) {
3145 pr_err("Inconsistent secondary RAID level across BVDs\n");
3146 return -1;
3147 }
3148 if (bvd->prl != conf->prl) {
3149 pr_err("Different RAID levels for BVDs are unsupported\n");
3150 return -1;
3151 }
3152 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3153 pr_err("All BVDs must have the same number of primary elements\n");
3154 return -1;
3155 }
3156 if (bvd->chunk_shift != conf->chunk_shift) {
3157 pr_err("Different strip sizes for BVDs are unsupported\n");
3158 return -1;
3159 }
3160 if (bvd->array_blocks != conf->array_blocks) {
3161 pr_err("Different BVD sizes are unsupported\n");
3162 return -1;
3163 }
3164 __set_sec_seen(bvd->sec_elmnt_seq);
3165 }
3166 for (i = 0; i < conf->sec_elmnt_count; i++) {
3167 if (!__was_sec_seen(i)) {
3168 pr_err("BVD %d is missing\n", i);
3169 return -1;
3170 }
3171 }
3172 return 0;
3173}
3174
8a38db86 3175#define NO_SUCH_REFNUM (0xFFFFFFFF)
3176static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3177 __u32 refnum, unsigned int nmax,
3178 const struct vd_config **bvd,
3179 unsigned int *idx)
8a38db86 3180{
4e587018 3181 unsigned int i, j, n, sec, cnt;
3182
3183 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3184 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3185
3186 for (i = 0, j = 0 ; i < nmax ; i++) {
3187 /* j counts valid entries for this BVD */
3188 if (vc->conf.phys_refnum[i] != 0xffffffff)
3189 j++;
3190 if (vc->conf.phys_refnum[i] == refnum) {
3191 *bvd = &vc->conf;
3192 *idx = i;
3193 return sec * cnt + j - 1;
3194 }
3195 }
3196 if (vc->other_bvds == NULL)
3197 goto bad;
3198
3199 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3200 struct vd_config *vd = vc->other_bvds[n-1];
3201 if (vd == NULL)
3202 continue;
3203 sec = vd->sec_elmnt_seq;
3204 for (i = 0, j = 0 ; i < nmax ; i++) {
3205 if (vd->phys_refnum[i] != 0xffffffff)
3206 j++;
3207 if (vd->phys_refnum[i] == refnum) {
3208 *bvd = vd;
3209 *idx = i;
3210 return sec * cnt + j - 1;
3211 }
3212 }
3213 }
3214bad:
3215 *bvd = NULL;
8a38db86 3216 return NO_SUCH_REFNUM;
3217}
3218
00bbdbda 3219static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3220{
3221 /* Given a container loaded by load_super_ddf_all,
3222 * extract information about all the arrays into
3223 * an mdinfo tree.
3224 *
3225 * For each vcl in conflist: create an mdinfo, fill it in,
3226 * then look for matching devices (phys_refnum) in dlist
3227 * and create appropriate device mdinfo.
3228 */
3229 struct ddf_super *ddf = st->sb;
3230 struct mdinfo *rest = NULL;
3231 struct vcl *vc;
3232
3233 for (vc = ddf->conflist ; vc ; vc=vc->next)
3234 {
f21e18ca
N
3235 unsigned int i;
3236 unsigned int j;
598f0d58 3237 struct mdinfo *this;
00bbdbda 3238 char *ep;
90fa1a29 3239 __u32 *cptr;
8a38db86 3240 unsigned int pd;
00bbdbda
N
3241
3242 if (subarray &&
3243 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3244 *ep != '\0'))
3245 continue;
3246
a5c7adb3 3247 if (vc->conf.sec_elmnt_count > 1) {
3248 if (check_secondary(vc) != 0)
3249 continue;
3250 }
3251
503975b9 3252 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3253 this->next = rest;
3254 rest = this;
3255
4e587018 3256 if (vc->conf.sec_elmnt_count == 1) {
3257 this->array.level = map_num1(ddf_level_num,
3258 vc->conf.prl);
3259 this->array.raid_disks =
3260 __be16_to_cpu(vc->conf.prim_elmnt_count);
3261 this->array.layout =
3262 rlq_to_layout(vc->conf.rlq, vc->conf.prl,
3263 this->array.raid_disks);
3264 } else {
3265 /* The only supported layout is RAID 10.
3266 * Compatibility has been checked in check_secondary()
3267 * above.
3268 */
3269 this->array.level = 10;
3270 this->array.raid_disks =
3271 __be16_to_cpu(vc->conf.prim_elmnt_count)
3272 * vc->conf.sec_elmnt_count;
3273 this->array.layout = 0x100 |
3274 __be16_to_cpu(vc->conf.prim_elmnt_count);
3275 }
598f0d58 3276 this->array.md_minor = -1;
f35f2525
N
3277 this->array.major_version = -1;
3278 this->array.minor_version = -2;
90fa1a29
JS
3279 cptr = (__u32 *)(vc->conf.guid + 16);
3280 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3281 this->array.utime = DECADE +
3282 __be32_to_cpu(vc->conf.timestamp);
3283 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3284
59e36268 3285 i = vc->vcnum;
7a7cc504
NB
3286 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3287 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3288 DDF_init_full) {
598f0d58 3289 this->array.state = 0;
ed9d66aa
NB
3290 this->resync_start = 0;
3291 } else {
598f0d58 3292 this->array.state = 1;
b7528a20 3293 this->resync_start = MaxSector;
ed9d66aa 3294 }
db42fa9b
N
3295 memcpy(this->name, ddf->virt->entries[i].name, 16);
3296 this->name[16]=0;
3297 for(j=0; j<16; j++)
3298 if (this->name[j] == ' ')
3299 this->name[j] = 0;
598f0d58
NB
3300
3301 memset(this->uuid, 0, sizeof(this->uuid));
3302 this->component_size = __be64_to_cpu(vc->conf.blocks);
3303 this->array.size = this->component_size / 2;
5f2aace8 3304 this->container_member = i;
598f0d58 3305
c5afc314
N
3306 ddf->currentconf = vc;
3307 uuid_from_super_ddf(st, this->uuid);
3308 ddf->currentconf = NULL;
3309
60f18132 3310 sprintf(this->text_version, "/%s/%d",
4dd2df09 3311 st->container_devnm, this->container_member);
60f18132 3312
8a38db86 3313 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3314 struct mdinfo *dev;
3315 struct dl *d;
4e587018 3316 const struct vd_config *bvd;
3317 unsigned int iphys;
3318 __u64 *lba_offset;
fa033bec 3319 int stt;
598f0d58 3320
8a38db86 3321 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3322 continue;
0cf5ef67
N
3323
3324 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3325 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3326 != DDF_Online)
3327 continue;
3328
8a38db86 3329 i = get_pd_index_from_refnum(
4e587018 3330 vc, ddf->phys->entries[pd].refnum,
3331 ddf->mppe, &bvd, &iphys);
8a38db86 3332 if (i == NO_SUCH_REFNUM)
3333 continue;
3334
fa033bec 3335 this->array.working_disks++;
bc17324f 3336
0cf5ef67 3337 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3338 if (d->disk.refnum ==
3339 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3340 break;
3341 if (d == NULL)
3342 /* Haven't found that one yet, maybe there are others */
3343 continue;
3344
503975b9 3345 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3346 dev->next = this->devs;
3347 this->devs = dev;
3348
3349 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3350 dev->disk.major = d->major;
3351 dev->disk.minor = d->minor;
3352 dev->disk.raid_disk = i;
3353 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3354 dev->recovery_start = MaxSector;
598f0d58 3355
120f7677 3356 dev->events = __be32_to_cpu(ddf->primary.seq);
4e587018 3357 lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
3358 dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
3359 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3360 if (d->devname)
3361 strcpy(dev->name, d->devname);
3362 }
3363 }
3364 return rest;
3365}
3366
955e9ea1 3367static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3368{
955e9ea1 3369 struct ddf_super *ddf = st->sb;
a322f70c 3370 unsigned long long dsize;
6416d527 3371 void *buf;
3d2c4fc7 3372 int rc;
a322f70c 3373
955e9ea1
DW
3374 if (!ddf)
3375 return 1;
3376
3377 /* ->dlist and ->conflist will be set for updates, currently not
3378 * supported
3379 */
3380 if (ddf->dlist || ddf->conflist)
3381 return 1;
3382
a322f70c
DW
3383 if (!get_dev_size(fd, NULL, &dsize))
3384 return 1;
3385
3d2c4fc7
DW
3386 if (posix_memalign(&buf, 512, 512) != 0)
3387 return 1;
6416d527
NB
3388 memset(buf, 0, 512);
3389
a322f70c 3390 lseek64(fd, dsize-512, 0);
3d2c4fc7 3391 rc = write(fd, buf, 512);
6416d527 3392 free(buf);
3d2c4fc7
DW
3393 if (rc < 0)
3394 return 1;
a322f70c
DW
3395 return 0;
3396}
3397
a19c88b8
NB
3398static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3399{
3400 /*
3401 * return:
3402 * 0 same, or first was empty, and second was copied
3403 * 1 second had wrong number
3404 * 2 wrong uuid
3405 * 3 wrong other info
3406 */
3407 struct ddf_super *first = st->sb;
3408 struct ddf_super *second = tst->sb;
4eefd651 3409 struct dl *dl1, *dl2;
3410 struct vcl *vl1, *vl2;
2d210697 3411 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3412
3413 if (!first) {
3414 st->sb = tst->sb;
3415 tst->sb = NULL;
3416 return 0;
3417 }
3418
3419 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3420 return 2;
3421
2d210697 3422 if (first->anchor.seq != second->anchor.seq) {
3423 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3424 __be32_to_cpu(first->anchor.seq),
3425 __be32_to_cpu(second->anchor.seq));
3426 return 3;
3427 }
3428 if (first->max_part != second->max_part ||
3429 first->phys->used_pdes != second->phys->used_pdes ||
3430 first->virt->populated_vdes != second->virt->populated_vdes) {
3431 dprintf("%s: PD/VD number mismatch\n", __func__);
3432 return 3;
3433 }
3434
3435 max_pds = __be16_to_cpu(first->phys->used_pdes);
3436 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3437 for (pd = 0; pd < max_pds; pd++)
3438 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3439 break;
3440 if (pd == max_pds) {
3441 dprintf("%s: no match for disk %08x\n", __func__,
3442 __be32_to_cpu(dl2->disk.refnum));
3443 return 3;
3444 }
3445 }
3446
3447 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3448 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3449 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3450 continue;
3451 for (vd = 0; vd < max_vds; vd++)
3452 if (!memcmp(first->virt->entries[vd].guid,
3453 vl2->conf.guid, DDF_GUID_LEN))
3454 break;
3455 if (vd == max_vds) {
3456 dprintf("%s: no match for VD config\n", __func__);
3457 return 3;
3458 }
3459 }
a19c88b8 3460 /* FIXME should I look at anything else? */
2d210697 3461
4eefd651 3462 /*
3463 At this point we are fairly sure that the meta data matches.
3464 But the new disk may contain additional local data.
3465 Add it to the super block.
3466 */
3467 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3468 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3469 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3470 DDF_GUID_LEN))
3471 break;
3472 if (vl1) {
3473 if (vl1->other_bvds != NULL &&
3474 vl1->conf.sec_elmnt_seq !=
3475 vl2->conf.sec_elmnt_seq) {
3476 dprintf("%s: adding BVD %u\n", __func__,
3477 vl2->conf.sec_elmnt_seq);
3478 add_other_bvd(vl1, &vl2->conf,
3479 first->conf_rec_len*512);
3480 }
3481 continue;
3482 }
3483
3484 if (posix_memalign((void **)&vl1, 512,
3485 (first->conf_rec_len*512 +
3486 offsetof(struct vcl, conf))) != 0) {
3487 pr_err("%s could not allocate vcl buf\n",
3488 __func__);
3489 return 3;
3490 }
3491
3492 vl1->next = first->conflist;
3493 vl1->block_sizes = NULL;
3494 if (vl1->conf.sec_elmnt_count > 1) {
3495 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3496 sizeof(struct vd_config *));
3497 } else
3498 vl1->other_bvds = NULL;
3499 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3500 vl1->lba_offset = (__u64 *)
3501 &vl1->conf.phys_refnum[first->mppe];
3502 for (vd = 0; vd < max_vds; vd++)
3503 if (!memcmp(first->virt->entries[vd].guid,
3504 vl1->conf.guid, DDF_GUID_LEN))
3505 break;
3506 vl1->vcnum = vd;
3507 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3508 first->conflist = vl1;
3509 }
3510
3511 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3512 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3513 if (dl1->disk.refnum == dl2->disk.refnum)
3514 break;
3515 if (dl1)
3516 continue;
3517
3518 if (posix_memalign((void **)&dl1, 512,
3519 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3520 != 0) {
3521 pr_err("%s could not allocate disk info buffer\n",
3522 __func__);
3523 return 3;
3524 }
3525 memcpy(dl1, dl2, sizeof(*dl1));
3526 dl1->mdupdate = NULL;
3527 dl1->next = first->dlist;
3528 dl1->fd = -1;
3529 for (pd = 0; pd < max_pds; pd++)
3530 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3531 break;
3532 dl1->pdnum = pd;
3533 if (dl2->spare) {
3534 if (posix_memalign((void **)&dl1->spare, 512,
3535 first->conf_rec_len*512) != 0) {
3536 pr_err("%s could not allocate spare info buf\n",
3537 __func__);
3538 return 3;
3539 }
3540 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3541 }
3542 for (vd = 0 ; vd < first->max_part ; vd++) {
3543 if (!dl2->vlist[vd]) {
3544 dl1->vlist[vd] = NULL;
3545 continue;
3546 }
3547 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3548 if (!memcmp(vl1->conf.guid,
3549 dl2->vlist[vd]->conf.guid,
3550 DDF_GUID_LEN))
3551 break;
3552 dl1->vlist[vd] = vl1;
3553 }
3554 }
3555 first->dlist = dl1;
3556 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3557 dl1->disk.refnum);
3558 }
3559
a19c88b8
NB
3560 return 0;
3561}
3562
0e600426 3563#ifndef MDASSEMBLE
4e5528c6
NB
3564/*
3565 * A new array 'a' has been started which claims to be instance 'inst'
3566 * within container 'c'.
3567 * We need to confirm that the array matches the metadata in 'c' so
3568 * that we don't corrupt any metadata.
3569 */
cba0191b 3570static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3571{
2c514b71 3572 dprintf("ddf: open_new %s\n", inst);
cba0191b 3573 a->info.container_member = atoi(inst);
549e9569
NB
3574 return 0;
3575}
3576
4e5528c6
NB
3577/*
3578 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3579 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3580 * clean up to the point (in sectors). If that cannot be recorded in the
3581 * metadata, then leave it as dirty.
3582 *
3583 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3584 * !global! virtual_disk.virtual_entry structure.
3585 */
01f157d7 3586static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3587{
4e5528c6
NB
3588 struct ddf_super *ddf = a->container->sb;
3589 int inst = a->info.container_member;
18a2f463 3590 int old = ddf->virt->entries[inst].state;
01f157d7
N
3591 if (consistent == 2) {
3592 /* Should check if a recovery should be started FIXME */
3593 consistent = 1;
b7941fd6 3594 if (!is_resync_complete(&a->info))
01f157d7
N
3595 consistent = 0;
3596 }
ed9d66aa
NB
3597 if (consistent)
3598 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3599 else
4e5528c6 3600 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463
NB
3601 if (old != ddf->virt->entries[inst].state)
3602 ddf->updates_pending = 1;
3603
3604 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3605 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3606 if (is_resync_complete(&a->info))
ed9d66aa 3607 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3608 else if (a->info.resync_start == 0)
ed9d66aa 3609 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3610 else
ed9d66aa 3611 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463
NB
3612 if (old != ddf->virt->entries[inst].init_state)
3613 ddf->updates_pending = 1;
ed9d66aa 3614
2c514b71 3615 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
b7941fd6 3616 a->info.resync_start);
01f157d7 3617 return consistent;
fd7cde1b
DW
3618}
3619
/*
 * Given 'ptr', a pointer to the field 'member' inside an object of type
 * 'type', yield a pointer to that enclosing object.  The temporary makes
 * the compiler check that 'ptr' really has the member's type.
 */
#define container_of(ptr, type, member)					\
	({								\
		const typeof(((type *)0)->member) *cof_mptr_ = (ptr);	\
		(type *)((char *)cof_mptr_ - offsetof(type, member));	\
	})
7a7cc504
NB
3623/*
3624 * The state of each disk is stored in the global phys_disk structure
3625 * in phys_disk.entries[n].state.
3626 * This makes various combinations awkward.
3627 * - When a device fails in any array, it must be failed in all arrays
3628 * that include a part of this device.
3629 * - When a component is rebuilding, we cannot include it officially in the
3630 * array unless this is the only array that uses the device.
3631 *
3632 * So: when transitioning:
3633 * Online -> failed, just set failed flag. monitor will propagate
3634 * spare -> online, the device might need to be added to the array.
3635 * spare -> failed, just set failed. Don't worry if in array or not.
3636 */
8d45d196 3637static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 3638{
7a7cc504 3639 struct ddf_super *ddf = a->container->sb;
f21e18ca 3640 unsigned int inst = a->info.container_member;
7a7cc504
NB
3641 struct vd_config *vc = find_vdcr(ddf, inst);
3642 int pd = find_phys(ddf, vc->phys_refnum[n]);
3643 int i, st, working;
e1316fab
N
3644 struct mdinfo *mdi;
3645 struct dl *dl;
7a7cc504
NB
3646
3647 if (vc == NULL) {
2c514b71 3648 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
3649 return;
3650 }
e1316fab
N
3651 /* Find the matching slot in 'info'. */
3652 for (mdi = a->info.devs; mdi; mdi = mdi->next)
3653 if (mdi->disk.raid_disk == n)
3654 break;
3655 if (!mdi)
3656 return;
3657
3658 /* and find the 'dl' entry corresponding to that. */
3659 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
3660 if (mdi->state_fd >= 0 &&
3661 mdi->disk.major == dl->major &&
e1316fab
N
3662 mdi->disk.minor == dl->minor)
3663 break;
3664 if (!dl)
3665 return;
3666
3667 if (pd < 0 || pd != dl->pdnum) {
3668 /* disk doesn't currently exist or has changed.
3669 * If it is now in_sync, insert it. */
7a7cc504 3670 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
e1316fab
N
3671 struct vcl *vcl;
3672 pd = dl->pdnum;
3673 vc->phys_refnum[n] = dl->disk.refnum;
3674 vcl = container_of(vc, struct vcl, conf);
3675 vcl->lba_offset[n] = mdi->data_offset;
3676 ddf->phys->entries[pd].type &=
3677 ~__cpu_to_be16(DDF_Global_Spare);
3678 ddf->phys->entries[pd].type |=
3679 __cpu_to_be16(DDF_Active_in_VD);
3680 ddf->updates_pending = 1;
7a7cc504
NB
3681 }
3682 } else {
18a2f463 3683 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
3684 if (state & DS_FAULTY)
3685 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
3686 if (state & DS_INSYNC) {
3687 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
3688 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
3689 }
18a2f463
NB
3690 if (old != ddf->phys->entries[pd].state)
3691 ddf->updates_pending = 1;
7a7cc504
NB
3692 }
3693
2c514b71 3694 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 3695
7a7cc504
NB
3696 /* Now we need to check the state of the array and update
3697 * virtual_disk.entries[n].state.
3698 * It needs to be one of "optimal", "degraded", "failed".
3699 * I don't understand 'deleted' or 'missing'.
3700 */
3701 working = 0;
3702 for (i=0; i < a->info.array.raid_disks; i++) {
3703 pd = find_phys(ddf, vc->phys_refnum[i]);
3704 if (pd < 0)
3705 continue;
57632f4a
NB
3706 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3707 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
7a7cc504
NB
3708 == DDF_Online)
3709 working++;
3710 }
3711 state = DDF_state_degraded;
3712 if (working == a->info.array.raid_disks)
3713 state = DDF_state_optimal;
3714 else switch(vc->prl) {
613b0d17
N
3715 case DDF_RAID0:
3716 case DDF_CONCAT:
3717 case DDF_JBOD:
7a7cc504 3718 state = DDF_state_failed;
613b0d17
N
3719 break;
3720 case DDF_RAID1:
3721 if (working == 0)
3722 state = DDF_state_failed;
3723 else if (working == 2 && state == DDF_state_degraded)
3724 state = DDF_state_part_optimal;
3725 break;
3726 case DDF_RAID4:
3727 case DDF_RAID5:
3728 if (working < a->info.array.raid_disks-1)
3729 state = DDF_state_failed;
3730 break;
3731 case DDF_RAID6:
3732 if (working < a->info.array.raid_disks-2)
3733 state = DDF_state_failed;
3734 else if (working == a->info.array.raid_disks-1)
3735 state = DDF_state_part_optimal;
3736 break;
3737 }
7a7cc504 3738
18a2f463
NB
3739 if (ddf->virt->entries[inst].state !=
3740 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
3741 | state)) {
3742
3743 ddf->virt->entries[inst].state =
3744 (ddf->virt->entries[inst].state & ~DDF_state_mask)
3745 | state;
3746 ddf->updates_pending = 1;
3747 }
7a7cc504 3748
549e9569
NB
3749}
3750
2e735d19 3751static void ddf_sync_metadata(struct supertype *st)
549e9569 3752{
7a7cc504
NB
3753
3754 /*
3755 * Write all data to all devices.
3756 * Later, we might be able to track whether only local changes
3757 * have been made, or whether any global data has been changed,
3758 * but ddf is sufficiently weird that it probably always
3759 * changes global data ....
3760 */
18a2f463
NB
3761 struct ddf_super *ddf = st->sb;
3762 if (!ddf->updates_pending)
3763 return;
3764 ddf->updates_pending = 0;
1cc7f4fe 3765 __write_init_super_ddf(st);
2c514b71 3766 dprintf("ddf: sync_metadata\n");
549e9569
NB
3767}
3768
88c164f4
NB
3769static void ddf_process_update(struct supertype *st,
3770 struct metadata_update *update)
3771{
3772 /* Apply this update to the metadata.
3773 * The first 4 bytes are a DDF_*_MAGIC which guides
3774 * our actions.
3775 * Possible update are:
3776 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
3777 * Add a new physical device or remove an old one.
3778 * Changes to this record only happen implicitly.
88c164f4
NB
3779 * used_pdes is the device number.
3780 * DDF_VIRT_RECORDS_MAGIC
3781 * Add a new VD. Possibly also change the 'access' bits.
3782 * populated_vdes is the entry number.
3783 * DDF_VD_CONF_MAGIC
3784 * New or updated VD. the VIRT_RECORD must already
3785 * exist. For an update, phys_refnum and lba_offset
3786 * (at least) are updated, and the VD_CONF must
3787 * be written to precisely those devices listed with
3788 * a phys_refnum.
3789 * DDF_SPARE_ASSIGN_MAGIC
3790 * replacement Spare Assignment Record... but for which device?
3791 *
3792 * So, e.g.:
3793 * - to create a new array, we send a VIRT_RECORD and
3794 * a VD_CONF. Then assemble and start the array.
3795 * - to activate a spare we send a VD_CONF to add the phys_refnum
3796 * and offset. This will also mark the spare as active with
3797 * a spare-assignment record.
3798 */
3799 struct ddf_super *ddf = st->sb;
3800 __u32 *magic = (__u32*)update->buf;
3801 struct phys_disk *pd;
3802 struct virtual_disk *vd;
3803 struct vd_config *vc;
3804 struct vcl *vcl;
3805 struct dl *dl;
f21e18ca
N
3806 unsigned int mppe;
3807 unsigned int ent;
c7079c84 3808 unsigned int pdnum, pd2;
88c164f4 3809
2c514b71 3810 dprintf("Process update %x\n", *magic);
7e1432fb 3811
88c164f4
NB
3812 switch (*magic) {
3813 case DDF_PHYS_RECORDS_MAGIC:
3814
3815 if (update->len != (sizeof(struct phys_disk) +
3816 sizeof(struct phys_disk_entry)))
3817 return;
3818 pd = (struct phys_disk*)update->buf;
3819
3820 ent = __be16_to_cpu(pd->used_pdes);
3821 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
3822 return;
4dd968cc
N
3823 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
3824 struct dl **dlp;
3825 /* removing this disk. */
3826 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
3827 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
3828 struct dl *dl = *dlp;
3829 if (dl->pdnum == (signed)ent) {
3830 close(dl->fd);
3831 dl->fd = -1;
3832 /* FIXME this doesn't free
3833 * dl->devname */
3834 update->space = dl;
3835 *dlp = dl->next;
3836 break;
3837 }
3838 }
3839 ddf->updates_pending = 1;
3840 return;
3841 }
88c164f4
NB
3842 if (!all_ff(ddf->phys->entries[ent].guid))
3843 return;
3844 ddf->phys->entries[ent] = pd->entries[0];
3845 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 3846 __be16_to_cpu(ddf->phys->used_pdes));
18a2f463 3847 ddf->updates_pending = 1;
2cc2983d
N
3848 if (ddf->add_list) {
3849 struct active_array *a;
3850 struct dl *al = ddf->add_list;
3851 ddf->add_list = al->next;
3852
3853 al->next = ddf->dlist;
3854 ddf->dlist = al;
3855
3856 /* As a device has been added, we should check
3857 * for any degraded devices that might make
3858 * use of this spare */
3859 for (a = st->arrays ; a; a=a->next)
3860 a->check_degraded = 1;
3861 }
88c164f4
NB
3862 break;
3863
3864 case DDF_VIRT_RECORDS_MAGIC:
3865
3866 if (update->len != (sizeof(struct virtual_disk) +
3867 sizeof(struct virtual_entry)))
3868 return;
3869 vd = (struct virtual_disk*)update->buf;
3870
3871 ent = __be16_to_cpu(vd->populated_vdes);
3872 if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
3873 return;
3874 if (!all_ff(ddf->virt->entries[ent].guid))
3875 return;
3876 ddf->virt->entries[ent] = vd->entries[0];
3877 ddf->virt->populated_vdes = __cpu_to_be16(1 +
613b0d17 3878 __be16_to_cpu(ddf->virt->populated_vdes));
18a2f463 3879 ddf->updates_pending = 1;
88c164f4
NB
3880 break;
3881
3882 case DDF_VD_CONF_MAGIC:
2c514b71 3883 dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
88c164f4
NB
3884
3885 mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
f21e18ca 3886 if ((unsigned)update->len != ddf->conf_rec_len * 512)
88c164f4
NB
3887 return;
3888 vc = (struct vd_config*)update->buf;
3889 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3890 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
3891 break;
2c514b71 3892 dprintf("vcl = %p\n", vcl);
88c164f4
NB
3893 if (vcl) {
3894 /* An update, just copy the phys_refnum and lba_offset
3895 * fields
3896 */
3897 memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
3898 mppe * (sizeof(__u32) + sizeof(__u64)));
3899 } else {
3900 /* A new VD_CONF */
e6b9548d
DW
3901 if (!update->space)
3902 return;
88c164f4
NB
3903 vcl = update->space;
3904 update->space = NULL;
3905 vcl->next = ddf->conflist;
edd8d13c 3906 memcpy(&vcl->conf, vc, update->len);
88c164f4
NB
3907 vcl->lba_offset = (__u64*)
3908 &vcl->conf.phys_refnum[mppe];
1502a43a
N
3909 for (ent = 0;
3910 ent < __be16_to_cpu(ddf->virt->populated_vdes);
3911 ent++)
3912 if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
3913 DDF_GUID_LEN) == 0) {
3914 vcl->vcnum = ent;
3915 break;
3916 }
88c164f4
NB
3917 ddf->conflist = vcl;
3918 }
c7079c84
N
3919 /* Set DDF_Transition on all Failed devices - to help
3920 * us detect those that are no longer in use
3921 */
3922 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3923 if (ddf->phys->entries[pdnum].state
3924 & __be16_to_cpu(DDF_Failed))
3925 ddf->phys->entries[pdnum].state
3926 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
3927 /* Now make sure vlist is correct for each dl. */
3928 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca
N
3929 unsigned int dn;
3930 unsigned int vn = 0;
8401644c 3931 int in_degraded = 0;
88c164f4
NB
3932 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3933 for (dn=0; dn < ddf->mppe ; dn++)
3934 if (vcl->conf.phys_refnum[dn] ==
3935 dl->disk.refnum) {
8401644c 3936 int vstate;
2c514b71
NB
3937 dprintf("dev %d has %p at %d\n",
3938 dl->pdnum, vcl, vn);
c7079c84
N
3939 /* Clear the Transition flag */
3940 if (ddf->phys->entries[dl->pdnum].state
3941 & __be16_to_cpu(DDF_Failed))
3942 ddf->phys->entries[dl->pdnum].state &=
3943 ~__be16_to_cpu(DDF_Transition);
3944
88c164f4 3945 dl->vlist[vn++] = vcl;
8401644c
N
3946 vstate = ddf->virt->entries[vcl->vcnum].state
3947 & DDF_state_mask;
3948 if (vstate == DDF_state_degraded ||
3949 vstate == DDF_state_part_optimal)
3950 in_degraded = 1;
88c164f4
NB
3951 break;
3952 }
3953 while (vn < ddf->max_part)
3954 dl->vlist[vn++] = NULL;
7e1432fb
NB
3955 if (dl->vlist[0]) {
3956 ddf->phys->entries[dl->pdnum].type &=
3957 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
3958 if (!(ddf->phys->entries[dl->pdnum].type &
3959 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
3960 ddf->phys->entries[dl->pdnum].type |=
3961 __cpu_to_be16(DDF_Active_in_VD);
3962 if (in_degraded)
3963 ddf->phys->entries[dl->pdnum].state |=
3964 __cpu_to_be16(DDF_Rebuilding);
3965 }
7e1432fb
NB
3966 }
3967 if (dl->spare) {
3968 ddf->phys->entries[dl->pdnum].type &=
3969 ~__cpu_to_be16(DDF_Global_Spare);
3970 ddf->phys->entries[dl->pdnum].type |=
3971 __cpu_to_be16(DDF_Spare);
3972 }
3973 if (!dl->vlist[0] && !dl->spare) {
3974 ddf->phys->entries[dl->pdnum].type |=
3975 __cpu_to_be16(DDF_Global_Spare);
3976 ddf->phys->entries[dl->pdnum].type &=
3977 ~__cpu_to_be16(DDF_Spare |
3978 DDF_Active_in_VD);
3979 }
88c164f4 3980 }
c7079c84
N
3981
3982 /* Now remove any 'Failed' devices that are not part
3983 * of any VD. They will have the Transition flag set.
3984 * Once done, we need to update all dl->pdnum numbers.
3985 */
3986 pd2 = 0;
3987 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3988 if ((ddf->phys->entries[pdnum].state
3989 & __be16_to_cpu(DDF_Failed))
3990 && (ddf->phys->entries[pdnum].state
3991 & __be16_to_cpu(DDF_Transition)))
3992 /* skip this one */;
3993 else if (pdnum == pd2)
3994 pd2++;
3995 else {
3996 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
3997 for (dl = ddf->dlist; dl; dl = dl->next)
3998 if (dl->pdnum == (int)pdnum)
3999 dl->pdnum = pd2;
4000 pd2++;
4001 }
4002 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4003 while (pd2 < pdnum) {
4004 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4005 pd2++;
4006 }
4007
18a2f463 4008 ddf->updates_pending = 1;
88c164f4
NB
4009 break;
4010 case DDF_SPARE_ASSIGN_MAGIC:
4011 default: break;
4012 }
4013}
4014
edd8d13c
NB
4015static void ddf_prepare_update(struct supertype *st,
4016 struct metadata_update *update)
4017{
4018 /* This update arrived at managemon.
4019 * We are about to pass it to monitor.
4020 * If a malloc is needed, do it here.
4021 */
4022 struct ddf_super *ddf = st->sb;
4023 __u32 *magic = (__u32*)update->buf;
4024 if (*magic == DDF_VD_CONF_MAGIC)
e6b9548d 4025 if (posix_memalign(&update->space, 512,
613b0d17
N
4026 offsetof(struct vcl, conf)
4027 + ddf->conf_rec_len * 512) != 0)
e6b9548d 4028 update->space = NULL;
edd8d13c
NB
4029}
4030
7e1432fb
NB
4031/*
4032 * Check if the array 'a' is degraded but not failed.
4033 * If it is, find as many spares as are available and needed and
4034 * arrange for their inclusion.
4035 * We only choose devices which are not already in the array,
4036 * and prefer those with a spare-assignment to this array.
4037 * otherwise we choose global spares - assuming always that
4038 * there is enough room.
4039 * For each spare that we assign, we return an 'mdinfo' which
4040 * describes the position for the device in the array.
4041 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4042 * the new phys_refnum and lba_offset values.
4043 *
4044 * Only worry about BVDs at the moment.
4045 */
4046static struct mdinfo *ddf_activate_spare(struct active_array *a,
4047 struct metadata_update **updates)
4048{
4049 int working = 0;
4050 struct mdinfo *d;
4051 struct ddf_super *ddf = a->container->sb;
4052 int global_ok = 0;
4053 struct mdinfo *rv = NULL;
4054 struct mdinfo *di;
4055 struct metadata_update *mu;
4056 struct dl *dl;
4057 int i;
4058 struct vd_config *vc;
4059 __u64 *lba;
4060
7e1432fb
NB
4061 for (d = a->info.devs ; d ; d = d->next) {
4062 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4063 d->state_fd >= 0)
7e1432fb
NB
4064 /* wait for Removal to happen */
4065 return NULL;
4066 if (d->state_fd >= 0)
4067 working ++;
4068 }
4069
2c514b71
NB
4070 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4071 a->info.array.level);
7e1432fb
NB
4072 if (working == a->info.array.raid_disks)
4073 return NULL; /* array not degraded */
4074 switch (a->info.array.level) {
4075 case 1:
4076 if (working == 0)
4077 return NULL; /* failed */
4078 break;
4079 case 4:
4080 case 5:
4081 if (working < a->info.array.raid_disks - 1)
4082 return NULL; /* failed */
4083 break;
4084 case 6:
4085 if (working < a->info.array.raid_disks - 2)
4086 return NULL; /* failed */
4087 break;
4088 default: /* concat or stripe */
4089 return NULL; /* failed */
4090 }
4091
4092 /* For each slot, if it is not working, find a spare */
4093 dl = ddf->dlist;
4094 for (i = 0; i < a->info.array.raid_disks; i++) {
4095 for (d = a->info.devs ; d ; d = d->next)
4096 if (d->disk.raid_disk == i)
4097 break;
2c514b71 4098 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4099 if (d && (d->state_fd >= 0))
4100 continue;
4101
4102 /* OK, this device needs recovery. Find a spare */
4103 again:
4104 for ( ; dl ; dl = dl->next) {
4105 unsigned long long esize;
4106 unsigned long long pos;
4107 struct mdinfo *d2;
4108 int is_global = 0;
4109 int is_dedicated = 0;
4110 struct extent *ex;
f21e18ca 4111 unsigned int j;
7e1432fb
NB
4112 /* If in this array, skip */
4113 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4114 if (d2->state_fd >= 0 &&
4115 d2->disk.major == dl->major &&
7e1432fb 4116 d2->disk.minor == dl->minor) {
2c514b71 4117 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4118 break;
4119 }
4120 if (d2)
4121 continue;
4122 if (ddf->phys->entries[dl->pdnum].type &
4123 __cpu_to_be16(DDF_Spare)) {
4124 /* Check spare assign record */
4125 if (dl->spare) {
4126 if (dl->spare->type & DDF_spare_dedicated) {
4127 /* check spare_ents for guid */
4128 for (j = 0 ;
4129 j < __be16_to_cpu(dl->spare->populated);
4130 j++) {
4131 if (memcmp(dl->spare->spare_ents[j].guid,
4132 ddf->virt->entries[a->info.container_member].guid,
4133 DDF_GUID_LEN) == 0)
4134 is_dedicated = 1;
4135 }
4136 } else
4137 is_global = 1;
4138 }
4139 } else if (ddf->phys->entries[dl->pdnum].type &
4140 __cpu_to_be16(DDF_Global_Spare)) {
4141 is_global = 1;
e0e7aeaa
N
4142 } else if (!(ddf->phys->entries[dl->pdnum].state &
4143 __cpu_to_be16(DDF_Failed))) {
4144 /* we can possibly use some of this */
4145 is_global = 1;
7e1432fb
NB
4146 }
4147 if ( ! (is_dedicated ||
4148 (is_global && global_ok))) {
2c514b71 4149 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4150 is_dedicated, is_global);
7e1432fb
NB
4151 continue;
4152 }
4153
4154 /* We are allowed to use this device - is there space?
4155 * We need a->info.component_size sectors */
4156 ex = get_extents(ddf, dl);
4157 if (!ex) {
2c514b71 4158 dprintf("cannot get extents\n");
7e1432fb
NB
4159 continue;
4160 }
4161 j = 0; pos = 0;
4162 esize = 0;
4163
4164 do {
4165 esize = ex[j].start - pos;
4166 if (esize >= a->info.component_size)
4167 break;
e5cc7d46
N
4168 pos = ex[j].start + ex[j].size;
4169 j++;
4170 } while (ex[j-1].size);
7e1432fb
NB
4171
4172 free(ex);
4173 if (esize < a->info.component_size) {
e5cc7d46
N
4174 dprintf("%x:%x has no room: %llu %llu\n",
4175 dl->major, dl->minor,
2c514b71 4176 esize, a->info.component_size);
7e1432fb
NB
4177 /* No room */
4178 continue;
4179 }
4180
4181 /* Cool, we have a device with some space at pos */
503975b9 4182 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4183 di->disk.number = i;
4184 di->disk.raid_disk = i;
4185 di->disk.major = dl->major;
4186 di->disk.minor = dl->minor;
4187 di->disk.state = 0;
d23534e4 4188 di->recovery_start = 0;
7e1432fb
NB
4189 di->data_offset = pos;
4190 di->component_size = a->info.component_size;
4191 di->container_member = dl->pdnum;
4192 di->next = rv;
4193 rv = di;
2c514b71
NB
4194 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4195 i, pos);
7e1432fb
NB
4196
4197 break;
4198 }
4199 if (!dl && ! global_ok) {
4200 /* not enough dedicated spares, try global */
4201 global_ok = 1;
4202 dl = ddf->dlist;
4203 goto again;
4204 }
4205 }
4206
4207 if (!rv)
4208 /* No spares found */
4209 return rv;
4210 /* Now 'rv' has a list of devices to return.
4211 * Create a metadata_update record to update the
4212 * phys_refnum and lba_offset values
4213 */
503975b9
N
4214 mu = xmalloc(sizeof(*mu));
4215 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4216 free(mu);
4217 mu = NULL;
4218 }
503975b9 4219 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4220 mu->len = ddf->conf_rec_len * 512;
4221 mu->space = NULL;
f50ae22e 4222 mu->space_list = NULL;
7e1432fb
NB
4223 mu->next = *updates;
4224 vc = find_vdcr(ddf, a->info.container_member);
4225 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4226
4227 vc = (struct vd_config*)mu->buf;
4228 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4229 for (di = rv ; di ; di = di->next) {
4230 vc->phys_refnum[di->disk.raid_disk] =
4231 ddf->phys->entries[dl->pdnum].refnum;
4232 lba[di->disk.raid_disk] = di->data_offset;
4233 }
4234 *updates = mu;
4235 return rv;
4236}
0e600426 4237#endif /* MDASSEMBLE */
7e1432fb 4238
b640a252
N
4239static int ddf_level_to_layout(int level)
4240{
4241 switch(level) {
4242 case 0:
4243 case 1:
4244 return 0;
4245 case 5:
4246 return ALGORITHM_LEFT_SYMMETRIC;
4247 case 6:
4248 return ALGORITHM_ROTATING_N_CONTINUE;
4249 case 10:
4250 return 0x102;
4251 default:
4252 return UnSet;
4253 }
4254}
4255
30f58b22
DW
4256static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4257{
4258 if (level && *level == UnSet)
4259 *level = LEVEL_CONTAINER;
4260
4261 if (level && layout && *layout == UnSet)
4262 *layout = ddf_level_to_layout(*level);
4263}
4264
a322f70c
DW
4265struct superswitch super_ddf = {
4266#ifndef MDASSEMBLE
4267 .examine_super = examine_super_ddf,
4268 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4269 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4270 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4271 .detail_super = detail_super_ddf,
4272 .brief_detail_super = brief_detail_super_ddf,
4273 .validate_geometry = validate_geometry_ddf,
78e44928 4274 .write_init_super = write_init_super_ddf,
0e600426 4275 .add_to_super = add_to_super_ddf,
4dd968cc 4276 .remove_from_super = remove_from_super_ddf,
2b959fbf 4277 .load_container = load_container_ddf,
a322f70c
DW
4278#endif
4279 .match_home = match_home_ddf,
4280 .uuid_from_super= uuid_from_super_ddf,
4281 .getinfo_super = getinfo_super_ddf,
4282 .update_super = update_super_ddf,
4283
4284 .avail_size = avail_size_ddf,
4285
a19c88b8
NB
4286 .compare_super = compare_super_ddf,
4287
a322f70c 4288 .load_super = load_super_ddf,
ba7eb04f 4289 .init_super = init_super_ddf,
955e9ea1 4290 .store_super = store_super_ddf,
a322f70c
DW
4291 .free_super = free_super_ddf,
4292 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4293 .container_content = container_content_ddf,
30f58b22 4294 .default_geometry = default_geometry_ddf,
a322f70c 4295
a322f70c 4296 .external = 1,
549e9569 4297
0e600426 4298#ifndef MDASSEMBLE
549e9569
NB
4299/* for mdmon */
4300 .open_new = ddf_open_new,
ed9d66aa 4301 .set_array_state= ddf_set_array_state,
549e9569
NB
4302 .set_disk = ddf_set_disk,
4303 .sync_metadata = ddf_sync_metadata,
88c164f4 4304 .process_update = ddf_process_update,
edd8d13c 4305 .prepare_update = ddf_prepare_update,
7e1432fb 4306 .activate_spare = ddf_activate_spare,
0e600426 4307#endif
4cce4069 4308 .name = "ddf",
a322f70c 4309};