]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: layout_md2ddf: new md->DDF layout conversion
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
bedbf68a 47#define DDF_NOTFOUND (~0U)
48#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
61/* Primary Raid Level (PRL) */
62#define DDF_RAID0 0x00
63#define DDF_RAID1 0x01
64#define DDF_RAID3 0x03
65#define DDF_RAID4 0x04
66#define DDF_RAID5 0x05
67#define DDF_RAID1E 0x11
68#define DDF_JBOD 0x0f
69#define DDF_CONCAT 0x1f
70#define DDF_RAID5E 0x15
71#define DDF_RAID5EE 0x25
59e36268 72#define DDF_RAID6 0x06
a322f70c
DW
73
74/* Raid Level Qualifier (RLQ) */
75#define DDF_RAID0_SIMPLE 0x00
76#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78#define DDF_RAID3_0 0x00 /* parity in first extent */
79#define DDF_RAID3_N 0x01 /* parity in last extent */
80#define DDF_RAID4_0 0x00 /* parity in first extent */
81#define DDF_RAID4_N 0x01 /* parity in last extent */
82/* these apply to raid5e and raid5ee as well */
83#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 84#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
85#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
87
88#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
90
91/* Secondary RAID Level (SRL) */
92#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93#define DDF_2MIRRORED 0x01
94#define DDF_2CONCAT 0x02
95#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
96
97/* Magic numbers */
98#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
108
109#define DDF_GUID_LEN 24
59e36268
NB
110#define DDF_REVISION_0 "01.00.00"
111#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
112
/* On-disk DDF header.  The same layout is used for the anchor, primary
 * and secondary headers (see the 'type' field); the fields add up to
 * one 512-byte block.  All multibyte numeric fields are big-endian.
 */
struct ddf_header {
	__u32	magic;		/* DDF_HEADER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	char	revision[8];	/* 01.02.00 */
	__u32	seq;		/* starts at '1' */
	__u32	timestamp;
	__u8	openflag;
	__u8	foreignflag;
	__u8	enforcegroups;
	__u8	pad0;		/* 0xff */
	__u8	pad1[12];	/* 12 * 0xff */
	/* 64 bytes so far */
	__u8	header_ext[32];	/* reserved: fill with 0xff */
	__u64	primary_lba;
	__u64	secondary_lba;
	__u8	type;		/* DDF_HEADER_ANCHOR/PRIMARY/SECONDARY */
	__u8	pad2[3];	/* 0xff */
	__u32	workspace_len;	/* sectors for vendor space -
				 * at least 32768(sectors) */
	__u64	workspace_lba;
	__u16	max_pd_entries;	/* one of 15, 63, 255, 1023, 4095 */
	__u16	max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
	__u16	max_partitions; /* i.e. max num of configuration
				   record entries per disk */
	__u16	config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
				                 *12/512) */
	__u16	max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
	__u8	pad3[54];	/* 0xff */
	/* 192 bytes so far */
	/* Section locations: offsets are in sectors and are added to the
	 * active header's LBA by load_section(); lengths are in sectors.
	 */
	__u32	controller_section_offset;
	__u32	controller_section_length;
	__u32	phys_section_offset;
	__u32	phys_section_length;
	__u32	virt_section_offset;
	__u32	virt_section_length;
	__u32	config_section_offset;
	__u32	config_section_length;
	__u32	data_section_offset;
	__u32	data_section_length;
	__u32	bbm_section_offset;
	__u32	bbm_section_length;
	__u32	diag_space_offset;
	__u32	diag_space_length;
	__u32	vendor_offset;
	__u32	vendor_length;
	/* 256 bytes so far */
	__u8	pad4[256];	/* 0xff */
};
162
163/* type field */
164#define DDF_HEADER_ANCHOR 0x00
165#define DDF_HEADER_PRIMARY 0x01
166#define DDF_HEADER_SECONDARY 0x02
167
168/* The content of the 'controller section' - global scope */
/* Controller data section (global scope).  The fields add up to 512
 * bytes, i.e. a single block, which is what load_section() requires
 * for a pre-allocated buffer.
 */
struct ddf_controller_data {
	__u32	magic;			/* DDF_CONTROLLER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	struct controller_type {
		__u16 vendor_id;
		__u16 device_id;
		__u16 sub_vendor_id;
		__u16 sub_device_id;
	} type;
	char	product_id[16];
	__u8	pad[8];	/* 0xff */
	__u8	vendor_data[448];
};
183
184/* The content of phys_section - global scope */
/* Physical disk records section (global scope): a 64-byte header
 * followed by an array of 64-byte entries, one per physical disk.
 */
struct phys_disk {
	__u32	magic;		/* DDF_PHYS_RECORDS_MAGIC */
	__u32	crc;
	__u16	used_pdes;
	__u16	max_pdes;
	__u8	pad[52];
	struct phys_disk_entry {
		char	guid[DDF_GUID_LEN];
		__u32	refnum;
		__u16	type;	/* bitmap: DDF_Forced_PD_GUID etc, bigendian */
		__u16	state;	/* bitmap: DDF_Online etc, bigendian */
		__u64	config_size; /* DDF structures must be after here */
		char	path[18];	/* another horrible structure really */
		__u8	pad[6];
	} entries[0];		/* variable length, sized by the section */
};
201
202/* phys_disk_entry.type is a bitmap - bigendian remember */
203#define DDF_Forced_PD_GUID 1
204#define DDF_Active_in_VD 2
88c164f4 205#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
206#define DDF_Spare 8 /* overrides Global_spare */
207#define DDF_Foreign 16
208#define DDF_Legacy 32 /* no DDF on this device */
209
210#define DDF_Interface_mask 0xf00
211#define DDF_Interface_SCSI 0x100
212#define DDF_Interface_SAS 0x200
213#define DDF_Interface_SATA 0x300
214#define DDF_Interface_FC 0x400
215
216/* phys_disk_entry.state is a bigendian bitmap */
217#define DDF_Online 1
218#define DDF_Failed 2 /* overrides 1,4,8 */
219#define DDF_Rebuilding 4
220#define DDF_Transition 8
221#define DDF_SMART 16
222#define DDF_ReadErrors 32
223#define DDF_Missing 64
224
225/* The content of the virt_section global scope */
/* Virtual disk records section (global scope): a 64-byte header
 * followed by an array of 64-byte entries, one per virtual disk.
 */
struct virtual_disk {
	__u32	magic;		/* DDF_VIRT_RECORDS_MAGIC */
	__u32	crc;
	__u16	populated_vdes;
	__u16	max_vdes;
	__u8	pad[52];
	struct virtual_entry {
		char	guid[DDF_GUID_LEN];
		__u16	unit;
		__u16	pad0;	/* 0xffff */
		__u16	guid_crc;
		__u16	type;		/* bitmap: DDF_Shared etc */
		__u8	state;		/* DDF_state_* values and flags */
		__u8	init_state;	/* DDF_init_* and DDF_access_* bits */
		__u8	pad1[14];
		char	name[16];
	} entries[0];		/* variable length, sized by the section */
};
244
245/* virtual_entry.type is a bitmap - bigendian */
246#define DDF_Shared 1
247#define DDF_Enforce_Groups 2
248#define DDF_Unicode 4
249#define DDF_Owner_Valid 8
250
251/* virtual_entry.state is a bigendian bitmap */
252#define DDF_state_mask 0x7
253#define DDF_state_optimal 0x0
254#define DDF_state_degraded 0x1
255#define DDF_state_deleted 0x2
256#define DDF_state_missing 0x3
257#define DDF_state_failed 0x4
7a7cc504 258#define DDF_state_part_optimal 0x5
a322f70c
DW
259
260#define DDF_state_morphing 0x8
261#define DDF_state_inconsistent 0x10
262
263/* virtual_entry.init_state is a bigendian bitmap */
264#define DDF_initstate_mask 0x03
265#define DDF_init_not 0x00
7a7cc504
NB
266#define DDF_init_quick 0x01 /* initialisation is progress.
267 * i.e. 'state_inconsistent' */
a322f70c
DW
268#define DDF_init_full 0x02
269
270#define DDF_access_mask 0xc0
271#define DDF_access_rw 0x00
272#define DDF_access_ro 0x80
273#define DDF_access_blocked 0xc0
274
275/* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
280struct vd_config {
88c164f4 281 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
282 __u32 crc;
283 char guid[DDF_GUID_LEN];
284 __u32 timestamp;
285 __u32 seqnum;
286 __u8 pad0[24];
287 __u16 prim_elmnt_count;
288 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
289 __u8 prl;
290 __u8 rlq;
291 __u8 sec_elmnt_count;
292 __u8 sec_elmnt_seq;
293 __u8 srl;
598f0d58
NB
294 __u64 blocks; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks; /* blocks in array */
a322f70c
DW
298 __u8 pad1[8];
299 __u32 spare_refs[8];
300 __u8 cache_pol[8];
301 __u8 bg_rate;
302 __u8 pad2[3];
303 __u8 pad3[52];
304 __u8 pad4[192];
305 __u8 v0[32]; /* reserved- 0xff */
306 __u8 v1[32]; /* reserved- 0xff */
307 __u8 v2[16]; /* reserved- 0xff */
308 __u8 v3[16]; /* reserved- 0xff */
309 __u8 vendor[32];
310 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
313};
314
315/* vd_config.cache_pol[7] is a bitmap */
316#define DDF_cache_writeback 1 /* else writethrough */
317#define DDF_cache_wadaptive 2 /* only applies if writeback */
318#define DDF_cache_readahead 4
319#define DDF_cache_radaptive 8 /* only if doing read-ahead */
320#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
321#define DDF_cache_wallowed 32 /* enable write caching */
322#define DDF_cache_rallowed 64 /* enable read caching */
323
/* Spare assignment record (local scope).  It lives in the config
 * section alongside vd_config records and is told apart by its magic.
 * A 32-byte header is followed by 32-byte entries.
 */
struct spare_assign {
	__u32	magic;		/* DDF_SPARE_ASSIGN_MAGIC */
	__u32	crc;
	__u32	timestamp;
	__u8	reserved[7];
	__u8	type;		/* bitmap: DDF_spare_* */
	__u16	populated;	/* SAEs used */
	__u16	max;		/* max SAEs */
	__u8	pad[8];
	struct spare_assign_entry {
		char	guid[DDF_GUID_LEN];
		__u16	secondary_element;
		__u8	pad[6];
	} spare_ents[0];
};
339/* spare_assign.type is a bitmap */
340#define DDF_spare_dedicated 0x1 /* else global */
341#define DDF_spare_revertible 0x2 /* else committable */
342#define DDF_spare_active 0x4 /* else not active */
343#define DDF_spare_affinity 0x8 /* enclosure affinity */
344
345/* The data_section contents - local scope */
/* Physical disk data section (local scope); the fields add up to one
 * 512-byte block.  load_ddf_local() matches 'guid' against the
 * phys_disk entries to find this disk's index.
 */
struct disk_data {
	__u32	magic;		/* DDF_PHYS_DATA_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	refnum;		/* crc of some magic drive data ... */
	__u8	forced_ref;	/* set when above was not result of magic */
	__u8	forced_guid;	/* set if guid was forced rather than magic */
	__u8	vendor[32];
	__u8	pad[442];
};
356
357/* bbm_section content */
/* Bad block management log section: a header followed by an array of
 * remapped-block entries.
 * NOTE(review): a 2-byte field (entry_count) is directly followed by a
 * 4-byte field, so a compiler will normally insert 2 bytes of padding
 * between them - confirm against the on-disk layout before relying on
 * this structure.
 */
struct bad_block_log {
	__u32	magic;
	__u32	crc;
	__u16	entry_count;
	__u32	spare_count;
	__u8	pad[10];
	__u64	first_spare;
	struct mapped_block {
		__u64	defective_start;
		__u32	replacement_start;
		__u16	remap_count;
		__u8	pad[2];
	} entries[0];
};
372
373/* Struct for internally holding ddf structures */
374/* The DDF structure stored on each device is potentially
375 * quite different, as some data is global and some is local.
376 * The global data is:
377 * - ddf header
378 * - controller_data
379 * - Physical disk records
380 * - Virtual disk records
381 * The local data is:
382 * - Configuration records
383 * - Physical Disk data section
384 * ( and Bad block and vendor which I don't care about yet).
385 *
386 * The local data is parsed into separate lists as it is read
387 * and reconstructed for writing. This means that we only need
388 * to make config changes once and they are automatically
389 * propagated to all devices.
390 * Note that the ddf_super has space of the conf and disk data
391 * for this disk and also for a list of all such data.
392 * The list is only used for the superblock that is being
393 * built in Create or Assemble to describe the whole array.
394 */
/* In-memory representation of the DDF metadata.
 * The headers and controller data are embedded; the phys/virt tables
 * are loaded into separately allocated buffers; config records and
 * per-disk data are kept on the 'conflist' and 'dlist' linked lists.
 */
struct ddf_super {
	struct ddf_header anchor, primary, secondary;
	struct ddf_controller_data controller;
	struct ddf_header *active;	/* &primary or &secondary, whichever
					 * load_ddf_headers() chose */
	struct phys_disk	*phys;
	struct virtual_disk	*virt;
	int pdsize, vdsize;		/* size in bytes of *phys and *virt */
	unsigned int max_part, mppe, conf_rec_len; /* cpu-endian copies of
					 * max_partitions,
					 * max_primary_element_entries and
					 * config_record_len */
	int currentdev;
	int updates_pending;		/* set by ddf_set_updates_pending() */
	/* One entry per virtual disk configuration record.  The union
	 * pads the bookkeeping fields out to 512 bytes ahead of 'conf'.
	 */
	struct vcl {
		union {
			char space[512];
			struct {
				struct vcl	*next;
				__u64		*lba_offset; /* location in 'conf' of
							      * the lba table */
				unsigned int	vcnum; /* index into ->virt */
				struct vd_config **other_bvds; /* configs of the
							      * other secondary
							      * elements, or NULL
							      * if sec_elmnt_count
							      * <= 1 */
				__u64		*block_sizes; /* NULL if all the same */
			};
		};
		struct vd_config conf;
	} *conflist, *currentconf;
	/* One entry per physical disk.  The union pads the bookkeeping
	 * fields out to 512 bytes ahead of 'disk'.
	 */
	struct dl {
		union {
			char space[512];
			struct {
				struct dl	*next;
				int major, minor;
				char *devname;
				int fd;		/* -1 when the fd is not kept */
				unsigned long long size; /* sectors */
				unsigned long long primary_lba; /* sectors */
				unsigned long long secondary_lba; /* sectors */
				unsigned long long workspace_lba; /* sectors */
				int pdnum;	/* index in ->phys */
				struct spare_assign *spare;
				void *mdupdate; /* hold metadata update */

				/* These fields used by auto-layout */
				int raiddisk; /* slot to fill in autolayout */
				__u64 esize;
			};
		};
		struct disk_data disk;
		struct vcl *vlist[0]; /* max_part in size */
	} *dlist, *add_list;
};
444
445#ifndef offsetof
446#define offsetof(t,f) ((size_t)&(((t*)0)->f))
447#endif
448
#if DEBUG
static int all_ff(const char *guid);

/* Debug aid: print the state and init_state bytes of every virtual disk
 * entry for which all_ff() does not report true on the GUID.
 *
 * @ddf: superblock to dump; only read, hence const (this also keeps the
 *       prototype identical to the non-DEBUG stub below).
 * @msg: tag printed after the function name, normally the caller's name.
 */
static void pr_state(const struct ddf_super *ddf, const char *msg)
{
	unsigned int i;

	dprintf("%s/%s: ", __func__, msg);
	for (i = 0; i < __be16_to_cpu(ddf->active->max_vd_entries); i++) {
		if (all_ff(ddf->virt->entries[i].guid))
			continue;
		dprintf("%u(s=%02x i=%02x) ", i,
			ddf->virt->entries[i].state,
			ddf->virt->entries[i].init_state);
	}
	dprintf("\n");
}
#else
/* Without DEBUG this is a no-op with the same prototype. */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
467
468#define ddf_set_updates_pending(x) \
469 do { (x)->updates_pending = 1; pr_state(x, __func__); } while (0)
470
f21e18ca 471static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
472{
473 /* crcs are always at the same place as in the ddf_header */
474 struct ddf_header *ddf = buf;
475 __u32 oldcrc = ddf->crc;
476 __u32 newcrc;
477 ddf->crc = 0xffffffff;
478
479 newcrc = crc32(0, buf, len);
480 ddf->crc = oldcrc;
4abe6b70
N
481 /* The crc is store (like everything) bigendian, so convert
482 * here for simplicity
483 */
484 return __cpu_to_be32(newcrc);
a322f70c
DW
485}
486
a3163bf0 487#define DDF_INVALID_LEVEL 0xff
488#define DDF_NO_SECONDARY 0xff
489static int err_bad_md_layout(const mdu_array_info_t *array)
490{
491 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
492 array->level, array->layout, array->raid_disks);
493 return DDF_INVALID_LEVEL;
494}
495
496static int layout_md2ddf(const mdu_array_info_t *array,
497 struct vd_config *conf)
498{
499 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
500 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
501 __u8 sec_elmnt_count = 1;
502 __u8 srl = DDF_NO_SECONDARY;
503
504 switch (array->level) {
505 case LEVEL_LINEAR:
506 prl = DDF_CONCAT;
507 break;
508 case 0:
509 rlq = DDF_RAID0_SIMPLE;
510 prl = DDF_RAID0;
511 break;
512 case 1:
513 switch (array->raid_disks) {
514 case 2:
515 rlq = DDF_RAID1_SIMPLE;
516 break;
517 case 3:
518 rlq = DDF_RAID1_MULTI;
519 break;
520 default:
521 return err_bad_md_layout(array);
522 }
523 prl = DDF_RAID1;
524 break;
525 case 4:
526 if (array->layout != 0)
527 return err_bad_md_layout(array);
528 rlq = DDF_RAID4_N;
529 prl = DDF_RAID4;
530 break;
531 case 5:
532 switch (array->layout) {
533 case ALGORITHM_LEFT_ASYMMETRIC:
534 rlq = DDF_RAID5_N_RESTART;
535 break;
536 case ALGORITHM_RIGHT_ASYMMETRIC:
537 rlq = DDF_RAID5_0_RESTART;
538 break;
539 case ALGORITHM_LEFT_SYMMETRIC:
540 rlq = DDF_RAID5_N_CONTINUE;
541 break;
542 case ALGORITHM_RIGHT_SYMMETRIC:
543 /* not mentioned in standard */
544 default:
545 return err_bad_md_layout(array);
546 }
547 prl = DDF_RAID5;
548 break;
549 case 6:
550 switch (array->layout) {
551 case ALGORITHM_ROTATING_N_RESTART:
552 rlq = DDF_RAID5_N_RESTART;
553 break;
554 case ALGORITHM_ROTATING_ZERO_RESTART:
555 rlq = DDF_RAID6_0_RESTART;
556 break;
557 case ALGORITHM_ROTATING_N_CONTINUE:
558 rlq = DDF_RAID5_N_CONTINUE;
559 break;
560 default:
561 return err_bad_md_layout(array);
562 }
563 prl = DDF_RAID6;
564 break;
565 case 10:
566 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
567 rlq = DDF_RAID1_SIMPLE;
568 prim_elmnt_count = __cpu_to_be16(2);
569 sec_elmnt_count = array->raid_disks / 2;
570 } else if (array->raid_disks % 3 == 0
571 && array->layout == 0x103) {
572 rlq = DDF_RAID1_MULTI;
573 prim_elmnt_count = __cpu_to_be16(3);
574 sec_elmnt_count = array->raid_disks / 3;
575 } else
576 return err_bad_md_layout(array);
577 srl = DDF_2SPANNED;
578 prl = DDF_RAID1;
579 break;
580 default:
581 return err_bad_md_layout(array);
582 }
583 conf->prl = prl;
584 conf->prim_elmnt_count = prim_elmnt_count;
585 conf->rlq = rlq;
586 conf->srl = srl;
587 conf->sec_elmnt_count = sec_elmnt_count;
588 return 0;
589}
590
8a2848a7 591static int err_bad_ddf_layout(const struct vd_config *conf)
592{
593 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
594 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
595 return -1;
596}
597
598static int layout_ddf2md(const struct vd_config *conf,
599 mdu_array_info_t *array)
600{
601 int level = LEVEL_UNSUPPORTED;
602 int layout = 0;
603 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
604
605 if (conf->sec_elmnt_count > 1) {
606 /* see also check_secondary() */
607 if (conf->prl != DDF_RAID1 ||
608 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
609 pr_err("Unsupported secondary RAID level %u/%u\n",
610 conf->prl, conf->srl);
611 return -1;
612 }
613 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
614 layout = 0x102;
615 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
616 layout = 0x103;
617 else
618 return err_bad_ddf_layout(conf);
619 raiddisks *= conf->sec_elmnt_count;
620 level = 10;
621 goto good;
622 }
623
624 switch (conf->prl) {
625 case DDF_CONCAT:
626 level = LEVEL_LINEAR;
627 break;
628 case DDF_RAID0:
629 if (conf->rlq != DDF_RAID0_SIMPLE)
630 return err_bad_ddf_layout(conf);
631 level = 0;
632 break;
633 case DDF_RAID1:
634 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
635 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
636 return err_bad_ddf_layout(conf);
637 level = 1;
638 break;
639 case DDF_RAID4:
640 if (conf->rlq != DDF_RAID4_N)
641 return err_bad_ddf_layout(conf);
642 level = 4;
643 break;
644 case DDF_RAID5:
645 switch (conf->rlq) {
646 case DDF_RAID5_N_RESTART:
647 layout = ALGORITHM_LEFT_ASYMMETRIC;
648 break;
649 case DDF_RAID5_0_RESTART:
650 layout = ALGORITHM_RIGHT_ASYMMETRIC;
651 break;
652 case DDF_RAID5_N_CONTINUE:
653 layout = ALGORITHM_LEFT_SYMMETRIC;
654 break;
655 default:
656 return err_bad_ddf_layout(conf);
657 }
658 level = 5;
659 break;
660 case DDF_RAID6:
661 switch (conf->rlq) {
662 case DDF_RAID5_N_RESTART:
663 layout = ALGORITHM_ROTATING_N_RESTART;
664 break;
665 case DDF_RAID6_0_RESTART:
666 layout = ALGORITHM_ROTATING_ZERO_RESTART;
667 break;
668 case DDF_RAID5_N_CONTINUE:
669 layout = ALGORITHM_ROTATING_N_CONTINUE;
670 break;
671 default:
672 return err_bad_ddf_layout(conf);
673 }
674 level = 6;
675 break;
676 default:
677 return err_bad_ddf_layout(conf);
678 };
679
680good:
681 array->level = level;
682 array->layout = layout;
683 array->raid_disks = raiddisks;
684 return 0;
685}
686
a322f70c
DW
687static int load_ddf_header(int fd, unsigned long long lba,
688 unsigned long long size,
689 int type,
690 struct ddf_header *hdr, struct ddf_header *anchor)
691{
692 /* read a ddf header (primary or secondary) from fd/lba
693 * and check that it is consistent with anchor
694 * Need to check:
695 * magic, crc, guid, rev, and LBA's header_type, and
696 * everything after header_type must be the same
697 */
698 if (lba >= size-1)
699 return 0;
700
701 if (lseek64(fd, lba<<9, 0) < 0)
702 return 0;
703
704 if (read(fd, hdr, 512) != 512)
705 return 0;
706
707 if (hdr->magic != DDF_HEADER_MAGIC)
708 return 0;
709 if (calc_crc(hdr, 512) != hdr->crc)
710 return 0;
711 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
712 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
713 anchor->primary_lba != hdr->primary_lba ||
714 anchor->secondary_lba != hdr->secondary_lba ||
715 hdr->type != type ||
716 memcmp(anchor->pad2, hdr->pad2, 512 -
717 offsetof(struct ddf_header, pad2)) != 0)
718 return 0;
719
720 /* Looks good enough to me... */
721 return 1;
722}
723
724static void *load_section(int fd, struct ddf_super *super, void *buf,
725 __u32 offset_be, __u32 len_be, int check)
726{
727 unsigned long long offset = __be32_to_cpu(offset_be);
728 unsigned long long len = __be32_to_cpu(len_be);
729 int dofree = (buf == NULL);
730
731 if (check)
732 if (len != 2 && len != 8 && len != 32
733 && len != 128 && len != 512)
734 return NULL;
735
736 if (len > 1024)
737 return NULL;
738 if (buf) {
739 /* All pre-allocated sections are a single block */
740 if (len != 1)
741 return NULL;
3d2c4fc7
DW
742 } else if (posix_memalign(&buf, 512, len<<9) != 0)
743 buf = NULL;
6416d527 744
a322f70c
DW
745 if (!buf)
746 return NULL;
747
748 if (super->active->type == 1)
749 offset += __be64_to_cpu(super->active->primary_lba);
750 else
751 offset += __be64_to_cpu(super->active->secondary_lba);
752
f21e18ca 753 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
754 if (dofree)
755 free(buf);
756 return NULL;
757 }
f21e18ca 758 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
759 if (dofree)
760 free(buf);
761 return NULL;
762 }
763 return buf;
764}
765
766static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
767{
768 unsigned long long dsize;
769
770 get_dev_size(fd, NULL, &dsize);
771
772 if (lseek64(fd, dsize-512, 0) < 0) {
773 if (devname)
e7b84f9d
N
774 pr_err("Cannot seek to anchor block on %s: %s\n",
775 devname, strerror(errno));
a322f70c
DW
776 return 1;
777 }
778 if (read(fd, &super->anchor, 512) != 512) {
779 if (devname)
e7b84f9d
N
780 pr_err("Cannot read anchor block on %s: %s\n",
781 devname, strerror(errno));
a322f70c
DW
782 return 1;
783 }
784 if (super->anchor.magic != DDF_HEADER_MAGIC) {
785 if (devname)
e7b84f9d 786 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
787 devname);
788 return 2;
789 }
790 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
791 if (devname)
e7b84f9d 792 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
793 devname);
794 return 2;
795 }
59e36268
NB
796 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
797 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 798 if (devname)
e7b84f9d 799 pr_err("can only support super revision"
59e36268
NB
800 " %.8s and earlier, not %.8s on %s\n",
801 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
802 return 2;
803 }
dbeb699a 804 super->active = NULL;
a322f70c
DW
805 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
806 dsize >> 9, 1,
807 &super->primary, &super->anchor) == 0) {
808 if (devname)
e7b84f9d
N
809 pr_err("Failed to load primary DDF header "
810 "on %s\n", devname);
dbeb699a 811 } else
812 super->active = &super->primary;
a322f70c
DW
813 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
814 dsize >> 9, 2,
815 &super->secondary, &super->anchor)) {
816 if ((__be32_to_cpu(super->primary.seq)
817 < __be32_to_cpu(super->secondary.seq) &&
818 !super->secondary.openflag)
819 || (__be32_to_cpu(super->primary.seq)
820 == __be32_to_cpu(super->secondary.seq) &&
821 super->primary.openflag && !super->secondary.openflag)
dbeb699a 822 || super->active == NULL
a322f70c
DW
823 )
824 super->active = &super->secondary;
dbeb699a 825 } else if (devname)
826 pr_err("Failed to load secondary DDF header on %s\n",
827 devname);
828 if (super->active == NULL)
829 return 2;
a322f70c
DW
830 return 0;
831}
832
833static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
834{
835 void *ok;
836 ok = load_section(fd, super, &super->controller,
837 super->active->controller_section_offset,
838 super->active->controller_section_length,
839 0);
840 super->phys = load_section(fd, super, NULL,
841 super->active->phys_section_offset,
842 super->active->phys_section_length,
843 1);
844 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
845
846 super->virt = load_section(fd, super, NULL,
847 super->active->virt_section_offset,
848 super->active->virt_section_length,
849 1);
850 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
851 if (!ok ||
852 !super->phys ||
853 !super->virt) {
854 free(super->phys);
855 free(super->virt);
a2349791
NB
856 super->phys = NULL;
857 super->virt = NULL;
a322f70c
DW
858 return 2;
859 }
860 super->conflist = NULL;
861 super->dlist = NULL;
8c3b8c2c
NB
862
863 super->max_part = __be16_to_cpu(super->active->max_partitions);
864 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
865 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
866 return 0;
867}
868
3dc821b0 869static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
870 unsigned int len)
871{
872 int i;
873 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
874 if (vcl->other_bvds[i] != NULL &&
875 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
876 break;
877
878 if (i < vcl->conf.sec_elmnt_count-1) {
879 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
880 return;
881 } else {
882 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
883 if (vcl->other_bvds[i] == NULL)
884 break;
885 if (i == vcl->conf.sec_elmnt_count-1) {
886 pr_err("no space for sec level config %u, count is %u\n",
887 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
888 return;
889 }
890 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
891 != 0) {
892 pr_err("%s could not allocate vd buf\n", __func__);
893 return;
894 }
895 }
896 memcpy(vcl->other_bvds[i], vd, len);
897}
898
a322f70c
DW
899static int load_ddf_local(int fd, struct ddf_super *super,
900 char *devname, int keep)
901{
902 struct dl *dl;
903 struct stat stb;
904 char *conf;
f21e18ca
N
905 unsigned int i;
906 unsigned int confsec;
b2280677 907 int vnum;
f21e18ca 908 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 909 unsigned long long dsize;
a322f70c
DW
910
911 /* First the local disk info */
3d2c4fc7 912 if (posix_memalign((void**)&dl, 512,
6416d527 913 sizeof(*dl) +
3d2c4fc7 914 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 915 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
916 __func__);
917 return 1;
918 }
a322f70c
DW
919
920 load_section(fd, super, &dl->disk,
921 super->active->data_section_offset,
922 super->active->data_section_length,
923 0);
503975b9 924 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 925
a322f70c
DW
926 fstat(fd, &stb);
927 dl->major = major(stb.st_rdev);
928 dl->minor = minor(stb.st_rdev);
929 dl->next = super->dlist;
930 dl->fd = keep ? fd : -1;
d2ca6449
NB
931
932 dl->size = 0;
933 if (get_dev_size(fd, devname, &dsize))
934 dl->size = dsize >> 9;
097bcf00 935 /* If the disks have different sizes, the LBAs will differ
936 * between phys disks.
937 * At this point here, the values in super->active must be valid
938 * for this phys disk. */
939 dl->primary_lba = super->active->primary_lba;
940 dl->secondary_lba = super->active->secondary_lba;
941 dl->workspace_lba = super->active->workspace_lba;
b2280677 942 dl->spare = NULL;
f21e18ca 943 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
944 dl->vlist[i] = NULL;
945 super->dlist = dl;
59e36268 946 dl->pdnum = -1;
f21e18ca 947 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
948 if (memcmp(super->phys->entries[i].guid,
949 dl->disk.guid, DDF_GUID_LEN) == 0)
950 dl->pdnum = i;
951
a322f70c
DW
952 /* Now the config list. */
953 /* 'conf' is an array of config entries, some of which are
954 * probably invalid. Those which are good need to be copied into
955 * the conflist
956 */
a322f70c
DW
957
958 conf = load_section(fd, super, NULL,
959 super->active->config_section_offset,
960 super->active->config_section_length,
961 0);
962
b2280677 963 vnum = 0;
e223334f
N
964 for (confsec = 0;
965 confsec < __be32_to_cpu(super->active->config_section_length);
966 confsec += super->conf_rec_len) {
a322f70c 967 struct vd_config *vd =
e223334f 968 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
969 struct vcl *vcl;
970
b2280677
NB
971 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
972 if (dl->spare)
973 continue;
3d2c4fc7
DW
974 if (posix_memalign((void**)&dl->spare, 512,
975 super->conf_rec_len*512) != 0) {
e7b84f9d
N
976 pr_err("%s could not allocate spare info buf\n",
977 __func__);
3d2c4fc7
DW
978 return 1;
979 }
613b0d17 980
b2280677
NB
981 memcpy(dl->spare, vd, super->conf_rec_len*512);
982 continue;
983 }
a322f70c
DW
984 if (vd->magic != DDF_VD_CONF_MAGIC)
985 continue;
986 for (vcl = super->conflist; vcl; vcl = vcl->next) {
987 if (memcmp(vcl->conf.guid,
988 vd->guid, DDF_GUID_LEN) == 0)
989 break;
990 }
991
992 if (vcl) {
b2280677 993 dl->vlist[vnum++] = vcl;
3dc821b0 994 if (vcl->other_bvds != NULL &&
995 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
996 add_other_bvd(vcl, vd, super->conf_rec_len*512);
997 continue;
998 }
a322f70c
DW
999 if (__be32_to_cpu(vd->seqnum) <=
1000 __be32_to_cpu(vcl->conf.seqnum))
1001 continue;
59e36268 1002 } else {
3d2c4fc7 1003 if (posix_memalign((void**)&vcl, 512,
6416d527 1004 (super->conf_rec_len*512 +
3d2c4fc7 1005 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1006 pr_err("%s could not allocate vcl buf\n",
1007 __func__);
3d2c4fc7
DW
1008 return 1;
1009 }
a322f70c 1010 vcl->next = super->conflist;
59e36268 1011 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 1012 if (vd->sec_elmnt_count > 1)
1013 vcl->other_bvds =
1014 xcalloc(vd->sec_elmnt_count - 1,
1015 sizeof(struct vd_config *));
1016 else
1017 vcl->other_bvds = NULL;
a322f70c 1018 super->conflist = vcl;
b2280677 1019 dl->vlist[vnum++] = vcl;
a322f70c 1020 }
8c3b8c2c 1021 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
a322f70c 1022 vcl->lba_offset = (__u64*)
8c3b8c2c 1023 &vcl->conf.phys_refnum[super->mppe];
59e36268
NB
1024
1025 for (i=0; i < max_virt_disks ; i++)
1026 if (memcmp(super->virt->entries[i].guid,
1027 vcl->conf.guid, DDF_GUID_LEN)==0)
1028 break;
1029 if (i < max_virt_disks)
1030 vcl->vcnum = i;
a322f70c
DW
1031 }
1032 free(conf);
1033
1034 return 0;
1035}
1036
1037#ifndef MDASSEMBLE
1038static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1039 void **sbp, char *devname);
a322f70c 1040#endif
37424f13
DW
1041
1042static void free_super_ddf(struct supertype *st);
1043
a322f70c
DW
1044static int load_super_ddf(struct supertype *st, int fd,
1045 char *devname)
1046{
1047 unsigned long long dsize;
1048 struct ddf_super *super;
1049 int rv;
1050
a322f70c
DW
1051 if (get_dev_size(fd, devname, &dsize) == 0)
1052 return 1;
1053
b31df436 1054 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
1055 /* DDF is not allowed on partitions */
1056 return 1;
1057
a322f70c
DW
1058 /* 32M is a lower bound */
1059 if (dsize <= 32*1024*1024) {
97320d7c 1060 if (devname)
e7b84f9d
N
1061 pr_err("%s is too small for ddf: "
1062 "size is %llu sectors.\n",
1063 devname, dsize>>9);
97320d7c 1064 return 1;
a322f70c
DW
1065 }
1066 if (dsize & 511) {
97320d7c 1067 if (devname)
e7b84f9d
N
1068 pr_err("%s is an odd size for ddf: "
1069 "size is %llu bytes.\n",
1070 devname, dsize);
97320d7c 1071 return 1;
a322f70c
DW
1072 }
1073
37424f13
DW
1074 free_super_ddf(st);
1075
6416d527 1076 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1077 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1078 sizeof(*super));
1079 return 1;
1080 }
a2349791 1081 memset(super, 0, sizeof(*super));
a322f70c
DW
1082
1083 rv = load_ddf_headers(fd, super, devname);
1084 if (rv) {
1085 free(super);
1086 return rv;
1087 }
1088
1089 /* Have valid headers and have chosen the best. Let's read in the rest*/
1090
1091 rv = load_ddf_global(fd, super, devname);
1092
1093 if (rv) {
1094 if (devname)
e7b84f9d
N
1095 pr_err("Failed to load all information "
1096 "sections on %s\n", devname);
a322f70c
DW
1097 free(super);
1098 return rv;
1099 }
1100
3d2c4fc7
DW
1101 rv = load_ddf_local(fd, super, devname, 0);
1102
1103 if (rv) {
1104 if (devname)
e7b84f9d
N
1105 pr_err("Failed to load all information "
1106 "sections on %s\n", devname);
3d2c4fc7
DW
1107 free(super);
1108 return rv;
1109 }
a322f70c
DW
1110
1111 /* Should possibly check the sections .... */
1112
1113 st->sb = super;
1114 if (st->ss == NULL) {
1115 st->ss = &super_ddf;
1116 st->minor_version = 0;
1117 st->max_devs = 512;
1118 }
1119 return 0;
1120
1121}
1122
1123static void free_super_ddf(struct supertype *st)
1124{
1125 struct ddf_super *ddf = st->sb;
1126 if (ddf == NULL)
1127 return;
1128 free(ddf->phys);
1129 free(ddf->virt);
1130 while (ddf->conflist) {
1131 struct vcl *v = ddf->conflist;
1132 ddf->conflist = v->next;
59e36268
NB
1133 if (v->block_sizes)
1134 free(v->block_sizes);
3dc821b0 1135 if (v->other_bvds) {
1136 int i;
1137 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
1138 if (v->other_bvds[i] != NULL)
1139 free(v->other_bvds[i]);
8ec5d685 1140 free(v->other_bvds);
3dc821b0 1141 }
a322f70c
DW
1142 free(v);
1143 }
1144 while (ddf->dlist) {
1145 struct dl *d = ddf->dlist;
1146 ddf->dlist = d->next;
1147 if (d->fd >= 0)
1148 close(d->fd);
b2280677
NB
1149 if (d->spare)
1150 free(d->spare);
a322f70c
DW
1151 free(d);
1152 }
8a38cb04
N
1153 while (ddf->add_list) {
1154 struct dl *d = ddf->add_list;
1155 ddf->add_list = d->next;
1156 if (d->fd >= 0)
1157 close(d->fd);
1158 if (d->spare)
1159 free(d->spare);
1160 free(d);
1161 }
a322f70c
DW
1162 free(ddf);
1163 st->sb = NULL;
1164}
1165
1166static struct supertype *match_metadata_desc_ddf(char *arg)
1167{
1168 /* 'ddf' only support containers */
1169 struct supertype *st;
1170 if (strcmp(arg, "ddf") != 0 &&
1171 strcmp(arg, "default") != 0
1172 )
1173 return NULL;
1174
503975b9 1175 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1176 st->ss = &super_ddf;
1177 st->max_devs = 512;
1178 st->minor_version = 0;
1179 st->sb = NULL;
1180 return st;
1181}
1182
a322f70c
DW
1183#ifndef MDASSEMBLE
1184
1185static mapping_t ddf_state[] = {
1186 { "Optimal", 0},
1187 { "Degraded", 1},
1188 { "Deleted", 2},
1189 { "Missing", 3},
1190 { "Failed", 4},
1191 { "Partially Optimal", 5},
1192 { "-reserved-", 6},
1193 { "-reserved-", 7},
1194 { NULL, 0}
1195};
1196
1197static mapping_t ddf_init_state[] = {
1198 { "Not Initialised", 0},
1199 { "QuickInit in Progress", 1},
1200 { "Fully Initialised", 2},
1201 { "*UNKNOWN*", 3},
1202 { NULL, 0}
1203};
1204static mapping_t ddf_access[] = {
1205 { "Read/Write", 0},
1206 { "Reserved", 1},
1207 { "Read Only", 2},
1208 { "Blocked (no access)", 3},
1209 { NULL ,0}
1210};
1211
1212static mapping_t ddf_level[] = {
1213 { "RAID0", DDF_RAID0},
1214 { "RAID1", DDF_RAID1},
1215 { "RAID3", DDF_RAID3},
1216 { "RAID4", DDF_RAID4},
1217 { "RAID5", DDF_RAID5},
1218 { "RAID1E",DDF_RAID1E},
1219 { "JBOD", DDF_JBOD},
1220 { "CONCAT",DDF_CONCAT},
1221 { "RAID5E",DDF_RAID5E},
1222 { "RAID5EE",DDF_RAID5EE},
1223 { "RAID6", DDF_RAID6},
1224 { NULL, 0}
1225};
1226static mapping_t ddf_sec_level[] = {
1227 { "Striped", DDF_2STRIPED},
1228 { "Mirrored", DDF_2MIRRORED},
1229 { "Concat", DDF_2CONCAT},
1230 { "Spanned", DDF_2SPANNED},
1231 { NULL, 0}
1232};
1233#endif
1234
fb9d0acb 1235static int all_ff(const char *guid)
42dc2744
N
1236{
1237 int i;
1238 for (i = 0; i < DDF_GUID_LEN; i++)
1239 if (guid[i] != (char)0xff)
1240 return 0;
1241 return 1;
1242}
1243
a322f70c
DW
1244#ifndef MDASSEMBLE
1245static void print_guid(char *guid, int tstamp)
1246{
1247 /* A GUIDs are part (or all) ASCII and part binary.
1248 * They tend to be space padded.
59e36268
NB
1249 * We print the GUID in HEX, then in parentheses add
1250 * any initial ASCII sequence, and a possible
1251 * time stamp from bytes 16-19
a322f70c
DW
1252 */
1253 int l = DDF_GUID_LEN;
1254 int i;
59e36268
NB
1255
1256 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1257 if ((i&3)==0 && i != 0) printf(":");
1258 printf("%02X", guid[i]&255);
1259 }
1260
cfccea8c 1261 printf("\n (");
a322f70c
DW
1262 while (l && guid[l-1] == ' ')
1263 l--;
1264 for (i=0 ; i<l ; i++) {
1265 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1266 fputc(guid[i], stdout);
1267 else
59e36268 1268 break;
a322f70c
DW
1269 }
1270 if (tstamp) {
1271 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1272 char tbuf[100];
1273 struct tm *tm;
1274 tm = localtime(&then);
59e36268 1275 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1276 fputs(tbuf, stdout);
1277 }
59e36268 1278 printf(")");
a322f70c
DW
1279}
1280
1281static void examine_vd(int n, struct ddf_super *sb, char *guid)
1282{
8c3b8c2c 1283 int crl = sb->conf_rec_len;
a322f70c
DW
1284 struct vcl *vcl;
1285
1286 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1287 unsigned int i;
a322f70c
DW
1288 struct vd_config *vc = &vcl->conf;
1289
1290 if (calc_crc(vc, crl*512) != vc->crc)
1291 continue;
1292 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1293 continue;
1294
1295 /* Ok, we know about this VD, let's give more details */
b06e3095 1296 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1297 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1298 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1299 int j;
1300 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1301 for (j=0; j<cnt; j++)
1302 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1303 break;
1304 if (i) printf(" ");
1305 if (j < cnt)
1306 printf("%d", j);
1307 else
1308 printf("--");
1309 }
1310 printf(")\n");
1311 if (vc->chunk_shift != 255)
613b0d17
N
1312 printf(" Chunk Size[%d] : %d sectors\n", n,
1313 1 << vc->chunk_shift);
a322f70c
DW
1314 printf(" Raid Level[%d] : %s\n", n,
1315 map_num(ddf_level, vc->prl)?:"-unknown-");
1316 if (vc->sec_elmnt_count != 1) {
1317 printf(" Secondary Position[%d] : %d of %d\n", n,
1318 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1319 printf(" Secondary Level[%d] : %s\n", n,
1320 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1321 }
1322 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1323 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1324 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1325 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1326 }
1327}
1328
1329static void examine_vds(struct ddf_super *sb)
1330{
1331 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1332 unsigned int i;
a322f70c
DW
1333 printf(" Virtual Disks : %d\n", cnt);
1334
fb9d0acb 1335 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1336 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1337 if (all_ff(ve->guid))
1338 continue;
b06e3095 1339 printf("\n");
a322f70c
DW
1340 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1341 printf("\n");
1342 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1343 printf(" state[%d] : %s, %s%s\n", i,
1344 map_num(ddf_state, ve->state & 7),
1345 (ve->state & 8) ? "Morphing, ": "",
1346 (ve->state & 16)? "Not Consistent" : "Consistent");
1347 printf(" init state[%d] : %s\n", i,
1348 map_num(ddf_init_state, ve->init_state&3));
1349 printf(" access[%d] : %s\n", i,
1350 map_num(ddf_access, (ve->init_state>>6) & 3));
1351 printf(" Name[%d] : %.16s\n", i, ve->name);
1352 examine_vd(i, sb, ve->guid);
1353 }
1354 if (cnt) printf("\n");
1355}
1356
1357static void examine_pds(struct ddf_super *sb)
1358{
1359 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1360 int i;
1361 struct dl *dl;
1362 printf(" Physical Disks : %d\n", cnt);
962371a5 1363 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1364
1365 for (i=0 ; i<cnt ; i++) {
1366 struct phys_disk_entry *pd = &sb->phys->entries[i];
1367 int type = __be16_to_cpu(pd->type);
1368 int state = __be16_to_cpu(pd->state);
1369
b06e3095
N
1370 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1371 //printf("\n");
1372 printf(" %3d %08x ", i,
a322f70c 1373 __be32_to_cpu(pd->refnum));
613b0d17 1374 printf("%8lluK ",
c9b6907b 1375 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1376 for (dl = sb->dlist; dl ; dl = dl->next) {
1377 if (dl->disk.refnum == pd->refnum) {
1378 char *dv = map_dev(dl->major, dl->minor, 0);
1379 if (dv) {
962371a5 1380 printf("%-15s", dv);
b06e3095
N
1381 break;
1382 }
1383 }
1384 }
1385 if (!dl)
962371a5 1386 printf("%15s","");
b06e3095 1387 printf(" %s%s%s%s%s",
a322f70c 1388 (type&2) ? "active":"",
b06e3095 1389 (type&4) ? "Global-Spare":"",
a322f70c
DW
1390 (type&8) ? "spare" : "",
1391 (type&16)? ", foreign" : "",
1392 (type&32)? "pass-through" : "");
18cb4496
N
1393 if (state & DDF_Failed)
1394 /* This over-rides these three */
1395 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1396 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1397 (state&1)? "Online": "Offline",
1398 (state&2)? ", Failed": "",
1399 (state&4)? ", Rebuilding": "",
1400 (state&8)? ", in-transition": "",
b06e3095
N
1401 (state&16)? ", SMART-errors": "",
1402 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1403 (state&64)? ", Missing" : "");
a322f70c
DW
1404 printf("\n");
1405 }
1406}
1407
1408static void examine_super_ddf(struct supertype *st, char *homehost)
1409{
1410 struct ddf_super *sb = st->sb;
1411
1412 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1413 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1414 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1415 printf("\n");
1416 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1417 printf("\n");
1418 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1419 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1420 ?"yes" : "no");
1421 examine_vds(sb);
1422 examine_pds(sb);
1423}
1424
a5d85af7 1425static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1426
bedbf68a 1427static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1428static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1429
bedbf68a 1430static unsigned int get_vd_num_of_subarray(struct supertype *st)
1431{
1432 /*
1433 * Figure out the VD number for this supertype.
1434 * Returns DDF_CONTAINER for the container itself,
1435 * and DDF_NOTFOUND on error.
1436 */
1437 struct ddf_super *ddf = st->sb;
1438 struct mdinfo *sra;
1439 char *sub, *end;
1440 unsigned int vcnum;
1441
1442 if (*st->container_devnm == '\0')
1443 return DDF_CONTAINER;
1444
1445 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1446 if (!sra || sra->array.major_version != -1 ||
1447 sra->array.minor_version != -2 ||
1448 !is_subarray(sra->text_version))
1449 return DDF_NOTFOUND;
1450
1451 sub = strchr(sra->text_version + 1, '/');
1452 if (sub != NULL)
1453 vcnum = strtoul(sub + 1, &end, 10);
1454 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1455 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1456 return DDF_NOTFOUND;
1457
1458 return vcnum;
1459}
1460
061f2c6a 1461static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1462{
1463 /* We just write a generic DDF ARRAY entry
1464 */
1465 struct mdinfo info;
1466 char nbuf[64];
a5d85af7 1467 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1468 fname_from_uuid(st, &info, nbuf, ':');
1469
1470 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1471}
1472
1473static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1474{
1475 /* We just write a generic DDF ARRAY entry
a322f70c 1476 */
42dc2744 1477 struct ddf_super *ddf = st->sb;
ff54de6e 1478 struct mdinfo info;
f21e18ca 1479 unsigned int i;
ff54de6e 1480 char nbuf[64];
a5d85af7 1481 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1482 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1483
f21e18ca 1484 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1485 struct virtual_entry *ve = &ddf->virt->entries[i];
1486 struct vcl vcl;
1487 char nbuf1[64];
1488 if (all_ff(ve->guid))
1489 continue;
1490 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1491 ddf->currentconf =&vcl;
1492 uuid_from_super_ddf(st, info.uuid);
1493 fname_from_uuid(st, &info, nbuf1, ':');
1494 printf("ARRAY container=%s member=%d UUID=%s\n",
1495 nbuf+5, i, nbuf1+5);
1496 }
a322f70c
DW
1497}
1498
bceedeec
N
1499static void export_examine_super_ddf(struct supertype *st)
1500{
1501 struct mdinfo info;
1502 char nbuf[64];
a5d85af7 1503 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1504 fname_from_uuid(st, &info, nbuf, ':');
1505 printf("MD_METADATA=ddf\n");
1506 printf("MD_LEVEL=container\n");
1507 printf("MD_UUID=%s\n", nbuf+5);
1508}
bceedeec 1509
74db60b0
N
1510static int copy_metadata_ddf(struct supertype *st, int from, int to)
1511{
1512 void *buf;
1513 unsigned long long dsize, offset;
1514 int bytes;
1515 struct ddf_header *ddf;
1516 int written = 0;
1517
1518 /* The meta consists of an anchor, a primary, and a secondary.
1519 * This all lives at the end of the device.
1520 * So it is easiest to find the earliest of primary and
1521 * secondary, and copy everything from there.
1522 *
1523 * Anchor is 512 from end It contains primary_lba and secondary_lba
1524 * we choose one of those
1525 */
1526
1527 if (posix_memalign(&buf, 4096, 4096) != 0)
1528 return 1;
1529
1530 if (!get_dev_size(from, NULL, &dsize))
1531 goto err;
1532
1533 if (lseek64(from, dsize-512, 0) < 0)
1534 goto err;
1535 if (read(from, buf, 512) != 512)
1536 goto err;
1537 ddf = buf;
1538 if (ddf->magic != DDF_HEADER_MAGIC ||
1539 calc_crc(ddf, 512) != ddf->crc ||
1540 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1541 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1542 goto err;
1543
1544 offset = dsize - 512;
1545 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1546 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1547 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1548 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1549
1550 bytes = dsize - offset;
1551
1552 if (lseek64(from, offset, 0) < 0 ||
1553 lseek64(to, offset, 0) < 0)
1554 goto err;
1555 while (written < bytes) {
1556 int n = bytes - written;
1557 if (n > 4096)
1558 n = 4096;
1559 if (read(from, buf, n) != n)
1560 goto err;
1561 if (write(to, buf, n) != n)
1562 goto err;
1563 written += n;
1564 }
1565 free(buf);
1566 return 0;
1567err:
1568 free(buf);
1569 return 1;
1570}
1571
a322f70c
DW
/*
 * Placeholder: currently prints nothing.
 *
 * FIXME later
 *  Could print DDF GUID
 *  Need to find which array
 *   If whole, briefly list all arrays
 *   If one, give name
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	(void)st;
	(void)homehost;
}
1581
1582static void brief_detail_super_ddf(struct supertype *st)
1583{
ff54de6e
N
1584 struct mdinfo info;
1585 char nbuf[64];
bedbf68a 1586 struct ddf_super *ddf = st->sb;
1587 unsigned int vcnum = get_vd_num_of_subarray(st);
1588 if (vcnum == DDF_CONTAINER)
1589 uuid_from_super_ddf(st, info.uuid);
1590 else if (vcnum == DDF_NOTFOUND)
1591 return;
1592 else
1593 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1594 fname_from_uuid(st, &info, nbuf,':');
1595 printf(" UUID=%s", nbuf + 5);
a322f70c 1596}
a322f70c
DW
1597#endif
1598
1599static int match_home_ddf(struct supertype *st, char *homehost)
1600{
1601 /* It matches 'this' host if the controller is a
1602 * Linux-MD controller with vendor_data matching
1603 * the hostname
1604 */
1605 struct ddf_super *ddf = st->sb;
f21e18ca 1606 unsigned int len;
d1d3482b
N
1607
1608 if (!homehost)
1609 return 0;
1610 len = strlen(homehost);
a322f70c
DW
1611
1612 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1613 len < sizeof(ddf->controller.vendor_data) &&
1614 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1615 ddf->controller.vendor_data[len] == 0);
1616}
1617
0e600426 1618#ifndef MDASSEMBLE
baba3f4e 1619static int find_index_in_bvd(const struct ddf_super *ddf,
1620 const struct vd_config *conf, unsigned int n,
1621 unsigned int *n_bvd)
1622{
1623 /*
1624 * Find the index of the n-th valid physical disk in this BVD
1625 */
1626 unsigned int i, j;
1627 for (i = 0, j = 0; i < ddf->mppe &&
1628 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1629 if (conf->phys_refnum[i] != 0xffffffff) {
1630 if (n == j) {
1631 *n_bvd = i;
1632 return 1;
1633 }
1634 j++;
1635 }
1636 }
1637 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1638 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1639 return 0;
1640}
1641
1642static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1643 unsigned int n,
1644 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1645{
7a7cc504 1646 struct vcl *v;
59e36268 1647
baba3f4e 1648 for (v = ddf->conflist; v; v = v->next) {
1649 unsigned int nsec, ibvd;
1650 struct vd_config *conf;
1651 if (inst != v->vcnum)
1652 continue;
1653 conf = &v->conf;
1654 if (conf->sec_elmnt_count == 1) {
1655 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1656 *vcl = v;
1657 return conf;
1658 } else
1659 goto bad;
1660 }
1661 if (v->other_bvds == NULL) {
1662 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1663 __func__, conf->sec_elmnt_count);
1664 goto bad;
1665 }
1666 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1667 if (conf->sec_elmnt_seq != nsec) {
1668 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1669 if (v->other_bvds[ibvd-1] == NULL)
1670 continue;
1671 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1672 == nsec)
1673 break;
1674 }
1675 if (ibvd == conf->sec_elmnt_count)
1676 goto bad;
1677 conf = v->other_bvds[ibvd-1];
1678 }
1679 if (!find_index_in_bvd(ddf, conf,
1680 n - nsec*conf->sec_elmnt_count, n_bvd))
1681 goto bad;
1682 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1683 , __func__, n, *n_bvd, ibvd-1, inst);
1684 *vcl = v;
1685 return conf;
1686 }
1687bad:
1688 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1689 return NULL;
1690}
0e600426 1691#endif
7a7cc504 1692
5ec636b7 1693static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
7a7cc504
NB
1694{
1695 /* Find the entry in phys_disk which has the given refnum
1696 * and return it's index
1697 */
f21e18ca
N
1698 unsigned int i;
1699 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1700 if (ddf->phys->entries[i].refnum == phys_refnum)
1701 return i;
1702 return -1;
a322f70c
DW
1703}
1704
bedbf68a 1705static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1706{
1707 char buf[20];
1708 struct sha1_ctx ctx;
1709 sha1_init_ctx(&ctx);
1710 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1711 sha1_finish_ctx(&ctx, buf);
1712 memcpy(uuid, buf, 4*4);
1713}
1714
a322f70c
DW
1715static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1716{
1717 /* The uuid returned here is used for:
1718 * uuid to put into bitmap file (Create, Grow)
1719 * uuid for backup header when saving critical section (Grow)
1720 * comparing uuids when re-adding a device into an array
51006d85
N
1721 * In these cases the uuid required is that of the data-array,
1722 * not the device-set.
1723 * uuid to recognise same set when adding a missing device back
1724 * to an array. This is a uuid for the device-set.
613b0d17 1725 *
a322f70c
DW
1726 * For each of these we can make do with a truncated
1727 * or hashed uuid rather than the original, as long as
1728 * everyone agrees.
a322f70c
DW
1729 * In the case of SVD we assume the BVD is of interest,
1730 * though that might be the case if a bitmap were made for
1731 * a mirrored SVD - worry about that later.
1732 * So we need to find the VD configuration record for the
1733 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1734 * The first 16 bytes of the sha1 of these is used.
1735 */
1736 struct ddf_super *ddf = st->sb;
d2ca6449 1737 struct vcl *vcl = ddf->currentconf;
c5afc314 1738 char *guid;
a322f70c 1739
c5afc314
N
1740 if (vcl)
1741 guid = vcl->conf.guid;
1742 else
1743 guid = ddf->anchor.guid;
bedbf68a 1744 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1745}
1746
a5d85af7 1747static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1748
a5d85af7 1749static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1750{
1751 struct ddf_super *ddf = st->sb;
a5d85af7 1752 int map_disks = info->array.raid_disks;
90fa1a29 1753 __u32 *cptr;
a322f70c 1754
78e44928 1755 if (ddf->currentconf) {
a5d85af7 1756 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1757 return;
1758 }
95eeceeb 1759 memset(info, 0, sizeof(*info));
78e44928 1760
a322f70c
DW
1761 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1762 info->array.level = LEVEL_CONTAINER;
1763 info->array.layout = 0;
1764 info->array.md_minor = -1;
90fa1a29
JS
1765 cptr = (__u32 *)(ddf->anchor.guid + 16);
1766 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1767
a322f70c
DW
1768 info->array.utime = 0;
1769 info->array.chunk_size = 0;
510242aa 1770 info->container_enough = 1;
a322f70c 1771
a322f70c
DW
1772 info->disk.major = 0;
1773 info->disk.minor = 0;
cba0191b
NB
1774 if (ddf->dlist) {
1775 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1776 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1777
1778 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1779 entries[info->disk.raid_disk].
1780 config_size);
d2ca6449 1781 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1782 } else {
1783 info->disk.number = -1;
661dce36 1784 info->disk.raid_disk = -1;
cba0191b
NB
1785// info->disk.raid_disk = find refnum in the table and use index;
1786 }
f22385f9 1787 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1788
921d9e16 1789 info->recovery_start = MaxSector;
a19c88b8 1790 info->reshape_active = 0;
6e75048b 1791 info->recovery_blocked = 0;
c5afc314 1792 info->name[0] = 0;
a322f70c 1793
f35f2525
N
1794 info->array.major_version = -1;
1795 info->array.minor_version = -2;
159c3a1a 1796 strcpy(info->text_version, "ddf");
a67dd8cc 1797 info->safe_mode_delay = 0;
159c3a1a 1798
c5afc314 1799 uuid_from_super_ddf(st, info->uuid);
a322f70c 1800
a5d85af7
N
1801 if (map) {
1802 int i;
1803 for (i = 0 ; i < map_disks; i++) {
1804 if (i < info->array.raid_disks &&
1805 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1806 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1807 map[i] = 1;
1808 else
1809 map[i] = 0;
1810 }
1811 }
a322f70c
DW
1812}
1813
a5d85af7 1814static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1815{
1816 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1817 struct vcl *vc = ddf->currentconf;
1818 int cd = ddf->currentdev;
db42fa9b 1819 int j;
8592f29d 1820 struct dl *dl;
a5d85af7 1821 int map_disks = info->array.raid_disks;
90fa1a29 1822 __u32 *cptr;
a322f70c 1823
95eeceeb 1824 memset(info, 0, sizeof(*info));
8a2848a7 1825 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1826 return;
a322f70c 1827 info->array.md_minor = -1;
90fa1a29
JS
1828 cptr = (__u32 *)(vc->conf.guid + 16);
1829 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1830 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1831 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1832 info->custom_array_size = 0;
d2ca6449 1833
f21e18ca 1834 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
d2ca6449
NB
1835 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1836 if (vc->block_sizes)
1837 info->component_size = vc->block_sizes[cd];
1838 else
1839 info->component_size = __be64_to_cpu(vc->conf.blocks);
1840 }
a322f70c 1841
fb204fb2
N
1842 for (dl = ddf->dlist; dl ; dl = dl->next)
1843 if (dl->raiddisk == ddf->currentdev)
1844 break;
1845
a322f70c
DW
1846 info->disk.major = 0;
1847 info->disk.minor = 0;
fb204fb2 1848 info->disk.state = 0;
8592f29d
N
1849 if (dl) {
1850 info->disk.major = dl->major;
1851 info->disk.minor = dl->minor;
fb204fb2
N
1852 info->disk.raid_disk = dl->raiddisk;
1853 info->disk.number = dl->pdnum;
1854 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1855 }
a322f70c 1856
103f2410
NB
1857 info->container_member = ddf->currentconf->vcnum;
1858
921d9e16 1859 info->recovery_start = MaxSector;
80d26cb2 1860 info->resync_start = 0;
624c5ad4 1861 info->reshape_active = 0;
6e75048b 1862 info->recovery_blocked = 0;
80d26cb2
NB
1863 if (!(ddf->virt->entries[info->container_member].state
1864 & DDF_state_inconsistent) &&
1865 (ddf->virt->entries[info->container_member].init_state
1866 & DDF_initstate_mask)
1867 == DDF_init_full)
b7528a20 1868 info->resync_start = MaxSector;
80d26cb2 1869
a322f70c
DW
1870 uuid_from_super_ddf(st, info->uuid);
1871
f35f2525
N
1872 info->array.major_version = -1;
1873 info->array.minor_version = -2;
9b63e648 1874 sprintf(info->text_version, "/%s/%d",
4dd2df09 1875 st->container_devnm,
9b63e648 1876 info->container_member);
a67dd8cc 1877 info->safe_mode_delay = 200;
159c3a1a 1878
db42fa9b
N
1879 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1880 info->name[16]=0;
1881 for(j=0; j<16; j++)
1882 if (info->name[j] == ' ')
1883 info->name[j] = 0;
a5d85af7
N
1884
1885 if (map)
1886 for (j = 0; j < map_disks; j++) {
1887 map[j] = 0;
1888 if (j < info->array.raid_disks) {
1889 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1890 if (i >= 0 &&
a5d85af7
N
1891 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1892 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1893 map[i] = 1;
1894 }
1895 }
a322f70c
DW
1896}
1897
/*
 * Apply the metadata update named by 'update'.
 *
 * For 'assemble' and 'force' callers need a non-zero return if any
 * change was made; for others the return value is ignored.
 *
 * Nothing is actually modified here yet.  The function only classifies
 * the request:
 *   grow              - not implemented (FIXME), returns 0
 *   resync            - nothing recorded, returns 0
 *   _reshape_progress - reshape is not supported yet, returns 0
 *   assemble          - do nothing, just succeed, returns 0
 *   homehost, name    - not implemented, returns -1
 *   anything else     - returns -1
 *
 * "force-*" and "assemble" need no special handling as there is no need
 * to 'trick' the kernel: when the metadata is first updated to activate
 * the array, all the implied modifications will just happen.
 *
 * All parameters other than 'update' are unused.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
1966
5f8097be
NB
1967static void make_header_guid(char *guid)
1968{
1969 __u32 stamp;
5f8097be
NB
1970 /* Create a DDF Header of Virtual Disk GUID */
1971
1972 /* 24 bytes of fiction required.
1973 * first 8 are a 'vendor-id' - "Linux-MD"
1974 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1975 * Remaining 8 random number plus timestamp
1976 */
1977 memcpy(guid, T10, sizeof(T10));
1978 stamp = __cpu_to_be32(0xdeadbeef);
1979 memcpy(guid+8, &stamp, 4);
1980 stamp = __cpu_to_be32(0);
1981 memcpy(guid+12, &stamp, 4);
1982 stamp = __cpu_to_be32(time(0) - DECADE);
1983 memcpy(guid+16, &stamp, 4);
bfb7ea78 1984 stamp = random32();
5f8097be 1985 memcpy(guid+20, &stamp, 4);
5f8097be 1986}
59e36268 1987
fb9d0acb 1988static unsigned int find_unused_vde(const struct ddf_super *ddf)
1989{
1990 unsigned int i;
1991 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1992 if (all_ff(ddf->virt->entries[i].guid))
1993 return i;
1994 }
1995 return DDF_NOTFOUND;
1996}
1997
1998static unsigned int find_vde_by_name(const struct ddf_super *ddf,
1999 const char *name)
2000{
2001 unsigned int i;
2002 if (name == NULL)
2003 return DDF_NOTFOUND;
2004 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2005 if (all_ff(ddf->virt->entries[i].guid))
2006 continue;
2007 if (!strncmp(name, ddf->virt->entries[i].name,
2008 sizeof(ddf->virt->entries[i].name)))
2009 return i;
2010 }
2011 return DDF_NOTFOUND;
2012}
2013
2014static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2015 const char *guid)
2016{
2017 unsigned int i;
2018 if (guid == NULL || all_ff(guid))
2019 return DDF_NOTFOUND;
2020 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2021 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2022 return i;
2023 return DDF_NOTFOUND;
2024}
2025
78e44928
NB
2026static int init_super_ddf_bvd(struct supertype *st,
2027 mdu_array_info_t *info,
2028 unsigned long long size,
2029 char *name, char *homehost,
83cd1e97 2030 int *uuid, unsigned long long data_offset);
78e44928 2031
a322f70c
DW
2032static int init_super_ddf(struct supertype *st,
2033 mdu_array_info_t *info,
2034 unsigned long long size, char *name, char *homehost,
83cd1e97 2035 int *uuid, unsigned long long data_offset)
a322f70c
DW
2036{
2037 /* This is primarily called by Create when creating a new array.
2038 * We will then get add_to_super called for each component, and then
2039 * write_init_super called to write it out to each device.
2040 * For DDF, Create can create on fresh devices or on a pre-existing
2041 * array.
2042 * To create on a pre-existing array a different method will be called.
2043 * This one is just for fresh drives.
2044 *
2045 * We need to create the entire 'ddf' structure which includes:
2046 * DDF headers - these are easy.
2047 * Controller data - a Sector describing this controller .. not that
2048 * this is a controller exactly.
2049 * Physical Disk Record - one entry per device, so
2050 * leave plenty of space.
2051 * Virtual Disk Records - again, just leave plenty of space.
2052 * This just lists VDs, doesn't give details
2053 * Config records - describes the VDs that use this disk
2054 * DiskData - describes 'this' device.
2055 * BadBlockManagement - empty
2056 * Diag Space - empty
2057 * Vendor Logs - Could we put bitmaps here?
2058 *
2059 */
2060 struct ddf_super *ddf;
2061 char hostname[17];
2062 int hostlen;
a322f70c
DW
2063 int max_phys_disks, max_virt_disks;
2064 unsigned long long sector;
2065 int clen;
2066 int i;
2067 int pdsize, vdsize;
2068 struct phys_disk *pd;
2069 struct virtual_disk *vd;
2070
83cd1e97 2071 if (data_offset != INVALID_SECTORS) {
ed503f89 2072 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2073 return 0;
2074 }
2075
78e44928 2076 if (st->sb)
83cd1e97
N
2077 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2078 data_offset);
ba7eb04f 2079
3d2c4fc7 2080 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2081 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2082 return 0;
2083 }
6264b437 2084 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2085 ddf->dlist = NULL; /* no physical disks yet */
2086 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2087 st->sb = ddf;
2088
2089 if (info == NULL) {
2090 /* zeroing superblock */
2091 return 0;
2092 }
a322f70c
DW
2093
2094 /* At least 32MB *must* be reserved for the ddf. So let's just
2095 * start 32MB from the end, and put the primary header there.
2096 * Don't do secondary for now.
2097 * We don't know exactly where that will be yet as it could be
2098 * different on each device. To just set up the lengths.
2099 *
2100 */
2101
2102 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2103 make_header_guid(ddf->anchor.guid);
a322f70c 2104
59e36268 2105 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
2106 ddf->anchor.seq = __cpu_to_be32(1);
2107 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2108 ddf->anchor.openflag = 0xFF;
2109 ddf->anchor.foreignflag = 0;
2110 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2111 ddf->anchor.pad0 = 0xff;
2112 memset(ddf->anchor.pad1, 0xff, 12);
2113 memset(ddf->anchor.header_ext, 0xff, 32);
2114 ddf->anchor.primary_lba = ~(__u64)0;
2115 ddf->anchor.secondary_lba = ~(__u64)0;
2116 ddf->anchor.type = DDF_HEADER_ANCHOR;
2117 memset(ddf->anchor.pad2, 0xff, 3);
2118 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2119 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2120 of 32M reserved.. */
2121 max_phys_disks = 1023; /* Should be enough */
2122 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2123 max_virt_disks = 255;
2124 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2125 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2126 ddf->max_part = 64;
8c3b8c2c 2127 ddf->mppe = 256;
59e36268
NB
2128 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2129 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2130 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 2131 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2132 /* controller sections is one sector long immediately
2133 * after the ddf header */
2134 sector = 1;
2135 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2136 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2137 sector += 1;
2138
2139 /* phys is 8 sectors after that */
2140 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2141 sizeof(struct phys_disk_entry)*max_phys_disks,
2142 512);
2143 switch(pdsize/512) {
2144 case 2: case 8: case 32: case 128: case 512: break;
2145 default: abort();
2146 }
2147 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2148 ddf->anchor.phys_section_length =
2149 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2150 sector += pdsize/512;
2151
2152 /* virt is another 32 sectors */
2153 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2154 sizeof(struct virtual_entry) * max_virt_disks,
2155 512);
2156 switch(vdsize/512) {
2157 case 2: case 8: case 32: case 128: case 512: break;
2158 default: abort();
2159 }
2160 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2161 ddf->anchor.virt_section_length =
2162 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2163 sector += vdsize/512;
2164
59e36268 2165 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
2166 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2167 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2168 sector += clen;
2169
2170 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2171 ddf->anchor.data_section_length = __cpu_to_be32(1);
2172 sector += 1;
2173
2174 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2175 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2176 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2177 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2178 ddf->anchor.vendor_length = __cpu_to_be32(0);
2179 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2180
2181 memset(ddf->anchor.pad4, 0xff, 256);
2182
2183 memcpy(&ddf->primary, &ddf->anchor, 512);
2184 memcpy(&ddf->secondary, &ddf->anchor, 512);
2185
2186 ddf->primary.openflag = 1; /* I guess.. */
2187 ddf->primary.type = DDF_HEADER_PRIMARY;
2188
2189 ddf->secondary.openflag = 1; /* I guess.. */
2190 ddf->secondary.type = DDF_HEADER_SECONDARY;
2191
2192 ddf->active = &ddf->primary;
2193
2194 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2195
2196 /* 24 more bytes of fiction required.
2197 * first 8 are a 'vendor-id' - "Linux-MD"
2198 * Remaining 16 are serial number.... maybe a hostname would do?
2199 */
2200 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2201 gethostname(hostname, sizeof(hostname));
2202 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2203 hostlen = strlen(hostname);
2204 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2205 for (i = strlen(T10) ; i+hostlen < 24; i++)
2206 ddf->controller.guid[i] = ' ';
2207
2208 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2209 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2210 ddf->controller.type.sub_vendor_id = 0;
2211 ddf->controller.type.sub_device_id = 0;
2212 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2213 memset(ddf->controller.pad, 0xff, 8);
2214 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2215 if (homehost && strlen(homehost) < 440)
2216 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2217
3d2c4fc7 2218 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2219 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2220 return 0;
2221 }
6416d527 2222 ddf->phys = pd;
a322f70c
DW
2223 ddf->pdsize = pdsize;
2224
2225 memset(pd, 0xff, pdsize);
2226 memset(pd, 0, sizeof(*pd));
076515ba 2227 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
2228 pd->used_pdes = __cpu_to_be16(0);
2229 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2230 memset(pd->pad, 0xff, 52);
2231
3d2c4fc7 2232 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2233 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2234 return 0;
2235 }
6416d527 2236 ddf->virt = vd;
a322f70c
DW
2237 ddf->vdsize = vdsize;
2238 memset(vd, 0, vdsize);
2239 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2240 vd->populated_vdes = __cpu_to_be16(0);
2241 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2242 memset(vd->pad, 0xff, 52);
2243
5f8097be
NB
2244 for (i=0; i<max_virt_disks; i++)
2245 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2246
a322f70c 2247 st->sb = ddf;
7d5a7ff3 2248 ddf_set_updates_pending(ddf);
a322f70c
DW
2249 return 1;
2250}
2251
5f8097be
NB
static int chunk_to_shift(int chunksize)
{
	/* Convert a chunk size in bytes to a shift count relative to a
	 * 512 byte sector: the zero-based position of the lowest set bit
	 * of chunksize/512.  Yields -1 when chunksize/512 is 0.
	 */
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);

	return lowest_bit - 1;
}
2256
0e600426 2257#ifndef MDASSEMBLE
59e36268
NB
/* A region of a device: first sector and length. */
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator ordering extents by ascending 'start';
 * 'size' does not take part in the comparison.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2271
78e44928 2272static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2273{
2274 /* find a list of used extents on the give physical device
2275 * (dnum) of the given ddf.
2276 * Return a malloced array of 'struct extent'
2277
613b0d17 2278 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2279
2280 */
2281 struct extent *rv;
2282 int n = 0;
f21e18ca 2283 unsigned int i, j;
59e36268 2284
503975b9 2285 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2286
2287 for (i = 0; i < ddf->max_part; i++) {
2288 struct vcl *v = dl->vlist[i];
2289 if (v == NULL)
2290 continue;
f21e18ca 2291 for (j = 0; j < v->conf.prim_elmnt_count; j++)
59e36268
NB
2292 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
2293 /* This device plays role 'j' in 'v'. */
2294 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
2295 rv[n].size = __be64_to_cpu(v->conf.blocks);
2296 n++;
2297 break;
2298 }
2299 }
2300 qsort(rv, n, sizeof(*rv), cmp_extent);
2301
2302 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2303 rv[n].size = 0;
2304 return rv;
2305}
0e600426 2306#endif
59e36268 2307
5f8097be
NB
2308static int init_super_ddf_bvd(struct supertype *st,
2309 mdu_array_info_t *info,
2310 unsigned long long size,
2311 char *name, char *homehost,
83cd1e97 2312 int *uuid, unsigned long long data_offset)
5f8097be
NB
2313{
2314 /* We are creating a BVD inside a pre-existing container.
2315 * so st->sb is already set.
2316 * We need to create a new vd_config and a new virtual_entry
2317 */
2318 struct ddf_super *ddf = st->sb;
f21e18ca 2319 unsigned int venum;
5f8097be
NB
2320 struct virtual_entry *ve;
2321 struct vcl *vcl;
2322 struct vd_config *vc;
5f8097be 2323
fb9d0acb 2324 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2325 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2326 return 0;
2327 }
fb9d0acb 2328 venum = find_unused_vde(ddf);
2329 if (venum == DDF_NOTFOUND) {
2330 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2331 return 0;
2332 }
2333 ve = &ddf->virt->entries[venum];
2334
2335 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2336 * timestamp, random number
2337 */
2338 make_header_guid(ve->guid);
2339 ve->unit = __cpu_to_be16(info->md_minor);
2340 ve->pad0 = 0xFFFF;
2341 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2342 ve->type = 0;
7a7cc504
NB
2343 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2344 if (info->state & 1) /* clean */
2345 ve->init_state = DDF_init_full;
2346 else
2347 ve->init_state = DDF_init_not;
2348
5f8097be
NB
2349 memset(ve->pad1, 0xff, 14);
2350 memset(ve->name, ' ', 16);
2351 if (name)
2352 strncpy(ve->name, name, 16);
2353 ddf->virt->populated_vdes =
2354 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2355
2356 /* Now create a new vd_config */
3d2c4fc7
DW
2357 if (posix_memalign((void**)&vcl, 512,
2358 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2359 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2360 return 0;
2361 }
8c3b8c2c 2362 vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
59e36268
NB
2363 vcl->vcnum = venum;
2364 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 2365 vcl->other_bvds = NULL;
5f8097be
NB
2366
2367 vc = &vcl->conf;
2368
2369 vc->magic = DDF_VD_CONF_MAGIC;
2370 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2371 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2372 vc->seqnum = __cpu_to_be32(1);
2373 memset(vc->pad0, 0xff, 24);
5f8097be 2374 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2375 if (layout_md2ddf(info, vc) == -1 ||
2376 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2377 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2378 __func__, info->level, info->layout, info->raid_disks);
2379 free(vcl);
2380 return 0;
2381 }
5f8097be 2382 vc->sec_elmnt_seq = 0;
5f8097be
NB
2383 vc->blocks = __cpu_to_be64(info->size * 2);
2384 vc->array_blocks = __cpu_to_be64(
2385 calc_array_size(info->level, info->raid_disks, info->layout,
2386 info->chunk_size, info->size*2));
2387 memset(vc->pad1, 0xff, 8);
2388 vc->spare_refs[0] = 0xffffffff;
2389 vc->spare_refs[1] = 0xffffffff;
2390 vc->spare_refs[2] = 0xffffffff;
2391 vc->spare_refs[3] = 0xffffffff;
2392 vc->spare_refs[4] = 0xffffffff;
2393 vc->spare_refs[5] = 0xffffffff;
2394 vc->spare_refs[6] = 0xffffffff;
2395 vc->spare_refs[7] = 0xffffffff;
2396 memset(vc->cache_pol, 0, 8);
2397 vc->bg_rate = 0x80;
2398 memset(vc->pad2, 0xff, 3);
2399 memset(vc->pad3, 0xff, 52);
2400 memset(vc->pad4, 0xff, 192);
2401 memset(vc->v0, 0xff, 32);
2402 memset(vc->v1, 0xff, 32);
2403 memset(vc->v2, 0xff, 16);
2404 memset(vc->v3, 0xff, 16);
2405 memset(vc->vendor, 0xff, 32);
598f0d58 2406
8c3b8c2c 2407 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2408 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be
NB
2409
2410 vcl->next = ddf->conflist;
2411 ddf->conflist = vcl;
d2ca6449 2412 ddf->currentconf = vcl;
7d5a7ff3 2413 ddf_set_updates_pending(ddf);
5f8097be
NB
2414 return 1;
2415}
2416
0e600426 2417#ifndef MDASSEMBLE
5f8097be
NB
2418static void add_to_super_ddf_bvd(struct supertype *st,
2419 mdu_disk_info_t *dk, int fd, char *devname)
2420{
2421 /* fd and devname identify a device with-in the ddf container (st).
2422 * dk identifies a location in the new BVD.
2423 * We need to find suitable free space in that device and update
2424 * the phys_refnum and lba_offset for the newly created vd_config.
2425 * We might also want to update the type in the phys_disk
5575e7d9 2426 * section.
8592f29d
N
2427 *
2428 * Alternately: fd == -1 and we have already chosen which device to
2429 * use and recorded in dlist->raid_disk;
5f8097be
NB
2430 */
2431 struct dl *dl;
2432 struct ddf_super *ddf = st->sb;
2433 struct vd_config *vc;
2434 __u64 *lba_offset;
f21e18ca
N
2435 unsigned int working;
2436 unsigned int i;
59e36268
NB
2437 unsigned long long blocks, pos, esize;
2438 struct extent *ex;
5f8097be 2439
8592f29d
N
2440 if (fd == -1) {
2441 for (dl = ddf->dlist; dl ; dl = dl->next)
2442 if (dl->raiddisk == dk->raid_disk)
2443 break;
2444 } else {
2445 for (dl = ddf->dlist; dl ; dl = dl->next)
2446 if (dl->major == dk->major &&
2447 dl->minor == dk->minor)
2448 break;
2449 }
5f8097be
NB
2450 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2451 return;
2452
d2ca6449
NB
2453 vc = &ddf->currentconf->conf;
2454 lba_offset = ddf->currentconf->lba_offset;
59e36268
NB
2455
2456 ex = get_extents(ddf, dl);
2457 if (!ex)
2458 return;
2459
2460 i = 0; pos = 0;
2461 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2462 if (ddf->currentconf->block_sizes)
2463 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2464
2465 do {
2466 esize = ex[i].start - pos;
2467 if (esize >= blocks)
2468 break;
2469 pos = ex[i].start + ex[i].size;
2470 i++;
2471 } while (ex[i-1].size);
2472
2473 free(ex);
2474 if (esize < blocks)
2475 return;
2476
d2ca6449 2477 ddf->currentdev = dk->raid_disk;
5f8097be 2478 vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
59e36268 2479 lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
5f8097be 2480
f21e18ca 2481 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2482 if (dl->vlist[i] == NULL)
2483 break;
2484 if (i == ddf->max_part)
2485 return;
d2ca6449 2486 dl->vlist[i] = ddf->currentconf;
5f8097be 2487
8592f29d
N
2488 if (fd >= 0)
2489 dl->fd = fd;
2490 if (devname)
2491 dl->devname = devname;
7a7cc504
NB
2492
2493 /* Check how many working raid_disks, and if we can mark
2494 * array as optimal yet
2495 */
2496 working = 0;
5575e7d9 2497
f21e18ca 2498 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
7a7cc504
NB
2499 if (vc->phys_refnum[i] != 0xffffffff)
2500 working++;
59e36268 2501
5575e7d9 2502 /* Find which virtual_entry */
d2ca6449 2503 i = ddf->currentconf->vcnum;
7a7cc504 2504 if (working == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2505 ddf->virt->entries[i].state =
2506 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504
NB
2507 | DDF_state_optimal;
2508
2509 if (vc->prl == DDF_RAID6 &&
2510 working+1 == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2511 ddf->virt->entries[i].state =
2512 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504 2513 | DDF_state_part_optimal;
5575e7d9
NB
2514
2515 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2516 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 2517 ddf_set_updates_pending(ddf);
5f8097be
NB
2518}
2519
a322f70c
DW
2520/* add a device to a container, either while creating it or while
2521 * expanding a pre-existing container
2522 */
f20c3968 2523static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2524 mdu_disk_info_t *dk, int fd, char *devname,
2525 unsigned long long data_offset)
a322f70c
DW
2526{
2527 struct ddf_super *ddf = st->sb;
2528 struct dl *dd;
2529 time_t now;
2530 struct tm *tm;
2531 unsigned long long size;
2532 struct phys_disk_entry *pde;
f21e18ca 2533 unsigned int n, i;
a322f70c 2534 struct stat stb;
90fa1a29 2535 __u32 *tptr;
a322f70c 2536
78e44928
NB
2537 if (ddf->currentconf) {
2538 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2539 return 0;
78e44928
NB
2540 }
2541
a322f70c
DW
2542 /* This is device numbered dk->number. We need to create
2543 * a phys_disk entry and a more detailed disk_data entry.
2544 */
2545 fstat(fd, &stb);
3d2c4fc7
DW
2546 if (posix_memalign((void**)&dd, 512,
2547 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2548 pr_err("%s could allocate buffer for new disk, aborting\n",
2549 __func__);
f20c3968 2550 return 1;
3d2c4fc7 2551 }
a322f70c
DW
2552 dd->major = major(stb.st_rdev);
2553 dd->minor = minor(stb.st_rdev);
2554 dd->devname = devname;
a322f70c 2555 dd->fd = fd;
b2280677 2556 dd->spare = NULL;
a322f70c
DW
2557
2558 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2559 now = time(0);
2560 tm = localtime(&now);
2561 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2562 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2563 tptr = (__u32 *)(dd->disk.guid + 16);
2564 *tptr++ = random32();
2565 *tptr = random32();
a322f70c 2566
59e36268
NB
2567 do {
2568 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2569 dd->disk.refnum = random32();
f21e18ca
N
2570 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2571 i > 0; i--)
2572 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2573 break;
f21e18ca 2574 } while (i > 0);
59e36268 2575
a322f70c
DW
2576 dd->disk.forced_ref = 1;
2577 dd->disk.forced_guid = 1;
2578 memset(dd->disk.vendor, ' ', 32);
2579 memcpy(dd->disk.vendor, "Linux", 5);
2580 memset(dd->disk.pad, 0xff, 442);
b2280677 2581 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2582 dd->vlist[i] = NULL;
2583
2584 n = __be16_to_cpu(ddf->phys->used_pdes);
2585 pde = &ddf->phys->entries[n];
5575e7d9
NB
2586 dd->pdnum = n;
2587
2cc2983d
N
2588 if (st->update_tail) {
2589 int len = (sizeof(struct phys_disk) +
2590 sizeof(struct phys_disk_entry));
2591 struct phys_disk *pd;
2592
503975b9 2593 pd = xmalloc(len);
2cc2983d
N
2594 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2595 pd->used_pdes = __cpu_to_be16(n);
2596 pde = &pd->entries[0];
2597 dd->mdupdate = pd;
2598 } else {
2599 n++;
2600 ddf->phys->used_pdes = __cpu_to_be16(n);
2601 }
a322f70c
DW
2602
2603 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2604 pde->refnum = dd->disk.refnum;
5575e7d9 2605 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c
DW
2606 pde->state = __cpu_to_be16(DDF_Online);
2607 get_dev_size(fd, NULL, &size);
2608 /* We are required to reserve 32Meg, and record the size in sectors */
2609 pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
2610 sprintf(pde->path, "%17.17s","Information: nil") ;
2611 memset(pde->pad, 0xff, 6);
2612
d2ca6449 2613 dd->size = size >> 9;
2cc2983d
N
2614 if (st->update_tail) {
2615 dd->next = ddf->add_list;
2616 ddf->add_list = dd;
2617 } else {
2618 dd->next = ddf->dlist;
2619 ddf->dlist = dd;
7d5a7ff3 2620 ddf_set_updates_pending(ddf);
2cc2983d 2621 }
f20c3968
DW
2622
2623 return 0;
a322f70c
DW
2624}
2625
4dd968cc
N
2626static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2627{
2628 struct ddf_super *ddf = st->sb;
2629 struct dl *dl;
2630
2631 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2632 * disappeared from the container.
2633 * We need to arrange that it disappears from the metadata and
2634 * internal data structures too.
2635 * Most of the work is done by ddf_process_update which edits
2636 * the metadata and closes the file handle and attaches the memory
2637 * where free_updates will free it.
2638 */
2639 for (dl = ddf->dlist; dl ; dl = dl->next)
2640 if (dl->major == dk->major &&
2641 dl->minor == dk->minor)
2642 break;
2643 if (!dl)
2644 return -1;
2645
2646 if (st->update_tail) {
2647 int len = (sizeof(struct phys_disk) +
2648 sizeof(struct phys_disk_entry));
2649 struct phys_disk *pd;
2650
503975b9 2651 pd = xmalloc(len);
4dd968cc
N
2652 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2653 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2654 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2655 append_metadata_update(st, pd, len);
2656 }
2657 return 0;
2658}
2659
a322f70c
DW
2660/*
2661 * This is the write_init_super method for a ddf container. It is
2662 * called when creating a container or adding another device to a
2663 * container.
2664 */
42d5dfd9 2665#define NULL_CONF_SZ 4096
18a2f463 2666
e3c2a365 2667static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2668 __u32 refnum, unsigned int nmax,
2669 const struct vd_config **bvd,
2670 unsigned int *idx);
2671
7f798aca 2672static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2673 char *null_aligned)
a322f70c 2674{
7f798aca 2675 unsigned long long sector;
2676 struct ddf_header *header;
2677 int fd, i, n_config, conf_size;
a4057a88 2678 int ret = 0;
7f798aca 2679
2680 fd = d->fd;
2681
2682 switch (type) {
2683 case DDF_HEADER_PRIMARY:
2684 header = &ddf->primary;
2685 sector = __be64_to_cpu(header->primary_lba);
2686 break;
2687 case DDF_HEADER_SECONDARY:
2688 header = &ddf->secondary;
2689 sector = __be64_to_cpu(header->secondary_lba);
2690 break;
2691 default:
2692 return 0;
2693 }
2694
2695 header->type = type;
a4057a88 2696 header->openflag = 1;
7f798aca 2697 header->crc = calc_crc(header, 512);
2698
2699 lseek64(fd, sector<<9, 0);
2700 if (write(fd, header, 512) < 0)
a4057a88 2701 goto out;
7f798aca 2702
2703 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2704 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2705 goto out;
a322f70c 2706
7f798aca 2707 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2708 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2709 goto out;
7f798aca 2710 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2711 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2712 goto out;
7f798aca 2713
2714 /* Now write lots of config records. */
2715 n_config = ddf->max_part;
2716 conf_size = ddf->conf_rec_len * 512;
2717 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2718 struct vcl *c;
2719 struct vd_config *vdc = NULL;
2720 if (i == n_config) {
7f798aca 2721 c = (struct vcl *)d->spare;
e3c2a365 2722 if (c)
2723 vdc = &c->conf;
2724 } else {
2725 unsigned int dummy;
2726 c = d->vlist[i];
2727 if (c)
2728 get_pd_index_from_refnum(
2729 c, d->disk.refnum,
2730 ddf->mppe,
2731 (const struct vd_config **)&vdc,
2732 &dummy);
2733 }
7f798aca 2734 if (c) {
dacf3dc5 2735 vdc->seqnum = header->seq;
e3c2a365 2736 vdc->crc = calc_crc(vdc, conf_size);
2737 if (write(fd, vdc, conf_size) < 0)
7f798aca 2738 break;
2739 } else {
2740 unsigned int togo = conf_size;
2741 while (togo > NULL_CONF_SZ) {
2742 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2743 break;
2744 togo -= NULL_CONF_SZ;
2745 }
2746 if (write(fd, null_aligned, togo) < 0)
2747 break;
2748 }
2749 }
2750 if (i <= n_config)
a4057a88 2751 goto out;
7f798aca 2752
2753 d->disk.crc = calc_crc(&d->disk, 512);
2754 if (write(fd, &d->disk, 512) < 0)
a4057a88 2755 goto out;
7f798aca 2756
a4057a88 2757 ret = 1;
2758out:
2759 header->openflag = 0;
2760 header->crc = calc_crc(header, 512);
2761
2762 lseek64(fd, sector<<9, 0);
2763 if (write(fd, header, 512) < 0)
2764 ret = 0;
2765
2766 return ret;
7f798aca 2767}
2768
2769static int __write_init_super_ddf(struct supertype *st)
2770{
a322f70c 2771 struct ddf_super *ddf = st->sb;
a322f70c 2772 struct dl *d;
175593bf
DW
2773 int attempts = 0;
2774 int successes = 0;
7f798aca 2775 unsigned long long size;
42d5dfd9 2776 char *null_aligned;
0175cbf6 2777 __u32 seq;
42d5dfd9 2778
7d5a7ff3 2779 pr_state(ddf, __func__);
42d5dfd9
JS
2780 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2781 return -ENOMEM;
2782 }
2783 memset(null_aligned, 0xff, NULL_CONF_SZ);
a322f70c 2784
dc9e279c 2785 seq = ddf->active->seq + 1;
0175cbf6 2786
175593bf
DW
2787 /* try to write updated metadata,
2788 * if we catch a failure move on to the next disk
2789 */
a322f70c
DW
2790 for (d = ddf->dlist; d; d=d->next) {
2791 int fd = d->fd;
2792
2793 if (fd < 0)
2794 continue;
2795
175593bf 2796 attempts++;
a322f70c
DW
2797 /* We need to fill in the primary, (secondary) and workspace
2798 * lba's in the headers, set their checksums,
2799 * Also checksum phys, virt....
2800 *
2801 * Then write everything out, finally the anchor is written.
2802 */
2803 get_dev_size(fd, NULL, &size);
2804 size /= 512;
097bcf00 2805 if (d->workspace_lba != 0)
2806 ddf->anchor.workspace_lba = d->workspace_lba;
2807 else
2808 ddf->anchor.workspace_lba =
2809 __cpu_to_be64(size - 32*1024*2);
2810 if (d->primary_lba != 0)
2811 ddf->anchor.primary_lba = d->primary_lba;
2812 else
2813 ddf->anchor.primary_lba =
2814 __cpu_to_be64(size - 16*1024*2);
2815 if (d->secondary_lba != 0)
2816 ddf->anchor.secondary_lba = d->secondary_lba;
2817 else
2818 ddf->anchor.secondary_lba =
2819 __cpu_to_be64(size - 32*1024*2);
0175cbf6 2820 ddf->anchor.seq = seq;
a322f70c
DW
2821 memcpy(&ddf->primary, &ddf->anchor, 512);
2822 memcpy(&ddf->secondary, &ddf->anchor, 512);
2823
2824 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2825 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2826 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2827
7f798aca 2828 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2829 null_aligned))
175593bf 2830 continue;
a322f70c 2831
7f798aca 2832 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2833 null_aligned))
175593bf 2834 continue;
a322f70c 2835
a322f70c 2836 lseek64(fd, (size-1)*512, SEEK_SET);
175593bf
DW
2837 if (write(fd, &ddf->anchor, 512) < 0)
2838 continue;
2839 successes++;
2840 }
42d5dfd9 2841 free(null_aligned);
175593bf 2842
175593bf 2843 return attempts != successes;
a322f70c 2844}
7a7cc504
NB
2845
2846static int write_init_super_ddf(struct supertype *st)
2847{
9b1fb677
DW
2848 struct ddf_super *ddf = st->sb;
2849 struct vcl *currentconf = ddf->currentconf;
2850
2851 /* we are done with currentconf reset it to point st at the container */
2852 ddf->currentconf = NULL;
edd8d13c
NB
2853
2854 if (st->update_tail) {
2855 /* queue the virtual_disk and vd_config as metadata updates */
2856 struct virtual_disk *vd;
2857 struct vd_config *vc;
edd8d13c
NB
2858 int len;
2859
9b1fb677 2860 if (!currentconf) {
2cc2983d
N
2861 int len = (sizeof(struct phys_disk) +
2862 sizeof(struct phys_disk_entry));
2863
2864 /* adding a disk to the container. */
2865 if (!ddf->add_list)
2866 return 0;
2867
2868 append_metadata_update(st, ddf->add_list->mdupdate, len);
2869 ddf->add_list->mdupdate = NULL;
2870 return 0;
2871 }
2872
2873 /* Newly created VD */
2874
edd8d13c
NB
2875 /* First the virtual disk. We have a slightly fake header */
2876 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2877 vd = xmalloc(len);
edd8d13c 2878 *vd = *ddf->virt;
9b1fb677
DW
2879 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2880 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2881 append_metadata_update(st, vd, len);
2882
2883 /* Then the vd_config */
2884 len = ddf->conf_rec_len * 512;
503975b9 2885 vc = xmalloc(len);
9b1fb677 2886 memcpy(vc, &currentconf->conf, len);
edd8d13c
NB
2887 append_metadata_update(st, vc, len);
2888
2889 /* FIXME I need to close the fds! */
2890 return 0;
613b0d17 2891 } else {
d682f344
N
2892 struct dl *d;
2893 for (d = ddf->dlist; d; d=d->next)
ba728be7 2894 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2895 return __write_init_super_ddf(st);
d682f344 2896 }
7a7cc504
NB
2897}
2898
a322f70c
DW
2899#endif
2900
387fcd59
N
2901static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2902 unsigned long long data_offset)
a322f70c
DW
2903{
2904 /* We must reserve the last 32Meg */
2905 if (devsize <= 32*1024*2)
2906 return 0;
2907 return devsize - 32*1024*2;
2908}
2909
2910#ifndef MDASSEMBLE
8592f29d
N
2911
2912static int reserve_space(struct supertype *st, int raiddisks,
2913 unsigned long long size, int chunk,
2914 unsigned long long *freesize)
2915{
2916 /* Find 'raiddisks' spare extents at least 'size' big (but
2917 * only caring about multiples of 'chunk') and remember
2918 * them.
2919 * If the cannot be found, fail.
2920 */
2921 struct dl *dl;
2922 struct ddf_super *ddf = st->sb;
2923 int cnt = 0;
2924
2925 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 2926 dl->raiddisk = -1;
8592f29d
N
2927 dl->esize = 0;
2928 }
2929 /* Now find largest extent on each device */
2930 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2931 struct extent *e = get_extents(ddf, dl);
2932 unsigned long long pos = 0;
2933 int i = 0;
2934 int found = 0;
2935 unsigned long long minsize = size;
2936
2937 if (size == 0)
2938 minsize = chunk;
2939
2940 if (!e)
2941 continue;
2942 do {
2943 unsigned long long esize;
2944 esize = e[i].start - pos;
2945 if (esize >= minsize) {
2946 found = 1;
2947 minsize = esize;
2948 }
2949 pos = e[i].start + e[i].size;
2950 i++;
2951 } while (e[i-1].size);
2952 if (found) {
2953 cnt++;
2954 dl->esize = minsize;
2955 }
2956 free(e);
2957 }
2958 if (cnt < raiddisks) {
e7b84f9d 2959 pr_err("not enough devices with space to create array.\n");
8592f29d
N
2960 return 0; /* No enough free spaces large enough */
2961 }
2962 if (size == 0) {
2963 /* choose the largest size of which there are at least 'raiddisk' */
2964 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2965 struct dl *dl2;
2966 if (dl->esize <= size)
2967 continue;
2968 /* This is bigger than 'size', see if there are enough */
2969 cnt = 0;
7b80ad6a 2970 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
2971 if (dl2->esize >= dl->esize)
2972 cnt++;
2973 if (cnt >= raiddisks)
2974 size = dl->esize;
2975 }
2976 if (chunk) {
2977 size = size / chunk;
2978 size *= chunk;
2979 }
2980 *freesize = size;
2981 if (size < 32) {
e7b84f9d 2982 pr_err("not enough spare devices to create array.\n");
8592f29d
N
2983 return 0;
2984 }
2985 }
2986 /* We have a 'size' of which there are enough spaces.
2987 * We simply do a first-fit */
2988 cnt = 0;
2989 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
2990 if (dl->esize < size)
2991 continue;
613b0d17 2992
8592f29d
N
2993 dl->raiddisk = cnt;
2994 cnt++;
2995 }
2996 return 1;
2997}
2998
2c514b71
NB
2999static int
3000validate_geometry_ddf_container(struct supertype *st,
3001 int level, int layout, int raiddisks,
3002 int chunk, unsigned long long size,
af4348dd 3003 unsigned long long data_offset,
2c514b71
NB
3004 char *dev, unsigned long long *freesize,
3005 int verbose);
78e44928
NB
3006
3007static int validate_geometry_ddf_bvd(struct supertype *st,
3008 int level, int layout, int raiddisks,
c21e737b 3009 int *chunk, unsigned long long size,
af4348dd 3010 unsigned long long data_offset,
2c514b71
NB
3011 char *dev, unsigned long long *freesize,
3012 int verbose);
78e44928
NB
3013
3014static int validate_geometry_ddf(struct supertype *st,
2c514b71 3015 int level, int layout, int raiddisks,
c21e737b 3016 int *chunk, unsigned long long size,
af4348dd 3017 unsigned long long data_offset,
2c514b71
NB
3018 char *dev, unsigned long long *freesize,
3019 int verbose)
a322f70c
DW
3020{
3021 int fd;
3022 struct mdinfo *sra;
3023 int cfd;
3024
3025 /* ddf potentially supports lots of things, but it depends on
3026 * what devices are offered (and maybe kernel version?)
3027 * If given unused devices, we will make a container.
3028 * If given devices in a container, we will make a BVD.
3029 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3030 */
3031
bb7295f1
N
3032 if (chunk && *chunk == UnSet)
3033 *chunk = DEFAULT_CHUNK;
3034
542ef4ec 3035 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3036 if (level == LEVEL_CONTAINER) {
78e44928
NB
3037 /* Must be a fresh device to add to a container */
3038 return validate_geometry_ddf_container(st, level, layout,
c21e737b 3039 raiddisks, chunk?*chunk:0,
af4348dd
N
3040 size, data_offset, dev,
3041 freesize,
2c514b71 3042 verbose);
5f8097be
NB
3043 }
3044
78e44928 3045 if (!dev) {
a3163bf0 3046 mdu_array_info_t array = {
3047 .level = level, .layout = layout,
3048 .raid_disks = raiddisks
3049 };
3050 struct vd_config conf;
3051 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3052 if (verbose)
e7b84f9d 3053 pr_err("DDF does not support level %d arrays\n",
613b0d17 3054 level);
78e44928 3055 return 0;
b42f577a 3056 }
78e44928 3057 /* Should check layout? etc */
8592f29d
N
3058
3059 if (st->sb && freesize) {
3060 /* --create was given a container to create in.
3061 * So we need to check that there are enough
3062 * free spaces and return the amount of space.
3063 * We may as well remember which drives were
3064 * chosen so that add_to_super/getinfo_super
3065 * can return them.
3066 */
c21e737b 3067 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 3068 }
a322f70c 3069 return 1;
78e44928 3070 }
a322f70c 3071
8592f29d
N
3072 if (st->sb) {
3073 /* A container has already been opened, so we are
3074 * creating in there. Maybe a BVD, maybe an SVD.
3075 * Should make a distinction one day.
3076 */
3077 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3078 chunk, size, data_offset, dev,
3079 freesize,
8592f29d
N
3080 verbose);
3081 }
78e44928
NB
3082 /* This is the first device for the array.
3083 * If it is a container, we read it in and do automagic allocations,
3084 * no other devices should be given.
3085 * Otherwise it must be a member device of a container, and we
3086 * do manual allocation.
3087 * Later we should check for a BVD and make an SVD.
a322f70c 3088 */
a322f70c
DW
3089 fd = open(dev, O_RDONLY|O_EXCL, 0);
3090 if (fd >= 0) {
4dd2df09 3091 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3092 close(fd);
3093 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3094 strcmp(sra->text_version, "ddf") == 0) {
3095
3096 /* load super */
3097 /* find space for 'n' devices. */
3098 /* remember the devices */
3099 /* Somehow return the fact that we have enough */
a322f70c
DW
3100 }
3101
2c514b71 3102 if (verbose)
e7b84f9d
N
3103 pr_err("ddf: Cannot create this array "
3104 "on device %s - a container is required.\n",
3105 dev);
a322f70c
DW
3106 return 0;
3107 }
3108 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3109 if (verbose)
e7b84f9d 3110 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3111 dev, strerror(errno));
a322f70c
DW
3112 return 0;
3113 }
3114 /* Well, it is in use by someone, maybe a 'ddf' container. */
3115 cfd = open_container(fd);
3116 if (cfd < 0) {
3117 close(fd);
2c514b71 3118 if (verbose)
e7b84f9d 3119 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3120 dev, strerror(EBUSY));
a322f70c
DW
3121 return 0;
3122 }
4dd2df09 3123 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3124 close(fd);
3125 if (sra && sra->array.major_version == -1 &&
3126 strcmp(sra->text_version, "ddf") == 0) {
3127 /* This is a member of a ddf container. Load the container
3128 * and try to create a bvd
3129 */
3130 struct ddf_super *ddf;
e1902a7b 3131 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3132 st->sb = ddf;
4dd2df09 3133 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3134 close(cfd);
78e44928 3135 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3136 raiddisks, chunk, size,
af4348dd 3137 data_offset,
2c514b71
NB
3138 dev, freesize,
3139 verbose);
a322f70c
DW
3140 }
3141 close(cfd);
c42ec1ed
DW
3142 } else /* device may belong to a different container */
3143 return 0;
3144
a322f70c
DW
3145 return 1;
3146}
3147
2c514b71
NB
3148static int
3149validate_geometry_ddf_container(struct supertype *st,
3150 int level, int layout, int raiddisks,
3151 int chunk, unsigned long long size,
af4348dd 3152 unsigned long long data_offset,
2c514b71
NB
3153 char *dev, unsigned long long *freesize,
3154 int verbose)
a322f70c
DW
3155{
3156 int fd;
3157 unsigned long long ldsize;
3158
3159 if (level != LEVEL_CONTAINER)
3160 return 0;
3161 if (!dev)
3162 return 1;
3163
3164 fd = open(dev, O_RDONLY|O_EXCL, 0);
3165 if (fd < 0) {
2c514b71 3166 if (verbose)
e7b84f9d 3167 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3168 dev, strerror(errno));
a322f70c
DW
3169 return 0;
3170 }
3171 if (!get_dev_size(fd, dev, &ldsize)) {
3172 close(fd);
3173 return 0;
3174 }
3175 close(fd);
3176
387fcd59 3177 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3178 if (*freesize == 0)
3179 return 0;
a322f70c
DW
3180
3181 return 1;
3182}
3183
78e44928
NB
3184static int validate_geometry_ddf_bvd(struct supertype *st,
3185 int level, int layout, int raiddisks,
c21e737b 3186 int *chunk, unsigned long long size,
af4348dd 3187 unsigned long long data_offset,
2c514b71
NB
3188 char *dev, unsigned long long *freesize,
3189 int verbose)
a322f70c
DW
3190{
3191 struct stat stb;
3192 struct ddf_super *ddf = st->sb;
3193 struct dl *dl;
5f8097be
NB
3194 unsigned long long pos = 0;
3195 unsigned long long maxsize;
3196 struct extent *e;
3197 int i;
a322f70c 3198 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3199 if (level == LEVEL_CONTAINER) {
3200 if (verbose)
e7b84f9d 3201 pr_err("DDF cannot create a container within an container\n");
a322f70c 3202 return 0;
b42f577a 3203 }
a322f70c
DW
3204 /* We must have the container info already read in. */
3205 if (!ddf)
3206 return 0;
3207
5f8097be
NB
3208 if (!dev) {
3209 /* General test: make sure there is space for
3210 * 'raiddisks' device extents of size 'size'.
3211 */
3212 unsigned long long minsize = size;
3213 int dcnt = 0;
3214 if (minsize == 0)
3215 minsize = 8;
3216 for (dl = ddf->dlist; dl ; dl = dl->next)
3217 {
3218 int found = 0;
7e1432fb 3219 pos = 0;
5f8097be
NB
3220
3221 i = 0;
3222 e = get_extents(ddf, dl);
3223 if (!e) continue;
3224 do {
3225 unsigned long long esize;
3226 esize = e[i].start - pos;
3227 if (esize >= minsize)
3228 found = 1;
3229 pos = e[i].start + e[i].size;
3230 i++;
3231 } while (e[i-1].size);
3232 if (found)
3233 dcnt++;
3234 free(e);
3235 }
3236 if (dcnt < raiddisks) {
2c514b71 3237 if (verbose)
e7b84f9d
N
3238 pr_err("ddf: Not enough devices with "
3239 "space for this array (%d < %d)\n",
3240 dcnt, raiddisks);
5f8097be
NB
3241 return 0;
3242 }
3243 return 1;
3244 }
a322f70c
DW
3245 /* This device must be a member of the set */
3246 if (stat(dev, &stb) < 0)
3247 return 0;
3248 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3249 return 0;
3250 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3251 if (dl->major == (int)major(stb.st_rdev) &&
3252 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3253 break;
3254 }
5f8097be 3255 if (!dl) {
2c514b71 3256 if (verbose)
e7b84f9d 3257 pr_err("ddf: %s is not in the "
613b0d17
N
3258 "same DDF set\n",
3259 dev);
5f8097be
NB
3260 return 0;
3261 }
3262 e = get_extents(ddf, dl);
3263 maxsize = 0;
3264 i = 0;
3265 if (e) do {
613b0d17
N
3266 unsigned long long esize;
3267 esize = e[i].start - pos;
3268 if (esize >= maxsize)
3269 maxsize = esize;
3270 pos = e[i].start + e[i].size;
3271 i++;
3272 } while (e[i-1].size);
5f8097be 3273 *freesize = maxsize;
a322f70c
DW
3274 // FIXME here I am
3275
3276 return 1;
3277}
59e36268 3278
a322f70c 3279static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3280 void **sbp, char *devname)
a322f70c
DW
3281{
3282 struct mdinfo *sra;
3283 struct ddf_super *super;
3284 struct mdinfo *sd, *best = NULL;
3285 int bestseq = 0;
3286 int seq;
3287 char nm[20];
3288 int dfd;
3289
b526e52d 3290 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3291 if (!sra)
3292 return 1;
3293 if (sra->array.major_version != -1 ||
3294 sra->array.minor_version != -2 ||
3295 strcmp(sra->text_version, "ddf") != 0)
3296 return 1;
3297
6416d527 3298 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3299 return 1;
a2349791 3300 memset(super, 0, sizeof(*super));
a322f70c
DW
3301
3302 /* first, try each device, and choose the best ddf */
3303 for (sd = sra->devs ; sd ; sd = sd->next) {
3304 int rv;
3305 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3306 dfd = dev_open(nm, O_RDONLY);
3307 if (dfd < 0)
a322f70c
DW
3308 return 2;
3309 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3310 close(dfd);
a322f70c
DW
3311 if (rv == 0) {
3312 seq = __be32_to_cpu(super->active->seq);
3313 if (super->active->openflag)
3314 seq--;
3315 if (!best || seq > bestseq) {
3316 bestseq = seq;
3317 best = sd;
3318 }
3319 }
3320 }
3321 if (!best)
3322 return 1;
3323 /* OK, load this ddf */
3324 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3325 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3326 if (dfd < 0)
a322f70c
DW
3327 return 1;
3328 load_ddf_headers(dfd, super, NULL);
3329 load_ddf_global(dfd, super, NULL);
3330 close(dfd);
3331 /* Now we need the device-local bits */
3332 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3333 int rv;
3334
a322f70c 3335 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3336 dfd = dev_open(nm, O_RDWR);
7a7cc504 3337 if (dfd < 0)
a322f70c 3338 return 2;
3d2c4fc7
DW
3339 rv = load_ddf_headers(dfd, super, NULL);
3340 if (rv == 0)
e1902a7b 3341 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3342 if (rv)
3343 return 1;
a322f70c 3344 }
33414a01 3345
a322f70c
DW
3346 *sbp = super;
3347 if (st->ss == NULL) {
78e44928 3348 st->ss = &super_ddf;
a322f70c
DW
3349 st->minor_version = 0;
3350 st->max_devs = 512;
3351 }
4dd2df09 3352 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3353 return 0;
3354}
2b959fbf
N
3355
3356static int load_container_ddf(struct supertype *st, int fd,
3357 char *devname)
3358{
3359 return load_super_ddf_all(st, fd, &st->sb, devname);
3360}
3361
0e600426 3362#endif /* MDASSEMBLE */
a322f70c 3363
a5c7adb3 3364static int check_secondary(const struct vcl *vc)
3365{
3366 const struct vd_config *conf = &vc->conf;
3367 int i;
3368
3369 /* The only DDF secondary RAID level md can support is
3370 * RAID 10, if the stripe sizes and Basic volume sizes
3371 * are all equal.
3372 * Other configurations could in theory be supported by exposing
3373 * the BVDs to user space and using device mapper for the secondary
3374 * mapping. So far we don't support that.
3375 */
3376
3377 __u64 sec_elements[4] = {0, 0, 0, 0};
3378#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3379#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3380
3381 if (vc->other_bvds == NULL) {
3382 pr_err("No BVDs for secondary RAID found\n");
3383 return -1;
3384 }
3385 if (conf->prl != DDF_RAID1) {
3386 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3387 return -1;
3388 }
3389 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3390 pr_err("Secondary RAID level %d is unsupported\n",
3391 conf->srl);
3392 return -1;
3393 }
3394 __set_sec_seen(conf->sec_elmnt_seq);
3395 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3396 const struct vd_config *bvd = vc->other_bvds[i];
c98567ba 3397 if (bvd == NULL)
3398 continue;
a5c7adb3 3399 if (bvd->srl != conf->srl) {
3400 pr_err("Inconsistent secondary RAID level across BVDs\n");
3401 return -1;
3402 }
3403 if (bvd->prl != conf->prl) {
3404 pr_err("Different RAID levels for BVDs are unsupported\n");
3405 return -1;
3406 }
3407 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3408 pr_err("All BVDs must have the same number of primary elements\n");
3409 return -1;
3410 }
3411 if (bvd->chunk_shift != conf->chunk_shift) {
3412 pr_err("Different strip sizes for BVDs are unsupported\n");
3413 return -1;
3414 }
3415 if (bvd->array_blocks != conf->array_blocks) {
3416 pr_err("Different BVD sizes are unsupported\n");
3417 return -1;
3418 }
3419 __set_sec_seen(bvd->sec_elmnt_seq);
3420 }
3421 for (i = 0; i < conf->sec_elmnt_count; i++) {
3422 if (!__was_sec_seen(i)) {
3423 pr_err("BVD %d is missing\n", i);
3424 return -1;
3425 }
3426 }
3427 return 0;
3428}
3429
8a38db86 3430static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3431 __u32 refnum, unsigned int nmax,
3432 const struct vd_config **bvd,
3433 unsigned int *idx)
8a38db86 3434{
4e587018 3435 unsigned int i, j, n, sec, cnt;
3436
3437 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3438 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3439
3440 for (i = 0, j = 0 ; i < nmax ; i++) {
3441 /* j counts valid entries for this BVD */
3442 if (vc->conf.phys_refnum[i] != 0xffffffff)
3443 j++;
3444 if (vc->conf.phys_refnum[i] == refnum) {
3445 *bvd = &vc->conf;
3446 *idx = i;
3447 return sec * cnt + j - 1;
3448 }
3449 }
3450 if (vc->other_bvds == NULL)
3451 goto bad;
3452
3453 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3454 struct vd_config *vd = vc->other_bvds[n-1];
3455 if (vd == NULL)
3456 continue;
3457 sec = vd->sec_elmnt_seq;
3458 for (i = 0, j = 0 ; i < nmax ; i++) {
3459 if (vd->phys_refnum[i] != 0xffffffff)
3460 j++;
3461 if (vd->phys_refnum[i] == refnum) {
3462 *bvd = vd;
3463 *idx = i;
3464 return sec * cnt + j - 1;
3465 }
3466 }
3467 }
3468bad:
3469 *bvd = NULL;
d6e7b083 3470 return DDF_NOTFOUND;
8a38db86 3471}
3472
00bbdbda 3473static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3474{
3475 /* Given a container loaded by load_super_ddf_all,
3476 * extract information about all the arrays into
3477 * an mdinfo tree.
3478 *
3479 * For each vcl in conflist: create an mdinfo, fill it in,
3480 * then look for matching devices (phys_refnum) in dlist
3481 * and create appropriate device mdinfo.
3482 */
3483 struct ddf_super *ddf = st->sb;
3484 struct mdinfo *rest = NULL;
3485 struct vcl *vc;
3486
3487 for (vc = ddf->conflist ; vc ; vc=vc->next)
3488 {
f21e18ca
N
3489 unsigned int i;
3490 unsigned int j;
598f0d58 3491 struct mdinfo *this;
00bbdbda 3492 char *ep;
90fa1a29 3493 __u32 *cptr;
8a38db86 3494 unsigned int pd;
00bbdbda
N
3495
3496 if (subarray &&
3497 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3498 *ep != '\0'))
3499 continue;
3500
a5c7adb3 3501 if (vc->conf.sec_elmnt_count > 1) {
3502 if (check_secondary(vc) != 0)
3503 continue;
3504 }
3505
503975b9 3506 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3507 this->next = rest;
3508 rest = this;
3509
8a2848a7 3510 if (layout_ddf2md(&vc->conf, &this->array))
3511 continue;
598f0d58 3512 this->array.md_minor = -1;
f35f2525
N
3513 this->array.major_version = -1;
3514 this->array.minor_version = -2;
90fa1a29
JS
3515 cptr = (__u32 *)(vc->conf.guid + 16);
3516 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3517 this->array.utime = DECADE +
3518 __be32_to_cpu(vc->conf.timestamp);
3519 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3520
59e36268 3521 i = vc->vcnum;
7a7cc504
NB
3522 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3523 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3524 DDF_init_full) {
598f0d58 3525 this->array.state = 0;
ed9d66aa
NB
3526 this->resync_start = 0;
3527 } else {
598f0d58 3528 this->array.state = 1;
b7528a20 3529 this->resync_start = MaxSector;
ed9d66aa 3530 }
db42fa9b
N
3531 memcpy(this->name, ddf->virt->entries[i].name, 16);
3532 this->name[16]=0;
3533 for(j=0; j<16; j++)
3534 if (this->name[j] == ' ')
3535 this->name[j] = 0;
598f0d58
NB
3536
3537 memset(this->uuid, 0, sizeof(this->uuid));
3538 this->component_size = __be64_to_cpu(vc->conf.blocks);
3539 this->array.size = this->component_size / 2;
5f2aace8 3540 this->container_member = i;
598f0d58 3541
c5afc314
N
3542 ddf->currentconf = vc;
3543 uuid_from_super_ddf(st, this->uuid);
3544 ddf->currentconf = NULL;
3545
60f18132 3546 sprintf(this->text_version, "/%s/%d",
4dd2df09 3547 st->container_devnm, this->container_member);
60f18132 3548
8a38db86 3549 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3550 struct mdinfo *dev;
3551 struct dl *d;
4e587018 3552 const struct vd_config *bvd;
3553 unsigned int iphys;
3554 __u64 *lba_offset;
fa033bec 3555 int stt;
598f0d58 3556
8a38db86 3557 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3558 continue;
0cf5ef67
N
3559
3560 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3561 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3562 != DDF_Online)
3563 continue;
3564
8a38db86 3565 i = get_pd_index_from_refnum(
4e587018 3566 vc, ddf->phys->entries[pd].refnum,
3567 ddf->mppe, &bvd, &iphys);
d6e7b083 3568 if (i == DDF_NOTFOUND)
8a38db86 3569 continue;
3570
fa033bec 3571 this->array.working_disks++;
bc17324f 3572
0cf5ef67 3573 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3574 if (d->disk.refnum ==
3575 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3576 break;
3577 if (d == NULL)
3578 /* Haven't found that one yet, maybe there are others */
3579 continue;
3580
503975b9 3581 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3582 dev->next = this->devs;
3583 this->devs = dev;
3584
3585 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3586 dev->disk.major = d->major;
3587 dev->disk.minor = d->minor;
3588 dev->disk.raid_disk = i;
3589 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3590 dev->recovery_start = MaxSector;
598f0d58 3591
120f7677 3592 dev->events = __be32_to_cpu(ddf->primary.seq);
4e587018 3593 lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
3594 dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
3595 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3596 if (d->devname)
3597 strcpy(dev->name, d->devname);
3598 }
3599 }
3600 return rest;
3601}
3602
955e9ea1 3603static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3604{
955e9ea1 3605 struct ddf_super *ddf = st->sb;
a322f70c 3606 unsigned long long dsize;
6416d527 3607 void *buf;
3d2c4fc7 3608 int rc;
a322f70c 3609
955e9ea1
DW
3610 if (!ddf)
3611 return 1;
3612
a322f70c
DW
3613 if (!get_dev_size(fd, NULL, &dsize))
3614 return 1;
3615
dbf98368 3616 if (ddf->dlist || ddf->conflist) {
3617 struct stat sta;
3618 struct dl *dl;
3619 int ofd, ret;
3620
3621 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3622 pr_err("%s: file descriptor for invalid device\n",
3623 __func__);
3624 return 1;
3625 }
3626 for (dl = ddf->dlist; dl; dl = dl->next)
3627 if (dl->major == (int)major(sta.st_rdev) &&
3628 dl->minor == (int)minor(sta.st_rdev))
3629 break;
3630 if (!dl) {
3631 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3632 (int)major(sta.st_rdev),
3633 (int)minor(sta.st_rdev));
3634 return 1;
3635 }
3636 /*
3637 For DDF, writing to just one disk makes no sense.
3638 We would run the risk of writing inconsistent meta data
3639 to the devices. So just call __write_init_super_ddf and
3640 write to all devices, including this one.
3641 Use the fd passed to this function, just in case dl->fd
3642 is invalid.
3643 */
3644 ofd = dl->fd;
3645 dl->fd = fd;
3646 ret = __write_init_super_ddf(st);
3647 dl->fd = ofd;
3648 return ret;
3649 }
3650
3d2c4fc7
DW
3651 if (posix_memalign(&buf, 512, 512) != 0)
3652 return 1;
6416d527
NB
3653 memset(buf, 0, 512);
3654
a322f70c 3655 lseek64(fd, dsize-512, 0);
3d2c4fc7 3656 rc = write(fd, buf, 512);
6416d527 3657 free(buf);
3d2c4fc7
DW
3658 if (rc < 0)
3659 return 1;
a322f70c
DW
3660 return 0;
3661}
3662
a19c88b8
NB
3663static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3664{
3665 /*
3666 * return:
3667 * 0 same, or first was empty, and second was copied
3668 * 1 second had wrong number
3669 * 2 wrong uuid
3670 * 3 wrong other info
3671 */
3672 struct ddf_super *first = st->sb;
3673 struct ddf_super *second = tst->sb;
4eefd651 3674 struct dl *dl1, *dl2;
3675 struct vcl *vl1, *vl2;
2d210697 3676 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3677
3678 if (!first) {
3679 st->sb = tst->sb;
3680 tst->sb = NULL;
3681 return 0;
3682 }
3683
3684 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3685 return 2;
3686
2d210697 3687 if (first->anchor.seq != second->anchor.seq) {
3688 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3689 __be32_to_cpu(first->anchor.seq),
3690 __be32_to_cpu(second->anchor.seq));
3691 return 3;
3692 }
3693 if (first->max_part != second->max_part ||
3694 first->phys->used_pdes != second->phys->used_pdes ||
3695 first->virt->populated_vdes != second->virt->populated_vdes) {
3696 dprintf("%s: PD/VD number mismatch\n", __func__);
3697 return 3;
3698 }
3699
3700 max_pds = __be16_to_cpu(first->phys->used_pdes);
3701 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3702 for (pd = 0; pd < max_pds; pd++)
3703 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3704 break;
3705 if (pd == max_pds) {
3706 dprintf("%s: no match for disk %08x\n", __func__,
3707 __be32_to_cpu(dl2->disk.refnum));
3708 return 3;
3709 }
3710 }
3711
3712 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3713 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3714 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3715 continue;
3716 for (vd = 0; vd < max_vds; vd++)
3717 if (!memcmp(first->virt->entries[vd].guid,
3718 vl2->conf.guid, DDF_GUID_LEN))
3719 break;
3720 if (vd == max_vds) {
3721 dprintf("%s: no match for VD config\n", __func__);
3722 return 3;
3723 }
3724 }
a19c88b8 3725 /* FIXME should I look at anything else? */
2d210697 3726
4eefd651 3727 /*
3728 At this point we are fairly sure that the meta data matches.
3729 But the new disk may contain additional local data.
3730 Add it to the super block.
3731 */
3732 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3733 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3734 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3735 DDF_GUID_LEN))
3736 break;
3737 if (vl1) {
3738 if (vl1->other_bvds != NULL &&
3739 vl1->conf.sec_elmnt_seq !=
3740 vl2->conf.sec_elmnt_seq) {
3741 dprintf("%s: adding BVD %u\n", __func__,
3742 vl2->conf.sec_elmnt_seq);
3743 add_other_bvd(vl1, &vl2->conf,
3744 first->conf_rec_len*512);
3745 }
3746 continue;
3747 }
3748
3749 if (posix_memalign((void **)&vl1, 512,
3750 (first->conf_rec_len*512 +
3751 offsetof(struct vcl, conf))) != 0) {
3752 pr_err("%s could not allocate vcl buf\n",
3753 __func__);
3754 return 3;
3755 }
3756
3757 vl1->next = first->conflist;
3758 vl1->block_sizes = NULL;
3f188b10 3759 if (vl2->conf.sec_elmnt_count > 1) {
4eefd651 3760 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3761 sizeof(struct vd_config *));
3762 } else
3763 vl1->other_bvds = NULL;
3764 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3765 vl1->lba_offset = (__u64 *)
3766 &vl1->conf.phys_refnum[first->mppe];
3767 for (vd = 0; vd < max_vds; vd++)
3768 if (!memcmp(first->virt->entries[vd].guid,
3769 vl1->conf.guid, DDF_GUID_LEN))
3770 break;
3771 vl1->vcnum = vd;
3772 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3773 first->conflist = vl1;
3774 }
3775
3776 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3777 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3778 if (dl1->disk.refnum == dl2->disk.refnum)
3779 break;
3780 if (dl1)
3781 continue;
3782
3783 if (posix_memalign((void **)&dl1, 512,
3784 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3785 != 0) {
3786 pr_err("%s could not allocate disk info buffer\n",
3787 __func__);
3788 return 3;
3789 }
3790 memcpy(dl1, dl2, sizeof(*dl1));
3791 dl1->mdupdate = NULL;
3792 dl1->next = first->dlist;
3793 dl1->fd = -1;
3794 for (pd = 0; pd < max_pds; pd++)
3795 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3796 break;
3797 dl1->pdnum = pd;
3798 if (dl2->spare) {
3799 if (posix_memalign((void **)&dl1->spare, 512,
3800 first->conf_rec_len*512) != 0) {
3801 pr_err("%s could not allocate spare info buf\n",
3802 __func__);
3803 return 3;
3804 }
3805 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3806 }
3807 for (vd = 0 ; vd < first->max_part ; vd++) {
3808 if (!dl2->vlist[vd]) {
3809 dl1->vlist[vd] = NULL;
3810 continue;
3811 }
3812 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3813 if (!memcmp(vl1->conf.guid,
3814 dl2->vlist[vd]->conf.guid,
3815 DDF_GUID_LEN))
3816 break;
3817 dl1->vlist[vd] = vl1;
3818 }
3819 }
3820 first->dlist = dl1;
3821 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3822 dl1->disk.refnum);
3823 }
3824
a19c88b8
NB
3825 return 0;
3826}
3827
0e600426 3828#ifndef MDASSEMBLE
4e5528c6
NB
3829/*
3830 * A new array 'a' has been started which claims to be instance 'inst'
3831 * within container 'c'.
3832 * We need to confirm that the array matches the metadata in 'c' so
3833 * that we don't corrupt any metadata.
3834 */
cba0191b 3835static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3836{
a2aa439e 3837 struct ddf_super *ddf = c->sb;
3838 int n = atoi(inst);
fb9d0acb 3839 if (all_ff(ddf->virt->entries[n].guid)) {
3840 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 3841 return -ENODEV;
3842 }
3843 dprintf("ddf: open_new %d\n", n);
3844 a->info.container_member = n;
549e9569
NB
3845 return 0;
3846}
3847
4e5528c6
NB
3848/*
3849 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3850 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3851 * clean up to the point (in sectors). If that cannot be recorded in the
3852 * metadata, then leave it as dirty.
3853 *
3854 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3855 * !global! virtual_disk.virtual_entry structure.
3856 */
01f157d7 3857static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3858{
4e5528c6
NB
3859 struct ddf_super *ddf = a->container->sb;
3860 int inst = a->info.container_member;
18a2f463 3861 int old = ddf->virt->entries[inst].state;
01f157d7
N
3862 if (consistent == 2) {
3863 /* Should check if a recovery should be started FIXME */
3864 consistent = 1;
b7941fd6 3865 if (!is_resync_complete(&a->info))
01f157d7
N
3866 consistent = 0;
3867 }
ed9d66aa
NB
3868 if (consistent)
3869 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3870 else
4e5528c6 3871 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 3872 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 3873 ddf_set_updates_pending(ddf);
18a2f463
NB
3874
3875 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3876 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3877 if (is_resync_complete(&a->info))
ed9d66aa 3878 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3879 else if (a->info.resync_start == 0)
ed9d66aa 3880 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3881 else
ed9d66aa 3882 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 3883 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 3884 ddf_set_updates_pending(ddf);
ed9d66aa 3885
2c514b71 3886 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
b7941fd6 3887 a->info.resync_start);
01f157d7 3888 return consistent;
fd7cde1b
DW
3889}
3890
5ec636b7 3891static int get_bvd_state(const struct ddf_super *ddf,
3892 const struct vd_config *vc)
3893{
3894 unsigned int i, n_bvd, working = 0;
3895 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3896 int pd, st, state;
3897 for (i = 0; i < n_prim; i++) {
3898 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3899 continue;
3900 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3901 if (pd < 0)
3902 continue;
3903 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3904 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3905 == DDF_Online)
3906 working++;
3907 }
3908
3909 state = DDF_state_degraded;
3910 if (working == n_prim)
3911 state = DDF_state_optimal;
3912 else
3913 switch (vc->prl) {
3914 case DDF_RAID0:
3915 case DDF_CONCAT:
3916 case DDF_JBOD:
3917 state = DDF_state_failed;
3918 break;
3919 case DDF_RAID1:
3920 if (working == 0)
3921 state = DDF_state_failed;
3922 else if (working >= 2)
3923 state = DDF_state_part_optimal;
3924 break;
3925 case DDF_RAID4:
3926 case DDF_RAID5:
3927 if (working < n_prim - 1)
3928 state = DDF_state_failed;
3929 break;
3930 case DDF_RAID6:
3931 if (working < n_prim - 2)
3932 state = DDF_state_failed;
3933 else if (working == n_prim - 1)
3934 state = DDF_state_part_optimal;
3935 break;
3936 }
3937 return state;
3938}
3939
0777d17d 3940static int secondary_state(int state, int other, int seclevel)
3941{
3942 if (state == DDF_state_optimal && other == DDF_state_optimal)
3943 return DDF_state_optimal;
3944 if (seclevel == DDF_2MIRRORED) {
3945 if (state == DDF_state_optimal || other == DDF_state_optimal)
3946 return DDF_state_part_optimal;
3947 if (state == DDF_state_failed && other == DDF_state_failed)
3948 return DDF_state_failed;
3949 return DDF_state_degraded;
3950 } else {
3951 if (state == DDF_state_failed || other == DDF_state_failed)
3952 return DDF_state_failed;
3953 if (state == DDF_state_degraded || other == DDF_state_degraded)
3954 return DDF_state_degraded;
3955 return DDF_state_part_optimal;
3956 }
3957}
3958
3959static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
3960{
3961 int state = get_bvd_state(ddf, &vcl->conf);
3962 unsigned int i;
3963 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
3964 state = secondary_state(
3965 state,
3966 get_bvd_state(ddf, vcl->other_bvds[i-1]),
3967 vcl->conf.srl);
3968 }
3969 return state;
3970}
3971
7a7cc504
NB
3972/*
3973 * The state of each disk is stored in the global phys_disk structure
3974 * in phys_disk.entries[n].state.
3975 * This makes various combinations awkward.
3976 * - When a device fails in any array, it must be failed in all arrays
3977 * that include a part of this device.
3978 * - When a component is rebuilding, we cannot include it officially in the
3979 * array unless this is the only array that uses the device.
3980 *
3981 * So: when transitioning:
3982 * Online -> failed, just set failed flag. monitor will propagate
3983 * spare -> online, the device might need to be added to the array.
3984 * spare -> failed, just set failed. Don't worry if in array or not.
3985 */
8d45d196 3986static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 3987{
7a7cc504 3988 struct ddf_super *ddf = a->container->sb;
baba3f4e 3989 unsigned int inst = a->info.container_member, n_bvd;
3990 struct vcl *vcl;
3991 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
3992 &n_bvd, &vcl);
3993 int pd;
e1316fab
N
3994 struct mdinfo *mdi;
3995 struct dl *dl;
7a7cc504
NB
3996
3997 if (vc == NULL) {
2c514b71 3998 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
3999 return;
4000 }
e1316fab
N
4001 /* Find the matching slot in 'info'. */
4002 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4003 if (mdi->disk.raid_disk == n)
4004 break;
4005 if (!mdi)
4006 return;
4007
4008 /* and find the 'dl' entry corresponding to that. */
4009 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4010 if (mdi->state_fd >= 0 &&
4011 mdi->disk.major == dl->major &&
e1316fab
N
4012 mdi->disk.minor == dl->minor)
4013 break;
4014 if (!dl)
4015 return;
4016
baba3f4e 4017 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4018 if (pd < 0 || pd != dl->pdnum) {
4019 /* disk doesn't currently exist or has changed.
4020 * If it is now in_sync, insert it. */
baba3f4e 4021 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4022 __func__, dl->pdnum, dl->major, dl->minor,
4023 dl->disk.refnum);
4024 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4025 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
7a7cc504 4026 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4027 __u64 *lba_offset;
4028 pd = dl->pdnum; /* FIXME: is this really correct ? */
4029 vc->phys_refnum[n_bvd] = dl->disk.refnum;
4030 lba_offset = (__u64 *)&vc->phys_refnum[ddf->mppe];
4031 lba_offset[n_bvd] = mdi->data_offset;
e1316fab
N
4032 ddf->phys->entries[pd].type &=
4033 ~__cpu_to_be16(DDF_Global_Spare);
4034 ddf->phys->entries[pd].type |=
4035 __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 4036 ddf_set_updates_pending(ddf);
7a7cc504
NB
4037 }
4038 } else {
18a2f463 4039 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
4040 if (state & DS_FAULTY)
4041 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4042 if (state & DS_INSYNC) {
4043 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4044 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4045 }
18a2f463 4046 if (old != ddf->phys->entries[pd].state)
7d5a7ff3 4047 ddf_set_updates_pending(ddf);
7a7cc504
NB
4048 }
4049
2c514b71 4050 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 4051
7a7cc504
NB
4052 /* Now we need to check the state of the array and update
4053 * virtual_disk.entries[n].state.
4054 * It needs to be one of "optimal", "degraded", "failed".
4055 * I don't understand 'deleted' or 'missing'.
4056 */
0777d17d 4057 state = get_svd_state(ddf, vcl);
7a7cc504 4058
18a2f463
NB
4059 if (ddf->virt->entries[inst].state !=
4060 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4061 | state)) {
4062
4063 ddf->virt->entries[inst].state =
4064 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4065 | state;
7d5a7ff3 4066 ddf_set_updates_pending(ddf);
18a2f463 4067 }
7a7cc504 4068
549e9569
NB
4069}
4070
2e735d19 4071static void ddf_sync_metadata(struct supertype *st)
549e9569 4072{
7a7cc504
NB
4073
4074 /*
4075 * Write all data to all devices.
4076 * Later, we might be able to track whether only local changes
4077 * have been made, or whether any global data has been changed,
4078 * but ddf is sufficiently weird that it probably always
4079 * changes global data ....
4080 */
18a2f463
NB
4081 struct ddf_super *ddf = st->sb;
4082 if (!ddf->updates_pending)
4083 return;
4084 ddf->updates_pending = 0;
1cc7f4fe 4085 __write_init_super_ddf(st);
2c514b71 4086 dprintf("ddf: sync_metadata\n");
549e9569
NB
4087}
4088
88c164f4
NB
4089static void ddf_process_update(struct supertype *st,
4090 struct metadata_update *update)
4091{
4092 /* Apply this update to the metadata.
4093 * The first 4 bytes are a DDF_*_MAGIC which guides
4094 * our actions.
4095 * Possible update are:
4096 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4097 * Add a new physical device or remove an old one.
4098 * Changes to this record only happen implicitly.
88c164f4
NB
4099 * used_pdes is the device number.
4100 * DDF_VIRT_RECORDS_MAGIC
4101 * Add a new VD. Possibly also change the 'access' bits.
4102 * populated_vdes is the entry number.
4103 * DDF_VD_CONF_MAGIC
4104 * New or updated VD. the VIRT_RECORD must already
4105 * exist. For an update, phys_refnum and lba_offset
4106 * (at least) are updated, and the VD_CONF must
4107 * be written to precisely those devices listed with
4108 * a phys_refnum.
4109 * DDF_SPARE_ASSIGN_MAGIC
4110 * replacement Spare Assignment Record... but for which device?
4111 *
4112 * So, e.g.:
4113 * - to create a new array, we send a VIRT_RECORD and
4114 * a VD_CONF. Then assemble and start the array.
4115 * - to activate a spare we send a VD_CONF to add the phys_refnum
4116 * and offset. This will also mark the spare as active with
4117 * a spare-assignment record.
4118 */
4119 struct ddf_super *ddf = st->sb;
4120 __u32 *magic = (__u32*)update->buf;
4121 struct phys_disk *pd;
4122 struct virtual_disk *vd;
4123 struct vd_config *vc;
4124 struct vcl *vcl;
4125 struct dl *dl;
f21e18ca
N
4126 unsigned int mppe;
4127 unsigned int ent;
c7079c84 4128 unsigned int pdnum, pd2;
88c164f4 4129
2c514b71 4130 dprintf("Process update %x\n", *magic);
7e1432fb 4131
88c164f4
NB
4132 switch (*magic) {
4133 case DDF_PHYS_RECORDS_MAGIC:
4134
4135 if (update->len != (sizeof(struct phys_disk) +
4136 sizeof(struct phys_disk_entry)))
4137 return;
4138 pd = (struct phys_disk*)update->buf;
4139
4140 ent = __be16_to_cpu(pd->used_pdes);
4141 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4142 return;
4dd968cc
N
4143 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4144 struct dl **dlp;
4145 /* removing this disk. */
4146 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4147 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4148 struct dl *dl = *dlp;
4149 if (dl->pdnum == (signed)ent) {
4150 close(dl->fd);
4151 dl->fd = -1;
4152 /* FIXME this doesn't free
4153 * dl->devname */
4154 update->space = dl;
4155 *dlp = dl->next;
4156 break;
4157 }
4158 }
7d5a7ff3 4159 ddf_set_updates_pending(ddf);
4dd968cc
N
4160 return;
4161 }
88c164f4
NB
4162 if (!all_ff(ddf->phys->entries[ent].guid))
4163 return;
4164 ddf->phys->entries[ent] = pd->entries[0];
4165 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 4166 __be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4167 ddf_set_updates_pending(ddf);
2cc2983d
N
4168 if (ddf->add_list) {
4169 struct active_array *a;
4170 struct dl *al = ddf->add_list;
4171 ddf->add_list = al->next;
4172
4173 al->next = ddf->dlist;
4174 ddf->dlist = al;
4175
4176 /* As a device has been added, we should check
4177 * for any degraded devices that might make
4178 * use of this spare */
4179 for (a = st->arrays ; a; a=a->next)
4180 a->check_degraded = 1;
4181 }
88c164f4
NB
4182 break;
4183
4184 case DDF_VIRT_RECORDS_MAGIC:
4185
4186 if (update->len != (sizeof(struct virtual_disk) +
4187 sizeof(struct virtual_entry)))
4188 return;
4189 vd = (struct virtual_disk*)update->buf;
4190
fb9d0acb 4191 ent = find_unused_vde(ddf);
4192 if (ent == DDF_NOTFOUND)
88c164f4
NB
4193 return;
4194 ddf->virt->entries[ent] = vd->entries[0];
4195 ddf->virt->populated_vdes = __cpu_to_be16(1 +
613b0d17 4196 __be16_to_cpu(ddf->virt->populated_vdes));
7d5a7ff3 4197 ddf_set_updates_pending(ddf);
88c164f4
NB
4198 break;
4199
4200 case DDF_VD_CONF_MAGIC:
2c514b71 4201 dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
88c164f4
NB
4202
4203 mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
f21e18ca 4204 if ((unsigned)update->len != ddf->conf_rec_len * 512)
88c164f4
NB
4205 return;
4206 vc = (struct vd_config*)update->buf;
4207 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4208 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4209 break;
2c514b71 4210 dprintf("vcl = %p\n", vcl);
88c164f4
NB
4211 if (vcl) {
4212 /* An update, just copy the phys_refnum and lba_offset
4213 * fields
4214 */
4215 memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
4216 mppe * (sizeof(__u32) + sizeof(__u64)));
4217 } else {
4218 /* A new VD_CONF */
e6b9548d
DW
4219 if (!update->space)
4220 return;
88c164f4
NB
4221 vcl = update->space;
4222 update->space = NULL;
4223 vcl->next = ddf->conflist;
edd8d13c 4224 memcpy(&vcl->conf, vc, update->len);
88c164f4
NB
4225 vcl->lba_offset = (__u64*)
4226 &vcl->conf.phys_refnum[mppe];
fb9d0acb 4227 ent = find_vde_by_guid(ddf, vc->guid);
4228 if (ent == DDF_NOTFOUND)
4229 return;
4230 vcl->vcnum = ent;
88c164f4
NB
4231 ddf->conflist = vcl;
4232 }
c7079c84
N
4233 /* Set DDF_Transition on all Failed devices - to help
4234 * us detect those that are no longer in use
4235 */
4236 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4237 if (ddf->phys->entries[pdnum].state
4238 & __be16_to_cpu(DDF_Failed))
4239 ddf->phys->entries[pdnum].state
4240 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
4241 /* Now make sure vlist is correct for each dl. */
4242 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca
N
4243 unsigned int dn;
4244 unsigned int vn = 0;
8401644c 4245 int in_degraded = 0;
88c164f4
NB
4246 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4247 for (dn=0; dn < ddf->mppe ; dn++)
4248 if (vcl->conf.phys_refnum[dn] ==
4249 dl->disk.refnum) {
8401644c 4250 int vstate;
2c514b71
NB
4251 dprintf("dev %d has %p at %d\n",
4252 dl->pdnum, vcl, vn);
c7079c84
N
4253 /* Clear the Transition flag */
4254 if (ddf->phys->entries[dl->pdnum].state
4255 & __be16_to_cpu(DDF_Failed))
4256 ddf->phys->entries[dl->pdnum].state &=
4257 ~__be16_to_cpu(DDF_Transition);
4258
88c164f4 4259 dl->vlist[vn++] = vcl;
8401644c
N
4260 vstate = ddf->virt->entries[vcl->vcnum].state
4261 & DDF_state_mask;
4262 if (vstate == DDF_state_degraded ||
4263 vstate == DDF_state_part_optimal)
4264 in_degraded = 1;
88c164f4
NB
4265 break;
4266 }
4267 while (vn < ddf->max_part)
4268 dl->vlist[vn++] = NULL;
7e1432fb
NB
4269 if (dl->vlist[0]) {
4270 ddf->phys->entries[dl->pdnum].type &=
4271 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
4272 if (!(ddf->phys->entries[dl->pdnum].type &
4273 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
4274 ddf->phys->entries[dl->pdnum].type |=
4275 __cpu_to_be16(DDF_Active_in_VD);
4276 if (in_degraded)
4277 ddf->phys->entries[dl->pdnum].state |=
4278 __cpu_to_be16(DDF_Rebuilding);
4279 }
7e1432fb
NB
4280 }
4281 if (dl->spare) {
4282 ddf->phys->entries[dl->pdnum].type &=
4283 ~__cpu_to_be16(DDF_Global_Spare);
4284 ddf->phys->entries[dl->pdnum].type |=
4285 __cpu_to_be16(DDF_Spare);
4286 }
4287 if (!dl->vlist[0] && !dl->spare) {
4288 ddf->phys->entries[dl->pdnum].type |=
4289 __cpu_to_be16(DDF_Global_Spare);
4290 ddf->phys->entries[dl->pdnum].type &=
4291 ~__cpu_to_be16(DDF_Spare |
4292 DDF_Active_in_VD);
4293 }
88c164f4 4294 }
c7079c84
N
4295
4296 /* Now remove any 'Failed' devices that are not part
4297 * of any VD. They will have the Transition flag set.
4298 * Once done, we need to update all dl->pdnum numbers.
4299 */
4300 pd2 = 0;
4301 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4302 if ((ddf->phys->entries[pdnum].state
4303 & __be16_to_cpu(DDF_Failed))
4304 && (ddf->phys->entries[pdnum].state
4305 & __be16_to_cpu(DDF_Transition)))
4306 /* skip this one */;
4307 else if (pdnum == pd2)
4308 pd2++;
4309 else {
4310 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4311 for (dl = ddf->dlist; dl; dl = dl->next)
4312 if (dl->pdnum == (int)pdnum)
4313 dl->pdnum = pd2;
4314 pd2++;
4315 }
4316 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4317 while (pd2 < pdnum) {
4318 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4319 pd2++;
4320 }
4321
7d5a7ff3 4322 ddf_set_updates_pending(ddf);
88c164f4
NB
4323 break;
4324 case DDF_SPARE_ASSIGN_MAGIC:
4325 default: break;
4326 }
4327}
4328
edd8d13c
NB
4329static void ddf_prepare_update(struct supertype *st,
4330 struct metadata_update *update)
4331{
4332 /* This update arrived at managemon.
4333 * We are about to pass it to monitor.
4334 * If a malloc is needed, do it here.
4335 */
4336 struct ddf_super *ddf = st->sb;
4337 __u32 *magic = (__u32*)update->buf;
4338 if (*magic == DDF_VD_CONF_MAGIC)
e6b9548d 4339 if (posix_memalign(&update->space, 512,
613b0d17
N
4340 offsetof(struct vcl, conf)
4341 + ddf->conf_rec_len * 512) != 0)
e6b9548d 4342 update->space = NULL;
edd8d13c
NB
4343}
4344
7e1432fb
NB
4345/*
4346 * Check if the array 'a' is degraded but not failed.
4347 * If it is, find as many spares as are available and needed and
4348 * arrange for their inclusion.
4349 * We only choose devices which are not already in the array,
4350 * and prefer those with a spare-assignment to this array.
4351 * otherwise we choose global spares - assuming always that
4352 * there is enough room.
4353 * For each spare that we assign, we return an 'mdinfo' which
4354 * describes the position for the device in the array.
4355 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4356 * the new phys_refnum and lba_offset values.
4357 *
4358 * Only worry about BVDs at the moment.
4359 */
4360static struct mdinfo *ddf_activate_spare(struct active_array *a,
4361 struct metadata_update **updates)
4362{
4363 int working = 0;
4364 struct mdinfo *d;
4365 struct ddf_super *ddf = a->container->sb;
4366 int global_ok = 0;
4367 struct mdinfo *rv = NULL;
4368 struct mdinfo *di;
4369 struct metadata_update *mu;
4370 struct dl *dl;
4371 int i;
baba3f4e 4372 struct vcl *vcl;
7e1432fb
NB
4373 struct vd_config *vc;
4374 __u64 *lba;
baba3f4e 4375 unsigned int n_bvd;
7e1432fb 4376
7e1432fb
NB
4377 for (d = a->info.devs ; d ; d = d->next) {
4378 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4379 d->state_fd >= 0)
7e1432fb
NB
4380 /* wait for Removal to happen */
4381 return NULL;
4382 if (d->state_fd >= 0)
4383 working ++;
4384 }
4385
2c514b71
NB
4386 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4387 a->info.array.level);
7e1432fb
NB
4388 if (working == a->info.array.raid_disks)
4389 return NULL; /* array not degraded */
4390 switch (a->info.array.level) {
4391 case 1:
4392 if (working == 0)
4393 return NULL; /* failed */
4394 break;
4395 case 4:
4396 case 5:
4397 if (working < a->info.array.raid_disks - 1)
4398 return NULL; /* failed */
4399 break;
4400 case 6:
4401 if (working < a->info.array.raid_disks - 2)
4402 return NULL; /* failed */
4403 break;
4404 default: /* concat or stripe */
4405 return NULL; /* failed */
4406 }
4407
4408 /* For each slot, if it is not working, find a spare */
4409 dl = ddf->dlist;
4410 for (i = 0; i < a->info.array.raid_disks; i++) {
4411 for (d = a->info.devs ; d ; d = d->next)
4412 if (d->disk.raid_disk == i)
4413 break;
2c514b71 4414 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4415 if (d && (d->state_fd >= 0))
4416 continue;
4417
4418 /* OK, this device needs recovery. Find a spare */
4419 again:
4420 for ( ; dl ; dl = dl->next) {
4421 unsigned long long esize;
4422 unsigned long long pos;
4423 struct mdinfo *d2;
4424 int is_global = 0;
4425 int is_dedicated = 0;
4426 struct extent *ex;
f21e18ca 4427 unsigned int j;
7e1432fb
NB
4428 /* If in this array, skip */
4429 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4430 if (d2->state_fd >= 0 &&
4431 d2->disk.major == dl->major &&
7e1432fb 4432 d2->disk.minor == dl->minor) {
2c514b71 4433 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4434 break;
4435 }
4436 if (d2)
4437 continue;
4438 if (ddf->phys->entries[dl->pdnum].type &
4439 __cpu_to_be16(DDF_Spare)) {
4440 /* Check spare assign record */
4441 if (dl->spare) {
4442 if (dl->spare->type & DDF_spare_dedicated) {
4443 /* check spare_ents for guid */
4444 for (j = 0 ;
4445 j < __be16_to_cpu(dl->spare->populated);
4446 j++) {
4447 if (memcmp(dl->spare->spare_ents[j].guid,
4448 ddf->virt->entries[a->info.container_member].guid,
4449 DDF_GUID_LEN) == 0)
4450 is_dedicated = 1;
4451 }
4452 } else
4453 is_global = 1;
4454 }
4455 } else if (ddf->phys->entries[dl->pdnum].type &
4456 __cpu_to_be16(DDF_Global_Spare)) {
4457 is_global = 1;
e0e7aeaa
N
4458 } else if (!(ddf->phys->entries[dl->pdnum].state &
4459 __cpu_to_be16(DDF_Failed))) {
4460 /* we can possibly use some of this */
4461 is_global = 1;
7e1432fb
NB
4462 }
4463 if ( ! (is_dedicated ||
4464 (is_global && global_ok))) {
2c514b71 4465 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4466 is_dedicated, is_global);
7e1432fb
NB
4467 continue;
4468 }
4469
4470 /* We are allowed to use this device - is there space?
4471 * We need a->info.component_size sectors */
4472 ex = get_extents(ddf, dl);
4473 if (!ex) {
2c514b71 4474 dprintf("cannot get extents\n");
7e1432fb
NB
4475 continue;
4476 }
4477 j = 0; pos = 0;
4478 esize = 0;
4479
4480 do {
4481 esize = ex[j].start - pos;
4482 if (esize >= a->info.component_size)
4483 break;
e5cc7d46
N
4484 pos = ex[j].start + ex[j].size;
4485 j++;
4486 } while (ex[j-1].size);
7e1432fb
NB
4487
4488 free(ex);
4489 if (esize < a->info.component_size) {
e5cc7d46
N
4490 dprintf("%x:%x has no room: %llu %llu\n",
4491 dl->major, dl->minor,
2c514b71 4492 esize, a->info.component_size);
7e1432fb
NB
4493 /* No room */
4494 continue;
4495 }
4496
4497 /* Cool, we have a device with some space at pos */
503975b9 4498 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4499 di->disk.number = i;
4500 di->disk.raid_disk = i;
4501 di->disk.major = dl->major;
4502 di->disk.minor = dl->minor;
4503 di->disk.state = 0;
d23534e4 4504 di->recovery_start = 0;
7e1432fb
NB
4505 di->data_offset = pos;
4506 di->component_size = a->info.component_size;
4507 di->container_member = dl->pdnum;
4508 di->next = rv;
4509 rv = di;
2c514b71
NB
4510 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4511 i, pos);
7e1432fb
NB
4512
4513 break;
4514 }
4515 if (!dl && ! global_ok) {
4516 /* not enough dedicated spares, try global */
4517 global_ok = 1;
4518 dl = ddf->dlist;
4519 goto again;
4520 }
4521 }
4522
4523 if (!rv)
4524 /* No spares found */
4525 return rv;
4526 /* Now 'rv' has a list of devices to return.
4527 * Create a metadata_update record to update the
4528 * phys_refnum and lba_offset values
4529 */
503975b9
N
4530 mu = xmalloc(sizeof(*mu));
4531 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4532 free(mu);
4533 mu = NULL;
4534 }
503975b9 4535 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4536 mu->len = ddf->conf_rec_len * 512;
4537 mu->space = NULL;
f50ae22e 4538 mu->space_list = NULL;
7e1432fb 4539 mu->next = *updates;
baba3f4e 4540 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4541 &n_bvd, &vcl);
7e1432fb
NB
4542 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4543
4544 vc = (struct vd_config*)mu->buf;
4545 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4546 for (di = rv ; di ; di = di->next) {
4547 vc->phys_refnum[di->disk.raid_disk] =
4548 ddf->phys->entries[dl->pdnum].refnum;
4549 lba[di->disk.raid_disk] = di->data_offset;
4550 }
4551 *updates = mu;
4552 return rv;
4553}
0e600426 4554#endif /* MDASSEMBLE */
7e1432fb 4555
b640a252
N
4556static int ddf_level_to_layout(int level)
4557{
4558 switch(level) {
4559 case 0:
4560 case 1:
4561 return 0;
4562 case 5:
4563 return ALGORITHM_LEFT_SYMMETRIC;
4564 case 6:
4565 return ALGORITHM_ROTATING_N_CONTINUE;
4566 case 10:
4567 return 0x102;
4568 default:
4569 return UnSet;
4570 }
4571}
4572
30f58b22
DW
4573static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4574{
4575 if (level && *level == UnSet)
4576 *level = LEVEL_CONTAINER;
4577
4578 if (level && layout && *layout == UnSet)
4579 *layout = ddf_level_to_layout(*level);
4580}
4581
a322f70c
DW
4582struct superswitch super_ddf = {
4583#ifndef MDASSEMBLE
4584 .examine_super = examine_super_ddf,
4585 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4586 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4587 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4588 .detail_super = detail_super_ddf,
4589 .brief_detail_super = brief_detail_super_ddf,
4590 .validate_geometry = validate_geometry_ddf,
78e44928 4591 .write_init_super = write_init_super_ddf,
0e600426 4592 .add_to_super = add_to_super_ddf,
4dd968cc 4593 .remove_from_super = remove_from_super_ddf,
2b959fbf 4594 .load_container = load_container_ddf,
74db60b0 4595 .copy_metadata = copy_metadata_ddf,
a322f70c
DW
4596#endif
4597 .match_home = match_home_ddf,
4598 .uuid_from_super= uuid_from_super_ddf,
4599 .getinfo_super = getinfo_super_ddf,
4600 .update_super = update_super_ddf,
4601
4602 .avail_size = avail_size_ddf,
4603
a19c88b8
NB
4604 .compare_super = compare_super_ddf,
4605
a322f70c 4606 .load_super = load_super_ddf,
ba7eb04f 4607 .init_super = init_super_ddf,
955e9ea1 4608 .store_super = store_super_ddf,
a322f70c
DW
4609 .free_super = free_super_ddf,
4610 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4611 .container_content = container_content_ddf,
30f58b22 4612 .default_geometry = default_geometry_ddf,
a322f70c 4613
a322f70c 4614 .external = 1,
549e9569 4615
0e600426 4616#ifndef MDASSEMBLE
549e9569
NB
4617/* for mdmon */
4618 .open_new = ddf_open_new,
ed9d66aa 4619 .set_array_state= ddf_set_array_state,
549e9569
NB
4620 .set_disk = ddf_set_disk,
4621 .sync_metadata = ddf_sync_metadata,
88c164f4 4622 .process_update = ddf_process_update,
edd8d13c 4623 .prepare_update = ddf_prepare_update,
7e1432fb 4624 .activate_spare = ddf_activate_spare,
0e600426 4625#endif
4cce4069 4626 .name = "ddf",
a322f70c 4627};