]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: err_bad_md_layout: fix return value
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
/* Unofficial T10 name used in the GUIDs we create */
static char T10[] = "Linux-MD";

/* DDF timestamps count from 1980, Linux timestamps from 1970.
 * DECADE is the number of seconds in the seventies (ten years,
 * two of them leap years) and is added to convert a DDF timestamp
 * to a Linux one.
 */
#define DECADE (3600*24*(365*10+2))

unsigned long crc32(unsigned long crc,
		    const unsigned char *buf,
		    unsigned len);

/* DDF_NOTFOUND is all ones; DDF_CONTAINER is the value just below it */
#define DDF_NOTFOUND	(~0U)
#define DDF_CONTAINER	(DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
/* Primary RAID Level (PRL) values */
#define DDF_RAID0	0x00
#define DDF_RAID1	0x01
#define DDF_RAID3	0x03
#define DDF_RAID4	0x04
#define DDF_RAID5	0x05
#define DDF_RAID6	0x06
#define DDF_JBOD	0x0f
#define DDF_RAID1E	0x11
#define DDF_RAID5E	0x15
#define DDF_CONCAT	0x1f
#define DDF_RAID5EE	0x25

/* RAID Level Qualifier (RLQ) values */
#define DDF_RAID0_SIMPLE	0x00
#define DDF_RAID1_SIMPLE	0x00 /* just 2 devices in this plex */
#define DDF_RAID1_MULTI		0x01 /* exactly 3 devices in this plex */
#define DDF_RAID3_0		0x00 /* parity in first extent */
#define DDF_RAID3_N		0x01 /* parity in last extent */
#define DDF_RAID4_0		0x00 /* parity in first extent */
#define DDF_RAID4_N		0x01 /* parity in last extent */
/* the RAID5 qualifiers are also used for raid5e and raid5ee */
#define DDF_RAID5_0_RESTART	0x00 /* same as 'right asymmetric' - layout 1 */
#define DDF_RAID6_0_RESTART	0x01 /* raid6 different from raid5 here!!! */
#define DDF_RAID5_N_RESTART	0x02 /* same as 'left asymmetric' - layout 0 */
#define DDF_RAID5_N_CONTINUE	0x03 /* same as 'left symmetric' - layout 2 */

#define DDF_RAID1E_ADJACENT	0x00 /* raid10 nearcopies==2 */
#define DDF_RAID1E_OFFSET	0x01 /* raid10 offsetcopies==2 */

/* Secondary RAID Level (SRL) values */
#define DDF_2STRIPED	0x00 /* This is weirder than RAID0 !! */
#define DDF_2MIRRORED	0x01
#define DDF_2CONCAT	0x02
#define DDF_2SPANNED	0x03 /* This is also weird - be careful */
96
/* Section signatures, already converted to on-disk (big-endian) order
 * so they can be compared directly with a 'magic' field.
 */
#define DDF_HEADER_MAGIC	__cpu_to_be32(0xDE11DE11)
#define DDF_CONTROLLER_MAGIC	__cpu_to_be32(0xAD111111)
#define DDF_PHYS_RECORDS_MAGIC	__cpu_to_be32(0x22222222)
#define DDF_PHYS_DATA_MAGIC	__cpu_to_be32(0x33333333)
#define DDF_VIRT_RECORDS_MAGIC	__cpu_to_be32(0xDDDDDDDD)
#define DDF_VD_CONF_MAGIC	__cpu_to_be32(0xEEEEEEEE)
#define DDF_SPARE_ASSIGN_MAGIC	__cpu_to_be32(0x55555555)
#define DDF_VU_CONF_MAGIC	__cpu_to_be32(0x88888888)
#define DDF_VENDOR_LOG_MAGIC	__cpu_to_be32(0x01dBEEF0)
#define DDF_BBM_LOG_MAGIC	__cpu_to_be32(0xABADB10C)

/* Length in bytes of every GUID field */
#define DDF_GUID_LEN	24

/* Values of the 8-character ddf_header.revision field */
#define DDF_REVISION_0	"01.00.00"
#define DDF_REVISION_2	"01.02.00"
a322f70c
DW
112
113struct ddf_header {
88c164f4 114 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
115 __u32 crc;
116 char guid[DDF_GUID_LEN];
59e36268 117 char revision[8]; /* 01.02.00 */
a322f70c
DW
118 __u32 seq; /* starts at '1' */
119 __u32 timestamp;
120 __u8 openflag;
121 __u8 foreignflag;
122 __u8 enforcegroups;
123 __u8 pad0; /* 0xff */
124 __u8 pad1[12]; /* 12 * 0xff */
125 /* 64 bytes so far */
126 __u8 header_ext[32]; /* reserved: fill with 0xff */
127 __u64 primary_lba;
128 __u64 secondary_lba;
129 __u8 type;
130 __u8 pad2[3]; /* 0xff */
131 __u32 workspace_len; /* sectors for vendor space -
132 * at least 32768(sectors) */
133 __u64 workspace_lba;
134 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
135 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
136 __u16 max_partitions; /* i.e. max num of configuration
137 record entries per disk */
138 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
139 *12/512) */
140 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
141 __u8 pad3[54]; /* 0xff */
142 /* 192 bytes so far */
143 __u32 controller_section_offset;
144 __u32 controller_section_length;
145 __u32 phys_section_offset;
146 __u32 phys_section_length;
147 __u32 virt_section_offset;
148 __u32 virt_section_length;
149 __u32 config_section_offset;
150 __u32 config_section_length;
151 __u32 data_section_offset;
152 __u32 data_section_length;
153 __u32 bbm_section_offset;
154 __u32 bbm_section_length;
155 __u32 diag_space_offset;
156 __u32 diag_space_length;
157 __u32 vendor_offset;
158 __u32 vendor_length;
159 /* 256 bytes so far */
160 __u8 pad4[256]; /* 0xff */
161};
162
/* Values of ddf_header.type */
#define DDF_HEADER_ANCHOR	0x00
#define DDF_HEADER_PRIMARY	0x01
#define DDF_HEADER_SECONDARY	0x02
167
168/* The content of the 'controller section' - global scope */
169struct ddf_controller_data {
88c164f4 170 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
171 __u32 crc;
172 char guid[DDF_GUID_LEN];
173 struct controller_type {
174 __u16 vendor_id;
175 __u16 device_id;
176 __u16 sub_vendor_id;
177 __u16 sub_device_id;
178 } type;
179 char product_id[16];
180 __u8 pad[8]; /* 0xff */
181 __u8 vendor_data[448];
182};
183
184/* The content of phys_section - global scope */
185struct phys_disk {
88c164f4 186 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
187 __u32 crc;
188 __u16 used_pdes;
189 __u16 max_pdes;
190 __u8 pad[52];
191 struct phys_disk_entry {
192 char guid[DDF_GUID_LEN];
193 __u32 refnum;
194 __u16 type;
195 __u16 state;
196 __u64 config_size; /* DDF structures must be after here */
197 char path[18]; /* another horrible structure really */
198 __u8 pad[6];
199 } entries[0];
200};
201
/* Flag bits of phys_disk_entry.type (field is stored big-endian) */
#define DDF_Forced_PD_GUID	1
#define DDF_Active_in_VD	2
#define DDF_Global_Spare	4  /* VD_CONF records are ignored */
#define DDF_Spare		8  /* overrides Global_spare */
#define DDF_Foreign		16
#define DDF_Legacy		32 /* no DDF on this device */

/* Interface kind, also held in phys_disk_entry.type */
#define DDF_Interface_mask	0xf00
#define DDF_Interface_SCSI	0x100
#define DDF_Interface_SAS	0x200
#define DDF_Interface_SATA	0x300
#define DDF_Interface_FC	0x400

/* Flag bits of phys_disk_entry.state (field is stored big-endian) */
#define DDF_Online	1
#define DDF_Failed	2  /* overrides 1,4,8 */
#define DDF_Rebuilding	4
#define DDF_Transition	8
#define DDF_SMART	16
#define DDF_ReadErrors	32
#define DDF_Missing	64
224
225/* The content of the virt_section global scope */
226struct virtual_disk {
88c164f4 227 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
228 __u32 crc;
229 __u16 populated_vdes;
230 __u16 max_vdes;
231 __u8 pad[52];
232 struct virtual_entry {
233 char guid[DDF_GUID_LEN];
234 __u16 unit;
235 __u16 pad0; /* 0xffff */
236 __u16 guid_crc;
237 __u16 type;
238 __u8 state;
239 __u8 init_state;
240 __u8 pad1[14];
241 char name[16];
242 } entries[0];
243};
244
/* Flag bits of virtual_entry.type (field is stored big-endian) */
#define DDF_Shared		1
#define DDF_Enforce_Groups	2
#define DDF_Unicode		4
#define DDF_Owner_Valid		8

/* virtual_entry.state: the bits under DDF_state_mask hold one of the
 * DDF_state_* values; the remaining definitions are separate flag bits.
 */
#define DDF_state_mask		0x7
#define DDF_state_optimal	0x0
#define DDF_state_degraded	0x1
#define DDF_state_deleted	0x2
#define DDF_state_missing	0x3
#define DDF_state_failed	0x4
#define DDF_state_part_optimal	0x5

#define DDF_state_morphing	0x8
#define DDF_state_inconsistent	0x10

/* virtual_entry.init_state: initialisation status under
 * DDF_initstate_mask, access mode under DDF_access_mask.
 */
#define DDF_initstate_mask	0x03
#define DDF_init_not		0x00
#define DDF_init_quick		0x01 /* initialisation in progress.
				      * i.e. 'state_inconsistent' */
#define DDF_init_full		0x02

#define DDF_access_mask		0xc0
#define DDF_access_rw		0x00
#define DDF_access_ro		0x80
#define DDF_access_blocked	0xc0
274
275/* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
280struct vd_config {
88c164f4 281 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
282 __u32 crc;
283 char guid[DDF_GUID_LEN];
284 __u32 timestamp;
285 __u32 seqnum;
286 __u8 pad0[24];
287 __u16 prim_elmnt_count;
288 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
289 __u8 prl;
290 __u8 rlq;
291 __u8 sec_elmnt_count;
292 __u8 sec_elmnt_seq;
293 __u8 srl;
598f0d58
NB
294 __u64 blocks; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks; /* blocks in array */
a322f70c
DW
298 __u8 pad1[8];
299 __u32 spare_refs[8];
300 __u8 cache_pol[8];
301 __u8 bg_rate;
302 __u8 pad2[3];
303 __u8 pad3[52];
304 __u8 pad4[192];
305 __u8 v0[32]; /* reserved- 0xff */
306 __u8 v1[32]; /* reserved- 0xff */
307 __u8 v2[16]; /* reserved- 0xff */
308 __u8 v3[16]; /* reserved- 0xff */
309 __u8 vendor[32];
310 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
313};
57a66662 314#define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
a322f70c
DW
315
/* Flag bits of vd_config.cache_pol[7] */
#define DDF_cache_writeback	1  /* else writethrough */
#define DDF_cache_wadaptive	2  /* only applies if writeback */
#define DDF_cache_readahead	4
#define DDF_cache_radaptive	8  /* only if doing read-ahead */
#define DDF_cache_ifnobatt	16 /* even to write cache if battery is poor */
#define DDF_cache_wallowed	32 /* enable write caching */
#define DDF_cache_rallowed	64 /* enable read caching */
324
325struct spare_assign {
88c164f4 326 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
327 __u32 crc;
328 __u32 timestamp;
329 __u8 reserved[7];
330 __u8 type;
331 __u16 populated; /* SAEs used */
332 __u16 max; /* max SAEs */
333 __u8 pad[8];
334 struct spare_assign_entry {
335 char guid[DDF_GUID_LEN];
336 __u16 secondary_element;
337 __u8 pad[6];
338 } spare_ents[0];
339};
340/* spare_assign.type is a bitmap */
341#define DDF_spare_dedicated 0x1 /* else global */
342#define DDF_spare_revertible 0x2 /* else committable */
343#define DDF_spare_active 0x4 /* else not active */
344#define DDF_spare_affinity 0x8 /* enclosure affinity */
345
346/* The data_section contents - local scope */
347struct disk_data {
88c164f4 348 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
349 __u32 crc;
350 char guid[DDF_GUID_LEN];
351 __u32 refnum; /* crc of some magic drive data ... */
352 __u8 forced_ref; /* set when above was not result of magic */
353 __u8 forced_guid; /* set if guid was forced rather than magic */
354 __u8 vendor[32];
355 __u8 pad[442];
356};
357
358/* bbm_section content */
359struct bad_block_log {
360 __u32 magic;
361 __u32 crc;
362 __u16 entry_count;
363 __u32 spare_count;
364 __u8 pad[10];
365 __u64 first_spare;
366 struct mapped_block {
367 __u64 defective_start;
368 __u32 replacement_start;
369 __u16 remap_count;
370 __u8 pad[2];
371 } entries[0];
372};
373
374/* Struct for internally holding ddf structures */
375/* The DDF structure stored on each device is potentially
376 * quite different, as some data is global and some is local.
377 * The global data is:
378 * - ddf header
379 * - controller_data
380 * - Physical disk records
381 * - Virtual disk records
382 * The local data is:
383 * - Configuration records
384 * - Physical Disk data section
385 * ( and Bad block and vendor which I don't care about yet).
386 *
387 * The local data is parsed into separate lists as it is read
388 * and reconstructed for writing. This means that we only need
389 * to make config changes once and they are automatically
390 * propagated to all devices.
391 * Note that the ddf_super has space for the conf and disk data
392 * for this disk and also for a list of all such data.
393 * The list is only used for the superblock that is being
394 * built in Create or Assemble to describe the whole array.
395 */
396struct ddf_super {
6416d527 397 struct ddf_header anchor, primary, secondary;
a322f70c 398 struct ddf_controller_data controller;
6416d527 399 struct ddf_header *active;
a322f70c
DW
400 struct phys_disk *phys;
401 struct virtual_disk *virt;
402 int pdsize, vdsize;
f21e18ca 403 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 404 int currentdev;
18a2f463 405 int updates_pending;
a322f70c 406 struct vcl {
6416d527
NB
407 union {
408 char space[512];
409 struct {
410 struct vcl *next;
f21e18ca 411 unsigned int vcnum; /* index into ->virt */
8ec5d685 412 struct vd_config **other_bvds;
6416d527
NB
413 __u64 *block_sizes; /* NULL if all the same */
414 };
415 };
a322f70c 416 struct vd_config conf;
d2ca6449 417 } *conflist, *currentconf;
a322f70c 418 struct dl {
6416d527
NB
419 union {
420 char space[512];
421 struct {
422 struct dl *next;
423 int major, minor;
424 char *devname;
425 int fd;
426 unsigned long long size; /* sectors */
097bcf00 427 unsigned long long primary_lba; /* sectors */
428 unsigned long long secondary_lba; /* sectors */
429 unsigned long long workspace_lba; /* sectors */
6416d527
NB
430 int pdnum; /* index in ->phys */
431 struct spare_assign *spare;
8592f29d
N
432 void *mdupdate; /* hold metadata update */
433
434 /* These fields used by auto-layout */
435 int raiddisk; /* slot to fill in autolayout */
436 __u64 esize;
6416d527
NB
437 };
438 };
a322f70c 439 struct disk_data disk;
b2280677 440 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 441 } *dlist, *add_list;
a322f70c
DW
442};
443
/* Fallback for environments whose headers do not supply offsetof() */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
447
#if DEBUG
static int all_ff(const char *guid);

/* Debug aid: print the state and init_state bytes of every virtual disk
 * entry for which all_ff(guid) is false, prefixed by "pr_state/<msg>".
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int slot;

	dprintf("%s/%s: ", __func__, msg);
	slot = 0;
	while (slot < __be16_to_cpu(ddf->active->max_vd_entries)) {
		const struct virtual_entry *ve = &ddf->virt->entries[slot];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", slot,
				ve->state,
				ve->init_state);
		slot++;
	}
	dprintf("\n");
}
#else
/* Without DEBUG this is a no-op */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
466
35c3606d 467static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
468{
469 ddf->updates_pending = 1;
470 ddf->active->seq = __cpu_to_be32((__be32_to_cpu(ddf->active->seq)+1));
471 pr_state(ddf, func);
472}
473
474#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
7d5a7ff3 475
fcc22180 476static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
477 __u32 refnum, unsigned int nmax,
478 const struct vd_config **bvd,
479 unsigned int *idx);
480
f21e18ca 481static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
482{
483 /* crcs are always at the same place as in the ddf_header */
484 struct ddf_header *ddf = buf;
485 __u32 oldcrc = ddf->crc;
486 __u32 newcrc;
487 ddf->crc = 0xffffffff;
488
489 newcrc = crc32(0, buf, len);
490 ddf->crc = oldcrc;
4abe6b70
N
491 /* The crc is store (like everything) bigendian, so convert
492 * here for simplicity
493 */
494 return __cpu_to_be32(newcrc);
a322f70c
DW
495}
496
a3163bf0 497#define DDF_INVALID_LEVEL 0xff
498#define DDF_NO_SECONDARY 0xff
499static int err_bad_md_layout(const mdu_array_info_t *array)
500{
501 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
502 array->level, array->layout, array->raid_disks);
2aba583f 503 return -1;
a3163bf0 504}
505
506static int layout_md2ddf(const mdu_array_info_t *array,
507 struct vd_config *conf)
508{
509 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
510 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
511 __u8 sec_elmnt_count = 1;
512 __u8 srl = DDF_NO_SECONDARY;
513
514 switch (array->level) {
515 case LEVEL_LINEAR:
516 prl = DDF_CONCAT;
517 break;
518 case 0:
519 rlq = DDF_RAID0_SIMPLE;
520 prl = DDF_RAID0;
521 break;
522 case 1:
523 switch (array->raid_disks) {
524 case 2:
525 rlq = DDF_RAID1_SIMPLE;
526 break;
527 case 3:
528 rlq = DDF_RAID1_MULTI;
529 break;
530 default:
531 return err_bad_md_layout(array);
532 }
533 prl = DDF_RAID1;
534 break;
535 case 4:
536 if (array->layout != 0)
537 return err_bad_md_layout(array);
538 rlq = DDF_RAID4_N;
539 prl = DDF_RAID4;
540 break;
541 case 5:
542 switch (array->layout) {
543 case ALGORITHM_LEFT_ASYMMETRIC:
544 rlq = DDF_RAID5_N_RESTART;
545 break;
546 case ALGORITHM_RIGHT_ASYMMETRIC:
547 rlq = DDF_RAID5_0_RESTART;
548 break;
549 case ALGORITHM_LEFT_SYMMETRIC:
550 rlq = DDF_RAID5_N_CONTINUE;
551 break;
552 case ALGORITHM_RIGHT_SYMMETRIC:
553 /* not mentioned in standard */
554 default:
555 return err_bad_md_layout(array);
556 }
557 prl = DDF_RAID5;
558 break;
559 case 6:
560 switch (array->layout) {
561 case ALGORITHM_ROTATING_N_RESTART:
562 rlq = DDF_RAID5_N_RESTART;
563 break;
564 case ALGORITHM_ROTATING_ZERO_RESTART:
565 rlq = DDF_RAID6_0_RESTART;
566 break;
567 case ALGORITHM_ROTATING_N_CONTINUE:
568 rlq = DDF_RAID5_N_CONTINUE;
569 break;
570 default:
571 return err_bad_md_layout(array);
572 }
573 prl = DDF_RAID6;
574 break;
575 case 10:
576 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
577 rlq = DDF_RAID1_SIMPLE;
578 prim_elmnt_count = __cpu_to_be16(2);
579 sec_elmnt_count = array->raid_disks / 2;
580 } else if (array->raid_disks % 3 == 0
581 && array->layout == 0x103) {
582 rlq = DDF_RAID1_MULTI;
583 prim_elmnt_count = __cpu_to_be16(3);
584 sec_elmnt_count = array->raid_disks / 3;
585 } else
586 return err_bad_md_layout(array);
587 srl = DDF_2SPANNED;
588 prl = DDF_RAID1;
589 break;
590 default:
591 return err_bad_md_layout(array);
592 }
593 conf->prl = prl;
594 conf->prim_elmnt_count = prim_elmnt_count;
595 conf->rlq = rlq;
596 conf->srl = srl;
597 conf->sec_elmnt_count = sec_elmnt_count;
598 return 0;
599}
600
8a2848a7 601static int err_bad_ddf_layout(const struct vd_config *conf)
602{
603 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
604 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
605 return -1;
606}
607
608static int layout_ddf2md(const struct vd_config *conf,
609 mdu_array_info_t *array)
610{
611 int level = LEVEL_UNSUPPORTED;
612 int layout = 0;
613 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
614
615 if (conf->sec_elmnt_count > 1) {
616 /* see also check_secondary() */
617 if (conf->prl != DDF_RAID1 ||
618 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
619 pr_err("Unsupported secondary RAID level %u/%u\n",
620 conf->prl, conf->srl);
621 return -1;
622 }
623 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
624 layout = 0x102;
625 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
626 layout = 0x103;
627 else
628 return err_bad_ddf_layout(conf);
629 raiddisks *= conf->sec_elmnt_count;
630 level = 10;
631 goto good;
632 }
633
634 switch (conf->prl) {
635 case DDF_CONCAT:
636 level = LEVEL_LINEAR;
637 break;
638 case DDF_RAID0:
639 if (conf->rlq != DDF_RAID0_SIMPLE)
640 return err_bad_ddf_layout(conf);
641 level = 0;
642 break;
643 case DDF_RAID1:
644 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
645 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
646 return err_bad_ddf_layout(conf);
647 level = 1;
648 break;
649 case DDF_RAID4:
650 if (conf->rlq != DDF_RAID4_N)
651 return err_bad_ddf_layout(conf);
652 level = 4;
653 break;
654 case DDF_RAID5:
655 switch (conf->rlq) {
656 case DDF_RAID5_N_RESTART:
657 layout = ALGORITHM_LEFT_ASYMMETRIC;
658 break;
659 case DDF_RAID5_0_RESTART:
660 layout = ALGORITHM_RIGHT_ASYMMETRIC;
661 break;
662 case DDF_RAID5_N_CONTINUE:
663 layout = ALGORITHM_LEFT_SYMMETRIC;
664 break;
665 default:
666 return err_bad_ddf_layout(conf);
667 }
668 level = 5;
669 break;
670 case DDF_RAID6:
671 switch (conf->rlq) {
672 case DDF_RAID5_N_RESTART:
673 layout = ALGORITHM_ROTATING_N_RESTART;
674 break;
675 case DDF_RAID6_0_RESTART:
676 layout = ALGORITHM_ROTATING_ZERO_RESTART;
677 break;
678 case DDF_RAID5_N_CONTINUE:
679 layout = ALGORITHM_ROTATING_N_CONTINUE;
680 break;
681 default:
682 return err_bad_ddf_layout(conf);
683 }
684 level = 6;
685 break;
686 default:
687 return err_bad_ddf_layout(conf);
688 };
689
690good:
691 array->level = level;
692 array->layout = layout;
693 array->raid_disks = raiddisks;
694 return 0;
695}
696
a322f70c
DW
697static int load_ddf_header(int fd, unsigned long long lba,
698 unsigned long long size,
699 int type,
700 struct ddf_header *hdr, struct ddf_header *anchor)
701{
702 /* read a ddf header (primary or secondary) from fd/lba
703 * and check that it is consistent with anchor
704 * Need to check:
705 * magic, crc, guid, rev, and LBA's header_type, and
706 * everything after header_type must be the same
707 */
708 if (lba >= size-1)
709 return 0;
710
711 if (lseek64(fd, lba<<9, 0) < 0)
712 return 0;
713
714 if (read(fd, hdr, 512) != 512)
715 return 0;
716
717 if (hdr->magic != DDF_HEADER_MAGIC)
718 return 0;
719 if (calc_crc(hdr, 512) != hdr->crc)
720 return 0;
721 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
722 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
723 anchor->primary_lba != hdr->primary_lba ||
724 anchor->secondary_lba != hdr->secondary_lba ||
725 hdr->type != type ||
726 memcmp(anchor->pad2, hdr->pad2, 512 -
727 offsetof(struct ddf_header, pad2)) != 0)
728 return 0;
729
730 /* Looks good enough to me... */
731 return 1;
732}
733
734static void *load_section(int fd, struct ddf_super *super, void *buf,
735 __u32 offset_be, __u32 len_be, int check)
736{
737 unsigned long long offset = __be32_to_cpu(offset_be);
738 unsigned long long len = __be32_to_cpu(len_be);
739 int dofree = (buf == NULL);
740
741 if (check)
742 if (len != 2 && len != 8 && len != 32
743 && len != 128 && len != 512)
744 return NULL;
745
746 if (len > 1024)
747 return NULL;
748 if (buf) {
749 /* All pre-allocated sections are a single block */
750 if (len != 1)
751 return NULL;
3d2c4fc7
DW
752 } else if (posix_memalign(&buf, 512, len<<9) != 0)
753 buf = NULL;
6416d527 754
a322f70c
DW
755 if (!buf)
756 return NULL;
757
758 if (super->active->type == 1)
759 offset += __be64_to_cpu(super->active->primary_lba);
760 else
761 offset += __be64_to_cpu(super->active->secondary_lba);
762
f21e18ca 763 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
764 if (dofree)
765 free(buf);
766 return NULL;
767 }
f21e18ca 768 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
769 if (dofree)
770 free(buf);
771 return NULL;
772 }
773 return buf;
774}
775
776static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
777{
778 unsigned long long dsize;
779
780 get_dev_size(fd, NULL, &dsize);
781
782 if (lseek64(fd, dsize-512, 0) < 0) {
783 if (devname)
e7b84f9d
N
784 pr_err("Cannot seek to anchor block on %s: %s\n",
785 devname, strerror(errno));
a322f70c
DW
786 return 1;
787 }
788 if (read(fd, &super->anchor, 512) != 512) {
789 if (devname)
e7b84f9d
N
790 pr_err("Cannot read anchor block on %s: %s\n",
791 devname, strerror(errno));
a322f70c
DW
792 return 1;
793 }
794 if (super->anchor.magic != DDF_HEADER_MAGIC) {
795 if (devname)
e7b84f9d 796 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
797 devname);
798 return 2;
799 }
800 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
801 if (devname)
e7b84f9d 802 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
803 devname);
804 return 2;
805 }
59e36268
NB
806 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
807 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 808 if (devname)
e7b84f9d 809 pr_err("can only support super revision"
59e36268
NB
810 " %.8s and earlier, not %.8s on %s\n",
811 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
812 return 2;
813 }
dbeb699a 814 super->active = NULL;
a322f70c
DW
815 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
816 dsize >> 9, 1,
817 &super->primary, &super->anchor) == 0) {
818 if (devname)
e7b84f9d
N
819 pr_err("Failed to load primary DDF header "
820 "on %s\n", devname);
dbeb699a 821 } else
822 super->active = &super->primary;
a322f70c
DW
823 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
824 dsize >> 9, 2,
825 &super->secondary, &super->anchor)) {
3eff7c1d
N
826 if (super->active == NULL
827 || (__be32_to_cpu(super->primary.seq)
828 < __be32_to_cpu(super->secondary.seq) &&
829 !super->secondary.openflag)
a322f70c
DW
830 || (__be32_to_cpu(super->primary.seq)
831 == __be32_to_cpu(super->secondary.seq) &&
832 super->primary.openflag && !super->secondary.openflag)
833 )
834 super->active = &super->secondary;
dbeb699a 835 } else if (devname)
836 pr_err("Failed to load secondary DDF header on %s\n",
837 devname);
838 if (super->active == NULL)
839 return 2;
a322f70c
DW
840 return 0;
841}
842
843static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
844{
845 void *ok;
846 ok = load_section(fd, super, &super->controller,
847 super->active->controller_section_offset,
848 super->active->controller_section_length,
849 0);
850 super->phys = load_section(fd, super, NULL,
851 super->active->phys_section_offset,
852 super->active->phys_section_length,
853 1);
854 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
855
856 super->virt = load_section(fd, super, NULL,
857 super->active->virt_section_offset,
858 super->active->virt_section_length,
859 1);
860 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
861 if (!ok ||
862 !super->phys ||
863 !super->virt) {
864 free(super->phys);
865 free(super->virt);
a2349791
NB
866 super->phys = NULL;
867 super->virt = NULL;
a322f70c
DW
868 return 2;
869 }
870 super->conflist = NULL;
871 super->dlist = NULL;
8c3b8c2c
NB
872
873 super->max_part = __be16_to_cpu(super->active->max_partitions);
874 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
875 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
876 return 0;
877}
878
3c48f7be 879#define DDF_UNUSED_BVD 0xff
880static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
881{
882 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
883 unsigned int i, vdsize;
884 void *p;
885 if (n_vds == 0) {
886 vcl->other_bvds = NULL;
887 return 0;
888 }
889 vdsize = ddf->conf_rec_len * 512;
890 if (posix_memalign(&p, 512, n_vds *
891 (vdsize + sizeof(struct vd_config *))) != 0)
892 return -1;
893 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
894 for (i = 0; i < n_vds; i++) {
895 vcl->other_bvds[i] = p + i * vdsize;
896 memset(vcl->other_bvds[i], 0, vdsize);
897 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
898 }
899 return 0;
900}
901
3dc821b0 902static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
903 unsigned int len)
904{
905 int i;
906 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 907 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
3dc821b0 908 break;
909
910 if (i < vcl->conf.sec_elmnt_count-1) {
911 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
912 return;
913 } else {
914 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 915 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
3dc821b0 916 break;
917 if (i == vcl->conf.sec_elmnt_count-1) {
918 pr_err("no space for sec level config %u, count is %u\n",
919 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
920 return;
921 }
3dc821b0 922 }
923 memcpy(vcl->other_bvds[i], vd, len);
924}
925
a322f70c
DW
926static int load_ddf_local(int fd, struct ddf_super *super,
927 char *devname, int keep)
928{
929 struct dl *dl;
930 struct stat stb;
931 char *conf;
f21e18ca
N
932 unsigned int i;
933 unsigned int confsec;
b2280677 934 int vnum;
f21e18ca 935 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 936 unsigned long long dsize;
a322f70c
DW
937
938 /* First the local disk info */
3d2c4fc7 939 if (posix_memalign((void**)&dl, 512,
6416d527 940 sizeof(*dl) +
3d2c4fc7 941 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 942 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
943 __func__);
944 return 1;
945 }
a322f70c
DW
946
947 load_section(fd, super, &dl->disk,
948 super->active->data_section_offset,
949 super->active->data_section_length,
950 0);
503975b9 951 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 952
a322f70c
DW
953 fstat(fd, &stb);
954 dl->major = major(stb.st_rdev);
955 dl->minor = minor(stb.st_rdev);
956 dl->next = super->dlist;
957 dl->fd = keep ? fd : -1;
d2ca6449
NB
958
959 dl->size = 0;
960 if (get_dev_size(fd, devname, &dsize))
961 dl->size = dsize >> 9;
097bcf00 962 /* If the disks have different sizes, the LBAs will differ
963 * between phys disks.
964 * At this point here, the values in super->active must be valid
965 * for this phys disk. */
966 dl->primary_lba = super->active->primary_lba;
967 dl->secondary_lba = super->active->secondary_lba;
968 dl->workspace_lba = super->active->workspace_lba;
b2280677 969 dl->spare = NULL;
f21e18ca 970 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
971 dl->vlist[i] = NULL;
972 super->dlist = dl;
59e36268 973 dl->pdnum = -1;
f21e18ca 974 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
975 if (memcmp(super->phys->entries[i].guid,
976 dl->disk.guid, DDF_GUID_LEN) == 0)
977 dl->pdnum = i;
978
a322f70c
DW
979 /* Now the config list. */
980 /* 'conf' is an array of config entries, some of which are
981 * probably invalid. Those which are good need to be copied into
982 * the conflist
983 */
a322f70c
DW
984
985 conf = load_section(fd, super, NULL,
986 super->active->config_section_offset,
987 super->active->config_section_length,
988 0);
989
b2280677 990 vnum = 0;
e223334f
N
991 for (confsec = 0;
992 confsec < __be32_to_cpu(super->active->config_section_length);
993 confsec += super->conf_rec_len) {
a322f70c 994 struct vd_config *vd =
e223334f 995 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
996 struct vcl *vcl;
997
b2280677
NB
998 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
999 if (dl->spare)
1000 continue;
3d2c4fc7
DW
1001 if (posix_memalign((void**)&dl->spare, 512,
1002 super->conf_rec_len*512) != 0) {
e7b84f9d
N
1003 pr_err("%s could not allocate spare info buf\n",
1004 __func__);
3d2c4fc7
DW
1005 return 1;
1006 }
613b0d17 1007
b2280677
NB
1008 memcpy(dl->spare, vd, super->conf_rec_len*512);
1009 continue;
1010 }
a322f70c
DW
1011 if (vd->magic != DDF_VD_CONF_MAGIC)
1012 continue;
1013 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1014 if (memcmp(vcl->conf.guid,
1015 vd->guid, DDF_GUID_LEN) == 0)
1016 break;
1017 }
1018
1019 if (vcl) {
b2280677 1020 dl->vlist[vnum++] = vcl;
3dc821b0 1021 if (vcl->other_bvds != NULL &&
1022 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1023 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1024 continue;
1025 }
a322f70c
DW
1026 if (__be32_to_cpu(vd->seqnum) <=
1027 __be32_to_cpu(vcl->conf.seqnum))
1028 continue;
59e36268 1029 } else {
3d2c4fc7 1030 if (posix_memalign((void**)&vcl, 512,
6416d527 1031 (super->conf_rec_len*512 +
3d2c4fc7 1032 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1033 pr_err("%s could not allocate vcl buf\n",
1034 __func__);
3d2c4fc7
DW
1035 return 1;
1036 }
a322f70c 1037 vcl->next = super->conflist;
59e36268 1038 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
3c48f7be 1039 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1040 if (alloc_other_bvds(super, vcl) != 0) {
1041 pr_err("%s could not allocate other bvds\n",
1042 __func__);
1043 free(vcl);
1044 return 1;
1045 };
a322f70c 1046 super->conflist = vcl;
b2280677 1047 dl->vlist[vnum++] = vcl;
a322f70c 1048 }
8c3b8c2c 1049 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
59e36268
NB
1050 for (i=0; i < max_virt_disks ; i++)
1051 if (memcmp(super->virt->entries[i].guid,
1052 vcl->conf.guid, DDF_GUID_LEN)==0)
1053 break;
1054 if (i < max_virt_disks)
1055 vcl->vcnum = i;
a322f70c
DW
1056 }
1057 free(conf);
1058
1059 return 0;
1060}
1061
1062#ifndef MDASSEMBLE
1063static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1064 void **sbp, char *devname);
a322f70c 1065#endif
37424f13
DW
1066
1067static void free_super_ddf(struct supertype *st);
1068
a322f70c
DW
1069static int load_super_ddf(struct supertype *st, int fd,
1070 char *devname)
1071{
1072 unsigned long long dsize;
1073 struct ddf_super *super;
1074 int rv;
1075
a322f70c
DW
1076 if (get_dev_size(fd, devname, &dsize) == 0)
1077 return 1;
1078
b31df436 1079 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
1080 /* DDF is not allowed on partitions */
1081 return 1;
1082
a322f70c
DW
1083 /* 32M is a lower bound */
1084 if (dsize <= 32*1024*1024) {
97320d7c 1085 if (devname)
e7b84f9d
N
1086 pr_err("%s is too small for ddf: "
1087 "size is %llu sectors.\n",
1088 devname, dsize>>9);
97320d7c 1089 return 1;
a322f70c
DW
1090 }
1091 if (dsize & 511) {
97320d7c 1092 if (devname)
e7b84f9d
N
1093 pr_err("%s is an odd size for ddf: "
1094 "size is %llu bytes.\n",
1095 devname, dsize);
97320d7c 1096 return 1;
a322f70c
DW
1097 }
1098
37424f13
DW
1099 free_super_ddf(st);
1100
6416d527 1101 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1102 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1103 sizeof(*super));
1104 return 1;
1105 }
a2349791 1106 memset(super, 0, sizeof(*super));
a322f70c
DW
1107
1108 rv = load_ddf_headers(fd, super, devname);
1109 if (rv) {
1110 free(super);
1111 return rv;
1112 }
1113
1114 /* Have valid headers and have chosen the best. Let's read in the rest*/
1115
1116 rv = load_ddf_global(fd, super, devname);
1117
1118 if (rv) {
1119 if (devname)
e7b84f9d
N
1120 pr_err("Failed to load all information "
1121 "sections on %s\n", devname);
a322f70c
DW
1122 free(super);
1123 return rv;
1124 }
1125
3d2c4fc7
DW
1126 rv = load_ddf_local(fd, super, devname, 0);
1127
1128 if (rv) {
1129 if (devname)
e7b84f9d
N
1130 pr_err("Failed to load all information "
1131 "sections on %s\n", devname);
3d2c4fc7
DW
1132 free(super);
1133 return rv;
1134 }
a322f70c
DW
1135
1136 /* Should possibly check the sections .... */
1137
1138 st->sb = super;
1139 if (st->ss == NULL) {
1140 st->ss = &super_ddf;
1141 st->minor_version = 0;
1142 st->max_devs = 512;
1143 }
1144 return 0;
1145
1146}
1147
1148static void free_super_ddf(struct supertype *st)
1149{
1150 struct ddf_super *ddf = st->sb;
1151 if (ddf == NULL)
1152 return;
1153 free(ddf->phys);
1154 free(ddf->virt);
1155 while (ddf->conflist) {
1156 struct vcl *v = ddf->conflist;
1157 ddf->conflist = v->next;
59e36268
NB
1158 if (v->block_sizes)
1159 free(v->block_sizes);
3c48f7be 1160 if (v->other_bvds)
1161 /*
1162 v->other_bvds[0] points to beginning of buffer,
1163 see alloc_other_bvds()
1164 */
1165 free(v->other_bvds[0]);
a322f70c
DW
1166 free(v);
1167 }
1168 while (ddf->dlist) {
1169 struct dl *d = ddf->dlist;
1170 ddf->dlist = d->next;
1171 if (d->fd >= 0)
1172 close(d->fd);
b2280677
NB
1173 if (d->spare)
1174 free(d->spare);
a322f70c
DW
1175 free(d);
1176 }
8a38cb04
N
1177 while (ddf->add_list) {
1178 struct dl *d = ddf->add_list;
1179 ddf->add_list = d->next;
1180 if (d->fd >= 0)
1181 close(d->fd);
1182 if (d->spare)
1183 free(d->spare);
1184 free(d);
1185 }
a322f70c
DW
1186 free(ddf);
1187 st->sb = NULL;
1188}
1189
1190static struct supertype *match_metadata_desc_ddf(char *arg)
1191{
1192 /* 'ddf' only support containers */
1193 struct supertype *st;
1194 if (strcmp(arg, "ddf") != 0 &&
1195 strcmp(arg, "default") != 0
1196 )
1197 return NULL;
1198
503975b9 1199 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1200 st->ss = &super_ddf;
1201 st->max_devs = 512;
1202 st->minor_version = 0;
1203 st->sb = NULL;
1204 return st;
1205}
1206
a322f70c
DW
1207#ifndef MDASSEMBLE
1208
1209static mapping_t ddf_state[] = {
1210 { "Optimal", 0},
1211 { "Degraded", 1},
1212 { "Deleted", 2},
1213 { "Missing", 3},
1214 { "Failed", 4},
1215 { "Partially Optimal", 5},
1216 { "-reserved-", 6},
1217 { "-reserved-", 7},
1218 { NULL, 0}
1219};
1220
1221static mapping_t ddf_init_state[] = {
1222 { "Not Initialised", 0},
1223 { "QuickInit in Progress", 1},
1224 { "Fully Initialised", 2},
1225 { "*UNKNOWN*", 3},
1226 { NULL, 0}
1227};
1228static mapping_t ddf_access[] = {
1229 { "Read/Write", 0},
1230 { "Reserved", 1},
1231 { "Read Only", 2},
1232 { "Blocked (no access)", 3},
1233 { NULL ,0}
1234};
1235
1236static mapping_t ddf_level[] = {
1237 { "RAID0", DDF_RAID0},
1238 { "RAID1", DDF_RAID1},
1239 { "RAID3", DDF_RAID3},
1240 { "RAID4", DDF_RAID4},
1241 { "RAID5", DDF_RAID5},
1242 { "RAID1E",DDF_RAID1E},
1243 { "JBOD", DDF_JBOD},
1244 { "CONCAT",DDF_CONCAT},
1245 { "RAID5E",DDF_RAID5E},
1246 { "RAID5EE",DDF_RAID5EE},
1247 { "RAID6", DDF_RAID6},
1248 { NULL, 0}
1249};
1250static mapping_t ddf_sec_level[] = {
1251 { "Striped", DDF_2STRIPED},
1252 { "Mirrored", DDF_2MIRRORED},
1253 { "Concat", DDF_2CONCAT},
1254 { "Spanned", DDF_2SPANNED},
1255 { NULL, 0}
1256};
1257#endif
1258
fb9d0acb 1259static int all_ff(const char *guid)
42dc2744
N
1260{
1261 int i;
1262 for (i = 0; i < DDF_GUID_LEN; i++)
1263 if (guid[i] != (char)0xff)
1264 return 0;
1265 return 1;
1266}
1267
a322f70c
DW
1268#ifndef MDASSEMBLE
1269static void print_guid(char *guid, int tstamp)
1270{
1271 /* A GUIDs are part (or all) ASCII and part binary.
1272 * They tend to be space padded.
59e36268
NB
1273 * We print the GUID in HEX, then in parentheses add
1274 * any initial ASCII sequence, and a possible
1275 * time stamp from bytes 16-19
a322f70c
DW
1276 */
1277 int l = DDF_GUID_LEN;
1278 int i;
59e36268
NB
1279
1280 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1281 if ((i&3)==0 && i != 0) printf(":");
1282 printf("%02X", guid[i]&255);
1283 }
1284
cfccea8c 1285 printf("\n (");
a322f70c
DW
1286 while (l && guid[l-1] == ' ')
1287 l--;
1288 for (i=0 ; i<l ; i++) {
1289 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1290 fputc(guid[i], stdout);
1291 else
59e36268 1292 break;
a322f70c
DW
1293 }
1294 if (tstamp) {
1295 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1296 char tbuf[100];
1297 struct tm *tm;
1298 tm = localtime(&then);
59e36268 1299 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1300 fputs(tbuf, stdout);
1301 }
59e36268 1302 printf(")");
a322f70c
DW
1303}
1304
be9b9ef4 1305static const char *guid_str(const char *guid)
1306{
1307 static char buf[DDF_GUID_LEN*2+1];
1308 int i;
1309 char *p = buf;
4a03cbd1 1310 for (i = 0; i < DDF_GUID_LEN; i++) {
1311 unsigned char c = guid[i];
1312 if (c >= 32 && c < 127)
1313 p += sprintf(p, "%c", c);
1314 else
1315 p += sprintf(p, "%02x", c);
1316 }
be9b9ef4 1317 *p = '\0';
1318 return (const char *) buf;
1319}
1320
a322f70c
DW
1321static void examine_vd(int n, struct ddf_super *sb, char *guid)
1322{
8c3b8c2c 1323 int crl = sb->conf_rec_len;
a322f70c
DW
1324 struct vcl *vcl;
1325
1326 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1327 unsigned int i;
a322f70c
DW
1328 struct vd_config *vc = &vcl->conf;
1329
1330 if (calc_crc(vc, crl*512) != vc->crc)
1331 continue;
1332 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1333 continue;
1334
1335 /* Ok, we know about this VD, let's give more details */
b06e3095 1336 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1337 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1338 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1339 int j;
1340 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1341 for (j=0; j<cnt; j++)
1342 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1343 break;
1344 if (i) printf(" ");
1345 if (j < cnt)
1346 printf("%d", j);
1347 else
1348 printf("--");
1349 }
1350 printf(")\n");
1351 if (vc->chunk_shift != 255)
613b0d17
N
1352 printf(" Chunk Size[%d] : %d sectors\n", n,
1353 1 << vc->chunk_shift);
a322f70c
DW
1354 printf(" Raid Level[%d] : %s\n", n,
1355 map_num(ddf_level, vc->prl)?:"-unknown-");
1356 if (vc->sec_elmnt_count != 1) {
1357 printf(" Secondary Position[%d] : %d of %d\n", n,
1358 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1359 printf(" Secondary Level[%d] : %s\n", n,
1360 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1361 }
1362 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1363 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1364 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1365 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1366 }
1367}
1368
1369static void examine_vds(struct ddf_super *sb)
1370{
1371 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1372 unsigned int i;
a322f70c
DW
1373 printf(" Virtual Disks : %d\n", cnt);
1374
fb9d0acb 1375 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1376 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1377 if (all_ff(ve->guid))
1378 continue;
b06e3095 1379 printf("\n");
a322f70c
DW
1380 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1381 printf("\n");
1382 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1383 printf(" state[%d] : %s, %s%s\n", i,
1384 map_num(ddf_state, ve->state & 7),
1385 (ve->state & 8) ? "Morphing, ": "",
1386 (ve->state & 16)? "Not Consistent" : "Consistent");
1387 printf(" init state[%d] : %s\n", i,
1388 map_num(ddf_init_state, ve->init_state&3));
1389 printf(" access[%d] : %s\n", i,
1390 map_num(ddf_access, (ve->init_state>>6) & 3));
1391 printf(" Name[%d] : %.16s\n", i, ve->name);
1392 examine_vd(i, sb, ve->guid);
1393 }
1394 if (cnt) printf("\n");
1395}
1396
1397static void examine_pds(struct ddf_super *sb)
1398{
1399 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1400 int i;
1401 struct dl *dl;
1402 printf(" Physical Disks : %d\n", cnt);
962371a5 1403 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1404
1405 for (i=0 ; i<cnt ; i++) {
1406 struct phys_disk_entry *pd = &sb->phys->entries[i];
1407 int type = __be16_to_cpu(pd->type);
1408 int state = __be16_to_cpu(pd->state);
1409
b06e3095
N
1410 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1411 //printf("\n");
1412 printf(" %3d %08x ", i,
a322f70c 1413 __be32_to_cpu(pd->refnum));
613b0d17 1414 printf("%8lluK ",
c9b6907b 1415 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1416 for (dl = sb->dlist; dl ; dl = dl->next) {
1417 if (dl->disk.refnum == pd->refnum) {
1418 char *dv = map_dev(dl->major, dl->minor, 0);
1419 if (dv) {
962371a5 1420 printf("%-15s", dv);
b06e3095
N
1421 break;
1422 }
1423 }
1424 }
1425 if (!dl)
962371a5 1426 printf("%15s","");
b06e3095 1427 printf(" %s%s%s%s%s",
a322f70c 1428 (type&2) ? "active":"",
b06e3095 1429 (type&4) ? "Global-Spare":"",
a322f70c
DW
1430 (type&8) ? "spare" : "",
1431 (type&16)? ", foreign" : "",
1432 (type&32)? "pass-through" : "");
18cb4496
N
1433 if (state & DDF_Failed)
1434 /* This over-rides these three */
1435 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1436 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1437 (state&1)? "Online": "Offline",
1438 (state&2)? ", Failed": "",
1439 (state&4)? ", Rebuilding": "",
1440 (state&8)? ", in-transition": "",
b06e3095
N
1441 (state&16)? ", SMART-errors": "",
1442 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1443 (state&64)? ", Missing" : "");
a322f70c
DW
1444 printf("\n");
1445 }
1446}
1447
1448static void examine_super_ddf(struct supertype *st, char *homehost)
1449{
1450 struct ddf_super *sb = st->sb;
1451
1452 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1453 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1454 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1455 printf("\n");
1456 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1457 printf("\n");
1458 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1459 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1460 ?"yes" : "no");
1461 examine_vds(sb);
1462 examine_pds(sb);
1463}
1464
a5d85af7 1465static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1466
bedbf68a 1467static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1468static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1469
bedbf68a 1470static unsigned int get_vd_num_of_subarray(struct supertype *st)
1471{
1472 /*
1473 * Figure out the VD number for this supertype.
1474 * Returns DDF_CONTAINER for the container itself,
1475 * and DDF_NOTFOUND on error.
1476 */
1477 struct ddf_super *ddf = st->sb;
1478 struct mdinfo *sra;
1479 char *sub, *end;
1480 unsigned int vcnum;
1481
1482 if (*st->container_devnm == '\0')
1483 return DDF_CONTAINER;
1484
1485 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1486 if (!sra || sra->array.major_version != -1 ||
1487 sra->array.minor_version != -2 ||
1488 !is_subarray(sra->text_version))
1489 return DDF_NOTFOUND;
1490
1491 sub = strchr(sra->text_version + 1, '/');
1492 if (sub != NULL)
1493 vcnum = strtoul(sub + 1, &end, 10);
1494 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1495 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1496 return DDF_NOTFOUND;
1497
1498 return vcnum;
1499}
1500
061f2c6a 1501static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1502{
1503 /* We just write a generic DDF ARRAY entry
1504 */
1505 struct mdinfo info;
1506 char nbuf[64];
a5d85af7 1507 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1508 fname_from_uuid(st, &info, nbuf, ':');
1509
1510 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1511}
1512
1513static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1514{
1515 /* We just write a generic DDF ARRAY entry
a322f70c 1516 */
42dc2744 1517 struct ddf_super *ddf = st->sb;
ff54de6e 1518 struct mdinfo info;
f21e18ca 1519 unsigned int i;
ff54de6e 1520 char nbuf[64];
a5d85af7 1521 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1522 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1523
f21e18ca 1524 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1525 struct virtual_entry *ve = &ddf->virt->entries[i];
1526 struct vcl vcl;
1527 char nbuf1[64];
1528 if (all_ff(ve->guid))
1529 continue;
1530 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1531 ddf->currentconf =&vcl;
1532 uuid_from_super_ddf(st, info.uuid);
1533 fname_from_uuid(st, &info, nbuf1, ':');
1534 printf("ARRAY container=%s member=%d UUID=%s\n",
1535 nbuf+5, i, nbuf1+5);
1536 }
a322f70c
DW
1537}
1538
bceedeec
N
1539static void export_examine_super_ddf(struct supertype *st)
1540{
1541 struct mdinfo info;
1542 char nbuf[64];
a5d85af7 1543 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1544 fname_from_uuid(st, &info, nbuf, ':');
1545 printf("MD_METADATA=ddf\n");
1546 printf("MD_LEVEL=container\n");
1547 printf("MD_UUID=%s\n", nbuf+5);
1548}
bceedeec 1549
74db60b0
N
1550static int copy_metadata_ddf(struct supertype *st, int from, int to)
1551{
1552 void *buf;
1553 unsigned long long dsize, offset;
1554 int bytes;
1555 struct ddf_header *ddf;
1556 int written = 0;
1557
1558 /* The meta consists of an anchor, a primary, and a secondary.
1559 * This all lives at the end of the device.
1560 * So it is easiest to find the earliest of primary and
1561 * secondary, and copy everything from there.
1562 *
1563 * Anchor is 512 from end It contains primary_lba and secondary_lba
1564 * we choose one of those
1565 */
1566
1567 if (posix_memalign(&buf, 4096, 4096) != 0)
1568 return 1;
1569
1570 if (!get_dev_size(from, NULL, &dsize))
1571 goto err;
1572
1573 if (lseek64(from, dsize-512, 0) < 0)
1574 goto err;
1575 if (read(from, buf, 512) != 512)
1576 goto err;
1577 ddf = buf;
1578 if (ddf->magic != DDF_HEADER_MAGIC ||
1579 calc_crc(ddf, 512) != ddf->crc ||
1580 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1581 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1582 goto err;
1583
1584 offset = dsize - 512;
1585 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1586 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1587 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1588 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1589
1590 bytes = dsize - offset;
1591
1592 if (lseek64(from, offset, 0) < 0 ||
1593 lseek64(to, offset, 0) < 0)
1594 goto err;
1595 while (written < bytes) {
1596 int n = bytes - written;
1597 if (n > 4096)
1598 n = 4096;
1599 if (read(from, buf, n) != n)
1600 goto err;
1601 if (write(to, buf, n) != n)
1602 goto err;
1603 written += n;
1604 }
1605 free(buf);
1606 return 0;
1607err:
1608 free(buf);
1609 return 1;
1610}
1611
a322f70c
DW
/*
 * Detail output for a DDF superblock: currently prints nothing.
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 * Could print DDF GUID
	 * Need to find which array
	 *  If whole, briefly list all arrays
	 *  If one, give name
	 */
}
1621
1622static void brief_detail_super_ddf(struct supertype *st)
1623{
ff54de6e
N
1624 struct mdinfo info;
1625 char nbuf[64];
bedbf68a 1626 struct ddf_super *ddf = st->sb;
1627 unsigned int vcnum = get_vd_num_of_subarray(st);
1628 if (vcnum == DDF_CONTAINER)
1629 uuid_from_super_ddf(st, info.uuid);
1630 else if (vcnum == DDF_NOTFOUND)
1631 return;
1632 else
1633 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1634 fname_from_uuid(st, &info, nbuf,':');
1635 printf(" UUID=%s", nbuf + 5);
a322f70c 1636}
a322f70c
DW
1637#endif
1638
1639static int match_home_ddf(struct supertype *st, char *homehost)
1640{
1641 /* It matches 'this' host if the controller is a
1642 * Linux-MD controller with vendor_data matching
1643 * the hostname
1644 */
1645 struct ddf_super *ddf = st->sb;
f21e18ca 1646 unsigned int len;
d1d3482b
N
1647
1648 if (!homehost)
1649 return 0;
1650 len = strlen(homehost);
a322f70c
DW
1651
1652 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1653 len < sizeof(ddf->controller.vendor_data) &&
1654 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1655 ddf->controller.vendor_data[len] == 0);
1656}
1657
0e600426 1658#ifndef MDASSEMBLE
baba3f4e 1659static int find_index_in_bvd(const struct ddf_super *ddf,
1660 const struct vd_config *conf, unsigned int n,
1661 unsigned int *n_bvd)
1662{
1663 /*
1664 * Find the index of the n-th valid physical disk in this BVD
1665 */
1666 unsigned int i, j;
1667 for (i = 0, j = 0; i < ddf->mppe &&
1668 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1669 if (conf->phys_refnum[i] != 0xffffffff) {
1670 if (n == j) {
1671 *n_bvd = i;
1672 return 1;
1673 }
1674 j++;
1675 }
1676 }
1677 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1678 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1679 return 0;
1680}
1681
1682static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1683 unsigned int n,
1684 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1685{
7a7cc504 1686 struct vcl *v;
59e36268 1687
baba3f4e 1688 for (v = ddf->conflist; v; v = v->next) {
1689 unsigned int nsec, ibvd;
1690 struct vd_config *conf;
1691 if (inst != v->vcnum)
1692 continue;
1693 conf = &v->conf;
1694 if (conf->sec_elmnt_count == 1) {
1695 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1696 *vcl = v;
1697 return conf;
1698 } else
1699 goto bad;
1700 }
1701 if (v->other_bvds == NULL) {
1702 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1703 __func__, conf->sec_elmnt_count);
1704 goto bad;
1705 }
1706 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1707 if (conf->sec_elmnt_seq != nsec) {
1708 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
baba3f4e 1709 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1710 == nsec)
1711 break;
1712 }
1713 if (ibvd == conf->sec_elmnt_count)
1714 goto bad;
1715 conf = v->other_bvds[ibvd-1];
1716 }
1717 if (!find_index_in_bvd(ddf, conf,
1718 n - nsec*conf->sec_elmnt_count, n_bvd))
1719 goto bad;
1720 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1721 , __func__, n, *n_bvd, ibvd-1, inst);
1722 *vcl = v;
1723 return conf;
1724 }
1725bad:
1726 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1727 return NULL;
1728}
0e600426 1729#endif
7a7cc504 1730
5ec636b7 1731static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
7a7cc504
NB
1732{
1733 /* Find the entry in phys_disk which has the given refnum
1734 * and return it's index
1735 */
f21e18ca
N
1736 unsigned int i;
1737 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1738 if (ddf->phys->entries[i].refnum == phys_refnum)
1739 return i;
1740 return -1;
a322f70c
DW
1741}
1742
bedbf68a 1743static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1744{
1745 char buf[20];
1746 struct sha1_ctx ctx;
1747 sha1_init_ctx(&ctx);
1748 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1749 sha1_finish_ctx(&ctx, buf);
1750 memcpy(uuid, buf, 4*4);
1751}
1752
a322f70c
DW
1753static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1754{
1755 /* The uuid returned here is used for:
1756 * uuid to put into bitmap file (Create, Grow)
1757 * uuid for backup header when saving critical section (Grow)
1758 * comparing uuids when re-adding a device into an array
51006d85
N
1759 * In these cases the uuid required is that of the data-array,
1760 * not the device-set.
1761 * uuid to recognise same set when adding a missing device back
1762 * to an array. This is a uuid for the device-set.
613b0d17 1763 *
a322f70c
DW
1764 * For each of these we can make do with a truncated
1765 * or hashed uuid rather than the original, as long as
1766 * everyone agrees.
a322f70c
DW
1767 * In the case of SVD we assume the BVD is of interest,
1768 * though that might be the case if a bitmap were made for
1769 * a mirrored SVD - worry about that later.
1770 * So we need to find the VD configuration record for the
1771 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1772 * The first 16 bytes of the sha1 of these is used.
1773 */
1774 struct ddf_super *ddf = st->sb;
d2ca6449 1775 struct vcl *vcl = ddf->currentconf;
c5afc314 1776 char *guid;
a322f70c 1777
c5afc314
N
1778 if (vcl)
1779 guid = vcl->conf.guid;
1780 else
1781 guid = ddf->anchor.guid;
bedbf68a 1782 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1783}
1784
a5d85af7 1785static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1786
a5d85af7 1787static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1788{
1789 struct ddf_super *ddf = st->sb;
a5d85af7 1790 int map_disks = info->array.raid_disks;
90fa1a29 1791 __u32 *cptr;
a322f70c 1792
78e44928 1793 if (ddf->currentconf) {
a5d85af7 1794 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1795 return;
1796 }
95eeceeb 1797 memset(info, 0, sizeof(*info));
78e44928 1798
a322f70c
DW
1799 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1800 info->array.level = LEVEL_CONTAINER;
1801 info->array.layout = 0;
1802 info->array.md_minor = -1;
90fa1a29
JS
1803 cptr = (__u32 *)(ddf->anchor.guid + 16);
1804 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1805
a322f70c
DW
1806 info->array.utime = 0;
1807 info->array.chunk_size = 0;
510242aa 1808 info->container_enough = 1;
a322f70c 1809
a322f70c
DW
1810 info->disk.major = 0;
1811 info->disk.minor = 0;
cba0191b
NB
1812 if (ddf->dlist) {
1813 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1814 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1815
1816 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1817 entries[info->disk.raid_disk].
1818 config_size);
d2ca6449 1819 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1820 } else {
1821 info->disk.number = -1;
661dce36 1822 info->disk.raid_disk = -1;
cba0191b
NB
1823// info->disk.raid_disk = find refnum in the table and use index;
1824 }
f22385f9 1825 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1826
921d9e16 1827 info->recovery_start = MaxSector;
a19c88b8 1828 info->reshape_active = 0;
6e75048b 1829 info->recovery_blocked = 0;
c5afc314 1830 info->name[0] = 0;
a322f70c 1831
f35f2525
N
1832 info->array.major_version = -1;
1833 info->array.minor_version = -2;
159c3a1a 1834 strcpy(info->text_version, "ddf");
a67dd8cc 1835 info->safe_mode_delay = 0;
159c3a1a 1836
c5afc314 1837 uuid_from_super_ddf(st, info->uuid);
a322f70c 1838
a5d85af7
N
1839 if (map) {
1840 int i;
1841 for (i = 0 ; i < map_disks; i++) {
1842 if (i < info->array.raid_disks &&
1843 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1844 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1845 map[i] = 1;
1846 else
1847 map[i] = 0;
1848 }
1849 }
a322f70c
DW
1850}
1851
a5d85af7 1852static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1853{
1854 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1855 struct vcl *vc = ddf->currentconf;
1856 int cd = ddf->currentdev;
ddf94a43 1857 int n_prim;
db42fa9b 1858 int j;
8592f29d 1859 struct dl *dl;
a5d85af7 1860 int map_disks = info->array.raid_disks;
90fa1a29 1861 __u32 *cptr;
ddf94a43 1862 struct vd_config *conf;
a322f70c 1863
95eeceeb 1864 memset(info, 0, sizeof(*info));
8a2848a7 1865 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1866 return;
a322f70c 1867 info->array.md_minor = -1;
90fa1a29
JS
1868 cptr = (__u32 *)(vc->conf.guid + 16);
1869 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1870 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1871 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1872 info->custom_array_size = 0;
d2ca6449 1873
ddf94a43 1874 conf = &vc->conf;
1875 n_prim = __be16_to_cpu(conf->prim_elmnt_count);
1876 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1877 int ibvd = cd / n_prim - 1;
1878 cd %= n_prim;
1879 conf = vc->other_bvds[ibvd];
1880 }
1881
f21e18ca 1882 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
57a66662 1883 info->data_offset =
1884 __be64_to_cpu(LBA_OFFSET(ddf, &vc->conf)[cd]);
d2ca6449
NB
1885 if (vc->block_sizes)
1886 info->component_size = vc->block_sizes[cd];
1887 else
1888 info->component_size = __be64_to_cpu(vc->conf.blocks);
1889 }
a322f70c 1890
fb204fb2 1891 for (dl = ddf->dlist; dl ; dl = dl->next)
f5ded787 1892 if (dl->disk.refnum == conf->phys_refnum[cd])
fb204fb2
N
1893 break;
1894
a322f70c
DW
1895 info->disk.major = 0;
1896 info->disk.minor = 0;
fb204fb2 1897 info->disk.state = 0;
8592f29d
N
1898 if (dl) {
1899 info->disk.major = dl->major;
1900 info->disk.minor = dl->minor;
7c3fb3ec 1901 info->disk.raid_disk = cd + conf->sec_elmnt_seq
1902 * __be16_to_cpu(conf->prim_elmnt_count);
fb204fb2
N
1903 info->disk.number = dl->pdnum;
1904 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1905 }
a322f70c 1906
103f2410
NB
1907 info->container_member = ddf->currentconf->vcnum;
1908
921d9e16 1909 info->recovery_start = MaxSector;
80d26cb2 1910 info->resync_start = 0;
624c5ad4 1911 info->reshape_active = 0;
6e75048b 1912 info->recovery_blocked = 0;
80d26cb2
NB
1913 if (!(ddf->virt->entries[info->container_member].state
1914 & DDF_state_inconsistent) &&
1915 (ddf->virt->entries[info->container_member].init_state
1916 & DDF_initstate_mask)
1917 == DDF_init_full)
b7528a20 1918 info->resync_start = MaxSector;
80d26cb2 1919
a322f70c
DW
1920 uuid_from_super_ddf(st, info->uuid);
1921
f35f2525
N
1922 info->array.major_version = -1;
1923 info->array.minor_version = -2;
9b63e648 1924 sprintf(info->text_version, "/%s/%d",
4dd2df09 1925 st->container_devnm,
9b63e648 1926 info->container_member);
a67dd8cc 1927 info->safe_mode_delay = 200;
159c3a1a 1928
db42fa9b
N
1929 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1930 info->name[16]=0;
1931 for(j=0; j<16; j++)
1932 if (info->name[j] == ' ')
1933 info->name[j] = 0;
a5d85af7
N
1934
1935 if (map)
1936 for (j = 0; j < map_disks; j++) {
1937 map[j] = 0;
1938 if (j < info->array.raid_disks) {
1939 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1940 if (i >= 0 &&
a5d85af7
N
1941 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1942 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1943 map[i] = 1;
1944 }
1945 }
a322f70c
DW
1946}
1947
/*
 * Handle an 'update' request for DDF metadata.
 *
 * For 'assemble' and 'force' we need to return non-zero if any
 * change was made.  For others, the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *		be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *	   if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *		linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * What is implemented here: nothing is ever modified.  "grow", "resync",
 * "_reshape_progress" and "assemble" are accepted and return 0; every
 * other request (including "homehost" and "name", which are not
 * implemented yet) returns -1.
 *
 * We don't need to handle "force-*" or "assemble" as there is no need to
 * 'trick' the kernel.  When the metadata is first updated to activate
 * the array, all the implied modifications will just happen.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* requests that are accepted without doing anything */
	static const char *const accepted[] = {
		"grow",			/* FIXME */
		"resync",
		"_reshape_progress",	/* We don't support reshape yet */
		"assemble",		/* Do nothing, just succeed */
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	/* "homehost" (kept in controller vendor_data), "name" (kept in the
	 * virtual entry) and anything unrecognised are refused.
	 */
	return -1;
}
5f8097be
NB
2017static void make_header_guid(char *guid)
2018{
2019 __u32 stamp;
5f8097be
NB
2020 /* Create a DDF Header of Virtual Disk GUID */
2021
2022 /* 24 bytes of fiction required.
2023 * first 8 are a 'vendor-id' - "Linux-MD"
2024 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2025 * Remaining 8 random number plus timestamp
2026 */
2027 memcpy(guid, T10, sizeof(T10));
2028 stamp = __cpu_to_be32(0xdeadbeef);
2029 memcpy(guid+8, &stamp, 4);
2030 stamp = __cpu_to_be32(0);
2031 memcpy(guid+12, &stamp, 4);
2032 stamp = __cpu_to_be32(time(0) - DECADE);
2033 memcpy(guid+16, &stamp, 4);
bfb7ea78 2034 stamp = random32();
5f8097be 2035 memcpy(guid+20, &stamp, 4);
5f8097be 2036}
59e36268 2037
fb9d0acb 2038static unsigned int find_unused_vde(const struct ddf_super *ddf)
2039{
2040 unsigned int i;
2041 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2042 if (all_ff(ddf->virt->entries[i].guid))
2043 return i;
2044 }
2045 return DDF_NOTFOUND;
2046}
2047
2048static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2049 const char *name)
2050{
2051 unsigned int i;
2052 if (name == NULL)
2053 return DDF_NOTFOUND;
2054 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2055 if (all_ff(ddf->virt->entries[i].guid))
2056 continue;
2057 if (!strncmp(name, ddf->virt->entries[i].name,
2058 sizeof(ddf->virt->entries[i].name)))
2059 return i;
2060 }
2061 return DDF_NOTFOUND;
2062}
2063
2064static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2065 const char *guid)
2066{
2067 unsigned int i;
2068 if (guid == NULL || all_ff(guid))
2069 return DDF_NOTFOUND;
2070 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2071 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2072 return i;
2073 return DDF_NOTFOUND;
2074}
2075
78e44928
NB
2076static int init_super_ddf_bvd(struct supertype *st,
2077 mdu_array_info_t *info,
2078 unsigned long long size,
2079 char *name, char *homehost,
83cd1e97 2080 int *uuid, unsigned long long data_offset);
78e44928 2081
a322f70c
DW
2082static int init_super_ddf(struct supertype *st,
2083 mdu_array_info_t *info,
2084 unsigned long long size, char *name, char *homehost,
83cd1e97 2085 int *uuid, unsigned long long data_offset)
a322f70c
DW
2086{
2087 /* This is primarily called by Create when creating a new array.
2088 * We will then get add_to_super called for each component, and then
2089 * write_init_super called to write it out to each device.
2090 * For DDF, Create can create on fresh devices or on a pre-existing
2091 * array.
2092 * To create on a pre-existing array a different method will be called.
2093 * This one is just for fresh drives.
2094 *
2095 * We need to create the entire 'ddf' structure which includes:
2096 * DDF headers - these are easy.
2097 * Controller data - a Sector describing this controller .. not that
2098 * this is a controller exactly.
2099 * Physical Disk Record - one entry per device, so
2100 * leave plenty of space.
2101 * Virtual Disk Records - again, just leave plenty of space.
2102 * This just lists VDs, doesn't give details
2103 * Config records - describes the VDs that use this disk
2104 * DiskData - describes 'this' device.
2105 * BadBlockManagement - empty
2106 * Diag Space - empty
2107 * Vendor Logs - Could we put bitmaps here?
2108 *
2109 */
2110 struct ddf_super *ddf;
2111 char hostname[17];
2112 int hostlen;
a322f70c
DW
2113 int max_phys_disks, max_virt_disks;
2114 unsigned long long sector;
2115 int clen;
2116 int i;
2117 int pdsize, vdsize;
2118 struct phys_disk *pd;
2119 struct virtual_disk *vd;
2120
83cd1e97 2121 if (data_offset != INVALID_SECTORS) {
ed503f89 2122 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2123 return 0;
2124 }
2125
78e44928 2126 if (st->sb)
83cd1e97
N
2127 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2128 data_offset);
ba7eb04f 2129
3d2c4fc7 2130 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2131 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2132 return 0;
2133 }
6264b437 2134 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2135 ddf->dlist = NULL; /* no physical disks yet */
2136 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2137 st->sb = ddf;
2138
2139 if (info == NULL) {
2140 /* zeroing superblock */
2141 return 0;
2142 }
a322f70c
DW
2143
2144 /* At least 32MB *must* be reserved for the ddf. So let's just
2145 * start 32MB from the end, and put the primary header there.
2146 * Don't do secondary for now.
2147 * We don't know exactly where that will be yet as it could be
2148 * different on each device. To just set up the lengths.
2149 *
2150 */
2151
2152 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2153 make_header_guid(ddf->anchor.guid);
a322f70c 2154
59e36268 2155 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
2156 ddf->anchor.seq = __cpu_to_be32(1);
2157 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2158 ddf->anchor.openflag = 0xFF;
2159 ddf->anchor.foreignflag = 0;
2160 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2161 ddf->anchor.pad0 = 0xff;
2162 memset(ddf->anchor.pad1, 0xff, 12);
2163 memset(ddf->anchor.header_ext, 0xff, 32);
2164 ddf->anchor.primary_lba = ~(__u64)0;
2165 ddf->anchor.secondary_lba = ~(__u64)0;
2166 ddf->anchor.type = DDF_HEADER_ANCHOR;
2167 memset(ddf->anchor.pad2, 0xff, 3);
2168 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2169 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2170 of 32M reserved.. */
2171 max_phys_disks = 1023; /* Should be enough */
2172 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2173 max_virt_disks = 255;
2174 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2175 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2176 ddf->max_part = 64;
8c3b8c2c 2177 ddf->mppe = 256;
59e36268
NB
2178 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2179 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2180 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 2181 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2182 /* controller sections is one sector long immediately
2183 * after the ddf header */
2184 sector = 1;
2185 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2186 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2187 sector += 1;
2188
2189 /* phys is 8 sectors after that */
2190 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2191 sizeof(struct phys_disk_entry)*max_phys_disks,
2192 512);
2193 switch(pdsize/512) {
2194 case 2: case 8: case 32: case 128: case 512: break;
2195 default: abort();
2196 }
2197 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2198 ddf->anchor.phys_section_length =
2199 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2200 sector += pdsize/512;
2201
2202 /* virt is another 32 sectors */
2203 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2204 sizeof(struct virtual_entry) * max_virt_disks,
2205 512);
2206 switch(vdsize/512) {
2207 case 2: case 8: case 32: case 128: case 512: break;
2208 default: abort();
2209 }
2210 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2211 ddf->anchor.virt_section_length =
2212 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2213 sector += vdsize/512;
2214
59e36268 2215 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
2216 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2217 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2218 sector += clen;
2219
2220 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2221 ddf->anchor.data_section_length = __cpu_to_be32(1);
2222 sector += 1;
2223
2224 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2225 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2226 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2227 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2228 ddf->anchor.vendor_length = __cpu_to_be32(0);
2229 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2230
2231 memset(ddf->anchor.pad4, 0xff, 256);
2232
2233 memcpy(&ddf->primary, &ddf->anchor, 512);
2234 memcpy(&ddf->secondary, &ddf->anchor, 512);
2235
2236 ddf->primary.openflag = 1; /* I guess.. */
2237 ddf->primary.type = DDF_HEADER_PRIMARY;
2238
2239 ddf->secondary.openflag = 1; /* I guess.. */
2240 ddf->secondary.type = DDF_HEADER_SECONDARY;
2241
2242 ddf->active = &ddf->primary;
2243
2244 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2245
2246 /* 24 more bytes of fiction required.
2247 * first 8 are a 'vendor-id' - "Linux-MD"
2248 * Remaining 16 are serial number.... maybe a hostname would do?
2249 */
2250 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2251 gethostname(hostname, sizeof(hostname));
2252 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2253 hostlen = strlen(hostname);
2254 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2255 for (i = strlen(T10) ; i+hostlen < 24; i++)
2256 ddf->controller.guid[i] = ' ';
2257
2258 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2259 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2260 ddf->controller.type.sub_vendor_id = 0;
2261 ddf->controller.type.sub_device_id = 0;
2262 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2263 memset(ddf->controller.pad, 0xff, 8);
2264 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2265 if (homehost && strlen(homehost) < 440)
2266 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2267
3d2c4fc7 2268 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2269 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2270 return 0;
2271 }
6416d527 2272 ddf->phys = pd;
a322f70c
DW
2273 ddf->pdsize = pdsize;
2274
2275 memset(pd, 0xff, pdsize);
2276 memset(pd, 0, sizeof(*pd));
076515ba 2277 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
2278 pd->used_pdes = __cpu_to_be16(0);
2279 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2280 memset(pd->pad, 0xff, 52);
4a3ca8ac 2281 for (i = 0; i < max_phys_disks; i++)
2282 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
a322f70c 2283
3d2c4fc7 2284 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2285 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2286 return 0;
2287 }
6416d527 2288 ddf->virt = vd;
a322f70c
DW
2289 ddf->vdsize = vdsize;
2290 memset(vd, 0, vdsize);
2291 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2292 vd->populated_vdes = __cpu_to_be16(0);
2293 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2294 memset(vd->pad, 0xff, 52);
2295
5f8097be
NB
2296 for (i=0; i<max_virt_disks; i++)
2297 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2298
a322f70c 2299 st->sb = ddf;
7d5a7ff3 2300 ddf_set_updates_pending(ddf);
a322f70c
DW
2301 return 1;
2302}
2303
5f8097be
NB
/*
 * Convert a chunk size in bytes into a shift count: the zero-based
 * position of the lowest set bit of chunksize/512.
 * Yields -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);	/* 1-based, 0 if no bit set */

	return lowest_bit - 1;
}
2308
0e600426 2309#ifndef MDASSEMBLE
59e36268
NB
/* A contiguous region on a device, given as start offset and size. */
struct extent {
	unsigned long long start, size;
};

/*
 * qsort() comparison callback: orders extents by ascending 'start'.
 * Returns -1, 0 or 1; 'size' does not take part in the comparison.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2323
78e44928 2324static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2325{
2326 /* find a list of used extents on the give physical device
2327 * (dnum) of the given ddf.
2328 * Return a malloced array of 'struct extent'
2329
613b0d17 2330 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2331
2332 */
2333 struct extent *rv;
2334 int n = 0;
fcc22180 2335 unsigned int i;
59e36268 2336
503975b9 2337 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2338
2339 for (i = 0; i < ddf->max_part; i++) {
fcc22180 2340 const struct vd_config *bvd;
2341 unsigned int ibvd;
59e36268 2342 struct vcl *v = dl->vlist[i];
fcc22180 2343 if (v == NULL ||
2344 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2345 &bvd, &ibvd) == DDF_NOTFOUND)
59e36268 2346 continue;
fcc22180 2347 rv[n].start = __be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2348 rv[n].size = __be64_to_cpu(bvd->blocks);
2349 n++;
59e36268
NB
2350 }
2351 qsort(rv, n, sizeof(*rv), cmp_extent);
2352
2353 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2354 rv[n].size = 0;
2355 return rv;
2356}
0e600426 2357#endif
59e36268 2358
5f8097be
NB
2359static int init_super_ddf_bvd(struct supertype *st,
2360 mdu_array_info_t *info,
2361 unsigned long long size,
2362 char *name, char *homehost,
83cd1e97 2363 int *uuid, unsigned long long data_offset)
5f8097be
NB
2364{
2365 /* We are creating a BVD inside a pre-existing container.
2366 * so st->sb is already set.
2367 * We need to create a new vd_config and a new virtual_entry
2368 */
2369 struct ddf_super *ddf = st->sb;
5aaf6c7b 2370 unsigned int venum, i;
5f8097be
NB
2371 struct virtual_entry *ve;
2372 struct vcl *vcl;
2373 struct vd_config *vc;
5f8097be 2374
fb9d0acb 2375 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2376 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2377 return 0;
2378 }
fb9d0acb 2379 venum = find_unused_vde(ddf);
2380 if (venum == DDF_NOTFOUND) {
2381 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2382 return 0;
2383 }
2384 ve = &ddf->virt->entries[venum];
2385
2386 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2387 * timestamp, random number
2388 */
2389 make_header_guid(ve->guid);
2390 ve->unit = __cpu_to_be16(info->md_minor);
2391 ve->pad0 = 0xFFFF;
2392 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2393 ve->type = 0;
7a7cc504
NB
2394 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2395 if (info->state & 1) /* clean */
2396 ve->init_state = DDF_init_full;
2397 else
2398 ve->init_state = DDF_init_not;
2399
5f8097be
NB
2400 memset(ve->pad1, 0xff, 14);
2401 memset(ve->name, ' ', 16);
2402 if (name)
2403 strncpy(ve->name, name, 16);
2404 ddf->virt->populated_vdes =
2405 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2406
2407 /* Now create a new vd_config */
3d2c4fc7
DW
2408 if (posix_memalign((void**)&vcl, 512,
2409 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2410 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2411 return 0;
2412 }
59e36268
NB
2413 vcl->vcnum = venum;
2414 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
5f8097be
NB
2415 vc = &vcl->conf;
2416
2417 vc->magic = DDF_VD_CONF_MAGIC;
2418 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2419 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2420 vc->seqnum = __cpu_to_be32(1);
2421 memset(vc->pad0, 0xff, 24);
5f8097be 2422 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2423 if (layout_md2ddf(info, vc) == -1 ||
2424 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2425 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2426 __func__, info->level, info->layout, info->raid_disks);
2427 free(vcl);
2428 return 0;
2429 }
5f8097be 2430 vc->sec_elmnt_seq = 0;
3c48f7be 2431 if (alloc_other_bvds(ddf, vcl) != 0) {
2432 pr_err("%s could not allocate other bvds\n",
2433 __func__);
2434 free(vcl);
2435 return 0;
2436 }
5f8097be
NB
2437 vc->blocks = __cpu_to_be64(info->size * 2);
2438 vc->array_blocks = __cpu_to_be64(
2439 calc_array_size(info->level, info->raid_disks, info->layout,
2440 info->chunk_size, info->size*2));
2441 memset(vc->pad1, 0xff, 8);
2442 vc->spare_refs[0] = 0xffffffff;
2443 vc->spare_refs[1] = 0xffffffff;
2444 vc->spare_refs[2] = 0xffffffff;
2445 vc->spare_refs[3] = 0xffffffff;
2446 vc->spare_refs[4] = 0xffffffff;
2447 vc->spare_refs[5] = 0xffffffff;
2448 vc->spare_refs[6] = 0xffffffff;
2449 vc->spare_refs[7] = 0xffffffff;
2450 memset(vc->cache_pol, 0, 8);
2451 vc->bg_rate = 0x80;
2452 memset(vc->pad2, 0xff, 3);
2453 memset(vc->pad3, 0xff, 52);
2454 memset(vc->pad4, 0xff, 192);
2455 memset(vc->v0, 0xff, 32);
2456 memset(vc->v1, 0xff, 32);
2457 memset(vc->v2, 0xff, 16);
2458 memset(vc->v3, 0xff, 16);
2459 memset(vc->vendor, 0xff, 32);
598f0d58 2460
8c3b8c2c 2461 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2462 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be 2463
5aaf6c7b 2464 for (i = 1; i < vc->sec_elmnt_count; i++) {
2465 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2466 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2467 }
2468
5f8097be
NB
2469 vcl->next = ddf->conflist;
2470 ddf->conflist = vcl;
d2ca6449 2471 ddf->currentconf = vcl;
7d5a7ff3 2472 ddf_set_updates_pending(ddf);
5f8097be
NB
2473 return 1;
2474}
2475
63eb2454 2476static int get_svd_state(const struct ddf_super *, const struct vcl *);
2477
0e600426 2478#ifndef MDASSEMBLE
5f8097be
NB
2479static void add_to_super_ddf_bvd(struct supertype *st,
2480 mdu_disk_info_t *dk, int fd, char *devname)
2481{
2482 /* fd and devname identify a device with-in the ddf container (st).
2483 * dk identifies a location in the new BVD.
2484 * We need to find suitable free space in that device and update
2485 * the phys_refnum and lba_offset for the newly created vd_config.
2486 * We might also want to update the type in the phys_disk
5575e7d9 2487 * section.
8592f29d
N
2488 *
2489 * Alternately: fd == -1 and we have already chosen which device to
2490 * use and recorded in dlist->raid_disk;
5f8097be
NB
2491 */
2492 struct dl *dl;
2493 struct ddf_super *ddf = st->sb;
2494 struct vd_config *vc;
f21e18ca 2495 unsigned int i;
59e36268
NB
2496 unsigned long long blocks, pos, esize;
2497 struct extent *ex;
475ccbdb 2498 unsigned int raid_disk = dk->raid_disk;
5f8097be 2499
8592f29d
N
2500 if (fd == -1) {
2501 for (dl = ddf->dlist; dl ; dl = dl->next)
2502 if (dl->raiddisk == dk->raid_disk)
2503 break;
2504 } else {
2505 for (dl = ddf->dlist; dl ; dl = dl->next)
2506 if (dl->major == dk->major &&
2507 dl->minor == dk->minor)
2508 break;
2509 }
5f8097be
NB
2510 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2511 return;
2512
d2ca6449 2513 vc = &ddf->currentconf->conf;
475ccbdb 2514 if (vc->sec_elmnt_count > 1) {
2515 unsigned int n = __be16_to_cpu(vc->prim_elmnt_count);
2516 if (raid_disk >= n)
2517 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2518 raid_disk %= n;
2519 }
59e36268
NB
2520
2521 ex = get_extents(ddf, dl);
2522 if (!ex)
2523 return;
2524
2525 i = 0; pos = 0;
2526 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2527 if (ddf->currentconf->block_sizes)
2528 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2529
2530 do {
2531 esize = ex[i].start - pos;
2532 if (esize >= blocks)
2533 break;
2534 pos = ex[i].start + ex[i].size;
2535 i++;
2536 } while (ex[i-1].size);
2537
2538 free(ex);
2539 if (esize < blocks)
2540 return;
2541
d2ca6449 2542 ddf->currentdev = dk->raid_disk;
475ccbdb 2543 vc->phys_refnum[raid_disk] = dl->disk.refnum;
2544 LBA_OFFSET(ddf, vc)[raid_disk] = __cpu_to_be64(pos);
5f8097be 2545
f21e18ca 2546 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2547 if (dl->vlist[i] == NULL)
2548 break;
2549 if (i == ddf->max_part)
2550 return;
d2ca6449 2551 dl->vlist[i] = ddf->currentconf;
5f8097be 2552
8592f29d
N
2553 if (fd >= 0)
2554 dl->fd = fd;
2555 if (devname)
2556 dl->devname = devname;
7a7cc504 2557
63eb2454 2558 /* Check if we can mark array as optimal yet */
d2ca6449 2559 i = ddf->currentconf->vcnum;
63eb2454 2560 ddf->virt->entries[i].state =
2561 (ddf->virt->entries[i].state & ~DDF_state_mask)
2562 | get_svd_state(ddf, ddf->currentconf);
5575e7d9
NB
2563 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2564 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
4f9bbe63 2565 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
2566 __func__, dl->pdnum, __be32_to_cpu(dl->disk.refnum),
2567 ddf->currentconf->vcnum, guid_str(vc->guid),
2568 dk->raid_disk);
7d5a7ff3 2569 ddf_set_updates_pending(ddf);
5f8097be
NB
2570}
2571
4a3ca8ac 2572static unsigned int find_unused_pde(const struct ddf_super *ddf)
2573{
2574 unsigned int i;
2575 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++) {
2576 if (all_ff(ddf->phys->entries[i].guid))
2577 return i;
2578 }
2579 return DDF_NOTFOUND;
2580}
2581
a322f70c
DW
2582/* add a device to a container, either while creating it or while
2583 * expanding a pre-existing container
2584 */
f20c3968 2585static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2586 mdu_disk_info_t *dk, int fd, char *devname,
2587 unsigned long long data_offset)
a322f70c
DW
2588{
2589 struct ddf_super *ddf = st->sb;
2590 struct dl *dd;
2591 time_t now;
2592 struct tm *tm;
2593 unsigned long long size;
2594 struct phys_disk_entry *pde;
f21e18ca 2595 unsigned int n, i;
a322f70c 2596 struct stat stb;
90fa1a29 2597 __u32 *tptr;
a322f70c 2598
78e44928
NB
2599 if (ddf->currentconf) {
2600 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2601 return 0;
78e44928
NB
2602 }
2603
a322f70c
DW
2604 /* This is device numbered dk->number. We need to create
2605 * a phys_disk entry and a more detailed disk_data entry.
2606 */
2607 fstat(fd, &stb);
4a3ca8ac 2608 n = find_unused_pde(ddf);
2609 if (n == DDF_NOTFOUND) {
2610 pr_err("%s: No free slot in array, cannot add disk\n",
2611 __func__);
2612 return 1;
2613 }
2614 pde = &ddf->phys->entries[n];
4ee8cca9 2615 get_dev_size(fd, NULL, &size);
2616 if (size <= 32*1024*1024) {
2617 pr_err("%s: device size must be at least 32MB\n",
2618 __func__);
2619 return 1;
2620 }
2621 size >>= 9;
4a3ca8ac 2622
3d2c4fc7
DW
2623 if (posix_memalign((void**)&dd, 512,
2624 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2625 pr_err("%s could allocate buffer for new disk, aborting\n",
2626 __func__);
f20c3968 2627 return 1;
3d2c4fc7 2628 }
a322f70c
DW
2629 dd->major = major(stb.st_rdev);
2630 dd->minor = minor(stb.st_rdev);
2631 dd->devname = devname;
a322f70c 2632 dd->fd = fd;
b2280677 2633 dd->spare = NULL;
a322f70c
DW
2634
2635 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2636 now = time(0);
2637 tm = localtime(&now);
2638 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2639 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2640 tptr = (__u32 *)(dd->disk.guid + 16);
2641 *tptr++ = random32();
2642 *tptr = random32();
a322f70c 2643
59e36268
NB
2644 do {
2645 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2646 dd->disk.refnum = random32();
f21e18ca
N
2647 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2648 i > 0; i--)
2649 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2650 break;
f21e18ca 2651 } while (i > 0);
59e36268 2652
a322f70c
DW
2653 dd->disk.forced_ref = 1;
2654 dd->disk.forced_guid = 1;
2655 memset(dd->disk.vendor, ' ', 32);
2656 memcpy(dd->disk.vendor, "Linux", 5);
2657 memset(dd->disk.pad, 0xff, 442);
b2280677 2658 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2659 dd->vlist[i] = NULL;
2660
5575e7d9
NB
2661 dd->pdnum = n;
2662
2cc2983d
N
2663 if (st->update_tail) {
2664 int len = (sizeof(struct phys_disk) +
2665 sizeof(struct phys_disk_entry));
2666 struct phys_disk *pd;
2667
503975b9 2668 pd = xmalloc(len);
2cc2983d
N
2669 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2670 pd->used_pdes = __cpu_to_be16(n);
2671 pde = &pd->entries[0];
2672 dd->mdupdate = pd;
4a3ca8ac 2673 } else
2674 ddf->phys->used_pdes = __cpu_to_be16(
2675 1 + __be16_to_cpu(ddf->phys->used_pdes));
a322f70c
DW
2676
2677 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2678 pde->refnum = dd->disk.refnum;
5575e7d9 2679 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c 2680 pde->state = __cpu_to_be16(DDF_Online);
4ee8cca9 2681 dd->size = size;
2682 /*
2683 * If there is already a device in dlist, try to reserve the same
2684 * amount of workspace. Otherwise, use 32MB.
2685 * We checked disk size above already.
2686 */
2687#define __calc_lba(new, old, lba, mb) do { \
2688 unsigned long long dif; \
2689 if ((old) != NULL) \
2690 dif = (old)->size - __be64_to_cpu((old)->lba); \
2691 else \
2692 dif = (new)->size; \
2693 if ((new)->size > dif) \
2694 (new)->lba = __cpu_to_be64((new)->size - dif); \
2695 else \
2696 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2697 } while (0)
2698 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2699 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2700 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2701 pde->config_size = dd->workspace_lba;
2702
a322f70c
DW
2703 sprintf(pde->path, "%17.17s","Information: nil") ;
2704 memset(pde->pad, 0xff, 6);
2705
2cc2983d
N
2706 if (st->update_tail) {
2707 dd->next = ddf->add_list;
2708 ddf->add_list = dd;
2709 } else {
2710 dd->next = ddf->dlist;
2711 ddf->dlist = dd;
7d5a7ff3 2712 ddf_set_updates_pending(ddf);
2cc2983d 2713 }
f20c3968
DW
2714
2715 return 0;
a322f70c
DW
2716}
2717
4dd968cc
N
2718static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2719{
2720 struct ddf_super *ddf = st->sb;
2721 struct dl *dl;
2722
2723 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2724 * disappeared from the container.
2725 * We need to arrange that it disappears from the metadata and
2726 * internal data structures too.
2727 * Most of the work is done by ddf_process_update which edits
2728 * the metadata and closes the file handle and attaches the memory
2729 * where free_updates will free it.
2730 */
2731 for (dl = ddf->dlist; dl ; dl = dl->next)
2732 if (dl->major == dk->major &&
2733 dl->minor == dk->minor)
2734 break;
2735 if (!dl)
2736 return -1;
2737
2738 if (st->update_tail) {
2739 int len = (sizeof(struct phys_disk) +
2740 sizeof(struct phys_disk_entry));
2741 struct phys_disk *pd;
2742
503975b9 2743 pd = xmalloc(len);
4dd968cc
N
2744 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2745 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2746 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2747 append_metadata_update(st, pd, len);
2748 }
2749 return 0;
2750}
2751
a322f70c
DW
2752/*
2753 * This is the write_init_super method for a ddf container. It is
2754 * called when creating a container or adding another device to a
2755 * container.
2756 */
42d5dfd9 2757#define NULL_CONF_SZ 4096
18a2f463 2758
8e9387ac 2759static char *null_aligned;
2760static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
a322f70c 2761{
7f798aca 2762 unsigned long long sector;
2763 struct ddf_header *header;
2764 int fd, i, n_config, conf_size;
a4057a88 2765 int ret = 0;
7f798aca 2766
8e9387ac 2767 if (null_aligned == NULL) {
2768 if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
2769 != 0)
2770 return 0;
2771 memset(null_aligned, 0xff, NULL_CONF_SZ);
2772 }
2773
7f798aca 2774 fd = d->fd;
2775
2776 switch (type) {
2777 case DDF_HEADER_PRIMARY:
2778 header = &ddf->primary;
2779 sector = __be64_to_cpu(header->primary_lba);
2780 break;
2781 case DDF_HEADER_SECONDARY:
2782 header = &ddf->secondary;
2783 sector = __be64_to_cpu(header->secondary_lba);
2784 break;
2785 default:
2786 return 0;
2787 }
2788
2789 header->type = type;
a4057a88 2790 header->openflag = 1;
7f798aca 2791 header->crc = calc_crc(header, 512);
2792
2793 lseek64(fd, sector<<9, 0);
2794 if (write(fd, header, 512) < 0)
a4057a88 2795 goto out;
7f798aca 2796
2797 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2798 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2799 goto out;
a322f70c 2800
7f798aca 2801 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2802 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2803 goto out;
7f798aca 2804 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2805 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2806 goto out;
7f798aca 2807
2808 /* Now write lots of config records. */
2809 n_config = ddf->max_part;
2810 conf_size = ddf->conf_rec_len * 512;
2811 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2812 struct vcl *c;
2813 struct vd_config *vdc = NULL;
2814 if (i == n_config) {
7f798aca 2815 c = (struct vcl *)d->spare;
e3c2a365 2816 if (c)
2817 vdc = &c->conf;
2818 } else {
2819 unsigned int dummy;
2820 c = d->vlist[i];
2821 if (c)
2822 get_pd_index_from_refnum(
2823 c, d->disk.refnum,
2824 ddf->mppe,
2825 (const struct vd_config **)&vdc,
2826 &dummy);
2827 }
7f798aca 2828 if (c) {
be9b9ef4 2829 dprintf("writing conf record %i on disk %08x for %s/%u\n",
ad60eea1 2830 i, __be32_to_cpu(d->disk.refnum),
2831 guid_str(vdc->guid),
be9b9ef4 2832 vdc->sec_elmnt_seq);
dacf3dc5 2833 vdc->seqnum = header->seq;
e3c2a365 2834 vdc->crc = calc_crc(vdc, conf_size);
2835 if (write(fd, vdc, conf_size) < 0)
7f798aca 2836 break;
2837 } else {
2838 unsigned int togo = conf_size;
2839 while (togo > NULL_CONF_SZ) {
2840 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2841 break;
2842 togo -= NULL_CONF_SZ;
2843 }
2844 if (write(fd, null_aligned, togo) < 0)
2845 break;
2846 }
2847 }
2848 if (i <= n_config)
a4057a88 2849 goto out;
7f798aca 2850
2851 d->disk.crc = calc_crc(&d->disk, 512);
2852 if (write(fd, &d->disk, 512) < 0)
a4057a88 2853 goto out;
7f798aca 2854
a4057a88 2855 ret = 1;
2856out:
2857 header->openflag = 0;
2858 header->crc = calc_crc(header, 512);
2859
2860 lseek64(fd, sector<<9, 0);
2861 if (write(fd, header, 512) < 0)
2862 ret = 0;
2863
2864 return ret;
7f798aca 2865}
2866
9bf38704 2867static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
2868{
2869 unsigned long long size;
2870 int fd = d->fd;
2871 if (fd < 0)
2872 return 0;
2873
2874 /* We need to fill in the primary, (secondary) and workspace
2875 * lba's in the headers, set their checksums,
2876 * Also checksum phys, virt....
2877 *
2878 * Then write everything out, finally the anchor is written.
2879 */
2880 get_dev_size(fd, NULL, &size);
2881 size /= 512;
2882 if (d->workspace_lba != 0)
2883 ddf->anchor.workspace_lba = d->workspace_lba;
2884 else
2885 ddf->anchor.workspace_lba =
2886 __cpu_to_be64(size - 32*1024*2);
2887 if (d->primary_lba != 0)
2888 ddf->anchor.primary_lba = d->primary_lba;
2889 else
2890 ddf->anchor.primary_lba =
2891 __cpu_to_be64(size - 16*1024*2);
2892 if (d->secondary_lba != 0)
2893 ddf->anchor.secondary_lba = d->secondary_lba;
2894 else
2895 ddf->anchor.secondary_lba =
2896 __cpu_to_be64(size - 32*1024*2);
2897 ddf->anchor.seq = ddf->active->seq;
2898 memcpy(&ddf->primary, &ddf->anchor, 512);
2899 memcpy(&ddf->secondary, &ddf->anchor, 512);
2900
2901 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2902 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2903 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2904
2905 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
2906 return 0;
2907
2908 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
2909 return 0;
2910
2911 lseek64(fd, (size-1)*512, SEEK_SET);
2912 if (write(fd, &ddf->anchor, 512) < 0)
2913 return 0;
2914
2915 return 1;
2916}
2917
7f798aca 2918static int __write_init_super_ddf(struct supertype *st)
2919{
a322f70c 2920 struct ddf_super *ddf = st->sb;
a322f70c 2921 struct dl *d;
175593bf
DW
2922 int attempts = 0;
2923 int successes = 0;
42d5dfd9 2924
7d5a7ff3 2925 pr_state(ddf, __func__);
a322f70c 2926
175593bf
DW
2927 /* try to write updated metadata,
2928 * if we catch a failure move on to the next disk
2929 */
a322f70c 2930 for (d = ddf->dlist; d; d=d->next) {
175593bf 2931 attempts++;
9bf38704 2932 successes += _write_super_to_disk(ddf, d);
175593bf
DW
2933 }
2934
175593bf 2935 return attempts != successes;
a322f70c 2936}
7a7cc504
NB
2937
2938static int write_init_super_ddf(struct supertype *st)
2939{
9b1fb677
DW
2940 struct ddf_super *ddf = st->sb;
2941 struct vcl *currentconf = ddf->currentconf;
2942
2943 /* we are done with currentconf reset it to point st at the container */
2944 ddf->currentconf = NULL;
edd8d13c
NB
2945
2946 if (st->update_tail) {
2947 /* queue the virtual_disk and vd_config as metadata updates */
2948 struct virtual_disk *vd;
2949 struct vd_config *vc;
c5943560 2950 int len, tlen;
2951 unsigned int i;
edd8d13c 2952
9b1fb677 2953 if (!currentconf) {
2cc2983d
N
2954 int len = (sizeof(struct phys_disk) +
2955 sizeof(struct phys_disk_entry));
2956
2957 /* adding a disk to the container. */
2958 if (!ddf->add_list)
2959 return 0;
2960
2961 append_metadata_update(st, ddf->add_list->mdupdate, len);
2962 ddf->add_list->mdupdate = NULL;
2963 return 0;
2964 }
2965
2966 /* Newly created VD */
2967
edd8d13c
NB
2968 /* First the virtual disk. We have a slightly fake header */
2969 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2970 vd = xmalloc(len);
edd8d13c 2971 *vd = *ddf->virt;
9b1fb677
DW
2972 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2973 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2974 append_metadata_update(st, vd, len);
2975
2976 /* Then the vd_config */
2977 len = ddf->conf_rec_len * 512;
c5943560 2978 tlen = len * currentconf->conf.sec_elmnt_count;
2979 vc = xmalloc(tlen);
9b1fb677 2980 memcpy(vc, &currentconf->conf, len);
c5943560 2981 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
2982 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
2983 len);
2984 append_metadata_update(st, vc, tlen);
edd8d13c
NB
2985
2986 /* FIXME I need to close the fds! */
2987 return 0;
613b0d17 2988 } else {
d682f344 2989 struct dl *d;
19041058 2990 if (!currentconf)
2991 for (d = ddf->dlist; d; d=d->next)
2992 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2993 return __write_init_super_ddf(st);
d682f344 2994 }
7a7cc504
NB
2995}
2996
a322f70c
DW
2997#endif
2998
387fcd59
N
/*
 * Number of sectors of a device usable for data.
 *
 * The last 32Meg (32*1024*2 sectors of 512 bytes) are reserved, so a
 * device no larger than that offers nothing.  'st' and 'data_offset'
 * are not used.
 */
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
			    unsigned long long data_offset)
{
	const __u64 reserved = 32*1024*2;

	return (devsize > reserved) ? devsize - reserved : 0;
}
3007
3008#ifndef MDASSEMBLE
8592f29d
N
3009
3010static int reserve_space(struct supertype *st, int raiddisks,
3011 unsigned long long size, int chunk,
3012 unsigned long long *freesize)
3013{
3014 /* Find 'raiddisks' spare extents at least 'size' big (but
3015 * only caring about multiples of 'chunk') and remember
3016 * them.
3017 * If the cannot be found, fail.
3018 */
3019 struct dl *dl;
3020 struct ddf_super *ddf = st->sb;
3021 int cnt = 0;
3022
3023 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 3024 dl->raiddisk = -1;
8592f29d
N
3025 dl->esize = 0;
3026 }
3027 /* Now find largest extent on each device */
3028 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3029 struct extent *e = get_extents(ddf, dl);
3030 unsigned long long pos = 0;
3031 int i = 0;
3032 int found = 0;
3033 unsigned long long minsize = size;
3034
3035 if (size == 0)
3036 minsize = chunk;
3037
3038 if (!e)
3039 continue;
3040 do {
3041 unsigned long long esize;
3042 esize = e[i].start - pos;
3043 if (esize >= minsize) {
3044 found = 1;
3045 minsize = esize;
3046 }
3047 pos = e[i].start + e[i].size;
3048 i++;
3049 } while (e[i-1].size);
3050 if (found) {
3051 cnt++;
3052 dl->esize = minsize;
3053 }
3054 free(e);
3055 }
3056 if (cnt < raiddisks) {
e7b84f9d 3057 pr_err("not enough devices with space to create array.\n");
8592f29d
N
3058 return 0; /* No enough free spaces large enough */
3059 }
3060 if (size == 0) {
3061 /* choose the largest size of which there are at least 'raiddisk' */
3062 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3063 struct dl *dl2;
3064 if (dl->esize <= size)
3065 continue;
3066 /* This is bigger than 'size', see if there are enough */
3067 cnt = 0;
7b80ad6a 3068 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
3069 if (dl2->esize >= dl->esize)
3070 cnt++;
3071 if (cnt >= raiddisks)
3072 size = dl->esize;
3073 }
3074 if (chunk) {
3075 size = size / chunk;
3076 size *= chunk;
3077 }
3078 *freesize = size;
3079 if (size < 32) {
e7b84f9d 3080 pr_err("not enough spare devices to create array.\n");
8592f29d
N
3081 return 0;
3082 }
3083 }
3084 /* We have a 'size' of which there are enough spaces.
3085 * We simply do a first-fit */
3086 cnt = 0;
3087 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3088 if (dl->esize < size)
3089 continue;
613b0d17 3090
8592f29d
N
3091 dl->raiddisk = cnt;
3092 cnt++;
3093 }
3094 return 1;
3095}
3096
2c514b71
NB
/* Forward declarations: both helpers are defined after
 * validate_geometry_ddf(), which calls them.
 */
static int validate_geometry_ddf_container(struct supertype *st,
					   int level, int layout,
					   int raiddisks, int chunk,
					   unsigned long long size,
					   unsigned long long data_offset,
					   char *dev,
					   unsigned long long *freesize,
					   int verbose);

static int validate_geometry_ddf_bvd(struct supertype *st,
				     int level, int layout,
				     int raiddisks, int *chunk,
				     unsigned long long size,
				     unsigned long long data_offset,
				     char *dev,
				     unsigned long long *freesize,
				     int verbose);
78e44928
NB
3111
3112static int validate_geometry_ddf(struct supertype *st,
2c514b71 3113 int level, int layout, int raiddisks,
c21e737b 3114 int *chunk, unsigned long long size,
af4348dd 3115 unsigned long long data_offset,
2c514b71
NB
3116 char *dev, unsigned long long *freesize,
3117 int verbose)
a322f70c
DW
3118{
3119 int fd;
3120 struct mdinfo *sra;
3121 int cfd;
3122
3123 /* ddf potentially supports lots of things, but it depends on
3124 * what devices are offered (and maybe kernel version?)
3125 * If given unused devices, we will make a container.
3126 * If given devices in a container, we will make a BVD.
3127 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3128 */
3129
bb7295f1
N
3130 if (chunk && *chunk == UnSet)
3131 *chunk = DEFAULT_CHUNK;
3132
542ef4ec 3133 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3134 if (level == LEVEL_CONTAINER) {
78e44928
NB
3135 /* Must be a fresh device to add to a container */
3136 return validate_geometry_ddf_container(st, level, layout,
c21e737b 3137 raiddisks, chunk?*chunk:0,
af4348dd
N
3138 size, data_offset, dev,
3139 freesize,
2c514b71 3140 verbose);
5f8097be
NB
3141 }
3142
78e44928 3143 if (!dev) {
a3163bf0 3144 mdu_array_info_t array = {
3145 .level = level, .layout = layout,
3146 .raid_disks = raiddisks
3147 };
3148 struct vd_config conf;
3149 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3150 if (verbose)
94b08b7c 3151 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3152 level, layout, raiddisks);
78e44928 3153 return 0;
b42f577a 3154 }
78e44928 3155 /* Should check layout? etc */
8592f29d
N
3156
3157 if (st->sb && freesize) {
3158 /* --create was given a container to create in.
3159 * So we need to check that there are enough
3160 * free spaces and return the amount of space.
3161 * We may as well remember which drives were
3162 * chosen so that add_to_super/getinfo_super
3163 * can return them.
3164 */
c21e737b 3165 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 3166 }
a322f70c 3167 return 1;
78e44928 3168 }
a322f70c 3169
8592f29d
N
3170 if (st->sb) {
3171 /* A container has already been opened, so we are
3172 * creating in there. Maybe a BVD, maybe an SVD.
3173 * Should make a distinction one day.
3174 */
3175 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3176 chunk, size, data_offset, dev,
3177 freesize,
8592f29d
N
3178 verbose);
3179 }
78e44928
NB
3180 /* This is the first device for the array.
3181 * If it is a container, we read it in and do automagic allocations,
3182 * no other devices should be given.
3183 * Otherwise it must be a member device of a container, and we
3184 * do manual allocation.
3185 * Later we should check for a BVD and make an SVD.
a322f70c 3186 */
a322f70c
DW
3187 fd = open(dev, O_RDONLY|O_EXCL, 0);
3188 if (fd >= 0) {
4dd2df09 3189 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3190 close(fd);
3191 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3192 strcmp(sra->text_version, "ddf") == 0) {
3193
3194 /* load super */
3195 /* find space for 'n' devices. */
3196 /* remember the devices */
3197 /* Somehow return the fact that we have enough */
a322f70c
DW
3198 }
3199
2c514b71 3200 if (verbose)
e7b84f9d
N
3201 pr_err("ddf: Cannot create this array "
3202 "on device %s - a container is required.\n",
3203 dev);
a322f70c
DW
3204 return 0;
3205 }
3206 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3207 if (verbose)
e7b84f9d 3208 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3209 dev, strerror(errno));
a322f70c
DW
3210 return 0;
3211 }
3212 /* Well, it is in use by someone, maybe a 'ddf' container. */
3213 cfd = open_container(fd);
3214 if (cfd < 0) {
3215 close(fd);
2c514b71 3216 if (verbose)
e7b84f9d 3217 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3218 dev, strerror(EBUSY));
a322f70c
DW
3219 return 0;
3220 }
4dd2df09 3221 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3222 close(fd);
3223 if (sra && sra->array.major_version == -1 &&
3224 strcmp(sra->text_version, "ddf") == 0) {
3225 /* This is a member of a ddf container. Load the container
3226 * and try to create a bvd
3227 */
3228 struct ddf_super *ddf;
e1902a7b 3229 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3230 st->sb = ddf;
4dd2df09 3231 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3232 close(cfd);
78e44928 3233 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3234 raiddisks, chunk, size,
af4348dd 3235 data_offset,
2c514b71
NB
3236 dev, freesize,
3237 verbose);
a322f70c
DW
3238 }
3239 close(cfd);
c42ec1ed
DW
3240 } else /* device may belong to a different container */
3241 return 0;
3242
a322f70c
DW
3243 return 1;
3244}
3245
2c514b71
NB
3246static int
3247validate_geometry_ddf_container(struct supertype *st,
3248 int level, int layout, int raiddisks,
3249 int chunk, unsigned long long size,
af4348dd 3250 unsigned long long data_offset,
2c514b71
NB
3251 char *dev, unsigned long long *freesize,
3252 int verbose)
a322f70c
DW
3253{
3254 int fd;
3255 unsigned long long ldsize;
3256
3257 if (level != LEVEL_CONTAINER)
3258 return 0;
3259 if (!dev)
3260 return 1;
3261
3262 fd = open(dev, O_RDONLY|O_EXCL, 0);
3263 if (fd < 0) {
2c514b71 3264 if (verbose)
e7b84f9d 3265 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3266 dev, strerror(errno));
a322f70c
DW
3267 return 0;
3268 }
3269 if (!get_dev_size(fd, dev, &ldsize)) {
3270 close(fd);
3271 return 0;
3272 }
3273 close(fd);
3274
387fcd59 3275 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3276 if (*freesize == 0)
3277 return 0;
a322f70c
DW
3278
3279 return 1;
3280}
3281
78e44928
NB
3282static int validate_geometry_ddf_bvd(struct supertype *st,
3283 int level, int layout, int raiddisks,
c21e737b 3284 int *chunk, unsigned long long size,
af4348dd 3285 unsigned long long data_offset,
2c514b71
NB
3286 char *dev, unsigned long long *freesize,
3287 int verbose)
a322f70c
DW
3288{
3289 struct stat stb;
3290 struct ddf_super *ddf = st->sb;
3291 struct dl *dl;
5f8097be
NB
3292 unsigned long long pos = 0;
3293 unsigned long long maxsize;
3294 struct extent *e;
3295 int i;
a322f70c 3296 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3297 if (level == LEVEL_CONTAINER) {
3298 if (verbose)
e7b84f9d 3299 pr_err("DDF cannot create a container within an container\n");
a322f70c 3300 return 0;
b42f577a 3301 }
a322f70c
DW
3302 /* We must have the container info already read in. */
3303 if (!ddf)
3304 return 0;
3305
5f8097be
NB
3306 if (!dev) {
3307 /* General test: make sure there is space for
3308 * 'raiddisks' device extents of size 'size'.
3309 */
3310 unsigned long long minsize = size;
3311 int dcnt = 0;
3312 if (minsize == 0)
3313 minsize = 8;
3314 for (dl = ddf->dlist; dl ; dl = dl->next)
3315 {
3316 int found = 0;
7e1432fb 3317 pos = 0;
5f8097be
NB
3318
3319 i = 0;
3320 e = get_extents(ddf, dl);
3321 if (!e) continue;
3322 do {
3323 unsigned long long esize;
3324 esize = e[i].start - pos;
3325 if (esize >= minsize)
3326 found = 1;
3327 pos = e[i].start + e[i].size;
3328 i++;
3329 } while (e[i-1].size);
3330 if (found)
3331 dcnt++;
3332 free(e);
3333 }
3334 if (dcnt < raiddisks) {
2c514b71 3335 if (verbose)
e7b84f9d
N
3336 pr_err("ddf: Not enough devices with "
3337 "space for this array (%d < %d)\n",
3338 dcnt, raiddisks);
5f8097be
NB
3339 return 0;
3340 }
3341 return 1;
3342 }
a322f70c
DW
3343 /* This device must be a member of the set */
3344 if (stat(dev, &stb) < 0)
3345 return 0;
3346 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3347 return 0;
3348 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3349 if (dl->major == (int)major(stb.st_rdev) &&
3350 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3351 break;
3352 }
5f8097be 3353 if (!dl) {
2c514b71 3354 if (verbose)
e7b84f9d 3355 pr_err("ddf: %s is not in the "
613b0d17
N
3356 "same DDF set\n",
3357 dev);
5f8097be
NB
3358 return 0;
3359 }
3360 e = get_extents(ddf, dl);
3361 maxsize = 0;
3362 i = 0;
3363 if (e) do {
613b0d17
N
3364 unsigned long long esize;
3365 esize = e[i].start - pos;
3366 if (esize >= maxsize)
3367 maxsize = esize;
3368 pos = e[i].start + e[i].size;
3369 i++;
3370 } while (e[i-1].size);
5f8097be 3371 *freesize = maxsize;
a322f70c
DW
3372 // FIXME here I am
3373
3374 return 1;
3375}
59e36268 3376
a322f70c 3377static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3378 void **sbp, char *devname)
a322f70c
DW
3379{
3380 struct mdinfo *sra;
3381 struct ddf_super *super;
3382 struct mdinfo *sd, *best = NULL;
3383 int bestseq = 0;
3384 int seq;
3385 char nm[20];
3386 int dfd;
3387
b526e52d 3388 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3389 if (!sra)
3390 return 1;
3391 if (sra->array.major_version != -1 ||
3392 sra->array.minor_version != -2 ||
3393 strcmp(sra->text_version, "ddf") != 0)
3394 return 1;
3395
6416d527 3396 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3397 return 1;
a2349791 3398 memset(super, 0, sizeof(*super));
a322f70c
DW
3399
3400 /* first, try each device, and choose the best ddf */
3401 for (sd = sra->devs ; sd ; sd = sd->next) {
3402 int rv;
3403 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3404 dfd = dev_open(nm, O_RDONLY);
3405 if (dfd < 0)
a322f70c
DW
3406 return 2;
3407 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3408 close(dfd);
a322f70c
DW
3409 if (rv == 0) {
3410 seq = __be32_to_cpu(super->active->seq);
3411 if (super->active->openflag)
3412 seq--;
3413 if (!best || seq > bestseq) {
3414 bestseq = seq;
3415 best = sd;
3416 }
3417 }
3418 }
3419 if (!best)
3420 return 1;
3421 /* OK, load this ddf */
3422 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3423 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3424 if (dfd < 0)
a322f70c
DW
3425 return 1;
3426 load_ddf_headers(dfd, super, NULL);
3427 load_ddf_global(dfd, super, NULL);
3428 close(dfd);
3429 /* Now we need the device-local bits */
3430 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3431 int rv;
3432
a322f70c 3433 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3434 dfd = dev_open(nm, O_RDWR);
7a7cc504 3435 if (dfd < 0)
a322f70c 3436 return 2;
3d2c4fc7
DW
3437 rv = load_ddf_headers(dfd, super, NULL);
3438 if (rv == 0)
e1902a7b 3439 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3440 if (rv)
3441 return 1;
a322f70c 3442 }
33414a01 3443
a322f70c
DW
3444 *sbp = super;
3445 if (st->ss == NULL) {
78e44928 3446 st->ss = &super_ddf;
a322f70c
DW
3447 st->minor_version = 0;
3448 st->max_devs = 512;
3449 }
4dd2df09 3450 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3451 return 0;
3452}
2b959fbf
N
3453
3454static int load_container_ddf(struct supertype *st, int fd,
3455 char *devname)
3456{
3457 return load_super_ddf_all(st, fd, &st->sb, devname);
3458}
3459
0e600426 3460#endif /* MDASSEMBLE */
a322f70c 3461
a5c7adb3 3462static int check_secondary(const struct vcl *vc)
3463{
3464 const struct vd_config *conf = &vc->conf;
3465 int i;
3466
3467 /* The only DDF secondary RAID level md can support is
3468 * RAID 10, if the stripe sizes and Basic volume sizes
3469 * are all equal.
3470 * Other configurations could in theory be supported by exposing
3471 * the BVDs to user space and using device mapper for the secondary
3472 * mapping. So far we don't support that.
3473 */
3474
3475 __u64 sec_elements[4] = {0, 0, 0, 0};
3476#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3477#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3478
3479 if (vc->other_bvds == NULL) {
3480 pr_err("No BVDs for secondary RAID found\n");
3481 return -1;
3482 }
3483 if (conf->prl != DDF_RAID1) {
3484 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3485 return -1;
3486 }
3487 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3488 pr_err("Secondary RAID level %d is unsupported\n",
3489 conf->srl);
3490 return -1;
3491 }
3492 __set_sec_seen(conf->sec_elmnt_seq);
3493 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3494 const struct vd_config *bvd = vc->other_bvds[i];
3c48f7be 3495 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
c98567ba 3496 continue;
a5c7adb3 3497 if (bvd->srl != conf->srl) {
3498 pr_err("Inconsistent secondary RAID level across BVDs\n");
3499 return -1;
3500 }
3501 if (bvd->prl != conf->prl) {
3502 pr_err("Different RAID levels for BVDs are unsupported\n");
3503 return -1;
3504 }
3505 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3506 pr_err("All BVDs must have the same number of primary elements\n");
3507 return -1;
3508 }
3509 if (bvd->chunk_shift != conf->chunk_shift) {
3510 pr_err("Different strip sizes for BVDs are unsupported\n");
3511 return -1;
3512 }
3513 if (bvd->array_blocks != conf->array_blocks) {
3514 pr_err("Different BVD sizes are unsupported\n");
3515 return -1;
3516 }
3517 __set_sec_seen(bvd->sec_elmnt_seq);
3518 }
3519 for (i = 0; i < conf->sec_elmnt_count; i++) {
3520 if (!__was_sec_seen(i)) {
3521 pr_err("BVD %d is missing\n", i);
3522 return -1;
3523 }
3524 }
3525 return 0;
3526}
3527
8a38db86 3528static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3529 __u32 refnum, unsigned int nmax,
3530 const struct vd_config **bvd,
3531 unsigned int *idx)
8a38db86 3532{
4e587018 3533 unsigned int i, j, n, sec, cnt;
3534
3535 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3536 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3537
3538 for (i = 0, j = 0 ; i < nmax ; i++) {
3539 /* j counts valid entries for this BVD */
3540 if (vc->conf.phys_refnum[i] != 0xffffffff)
3541 j++;
3542 if (vc->conf.phys_refnum[i] == refnum) {
3543 *bvd = &vc->conf;
3544 *idx = i;
3545 return sec * cnt + j - 1;
3546 }
3547 }
3548 if (vc->other_bvds == NULL)
3549 goto bad;
3550
3551 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3552 struct vd_config *vd = vc->other_bvds[n-1];
4e587018 3553 sec = vd->sec_elmnt_seq;
3c48f7be 3554 if (sec == DDF_UNUSED_BVD)
3555 continue;
4e587018 3556 for (i = 0, j = 0 ; i < nmax ; i++) {
3557 if (vd->phys_refnum[i] != 0xffffffff)
3558 j++;
3559 if (vd->phys_refnum[i] == refnum) {
3560 *bvd = vd;
3561 *idx = i;
3562 return sec * cnt + j - 1;
3563 }
3564 }
3565 }
3566bad:
3567 *bvd = NULL;
d6e7b083 3568 return DDF_NOTFOUND;
8a38db86 3569}
3570
00bbdbda 3571static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3572{
3573 /* Given a container loaded by load_super_ddf_all,
3574 * extract information about all the arrays into
3575 * an mdinfo tree.
3576 *
3577 * For each vcl in conflist: create an mdinfo, fill it in,
3578 * then look for matching devices (phys_refnum) in dlist
3579 * and create appropriate device mdinfo.
3580 */
3581 struct ddf_super *ddf = st->sb;
3582 struct mdinfo *rest = NULL;
3583 struct vcl *vc;
3584
3585 for (vc = ddf->conflist ; vc ; vc=vc->next)
3586 {
f21e18ca
N
3587 unsigned int i;
3588 unsigned int j;
598f0d58 3589 struct mdinfo *this;
00bbdbda 3590 char *ep;
90fa1a29 3591 __u32 *cptr;
8a38db86 3592 unsigned int pd;
00bbdbda
N
3593
3594 if (subarray &&
3595 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3596 *ep != '\0'))
3597 continue;
3598
a5c7adb3 3599 if (vc->conf.sec_elmnt_count > 1) {
3600 if (check_secondary(vc) != 0)
3601 continue;
3602 }
3603
503975b9 3604 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3605 this->next = rest;
3606 rest = this;
3607
8a2848a7 3608 if (layout_ddf2md(&vc->conf, &this->array))
3609 continue;
598f0d58 3610 this->array.md_minor = -1;
f35f2525
N
3611 this->array.major_version = -1;
3612 this->array.minor_version = -2;
90fa1a29
JS
3613 cptr = (__u32 *)(vc->conf.guid + 16);
3614 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3615 this->array.utime = DECADE +
3616 __be32_to_cpu(vc->conf.timestamp);
3617 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3618
59e36268 3619 i = vc->vcnum;
7a7cc504
NB
3620 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3621 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3622 DDF_init_full) {
598f0d58 3623 this->array.state = 0;
ed9d66aa
NB
3624 this->resync_start = 0;
3625 } else {
598f0d58 3626 this->array.state = 1;
b7528a20 3627 this->resync_start = MaxSector;
ed9d66aa 3628 }
db42fa9b
N
3629 memcpy(this->name, ddf->virt->entries[i].name, 16);
3630 this->name[16]=0;
3631 for(j=0; j<16; j++)
3632 if (this->name[j] == ' ')
3633 this->name[j] = 0;
598f0d58
NB
3634
3635 memset(this->uuid, 0, sizeof(this->uuid));
3636 this->component_size = __be64_to_cpu(vc->conf.blocks);
3637 this->array.size = this->component_size / 2;
5f2aace8 3638 this->container_member = i;
598f0d58 3639
c5afc314
N
3640 ddf->currentconf = vc;
3641 uuid_from_super_ddf(st, this->uuid);
f646805e 3642 if (!subarray)
3643 ddf->currentconf = NULL;
c5afc314 3644
60f18132 3645 sprintf(this->text_version, "/%s/%d",
4dd2df09 3646 st->container_devnm, this->container_member);
60f18132 3647
8a38db86 3648 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3649 struct mdinfo *dev;
3650 struct dl *d;
4e587018 3651 const struct vd_config *bvd;
3652 unsigned int iphys;
fa033bec 3653 int stt;
598f0d58 3654
8a38db86 3655 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3656 continue;
0cf5ef67
N
3657
3658 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3659 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3660 != DDF_Online)
3661 continue;
3662
8a38db86 3663 i = get_pd_index_from_refnum(
4e587018 3664 vc, ddf->phys->entries[pd].refnum,
3665 ddf->mppe, &bvd, &iphys);
d6e7b083 3666 if (i == DDF_NOTFOUND)
8a38db86 3667 continue;
3668
fa033bec 3669 this->array.working_disks++;
bc17324f 3670
0cf5ef67 3671 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3672 if (d->disk.refnum ==
3673 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3674 break;
3675 if (d == NULL)
3676 /* Haven't found that one yet, maybe there are others */
3677 continue;
3678
503975b9 3679 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3680 dev->next = this->devs;
3681 this->devs = dev;
3682
3683 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3684 dev->disk.major = d->major;
3685 dev->disk.minor = d->minor;
3686 dev->disk.raid_disk = i;
3687 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3688 dev->recovery_start = MaxSector;
598f0d58 3689
120f7677 3690 dev->events = __be32_to_cpu(ddf->primary.seq);
57a66662 3691 dev->data_offset =
3692 __be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
4e587018 3693 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3694 if (d->devname)
3695 strcpy(dev->name, d->devname);
3696 }
3697 }
3698 return rest;
3699}
3700
955e9ea1 3701static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3702{
955e9ea1 3703 struct ddf_super *ddf = st->sb;
a322f70c 3704 unsigned long long dsize;
6416d527 3705 void *buf;
3d2c4fc7 3706 int rc;
a322f70c 3707
955e9ea1
DW
3708 if (!ddf)
3709 return 1;
3710
a322f70c
DW
3711 if (!get_dev_size(fd, NULL, &dsize))
3712 return 1;
3713
dbf98368 3714 if (ddf->dlist || ddf->conflist) {
3715 struct stat sta;
3716 struct dl *dl;
3717 int ofd, ret;
3718
3719 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3720 pr_err("%s: file descriptor for invalid device\n",
3721 __func__);
3722 return 1;
3723 }
3724 for (dl = ddf->dlist; dl; dl = dl->next)
3725 if (dl->major == (int)major(sta.st_rdev) &&
3726 dl->minor == (int)minor(sta.st_rdev))
3727 break;
3728 if (!dl) {
3729 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3730 (int)major(sta.st_rdev),
3731 (int)minor(sta.st_rdev));
3732 return 1;
3733 }
dbf98368 3734 ofd = dl->fd;
3735 dl->fd = fd;
9bf38704 3736 ret = (_write_super_to_disk(ddf, dl) != 1);
dbf98368 3737 dl->fd = ofd;
3738 return ret;
3739 }
3740
3d2c4fc7
DW
3741 if (posix_memalign(&buf, 512, 512) != 0)
3742 return 1;
6416d527
NB
3743 memset(buf, 0, 512);
3744
a322f70c 3745 lseek64(fd, dsize-512, 0);
3d2c4fc7 3746 rc = write(fd, buf, 512);
6416d527 3747 free(buf);
3d2c4fc7
DW
3748 if (rc < 0)
3749 return 1;
a322f70c
DW
3750 return 0;
3751}
3752
a19c88b8
NB
3753static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3754{
3755 /*
3756 * return:
3757 * 0 same, or first was empty, and second was copied
3758 * 1 second had wrong number
3759 * 2 wrong uuid
3760 * 3 wrong other info
3761 */
3762 struct ddf_super *first = st->sb;
3763 struct ddf_super *second = tst->sb;
4eefd651 3764 struct dl *dl1, *dl2;
3765 struct vcl *vl1, *vl2;
2d210697 3766 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3767
3768 if (!first) {
3769 st->sb = tst->sb;
3770 tst->sb = NULL;
3771 return 0;
3772 }
3773
3774 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3775 return 2;
3776
2d210697 3777 if (first->anchor.seq != second->anchor.seq) {
3778 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3779 __be32_to_cpu(first->anchor.seq),
3780 __be32_to_cpu(second->anchor.seq));
3781 return 3;
3782 }
3783 if (first->max_part != second->max_part ||
3784 first->phys->used_pdes != second->phys->used_pdes ||
3785 first->virt->populated_vdes != second->virt->populated_vdes) {
3786 dprintf("%s: PD/VD number mismatch\n", __func__);
3787 return 3;
3788 }
3789
3790 max_pds = __be16_to_cpu(first->phys->used_pdes);
3791 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3792 for (pd = 0; pd < max_pds; pd++)
3793 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3794 break;
3795 if (pd == max_pds) {
3796 dprintf("%s: no match for disk %08x\n", __func__,
3797 __be32_to_cpu(dl2->disk.refnum));
3798 return 3;
3799 }
3800 }
3801
3802 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3803 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3804 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3805 continue;
3806 for (vd = 0; vd < max_vds; vd++)
3807 if (!memcmp(first->virt->entries[vd].guid,
3808 vl2->conf.guid, DDF_GUID_LEN))
3809 break;
3810 if (vd == max_vds) {
3811 dprintf("%s: no match for VD config\n", __func__);
3812 return 3;
3813 }
3814 }
a19c88b8 3815 /* FIXME should I look at anything else? */
2d210697 3816
4eefd651 3817 /*
3818 At this point we are fairly sure that the meta data matches.
3819 But the new disk may contain additional local data.
3820 Add it to the super block.
3821 */
3822 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3823 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3824 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3825 DDF_GUID_LEN))
3826 break;
3827 if (vl1) {
3828 if (vl1->other_bvds != NULL &&
3829 vl1->conf.sec_elmnt_seq !=
3830 vl2->conf.sec_elmnt_seq) {
3831 dprintf("%s: adding BVD %u\n", __func__,
3832 vl2->conf.sec_elmnt_seq);
3833 add_other_bvd(vl1, &vl2->conf,
3834 first->conf_rec_len*512);
3835 }
3836 continue;
3837 }
3838
3839 if (posix_memalign((void **)&vl1, 512,
3840 (first->conf_rec_len*512 +
3841 offsetof(struct vcl, conf))) != 0) {
3842 pr_err("%s could not allocate vcl buf\n",
3843 __func__);
3844 return 3;
3845 }
3846
3847 vl1->next = first->conflist;
3848 vl1->block_sizes = NULL;
4eefd651 3849 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3c48f7be 3850 if (alloc_other_bvds(first, vl1) != 0) {
3851 pr_err("%s could not allocate other bvds\n",
3852 __func__);
3853 free(vl1);
3854 return 3;
3855 }
4eefd651 3856 for (vd = 0; vd < max_vds; vd++)
3857 if (!memcmp(first->virt->entries[vd].guid,
3858 vl1->conf.guid, DDF_GUID_LEN))
3859 break;
3860 vl1->vcnum = vd;
3861 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3862 first->conflist = vl1;
3863 }
3864
3865 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3866 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3867 if (dl1->disk.refnum == dl2->disk.refnum)
3868 break;
3869 if (dl1)
3870 continue;
3871
3872 if (posix_memalign((void **)&dl1, 512,
3873 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3874 != 0) {
3875 pr_err("%s could not allocate disk info buffer\n",
3876 __func__);
3877 return 3;
3878 }
3879 memcpy(dl1, dl2, sizeof(*dl1));
3880 dl1->mdupdate = NULL;
3881 dl1->next = first->dlist;
3882 dl1->fd = -1;
3883 for (pd = 0; pd < max_pds; pd++)
3884 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3885 break;
3886 dl1->pdnum = pd;
3887 if (dl2->spare) {
3888 if (posix_memalign((void **)&dl1->spare, 512,
3889 first->conf_rec_len*512) != 0) {
3890 pr_err("%s could not allocate spare info buf\n",
3891 __func__);
3892 return 3;
3893 }
3894 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3895 }
3896 for (vd = 0 ; vd < first->max_part ; vd++) {
3897 if (!dl2->vlist[vd]) {
3898 dl1->vlist[vd] = NULL;
3899 continue;
3900 }
3901 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3902 if (!memcmp(vl1->conf.guid,
3903 dl2->vlist[vd]->conf.guid,
3904 DDF_GUID_LEN))
3905 break;
3906 dl1->vlist[vd] = vl1;
3907 }
3908 }
3909 first->dlist = dl1;
3910 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
ad60eea1 3911 __be32_to_cpu(dl1->disk.refnum));
4eefd651 3912 }
3913
a19c88b8
NB
3914 return 0;
3915}
3916
0e600426 3917#ifndef MDASSEMBLE
4e5528c6
NB
3918/*
3919 * A new array 'a' has been started which claims to be instance 'inst'
3920 * within container 'c'.
3921 * We need to confirm that the array matches the metadata in 'c' so
3922 * that we don't corrupt any metadata.
3923 */
cba0191b 3924static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3925{
a2aa439e 3926 struct ddf_super *ddf = c->sb;
3927 int n = atoi(inst);
fb9d0acb 3928 if (all_ff(ddf->virt->entries[n].guid)) {
3929 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 3930 return -ENODEV;
3931 }
3932 dprintf("ddf: open_new %d\n", n);
3933 a->info.container_member = n;
549e9569
NB
3934 return 0;
3935}
3936
4e5528c6
NB
3937/*
3938 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3939 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3940 * clean up to the point (in sectors). If that cannot be recorded in the
3941 * metadata, then leave it as dirty.
3942 *
3943 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3944 * !global! virtual_disk.virtual_entry structure.
3945 */
01f157d7 3946static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3947{
4e5528c6
NB
3948 struct ddf_super *ddf = a->container->sb;
3949 int inst = a->info.container_member;
18a2f463 3950 int old = ddf->virt->entries[inst].state;
01f157d7
N
3951 if (consistent == 2) {
3952 /* Should check if a recovery should be started FIXME */
3953 consistent = 1;
b7941fd6 3954 if (!is_resync_complete(&a->info))
01f157d7
N
3955 consistent = 0;
3956 }
ed9d66aa
NB
3957 if (consistent)
3958 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3959 else
4e5528c6 3960 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 3961 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 3962 ddf_set_updates_pending(ddf);
18a2f463
NB
3963
3964 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3965 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3966 if (is_resync_complete(&a->info))
ed9d66aa 3967 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3968 else if (a->info.resync_start == 0)
ed9d66aa 3969 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3970 else
ed9d66aa 3971 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 3972 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 3973 ddf_set_updates_pending(ddf);
ed9d66aa 3974
b27336a2 3975 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
3976 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
3977 consistent?"clean":"dirty",
b7941fd6 3978 a->info.resync_start);
01f157d7 3979 return consistent;
fd7cde1b
DW
3980}
3981
5ec636b7 3982static int get_bvd_state(const struct ddf_super *ddf,
3983 const struct vd_config *vc)
3984{
3985 unsigned int i, n_bvd, working = 0;
3986 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3987 int pd, st, state;
3988 for (i = 0; i < n_prim; i++) {
3989 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3990 continue;
3991 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3992 if (pd < 0)
3993 continue;
3994 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3995 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3996 == DDF_Online)
3997 working++;
3998 }
3999
4000 state = DDF_state_degraded;
4001 if (working == n_prim)
4002 state = DDF_state_optimal;
4003 else
4004 switch (vc->prl) {
4005 case DDF_RAID0:
4006 case DDF_CONCAT:
4007 case DDF_JBOD:
4008 state = DDF_state_failed;
4009 break;
4010 case DDF_RAID1:
4011 if (working == 0)
4012 state = DDF_state_failed;
4013 else if (working >= 2)
4014 state = DDF_state_part_optimal;
4015 break;
4016 case DDF_RAID4:
4017 case DDF_RAID5:
4018 if (working < n_prim - 1)
4019 state = DDF_state_failed;
4020 break;
4021 case DDF_RAID6:
4022 if (working < n_prim - 2)
4023 state = DDF_state_failed;
4024 else if (working == n_prim - 1)
4025 state = DDF_state_part_optimal;
4026 break;
4027 }
4028 return state;
4029}
4030
0777d17d 4031static int secondary_state(int state, int other, int seclevel)
4032{
4033 if (state == DDF_state_optimal && other == DDF_state_optimal)
4034 return DDF_state_optimal;
4035 if (seclevel == DDF_2MIRRORED) {
4036 if (state == DDF_state_optimal || other == DDF_state_optimal)
4037 return DDF_state_part_optimal;
4038 if (state == DDF_state_failed && other == DDF_state_failed)
4039 return DDF_state_failed;
4040 return DDF_state_degraded;
4041 } else {
4042 if (state == DDF_state_failed || other == DDF_state_failed)
4043 return DDF_state_failed;
4044 if (state == DDF_state_degraded || other == DDF_state_degraded)
4045 return DDF_state_degraded;
4046 return DDF_state_part_optimal;
4047 }
4048}
4049
4050static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4051{
4052 int state = get_bvd_state(ddf, &vcl->conf);
4053 unsigned int i;
4054 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4055 state = secondary_state(
4056 state,
4057 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4058 vcl->conf.srl);
4059 }
4060 return state;
4061}
4062
7a7cc504
NB
4063/*
4064 * The state of each disk is stored in the global phys_disk structure
4065 * in phys_disk.entries[n].state.
4066 * This makes various combinations awkward.
4067 * - When a device fails in any array, it must be failed in all arrays
4068 * that include a part of this device.
4069 * - When a component is rebuilding, we cannot include it officially in the
4070 * array unless this is the only array that uses the device.
4071 *
4072 * So: when transitioning:
4073 * Online -> failed, just set failed flag. monitor will propagate
4074 * spare -> online, the device might need to be added to the array.
4075 * spare -> failed, just set failed. Don't worry if in array or not.
4076 */
8d45d196 4077static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 4078{
7a7cc504 4079 struct ddf_super *ddf = a->container->sb;
baba3f4e 4080 unsigned int inst = a->info.container_member, n_bvd;
4081 struct vcl *vcl;
4082 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4083 &n_bvd, &vcl);
4084 int pd;
e1316fab
N
4085 struct mdinfo *mdi;
4086 struct dl *dl;
7a7cc504
NB
4087
4088 if (vc == NULL) {
2c514b71 4089 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
4090 return;
4091 }
e1316fab
N
4092 /* Find the matching slot in 'info'. */
4093 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4094 if (mdi->disk.raid_disk == n)
4095 break;
4096 if (!mdi)
4097 return;
4098
4099 /* and find the 'dl' entry corresponding to that. */
4100 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4101 if (mdi->state_fd >= 0 &&
4102 mdi->disk.major == dl->major &&
e1316fab
N
4103 mdi->disk.minor == dl->minor)
4104 break;
4105 if (!dl)
4106 return;
4107
baba3f4e 4108 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4109 if (pd < 0 || pd != dl->pdnum) {
4110 /* disk doesn't currently exist or has changed.
4111 * If it is now in_sync, insert it. */
baba3f4e 4112 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4113 __func__, dl->pdnum, dl->major, dl->minor,
ad60eea1 4114 __be32_to_cpu(dl->disk.refnum));
baba3f4e 4115 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4116 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
7a7cc504 4117 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4118 pd = dl->pdnum; /* FIXME: is this really correct ? */
4119 vc->phys_refnum[n_bvd] = dl->disk.refnum;
57a66662 4120 LBA_OFFSET(ddf, vc)[n_bvd] =
4121 __cpu_to_be64(mdi->data_offset);
e1316fab
N
4122 ddf->phys->entries[pd].type &=
4123 ~__cpu_to_be16(DDF_Global_Spare);
4124 ddf->phys->entries[pd].type |=
4125 __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 4126 ddf_set_updates_pending(ddf);
7a7cc504
NB
4127 }
4128 } else {
18a2f463 4129 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
4130 if (state & DS_FAULTY)
4131 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4132 if (state & DS_INSYNC) {
4133 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4134 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4135 }
18a2f463 4136 if (old != ddf->phys->entries[pd].state)
7d5a7ff3 4137 ddf_set_updates_pending(ddf);
7a7cc504
NB
4138 }
4139
2c514b71 4140 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 4141
7a7cc504
NB
4142 /* Now we need to check the state of the array and update
4143 * virtual_disk.entries[n].state.
4144 * It needs to be one of "optimal", "degraded", "failed".
4145 * I don't understand 'deleted' or 'missing'.
4146 */
0777d17d 4147 state = get_svd_state(ddf, vcl);
7a7cc504 4148
18a2f463
NB
4149 if (ddf->virt->entries[inst].state !=
4150 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4151 | state)) {
4152
4153 ddf->virt->entries[inst].state =
4154 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4155 | state;
7d5a7ff3 4156 ddf_set_updates_pending(ddf);
18a2f463 4157 }
7a7cc504 4158
549e9569
NB
4159}
4160
2e735d19 4161static void ddf_sync_metadata(struct supertype *st)
549e9569 4162{
7a7cc504
NB
4163
4164 /*
4165 * Write all data to all devices.
4166 * Later, we might be able to track whether only local changes
4167 * have been made, or whether any global data has been changed,
4168 * but ddf is sufficiently weird that it probably always
4169 * changes global data ....
4170 */
18a2f463
NB
4171 struct ddf_super *ddf = st->sb;
4172 if (!ddf->updates_pending)
4173 return;
4174 ddf->updates_pending = 0;
1cc7f4fe 4175 __write_init_super_ddf(st);
2c514b71 4176 dprintf("ddf: sync_metadata\n");
549e9569
NB
4177}
4178
f646805e 4179static int del_from_conflist(struct vcl **list, const char *guid)
4180{
4181 struct vcl **p;
4182 int found = 0;
4183 for (p = list; p && *p; p = &((*p)->next))
4184 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4185 found = 1;
4186 *p = (*p)->next;
4187 }
4188 return found;
4189}
4190
4191static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4192{
4193 struct dl *dl;
4194 unsigned int vdnum, i;
4195 vdnum = find_vde_by_guid(ddf, guid);
4196 if (vdnum == DDF_NOTFOUND) {
4197 pr_err("%s: could not find VD %s\n", __func__,
4198 guid_str(guid));
4199 return -1;
4200 }
4201 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4202 pr_err("%s: could not find conf %s\n", __func__,
4203 guid_str(guid));
4204 return -1;
4205 }
4206 for (dl = ddf->dlist; dl; dl = dl->next)
4207 for (i = 0; i < ddf->max_part; i++)
4208 if (dl->vlist[i] != NULL &&
4209 !memcmp(dl->vlist[i]->conf.guid, guid,
4210 DDF_GUID_LEN))
4211 dl->vlist[i] = NULL;
4212 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4213 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4214 return 0;
4215}
4216
4217static int kill_subarray_ddf(struct supertype *st)
4218{
4219 struct ddf_super *ddf = st->sb;
4220 /*
4221 * currentconf is set in container_content_ddf,
4222 * called with subarray arg
4223 */
4224 struct vcl *victim = ddf->currentconf;
4225 struct vd_config *conf;
4226 ddf->currentconf = NULL;
4227 unsigned int vdnum;
4228 if (!victim) {
4229 pr_err("%s: nothing to kill\n", __func__);
4230 return -1;
4231 }
4232 conf = &victim->conf;
4233 vdnum = find_vde_by_guid(ddf, conf->guid);
4234 if (vdnum == DDF_NOTFOUND) {
4235 pr_err("%s: could not find VD %s\n", __func__,
4236 guid_str(conf->guid));
4237 return -1;
4238 }
4239 if (st->update_tail) {
4240 struct virtual_disk *vd;
4241 int len = sizeof(struct virtual_disk)
4242 + sizeof(struct virtual_entry);
4243 vd = xmalloc(len);
4244 if (vd == NULL) {
4245 pr_err("%s: failed to allocate %d bytes\n", __func__,
4246 len);
4247 return -1;
4248 }
4249 memset(vd, 0 , len);
4250 vd->magic = DDF_VIRT_RECORDS_MAGIC;
4251 vd->populated_vdes = 0;
4252 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4253 /* we use DDF_state_deleted as marker */
4254 vd->entries[0].state = DDF_state_deleted;
4255 append_metadata_update(st, vd, len);
4256 } else
4257 _kill_subarray_ddf(ddf, conf->guid);
4258 return 0;
4259}
4260
c5943560 4261static void copy_matching_bvd(struct ddf_super *ddf,
4262 struct vd_config *conf,
4263 const struct metadata_update *update)
4264{
4265 unsigned int mppe =
4266 __be16_to_cpu(ddf->anchor.max_primary_element_entries);
4267 unsigned int len = ddf->conf_rec_len * 512;
4268 char *p;
4269 struct vd_config *vc;
4270 for (p = update->buf; p < update->buf + update->len; p += len) {
4271 vc = (struct vd_config *) p;
4272 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4273 memcpy(conf->phys_refnum, vc->phys_refnum,
4274 mppe * (sizeof(__u32) + sizeof(__u64)));
4275 return;
4276 }
4277 }
4278 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4279 conf->sec_elmnt_seq, guid_str(conf->guid));
4280}
4281
88c164f4
NB
4282static void ddf_process_update(struct supertype *st,
4283 struct metadata_update *update)
4284{
4285 /* Apply this update to the metadata.
4286 * The first 4 bytes are a DDF_*_MAGIC which guides
4287 * our actions.
4288 * Possible update are:
4289 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4290 * Add a new physical device or remove an old one.
4291 * Changes to this record only happen implicitly.
88c164f4
NB
4292 * used_pdes is the device number.
4293 * DDF_VIRT_RECORDS_MAGIC
4294 * Add a new VD. Possibly also change the 'access' bits.
4295 * populated_vdes is the entry number.
4296 * DDF_VD_CONF_MAGIC
4297 * New or updated VD. the VIRT_RECORD must already
4298 * exist. For an update, phys_refnum and lba_offset
4299 * (at least) are updated, and the VD_CONF must
4300 * be written to precisely those devices listed with
4301 * a phys_refnum.
4302 * DDF_SPARE_ASSIGN_MAGIC
4303 * replacement Spare Assignment Record... but for which device?
4304 *
4305 * So, e.g.:
4306 * - to create a new array, we send a VIRT_RECORD and
4307 * a VD_CONF. Then assemble and start the array.
4308 * - to activate a spare we send a VD_CONF to add the phys_refnum
4309 * and offset. This will also mark the spare as active with
4310 * a spare-assignment record.
4311 */
4312 struct ddf_super *ddf = st->sb;
4313 __u32 *magic = (__u32*)update->buf;
4314 struct phys_disk *pd;
4315 struct virtual_disk *vd;
4316 struct vd_config *vc;
4317 struct vcl *vcl;
4318 struct dl *dl;
f21e18ca 4319 unsigned int ent;
c5943560 4320 unsigned int pdnum, pd2, len;
88c164f4 4321
2c514b71 4322 dprintf("Process update %x\n", *magic);
7e1432fb 4323
88c164f4
NB
4324 switch (*magic) {
4325 case DDF_PHYS_RECORDS_MAGIC:
4326
4327 if (update->len != (sizeof(struct phys_disk) +
4328 sizeof(struct phys_disk_entry)))
4329 return;
4330 pd = (struct phys_disk*)update->buf;
4331
4332 ent = __be16_to_cpu(pd->used_pdes);
4333 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4334 return;
4dd968cc
N
4335 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4336 struct dl **dlp;
4337 /* removing this disk. */
4338 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4339 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4340 struct dl *dl = *dlp;
4341 if (dl->pdnum == (signed)ent) {
4342 close(dl->fd);
4343 dl->fd = -1;
4344 /* FIXME this doesn't free
4345 * dl->devname */
4346 update->space = dl;
4347 *dlp = dl->next;
4348 break;
4349 }
4350 }
7d5a7ff3 4351 ddf_set_updates_pending(ddf);
4dd968cc
N
4352 return;
4353 }
88c164f4
NB
4354 if (!all_ff(ddf->phys->entries[ent].guid))
4355 return;
4356 ddf->phys->entries[ent] = pd->entries[0];
4357 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 4358 __be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4359 ddf_set_updates_pending(ddf);
2cc2983d
N
4360 if (ddf->add_list) {
4361 struct active_array *a;
4362 struct dl *al = ddf->add_list;
4363 ddf->add_list = al->next;
4364
4365 al->next = ddf->dlist;
4366 ddf->dlist = al;
4367
4368 /* As a device has been added, we should check
4369 * for any degraded devices that might make
4370 * use of this spare */
4371 for (a = st->arrays ; a; a=a->next)
4372 a->check_degraded = 1;
4373 }
88c164f4
NB
4374 break;
4375
4376 case DDF_VIRT_RECORDS_MAGIC:
4377
4378 if (update->len != (sizeof(struct virtual_disk) +
4379 sizeof(struct virtual_entry)))
4380 return;
4381 vd = (struct virtual_disk*)update->buf;
4382
f646805e 4383 if (vd->entries[0].state == DDF_state_deleted) {
4384 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4385 return;
4386 } else {
4387
6a7e7ecc 4388 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4389 if (ent != DDF_NOTFOUND) {
4390 dprintf("%s: VD %s exists already in slot %d\n",
4391 __func__, guid_str(vd->entries[0].guid),
4392 ent);
4393 return;
4394 }
f646805e 4395 ent = find_unused_vde(ddf);
4396 if (ent == DDF_NOTFOUND)
4397 return;
4398 ddf->virt->entries[ent] = vd->entries[0];
4399 ddf->virt->populated_vdes =
4400 __cpu_to_be16(
4401 1 + __be16_to_cpu(
4402 ddf->virt->populated_vdes));
ed5ff7a2 4403 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4404 __func__, guid_str(vd->entries[0].guid), ent,
4405 ddf->virt->entries[ent].state,
4406 ddf->virt->entries[ent].init_state);
f646805e 4407 }
7d5a7ff3 4408 ddf_set_updates_pending(ddf);
88c164f4
NB
4409 break;
4410
4411 case DDF_VD_CONF_MAGIC:
88c164f4 4412 vc = (struct vd_config*)update->buf;
c5943560 4413 len = ddf->conf_rec_len * 512;
4414 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4415 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4416 __func__, guid_str(vc->guid), update->len,
4417 vc->sec_elmnt_count);
4418 return;
4419 }
88c164f4
NB
4420 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4421 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4422 break;
ed5ff7a2 4423 dprintf("%s: conf update for %s (%s)\n", __func__,
4424 guid_str(vc->guid), (vcl ? "old" : "new"));
88c164f4
NB
4425 if (vcl) {
4426 /* An update, just copy the phys_refnum and lba_offset
4427 * fields
4428 */
c5943560 4429 unsigned int i;
4430 copy_matching_bvd(ddf, &vcl->conf, update);
4431 for (i = 1; i < vc->sec_elmnt_count; i++)
4432 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4433 update);
88c164f4
NB
4434 } else {
4435 /* A new VD_CONF */
c5943560 4436 unsigned int i;
e6b9548d
DW
4437 if (!update->space)
4438 return;
88c164f4
NB
4439 vcl = update->space;
4440 update->space = NULL;
4441 vcl->next = ddf->conflist;
c5943560 4442 memcpy(&vcl->conf, vc, len);
fb9d0acb 4443 ent = find_vde_by_guid(ddf, vc->guid);
4444 if (ent == DDF_NOTFOUND)
4445 return;
4446 vcl->vcnum = ent;
88c164f4 4447 ddf->conflist = vcl;
c5943560 4448 for (i = 1; i < vc->sec_elmnt_count; i++)
4449 memcpy(vcl->other_bvds[i-1],
4450 update->buf + len * i, len);
88c164f4 4451 }
c7079c84
N
4452 /* Set DDF_Transition on all Failed devices - to help
4453 * us detect those that are no longer in use
4454 */
4455 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4456 if (ddf->phys->entries[pdnum].state
4457 & __be16_to_cpu(DDF_Failed))
4458 ddf->phys->entries[pdnum].state
4459 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
4460 /* Now make sure vlist is correct for each dl. */
4461 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca 4462 unsigned int vn = 0;
8401644c 4463 int in_degraded = 0;
5838fccd 4464 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4465 unsigned int dn, ibvd;
4466 const struct vd_config *conf;
4467 int vstate;
4468 dn = get_pd_index_from_refnum(vcl,
4469 dl->disk.refnum,
4470 ddf->mppe,
4471 &conf, &ibvd);
4472 if (dn == DDF_NOTFOUND)
4473 continue;
4474 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
ad60eea1 4475 dl->pdnum,
4476 __be32_to_cpu(dl->disk.refnum),
5838fccd 4477 guid_str(conf->guid),
4478 conf->sec_elmnt_seq, vn);
4479 /* Clear the Transition flag */
4480 if (ddf->phys->entries[dl->pdnum].state
4481 & __be16_to_cpu(DDF_Failed))
4482 ddf->phys->entries[dl->pdnum].state &=
4483 ~__be16_to_cpu(DDF_Transition);
4484 dl->vlist[vn++] = vcl;
4485 vstate = ddf->virt->entries[vcl->vcnum].state
4486 & DDF_state_mask;
4487 if (vstate == DDF_state_degraded ||
4488 vstate == DDF_state_part_optimal)
4489 in_degraded = 1;
4490 }
88c164f4
NB
4491 while (vn < ddf->max_part)
4492 dl->vlist[vn++] = NULL;
7e1432fb
NB
4493 if (dl->vlist[0]) {
4494 ddf->phys->entries[dl->pdnum].type &=
4495 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
4496 if (!(ddf->phys->entries[dl->pdnum].type &
4497 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
4498 ddf->phys->entries[dl->pdnum].type |=
4499 __cpu_to_be16(DDF_Active_in_VD);
4500 if (in_degraded)
4501 ddf->phys->entries[dl->pdnum].state |=
4502 __cpu_to_be16(DDF_Rebuilding);
4503 }
7e1432fb
NB
4504 }
4505 if (dl->spare) {
4506 ddf->phys->entries[dl->pdnum].type &=
4507 ~__cpu_to_be16(DDF_Global_Spare);
4508 ddf->phys->entries[dl->pdnum].type |=
4509 __cpu_to_be16(DDF_Spare);
4510 }
4511 if (!dl->vlist[0] && !dl->spare) {
4512 ddf->phys->entries[dl->pdnum].type |=
4513 __cpu_to_be16(DDF_Global_Spare);
4514 ddf->phys->entries[dl->pdnum].type &=
4515 ~__cpu_to_be16(DDF_Spare |
4516 DDF_Active_in_VD);
4517 }
88c164f4 4518 }
c7079c84
N
4519
4520 /* Now remove any 'Failed' devices that are not part
4521 * of any VD. They will have the Transition flag set.
4522 * Once done, we need to update all dl->pdnum numbers.
4523 */
4524 pd2 = 0;
4525 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4526 if ((ddf->phys->entries[pdnum].state
4527 & __be16_to_cpu(DDF_Failed))
4528 && (ddf->phys->entries[pdnum].state
4529 & __be16_to_cpu(DDF_Transition)))
4530 /* skip this one */;
4531 else if (pdnum == pd2)
4532 pd2++;
4533 else {
4534 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4535 for (dl = ddf->dlist; dl; dl = dl->next)
4536 if (dl->pdnum == (int)pdnum)
4537 dl->pdnum = pd2;
4538 pd2++;
4539 }
4540 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4541 while (pd2 < pdnum) {
4542 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4543 pd2++;
4544 }
4545
7d5a7ff3 4546 ddf_set_updates_pending(ddf);
88c164f4
NB
4547 break;
4548 case DDF_SPARE_ASSIGN_MAGIC:
4549 default: break;
4550 }
4551}
4552
edd8d13c
NB
4553static void ddf_prepare_update(struct supertype *st,
4554 struct metadata_update *update)
4555{
4556 /* This update arrived at managemon.
4557 * We are about to pass it to monitor.
4558 * If a malloc is needed, do it here.
4559 */
4560 struct ddf_super *ddf = st->sb;
4561 __u32 *magic = (__u32*)update->buf;
c5943560 4562 if (*magic == DDF_VD_CONF_MAGIC) {
4563 struct vcl *vcl;
4564 struct vd_config *conf = (struct vd_config *) update->buf;
e6b9548d 4565 if (posix_memalign(&update->space, 512,
613b0d17 4566 offsetof(struct vcl, conf)
c5943560 4567 + ddf->conf_rec_len * 512) != 0) {
4568 update->space = NULL;
4569 return;
4570 }
4571 vcl = update->space;
4572 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4573 if (alloc_other_bvds(ddf, vcl) != 0) {
4574 free(update->space);
e6b9548d 4575 update->space = NULL;
c5943560 4576 }
4577 }
edd8d13c
NB
4578}
4579
7e1432fb
NB
4580/*
4581 * Check if the array 'a' is degraded but not failed.
4582 * If it is, find as many spares as are available and needed and
4583 * arrange for their inclusion.
4584 * We only choose devices which are not already in the array,
4585 * and prefer those with a spare-assignment to this array.
4586 * otherwise we choose global spares - assuming always that
4587 * there is enough room.
4588 * For each spare that we assign, we return an 'mdinfo' which
4589 * describes the position for the device in the array.
4590 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4591 * the new phys_refnum and lba_offset values.
4592 *
4593 * Only worry about BVDs at the moment.
4594 */
4595static struct mdinfo *ddf_activate_spare(struct active_array *a,
4596 struct metadata_update **updates)
4597{
4598 int working = 0;
4599 struct mdinfo *d;
4600 struct ddf_super *ddf = a->container->sb;
4601 int global_ok = 0;
4602 struct mdinfo *rv = NULL;
4603 struct mdinfo *di;
4604 struct metadata_update *mu;
4605 struct dl *dl;
4606 int i;
baba3f4e 4607 struct vcl *vcl;
7e1432fb 4608 struct vd_config *vc;
baba3f4e 4609 unsigned int n_bvd;
7e1432fb 4610
7e1432fb
NB
4611 for (d = a->info.devs ; d ; d = d->next) {
4612 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4613 d->state_fd >= 0)
7e1432fb
NB
4614 /* wait for Removal to happen */
4615 return NULL;
4616 if (d->state_fd >= 0)
4617 working ++;
4618 }
4619
2c514b71
NB
4620 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4621 a->info.array.level);
7e1432fb
NB
4622 if (working == a->info.array.raid_disks)
4623 return NULL; /* array not degraded */
4624 switch (a->info.array.level) {
4625 case 1:
4626 if (working == 0)
4627 return NULL; /* failed */
4628 break;
4629 case 4:
4630 case 5:
4631 if (working < a->info.array.raid_disks - 1)
4632 return NULL; /* failed */
4633 break;
4634 case 6:
4635 if (working < a->info.array.raid_disks - 2)
4636 return NULL; /* failed */
4637 break;
4638 default: /* concat or stripe */
4639 return NULL; /* failed */
4640 }
4641
4642 /* For each slot, if it is not working, find a spare */
4643 dl = ddf->dlist;
4644 for (i = 0; i < a->info.array.raid_disks; i++) {
4645 for (d = a->info.devs ; d ; d = d->next)
4646 if (d->disk.raid_disk == i)
4647 break;
2c514b71 4648 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4649 if (d && (d->state_fd >= 0))
4650 continue;
4651
4652 /* OK, this device needs recovery. Find a spare */
4653 again:
4654 for ( ; dl ; dl = dl->next) {
4655 unsigned long long esize;
4656 unsigned long long pos;
4657 struct mdinfo *d2;
4658 int is_global = 0;
4659 int is_dedicated = 0;
4660 struct extent *ex;
f21e18ca 4661 unsigned int j;
7e1432fb
NB
4662 /* If in this array, skip */
4663 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4664 if (d2->state_fd >= 0 &&
4665 d2->disk.major == dl->major &&
7e1432fb 4666 d2->disk.minor == dl->minor) {
2c514b71 4667 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4668 break;
4669 }
4670 if (d2)
4671 continue;
4672 if (ddf->phys->entries[dl->pdnum].type &
4673 __cpu_to_be16(DDF_Spare)) {
4674 /* Check spare assign record */
4675 if (dl->spare) {
4676 if (dl->spare->type & DDF_spare_dedicated) {
4677 /* check spare_ents for guid */
4678 for (j = 0 ;
4679 j < __be16_to_cpu(dl->spare->populated);
4680 j++) {
4681 if (memcmp(dl->spare->spare_ents[j].guid,
4682 ddf->virt->entries[a->info.container_member].guid,
4683 DDF_GUID_LEN) == 0)
4684 is_dedicated = 1;
4685 }
4686 } else
4687 is_global = 1;
4688 }
4689 } else if (ddf->phys->entries[dl->pdnum].type &
4690 __cpu_to_be16(DDF_Global_Spare)) {
4691 is_global = 1;
e0e7aeaa
N
4692 } else if (!(ddf->phys->entries[dl->pdnum].state &
4693 __cpu_to_be16(DDF_Failed))) {
4694 /* we can possibly use some of this */
4695 is_global = 1;
7e1432fb
NB
4696 }
4697 if ( ! (is_dedicated ||
4698 (is_global && global_ok))) {
2c514b71 4699 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4700 is_dedicated, is_global);
7e1432fb
NB
4701 continue;
4702 }
4703
4704 /* We are allowed to use this device - is there space?
4705 * We need a->info.component_size sectors */
4706 ex = get_extents(ddf, dl);
4707 if (!ex) {
2c514b71 4708 dprintf("cannot get extents\n");
7e1432fb
NB
4709 continue;
4710 }
4711 j = 0; pos = 0;
4712 esize = 0;
4713
4714 do {
4715 esize = ex[j].start - pos;
4716 if (esize >= a->info.component_size)
4717 break;
e5cc7d46
N
4718 pos = ex[j].start + ex[j].size;
4719 j++;
4720 } while (ex[j-1].size);
7e1432fb
NB
4721
4722 free(ex);
4723 if (esize < a->info.component_size) {
e5cc7d46
N
4724 dprintf("%x:%x has no room: %llu %llu\n",
4725 dl->major, dl->minor,
2c514b71 4726 esize, a->info.component_size);
7e1432fb
NB
4727 /* No room */
4728 continue;
4729 }
4730
4731 /* Cool, we have a device with some space at pos */
503975b9 4732 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4733 di->disk.number = i;
4734 di->disk.raid_disk = i;
4735 di->disk.major = dl->major;
4736 di->disk.minor = dl->minor;
4737 di->disk.state = 0;
d23534e4 4738 di->recovery_start = 0;
7e1432fb
NB
4739 di->data_offset = pos;
4740 di->component_size = a->info.component_size;
4741 di->container_member = dl->pdnum;
4742 di->next = rv;
4743 rv = di;
2c514b71
NB
4744 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4745 i, pos);
7e1432fb
NB
4746
4747 break;
4748 }
4749 if (!dl && ! global_ok) {
4750 /* not enough dedicated spares, try global */
4751 global_ok = 1;
4752 dl = ddf->dlist;
4753 goto again;
4754 }
4755 }
4756
4757 if (!rv)
4758 /* No spares found */
4759 return rv;
4760 /* Now 'rv' has a list of devices to return.
4761 * Create a metadata_update record to update the
4762 * phys_refnum and lba_offset values
4763 */
503975b9
N
4764 mu = xmalloc(sizeof(*mu));
4765 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4766 free(mu);
4767 mu = NULL;
4768 }
503975b9 4769 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4770 mu->len = ddf->conf_rec_len * 512;
4771 mu->space = NULL;
f50ae22e 4772 mu->space_list = NULL;
7e1432fb 4773 mu->next = *updates;
baba3f4e 4774 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4775 &n_bvd, &vcl);
7e1432fb
NB
4776 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4777
4778 vc = (struct vd_config*)mu->buf;
7e1432fb
NB
4779 for (di = rv ; di ; di = di->next) {
4780 vc->phys_refnum[di->disk.raid_disk] =
4781 ddf->phys->entries[dl->pdnum].refnum;
57a66662 4782 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4783 = __cpu_to_be64(di->data_offset);
7e1432fb
NB
4784 }
4785 *updates = mu;
4786 return rv;
4787}
0e600426 4788#endif /* MDASSEMBLE */
7e1432fb 4789
b640a252
N
4790static int ddf_level_to_layout(int level)
4791{
4792 switch(level) {
4793 case 0:
4794 case 1:
4795 return 0;
4796 case 5:
4797 return ALGORITHM_LEFT_SYMMETRIC;
4798 case 6:
4799 return ALGORITHM_ROTATING_N_CONTINUE;
4800 case 10:
4801 return 0x102;
4802 default:
4803 return UnSet;
4804 }
4805}
4806
30f58b22
DW
4807static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4808{
4809 if (level && *level == UnSet)
4810 *level = LEVEL_CONTAINER;
4811
4812 if (level && layout && *layout == UnSet)
4813 *layout = ddf_level_to_layout(*level);
4814}
4815
a322f70c
DW
/*
 * Method table that plugs the DDF metadata handlers of this file into the
 * generic 'struct superswitch' interface.  Entries inside
 * '#ifndef MDASSEMBLE' are left out when MDASSEMBLE is defined.
 */
struct superswitch super_ddf = {
#ifndef	MDASSEMBLE
	/* examine/detail reporting, creation and container management */
	.examine_super	= examine_super_ddf,
	.brief_examine_super = brief_examine_super_ddf,
	.brief_examine_subarrays = brief_examine_subarrays_ddf,
	.export_examine_super = export_examine_super_ddf,
	.detail_super	= detail_super_ddf,
	.brief_detail_super = brief_detail_super_ddf,
	.validate_geometry = validate_geometry_ddf,
	.write_init_super = write_init_super_ddf,
	.add_to_super	= add_to_super_ddf,
	.remove_from_super = remove_from_super_ddf,
	.load_container	= load_container_ddf,
	.copy_metadata = copy_metadata_ddf,
#endif
	/* always available */
	.match_home	= match_home_ddf,
	.uuid_from_super= uuid_from_super_ddf,
	.getinfo_super  = getinfo_super_ddf,
	.update_super	= update_super_ddf,

	.avail_size	= avail_size_ddf,

	.compare_super	= compare_super_ddf,

	.load_super	= load_super_ddf,
	.init_super	= init_super_ddf,
	.store_super	= store_super_ddf,
	.free_super	= free_super_ddf,
	.match_metadata_desc = match_metadata_desc_ddf,
	.container_content = container_content_ddf,
	.default_geometry = default_geometry_ddf,
	.kill_subarray  = kill_subarray_ddf,

	/* 'external' flag is set for this metadata format */
	.external	= 1,

#ifndef MDASSEMBLE
/* for mdmon */
	.open_new       = ddf_open_new,
	.set_array_state= ddf_set_array_state,
	.set_disk       = ddf_set_disk,
	.sync_metadata  = ddf_sync_metadata,
	.process_update	= ddf_process_update,
	.prepare_update	= ddf_prepare_update,
	.activate_spare = ddf_activate_spare,
#endif
	.name = "ddf",
};