]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: increase seq number in ddf_set_updates_pending
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
bedbf68a 47#define DDF_NOTFOUND (~0U)
48#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
61/* Primary Raid Level (PRL) */
62#define DDF_RAID0 0x00
63#define DDF_RAID1 0x01
64#define DDF_RAID3 0x03
65#define DDF_RAID4 0x04
66#define DDF_RAID5 0x05
67#define DDF_RAID1E 0x11
68#define DDF_JBOD 0x0f
69#define DDF_CONCAT 0x1f
70#define DDF_RAID5E 0x15
71#define DDF_RAID5EE 0x25
59e36268 72#define DDF_RAID6 0x06
a322f70c
DW
73
74/* Raid Level Qualifier (RLQ) */
75#define DDF_RAID0_SIMPLE 0x00
76#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78#define DDF_RAID3_0 0x00 /* parity in first extent */
79#define DDF_RAID3_N 0x01 /* parity in last extent */
80#define DDF_RAID4_0 0x00 /* parity in first extent */
81#define DDF_RAID4_N 0x01 /* parity in last extent */
82/* these apply to raid5e and raid5ee as well */
83#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 84#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
85#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
87
88#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
90
91/* Secondary RAID Level (SRL) */
92#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93#define DDF_2MIRRORED 0x01
94#define DDF_2CONCAT 0x02
95#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
96
97/* Magic numbers */
98#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
108
109#define DDF_GUID_LEN 24
59e36268
NB
110#define DDF_REVISION_0 "01.00.00"
111#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
112
113struct ddf_header {
88c164f4 114 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
115 __u32 crc;
116 char guid[DDF_GUID_LEN];
59e36268 117 char revision[8]; /* 01.02.00 */
a322f70c
DW
118 __u32 seq; /* starts at '1' */
119 __u32 timestamp;
120 __u8 openflag;
121 __u8 foreignflag;
122 __u8 enforcegroups;
123 __u8 pad0; /* 0xff */
124 __u8 pad1[12]; /* 12 * 0xff */
125 /* 64 bytes so far */
126 __u8 header_ext[32]; /* reserved: fill with 0xff */
127 __u64 primary_lba;
128 __u64 secondary_lba;
129 __u8 type;
130 __u8 pad2[3]; /* 0xff */
131 __u32 workspace_len; /* sectors for vendor space -
132 * at least 32768(sectors) */
133 __u64 workspace_lba;
134 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
135 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
136 __u16 max_partitions; /* i.e. max num of configuration
137 record entries per disk */
138 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
139 *12/512) */
140 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
141 __u8 pad3[54]; /* 0xff */
142 /* 192 bytes so far */
143 __u32 controller_section_offset;
144 __u32 controller_section_length;
145 __u32 phys_section_offset;
146 __u32 phys_section_length;
147 __u32 virt_section_offset;
148 __u32 virt_section_length;
149 __u32 config_section_offset;
150 __u32 config_section_length;
151 __u32 data_section_offset;
152 __u32 data_section_length;
153 __u32 bbm_section_offset;
154 __u32 bbm_section_length;
155 __u32 diag_space_offset;
156 __u32 diag_space_length;
157 __u32 vendor_offset;
158 __u32 vendor_length;
159 /* 256 bytes so far */
160 __u8 pad4[256]; /* 0xff */
161};
162
163/* type field */
164#define DDF_HEADER_ANCHOR 0x00
165#define DDF_HEADER_PRIMARY 0x01
166#define DDF_HEADER_SECONDARY 0x02
167
168/* The content of the 'controller section' - global scope */
169struct ddf_controller_data {
88c164f4 170 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
171 __u32 crc;
172 char guid[DDF_GUID_LEN];
173 struct controller_type {
174 __u16 vendor_id;
175 __u16 device_id;
176 __u16 sub_vendor_id;
177 __u16 sub_device_id;
178 } type;
179 char product_id[16];
180 __u8 pad[8]; /* 0xff */
181 __u8 vendor_data[448];
182};
183
184/* The content of phys_section - global scope */
185struct phys_disk {
88c164f4 186 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
187 __u32 crc;
188 __u16 used_pdes;
189 __u16 max_pdes;
190 __u8 pad[52];
191 struct phys_disk_entry {
192 char guid[DDF_GUID_LEN];
193 __u32 refnum;
194 __u16 type;
195 __u16 state;
196 __u64 config_size; /* DDF structures must be after here */
197 char path[18]; /* another horrible structure really */
198 __u8 pad[6];
199 } entries[0];
200};
201
202/* phys_disk_entry.type is a bitmap - bigendian remember */
203#define DDF_Forced_PD_GUID 1
204#define DDF_Active_in_VD 2
88c164f4 205#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
206#define DDF_Spare 8 /* overrides Global_spare */
207#define DDF_Foreign 16
208#define DDF_Legacy 32 /* no DDF on this device */
209
210#define DDF_Interface_mask 0xf00
211#define DDF_Interface_SCSI 0x100
212#define DDF_Interface_SAS 0x200
213#define DDF_Interface_SATA 0x300
214#define DDF_Interface_FC 0x400
215
216/* phys_disk_entry.state is a bigendian bitmap */
217#define DDF_Online 1
218#define DDF_Failed 2 /* overrides 1,4,8 */
219#define DDF_Rebuilding 4
220#define DDF_Transition 8
221#define DDF_SMART 16
222#define DDF_ReadErrors 32
223#define DDF_Missing 64
224
225/* The content of the virt_section global scope */
226struct virtual_disk {
88c164f4 227 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
228 __u32 crc;
229 __u16 populated_vdes;
230 __u16 max_vdes;
231 __u8 pad[52];
232 struct virtual_entry {
233 char guid[DDF_GUID_LEN];
234 __u16 unit;
235 __u16 pad0; /* 0xffff */
236 __u16 guid_crc;
237 __u16 type;
238 __u8 state;
239 __u8 init_state;
240 __u8 pad1[14];
241 char name[16];
242 } entries[0];
243};
244
245/* virtual_entry.type is a bitmap - bigendian */
246#define DDF_Shared 1
247#define DDF_Enforce_Groups 2
248#define DDF_Unicode 4
249#define DDF_Owner_Valid 8
250
251/* virtual_entry.state is a bigendian bitmap */
252#define DDF_state_mask 0x7
253#define DDF_state_optimal 0x0
254#define DDF_state_degraded 0x1
255#define DDF_state_deleted 0x2
256#define DDF_state_missing 0x3
257#define DDF_state_failed 0x4
7a7cc504 258#define DDF_state_part_optimal 0x5
a322f70c
DW
259
260#define DDF_state_morphing 0x8
261#define DDF_state_inconsistent 0x10
262
263/* virtual_entry.init_state is a bigendian bitmap */
264#define DDF_initstate_mask 0x03
265#define DDF_init_not 0x00
7a7cc504
NB
266#define DDF_init_quick 0x01 /* initialisation is progress.
267 * i.e. 'state_inconsistent' */
a322f70c
DW
268#define DDF_init_full 0x02
269
270#define DDF_access_mask 0xc0
271#define DDF_access_rw 0x00
272#define DDF_access_ro 0x80
273#define DDF_access_blocked 0xc0
274
275/* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
280struct vd_config {
88c164f4 281 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
282 __u32 crc;
283 char guid[DDF_GUID_LEN];
284 __u32 timestamp;
285 __u32 seqnum;
286 __u8 pad0[24];
287 __u16 prim_elmnt_count;
288 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
289 __u8 prl;
290 __u8 rlq;
291 __u8 sec_elmnt_count;
292 __u8 sec_elmnt_seq;
293 __u8 srl;
598f0d58
NB
294 __u64 blocks; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks; /* blocks in array */
a322f70c
DW
298 __u8 pad1[8];
299 __u32 spare_refs[8];
300 __u8 cache_pol[8];
301 __u8 bg_rate;
302 __u8 pad2[3];
303 __u8 pad3[52];
304 __u8 pad4[192];
305 __u8 v0[32]; /* reserved- 0xff */
306 __u8 v1[32]; /* reserved- 0xff */
307 __u8 v2[16]; /* reserved- 0xff */
308 __u8 v3[16]; /* reserved- 0xff */
309 __u8 vendor[32];
310 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
313};
57a66662 314#define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
a322f70c
DW
315
316/* vd_config.cache_pol[7] is a bitmap */
317#define DDF_cache_writeback 1 /* else writethrough */
318#define DDF_cache_wadaptive 2 /* only applies if writeback */
319#define DDF_cache_readahead 4
320#define DDF_cache_radaptive 8 /* only if doing read-ahead */
321#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
322#define DDF_cache_wallowed 32 /* enable write caching */
323#define DDF_cache_rallowed 64 /* enable read caching */
324
325struct spare_assign {
88c164f4 326 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
327 __u32 crc;
328 __u32 timestamp;
329 __u8 reserved[7];
330 __u8 type;
331 __u16 populated; /* SAEs used */
332 __u16 max; /* max SAEs */
333 __u8 pad[8];
334 struct spare_assign_entry {
335 char guid[DDF_GUID_LEN];
336 __u16 secondary_element;
337 __u8 pad[6];
338 } spare_ents[0];
339};
340/* spare_assign.type is a bitmap */
341#define DDF_spare_dedicated 0x1 /* else global */
342#define DDF_spare_revertible 0x2 /* else committable */
343#define DDF_spare_active 0x4 /* else not active */
344#define DDF_spare_affinity 0x8 /* enclosure affinity */
345
346/* The data_section contents - local scope */
347struct disk_data {
88c164f4 348 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
349 __u32 crc;
350 char guid[DDF_GUID_LEN];
351 __u32 refnum; /* crc of some magic drive data ... */
352 __u8 forced_ref; /* set when above was not result of magic */
353 __u8 forced_guid; /* set if guid was forced rather than magic */
354 __u8 vendor[32];
355 __u8 pad[442];
356};
357
358/* bbm_section content */
359struct bad_block_log {
360 __u32 magic;
361 __u32 crc;
362 __u16 entry_count;
363 __u32 spare_count;
364 __u8 pad[10];
365 __u64 first_spare;
366 struct mapped_block {
367 __u64 defective_start;
368 __u32 replacement_start;
369 __u16 remap_count;
370 __u8 pad[2];
371 } entries[0];
372};
373
374/* Struct for internally holding ddf structures */
375/* The DDF structure stored on each device is potentially
376 * quite different, as some data is global and some is local.
377 * The global data is:
378 * - ddf header
379 * - controller_data
380 * - Physical disk records
381 * - Virtual disk records
382 * The local data is:
383 * - Configuration records
384 * - Physical Disk data section
385 * ( and Bad block and vendor which I don't care about yet).
386 *
387 * The local data is parsed into separate lists as it is read
388 * and reconstructed for writing. This means that we only need
389 * to make config changes once and they are automatically
390 * propagated to all devices.
391 * Note that the ddf_super has space for the conf and disk data
392 * for this disk and also for a list of all such data.
393 * The list is only used for the superblock that is being
394 * built in Create or Assemble to describe the whole array.
395 */
396struct ddf_super {
6416d527 397 struct ddf_header anchor, primary, secondary;
a322f70c 398 struct ddf_controller_data controller;
6416d527 399 struct ddf_header *active;
a322f70c
DW
400 struct phys_disk *phys;
401 struct virtual_disk *virt;
402 int pdsize, vdsize;
f21e18ca 403 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 404 int currentdev;
18a2f463 405 int updates_pending;
a322f70c 406 struct vcl {
6416d527
NB
407 union {
408 char space[512];
409 struct {
410 struct vcl *next;
f21e18ca 411 unsigned int vcnum; /* index into ->virt */
8ec5d685 412 struct vd_config **other_bvds;
6416d527
NB
413 __u64 *block_sizes; /* NULL if all the same */
414 };
415 };
a322f70c 416 struct vd_config conf;
d2ca6449 417 } *conflist, *currentconf;
a322f70c 418 struct dl {
6416d527
NB
419 union {
420 char space[512];
421 struct {
422 struct dl *next;
423 int major, minor;
424 char *devname;
425 int fd;
426 unsigned long long size; /* sectors */
097bcf00 427 unsigned long long primary_lba; /* sectors */
428 unsigned long long secondary_lba; /* sectors */
429 unsigned long long workspace_lba; /* sectors */
6416d527
NB
430 int pdnum; /* index in ->phys */
431 struct spare_assign *spare;
8592f29d
N
432 void *mdupdate; /* hold metadata update */
433
434 /* These fields used by auto-layout */
435 int raiddisk; /* slot to fill in autolayout */
436 __u64 esize;
6416d527
NB
437 };
438 };
a322f70c 439 struct disk_data disk;
b2280677 440 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 441 } *dlist, *add_list;
a322f70c
DW
442};
443
444#ifndef offsetof
445#define offsetof(t,f) ((size_t)&(((t*)0)->f))
446#endif
447
#if DEBUG
static int all_ff(const char *guid);

/*
 * Debug helper: print state and init_state of the virtual disk entries,
 * skipping every entry whose GUID all_ff() reports as set.
 * 'msg' is printed as part of the line prefix.
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int vd;

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < __be16_to_cpu(ddf->active->max_vd_entries); vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state,
				ve->init_state);
	}
	dprintf("\n");
}
#else
/* Non-debug build: state tracing compiles to nothing. */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
466
35c3606d 467static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
468{
469 ddf->updates_pending = 1;
470 ddf->active->seq = __cpu_to_be32((__be32_to_cpu(ddf->active->seq)+1));
471 pr_state(ddf, func);
472}
473
474#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
7d5a7ff3 475
fcc22180 476static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
477 __u32 refnum, unsigned int nmax,
478 const struct vd_config **bvd,
479 unsigned int *idx);
480
f21e18ca 481static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
482{
483 /* crcs are always at the same place as in the ddf_header */
484 struct ddf_header *ddf = buf;
485 __u32 oldcrc = ddf->crc;
486 __u32 newcrc;
487 ddf->crc = 0xffffffff;
488
489 newcrc = crc32(0, buf, len);
490 ddf->crc = oldcrc;
4abe6b70
N
491 /* The crc is store (like everything) bigendian, so convert
492 * here for simplicity
493 */
494 return __cpu_to_be32(newcrc);
a322f70c
DW
495}
496
a3163bf0 497#define DDF_INVALID_LEVEL 0xff
498#define DDF_NO_SECONDARY 0xff
499static int err_bad_md_layout(const mdu_array_info_t *array)
500{
501 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
502 array->level, array->layout, array->raid_disks);
503 return DDF_INVALID_LEVEL;
504}
505
506static int layout_md2ddf(const mdu_array_info_t *array,
507 struct vd_config *conf)
508{
509 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
510 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
511 __u8 sec_elmnt_count = 1;
512 __u8 srl = DDF_NO_SECONDARY;
513
514 switch (array->level) {
515 case LEVEL_LINEAR:
516 prl = DDF_CONCAT;
517 break;
518 case 0:
519 rlq = DDF_RAID0_SIMPLE;
520 prl = DDF_RAID0;
521 break;
522 case 1:
523 switch (array->raid_disks) {
524 case 2:
525 rlq = DDF_RAID1_SIMPLE;
526 break;
527 case 3:
528 rlq = DDF_RAID1_MULTI;
529 break;
530 default:
531 return err_bad_md_layout(array);
532 }
533 prl = DDF_RAID1;
534 break;
535 case 4:
536 if (array->layout != 0)
537 return err_bad_md_layout(array);
538 rlq = DDF_RAID4_N;
539 prl = DDF_RAID4;
540 break;
541 case 5:
542 switch (array->layout) {
543 case ALGORITHM_LEFT_ASYMMETRIC:
544 rlq = DDF_RAID5_N_RESTART;
545 break;
546 case ALGORITHM_RIGHT_ASYMMETRIC:
547 rlq = DDF_RAID5_0_RESTART;
548 break;
549 case ALGORITHM_LEFT_SYMMETRIC:
550 rlq = DDF_RAID5_N_CONTINUE;
551 break;
552 case ALGORITHM_RIGHT_SYMMETRIC:
553 /* not mentioned in standard */
554 default:
555 return err_bad_md_layout(array);
556 }
557 prl = DDF_RAID5;
558 break;
559 case 6:
560 switch (array->layout) {
561 case ALGORITHM_ROTATING_N_RESTART:
562 rlq = DDF_RAID5_N_RESTART;
563 break;
564 case ALGORITHM_ROTATING_ZERO_RESTART:
565 rlq = DDF_RAID6_0_RESTART;
566 break;
567 case ALGORITHM_ROTATING_N_CONTINUE:
568 rlq = DDF_RAID5_N_CONTINUE;
569 break;
570 default:
571 return err_bad_md_layout(array);
572 }
573 prl = DDF_RAID6;
574 break;
575 case 10:
576 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
577 rlq = DDF_RAID1_SIMPLE;
578 prim_elmnt_count = __cpu_to_be16(2);
579 sec_elmnt_count = array->raid_disks / 2;
580 } else if (array->raid_disks % 3 == 0
581 && array->layout == 0x103) {
582 rlq = DDF_RAID1_MULTI;
583 prim_elmnt_count = __cpu_to_be16(3);
584 sec_elmnt_count = array->raid_disks / 3;
585 } else
586 return err_bad_md_layout(array);
587 srl = DDF_2SPANNED;
588 prl = DDF_RAID1;
589 break;
590 default:
591 return err_bad_md_layout(array);
592 }
593 conf->prl = prl;
594 conf->prim_elmnt_count = prim_elmnt_count;
595 conf->rlq = rlq;
596 conf->srl = srl;
597 conf->sec_elmnt_count = sec_elmnt_count;
598 return 0;
599}
600
8a2848a7 601static int err_bad_ddf_layout(const struct vd_config *conf)
602{
603 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
604 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
605 return -1;
606}
607
608static int layout_ddf2md(const struct vd_config *conf,
609 mdu_array_info_t *array)
610{
611 int level = LEVEL_UNSUPPORTED;
612 int layout = 0;
613 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
614
615 if (conf->sec_elmnt_count > 1) {
616 /* see also check_secondary() */
617 if (conf->prl != DDF_RAID1 ||
618 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
619 pr_err("Unsupported secondary RAID level %u/%u\n",
620 conf->prl, conf->srl);
621 return -1;
622 }
623 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
624 layout = 0x102;
625 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
626 layout = 0x103;
627 else
628 return err_bad_ddf_layout(conf);
629 raiddisks *= conf->sec_elmnt_count;
630 level = 10;
631 goto good;
632 }
633
634 switch (conf->prl) {
635 case DDF_CONCAT:
636 level = LEVEL_LINEAR;
637 break;
638 case DDF_RAID0:
639 if (conf->rlq != DDF_RAID0_SIMPLE)
640 return err_bad_ddf_layout(conf);
641 level = 0;
642 break;
643 case DDF_RAID1:
644 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
645 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
646 return err_bad_ddf_layout(conf);
647 level = 1;
648 break;
649 case DDF_RAID4:
650 if (conf->rlq != DDF_RAID4_N)
651 return err_bad_ddf_layout(conf);
652 level = 4;
653 break;
654 case DDF_RAID5:
655 switch (conf->rlq) {
656 case DDF_RAID5_N_RESTART:
657 layout = ALGORITHM_LEFT_ASYMMETRIC;
658 break;
659 case DDF_RAID5_0_RESTART:
660 layout = ALGORITHM_RIGHT_ASYMMETRIC;
661 break;
662 case DDF_RAID5_N_CONTINUE:
663 layout = ALGORITHM_LEFT_SYMMETRIC;
664 break;
665 default:
666 return err_bad_ddf_layout(conf);
667 }
668 level = 5;
669 break;
670 case DDF_RAID6:
671 switch (conf->rlq) {
672 case DDF_RAID5_N_RESTART:
673 layout = ALGORITHM_ROTATING_N_RESTART;
674 break;
675 case DDF_RAID6_0_RESTART:
676 layout = ALGORITHM_ROTATING_ZERO_RESTART;
677 break;
678 case DDF_RAID5_N_CONTINUE:
679 layout = ALGORITHM_ROTATING_N_CONTINUE;
680 break;
681 default:
682 return err_bad_ddf_layout(conf);
683 }
684 level = 6;
685 break;
686 default:
687 return err_bad_ddf_layout(conf);
688 };
689
690good:
691 array->level = level;
692 array->layout = layout;
693 array->raid_disks = raiddisks;
694 return 0;
695}
696
a322f70c
DW
697static int load_ddf_header(int fd, unsigned long long lba,
698 unsigned long long size,
699 int type,
700 struct ddf_header *hdr, struct ddf_header *anchor)
701{
702 /* read a ddf header (primary or secondary) from fd/lba
703 * and check that it is consistent with anchor
704 * Need to check:
705 * magic, crc, guid, rev, and LBA's header_type, and
706 * everything after header_type must be the same
707 */
708 if (lba >= size-1)
709 return 0;
710
711 if (lseek64(fd, lba<<9, 0) < 0)
712 return 0;
713
714 if (read(fd, hdr, 512) != 512)
715 return 0;
716
717 if (hdr->magic != DDF_HEADER_MAGIC)
718 return 0;
719 if (calc_crc(hdr, 512) != hdr->crc)
720 return 0;
721 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
722 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
723 anchor->primary_lba != hdr->primary_lba ||
724 anchor->secondary_lba != hdr->secondary_lba ||
725 hdr->type != type ||
726 memcmp(anchor->pad2, hdr->pad2, 512 -
727 offsetof(struct ddf_header, pad2)) != 0)
728 return 0;
729
730 /* Looks good enough to me... */
731 return 1;
732}
733
734static void *load_section(int fd, struct ddf_super *super, void *buf,
735 __u32 offset_be, __u32 len_be, int check)
736{
737 unsigned long long offset = __be32_to_cpu(offset_be);
738 unsigned long long len = __be32_to_cpu(len_be);
739 int dofree = (buf == NULL);
740
741 if (check)
742 if (len != 2 && len != 8 && len != 32
743 && len != 128 && len != 512)
744 return NULL;
745
746 if (len > 1024)
747 return NULL;
748 if (buf) {
749 /* All pre-allocated sections are a single block */
750 if (len != 1)
751 return NULL;
3d2c4fc7
DW
752 } else if (posix_memalign(&buf, 512, len<<9) != 0)
753 buf = NULL;
6416d527 754
a322f70c
DW
755 if (!buf)
756 return NULL;
757
758 if (super->active->type == 1)
759 offset += __be64_to_cpu(super->active->primary_lba);
760 else
761 offset += __be64_to_cpu(super->active->secondary_lba);
762
f21e18ca 763 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
764 if (dofree)
765 free(buf);
766 return NULL;
767 }
f21e18ca 768 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
769 if (dofree)
770 free(buf);
771 return NULL;
772 }
773 return buf;
774}
775
776static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
777{
778 unsigned long long dsize;
779
780 get_dev_size(fd, NULL, &dsize);
781
782 if (lseek64(fd, dsize-512, 0) < 0) {
783 if (devname)
e7b84f9d
N
784 pr_err("Cannot seek to anchor block on %s: %s\n",
785 devname, strerror(errno));
a322f70c
DW
786 return 1;
787 }
788 if (read(fd, &super->anchor, 512) != 512) {
789 if (devname)
e7b84f9d
N
790 pr_err("Cannot read anchor block on %s: %s\n",
791 devname, strerror(errno));
a322f70c
DW
792 return 1;
793 }
794 if (super->anchor.magic != DDF_HEADER_MAGIC) {
795 if (devname)
e7b84f9d 796 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
797 devname);
798 return 2;
799 }
800 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
801 if (devname)
e7b84f9d 802 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
803 devname);
804 return 2;
805 }
59e36268
NB
806 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
807 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 808 if (devname)
e7b84f9d 809 pr_err("can only support super revision"
59e36268
NB
810 " %.8s and earlier, not %.8s on %s\n",
811 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
812 return 2;
813 }
dbeb699a 814 super->active = NULL;
a322f70c
DW
815 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
816 dsize >> 9, 1,
817 &super->primary, &super->anchor) == 0) {
818 if (devname)
e7b84f9d
N
819 pr_err("Failed to load primary DDF header "
820 "on %s\n", devname);
dbeb699a 821 } else
822 super->active = &super->primary;
a322f70c
DW
823 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
824 dsize >> 9, 2,
825 &super->secondary, &super->anchor)) {
3eff7c1d
N
826 if (super->active == NULL
827 || (__be32_to_cpu(super->primary.seq)
828 < __be32_to_cpu(super->secondary.seq) &&
829 !super->secondary.openflag)
a322f70c
DW
830 || (__be32_to_cpu(super->primary.seq)
831 == __be32_to_cpu(super->secondary.seq) &&
832 super->primary.openflag && !super->secondary.openflag)
833 )
834 super->active = &super->secondary;
dbeb699a 835 } else if (devname)
836 pr_err("Failed to load secondary DDF header on %s\n",
837 devname);
838 if (super->active == NULL)
839 return 2;
a322f70c
DW
840 return 0;
841}
842
843static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
844{
845 void *ok;
846 ok = load_section(fd, super, &super->controller,
847 super->active->controller_section_offset,
848 super->active->controller_section_length,
849 0);
850 super->phys = load_section(fd, super, NULL,
851 super->active->phys_section_offset,
852 super->active->phys_section_length,
853 1);
854 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
855
856 super->virt = load_section(fd, super, NULL,
857 super->active->virt_section_offset,
858 super->active->virt_section_length,
859 1);
860 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
861 if (!ok ||
862 !super->phys ||
863 !super->virt) {
864 free(super->phys);
865 free(super->virt);
a2349791
NB
866 super->phys = NULL;
867 super->virt = NULL;
a322f70c
DW
868 return 2;
869 }
870 super->conflist = NULL;
871 super->dlist = NULL;
8c3b8c2c
NB
872
873 super->max_part = __be16_to_cpu(super->active->max_partitions);
874 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
875 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
876 return 0;
877}
878
3c48f7be 879#define DDF_UNUSED_BVD 0xff
880static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
881{
882 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
883 unsigned int i, vdsize;
884 void *p;
885 if (n_vds == 0) {
886 vcl->other_bvds = NULL;
887 return 0;
888 }
889 vdsize = ddf->conf_rec_len * 512;
890 if (posix_memalign(&p, 512, n_vds *
891 (vdsize + sizeof(struct vd_config *))) != 0)
892 return -1;
893 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
894 for (i = 0; i < n_vds; i++) {
895 vcl->other_bvds[i] = p + i * vdsize;
896 memset(vcl->other_bvds[i], 0, vdsize);
897 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
898 }
899 return 0;
900}
901
3dc821b0 902static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
903 unsigned int len)
904{
905 int i;
906 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 907 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
3dc821b0 908 break;
909
910 if (i < vcl->conf.sec_elmnt_count-1) {
911 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
912 return;
913 } else {
914 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 915 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
3dc821b0 916 break;
917 if (i == vcl->conf.sec_elmnt_count-1) {
918 pr_err("no space for sec level config %u, count is %u\n",
919 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
920 return;
921 }
3dc821b0 922 }
923 memcpy(vcl->other_bvds[i], vd, len);
924}
925
a322f70c
DW
926static int load_ddf_local(int fd, struct ddf_super *super,
927 char *devname, int keep)
928{
929 struct dl *dl;
930 struct stat stb;
931 char *conf;
f21e18ca
N
932 unsigned int i;
933 unsigned int confsec;
b2280677 934 int vnum;
f21e18ca 935 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 936 unsigned long long dsize;
a322f70c
DW
937
938 /* First the local disk info */
3d2c4fc7 939 if (posix_memalign((void**)&dl, 512,
6416d527 940 sizeof(*dl) +
3d2c4fc7 941 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 942 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
943 __func__);
944 return 1;
945 }
a322f70c
DW
946
947 load_section(fd, super, &dl->disk,
948 super->active->data_section_offset,
949 super->active->data_section_length,
950 0);
503975b9 951 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 952
a322f70c
DW
953 fstat(fd, &stb);
954 dl->major = major(stb.st_rdev);
955 dl->minor = minor(stb.st_rdev);
956 dl->next = super->dlist;
957 dl->fd = keep ? fd : -1;
d2ca6449
NB
958
959 dl->size = 0;
960 if (get_dev_size(fd, devname, &dsize))
961 dl->size = dsize >> 9;
097bcf00 962 /* If the disks have different sizes, the LBAs will differ
963 * between phys disks.
964 * At this point here, the values in super->active must be valid
965 * for this phys disk. */
966 dl->primary_lba = super->active->primary_lba;
967 dl->secondary_lba = super->active->secondary_lba;
968 dl->workspace_lba = super->active->workspace_lba;
b2280677 969 dl->spare = NULL;
f21e18ca 970 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
971 dl->vlist[i] = NULL;
972 super->dlist = dl;
59e36268 973 dl->pdnum = -1;
f21e18ca 974 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
975 if (memcmp(super->phys->entries[i].guid,
976 dl->disk.guid, DDF_GUID_LEN) == 0)
977 dl->pdnum = i;
978
a322f70c
DW
979 /* Now the config list. */
980 /* 'conf' is an array of config entries, some of which are
981 * probably invalid. Those which are good need to be copied into
982 * the conflist
983 */
a322f70c
DW
984
985 conf = load_section(fd, super, NULL,
986 super->active->config_section_offset,
987 super->active->config_section_length,
988 0);
989
b2280677 990 vnum = 0;
e223334f
N
991 for (confsec = 0;
992 confsec < __be32_to_cpu(super->active->config_section_length);
993 confsec += super->conf_rec_len) {
a322f70c 994 struct vd_config *vd =
e223334f 995 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
996 struct vcl *vcl;
997
b2280677
NB
998 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
999 if (dl->spare)
1000 continue;
3d2c4fc7
DW
1001 if (posix_memalign((void**)&dl->spare, 512,
1002 super->conf_rec_len*512) != 0) {
e7b84f9d
N
1003 pr_err("%s could not allocate spare info buf\n",
1004 __func__);
3d2c4fc7
DW
1005 return 1;
1006 }
613b0d17 1007
b2280677
NB
1008 memcpy(dl->spare, vd, super->conf_rec_len*512);
1009 continue;
1010 }
a322f70c
DW
1011 if (vd->magic != DDF_VD_CONF_MAGIC)
1012 continue;
1013 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1014 if (memcmp(vcl->conf.guid,
1015 vd->guid, DDF_GUID_LEN) == 0)
1016 break;
1017 }
1018
1019 if (vcl) {
b2280677 1020 dl->vlist[vnum++] = vcl;
3dc821b0 1021 if (vcl->other_bvds != NULL &&
1022 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1023 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1024 continue;
1025 }
a322f70c
DW
1026 if (__be32_to_cpu(vd->seqnum) <=
1027 __be32_to_cpu(vcl->conf.seqnum))
1028 continue;
59e36268 1029 } else {
3d2c4fc7 1030 if (posix_memalign((void**)&vcl, 512,
6416d527 1031 (super->conf_rec_len*512 +
3d2c4fc7 1032 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1033 pr_err("%s could not allocate vcl buf\n",
1034 __func__);
3d2c4fc7
DW
1035 return 1;
1036 }
a322f70c 1037 vcl->next = super->conflist;
59e36268 1038 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
3c48f7be 1039 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1040 if (alloc_other_bvds(super, vcl) != 0) {
1041 pr_err("%s could not allocate other bvds\n",
1042 __func__);
1043 free(vcl);
1044 return 1;
1045 };
a322f70c 1046 super->conflist = vcl;
b2280677 1047 dl->vlist[vnum++] = vcl;
a322f70c 1048 }
8c3b8c2c 1049 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
59e36268
NB
1050 for (i=0; i < max_virt_disks ; i++)
1051 if (memcmp(super->virt->entries[i].guid,
1052 vcl->conf.guid, DDF_GUID_LEN)==0)
1053 break;
1054 if (i < max_virt_disks)
1055 vcl->vcnum = i;
a322f70c
DW
1056 }
1057 free(conf);
1058
1059 return 0;
1060}
1061
1062#ifndef MDASSEMBLE
1063static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1064 void **sbp, char *devname);
a322f70c 1065#endif
37424f13
DW
1066
1067static void free_super_ddf(struct supertype *st);
1068
a322f70c
DW
1069static int load_super_ddf(struct supertype *st, int fd,
1070 char *devname)
1071{
1072 unsigned long long dsize;
1073 struct ddf_super *super;
1074 int rv;
1075
a322f70c
DW
1076 if (get_dev_size(fd, devname, &dsize) == 0)
1077 return 1;
1078
b31df436 1079 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
1080 /* DDF is not allowed on partitions */
1081 return 1;
1082
a322f70c
DW
1083 /* 32M is a lower bound */
1084 if (dsize <= 32*1024*1024) {
97320d7c 1085 if (devname)
e7b84f9d
N
1086 pr_err("%s is too small for ddf: "
1087 "size is %llu sectors.\n",
1088 devname, dsize>>9);
97320d7c 1089 return 1;
a322f70c
DW
1090 }
1091 if (dsize & 511) {
97320d7c 1092 if (devname)
e7b84f9d
N
1093 pr_err("%s is an odd size for ddf: "
1094 "size is %llu bytes.\n",
1095 devname, dsize);
97320d7c 1096 return 1;
a322f70c
DW
1097 }
1098
37424f13
DW
1099 free_super_ddf(st);
1100
6416d527 1101 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1102 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1103 sizeof(*super));
1104 return 1;
1105 }
a2349791 1106 memset(super, 0, sizeof(*super));
a322f70c
DW
1107
1108 rv = load_ddf_headers(fd, super, devname);
1109 if (rv) {
1110 free(super);
1111 return rv;
1112 }
1113
1114 /* Have valid headers and have chosen the best. Let's read in the rest*/
1115
1116 rv = load_ddf_global(fd, super, devname);
1117
1118 if (rv) {
1119 if (devname)
e7b84f9d
N
1120 pr_err("Failed to load all information "
1121 "sections on %s\n", devname);
a322f70c
DW
1122 free(super);
1123 return rv;
1124 }
1125
3d2c4fc7
DW
1126 rv = load_ddf_local(fd, super, devname, 0);
1127
1128 if (rv) {
1129 if (devname)
e7b84f9d
N
1130 pr_err("Failed to load all information "
1131 "sections on %s\n", devname);
3d2c4fc7
DW
1132 free(super);
1133 return rv;
1134 }
a322f70c
DW
1135
1136 /* Should possibly check the sections .... */
1137
1138 st->sb = super;
1139 if (st->ss == NULL) {
1140 st->ss = &super_ddf;
1141 st->minor_version = 0;
1142 st->max_devs = 512;
1143 }
1144 return 0;
1145
1146}
1147
1148static void free_super_ddf(struct supertype *st)
1149{
1150 struct ddf_super *ddf = st->sb;
1151 if (ddf == NULL)
1152 return;
1153 free(ddf->phys);
1154 free(ddf->virt);
1155 while (ddf->conflist) {
1156 struct vcl *v = ddf->conflist;
1157 ddf->conflist = v->next;
59e36268
NB
1158 if (v->block_sizes)
1159 free(v->block_sizes);
3c48f7be 1160 if (v->other_bvds)
1161 /*
1162 v->other_bvds[0] points to beginning of buffer,
1163 see alloc_other_bvds()
1164 */
1165 free(v->other_bvds[0]);
a322f70c
DW
1166 free(v);
1167 }
1168 while (ddf->dlist) {
1169 struct dl *d = ddf->dlist;
1170 ddf->dlist = d->next;
1171 if (d->fd >= 0)
1172 close(d->fd);
b2280677
NB
1173 if (d->spare)
1174 free(d->spare);
a322f70c
DW
1175 free(d);
1176 }
8a38cb04
N
1177 while (ddf->add_list) {
1178 struct dl *d = ddf->add_list;
1179 ddf->add_list = d->next;
1180 if (d->fd >= 0)
1181 close(d->fd);
1182 if (d->spare)
1183 free(d->spare);
1184 free(d);
1185 }
a322f70c
DW
1186 free(ddf);
1187 st->sb = NULL;
1188}
1189
1190static struct supertype *match_metadata_desc_ddf(char *arg)
1191{
1192 /* 'ddf' only support containers */
1193 struct supertype *st;
1194 if (strcmp(arg, "ddf") != 0 &&
1195 strcmp(arg, "default") != 0
1196 )
1197 return NULL;
1198
503975b9 1199 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1200 st->ss = &super_ddf;
1201 st->max_devs = 512;
1202 st->minor_version = 0;
1203 st->sb = NULL;
1204 return st;
1205}
1206
a322f70c
DW
1207#ifndef MDASSEMBLE
1208
1209static mapping_t ddf_state[] = {
1210 { "Optimal", 0},
1211 { "Degraded", 1},
1212 { "Deleted", 2},
1213 { "Missing", 3},
1214 { "Failed", 4},
1215 { "Partially Optimal", 5},
1216 { "-reserved-", 6},
1217 { "-reserved-", 7},
1218 { NULL, 0}
1219};
1220
1221static mapping_t ddf_init_state[] = {
1222 { "Not Initialised", 0},
1223 { "QuickInit in Progress", 1},
1224 { "Fully Initialised", 2},
1225 { "*UNKNOWN*", 3},
1226 { NULL, 0}
1227};
1228static mapping_t ddf_access[] = {
1229 { "Read/Write", 0},
1230 { "Reserved", 1},
1231 { "Read Only", 2},
1232 { "Blocked (no access)", 3},
1233 { NULL ,0}
1234};
1235
1236static mapping_t ddf_level[] = {
1237 { "RAID0", DDF_RAID0},
1238 { "RAID1", DDF_RAID1},
1239 { "RAID3", DDF_RAID3},
1240 { "RAID4", DDF_RAID4},
1241 { "RAID5", DDF_RAID5},
1242 { "RAID1E",DDF_RAID1E},
1243 { "JBOD", DDF_JBOD},
1244 { "CONCAT",DDF_CONCAT},
1245 { "RAID5E",DDF_RAID5E},
1246 { "RAID5EE",DDF_RAID5EE},
1247 { "RAID6", DDF_RAID6},
1248 { NULL, 0}
1249};
1250static mapping_t ddf_sec_level[] = {
1251 { "Striped", DDF_2STRIPED},
1252 { "Mirrored", DDF_2MIRRORED},
1253 { "Concat", DDF_2CONCAT},
1254 { "Spanned", DDF_2SPANNED},
1255 { NULL, 0}
1256};
1257#endif
1258
fb9d0acb 1259static int all_ff(const char *guid)
42dc2744
N
1260{
1261 int i;
1262 for (i = 0; i < DDF_GUID_LEN; i++)
1263 if (guid[i] != (char)0xff)
1264 return 0;
1265 return 1;
1266}
1267
a322f70c
DW
1268#ifndef MDASSEMBLE
1269static void print_guid(char *guid, int tstamp)
1270{
1271 /* A GUIDs are part (or all) ASCII and part binary.
1272 * They tend to be space padded.
59e36268
NB
1273 * We print the GUID in HEX, then in parentheses add
1274 * any initial ASCII sequence, and a possible
1275 * time stamp from bytes 16-19
a322f70c
DW
1276 */
1277 int l = DDF_GUID_LEN;
1278 int i;
59e36268
NB
1279
1280 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1281 if ((i&3)==0 && i != 0) printf(":");
1282 printf("%02X", guid[i]&255);
1283 }
1284
cfccea8c 1285 printf("\n (");
a322f70c
DW
1286 while (l && guid[l-1] == ' ')
1287 l--;
1288 for (i=0 ; i<l ; i++) {
1289 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1290 fputc(guid[i], stdout);
1291 else
59e36268 1292 break;
a322f70c
DW
1293 }
1294 if (tstamp) {
1295 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1296 char tbuf[100];
1297 struct tm *tm;
1298 tm = localtime(&then);
59e36268 1299 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1300 fputs(tbuf, stdout);
1301 }
59e36268 1302 printf(")");
a322f70c
DW
1303}
1304
be9b9ef4 1305static const char *guid_str(const char *guid)
1306{
1307 static char buf[DDF_GUID_LEN*2+1];
1308 int i;
1309 char *p = buf;
4a03cbd1 1310 for (i = 0; i < DDF_GUID_LEN; i++) {
1311 unsigned char c = guid[i];
1312 if (c >= 32 && c < 127)
1313 p += sprintf(p, "%c", c);
1314 else
1315 p += sprintf(p, "%02x", c);
1316 }
be9b9ef4 1317 *p = '\0';
1318 return (const char *) buf;
1319}
1320
a322f70c
DW
1321static void examine_vd(int n, struct ddf_super *sb, char *guid)
1322{
8c3b8c2c 1323 int crl = sb->conf_rec_len;
a322f70c
DW
1324 struct vcl *vcl;
1325
1326 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1327 unsigned int i;
a322f70c
DW
1328 struct vd_config *vc = &vcl->conf;
1329
1330 if (calc_crc(vc, crl*512) != vc->crc)
1331 continue;
1332 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1333 continue;
1334
1335 /* Ok, we know about this VD, let's give more details */
b06e3095 1336 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1337 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1338 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1339 int j;
1340 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1341 for (j=0; j<cnt; j++)
1342 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1343 break;
1344 if (i) printf(" ");
1345 if (j < cnt)
1346 printf("%d", j);
1347 else
1348 printf("--");
1349 }
1350 printf(")\n");
1351 if (vc->chunk_shift != 255)
613b0d17
N
1352 printf(" Chunk Size[%d] : %d sectors\n", n,
1353 1 << vc->chunk_shift);
a322f70c
DW
1354 printf(" Raid Level[%d] : %s\n", n,
1355 map_num(ddf_level, vc->prl)?:"-unknown-");
1356 if (vc->sec_elmnt_count != 1) {
1357 printf(" Secondary Position[%d] : %d of %d\n", n,
1358 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1359 printf(" Secondary Level[%d] : %s\n", n,
1360 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1361 }
1362 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1363 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1364 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1365 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1366 }
1367}
1368
1369static void examine_vds(struct ddf_super *sb)
1370{
1371 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1372 unsigned int i;
a322f70c
DW
1373 printf(" Virtual Disks : %d\n", cnt);
1374
fb9d0acb 1375 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1376 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1377 if (all_ff(ve->guid))
1378 continue;
b06e3095 1379 printf("\n");
a322f70c
DW
1380 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1381 printf("\n");
1382 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1383 printf(" state[%d] : %s, %s%s\n", i,
1384 map_num(ddf_state, ve->state & 7),
1385 (ve->state & 8) ? "Morphing, ": "",
1386 (ve->state & 16)? "Not Consistent" : "Consistent");
1387 printf(" init state[%d] : %s\n", i,
1388 map_num(ddf_init_state, ve->init_state&3));
1389 printf(" access[%d] : %s\n", i,
1390 map_num(ddf_access, (ve->init_state>>6) & 3));
1391 printf(" Name[%d] : %.16s\n", i, ve->name);
1392 examine_vd(i, sb, ve->guid);
1393 }
1394 if (cnt) printf("\n");
1395}
1396
1397static void examine_pds(struct ddf_super *sb)
1398{
1399 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1400 int i;
1401 struct dl *dl;
1402 printf(" Physical Disks : %d\n", cnt);
962371a5 1403 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1404
1405 for (i=0 ; i<cnt ; i++) {
1406 struct phys_disk_entry *pd = &sb->phys->entries[i];
1407 int type = __be16_to_cpu(pd->type);
1408 int state = __be16_to_cpu(pd->state);
1409
b06e3095
N
1410 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1411 //printf("\n");
1412 printf(" %3d %08x ", i,
a322f70c 1413 __be32_to_cpu(pd->refnum));
613b0d17 1414 printf("%8lluK ",
c9b6907b 1415 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1416 for (dl = sb->dlist; dl ; dl = dl->next) {
1417 if (dl->disk.refnum == pd->refnum) {
1418 char *dv = map_dev(dl->major, dl->minor, 0);
1419 if (dv) {
962371a5 1420 printf("%-15s", dv);
b06e3095
N
1421 break;
1422 }
1423 }
1424 }
1425 if (!dl)
962371a5 1426 printf("%15s","");
b06e3095 1427 printf(" %s%s%s%s%s",
a322f70c 1428 (type&2) ? "active":"",
b06e3095 1429 (type&4) ? "Global-Spare":"",
a322f70c
DW
1430 (type&8) ? "spare" : "",
1431 (type&16)? ", foreign" : "",
1432 (type&32)? "pass-through" : "");
18cb4496
N
1433 if (state & DDF_Failed)
1434 /* This over-rides these three */
1435 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1436 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1437 (state&1)? "Online": "Offline",
1438 (state&2)? ", Failed": "",
1439 (state&4)? ", Rebuilding": "",
1440 (state&8)? ", in-transition": "",
b06e3095
N
1441 (state&16)? ", SMART-errors": "",
1442 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1443 (state&64)? ", Missing" : "");
a322f70c
DW
1444 printf("\n");
1445 }
1446}
1447
1448static void examine_super_ddf(struct supertype *st, char *homehost)
1449{
1450 struct ddf_super *sb = st->sb;
1451
1452 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1453 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1454 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1455 printf("\n");
1456 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1457 printf("\n");
1458 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1459 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1460 ?"yes" : "no");
1461 examine_vds(sb);
1462 examine_pds(sb);
1463}
1464
a5d85af7 1465static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1466
bedbf68a 1467static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1468static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1469
bedbf68a 1470static unsigned int get_vd_num_of_subarray(struct supertype *st)
1471{
1472 /*
1473 * Figure out the VD number for this supertype.
1474 * Returns DDF_CONTAINER for the container itself,
1475 * and DDF_NOTFOUND on error.
1476 */
1477 struct ddf_super *ddf = st->sb;
1478 struct mdinfo *sra;
1479 char *sub, *end;
1480 unsigned int vcnum;
1481
1482 if (*st->container_devnm == '\0')
1483 return DDF_CONTAINER;
1484
1485 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1486 if (!sra || sra->array.major_version != -1 ||
1487 sra->array.minor_version != -2 ||
1488 !is_subarray(sra->text_version))
1489 return DDF_NOTFOUND;
1490
1491 sub = strchr(sra->text_version + 1, '/');
1492 if (sub != NULL)
1493 vcnum = strtoul(sub + 1, &end, 10);
1494 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1495 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1496 return DDF_NOTFOUND;
1497
1498 return vcnum;
1499}
1500
061f2c6a 1501static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1502{
1503 /* We just write a generic DDF ARRAY entry
1504 */
1505 struct mdinfo info;
1506 char nbuf[64];
a5d85af7 1507 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1508 fname_from_uuid(st, &info, nbuf, ':');
1509
1510 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1511}
1512
1513static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1514{
1515 /* We just write a generic DDF ARRAY entry
a322f70c 1516 */
42dc2744 1517 struct ddf_super *ddf = st->sb;
ff54de6e 1518 struct mdinfo info;
f21e18ca 1519 unsigned int i;
ff54de6e 1520 char nbuf[64];
a5d85af7 1521 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1522 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1523
f21e18ca 1524 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1525 struct virtual_entry *ve = &ddf->virt->entries[i];
1526 struct vcl vcl;
1527 char nbuf1[64];
1528 if (all_ff(ve->guid))
1529 continue;
1530 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1531 ddf->currentconf =&vcl;
1532 uuid_from_super_ddf(st, info.uuid);
1533 fname_from_uuid(st, &info, nbuf1, ':');
1534 printf("ARRAY container=%s member=%d UUID=%s\n",
1535 nbuf+5, i, nbuf1+5);
1536 }
a322f70c
DW
1537}
1538
bceedeec
N
1539static void export_examine_super_ddf(struct supertype *st)
1540{
1541 struct mdinfo info;
1542 char nbuf[64];
a5d85af7 1543 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1544 fname_from_uuid(st, &info, nbuf, ':');
1545 printf("MD_METADATA=ddf\n");
1546 printf("MD_LEVEL=container\n");
1547 printf("MD_UUID=%s\n", nbuf+5);
1548}
bceedeec 1549
74db60b0
N
1550static int copy_metadata_ddf(struct supertype *st, int from, int to)
1551{
1552 void *buf;
1553 unsigned long long dsize, offset;
1554 int bytes;
1555 struct ddf_header *ddf;
1556 int written = 0;
1557
1558 /* The meta consists of an anchor, a primary, and a secondary.
1559 * This all lives at the end of the device.
1560 * So it is easiest to find the earliest of primary and
1561 * secondary, and copy everything from there.
1562 *
1563 * Anchor is 512 from end It contains primary_lba and secondary_lba
1564 * we choose one of those
1565 */
1566
1567 if (posix_memalign(&buf, 4096, 4096) != 0)
1568 return 1;
1569
1570 if (!get_dev_size(from, NULL, &dsize))
1571 goto err;
1572
1573 if (lseek64(from, dsize-512, 0) < 0)
1574 goto err;
1575 if (read(from, buf, 512) != 512)
1576 goto err;
1577 ddf = buf;
1578 if (ddf->magic != DDF_HEADER_MAGIC ||
1579 calc_crc(ddf, 512) != ddf->crc ||
1580 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1581 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1582 goto err;
1583
1584 offset = dsize - 512;
1585 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1586 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1587 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1588 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1589
1590 bytes = dsize - offset;
1591
1592 if (lseek64(from, offset, 0) < 0 ||
1593 lseek64(to, offset, 0) < 0)
1594 goto err;
1595 while (written < bytes) {
1596 int n = bytes - written;
1597 if (n > 4096)
1598 n = 4096;
1599 if (read(from, buf, n) != n)
1600 goto err;
1601 if (write(to, buf, n) != n)
1602 goto err;
1603 written += n;
1604 }
1605 free(buf);
1606 return 0;
1607err:
1608 free(buf);
1609 return 1;
1610}
1611
a322f70c
DW
/* Placeholder: prints nothing at present. */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 * Could print DDF GUID
	 * Need to find which array
	 *  If whole, briefly list all arrays
	 *  If one, give name
	 */
	(void)st;
	(void)homehost;
}
1621
1622static void brief_detail_super_ddf(struct supertype *st)
1623{
ff54de6e
N
1624 struct mdinfo info;
1625 char nbuf[64];
bedbf68a 1626 struct ddf_super *ddf = st->sb;
1627 unsigned int vcnum = get_vd_num_of_subarray(st);
1628 if (vcnum == DDF_CONTAINER)
1629 uuid_from_super_ddf(st, info.uuid);
1630 else if (vcnum == DDF_NOTFOUND)
1631 return;
1632 else
1633 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1634 fname_from_uuid(st, &info, nbuf,':');
1635 printf(" UUID=%s", nbuf + 5);
a322f70c 1636}
a322f70c
DW
1637#endif
1638
1639static int match_home_ddf(struct supertype *st, char *homehost)
1640{
1641 /* It matches 'this' host if the controller is a
1642 * Linux-MD controller with vendor_data matching
1643 * the hostname
1644 */
1645 struct ddf_super *ddf = st->sb;
f21e18ca 1646 unsigned int len;
d1d3482b
N
1647
1648 if (!homehost)
1649 return 0;
1650 len = strlen(homehost);
a322f70c
DW
1651
1652 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1653 len < sizeof(ddf->controller.vendor_data) &&
1654 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1655 ddf->controller.vendor_data[len] == 0);
1656}
1657
0e600426 1658#ifndef MDASSEMBLE
baba3f4e 1659static int find_index_in_bvd(const struct ddf_super *ddf,
1660 const struct vd_config *conf, unsigned int n,
1661 unsigned int *n_bvd)
1662{
1663 /*
1664 * Find the index of the n-th valid physical disk in this BVD
1665 */
1666 unsigned int i, j;
1667 for (i = 0, j = 0; i < ddf->mppe &&
1668 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1669 if (conf->phys_refnum[i] != 0xffffffff) {
1670 if (n == j) {
1671 *n_bvd = i;
1672 return 1;
1673 }
1674 j++;
1675 }
1676 }
1677 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1678 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1679 return 0;
1680}
1681
1682static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1683 unsigned int n,
1684 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1685{
7a7cc504 1686 struct vcl *v;
59e36268 1687
baba3f4e 1688 for (v = ddf->conflist; v; v = v->next) {
1689 unsigned int nsec, ibvd;
1690 struct vd_config *conf;
1691 if (inst != v->vcnum)
1692 continue;
1693 conf = &v->conf;
1694 if (conf->sec_elmnt_count == 1) {
1695 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1696 *vcl = v;
1697 return conf;
1698 } else
1699 goto bad;
1700 }
1701 if (v->other_bvds == NULL) {
1702 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1703 __func__, conf->sec_elmnt_count);
1704 goto bad;
1705 }
1706 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1707 if (conf->sec_elmnt_seq != nsec) {
1708 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
baba3f4e 1709 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1710 == nsec)
1711 break;
1712 }
1713 if (ibvd == conf->sec_elmnt_count)
1714 goto bad;
1715 conf = v->other_bvds[ibvd-1];
1716 }
1717 if (!find_index_in_bvd(ddf, conf,
1718 n - nsec*conf->sec_elmnt_count, n_bvd))
1719 goto bad;
1720 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1721 , __func__, n, *n_bvd, ibvd-1, inst);
1722 *vcl = v;
1723 return conf;
1724 }
1725bad:
1726 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1727 return NULL;
1728}
0e600426 1729#endif
7a7cc504 1730
5ec636b7 1731static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
7a7cc504
NB
1732{
1733 /* Find the entry in phys_disk which has the given refnum
1734 * and return it's index
1735 */
f21e18ca
N
1736 unsigned int i;
1737 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1738 if (ddf->phys->entries[i].refnum == phys_refnum)
1739 return i;
1740 return -1;
a322f70c
DW
1741}
1742
bedbf68a 1743static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1744{
1745 char buf[20];
1746 struct sha1_ctx ctx;
1747 sha1_init_ctx(&ctx);
1748 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1749 sha1_finish_ctx(&ctx, buf);
1750 memcpy(uuid, buf, 4*4);
1751}
1752
a322f70c
DW
1753static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1754{
1755 /* The uuid returned here is used for:
1756 * uuid to put into bitmap file (Create, Grow)
1757 * uuid for backup header when saving critical section (Grow)
1758 * comparing uuids when re-adding a device into an array
51006d85
N
1759 * In these cases the uuid required is that of the data-array,
1760 * not the device-set.
1761 * uuid to recognise same set when adding a missing device back
1762 * to an array. This is a uuid for the device-set.
613b0d17 1763 *
a322f70c
DW
1764 * For each of these we can make do with a truncated
1765 * or hashed uuid rather than the original, as long as
1766 * everyone agrees.
a322f70c
DW
1767 * In the case of SVD we assume the BVD is of interest,
1768 * though that might be the case if a bitmap were made for
1769 * a mirrored SVD - worry about that later.
1770 * So we need to find the VD configuration record for the
1771 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1772 * The first 16 bytes of the sha1 of these is used.
1773 */
1774 struct ddf_super *ddf = st->sb;
d2ca6449 1775 struct vcl *vcl = ddf->currentconf;
c5afc314 1776 char *guid;
a322f70c 1777
c5afc314
N
1778 if (vcl)
1779 guid = vcl->conf.guid;
1780 else
1781 guid = ddf->anchor.guid;
bedbf68a 1782 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1783}
1784
a5d85af7 1785static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1786
a5d85af7 1787static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1788{
1789 struct ddf_super *ddf = st->sb;
a5d85af7 1790 int map_disks = info->array.raid_disks;
90fa1a29 1791 __u32 *cptr;
a322f70c 1792
78e44928 1793 if (ddf->currentconf) {
a5d85af7 1794 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1795 return;
1796 }
95eeceeb 1797 memset(info, 0, sizeof(*info));
78e44928 1798
a322f70c
DW
1799 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1800 info->array.level = LEVEL_CONTAINER;
1801 info->array.layout = 0;
1802 info->array.md_minor = -1;
90fa1a29
JS
1803 cptr = (__u32 *)(ddf->anchor.guid + 16);
1804 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1805
a322f70c
DW
1806 info->array.utime = 0;
1807 info->array.chunk_size = 0;
510242aa 1808 info->container_enough = 1;
a322f70c 1809
a322f70c
DW
1810 info->disk.major = 0;
1811 info->disk.minor = 0;
cba0191b
NB
1812 if (ddf->dlist) {
1813 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1814 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1815
1816 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1817 entries[info->disk.raid_disk].
1818 config_size);
d2ca6449 1819 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1820 } else {
1821 info->disk.number = -1;
661dce36 1822 info->disk.raid_disk = -1;
cba0191b
NB
1823// info->disk.raid_disk = find refnum in the table and use index;
1824 }
f22385f9 1825 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1826
921d9e16 1827 info->recovery_start = MaxSector;
a19c88b8 1828 info->reshape_active = 0;
6e75048b 1829 info->recovery_blocked = 0;
c5afc314 1830 info->name[0] = 0;
a322f70c 1831
f35f2525
N
1832 info->array.major_version = -1;
1833 info->array.minor_version = -2;
159c3a1a 1834 strcpy(info->text_version, "ddf");
a67dd8cc 1835 info->safe_mode_delay = 0;
159c3a1a 1836
c5afc314 1837 uuid_from_super_ddf(st, info->uuid);
a322f70c 1838
a5d85af7
N
1839 if (map) {
1840 int i;
1841 for (i = 0 ; i < map_disks; i++) {
1842 if (i < info->array.raid_disks &&
1843 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1844 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1845 map[i] = 1;
1846 else
1847 map[i] = 0;
1848 }
1849 }
a322f70c
DW
1850}
1851
a5d85af7 1852static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1853{
1854 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1855 struct vcl *vc = ddf->currentconf;
1856 int cd = ddf->currentdev;
ddf94a43 1857 int n_prim;
db42fa9b 1858 int j;
8592f29d 1859 struct dl *dl;
a5d85af7 1860 int map_disks = info->array.raid_disks;
90fa1a29 1861 __u32 *cptr;
ddf94a43 1862 struct vd_config *conf;
a322f70c 1863
95eeceeb 1864 memset(info, 0, sizeof(*info));
8a2848a7 1865 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1866 return;
a322f70c 1867 info->array.md_minor = -1;
90fa1a29
JS
1868 cptr = (__u32 *)(vc->conf.guid + 16);
1869 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1870 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1871 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1872 info->custom_array_size = 0;
d2ca6449 1873
ddf94a43 1874 conf = &vc->conf;
1875 n_prim = __be16_to_cpu(conf->prim_elmnt_count);
1876 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1877 int ibvd = cd / n_prim - 1;
1878 cd %= n_prim;
1879 conf = vc->other_bvds[ibvd];
1880 }
1881
f21e18ca 1882 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
57a66662 1883 info->data_offset =
1884 __be64_to_cpu(LBA_OFFSET(ddf, &vc->conf)[cd]);
d2ca6449
NB
1885 if (vc->block_sizes)
1886 info->component_size = vc->block_sizes[cd];
1887 else
1888 info->component_size = __be64_to_cpu(vc->conf.blocks);
1889 }
a322f70c 1890
fb204fb2 1891 for (dl = ddf->dlist; dl ; dl = dl->next)
f5ded787 1892 if (dl->disk.refnum == conf->phys_refnum[cd])
fb204fb2
N
1893 break;
1894
a322f70c
DW
1895 info->disk.major = 0;
1896 info->disk.minor = 0;
fb204fb2 1897 info->disk.state = 0;
8592f29d
N
1898 if (dl) {
1899 info->disk.major = dl->major;
1900 info->disk.minor = dl->minor;
7c3fb3ec 1901 info->disk.raid_disk = cd + conf->sec_elmnt_seq
1902 * __be16_to_cpu(conf->prim_elmnt_count);
fb204fb2
N
1903 info->disk.number = dl->pdnum;
1904 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1905 }
a322f70c 1906
103f2410
NB
1907 info->container_member = ddf->currentconf->vcnum;
1908
921d9e16 1909 info->recovery_start = MaxSector;
80d26cb2 1910 info->resync_start = 0;
624c5ad4 1911 info->reshape_active = 0;
6e75048b 1912 info->recovery_blocked = 0;
80d26cb2
NB
1913 if (!(ddf->virt->entries[info->container_member].state
1914 & DDF_state_inconsistent) &&
1915 (ddf->virt->entries[info->container_member].init_state
1916 & DDF_initstate_mask)
1917 == DDF_init_full)
b7528a20 1918 info->resync_start = MaxSector;
80d26cb2 1919
a322f70c
DW
1920 uuid_from_super_ddf(st, info->uuid);
1921
f35f2525
N
1922 info->array.major_version = -1;
1923 info->array.minor_version = -2;
9b63e648 1924 sprintf(info->text_version, "/%s/%d",
4dd2df09 1925 st->container_devnm,
9b63e648 1926 info->container_member);
a67dd8cc 1927 info->safe_mode_delay = 200;
159c3a1a 1928
db42fa9b
N
1929 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1930 info->name[16]=0;
1931 for(j=0; j<16; j++)
1932 if (info->name[j] == ' ')
1933 info->name[j] = 0;
a5d85af7
N
1934
1935 if (map)
1936 for (j = 0; j < map_disks; j++) {
1937 map[j] = 0;
1938 if (j < info->array.raid_disks) {
1939 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1940 if (i >= 0 &&
a5d85af7
N
1941 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1942 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1943 map[i] = 1;
1944 }
1945 }
a322f70c
DW
1946}
1947
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *		be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *	   if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *		linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  uuid:  Change the uuid of the array to match what is given
	 *  homehost:  update the recorded homehost
	 *  name:  update the name - preserving the homehost
	 *  _reshape_progress: record new reshape_progress position.
	 *
	 * Following are not relevant for this version:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 *
	 * Nothing is actually modified here.  The updates listed in
	 * 'accepted' below are acknowledged with 0; everything else,
	 * including "homehost" and "name" which are not implemented,
	 * gets -1.
	 *
	 * we don't need to handle "force-*" or "assemble" as
	 * there is no need to 'trick' the kernel.  We the metadata is
	 * first updated to activate the array, all the implied modifications
	 * will just happen.
	 */
	static const char *const accepted[] = {
		"grow",			/* FIXME */
		"resync",
		"_reshape_progress",	/* We don't support reshape yet */
		"assemble",		/* Do nothing, just succeed */
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
2016
5f8097be
NB
2017static void make_header_guid(char *guid)
2018{
2019 __u32 stamp;
5f8097be
NB
2020 /* Create a DDF Header of Virtual Disk GUID */
2021
2022 /* 24 bytes of fiction required.
2023 * first 8 are a 'vendor-id' - "Linux-MD"
2024 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2025 * Remaining 8 random number plus timestamp
2026 */
2027 memcpy(guid, T10, sizeof(T10));
2028 stamp = __cpu_to_be32(0xdeadbeef);
2029 memcpy(guid+8, &stamp, 4);
2030 stamp = __cpu_to_be32(0);
2031 memcpy(guid+12, &stamp, 4);
2032 stamp = __cpu_to_be32(time(0) - DECADE);
2033 memcpy(guid+16, &stamp, 4);
bfb7ea78 2034 stamp = random32();
5f8097be 2035 memcpy(guid+20, &stamp, 4);
5f8097be 2036}
59e36268 2037
fb9d0acb 2038static unsigned int find_unused_vde(const struct ddf_super *ddf)
2039{
2040 unsigned int i;
2041 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2042 if (all_ff(ddf->virt->entries[i].guid))
2043 return i;
2044 }
2045 return DDF_NOTFOUND;
2046}
2047
2048static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2049 const char *name)
2050{
2051 unsigned int i;
2052 if (name == NULL)
2053 return DDF_NOTFOUND;
2054 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2055 if (all_ff(ddf->virt->entries[i].guid))
2056 continue;
2057 if (!strncmp(name, ddf->virt->entries[i].name,
2058 sizeof(ddf->virt->entries[i].name)))
2059 return i;
2060 }
2061 return DDF_NOTFOUND;
2062}
2063
2064static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2065 const char *guid)
2066{
2067 unsigned int i;
2068 if (guid == NULL || all_ff(guid))
2069 return DDF_NOTFOUND;
2070 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2071 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2072 return i;
2073 return DDF_NOTFOUND;
2074}
2075
78e44928
NB
2076static int init_super_ddf_bvd(struct supertype *st,
2077 mdu_array_info_t *info,
2078 unsigned long long size,
2079 char *name, char *homehost,
83cd1e97 2080 int *uuid, unsigned long long data_offset);
78e44928 2081
a322f70c
DW
2082static int init_super_ddf(struct supertype *st,
2083 mdu_array_info_t *info,
2084 unsigned long long size, char *name, char *homehost,
83cd1e97 2085 int *uuid, unsigned long long data_offset)
a322f70c
DW
2086{
2087 /* This is primarily called by Create when creating a new array.
2088 * We will then get add_to_super called for each component, and then
2089 * write_init_super called to write it out to each device.
2090 * For DDF, Create can create on fresh devices or on a pre-existing
2091 * array.
2092 * To create on a pre-existing array a different method will be called.
2093 * This one is just for fresh drives.
2094 *
2095 * We need to create the entire 'ddf' structure which includes:
2096 * DDF headers - these are easy.
2097 * Controller data - a Sector describing this controller .. not that
2098 * this is a controller exactly.
2099 * Physical Disk Record - one entry per device, so
2100 * leave plenty of space.
2101 * Virtual Disk Records - again, just leave plenty of space.
2102 * This just lists VDs, doesn't give details
2103 * Config records - describes the VDs that use this disk
2104 * DiskData - describes 'this' device.
2105 * BadBlockManagement - empty
2106 * Diag Space - empty
2107 * Vendor Logs - Could we put bitmaps here?
2108 *
2109 */
2110 struct ddf_super *ddf;
2111 char hostname[17];
2112 int hostlen;
a322f70c
DW
2113 int max_phys_disks, max_virt_disks;
2114 unsigned long long sector;
2115 int clen;
2116 int i;
2117 int pdsize, vdsize;
2118 struct phys_disk *pd;
2119 struct virtual_disk *vd;
2120
83cd1e97 2121 if (data_offset != INVALID_SECTORS) {
ed503f89 2122 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2123 return 0;
2124 }
2125
78e44928 2126 if (st->sb)
83cd1e97
N
2127 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2128 data_offset);
ba7eb04f 2129
3d2c4fc7 2130 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2131 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2132 return 0;
2133 }
6264b437 2134 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2135 ddf->dlist = NULL; /* no physical disks yet */
2136 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2137 st->sb = ddf;
2138
2139 if (info == NULL) {
2140 /* zeroing superblock */
2141 return 0;
2142 }
a322f70c
DW
2143
2144 /* At least 32MB *must* be reserved for the ddf. So let's just
2145 * start 32MB from the end, and put the primary header there.
2146 * Don't do secondary for now.
2147 * We don't know exactly where that will be yet as it could be
2148 * different on each device. To just set up the lengths.
2149 *
2150 */
2151
2152 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2153 make_header_guid(ddf->anchor.guid);
a322f70c 2154
59e36268 2155 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
2156 ddf->anchor.seq = __cpu_to_be32(1);
2157 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2158 ddf->anchor.openflag = 0xFF;
2159 ddf->anchor.foreignflag = 0;
2160 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2161 ddf->anchor.pad0 = 0xff;
2162 memset(ddf->anchor.pad1, 0xff, 12);
2163 memset(ddf->anchor.header_ext, 0xff, 32);
2164 ddf->anchor.primary_lba = ~(__u64)0;
2165 ddf->anchor.secondary_lba = ~(__u64)0;
2166 ddf->anchor.type = DDF_HEADER_ANCHOR;
2167 memset(ddf->anchor.pad2, 0xff, 3);
2168 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2169 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2170 of 32M reserved.. */
2171 max_phys_disks = 1023; /* Should be enough */
2172 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2173 max_virt_disks = 255;
2174 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2175 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2176 ddf->max_part = 64;
8c3b8c2c 2177 ddf->mppe = 256;
59e36268
NB
2178 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2179 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2180 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 2181 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2182 /* controller sections is one sector long immediately
2183 * after the ddf header */
2184 sector = 1;
2185 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2186 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2187 sector += 1;
2188
2189 /* phys is 8 sectors after that */
2190 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2191 sizeof(struct phys_disk_entry)*max_phys_disks,
2192 512);
2193 switch(pdsize/512) {
2194 case 2: case 8: case 32: case 128: case 512: break;
2195 default: abort();
2196 }
2197 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2198 ddf->anchor.phys_section_length =
2199 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2200 sector += pdsize/512;
2201
2202 /* virt is another 32 sectors */
2203 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2204 sizeof(struct virtual_entry) * max_virt_disks,
2205 512);
2206 switch(vdsize/512) {
2207 case 2: case 8: case 32: case 128: case 512: break;
2208 default: abort();
2209 }
2210 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2211 ddf->anchor.virt_section_length =
2212 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2213 sector += vdsize/512;
2214
59e36268 2215 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
2216 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2217 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2218 sector += clen;
2219
2220 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2221 ddf->anchor.data_section_length = __cpu_to_be32(1);
2222 sector += 1;
2223
2224 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2225 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2226 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2227 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2228 ddf->anchor.vendor_length = __cpu_to_be32(0);
2229 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2230
2231 memset(ddf->anchor.pad4, 0xff, 256);
2232
2233 memcpy(&ddf->primary, &ddf->anchor, 512);
2234 memcpy(&ddf->secondary, &ddf->anchor, 512);
2235
2236 ddf->primary.openflag = 1; /* I guess.. */
2237 ddf->primary.type = DDF_HEADER_PRIMARY;
2238
2239 ddf->secondary.openflag = 1; /* I guess.. */
2240 ddf->secondary.type = DDF_HEADER_SECONDARY;
2241
2242 ddf->active = &ddf->primary;
2243
2244 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2245
2246 /* 24 more bytes of fiction required.
2247 * first 8 are a 'vendor-id' - "Linux-MD"
2248 * Remaining 16 are serial number.... maybe a hostname would do?
2249 */
2250 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2251 gethostname(hostname, sizeof(hostname));
2252 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2253 hostlen = strlen(hostname);
2254 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2255 for (i = strlen(T10) ; i+hostlen < 24; i++)
2256 ddf->controller.guid[i] = ' ';
2257
2258 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2259 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2260 ddf->controller.type.sub_vendor_id = 0;
2261 ddf->controller.type.sub_device_id = 0;
2262 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2263 memset(ddf->controller.pad, 0xff, 8);
2264 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2265 if (homehost && strlen(homehost) < 440)
2266 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2267
3d2c4fc7 2268 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2269 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2270 return 0;
2271 }
6416d527 2272 ddf->phys = pd;
a322f70c
DW
2273 ddf->pdsize = pdsize;
2274
2275 memset(pd, 0xff, pdsize);
2276 memset(pd, 0, sizeof(*pd));
076515ba 2277 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
2278 pd->used_pdes = __cpu_to_be16(0);
2279 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2280 memset(pd->pad, 0xff, 52);
4a3ca8ac 2281 for (i = 0; i < max_phys_disks; i++)
2282 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
a322f70c 2283
3d2c4fc7 2284 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2285 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2286 return 0;
2287 }
6416d527 2288 ddf->virt = vd;
a322f70c
DW
2289 ddf->vdsize = vdsize;
2290 memset(vd, 0, vdsize);
2291 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2292 vd->populated_vdes = __cpu_to_be16(0);
2293 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2294 memset(vd->pad, 0xff, 52);
2295
5f8097be
NB
2296 for (i=0; i<max_virt_disks; i++)
2297 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2298
a322f70c 2299 st->sb = ddf;
7d5a7ff3 2300 ddf_set_updates_pending(ddf);
a322f70c
DW
2301 return 1;
2302}
2303
5f8097be
NB
/* Convert a chunk size in bytes to a shift count relative to 512-byte
 * sectors: the zero-based position of the lowest set bit of
 * chunksize/512.  Yields -1 when chunksize/512 is 0.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;

	return ffs(sectors) - 1;
}
2308
0e600426 2309#ifndef MDASSEMBLE
59e36268
NB
/* A used region on a device: first sector and length. */
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator ordering extents by ascending 'start'.
 * Returns -1, 0 or 1; 'size' is not considered.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2323
78e44928 2324static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2325{
2326 /* find a list of used extents on the give physical device
2327 * (dnum) of the given ddf.
2328 * Return a malloced array of 'struct extent'
2329
613b0d17 2330 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2331
2332 */
2333 struct extent *rv;
2334 int n = 0;
fcc22180 2335 unsigned int i;
59e36268 2336
503975b9 2337 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2338
2339 for (i = 0; i < ddf->max_part; i++) {
fcc22180 2340 const struct vd_config *bvd;
2341 unsigned int ibvd;
59e36268 2342 struct vcl *v = dl->vlist[i];
fcc22180 2343 if (v == NULL ||
2344 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2345 &bvd, &ibvd) == DDF_NOTFOUND)
59e36268 2346 continue;
fcc22180 2347 rv[n].start = __be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2348 rv[n].size = __be64_to_cpu(bvd->blocks);
2349 n++;
59e36268
NB
2350 }
2351 qsort(rv, n, sizeof(*rv), cmp_extent);
2352
2353 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2354 rv[n].size = 0;
2355 return rv;
2356}
0e600426 2357#endif
59e36268 2358
5f8097be
NB
2359static int init_super_ddf_bvd(struct supertype *st,
2360 mdu_array_info_t *info,
2361 unsigned long long size,
2362 char *name, char *homehost,
83cd1e97 2363 int *uuid, unsigned long long data_offset)
5f8097be
NB
2364{
2365 /* We are creating a BVD inside a pre-existing container.
2366 * so st->sb is already set.
2367 * We need to create a new vd_config and a new virtual_entry
2368 */
2369 struct ddf_super *ddf = st->sb;
5aaf6c7b 2370 unsigned int venum, i;
5f8097be
NB
2371 struct virtual_entry *ve;
2372 struct vcl *vcl;
2373 struct vd_config *vc;
5f8097be 2374
fb9d0acb 2375 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2376 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2377 return 0;
2378 }
fb9d0acb 2379 venum = find_unused_vde(ddf);
2380 if (venum == DDF_NOTFOUND) {
2381 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2382 return 0;
2383 }
2384 ve = &ddf->virt->entries[venum];
2385
2386 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2387 * timestamp, random number
2388 */
2389 make_header_guid(ve->guid);
2390 ve->unit = __cpu_to_be16(info->md_minor);
2391 ve->pad0 = 0xFFFF;
2392 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2393 ve->type = 0;
7a7cc504
NB
2394 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2395 if (info->state & 1) /* clean */
2396 ve->init_state = DDF_init_full;
2397 else
2398 ve->init_state = DDF_init_not;
2399
5f8097be
NB
2400 memset(ve->pad1, 0xff, 14);
2401 memset(ve->name, ' ', 16);
2402 if (name)
2403 strncpy(ve->name, name, 16);
2404 ddf->virt->populated_vdes =
2405 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2406
2407 /* Now create a new vd_config */
3d2c4fc7
DW
2408 if (posix_memalign((void**)&vcl, 512,
2409 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2410 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2411 return 0;
2412 }
59e36268
NB
2413 vcl->vcnum = venum;
2414 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
5f8097be
NB
2415 vc = &vcl->conf;
2416
2417 vc->magic = DDF_VD_CONF_MAGIC;
2418 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2419 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2420 vc->seqnum = __cpu_to_be32(1);
2421 memset(vc->pad0, 0xff, 24);
5f8097be 2422 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2423 if (layout_md2ddf(info, vc) == -1 ||
2424 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2425 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2426 __func__, info->level, info->layout, info->raid_disks);
2427 free(vcl);
2428 return 0;
2429 }
5f8097be 2430 vc->sec_elmnt_seq = 0;
3c48f7be 2431 if (alloc_other_bvds(ddf, vcl) != 0) {
2432 pr_err("%s could not allocate other bvds\n",
2433 __func__);
2434 free(vcl);
2435 return 0;
2436 }
5f8097be
NB
2437 vc->blocks = __cpu_to_be64(info->size * 2);
2438 vc->array_blocks = __cpu_to_be64(
2439 calc_array_size(info->level, info->raid_disks, info->layout,
2440 info->chunk_size, info->size*2));
2441 memset(vc->pad1, 0xff, 8);
2442 vc->spare_refs[0] = 0xffffffff;
2443 vc->spare_refs[1] = 0xffffffff;
2444 vc->spare_refs[2] = 0xffffffff;
2445 vc->spare_refs[3] = 0xffffffff;
2446 vc->spare_refs[4] = 0xffffffff;
2447 vc->spare_refs[5] = 0xffffffff;
2448 vc->spare_refs[6] = 0xffffffff;
2449 vc->spare_refs[7] = 0xffffffff;
2450 memset(vc->cache_pol, 0, 8);
2451 vc->bg_rate = 0x80;
2452 memset(vc->pad2, 0xff, 3);
2453 memset(vc->pad3, 0xff, 52);
2454 memset(vc->pad4, 0xff, 192);
2455 memset(vc->v0, 0xff, 32);
2456 memset(vc->v1, 0xff, 32);
2457 memset(vc->v2, 0xff, 16);
2458 memset(vc->v3, 0xff, 16);
2459 memset(vc->vendor, 0xff, 32);
598f0d58 2460
8c3b8c2c 2461 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2462 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be 2463
5aaf6c7b 2464 for (i = 1; i < vc->sec_elmnt_count; i++) {
2465 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2466 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2467 }
2468
5f8097be
NB
2469 vcl->next = ddf->conflist;
2470 ddf->conflist = vcl;
d2ca6449 2471 ddf->currentconf = vcl;
7d5a7ff3 2472 ddf_set_updates_pending(ddf);
5f8097be
NB
2473 return 1;
2474}
2475
63eb2454 2476static int get_svd_state(const struct ddf_super *, const struct vcl *);
2477
0e600426 2478#ifndef MDASSEMBLE
5f8097be
NB
2479static void add_to_super_ddf_bvd(struct supertype *st,
2480 mdu_disk_info_t *dk, int fd, char *devname)
2481{
2482 /* fd and devname identify a device with-in the ddf container (st).
2483 * dk identifies a location in the new BVD.
2484 * We need to find suitable free space in that device and update
2485 * the phys_refnum and lba_offset for the newly created vd_config.
2486 * We might also want to update the type in the phys_disk
5575e7d9 2487 * section.
8592f29d
N
2488 *
2489 * Alternately: fd == -1 and we have already chosen which device to
2490 * use and recorded in dlist->raid_disk;
5f8097be
NB
2491 */
2492 struct dl *dl;
2493 struct ddf_super *ddf = st->sb;
2494 struct vd_config *vc;
f21e18ca 2495 unsigned int i;
59e36268
NB
2496 unsigned long long blocks, pos, esize;
2497 struct extent *ex;
475ccbdb 2498 unsigned int raid_disk = dk->raid_disk;
5f8097be 2499
8592f29d
N
2500 if (fd == -1) {
2501 for (dl = ddf->dlist; dl ; dl = dl->next)
2502 if (dl->raiddisk == dk->raid_disk)
2503 break;
2504 } else {
2505 for (dl = ddf->dlist; dl ; dl = dl->next)
2506 if (dl->major == dk->major &&
2507 dl->minor == dk->minor)
2508 break;
2509 }
5f8097be
NB
2510 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2511 return;
2512
d2ca6449 2513 vc = &ddf->currentconf->conf;
475ccbdb 2514 if (vc->sec_elmnt_count > 1) {
2515 unsigned int n = __be16_to_cpu(vc->prim_elmnt_count);
2516 if (raid_disk >= n)
2517 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2518 raid_disk %= n;
2519 }
59e36268
NB
2520
2521 ex = get_extents(ddf, dl);
2522 if (!ex)
2523 return;
2524
2525 i = 0; pos = 0;
2526 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2527 if (ddf->currentconf->block_sizes)
2528 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2529
2530 do {
2531 esize = ex[i].start - pos;
2532 if (esize >= blocks)
2533 break;
2534 pos = ex[i].start + ex[i].size;
2535 i++;
2536 } while (ex[i-1].size);
2537
2538 free(ex);
2539 if (esize < blocks)
2540 return;
2541
d2ca6449 2542 ddf->currentdev = dk->raid_disk;
475ccbdb 2543 vc->phys_refnum[raid_disk] = dl->disk.refnum;
2544 LBA_OFFSET(ddf, vc)[raid_disk] = __cpu_to_be64(pos);
5f8097be 2545
f21e18ca 2546 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2547 if (dl->vlist[i] == NULL)
2548 break;
2549 if (i == ddf->max_part)
2550 return;
d2ca6449 2551 dl->vlist[i] = ddf->currentconf;
5f8097be 2552
8592f29d
N
2553 if (fd >= 0)
2554 dl->fd = fd;
2555 if (devname)
2556 dl->devname = devname;
7a7cc504 2557
63eb2454 2558 /* Check if we can mark array as optimal yet */
d2ca6449 2559 i = ddf->currentconf->vcnum;
63eb2454 2560 ddf->virt->entries[i].state =
2561 (ddf->virt->entries[i].state & ~DDF_state_mask)
2562 | get_svd_state(ddf, ddf->currentconf);
5575e7d9
NB
2563 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2564 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
4f9bbe63 2565 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
2566 __func__, dl->pdnum, __be32_to_cpu(dl->disk.refnum),
2567 ddf->currentconf->vcnum, guid_str(vc->guid),
2568 dk->raid_disk);
7d5a7ff3 2569 ddf_set_updates_pending(ddf);
5f8097be
NB
2570}
2571
4a3ca8ac 2572static unsigned int find_unused_pde(const struct ddf_super *ddf)
2573{
2574 unsigned int i;
2575 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++) {
2576 if (all_ff(ddf->phys->entries[i].guid))
2577 return i;
2578 }
2579 return DDF_NOTFOUND;
2580}
2581
a322f70c
DW
2582/* add a device to a container, either while creating it or while
2583 * expanding a pre-existing container
2584 */
f20c3968 2585static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2586 mdu_disk_info_t *dk, int fd, char *devname,
2587 unsigned long long data_offset)
a322f70c
DW
2588{
2589 struct ddf_super *ddf = st->sb;
2590 struct dl *dd;
2591 time_t now;
2592 struct tm *tm;
2593 unsigned long long size;
2594 struct phys_disk_entry *pde;
f21e18ca 2595 unsigned int n, i;
a322f70c 2596 struct stat stb;
90fa1a29 2597 __u32 *tptr;
a322f70c 2598
78e44928
NB
2599 if (ddf->currentconf) {
2600 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2601 return 0;
78e44928
NB
2602 }
2603
a322f70c
DW
2604 /* This is device numbered dk->number. We need to create
2605 * a phys_disk entry and a more detailed disk_data entry.
2606 */
2607 fstat(fd, &stb);
4a3ca8ac 2608 n = find_unused_pde(ddf);
2609 if (n == DDF_NOTFOUND) {
2610 pr_err("%s: No free slot in array, cannot add disk\n",
2611 __func__);
2612 return 1;
2613 }
2614 pde = &ddf->phys->entries[n];
4ee8cca9 2615 get_dev_size(fd, NULL, &size);
2616 if (size <= 32*1024*1024) {
2617 pr_err("%s: device size must be at least 32MB\n",
2618 __func__);
2619 return 1;
2620 }
2621 size >>= 9;
4a3ca8ac 2622
3d2c4fc7
DW
2623 if (posix_memalign((void**)&dd, 512,
2624 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2625 pr_err("%s could allocate buffer for new disk, aborting\n",
2626 __func__);
f20c3968 2627 return 1;
3d2c4fc7 2628 }
a322f70c
DW
2629 dd->major = major(stb.st_rdev);
2630 dd->minor = minor(stb.st_rdev);
2631 dd->devname = devname;
a322f70c 2632 dd->fd = fd;
b2280677 2633 dd->spare = NULL;
a322f70c
DW
2634
2635 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2636 now = time(0);
2637 tm = localtime(&now);
2638 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2639 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2640 tptr = (__u32 *)(dd->disk.guid + 16);
2641 *tptr++ = random32();
2642 *tptr = random32();
a322f70c 2643
59e36268
NB
2644 do {
2645 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2646 dd->disk.refnum = random32();
f21e18ca
N
2647 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2648 i > 0; i--)
2649 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2650 break;
f21e18ca 2651 } while (i > 0);
59e36268 2652
a322f70c
DW
2653 dd->disk.forced_ref = 1;
2654 dd->disk.forced_guid = 1;
2655 memset(dd->disk.vendor, ' ', 32);
2656 memcpy(dd->disk.vendor, "Linux", 5);
2657 memset(dd->disk.pad, 0xff, 442);
b2280677 2658 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2659 dd->vlist[i] = NULL;
2660
5575e7d9
NB
2661 dd->pdnum = n;
2662
2cc2983d
N
2663 if (st->update_tail) {
2664 int len = (sizeof(struct phys_disk) +
2665 sizeof(struct phys_disk_entry));
2666 struct phys_disk *pd;
2667
503975b9 2668 pd = xmalloc(len);
2cc2983d
N
2669 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2670 pd->used_pdes = __cpu_to_be16(n);
2671 pde = &pd->entries[0];
2672 dd->mdupdate = pd;
4a3ca8ac 2673 } else
2674 ddf->phys->used_pdes = __cpu_to_be16(
2675 1 + __be16_to_cpu(ddf->phys->used_pdes));
a322f70c
DW
2676
2677 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2678 pde->refnum = dd->disk.refnum;
5575e7d9 2679 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c 2680 pde->state = __cpu_to_be16(DDF_Online);
4ee8cca9 2681 dd->size = size;
2682 /*
2683 * If there is already a device in dlist, try to reserve the same
2684 * amount of workspace. Otherwise, use 32MB.
2685 * We checked disk size above already.
2686 */
2687#define __calc_lba(new, old, lba, mb) do { \
2688 unsigned long long dif; \
2689 if ((old) != NULL) \
2690 dif = (old)->size - __be64_to_cpu((old)->lba); \
2691 else \
2692 dif = (new)->size; \
2693 if ((new)->size > dif) \
2694 (new)->lba = __cpu_to_be64((new)->size - dif); \
2695 else \
2696 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2697 } while (0)
2698 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2699 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2700 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2701 pde->config_size = dd->workspace_lba;
2702
a322f70c
DW
2703 sprintf(pde->path, "%17.17s","Information: nil") ;
2704 memset(pde->pad, 0xff, 6);
2705
2cc2983d
N
2706 if (st->update_tail) {
2707 dd->next = ddf->add_list;
2708 ddf->add_list = dd;
2709 } else {
2710 dd->next = ddf->dlist;
2711 ddf->dlist = dd;
7d5a7ff3 2712 ddf_set_updates_pending(ddf);
2cc2983d 2713 }
f20c3968
DW
2714
2715 return 0;
a322f70c
DW
2716}
2717
4dd968cc
N
2718static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2719{
2720 struct ddf_super *ddf = st->sb;
2721 struct dl *dl;
2722
2723 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2724 * disappeared from the container.
2725 * We need to arrange that it disappears from the metadata and
2726 * internal data structures too.
2727 * Most of the work is done by ddf_process_update which edits
2728 * the metadata and closes the file handle and attaches the memory
2729 * where free_updates will free it.
2730 */
2731 for (dl = ddf->dlist; dl ; dl = dl->next)
2732 if (dl->major == dk->major &&
2733 dl->minor == dk->minor)
2734 break;
2735 if (!dl)
2736 return -1;
2737
2738 if (st->update_tail) {
2739 int len = (sizeof(struct phys_disk) +
2740 sizeof(struct phys_disk_entry));
2741 struct phys_disk *pd;
2742
503975b9 2743 pd = xmalloc(len);
4dd968cc
N
2744 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2745 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2746 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2747 append_metadata_update(st, pd, len);
2748 }
2749 return 0;
2750}
2751
a322f70c
DW
2752/*
2753 * This is the write_init_super method for a ddf container. It is
2754 * called when creating a container or adding another device to a
2755 * container.
2756 */
42d5dfd9 2757#define NULL_CONF_SZ 4096
18a2f463 2758
7f798aca 2759static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2760 char *null_aligned)
a322f70c 2761{
7f798aca 2762 unsigned long long sector;
2763 struct ddf_header *header;
2764 int fd, i, n_config, conf_size;
a4057a88 2765 int ret = 0;
7f798aca 2766
2767 fd = d->fd;
2768
2769 switch (type) {
2770 case DDF_HEADER_PRIMARY:
2771 header = &ddf->primary;
2772 sector = __be64_to_cpu(header->primary_lba);
2773 break;
2774 case DDF_HEADER_SECONDARY:
2775 header = &ddf->secondary;
2776 sector = __be64_to_cpu(header->secondary_lba);
2777 break;
2778 default:
2779 return 0;
2780 }
2781
2782 header->type = type;
a4057a88 2783 header->openflag = 1;
7f798aca 2784 header->crc = calc_crc(header, 512);
2785
2786 lseek64(fd, sector<<9, 0);
2787 if (write(fd, header, 512) < 0)
a4057a88 2788 goto out;
7f798aca 2789
2790 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2791 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2792 goto out;
a322f70c 2793
7f798aca 2794 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2795 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2796 goto out;
7f798aca 2797 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2798 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2799 goto out;
7f798aca 2800
2801 /* Now write lots of config records. */
2802 n_config = ddf->max_part;
2803 conf_size = ddf->conf_rec_len * 512;
2804 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2805 struct vcl *c;
2806 struct vd_config *vdc = NULL;
2807 if (i == n_config) {
7f798aca 2808 c = (struct vcl *)d->spare;
e3c2a365 2809 if (c)
2810 vdc = &c->conf;
2811 } else {
2812 unsigned int dummy;
2813 c = d->vlist[i];
2814 if (c)
2815 get_pd_index_from_refnum(
2816 c, d->disk.refnum,
2817 ddf->mppe,
2818 (const struct vd_config **)&vdc,
2819 &dummy);
2820 }
7f798aca 2821 if (c) {
be9b9ef4 2822 dprintf("writing conf record %i on disk %08x for %s/%u\n",
ad60eea1 2823 i, __be32_to_cpu(d->disk.refnum),
2824 guid_str(vdc->guid),
be9b9ef4 2825 vdc->sec_elmnt_seq);
dacf3dc5 2826 vdc->seqnum = header->seq;
e3c2a365 2827 vdc->crc = calc_crc(vdc, conf_size);
2828 if (write(fd, vdc, conf_size) < 0)
7f798aca 2829 break;
2830 } else {
2831 unsigned int togo = conf_size;
2832 while (togo > NULL_CONF_SZ) {
2833 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2834 break;
2835 togo -= NULL_CONF_SZ;
2836 }
2837 if (write(fd, null_aligned, togo) < 0)
2838 break;
2839 }
2840 }
2841 if (i <= n_config)
a4057a88 2842 goto out;
7f798aca 2843
2844 d->disk.crc = calc_crc(&d->disk, 512);
2845 if (write(fd, &d->disk, 512) < 0)
a4057a88 2846 goto out;
7f798aca 2847
a4057a88 2848 ret = 1;
2849out:
2850 header->openflag = 0;
2851 header->crc = calc_crc(header, 512);
2852
2853 lseek64(fd, sector<<9, 0);
2854 if (write(fd, header, 512) < 0)
2855 ret = 0;
2856
2857 return ret;
7f798aca 2858}
2859
2860static int __write_init_super_ddf(struct supertype *st)
2861{
a322f70c 2862 struct ddf_super *ddf = st->sb;
a322f70c 2863 struct dl *d;
175593bf
DW
2864 int attempts = 0;
2865 int successes = 0;
7f798aca 2866 unsigned long long size;
42d5dfd9 2867 char *null_aligned;
0175cbf6 2868 __u32 seq;
42d5dfd9 2869
7d5a7ff3 2870 pr_state(ddf, __func__);
42d5dfd9
JS
2871 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2872 return -ENOMEM;
2873 }
2874 memset(null_aligned, 0xff, NULL_CONF_SZ);
a322f70c 2875
35c3606d 2876 seq = ddf->active->seq;
0175cbf6 2877
175593bf
DW
2878 /* try to write updated metadata,
2879 * if we catch a failure move on to the next disk
2880 */
a322f70c
DW
2881 for (d = ddf->dlist; d; d=d->next) {
2882 int fd = d->fd;
2883
2884 if (fd < 0)
2885 continue;
2886
175593bf 2887 attempts++;
a322f70c
DW
2888 /* We need to fill in the primary, (secondary) and workspace
2889 * lba's in the headers, set their checksums,
2890 * Also checksum phys, virt....
2891 *
2892 * Then write everything out, finally the anchor is written.
2893 */
2894 get_dev_size(fd, NULL, &size);
2895 size /= 512;
097bcf00 2896 if (d->workspace_lba != 0)
2897 ddf->anchor.workspace_lba = d->workspace_lba;
2898 else
2899 ddf->anchor.workspace_lba =
2900 __cpu_to_be64(size - 32*1024*2);
2901 if (d->primary_lba != 0)
2902 ddf->anchor.primary_lba = d->primary_lba;
2903 else
2904 ddf->anchor.primary_lba =
2905 __cpu_to_be64(size - 16*1024*2);
2906 if (d->secondary_lba != 0)
2907 ddf->anchor.secondary_lba = d->secondary_lba;
2908 else
2909 ddf->anchor.secondary_lba =
2910 __cpu_to_be64(size - 32*1024*2);
0175cbf6 2911 ddf->anchor.seq = seq;
a322f70c
DW
2912 memcpy(&ddf->primary, &ddf->anchor, 512);
2913 memcpy(&ddf->secondary, &ddf->anchor, 512);
2914
2915 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2916 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2917 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2918
7f798aca 2919 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2920 null_aligned))
175593bf 2921 continue;
a322f70c 2922
7f798aca 2923 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2924 null_aligned))
175593bf 2925 continue;
a322f70c 2926
a322f70c 2927 lseek64(fd, (size-1)*512, SEEK_SET);
175593bf
DW
2928 if (write(fd, &ddf->anchor, 512) < 0)
2929 continue;
2930 successes++;
2931 }
42d5dfd9 2932 free(null_aligned);
175593bf 2933
175593bf 2934 return attempts != successes;
a322f70c 2935}
7a7cc504
NB
2936
2937static int write_init_super_ddf(struct supertype *st)
2938{
9b1fb677
DW
2939 struct ddf_super *ddf = st->sb;
2940 struct vcl *currentconf = ddf->currentconf;
2941
2942 /* we are done with currentconf reset it to point st at the container */
2943 ddf->currentconf = NULL;
edd8d13c
NB
2944
2945 if (st->update_tail) {
2946 /* queue the virtual_disk and vd_config as metadata updates */
2947 struct virtual_disk *vd;
2948 struct vd_config *vc;
c5943560 2949 int len, tlen;
2950 unsigned int i;
edd8d13c 2951
9b1fb677 2952 if (!currentconf) {
2cc2983d
N
2953 int len = (sizeof(struct phys_disk) +
2954 sizeof(struct phys_disk_entry));
2955
2956 /* adding a disk to the container. */
2957 if (!ddf->add_list)
2958 return 0;
2959
2960 append_metadata_update(st, ddf->add_list->mdupdate, len);
2961 ddf->add_list->mdupdate = NULL;
2962 return 0;
2963 }
2964
2965 /* Newly created VD */
2966
edd8d13c
NB
2967 /* First the virtual disk. We have a slightly fake header */
2968 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2969 vd = xmalloc(len);
edd8d13c 2970 *vd = *ddf->virt;
9b1fb677
DW
2971 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2972 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2973 append_metadata_update(st, vd, len);
2974
2975 /* Then the vd_config */
2976 len = ddf->conf_rec_len * 512;
c5943560 2977 tlen = len * currentconf->conf.sec_elmnt_count;
2978 vc = xmalloc(tlen);
9b1fb677 2979 memcpy(vc, &currentconf->conf, len);
c5943560 2980 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
2981 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
2982 len);
2983 append_metadata_update(st, vc, tlen);
edd8d13c
NB
2984
2985 /* FIXME I need to close the fds! */
2986 return 0;
613b0d17 2987 } else {
d682f344 2988 struct dl *d;
19041058 2989 if (!currentconf)
2990 for (d = ddf->dlist; d; d=d->next)
2991 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2992 return __write_init_super_ddf(st);
d682f344 2993 }
7a7cc504
NB
2994}
2995
a322f70c
DW
2996#endif
2997
387fcd59
N
2998static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2999 unsigned long long data_offset)
a322f70c
DW
3000{
3001 /* We must reserve the last 32Meg */
3002 if (devsize <= 32*1024*2)
3003 return 0;
3004 return devsize - 32*1024*2;
3005}
3006
3007#ifndef MDASSEMBLE
8592f29d
N
3008
3009static int reserve_space(struct supertype *st, int raiddisks,
3010 unsigned long long size, int chunk,
3011 unsigned long long *freesize)
3012{
3013 /* Find 'raiddisks' spare extents at least 'size' big (but
3014 * only caring about multiples of 'chunk') and remember
3015 * them.
3016 * If the cannot be found, fail.
3017 */
3018 struct dl *dl;
3019 struct ddf_super *ddf = st->sb;
3020 int cnt = 0;
3021
3022 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 3023 dl->raiddisk = -1;
8592f29d
N
3024 dl->esize = 0;
3025 }
3026 /* Now find largest extent on each device */
3027 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3028 struct extent *e = get_extents(ddf, dl);
3029 unsigned long long pos = 0;
3030 int i = 0;
3031 int found = 0;
3032 unsigned long long minsize = size;
3033
3034 if (size == 0)
3035 minsize = chunk;
3036
3037 if (!e)
3038 continue;
3039 do {
3040 unsigned long long esize;
3041 esize = e[i].start - pos;
3042 if (esize >= minsize) {
3043 found = 1;
3044 minsize = esize;
3045 }
3046 pos = e[i].start + e[i].size;
3047 i++;
3048 } while (e[i-1].size);
3049 if (found) {
3050 cnt++;
3051 dl->esize = minsize;
3052 }
3053 free(e);
3054 }
3055 if (cnt < raiddisks) {
e7b84f9d 3056 pr_err("not enough devices with space to create array.\n");
8592f29d
N
3057 return 0; /* No enough free spaces large enough */
3058 }
3059 if (size == 0) {
3060 /* choose the largest size of which there are at least 'raiddisk' */
3061 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3062 struct dl *dl2;
3063 if (dl->esize <= size)
3064 continue;
3065 /* This is bigger than 'size', see if there are enough */
3066 cnt = 0;
7b80ad6a 3067 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
3068 if (dl2->esize >= dl->esize)
3069 cnt++;
3070 if (cnt >= raiddisks)
3071 size = dl->esize;
3072 }
3073 if (chunk) {
3074 size = size / chunk;
3075 size *= chunk;
3076 }
3077 *freesize = size;
3078 if (size < 32) {
e7b84f9d 3079 pr_err("not enough spare devices to create array.\n");
8592f29d
N
3080 return 0;
3081 }
3082 }
3083 /* We have a 'size' of which there are enough spaces.
3084 * We simply do a first-fit */
3085 cnt = 0;
3086 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3087 if (dl->esize < size)
3088 continue;
613b0d17 3089
8592f29d
N
3090 dl->raiddisk = cnt;
3091 cnt++;
3092 }
3093 return 1;
3094}
3095
2c514b71
NB
3096static int
3097validate_geometry_ddf_container(struct supertype *st,
3098 int level, int layout, int raiddisks,
3099 int chunk, unsigned long long size,
af4348dd 3100 unsigned long long data_offset,
2c514b71
NB
3101 char *dev, unsigned long long *freesize,
3102 int verbose);
78e44928
NB
3103
3104static int validate_geometry_ddf_bvd(struct supertype *st,
3105 int level, int layout, int raiddisks,
c21e737b 3106 int *chunk, unsigned long long size,
af4348dd 3107 unsigned long long data_offset,
2c514b71
NB
3108 char *dev, unsigned long long *freesize,
3109 int verbose);
78e44928
NB
3110
3111static int validate_geometry_ddf(struct supertype *st,
2c514b71 3112 int level, int layout, int raiddisks,
c21e737b 3113 int *chunk, unsigned long long size,
af4348dd 3114 unsigned long long data_offset,
2c514b71
NB
3115 char *dev, unsigned long long *freesize,
3116 int verbose)
a322f70c
DW
3117{
3118 int fd;
3119 struct mdinfo *sra;
3120 int cfd;
3121
3122 /* ddf potentially supports lots of things, but it depends on
3123 * what devices are offered (and maybe kernel version?)
3124 * If given unused devices, we will make a container.
3125 * If given devices in a container, we will make a BVD.
3126 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3127 */
3128
bb7295f1
N
3129 if (chunk && *chunk == UnSet)
3130 *chunk = DEFAULT_CHUNK;
3131
542ef4ec 3132 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3133 if (level == LEVEL_CONTAINER) {
78e44928
NB
3134 /* Must be a fresh device to add to a container */
3135 return validate_geometry_ddf_container(st, level, layout,
c21e737b 3136 raiddisks, chunk?*chunk:0,
af4348dd
N
3137 size, data_offset, dev,
3138 freesize,
2c514b71 3139 verbose);
5f8097be
NB
3140 }
3141
78e44928 3142 if (!dev) {
a3163bf0 3143 mdu_array_info_t array = {
3144 .level = level, .layout = layout,
3145 .raid_disks = raiddisks
3146 };
3147 struct vd_config conf;
3148 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3149 if (verbose)
94b08b7c 3150 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3151 level, layout, raiddisks);
78e44928 3152 return 0;
b42f577a 3153 }
78e44928 3154 /* Should check layout? etc */
8592f29d
N
3155
3156 if (st->sb && freesize) {
3157 /* --create was given a container to create in.
3158 * So we need to check that there are enough
3159 * free spaces and return the amount of space.
3160 * We may as well remember which drives were
3161 * chosen so that add_to_super/getinfo_super
3162 * can return them.
3163 */
c21e737b 3164 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 3165 }
a322f70c 3166 return 1;
78e44928 3167 }
a322f70c 3168
8592f29d
N
3169 if (st->sb) {
3170 /* A container has already been opened, so we are
3171 * creating in there. Maybe a BVD, maybe an SVD.
3172 * Should make a distinction one day.
3173 */
3174 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3175 chunk, size, data_offset, dev,
3176 freesize,
8592f29d
N
3177 verbose);
3178 }
78e44928
NB
3179 /* This is the first device for the array.
3180 * If it is a container, we read it in and do automagic allocations,
3181 * no other devices should be given.
3182 * Otherwise it must be a member device of a container, and we
3183 * do manual allocation.
3184 * Later we should check for a BVD and make an SVD.
a322f70c 3185 */
a322f70c
DW
3186 fd = open(dev, O_RDONLY|O_EXCL, 0);
3187 if (fd >= 0) {
4dd2df09 3188 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3189 close(fd);
3190 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3191 strcmp(sra->text_version, "ddf") == 0) {
3192
3193 /* load super */
3194 /* find space for 'n' devices. */
3195 /* remember the devices */
3196 /* Somehow return the fact that we have enough */
a322f70c
DW
3197 }
3198
2c514b71 3199 if (verbose)
e7b84f9d
N
3200 pr_err("ddf: Cannot create this array "
3201 "on device %s - a container is required.\n",
3202 dev);
a322f70c
DW
3203 return 0;
3204 }
3205 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3206 if (verbose)
e7b84f9d 3207 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3208 dev, strerror(errno));
a322f70c
DW
3209 return 0;
3210 }
3211 /* Well, it is in use by someone, maybe a 'ddf' container. */
3212 cfd = open_container(fd);
3213 if (cfd < 0) {
3214 close(fd);
2c514b71 3215 if (verbose)
e7b84f9d 3216 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3217 dev, strerror(EBUSY));
a322f70c
DW
3218 return 0;
3219 }
4dd2df09 3220 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3221 close(fd);
3222 if (sra && sra->array.major_version == -1 &&
3223 strcmp(sra->text_version, "ddf") == 0) {
3224 /* This is a member of a ddf container. Load the container
3225 * and try to create a bvd
3226 */
3227 struct ddf_super *ddf;
e1902a7b 3228 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3229 st->sb = ddf;
4dd2df09 3230 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3231 close(cfd);
78e44928 3232 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3233 raiddisks, chunk, size,
af4348dd 3234 data_offset,
2c514b71
NB
3235 dev, freesize,
3236 verbose);
a322f70c
DW
3237 }
3238 close(cfd);
c42ec1ed
DW
3239 } else /* device may belong to a different container */
3240 return 0;
3241
a322f70c
DW
3242 return 1;
3243}
3244
2c514b71
NB
3245static int
3246validate_geometry_ddf_container(struct supertype *st,
3247 int level, int layout, int raiddisks,
3248 int chunk, unsigned long long size,
af4348dd 3249 unsigned long long data_offset,
2c514b71
NB
3250 char *dev, unsigned long long *freesize,
3251 int verbose)
a322f70c
DW
3252{
3253 int fd;
3254 unsigned long long ldsize;
3255
3256 if (level != LEVEL_CONTAINER)
3257 return 0;
3258 if (!dev)
3259 return 1;
3260
3261 fd = open(dev, O_RDONLY|O_EXCL, 0);
3262 if (fd < 0) {
2c514b71 3263 if (verbose)
e7b84f9d 3264 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3265 dev, strerror(errno));
a322f70c
DW
3266 return 0;
3267 }
3268 if (!get_dev_size(fd, dev, &ldsize)) {
3269 close(fd);
3270 return 0;
3271 }
3272 close(fd);
3273
387fcd59 3274 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3275 if (*freesize == 0)
3276 return 0;
a322f70c
DW
3277
3278 return 1;
3279}
3280
78e44928
NB
3281static int validate_geometry_ddf_bvd(struct supertype *st,
3282 int level, int layout, int raiddisks,
c21e737b 3283 int *chunk, unsigned long long size,
af4348dd 3284 unsigned long long data_offset,
2c514b71
NB
3285 char *dev, unsigned long long *freesize,
3286 int verbose)
a322f70c
DW
3287{
3288 struct stat stb;
3289 struct ddf_super *ddf = st->sb;
3290 struct dl *dl;
5f8097be
NB
3291 unsigned long long pos = 0;
3292 unsigned long long maxsize;
3293 struct extent *e;
3294 int i;
a322f70c 3295 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3296 if (level == LEVEL_CONTAINER) {
3297 if (verbose)
e7b84f9d 3298 pr_err("DDF cannot create a container within an container\n");
a322f70c 3299 return 0;
b42f577a 3300 }
a322f70c
DW
3301 /* We must have the container info already read in. */
3302 if (!ddf)
3303 return 0;
3304
5f8097be
NB
3305 if (!dev) {
3306 /* General test: make sure there is space for
3307 * 'raiddisks' device extents of size 'size'.
3308 */
3309 unsigned long long minsize = size;
3310 int dcnt = 0;
3311 if (minsize == 0)
3312 minsize = 8;
3313 for (dl = ddf->dlist; dl ; dl = dl->next)
3314 {
3315 int found = 0;
7e1432fb 3316 pos = 0;
5f8097be
NB
3317
3318 i = 0;
3319 e = get_extents(ddf, dl);
3320 if (!e) continue;
3321 do {
3322 unsigned long long esize;
3323 esize = e[i].start - pos;
3324 if (esize >= minsize)
3325 found = 1;
3326 pos = e[i].start + e[i].size;
3327 i++;
3328 } while (e[i-1].size);
3329 if (found)
3330 dcnt++;
3331 free(e);
3332 }
3333 if (dcnt < raiddisks) {
2c514b71 3334 if (verbose)
e7b84f9d
N
3335 pr_err("ddf: Not enough devices with "
3336 "space for this array (%d < %d)\n",
3337 dcnt, raiddisks);
5f8097be
NB
3338 return 0;
3339 }
3340 return 1;
3341 }
a322f70c
DW
3342 /* This device must be a member of the set */
3343 if (stat(dev, &stb) < 0)
3344 return 0;
3345 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3346 return 0;
3347 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3348 if (dl->major == (int)major(stb.st_rdev) &&
3349 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3350 break;
3351 }
5f8097be 3352 if (!dl) {
2c514b71 3353 if (verbose)
e7b84f9d 3354 pr_err("ddf: %s is not in the "
613b0d17
N
3355 "same DDF set\n",
3356 dev);
5f8097be
NB
3357 return 0;
3358 }
3359 e = get_extents(ddf, dl);
3360 maxsize = 0;
3361 i = 0;
3362 if (e) do {
613b0d17
N
3363 unsigned long long esize;
3364 esize = e[i].start - pos;
3365 if (esize >= maxsize)
3366 maxsize = esize;
3367 pos = e[i].start + e[i].size;
3368 i++;
3369 } while (e[i-1].size);
5f8097be 3370 *freesize = maxsize;
a322f70c
DW
3371 // FIXME here I am
3372
3373 return 1;
3374}
59e36268 3375
a322f70c 3376static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3377 void **sbp, char *devname)
a322f70c
DW
3378{
3379 struct mdinfo *sra;
3380 struct ddf_super *super;
3381 struct mdinfo *sd, *best = NULL;
3382 int bestseq = 0;
3383 int seq;
3384 char nm[20];
3385 int dfd;
3386
b526e52d 3387 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3388 if (!sra)
3389 return 1;
3390 if (sra->array.major_version != -1 ||
3391 sra->array.minor_version != -2 ||
3392 strcmp(sra->text_version, "ddf") != 0)
3393 return 1;
3394
6416d527 3395 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3396 return 1;
a2349791 3397 memset(super, 0, sizeof(*super));
a322f70c
DW
3398
3399 /* first, try each device, and choose the best ddf */
3400 for (sd = sra->devs ; sd ; sd = sd->next) {
3401 int rv;
3402 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3403 dfd = dev_open(nm, O_RDONLY);
3404 if (dfd < 0)
a322f70c
DW
3405 return 2;
3406 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3407 close(dfd);
a322f70c
DW
3408 if (rv == 0) {
3409 seq = __be32_to_cpu(super->active->seq);
3410 if (super->active->openflag)
3411 seq--;
3412 if (!best || seq > bestseq) {
3413 bestseq = seq;
3414 best = sd;
3415 }
3416 }
3417 }
3418 if (!best)
3419 return 1;
3420 /* OK, load this ddf */
3421 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3422 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3423 if (dfd < 0)
a322f70c
DW
3424 return 1;
3425 load_ddf_headers(dfd, super, NULL);
3426 load_ddf_global(dfd, super, NULL);
3427 close(dfd);
3428 /* Now we need the device-local bits */
3429 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3430 int rv;
3431
a322f70c 3432 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3433 dfd = dev_open(nm, O_RDWR);
7a7cc504 3434 if (dfd < 0)
a322f70c 3435 return 2;
3d2c4fc7
DW
3436 rv = load_ddf_headers(dfd, super, NULL);
3437 if (rv == 0)
e1902a7b 3438 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3439 if (rv)
3440 return 1;
a322f70c 3441 }
33414a01 3442
a322f70c
DW
3443 *sbp = super;
3444 if (st->ss == NULL) {
78e44928 3445 st->ss = &super_ddf;
a322f70c
DW
3446 st->minor_version = 0;
3447 st->max_devs = 512;
3448 }
4dd2df09 3449 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3450 return 0;
3451}
2b959fbf
N
3452
3453static int load_container_ddf(struct supertype *st, int fd,
3454 char *devname)
3455{
3456 return load_super_ddf_all(st, fd, &st->sb, devname);
3457}
3458
0e600426 3459#endif /* MDASSEMBLE */
a322f70c 3460
a5c7adb3 3461static int check_secondary(const struct vcl *vc)
3462{
3463 const struct vd_config *conf = &vc->conf;
3464 int i;
3465
3466 /* The only DDF secondary RAID level md can support is
3467 * RAID 10, if the stripe sizes and Basic volume sizes
3468 * are all equal.
3469 * Other configurations could in theory be supported by exposing
3470 * the BVDs to user space and using device mapper for the secondary
3471 * mapping. So far we don't support that.
3472 */
3473
3474 __u64 sec_elements[4] = {0, 0, 0, 0};
3475#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3476#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3477
3478 if (vc->other_bvds == NULL) {
3479 pr_err("No BVDs for secondary RAID found\n");
3480 return -1;
3481 }
3482 if (conf->prl != DDF_RAID1) {
3483 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3484 return -1;
3485 }
3486 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3487 pr_err("Secondary RAID level %d is unsupported\n",
3488 conf->srl);
3489 return -1;
3490 }
3491 __set_sec_seen(conf->sec_elmnt_seq);
3492 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3493 const struct vd_config *bvd = vc->other_bvds[i];
3c48f7be 3494 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
c98567ba 3495 continue;
a5c7adb3 3496 if (bvd->srl != conf->srl) {
3497 pr_err("Inconsistent secondary RAID level across BVDs\n");
3498 return -1;
3499 }
3500 if (bvd->prl != conf->prl) {
3501 pr_err("Different RAID levels for BVDs are unsupported\n");
3502 return -1;
3503 }
3504 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3505 pr_err("All BVDs must have the same number of primary elements\n");
3506 return -1;
3507 }
3508 if (bvd->chunk_shift != conf->chunk_shift) {
3509 pr_err("Different strip sizes for BVDs are unsupported\n");
3510 return -1;
3511 }
3512 if (bvd->array_blocks != conf->array_blocks) {
3513 pr_err("Different BVD sizes are unsupported\n");
3514 return -1;
3515 }
3516 __set_sec_seen(bvd->sec_elmnt_seq);
3517 }
3518 for (i = 0; i < conf->sec_elmnt_count; i++) {
3519 if (!__was_sec_seen(i)) {
3520 pr_err("BVD %d is missing\n", i);
3521 return -1;
3522 }
3523 }
3524 return 0;
3525}
3526
8a38db86 3527static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3528 __u32 refnum, unsigned int nmax,
3529 const struct vd_config **bvd,
3530 unsigned int *idx)
8a38db86 3531{
4e587018 3532 unsigned int i, j, n, sec, cnt;
3533
3534 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3535 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3536
3537 for (i = 0, j = 0 ; i < nmax ; i++) {
3538 /* j counts valid entries for this BVD */
3539 if (vc->conf.phys_refnum[i] != 0xffffffff)
3540 j++;
3541 if (vc->conf.phys_refnum[i] == refnum) {
3542 *bvd = &vc->conf;
3543 *idx = i;
3544 return sec * cnt + j - 1;
3545 }
3546 }
3547 if (vc->other_bvds == NULL)
3548 goto bad;
3549
3550 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3551 struct vd_config *vd = vc->other_bvds[n-1];
4e587018 3552 sec = vd->sec_elmnt_seq;
3c48f7be 3553 if (sec == DDF_UNUSED_BVD)
3554 continue;
4e587018 3555 for (i = 0, j = 0 ; i < nmax ; i++) {
3556 if (vd->phys_refnum[i] != 0xffffffff)
3557 j++;
3558 if (vd->phys_refnum[i] == refnum) {
3559 *bvd = vd;
3560 *idx = i;
3561 return sec * cnt + j - 1;
3562 }
3563 }
3564 }
3565bad:
3566 *bvd = NULL;
d6e7b083 3567 return DDF_NOTFOUND;
8a38db86 3568}
3569
00bbdbda 3570static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3571{
3572 /* Given a container loaded by load_super_ddf_all,
3573 * extract information about all the arrays into
3574 * an mdinfo tree.
3575 *
3576 * For each vcl in conflist: create an mdinfo, fill it in,
3577 * then look for matching devices (phys_refnum) in dlist
3578 * and create appropriate device mdinfo.
3579 */
3580 struct ddf_super *ddf = st->sb;
3581 struct mdinfo *rest = NULL;
3582 struct vcl *vc;
3583
3584 for (vc = ddf->conflist ; vc ; vc=vc->next)
3585 {
f21e18ca
N
3586 unsigned int i;
3587 unsigned int j;
598f0d58 3588 struct mdinfo *this;
00bbdbda 3589 char *ep;
90fa1a29 3590 __u32 *cptr;
8a38db86 3591 unsigned int pd;
00bbdbda
N
3592
3593 if (subarray &&
3594 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3595 *ep != '\0'))
3596 continue;
3597
a5c7adb3 3598 if (vc->conf.sec_elmnt_count > 1) {
3599 if (check_secondary(vc) != 0)
3600 continue;
3601 }
3602
503975b9 3603 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3604 this->next = rest;
3605 rest = this;
3606
8a2848a7 3607 if (layout_ddf2md(&vc->conf, &this->array))
3608 continue;
598f0d58 3609 this->array.md_minor = -1;
f35f2525
N
3610 this->array.major_version = -1;
3611 this->array.minor_version = -2;
90fa1a29
JS
3612 cptr = (__u32 *)(vc->conf.guid + 16);
3613 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3614 this->array.utime = DECADE +
3615 __be32_to_cpu(vc->conf.timestamp);
3616 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3617
59e36268 3618 i = vc->vcnum;
7a7cc504
NB
3619 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3620 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3621 DDF_init_full) {
598f0d58 3622 this->array.state = 0;
ed9d66aa
NB
3623 this->resync_start = 0;
3624 } else {
598f0d58 3625 this->array.state = 1;
b7528a20 3626 this->resync_start = MaxSector;
ed9d66aa 3627 }
db42fa9b
N
3628 memcpy(this->name, ddf->virt->entries[i].name, 16);
3629 this->name[16]=0;
3630 for(j=0; j<16; j++)
3631 if (this->name[j] == ' ')
3632 this->name[j] = 0;
598f0d58
NB
3633
3634 memset(this->uuid, 0, sizeof(this->uuid));
3635 this->component_size = __be64_to_cpu(vc->conf.blocks);
3636 this->array.size = this->component_size / 2;
5f2aace8 3637 this->container_member = i;
598f0d58 3638
c5afc314
N
3639 ddf->currentconf = vc;
3640 uuid_from_super_ddf(st, this->uuid);
f646805e 3641 if (!subarray)
3642 ddf->currentconf = NULL;
c5afc314 3643
60f18132 3644 sprintf(this->text_version, "/%s/%d",
4dd2df09 3645 st->container_devnm, this->container_member);
60f18132 3646
8a38db86 3647 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3648 struct mdinfo *dev;
3649 struct dl *d;
4e587018 3650 const struct vd_config *bvd;
3651 unsigned int iphys;
fa033bec 3652 int stt;
598f0d58 3653
8a38db86 3654 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3655 continue;
0cf5ef67
N
3656
3657 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3658 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3659 != DDF_Online)
3660 continue;
3661
8a38db86 3662 i = get_pd_index_from_refnum(
4e587018 3663 vc, ddf->phys->entries[pd].refnum,
3664 ddf->mppe, &bvd, &iphys);
d6e7b083 3665 if (i == DDF_NOTFOUND)
8a38db86 3666 continue;
3667
fa033bec 3668 this->array.working_disks++;
bc17324f 3669
0cf5ef67 3670 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3671 if (d->disk.refnum ==
3672 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3673 break;
3674 if (d == NULL)
3675 /* Haven't found that one yet, maybe there are others */
3676 continue;
3677
503975b9 3678 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3679 dev->next = this->devs;
3680 this->devs = dev;
3681
3682 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3683 dev->disk.major = d->major;
3684 dev->disk.minor = d->minor;
3685 dev->disk.raid_disk = i;
3686 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3687 dev->recovery_start = MaxSector;
598f0d58 3688
120f7677 3689 dev->events = __be32_to_cpu(ddf->primary.seq);
57a66662 3690 dev->data_offset =
3691 __be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
4e587018 3692 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3693 if (d->devname)
3694 strcpy(dev->name, d->devname);
3695 }
3696 }
3697 return rest;
3698}
3699
955e9ea1 3700static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3701{
955e9ea1 3702 struct ddf_super *ddf = st->sb;
a322f70c 3703 unsigned long long dsize;
6416d527 3704 void *buf;
3d2c4fc7 3705 int rc;
a322f70c 3706
955e9ea1
DW
3707 if (!ddf)
3708 return 1;
3709
a322f70c
DW
3710 if (!get_dev_size(fd, NULL, &dsize))
3711 return 1;
3712
dbf98368 3713 if (ddf->dlist || ddf->conflist) {
3714 struct stat sta;
3715 struct dl *dl;
3716 int ofd, ret;
3717
3718 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3719 pr_err("%s: file descriptor for invalid device\n",
3720 __func__);
3721 return 1;
3722 }
3723 for (dl = ddf->dlist; dl; dl = dl->next)
3724 if (dl->major == (int)major(sta.st_rdev) &&
3725 dl->minor == (int)minor(sta.st_rdev))
3726 break;
3727 if (!dl) {
3728 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3729 (int)major(sta.st_rdev),
3730 (int)minor(sta.st_rdev));
3731 return 1;
3732 }
3733 /*
3734 For DDF, writing to just one disk makes no sense.
3735 We would run the risk of writing inconsistent meta data
3736 to the devices. So just call __write_init_super_ddf and
3737 write to all devices, including this one.
3738 Use the fd passed to this function, just in case dl->fd
3739 is invalid.
3740 */
3741 ofd = dl->fd;
3742 dl->fd = fd;
3743 ret = __write_init_super_ddf(st);
3744 dl->fd = ofd;
3745 return ret;
3746 }
3747
3d2c4fc7
DW
3748 if (posix_memalign(&buf, 512, 512) != 0)
3749 return 1;
6416d527
NB
3750 memset(buf, 0, 512);
3751
a322f70c 3752 lseek64(fd, dsize-512, 0);
3d2c4fc7 3753 rc = write(fd, buf, 512);
6416d527 3754 free(buf);
3d2c4fc7
DW
3755 if (rc < 0)
3756 return 1;
a322f70c
DW
3757 return 0;
3758}
3759
a19c88b8
NB
3760static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3761{
3762 /*
3763 * return:
3764 * 0 same, or first was empty, and second was copied
3765 * 1 second had wrong number
3766 * 2 wrong uuid
3767 * 3 wrong other info
3768 */
3769 struct ddf_super *first = st->sb;
3770 struct ddf_super *second = tst->sb;
4eefd651 3771 struct dl *dl1, *dl2;
3772 struct vcl *vl1, *vl2;
2d210697 3773 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3774
3775 if (!first) {
3776 st->sb = tst->sb;
3777 tst->sb = NULL;
3778 return 0;
3779 }
3780
3781 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3782 return 2;
3783
2d210697 3784 if (first->anchor.seq != second->anchor.seq) {
3785 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3786 __be32_to_cpu(first->anchor.seq),
3787 __be32_to_cpu(second->anchor.seq));
3788 return 3;
3789 }
3790 if (first->max_part != second->max_part ||
3791 first->phys->used_pdes != second->phys->used_pdes ||
3792 first->virt->populated_vdes != second->virt->populated_vdes) {
3793 dprintf("%s: PD/VD number mismatch\n", __func__);
3794 return 3;
3795 }
3796
3797 max_pds = __be16_to_cpu(first->phys->used_pdes);
3798 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3799 for (pd = 0; pd < max_pds; pd++)
3800 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3801 break;
3802 if (pd == max_pds) {
3803 dprintf("%s: no match for disk %08x\n", __func__,
3804 __be32_to_cpu(dl2->disk.refnum));
3805 return 3;
3806 }
3807 }
3808
3809 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3810 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3811 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3812 continue;
3813 for (vd = 0; vd < max_vds; vd++)
3814 if (!memcmp(first->virt->entries[vd].guid,
3815 vl2->conf.guid, DDF_GUID_LEN))
3816 break;
3817 if (vd == max_vds) {
3818 dprintf("%s: no match for VD config\n", __func__);
3819 return 3;
3820 }
3821 }
a19c88b8 3822 /* FIXME should I look at anything else? */
2d210697 3823
4eefd651 3824 /*
3825 At this point we are fairly sure that the meta data matches.
3826 But the new disk may contain additional local data.
3827 Add it to the super block.
3828 */
3829 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3830 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3831 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3832 DDF_GUID_LEN))
3833 break;
3834 if (vl1) {
3835 if (vl1->other_bvds != NULL &&
3836 vl1->conf.sec_elmnt_seq !=
3837 vl2->conf.sec_elmnt_seq) {
3838 dprintf("%s: adding BVD %u\n", __func__,
3839 vl2->conf.sec_elmnt_seq);
3840 add_other_bvd(vl1, &vl2->conf,
3841 first->conf_rec_len*512);
3842 }
3843 continue;
3844 }
3845
3846 if (posix_memalign((void **)&vl1, 512,
3847 (first->conf_rec_len*512 +
3848 offsetof(struct vcl, conf))) != 0) {
3849 pr_err("%s could not allocate vcl buf\n",
3850 __func__);
3851 return 3;
3852 }
3853
3854 vl1->next = first->conflist;
3855 vl1->block_sizes = NULL;
4eefd651 3856 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3c48f7be 3857 if (alloc_other_bvds(first, vl1) != 0) {
3858 pr_err("%s could not allocate other bvds\n",
3859 __func__);
3860 free(vl1);
3861 return 3;
3862 }
4eefd651 3863 for (vd = 0; vd < max_vds; vd++)
3864 if (!memcmp(first->virt->entries[vd].guid,
3865 vl1->conf.guid, DDF_GUID_LEN))
3866 break;
3867 vl1->vcnum = vd;
3868 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3869 first->conflist = vl1;
3870 }
3871
3872 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3873 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3874 if (dl1->disk.refnum == dl2->disk.refnum)
3875 break;
3876 if (dl1)
3877 continue;
3878
3879 if (posix_memalign((void **)&dl1, 512,
3880 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3881 != 0) {
3882 pr_err("%s could not allocate disk info buffer\n",
3883 __func__);
3884 return 3;
3885 }
3886 memcpy(dl1, dl2, sizeof(*dl1));
3887 dl1->mdupdate = NULL;
3888 dl1->next = first->dlist;
3889 dl1->fd = -1;
3890 for (pd = 0; pd < max_pds; pd++)
3891 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3892 break;
3893 dl1->pdnum = pd;
3894 if (dl2->spare) {
3895 if (posix_memalign((void **)&dl1->spare, 512,
3896 first->conf_rec_len*512) != 0) {
3897 pr_err("%s could not allocate spare info buf\n",
3898 __func__);
3899 return 3;
3900 }
3901 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3902 }
3903 for (vd = 0 ; vd < first->max_part ; vd++) {
3904 if (!dl2->vlist[vd]) {
3905 dl1->vlist[vd] = NULL;
3906 continue;
3907 }
3908 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3909 if (!memcmp(vl1->conf.guid,
3910 dl2->vlist[vd]->conf.guid,
3911 DDF_GUID_LEN))
3912 break;
3913 dl1->vlist[vd] = vl1;
3914 }
3915 }
3916 first->dlist = dl1;
3917 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
ad60eea1 3918 __be32_to_cpu(dl1->disk.refnum));
4eefd651 3919 }
3920
a19c88b8
NB
3921 return 0;
3922}
3923
0e600426 3924#ifndef MDASSEMBLE
4e5528c6
NB
3925/*
3926 * A new array 'a' has been started which claims to be instance 'inst'
3927 * within container 'c'.
3928 * We need to confirm that the array matches the metadata in 'c' so
3929 * that we don't corrupt any metadata.
3930 */
cba0191b 3931static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3932{
a2aa439e 3933 struct ddf_super *ddf = c->sb;
3934 int n = atoi(inst);
fb9d0acb 3935 if (all_ff(ddf->virt->entries[n].guid)) {
3936 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 3937 return -ENODEV;
3938 }
3939 dprintf("ddf: open_new %d\n", n);
3940 a->info.container_member = n;
549e9569
NB
3941 return 0;
3942}
3943
4e5528c6
NB
3944/*
3945 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3946 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3947 * clean up to the point (in sectors). If that cannot be recorded in the
3948 * metadata, then leave it as dirty.
3949 *
3950 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3951 * !global! virtual_disk.virtual_entry structure.
3952 */
01f157d7 3953static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3954{
4e5528c6
NB
3955 struct ddf_super *ddf = a->container->sb;
3956 int inst = a->info.container_member;
18a2f463 3957 int old = ddf->virt->entries[inst].state;
01f157d7
N
3958 if (consistent == 2) {
3959 /* Should check if a recovery should be started FIXME */
3960 consistent = 1;
b7941fd6 3961 if (!is_resync_complete(&a->info))
01f157d7
N
3962 consistent = 0;
3963 }
ed9d66aa
NB
3964 if (consistent)
3965 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3966 else
4e5528c6 3967 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 3968 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 3969 ddf_set_updates_pending(ddf);
18a2f463
NB
3970
3971 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3972 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3973 if (is_resync_complete(&a->info))
ed9d66aa 3974 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3975 else if (a->info.resync_start == 0)
ed9d66aa 3976 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3977 else
ed9d66aa 3978 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 3979 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 3980 ddf_set_updates_pending(ddf);
ed9d66aa 3981
b27336a2 3982 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
3983 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
3984 consistent?"clean":"dirty",
b7941fd6 3985 a->info.resync_start);
01f157d7 3986 return consistent;
fd7cde1b
DW
3987}
3988
5ec636b7 3989static int get_bvd_state(const struct ddf_super *ddf,
3990 const struct vd_config *vc)
3991{
3992 unsigned int i, n_bvd, working = 0;
3993 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3994 int pd, st, state;
3995 for (i = 0; i < n_prim; i++) {
3996 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3997 continue;
3998 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3999 if (pd < 0)
4000 continue;
4001 st = __be16_to_cpu(ddf->phys->entries[pd].state);
4002 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
4003 == DDF_Online)
4004 working++;
4005 }
4006
4007 state = DDF_state_degraded;
4008 if (working == n_prim)
4009 state = DDF_state_optimal;
4010 else
4011 switch (vc->prl) {
4012 case DDF_RAID0:
4013 case DDF_CONCAT:
4014 case DDF_JBOD:
4015 state = DDF_state_failed;
4016 break;
4017 case DDF_RAID1:
4018 if (working == 0)
4019 state = DDF_state_failed;
4020 else if (working >= 2)
4021 state = DDF_state_part_optimal;
4022 break;
4023 case DDF_RAID4:
4024 case DDF_RAID5:
4025 if (working < n_prim - 1)
4026 state = DDF_state_failed;
4027 break;
4028 case DDF_RAID6:
4029 if (working < n_prim - 2)
4030 state = DDF_state_failed;
4031 else if (working == n_prim - 1)
4032 state = DDF_state_part_optimal;
4033 break;
4034 }
4035 return state;
4036}
4037
0777d17d 4038static int secondary_state(int state, int other, int seclevel)
4039{
4040 if (state == DDF_state_optimal && other == DDF_state_optimal)
4041 return DDF_state_optimal;
4042 if (seclevel == DDF_2MIRRORED) {
4043 if (state == DDF_state_optimal || other == DDF_state_optimal)
4044 return DDF_state_part_optimal;
4045 if (state == DDF_state_failed && other == DDF_state_failed)
4046 return DDF_state_failed;
4047 return DDF_state_degraded;
4048 } else {
4049 if (state == DDF_state_failed || other == DDF_state_failed)
4050 return DDF_state_failed;
4051 if (state == DDF_state_degraded || other == DDF_state_degraded)
4052 return DDF_state_degraded;
4053 return DDF_state_part_optimal;
4054 }
4055}
4056
4057static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4058{
4059 int state = get_bvd_state(ddf, &vcl->conf);
4060 unsigned int i;
4061 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4062 state = secondary_state(
4063 state,
4064 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4065 vcl->conf.srl);
4066 }
4067 return state;
4068}
4069
7a7cc504
NB
4070/*
4071 * The state of each disk is stored in the global phys_disk structure
4072 * in phys_disk.entries[n].state.
4073 * This makes various combinations awkward.
4074 * - When a device fails in any array, it must be failed in all arrays
4075 * that include a part of this device.
4076 * - When a component is rebuilding, we cannot include it officially in the
4077 * array unless this is the only array that uses the device.
4078 *
4079 * So: when transitioning:
4080 * Online -> failed, just set failed flag. monitor will propagate
4081 * spare -> online, the device might need to be added to the array.
4082 * spare -> failed, just set failed. Don't worry if in array or not.
4083 */
8d45d196 4084static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 4085{
7a7cc504 4086 struct ddf_super *ddf = a->container->sb;
baba3f4e 4087 unsigned int inst = a->info.container_member, n_bvd;
4088 struct vcl *vcl;
4089 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4090 &n_bvd, &vcl);
4091 int pd;
e1316fab
N
4092 struct mdinfo *mdi;
4093 struct dl *dl;
7a7cc504
NB
4094
4095 if (vc == NULL) {
2c514b71 4096 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
4097 return;
4098 }
e1316fab
N
4099 /* Find the matching slot in 'info'. */
4100 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4101 if (mdi->disk.raid_disk == n)
4102 break;
4103 if (!mdi)
4104 return;
4105
4106 /* and find the 'dl' entry corresponding to that. */
4107 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4108 if (mdi->state_fd >= 0 &&
4109 mdi->disk.major == dl->major &&
e1316fab
N
4110 mdi->disk.minor == dl->minor)
4111 break;
4112 if (!dl)
4113 return;
4114
baba3f4e 4115 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4116 if (pd < 0 || pd != dl->pdnum) {
4117 /* disk doesn't currently exist or has changed.
4118 * If it is now in_sync, insert it. */
baba3f4e 4119 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4120 __func__, dl->pdnum, dl->major, dl->minor,
ad60eea1 4121 __be32_to_cpu(dl->disk.refnum));
baba3f4e 4122 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4123 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
7a7cc504 4124 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4125 pd = dl->pdnum; /* FIXME: is this really correct ? */
4126 vc->phys_refnum[n_bvd] = dl->disk.refnum;
57a66662 4127 LBA_OFFSET(ddf, vc)[n_bvd] =
4128 __cpu_to_be64(mdi->data_offset);
e1316fab
N
4129 ddf->phys->entries[pd].type &=
4130 ~__cpu_to_be16(DDF_Global_Spare);
4131 ddf->phys->entries[pd].type |=
4132 __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 4133 ddf_set_updates_pending(ddf);
7a7cc504
NB
4134 }
4135 } else {
18a2f463 4136 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
4137 if (state & DS_FAULTY)
4138 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4139 if (state & DS_INSYNC) {
4140 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4141 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4142 }
18a2f463 4143 if (old != ddf->phys->entries[pd].state)
7d5a7ff3 4144 ddf_set_updates_pending(ddf);
7a7cc504
NB
4145 }
4146
2c514b71 4147 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 4148
7a7cc504
NB
4149 /* Now we need to check the state of the array and update
4150 * virtual_disk.entries[n].state.
4151 * It needs to be one of "optimal", "degraded", "failed".
4152 * I don't understand 'deleted' or 'missing'.
4153 */
0777d17d 4154 state = get_svd_state(ddf, vcl);
7a7cc504 4155
18a2f463
NB
4156 if (ddf->virt->entries[inst].state !=
4157 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4158 | state)) {
4159
4160 ddf->virt->entries[inst].state =
4161 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4162 | state;
7d5a7ff3 4163 ddf_set_updates_pending(ddf);
18a2f463 4164 }
7a7cc504 4165
549e9569
NB
4166}
4167
2e735d19 4168static void ddf_sync_metadata(struct supertype *st)
549e9569 4169{
7a7cc504
NB
4170
4171 /*
4172 * Write all data to all devices.
4173 * Later, we might be able to track whether only local changes
4174 * have been made, or whether any global data has been changed,
4175 * but ddf is sufficiently weird that it probably always
4176 * changes global data ....
4177 */
18a2f463
NB
4178 struct ddf_super *ddf = st->sb;
4179 if (!ddf->updates_pending)
4180 return;
4181 ddf->updates_pending = 0;
1cc7f4fe 4182 __write_init_super_ddf(st);
2c514b71 4183 dprintf("ddf: sync_metadata\n");
549e9569
NB
4184}
4185
f646805e 4186static int del_from_conflist(struct vcl **list, const char *guid)
4187{
4188 struct vcl **p;
4189 int found = 0;
4190 for (p = list; p && *p; p = &((*p)->next))
4191 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4192 found = 1;
4193 *p = (*p)->next;
4194 }
4195 return found;
4196}
4197
4198static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4199{
4200 struct dl *dl;
4201 unsigned int vdnum, i;
4202 vdnum = find_vde_by_guid(ddf, guid);
4203 if (vdnum == DDF_NOTFOUND) {
4204 pr_err("%s: could not find VD %s\n", __func__,
4205 guid_str(guid));
4206 return -1;
4207 }
4208 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4209 pr_err("%s: could not find conf %s\n", __func__,
4210 guid_str(guid));
4211 return -1;
4212 }
4213 for (dl = ddf->dlist; dl; dl = dl->next)
4214 for (i = 0; i < ddf->max_part; i++)
4215 if (dl->vlist[i] != NULL &&
4216 !memcmp(dl->vlist[i]->conf.guid, guid,
4217 DDF_GUID_LEN))
4218 dl->vlist[i] = NULL;
4219 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4220 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4221 return 0;
4222}
4223
4224static int kill_subarray_ddf(struct supertype *st)
4225{
4226 struct ddf_super *ddf = st->sb;
4227 /*
4228 * currentconf is set in container_content_ddf,
4229 * called with subarray arg
4230 */
4231 struct vcl *victim = ddf->currentconf;
4232 struct vd_config *conf;
4233 ddf->currentconf = NULL;
4234 unsigned int vdnum;
4235 if (!victim) {
4236 pr_err("%s: nothing to kill\n", __func__);
4237 return -1;
4238 }
4239 conf = &victim->conf;
4240 vdnum = find_vde_by_guid(ddf, conf->guid);
4241 if (vdnum == DDF_NOTFOUND) {
4242 pr_err("%s: could not find VD %s\n", __func__,
4243 guid_str(conf->guid));
4244 return -1;
4245 }
4246 if (st->update_tail) {
4247 struct virtual_disk *vd;
4248 int len = sizeof(struct virtual_disk)
4249 + sizeof(struct virtual_entry);
4250 vd = xmalloc(len);
4251 if (vd == NULL) {
4252 pr_err("%s: failed to allocate %d bytes\n", __func__,
4253 len);
4254 return -1;
4255 }
4256 memset(vd, 0 , len);
4257 vd->magic = DDF_VIRT_RECORDS_MAGIC;
4258 vd->populated_vdes = 0;
4259 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4260 /* we use DDF_state_deleted as marker */
4261 vd->entries[0].state = DDF_state_deleted;
4262 append_metadata_update(st, vd, len);
4263 } else
4264 _kill_subarray_ddf(ddf, conf->guid);
4265 return 0;
4266}
4267
c5943560 4268static void copy_matching_bvd(struct ddf_super *ddf,
4269 struct vd_config *conf,
4270 const struct metadata_update *update)
4271{
4272 unsigned int mppe =
4273 __be16_to_cpu(ddf->anchor.max_primary_element_entries);
4274 unsigned int len = ddf->conf_rec_len * 512;
4275 char *p;
4276 struct vd_config *vc;
4277 for (p = update->buf; p < update->buf + update->len; p += len) {
4278 vc = (struct vd_config *) p;
4279 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4280 memcpy(conf->phys_refnum, vc->phys_refnum,
4281 mppe * (sizeof(__u32) + sizeof(__u64)));
4282 return;
4283 }
4284 }
4285 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4286 conf->sec_elmnt_seq, guid_str(conf->guid));
4287}
4288
88c164f4
NB
4289static void ddf_process_update(struct supertype *st,
4290 struct metadata_update *update)
4291{
4292 /* Apply this update to the metadata.
4293 * The first 4 bytes are a DDF_*_MAGIC which guides
4294 * our actions.
4295 * Possible update are:
4296 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4297 * Add a new physical device or remove an old one.
4298 * Changes to this record only happen implicitly.
88c164f4
NB
4299 * used_pdes is the device number.
4300 * DDF_VIRT_RECORDS_MAGIC
4301 * Add a new VD. Possibly also change the 'access' bits.
4302 * populated_vdes is the entry number.
4303 * DDF_VD_CONF_MAGIC
4304 * New or updated VD. the VIRT_RECORD must already
4305 * exist. For an update, phys_refnum and lba_offset
4306 * (at least) are updated, and the VD_CONF must
4307 * be written to precisely those devices listed with
4308 * a phys_refnum.
4309 * DDF_SPARE_ASSIGN_MAGIC
4310 * replacement Spare Assignment Record... but for which device?
4311 *
4312 * So, e.g.:
4313 * - to create a new array, we send a VIRT_RECORD and
4314 * a VD_CONF. Then assemble and start the array.
4315 * - to activate a spare we send a VD_CONF to add the phys_refnum
4316 * and offset. This will also mark the spare as active with
4317 * a spare-assignment record.
4318 */
4319 struct ddf_super *ddf = st->sb;
4320 __u32 *magic = (__u32*)update->buf;
4321 struct phys_disk *pd;
4322 struct virtual_disk *vd;
4323 struct vd_config *vc;
4324 struct vcl *vcl;
4325 struct dl *dl;
f21e18ca 4326 unsigned int ent;
c5943560 4327 unsigned int pdnum, pd2, len;
88c164f4 4328
2c514b71 4329 dprintf("Process update %x\n", *magic);
7e1432fb 4330
88c164f4
NB
4331 switch (*magic) {
4332 case DDF_PHYS_RECORDS_MAGIC:
4333
4334 if (update->len != (sizeof(struct phys_disk) +
4335 sizeof(struct phys_disk_entry)))
4336 return;
4337 pd = (struct phys_disk*)update->buf;
4338
4339 ent = __be16_to_cpu(pd->used_pdes);
4340 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4341 return;
4dd968cc
N
4342 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4343 struct dl **dlp;
4344 /* removing this disk. */
4345 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4346 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4347 struct dl *dl = *dlp;
4348 if (dl->pdnum == (signed)ent) {
4349 close(dl->fd);
4350 dl->fd = -1;
4351 /* FIXME this doesn't free
4352 * dl->devname */
4353 update->space = dl;
4354 *dlp = dl->next;
4355 break;
4356 }
4357 }
7d5a7ff3 4358 ddf_set_updates_pending(ddf);
4dd968cc
N
4359 return;
4360 }
88c164f4
NB
4361 if (!all_ff(ddf->phys->entries[ent].guid))
4362 return;
4363 ddf->phys->entries[ent] = pd->entries[0];
4364 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 4365 __be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4366 ddf_set_updates_pending(ddf);
2cc2983d
N
4367 if (ddf->add_list) {
4368 struct active_array *a;
4369 struct dl *al = ddf->add_list;
4370 ddf->add_list = al->next;
4371
4372 al->next = ddf->dlist;
4373 ddf->dlist = al;
4374
4375 /* As a device has been added, we should check
4376 * for any degraded devices that might make
4377 * use of this spare */
4378 for (a = st->arrays ; a; a=a->next)
4379 a->check_degraded = 1;
4380 }
88c164f4
NB
4381 break;
4382
4383 case DDF_VIRT_RECORDS_MAGIC:
4384
4385 if (update->len != (sizeof(struct virtual_disk) +
4386 sizeof(struct virtual_entry)))
4387 return;
4388 vd = (struct virtual_disk*)update->buf;
4389
f646805e 4390 if (vd->entries[0].state == DDF_state_deleted) {
4391 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4392 return;
4393 } else {
4394
6a7e7ecc 4395 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4396 if (ent != DDF_NOTFOUND) {
4397 dprintf("%s: VD %s exists already in slot %d\n",
4398 __func__, guid_str(vd->entries[0].guid),
4399 ent);
4400 return;
4401 }
f646805e 4402 ent = find_unused_vde(ddf);
4403 if (ent == DDF_NOTFOUND)
4404 return;
4405 ddf->virt->entries[ent] = vd->entries[0];
4406 ddf->virt->populated_vdes =
4407 __cpu_to_be16(
4408 1 + __be16_to_cpu(
4409 ddf->virt->populated_vdes));
ed5ff7a2 4410 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4411 __func__, guid_str(vd->entries[0].guid), ent,
4412 ddf->virt->entries[ent].state,
4413 ddf->virt->entries[ent].init_state);
f646805e 4414 }
7d5a7ff3 4415 ddf_set_updates_pending(ddf);
88c164f4
NB
4416 break;
4417
4418 case DDF_VD_CONF_MAGIC:
88c164f4 4419 vc = (struct vd_config*)update->buf;
c5943560 4420 len = ddf->conf_rec_len * 512;
4421 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4422 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4423 __func__, guid_str(vc->guid), update->len,
4424 vc->sec_elmnt_count);
4425 return;
4426 }
88c164f4
NB
4427 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4428 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4429 break;
ed5ff7a2 4430 dprintf("%s: conf update for %s (%s)\n", __func__,
4431 guid_str(vc->guid), (vcl ? "old" : "new"));
88c164f4
NB
4432 if (vcl) {
4433 /* An update, just copy the phys_refnum and lba_offset
4434 * fields
4435 */
c5943560 4436 unsigned int i;
4437 copy_matching_bvd(ddf, &vcl->conf, update);
4438 for (i = 1; i < vc->sec_elmnt_count; i++)
4439 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4440 update);
88c164f4
NB
4441 } else {
4442 /* A new VD_CONF */
c5943560 4443 unsigned int i;
e6b9548d
DW
4444 if (!update->space)
4445 return;
88c164f4
NB
4446 vcl = update->space;
4447 update->space = NULL;
4448 vcl->next = ddf->conflist;
c5943560 4449 memcpy(&vcl->conf, vc, len);
fb9d0acb 4450 ent = find_vde_by_guid(ddf, vc->guid);
4451 if (ent == DDF_NOTFOUND)
4452 return;
4453 vcl->vcnum = ent;
88c164f4 4454 ddf->conflist = vcl;
c5943560 4455 for (i = 1; i < vc->sec_elmnt_count; i++)
4456 memcpy(vcl->other_bvds[i-1],
4457 update->buf + len * i, len);
88c164f4 4458 }
c7079c84
N
4459 /* Set DDF_Transition on all Failed devices - to help
4460 * us detect those that are no longer in use
4461 */
4462 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4463 if (ddf->phys->entries[pdnum].state
4464 & __be16_to_cpu(DDF_Failed))
4465 ddf->phys->entries[pdnum].state
4466 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
4467 /* Now make sure vlist is correct for each dl. */
4468 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca 4469 unsigned int vn = 0;
8401644c 4470 int in_degraded = 0;
5838fccd 4471 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4472 unsigned int dn, ibvd;
4473 const struct vd_config *conf;
4474 int vstate;
4475 dn = get_pd_index_from_refnum(vcl,
4476 dl->disk.refnum,
4477 ddf->mppe,
4478 &conf, &ibvd);
4479 if (dn == DDF_NOTFOUND)
4480 continue;
4481 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
ad60eea1 4482 dl->pdnum,
4483 __be32_to_cpu(dl->disk.refnum),
5838fccd 4484 guid_str(conf->guid),
4485 conf->sec_elmnt_seq, vn);
4486 /* Clear the Transition flag */
4487 if (ddf->phys->entries[dl->pdnum].state
4488 & __be16_to_cpu(DDF_Failed))
4489 ddf->phys->entries[dl->pdnum].state &=
4490 ~__be16_to_cpu(DDF_Transition);
4491 dl->vlist[vn++] = vcl;
4492 vstate = ddf->virt->entries[vcl->vcnum].state
4493 & DDF_state_mask;
4494 if (vstate == DDF_state_degraded ||
4495 vstate == DDF_state_part_optimal)
4496 in_degraded = 1;
4497 }
88c164f4
NB
4498 while (vn < ddf->max_part)
4499 dl->vlist[vn++] = NULL;
7e1432fb
NB
4500 if (dl->vlist[0]) {
4501 ddf->phys->entries[dl->pdnum].type &=
4502 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
4503 if (!(ddf->phys->entries[dl->pdnum].type &
4504 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
4505 ddf->phys->entries[dl->pdnum].type |=
4506 __cpu_to_be16(DDF_Active_in_VD);
4507 if (in_degraded)
4508 ddf->phys->entries[dl->pdnum].state |=
4509 __cpu_to_be16(DDF_Rebuilding);
4510 }
7e1432fb
NB
4511 }
4512 if (dl->spare) {
4513 ddf->phys->entries[dl->pdnum].type &=
4514 ~__cpu_to_be16(DDF_Global_Spare);
4515 ddf->phys->entries[dl->pdnum].type |=
4516 __cpu_to_be16(DDF_Spare);
4517 }
4518 if (!dl->vlist[0] && !dl->spare) {
4519 ddf->phys->entries[dl->pdnum].type |=
4520 __cpu_to_be16(DDF_Global_Spare);
4521 ddf->phys->entries[dl->pdnum].type &=
4522 ~__cpu_to_be16(DDF_Spare |
4523 DDF_Active_in_VD);
4524 }
88c164f4 4525 }
c7079c84
N
4526
4527 /* Now remove any 'Failed' devices that are not part
4528 * of any VD. They will have the Transition flag set.
4529 * Once done, we need to update all dl->pdnum numbers.
4530 */
4531 pd2 = 0;
4532 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4533 if ((ddf->phys->entries[pdnum].state
4534 & __be16_to_cpu(DDF_Failed))
4535 && (ddf->phys->entries[pdnum].state
4536 & __be16_to_cpu(DDF_Transition)))
4537 /* skip this one */;
4538 else if (pdnum == pd2)
4539 pd2++;
4540 else {
4541 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4542 for (dl = ddf->dlist; dl; dl = dl->next)
4543 if (dl->pdnum == (int)pdnum)
4544 dl->pdnum = pd2;
4545 pd2++;
4546 }
4547 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4548 while (pd2 < pdnum) {
4549 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4550 pd2++;
4551 }
4552
7d5a7ff3 4553 ddf_set_updates_pending(ddf);
88c164f4
NB
4554 break;
4555 case DDF_SPARE_ASSIGN_MAGIC:
4556 default: break;
4557 }
4558}
4559
edd8d13c
NB
4560static void ddf_prepare_update(struct supertype *st,
4561 struct metadata_update *update)
4562{
4563 /* This update arrived at managemon.
4564 * We are about to pass it to monitor.
4565 * If a malloc is needed, do it here.
4566 */
4567 struct ddf_super *ddf = st->sb;
4568 __u32 *magic = (__u32*)update->buf;
c5943560 4569 if (*magic == DDF_VD_CONF_MAGIC) {
4570 struct vcl *vcl;
4571 struct vd_config *conf = (struct vd_config *) update->buf;
e6b9548d 4572 if (posix_memalign(&update->space, 512,
613b0d17 4573 offsetof(struct vcl, conf)
c5943560 4574 + ddf->conf_rec_len * 512) != 0) {
4575 update->space = NULL;
4576 return;
4577 }
4578 vcl = update->space;
4579 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4580 if (alloc_other_bvds(ddf, vcl) != 0) {
4581 free(update->space);
e6b9548d 4582 update->space = NULL;
c5943560 4583 }
4584 }
edd8d13c
NB
4585}
4586
7e1432fb
NB
4587/*
4588 * Check if the array 'a' is degraded but not failed.
4589 * If it is, find as many spares as are available and needed and
4590 * arrange for their inclusion.
4591 * We only choose devices which are not already in the array,
4592 * and prefer those with a spare-assignment to this array.
4593 * otherwise we choose global spares - assuming always that
4594 * there is enough room.
4595 * For each spare that we assign, we return an 'mdinfo' which
4596 * describes the position for the device in the array.
4597 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4598 * the new phys_refnum and lba_offset values.
4599 *
4600 * Only worry about BVDs at the moment.
4601 */
4602static struct mdinfo *ddf_activate_spare(struct active_array *a,
4603 struct metadata_update **updates)
4604{
4605 int working = 0;
4606 struct mdinfo *d;
4607 struct ddf_super *ddf = a->container->sb;
4608 int global_ok = 0;
4609 struct mdinfo *rv = NULL;
4610 struct mdinfo *di;
4611 struct metadata_update *mu;
4612 struct dl *dl;
4613 int i;
baba3f4e 4614 struct vcl *vcl;
7e1432fb 4615 struct vd_config *vc;
baba3f4e 4616 unsigned int n_bvd;
7e1432fb 4617
7e1432fb
NB
4618 for (d = a->info.devs ; d ; d = d->next) {
4619 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4620 d->state_fd >= 0)
7e1432fb
NB
4621 /* wait for Removal to happen */
4622 return NULL;
4623 if (d->state_fd >= 0)
4624 working ++;
4625 }
4626
2c514b71
NB
4627 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4628 a->info.array.level);
7e1432fb
NB
4629 if (working == a->info.array.raid_disks)
4630 return NULL; /* array not degraded */
4631 switch (a->info.array.level) {
4632 case 1:
4633 if (working == 0)
4634 return NULL; /* failed */
4635 break;
4636 case 4:
4637 case 5:
4638 if (working < a->info.array.raid_disks - 1)
4639 return NULL; /* failed */
4640 break;
4641 case 6:
4642 if (working < a->info.array.raid_disks - 2)
4643 return NULL; /* failed */
4644 break;
4645 default: /* concat or stripe */
4646 return NULL; /* failed */
4647 }
4648
4649 /* For each slot, if it is not working, find a spare */
4650 dl = ddf->dlist;
4651 for (i = 0; i < a->info.array.raid_disks; i++) {
4652 for (d = a->info.devs ; d ; d = d->next)
4653 if (d->disk.raid_disk == i)
4654 break;
2c514b71 4655 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4656 if (d && (d->state_fd >= 0))
4657 continue;
4658
4659 /* OK, this device needs recovery. Find a spare */
4660 again:
4661 for ( ; dl ; dl = dl->next) {
4662 unsigned long long esize;
4663 unsigned long long pos;
4664 struct mdinfo *d2;
4665 int is_global = 0;
4666 int is_dedicated = 0;
4667 struct extent *ex;
f21e18ca 4668 unsigned int j;
7e1432fb
NB
4669 /* If in this array, skip */
4670 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4671 if (d2->state_fd >= 0 &&
4672 d2->disk.major == dl->major &&
7e1432fb 4673 d2->disk.minor == dl->minor) {
2c514b71 4674 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4675 break;
4676 }
4677 if (d2)
4678 continue;
4679 if (ddf->phys->entries[dl->pdnum].type &
4680 __cpu_to_be16(DDF_Spare)) {
4681 /* Check spare assign record */
4682 if (dl->spare) {
4683 if (dl->spare->type & DDF_spare_dedicated) {
4684 /* check spare_ents for guid */
4685 for (j = 0 ;
4686 j < __be16_to_cpu(dl->spare->populated);
4687 j++) {
4688 if (memcmp(dl->spare->spare_ents[j].guid,
4689 ddf->virt->entries[a->info.container_member].guid,
4690 DDF_GUID_LEN) == 0)
4691 is_dedicated = 1;
4692 }
4693 } else
4694 is_global = 1;
4695 }
4696 } else if (ddf->phys->entries[dl->pdnum].type &
4697 __cpu_to_be16(DDF_Global_Spare)) {
4698 is_global = 1;
e0e7aeaa
N
4699 } else if (!(ddf->phys->entries[dl->pdnum].state &
4700 __cpu_to_be16(DDF_Failed))) {
4701 /* we can possibly use some of this */
4702 is_global = 1;
7e1432fb
NB
4703 }
4704 if ( ! (is_dedicated ||
4705 (is_global && global_ok))) {
2c514b71 4706 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4707 is_dedicated, is_global);
7e1432fb
NB
4708 continue;
4709 }
4710
4711 /* We are allowed to use this device - is there space?
4712 * We need a->info.component_size sectors */
4713 ex = get_extents(ddf, dl);
4714 if (!ex) {
2c514b71 4715 dprintf("cannot get extents\n");
7e1432fb
NB
4716 continue;
4717 }
4718 j = 0; pos = 0;
4719 esize = 0;
4720
4721 do {
4722 esize = ex[j].start - pos;
4723 if (esize >= a->info.component_size)
4724 break;
e5cc7d46
N
4725 pos = ex[j].start + ex[j].size;
4726 j++;
4727 } while (ex[j-1].size);
7e1432fb
NB
4728
4729 free(ex);
4730 if (esize < a->info.component_size) {
e5cc7d46
N
4731 dprintf("%x:%x has no room: %llu %llu\n",
4732 dl->major, dl->minor,
2c514b71 4733 esize, a->info.component_size);
7e1432fb
NB
4734 /* No room */
4735 continue;
4736 }
4737
4738 /* Cool, we have a device with some space at pos */
503975b9 4739 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4740 di->disk.number = i;
4741 di->disk.raid_disk = i;
4742 di->disk.major = dl->major;
4743 di->disk.minor = dl->minor;
4744 di->disk.state = 0;
d23534e4 4745 di->recovery_start = 0;
7e1432fb
NB
4746 di->data_offset = pos;
4747 di->component_size = a->info.component_size;
4748 di->container_member = dl->pdnum;
4749 di->next = rv;
4750 rv = di;
2c514b71
NB
4751 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4752 i, pos);
7e1432fb
NB
4753
4754 break;
4755 }
4756 if (!dl && ! global_ok) {
4757 /* not enough dedicated spares, try global */
4758 global_ok = 1;
4759 dl = ddf->dlist;
4760 goto again;
4761 }
4762 }
4763
4764 if (!rv)
4765 /* No spares found */
4766 return rv;
4767 /* Now 'rv' has a list of devices to return.
4768 * Create a metadata_update record to update the
4769 * phys_refnum and lba_offset values
4770 */
503975b9
N
4771 mu = xmalloc(sizeof(*mu));
4772 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4773 free(mu);
4774 mu = NULL;
4775 }
503975b9 4776 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4777 mu->len = ddf->conf_rec_len * 512;
4778 mu->space = NULL;
f50ae22e 4779 mu->space_list = NULL;
7e1432fb 4780 mu->next = *updates;
baba3f4e 4781 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4782 &n_bvd, &vcl);
7e1432fb
NB
4783 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4784
4785 vc = (struct vd_config*)mu->buf;
7e1432fb
NB
4786 for (di = rv ; di ; di = di->next) {
4787 vc->phys_refnum[di->disk.raid_disk] =
4788 ddf->phys->entries[dl->pdnum].refnum;
57a66662 4789 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4790 = __cpu_to_be64(di->data_offset);
7e1432fb
NB
4791 }
4792 *updates = mu;
4793 return rv;
4794}
0e600426 4795#endif /* MDASSEMBLE */
7e1432fb 4796
b640a252
N
4797static int ddf_level_to_layout(int level)
4798{
4799 switch(level) {
4800 case 0:
4801 case 1:
4802 return 0;
4803 case 5:
4804 return ALGORITHM_LEFT_SYMMETRIC;
4805 case 6:
4806 return ALGORITHM_ROTATING_N_CONTINUE;
4807 case 10:
4808 return 0x102;
4809 default:
4810 return UnSet;
4811 }
4812}
4813
30f58b22
DW
4814static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4815{
4816 if (level && *level == UnSet)
4817 *level = LEVEL_CONTAINER;
4818
4819 if (level && layout && *layout == UnSet)
4820 *layout = ddf_level_to_layout(*level);
4821}
4822
a322f70c
DW
4823struct superswitch super_ddf = {
4824#ifndef MDASSEMBLE
4825 .examine_super = examine_super_ddf,
4826 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4827 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4828 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4829 .detail_super = detail_super_ddf,
4830 .brief_detail_super = brief_detail_super_ddf,
4831 .validate_geometry = validate_geometry_ddf,
78e44928 4832 .write_init_super = write_init_super_ddf,
0e600426 4833 .add_to_super = add_to_super_ddf,
4dd968cc 4834 .remove_from_super = remove_from_super_ddf,
2b959fbf 4835 .load_container = load_container_ddf,
74db60b0 4836 .copy_metadata = copy_metadata_ddf,
a322f70c
DW
4837#endif
4838 .match_home = match_home_ddf,
4839 .uuid_from_super= uuid_from_super_ddf,
4840 .getinfo_super = getinfo_super_ddf,
4841 .update_super = update_super_ddf,
4842
4843 .avail_size = avail_size_ddf,
4844
a19c88b8
NB
4845 .compare_super = compare_super_ddf,
4846
a322f70c 4847 .load_super = load_super_ddf,
ba7eb04f 4848 .init_super = init_super_ddf,
955e9ea1 4849 .store_super = store_super_ddf,
a322f70c
DW
4850 .free_super = free_super_ddf,
4851 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4852 .container_content = container_content_ddf,
30f58b22 4853 .default_geometry = default_geometry_ddf,
f646805e 4854 .kill_subarray = kill_subarray_ddf,
a322f70c 4855
a322f70c 4856 .external = 1,
549e9569 4857
0e600426 4858#ifndef MDASSEMBLE
549e9569
NB
4859/* for mdmon */
4860 .open_new = ddf_open_new,
ed9d66aa 4861 .set_array_state= ddf_set_array_state,
549e9569
NB
4862 .set_disk = ddf_set_disk,
4863 .sync_metadata = ddf_sync_metadata,
88c164f4 4864 .process_update = ddf_process_update,
edd8d13c 4865 .prepare_update = ddf_prepare_update,
7e1432fb 4866 .activate_spare = ddf_activate_spare,
0e600426 4867#endif
4cce4069 4868 .name = "ddf",
a322f70c 4869};