/*
 * super-ddf.c - DDF (SNIA Common RAID Disk Data Format) metadata support.
 * (Web-scrape residue removed; the captured commit subject was
 *  "DDF: layout_md2ddf: new md->DDF layout conversion".)
 */
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF takes from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28 #define HAVE_STDINT_H 1
29 #include "mdadm.h"
30 #include "mdmon.h"
31 #include "sha1.h"
32 #include <values.h>
33
34 /* a non-official T10 name for creation GUIDs */
35 static char T10[] = "Linux-MD";
36
37 /* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41 #define DECADE (3600*24*(365*10+2))
42 unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
47 #define DDF_NOTFOUND (~0U)
48 #define DDF_CONTAINER (DDF_NOTFOUND-1)
49
50 /* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
61 /* Primary Raid Level (PRL) */
62 #define DDF_RAID0 0x00
63 #define DDF_RAID1 0x01
64 #define DDF_RAID3 0x03
65 #define DDF_RAID4 0x04
66 #define DDF_RAID5 0x05
67 #define DDF_RAID1E 0x11
68 #define DDF_JBOD 0x0f
69 #define DDF_CONCAT 0x1f
70 #define DDF_RAID5E 0x15
71 #define DDF_RAID5EE 0x25
72 #define DDF_RAID6 0x06
73
74 /* Raid Level Qualifier (RLQ) */
75 #define DDF_RAID0_SIMPLE 0x00
76 #define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77 #define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78 #define DDF_RAID3_0 0x00 /* parity in first extent */
79 #define DDF_RAID3_N 0x01 /* parity in last extent */
80 #define DDF_RAID4_0 0x00 /* parity in first extent */
81 #define DDF_RAID4_N 0x01 /* parity in last extent */
82 /* these apply to raid5e and raid5ee as well */
83 #define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
84 #define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
85 #define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86 #define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
87
88 #define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89 #define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
90
91 /* Secondary RAID Level (SRL) */
92 #define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93 #define DDF_2MIRRORED 0x01
94 #define DDF_2CONCAT 0x02
95 #define DDF_2SPANNED 0x03 /* This is also weird - be careful */
96
97 /* Magic numbers */
98 #define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99 #define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100 #define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101 #define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102 #define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103 #define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104 #define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105 #define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106 #define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107 #define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
108
109 #define DDF_GUID_LEN 24
110 #define DDF_REVISION_0 "01.00.00"
111 #define DDF_REVISION_2 "01.02.00"
112
/* On-disk DDF header.  The anchor (last sector of the device) and the
 * primary/secondary copies all share this layout; 'type' distinguishes
 * them.  Exactly 512 bytes; multibyte fields are bigendian and pad
 * bytes are 0xff. */
struct ddf_header {
	__u32	magic;		/* DDF_HEADER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	char	revision[8];	/* 01.02.00 */
	__u32	seq;		/* starts at '1' */
	__u32	timestamp;
	__u8	openflag;
	__u8	foreignflag;
	__u8	enforcegroups;
	__u8	pad0;		/* 0xff */
	__u8	pad1[12];	/* 12 * 0xff */
	/* 64 bytes so far */
	__u8	header_ext[32];	/* reserved: fill with 0xff */
	__u64	primary_lba;
	__u64	secondary_lba;
	__u8	type;		/* DDF_HEADER_ANCHOR/PRIMARY/SECONDARY */
	__u8	pad2[3];	/* 0xff */
	__u32	workspace_len;	/* sectors for vendor space -
				 * at least 32768(sectors) */
	__u64	workspace_lba;
	__u16	max_pd_entries;	/* one of 15, 63, 255, 1023, 4095 */
	__u16	max_vd_entries;	/* 2^(4,6,8,10,12)-1 : i.e. as above */
	__u16	max_partitions;	/* i.e. max num of configuration
				   record entries per disk */
	__u16	config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
				    *12/512) */
	__u16	max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
	__u8	pad3[54];	/* 0xff */
	/* 192 bytes so far */
	/* Section offsets are in sectors relative to the start of the
	 * header's metadata area; lengths are in sectors too. */
	__u32	controller_section_offset;
	__u32	controller_section_length;
	__u32	phys_section_offset;
	__u32	phys_section_length;
	__u32	virt_section_offset;
	__u32	virt_section_length;
	__u32	config_section_offset;
	__u32	config_section_length;
	__u32	data_section_offset;
	__u32	data_section_length;
	__u32	bbm_section_offset;
	__u32	bbm_section_length;
	__u32	diag_space_offset;
	__u32	diag_space_length;
	__u32	vendor_offset;
	__u32	vendor_length;
	/* 256 bytes so far */
	__u8	pad4[256];	/* 0xff */
};
162
163 /* type field */
164 #define DDF_HEADER_ANCHOR 0x00
165 #define DDF_HEADER_PRIMARY 0x01
166 #define DDF_HEADER_SECONDARY 0x02
167
/* The content of the 'controller section' - global scope.
 * Identifies the controller that last wrote the metadata. */
struct ddf_controller_data {
	__u32	magic;		/* DDF_CONTROLLER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	struct controller_type {
		/* PCI-style identification of the controller */
		__u16	vendor_id;
		__u16	device_id;
		__u16	sub_vendor_id;
		__u16	sub_device_id;
	} type;
	char	product_id[16];
	__u8	pad[8];		/* 0xff */
	__u8	vendor_data[448];
};
183
/* The content of phys_section - global scope.
 * One entry per physical disk known to the DDF group. */
struct phys_disk {
	__u32	magic;		/* DDF_PHYS_RECORDS_MAGIC */
	__u32	crc;
	__u16	used_pdes;
	__u16	max_pdes;
	__u8	pad[52];
	struct phys_disk_entry {
		char	guid[DDF_GUID_LEN];
		__u32	refnum;
		__u16	type;		/* bitmap: DDF_Active_in_VD etc */
		__u16	state;		/* bitmap: DDF_Online etc */
		__u64	config_size;	/* DDF structures must be after here */
		char	path[18];	/* another horrible structure really */
		__u8	pad[6];
	} entries[0];			/* max_pdes entries on disk */
};
201
202 /* phys_disk_entry.type is a bitmap - bigendian remember */
203 #define DDF_Forced_PD_GUID 1
204 #define DDF_Active_in_VD 2
205 #define DDF_Global_Spare 4 /* VD_CONF records are ignored */
206 #define DDF_Spare 8 /* overrides Global_spare */
207 #define DDF_Foreign 16
208 #define DDF_Legacy 32 /* no DDF on this device */
209
210 #define DDF_Interface_mask 0xf00
211 #define DDF_Interface_SCSI 0x100
212 #define DDF_Interface_SAS 0x200
213 #define DDF_Interface_SATA 0x300
214 #define DDF_Interface_FC 0x400
215
216 /* phys_disk_entry.state is a bigendian bitmap */
217 #define DDF_Online 1
218 #define DDF_Failed 2 /* overrides 1,4,8 */
219 #define DDF_Rebuilding 4
220 #define DDF_Transition 8
221 #define DDF_SMART 16
222 #define DDF_ReadErrors 32
223 #define DDF_Missing 64
224
/* The content of the virt_section - global scope.
 * One entry per virtual disk (array) in the DDF group. */
struct virtual_disk {
	__u32	magic;		/* DDF_VIRT_RECORDS_MAGIC */
	__u32	crc;
	__u16	populated_vdes;
	__u16	max_vdes;
	__u8	pad[52];
	struct virtual_entry {
		char	guid[DDF_GUID_LEN];
		__u16	unit;
		__u16	pad0;		/* 0xffff */
		__u16	guid_crc;
		__u16	type;		/* bitmap: DDF_Shared etc */
		__u8	state;		/* bitmap: DDF_state_* */
		__u8	init_state;	/* bitmap: DDF_initstate_* and
					 * DDF_access_* */
		__u8	pad1[14];
		char	name[16];
	} entries[0];			/* max_vdes entries on disk */
};
244
245 /* virtual_entry.type is a bitmap - bigendian */
246 #define DDF_Shared 1
247 #define DDF_Enforce_Groups 2
248 #define DDF_Unicode 4
249 #define DDF_Owner_Valid 8
250
251 /* virtual_entry.state is a bigendian bitmap */
252 #define DDF_state_mask 0x7
253 #define DDF_state_optimal 0x0
254 #define DDF_state_degraded 0x1
255 #define DDF_state_deleted 0x2
256 #define DDF_state_missing 0x3
257 #define DDF_state_failed 0x4
258 #define DDF_state_part_optimal 0x5
259
260 #define DDF_state_morphing 0x8
261 #define DDF_state_inconsistent 0x10
262
263 /* virtual_entry.init_state is a bigendian bitmap */
264 #define DDF_initstate_mask 0x03
265 #define DDF_init_not 0x00
266 #define DDF_init_quick 0x01 /* initialisation is progress.
267 * i.e. 'state_inconsistent' */
268 #define DDF_init_full 0x02
269
270 #define DDF_access_mask 0xc0
271 #define DDF_access_rw 0x00
272 #define DDF_access_ro 0x80
273 #define DDF_access_blocked 0xc0
274
275 /* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
/* One configuration record, describing one virtual disk (or, for
 * two-level RAID, one basic VD of it).  Lives in the config_section,
 * which is local to each disk.  Record size is config_record_len
 * sectors (see struct ddf_header). */
struct vd_config {
	__u32	magic;		/* DDF_VD_CONF_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	timestamp;
	__u32	seqnum;		/* bigendian; higher == newer record */
	__u8	pad0[24];
	__u16	prim_elmnt_count;
	__u8	chunk_shift;	/* 0 == 512, 1==1024 etc */
	__u8	prl;		/* Primary RAID Level: DDF_RAID* */
	__u8	rlq;		/* RAID Level Qualifier */
	__u8	sec_elmnt_count;
	__u8	sec_elmnt_seq;
	__u8	srl;		/* Secondary RAID Level: DDF_2* */
	__u64	blocks;		/* blocks per component could be different
				 * on different component devices...(only
				 * for concat I hope) */
	__u64	array_blocks;	/* blocks in array */
	__u8	pad1[8];
	__u32	spare_refs[8];
	__u8	cache_pol[8];
	__u8	bg_rate;
	__u8	pad2[3];
	__u8	pad3[52];
	__u8	pad4[192];
	__u8	v0[32];	/* reserved- 0xff */
	__u8	v1[32];	/* reserved- 0xff */
	__u8	v2[16];	/* reserved- 0xff */
	__u8	v3[16];	/* reserved- 0xff */
	__u8	vendor[32];
	__u32	phys_refnum[0];	/* refnum of each disk in sequence */
	/*__u64	lba_offset[0];  LBA offset in each phys.  Note extents in a
				bvd are always the same size */
};
314
315 /* vd_config.cache_pol[7] is a bitmap */
316 #define DDF_cache_writeback 1 /* else writethrough */
317 #define DDF_cache_wadaptive 2 /* only applies if writeback */
318 #define DDF_cache_readahead 4
319 #define DDF_cache_radaptive 8 /* only if doing read-ahead */
320 #define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
321 #define DDF_cache_wallowed 32 /* enable write caching */
322 #define DDF_cache_rallowed 64 /* enable read caching */
323
/* A spare-assignment record, also stored in the (per-disk)
 * config_section; it shares the section with vd_config records and is
 * told apart by its magic. */
struct spare_assign {
	__u32	magic;		/* DDF_SPARE_ASSIGN_MAGIC */
	__u32	crc;
	__u32	timestamp;
	__u8	reserved[7];
	__u8	type;		/* bitmap: DDF_spare_* */
	__u16	populated;	/* SAEs used */
	__u16	max;		/* max SAEs */
	__u8	pad[8];
	struct spare_assign_entry {
		char	guid[DDF_GUID_LEN];	/* VD this spare serves */
		__u16	secondary_element;
		__u8	pad[6];
	} spare_ents[0];
};
339 /* spare_assign.type is a bitmap */
340 #define DDF_spare_dedicated 0x1 /* else global */
341 #define DDF_spare_revertible 0x2 /* else committable */
342 #define DDF_spare_active 0x4 /* else not active */
343 #define DDF_spare_affinity 0x8 /* enclosure affinity */
344
/* The data_section contents - local scope.
 * Identifies this particular physical disk (matched against
 * phys_disk_entry by GUID). */
struct disk_data {
	__u32	magic;		/* DDF_PHYS_DATA_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	refnum;		/* crc of some magic drive data ... */
	__u8	forced_ref;	/* set when above was not result of magic */
	__u8	forced_guid;	/* set if guid was forced rather than magic */
	__u8	vendor[32];
	__u8	pad[442];
};
356
/* bbm_section content - the bad block (remapping) log */
struct bad_block_log {
	__u32	magic;		/* DDF_BBM_LOG_MAGIC */
	__u32	crc;
	__u16	entry_count;
	__u32	spare_count;
	__u8	pad[10];
	__u64	first_spare;
	struct mapped_block {
		__u64	defective_start;
		__u32	replacement_start;
		__u16	remap_count;
		__u8	pad[2];
	} entries[0];
};
372
373 /* Struct for internally holding ddf structures */
374 /* The DDF structure stored on each device is potentially
375 * quite different, as some data is global and some is local.
376 * The global data is:
377 * - ddf header
378 * - controller_data
379 * - Physical disk records
380 * - Virtual disk records
381 * The local data is:
382 * - Configuration records
383 * - Physical Disk data section
384 * ( and Bad block and vendor which I don't care about yet).
385 *
386 * The local data is parsed into separate lists as it is read
387 * and reconstructed for writing. This means that we only need
388 * to make config changes once and they are automatically
389 * propagated to all devices.
390 * Note that the ddf_super has space of the conf and disk data
391 * for this disk and also for a list of all such data.
392 * The list is only used for the superblock that is being
393 * built in Create or Assemble to describe the whole array.
394 */
/* In-memory representation of one disk's DDF metadata plus the
 * merged, array-wide view (conflist/dlist) built while loading.
 * See the long comment above for the global/local data split. */
struct ddf_super {
	struct ddf_header anchor, primary, secondary;
	struct ddf_controller_data controller;
	struct ddf_header *active;	/* &primary or &secondary: the one
					 * chosen by load_ddf_headers() */
	struct phys_disk	*phys;
	struct virtual_disk	*virt;
	int pdsize, vdsize;		/* byte sizes of ->phys / ->virt */
	unsigned int max_part, mppe, conf_rec_len; /* cached header limits:
						    * max_partitions,
						    * max_primary_element_entries,
						    * config_record_len */
	int currentdev;
	int updates_pending;		/* set via ddf_set_updates_pending() */
	/* One vcl per known virtual disk; shared by all dl's via vlist */
	struct vcl {
		union {
			char space[512];
			struct {
				struct vcl	*next;
				__u64	*lba_offset; /* location in 'conf' of
						      * the lba table */
				unsigned int	vcnum; /* index into ->virt */
				struct vd_config **other_bvds; /* configs of the
						      * other sec-level BVDs;
						      * sec_elmnt_count-1 slots */
				__u64		*block_sizes; /* NULL if all the same */
			};
		};
		struct vd_config conf;	/* followed by phys_refnum/lba table */
	} *conflist, *currentconf;
	/* One dl per physical disk we have opened */
	struct dl {
		union {
			char space[512];
			struct {
				struct dl	*next;
				int major, minor;
				char *devname;
				int fd;		/* -1 if not kept open */
				unsigned long long size; /* sectors */
				unsigned long long primary_lba; /* sectors */
				unsigned long long secondary_lba; /* sectors */
				unsigned long long workspace_lba; /* sectors */
				int pdnum;	/* index in ->phys */
				struct spare_assign *spare;
				void *mdupdate; /* hold metadata update */

				/* These fields used by auto-layout */
				int raiddisk; /* slot to fill in autolayout */
				__u64 esize;
			};
		};
		struct disk_data disk;
		struct vcl *vlist[0]; /* max_part in size */
	} *dlist, *add_list;
};
444
445 #ifndef offsetof
446 #define offsetof(t,f) ((size_t)&(((t*)0)->f))
447 #endif
448
#if DEBUG
static int all_ff(const char *guid);
/* Debug helper: dump state/init_state of every populated entry in the
 * virtual disk table; 'msg' is normally the calling function's name. */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int i;
	dprintf("%s/%s: ", __func__, msg);
	for (i = 0; i < __be16_to_cpu(ddf->active->max_vd_entries); i++) {
		/* an all-0xff GUID marks an unused slot */
		if (all_ff(ddf->virt->entries[i].guid))
			continue;
		dprintf("%u(s=%02x i=%02x) ", i,
			ddf->virt->entries[i].state,
			ddf->virt->entries[i].init_state);
	}
	dprintf("\n");
}
#else
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif

/* Mark the in-memory metadata dirty and trace the transition */
#define ddf_set_updates_pending(x) \
	do { (x)->updates_pending = 1; pr_state(x, __func__); } while (0)
470
471 static unsigned int calc_crc(void *buf, int len)
472 {
473 /* crcs are always at the same place as in the ddf_header */
474 struct ddf_header *ddf = buf;
475 __u32 oldcrc = ddf->crc;
476 __u32 newcrc;
477 ddf->crc = 0xffffffff;
478
479 newcrc = crc32(0, buf, len);
480 ddf->crc = oldcrc;
481 /* The crc is store (like everything) bigendian, so convert
482 * here for simplicity
483 */
484 return __cpu_to_be32(newcrc);
485 }
486
#define DDF_INVALID_LEVEL 0xff
#define DDF_NO_SECONDARY 0xff
/* Report an md level/layout/disk-count combination that has no DDF
 * representation.  Returns DDF_INVALID_LEVEL so callers can simply
 * "return err_bad_md_layout(array);". */
static int err_bad_md_layout(const mdu_array_info_t *array)
{
	pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
	       array->level, array->layout, array->raid_disks);
	return DDF_INVALID_LEVEL;
}
495
496 static int layout_md2ddf(const mdu_array_info_t *array,
497 struct vd_config *conf)
498 {
499 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
500 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
501 __u8 sec_elmnt_count = 1;
502 __u8 srl = DDF_NO_SECONDARY;
503
504 switch (array->level) {
505 case LEVEL_LINEAR:
506 prl = DDF_CONCAT;
507 break;
508 case 0:
509 rlq = DDF_RAID0_SIMPLE;
510 prl = DDF_RAID0;
511 break;
512 case 1:
513 switch (array->raid_disks) {
514 case 2:
515 rlq = DDF_RAID1_SIMPLE;
516 break;
517 case 3:
518 rlq = DDF_RAID1_MULTI;
519 break;
520 default:
521 return err_bad_md_layout(array);
522 }
523 prl = DDF_RAID1;
524 break;
525 case 4:
526 if (array->layout != 0)
527 return err_bad_md_layout(array);
528 rlq = DDF_RAID4_N;
529 prl = DDF_RAID4;
530 break;
531 case 5:
532 switch (array->layout) {
533 case ALGORITHM_LEFT_ASYMMETRIC:
534 rlq = DDF_RAID5_N_RESTART;
535 break;
536 case ALGORITHM_RIGHT_ASYMMETRIC:
537 rlq = DDF_RAID5_0_RESTART;
538 break;
539 case ALGORITHM_LEFT_SYMMETRIC:
540 rlq = DDF_RAID5_N_CONTINUE;
541 break;
542 case ALGORITHM_RIGHT_SYMMETRIC:
543 /* not mentioned in standard */
544 default:
545 return err_bad_md_layout(array);
546 }
547 prl = DDF_RAID5;
548 break;
549 case 6:
550 switch (array->layout) {
551 case ALGORITHM_ROTATING_N_RESTART:
552 rlq = DDF_RAID5_N_RESTART;
553 break;
554 case ALGORITHM_ROTATING_ZERO_RESTART:
555 rlq = DDF_RAID6_0_RESTART;
556 break;
557 case ALGORITHM_ROTATING_N_CONTINUE:
558 rlq = DDF_RAID5_N_CONTINUE;
559 break;
560 default:
561 return err_bad_md_layout(array);
562 }
563 prl = DDF_RAID6;
564 break;
565 case 10:
566 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
567 rlq = DDF_RAID1_SIMPLE;
568 prim_elmnt_count = __cpu_to_be16(2);
569 sec_elmnt_count = array->raid_disks / 2;
570 } else if (array->raid_disks % 3 == 0
571 && array->layout == 0x103) {
572 rlq = DDF_RAID1_MULTI;
573 prim_elmnt_count = __cpu_to_be16(3);
574 sec_elmnt_count = array->raid_disks / 3;
575 } else
576 return err_bad_md_layout(array);
577 srl = DDF_2SPANNED;
578 prl = DDF_RAID1;
579 break;
580 default:
581 return err_bad_md_layout(array);
582 }
583 conf->prl = prl;
584 conf->prim_elmnt_count = prim_elmnt_count;
585 conf->rlq = rlq;
586 conf->srl = srl;
587 conf->sec_elmnt_count = sec_elmnt_count;
588 return 0;
589 }
590
/* Report a DDF PRL/RLQ/disk-count combination that cannot be mapped
 * to an md array.  Always returns -1, for convenient tail-calling. */
static int err_bad_ddf_layout(const struct vd_config *conf)
{
	pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
	       conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
	return -1;
}
597
598 static int layout_ddf2md(const struct vd_config *conf,
599 mdu_array_info_t *array)
600 {
601 int level = LEVEL_UNSUPPORTED;
602 int layout = 0;
603 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
604
605 if (conf->sec_elmnt_count > 1) {
606 /* see also check_secondary() */
607 if (conf->prl != DDF_RAID1 ||
608 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
609 pr_err("Unsupported secondary RAID level %u/%u\n",
610 conf->prl, conf->srl);
611 return -1;
612 }
613 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
614 layout = 0x102;
615 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
616 layout = 0x103;
617 else
618 return err_bad_ddf_layout(conf);
619 raiddisks *= conf->sec_elmnt_count;
620 level = 10;
621 goto good;
622 }
623
624 switch (conf->prl) {
625 case DDF_CONCAT:
626 level = LEVEL_LINEAR;
627 break;
628 case DDF_RAID0:
629 if (conf->rlq != DDF_RAID0_SIMPLE)
630 return err_bad_ddf_layout(conf);
631 level = 0;
632 break;
633 case DDF_RAID1:
634 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
635 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
636 return err_bad_ddf_layout(conf);
637 level = 1;
638 break;
639 case DDF_RAID4:
640 if (conf->rlq != DDF_RAID4_N)
641 return err_bad_ddf_layout(conf);
642 level = 4;
643 break;
644 case DDF_RAID5:
645 switch (conf->rlq) {
646 case DDF_RAID5_N_RESTART:
647 layout = ALGORITHM_LEFT_ASYMMETRIC;
648 break;
649 case DDF_RAID5_0_RESTART:
650 layout = ALGORITHM_RIGHT_ASYMMETRIC;
651 break;
652 case DDF_RAID5_N_CONTINUE:
653 layout = ALGORITHM_LEFT_SYMMETRIC;
654 break;
655 default:
656 return err_bad_ddf_layout(conf);
657 }
658 level = 5;
659 break;
660 case DDF_RAID6:
661 switch (conf->rlq) {
662 case DDF_RAID5_N_RESTART:
663 layout = ALGORITHM_ROTATING_N_RESTART;
664 break;
665 case DDF_RAID6_0_RESTART:
666 layout = ALGORITHM_ROTATING_ZERO_RESTART;
667 break;
668 case DDF_RAID5_N_CONTINUE:
669 layout = ALGORITHM_ROTATING_N_CONTINUE;
670 break;
671 default:
672 return err_bad_ddf_layout(conf);
673 }
674 level = 6;
675 break;
676 default:
677 return err_bad_ddf_layout(conf);
678 };
679
680 good:
681 array->level = level;
682 array->layout = layout;
683 array->raid_disks = raiddisks;
684 return 0;
685 }
686
/* Read a ddf header (primary or secondary) from fd at sector 'lba'
 * on a device of 'size' sectors, and check that it is consistent
 * with the already-loaded *anchor:
 * magic, crc, guid, revision, both LBA pointers and the expected
 * header 'type' must match, and everything from 'pad2' to the end of
 * the sector must be byte-identical to the anchor.
 * Returns 1 when *hdr was loaded and passes all checks, 0 otherwise.
 */
static int load_ddf_header(int fd, unsigned long long lba,
			   unsigned long long size,
			   int type,
			   struct ddf_header *hdr, struct ddf_header *anchor)
{
	/* an LBA at/after the last sector cannot hold a header */
	if (lba >= size-1)
		return 0;

	if (lseek64(fd, lba<<9, 0) < 0)
		return 0;

	if (read(fd, hdr, 512) != 512)
		return 0;

	if (hdr->magic != DDF_HEADER_MAGIC)
		return 0;
	if (calc_crc(hdr, 512) != hdr->crc)
		return 0;
	if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
	    memcmp(anchor->revision, hdr->revision, 8) != 0 ||
	    anchor->primary_lba != hdr->primary_lba ||
	    anchor->secondary_lba != hdr->secondary_lba ||
	    hdr->type != type ||
	    memcmp(anchor->pad2, hdr->pad2, 512 -
		   offsetof(struct ddf_header, pad2)) != 0)
		return 0;

	/* Looks good enough to me... */
	return 1;
}
723
/* Read one metadata section of 'len_be' sectors located 'offset_be'
 * sectors into the active header's metadata area (primary or
 * secondary, whichever super->active is).
 * If 'buf' is NULL a 512-aligned buffer is allocated and freed again
 * on failure; a pre-allocated 'buf' must be a single sector.
 * With 'check' set, only section lengths of 2, 8, 32, 128 or 512
 * sectors are accepted; 1024 sectors is a hard upper bound either way.
 * Returns the buffer (caller owns it if it was allocated here), or
 * NULL on any failure. */
static void *load_section(int fd, struct ddf_super *super, void *buf,
			  __u32 offset_be, __u32 len_be, int check)
{
	unsigned long long offset = __be32_to_cpu(offset_be);
	unsigned long long len = __be32_to_cpu(len_be);
	int dofree = (buf == NULL);

	if (check)
		if (len != 2 && len != 8 && len != 32
		    && len != 128 && len != 512)
			return NULL;

	if (len > 1024)
		return NULL;
	if (buf) {
		/* All pre-allocated sections are a single block */
		if (len != 1)
			return NULL;
	} else if (posix_memalign(&buf, 512, len<<9) != 0)
		buf = NULL;

	if (!buf)
		return NULL;

	/* section offsets are relative to the header's own area */
	if (super->active->type == 1)
		offset += __be64_to_cpu(super->active->primary_lba);
	else
		offset += __be64_to_cpu(super->active->secondary_lba);

	if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
		if (dofree)
			free(buf);
		return NULL;
	}
	if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
		if (dofree)
			free(buf);
		return NULL;
	}
	return buf;
}
765
/* Read and validate the anchor block (last 512 bytes of the device),
 * then load the primary and secondary headers it points at and pick
 * one as super->active.  The secondary wins when it has a higher
 * sequence number (and is not open), or an equal seqnum with the
 * primary open but the secondary closed, or when the primary could
 * not be loaded at all.
 * Returns 0 on success, 1 on I/O error reading the anchor, 2 when
 * the metadata is invalid or no usable header was found. */
static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
{
	unsigned long long dsize;

	get_dev_size(fd, NULL, &dsize);

	if (lseek64(fd, dsize-512, 0) < 0) {
		if (devname)
			pr_err("Cannot seek to anchor block on %s: %s\n",
			       devname, strerror(errno));
		return 1;
	}
	if (read(fd, &super->anchor, 512) != 512) {
		if (devname)
			pr_err("Cannot read anchor block on %s: %s\n",
			       devname, strerror(errno));
		return 1;
	}
	if (super->anchor.magic != DDF_HEADER_MAGIC) {
		if (devname)
			pr_err("no DDF anchor found on %s\n",
			       devname);
		return 2;
	}
	if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
		if (devname)
			pr_err("bad CRC on anchor on %s\n",
			       devname);
		return 2;
	}
	if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
	    memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
		if (devname)
			pr_err("can only support super revision"
			       " %.8s and earlier, not %.8s on %s\n",
			       DDF_REVISION_2, super->anchor.revision,devname);
		return 2;
	}
	super->active = NULL;
	if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
			    dsize >> 9, 1,
			    &super->primary, &super->anchor) == 0) {
		if (devname)
			pr_err("Failed to load primary DDF header "
			       "on %s\n", devname);
	} else
		super->active = &super->primary;
	if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
			    dsize >> 9, 2,
			    &super->secondary, &super->anchor)) {
		/* prefer the secondary copy under the conditions
		 * described in the function comment above */
		if ((__be32_to_cpu(super->primary.seq)
		     < __be32_to_cpu(super->secondary.seq) &&
		     !super->secondary.openflag)
		    || (__be32_to_cpu(super->primary.seq)
			== __be32_to_cpu(super->secondary.seq) &&
			super->primary.openflag && !super->secondary.openflag)
		    || super->active == NULL
			)
			super->active = &super->secondary;
	} else if (devname)
		pr_err("Failed to load secondary DDF header on %s\n",
		       devname);
	if (super->active == NULL)
		return 2;
	return 0;
}
832
/* Load the globally-scoped sections addressed by the active header:
 * controller data (into the embedded struct) and the physical and
 * virtual disk tables (allocated here; sizes cached in pdsize/vdsize).
 * Also caches the header limits max_part/mppe/conf_rec_len.
 * Returns 0 on success; 2 if any section failed to load, in which
 * case phys/virt are freed and reset to NULL. */
static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
{
	void *ok;
	ok = load_section(fd, super, &super->controller,
			  super->active->controller_section_offset,
			  super->active->controller_section_length,
			  0);
	super->phys = load_section(fd, super, NULL,
				   super->active->phys_section_offset,
				   super->active->phys_section_length,
				   1);
	super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;

	super->virt = load_section(fd, super, NULL,
				   super->active->virt_section_offset,
				   super->active->virt_section_length,
				   1);
	super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
	if (!ok ||
	    !super->phys ||
	    !super->virt) {
		/* all-or-nothing: release whatever did load */
		free(super->phys);
		free(super->virt);
		super->phys = NULL;
		super->virt = NULL;
		return 2;
	}
	super->conflist = NULL;
	super->dlist = NULL;

	super->max_part = __be16_to_cpu(super->active->max_partitions);
	super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
	super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
	return 0;
}
868
869 static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
870 unsigned int len)
871 {
872 int i;
873 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
874 if (vcl->other_bvds[i] != NULL &&
875 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
876 break;
877
878 if (i < vcl->conf.sec_elmnt_count-1) {
879 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
880 return;
881 } else {
882 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
883 if (vcl->other_bvds[i] == NULL)
884 break;
885 if (i == vcl->conf.sec_elmnt_count-1) {
886 pr_err("no space for sec level config %u, count is %u\n",
887 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
888 return;
889 }
890 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
891 != 0) {
892 pr_err("%s could not allocate vd buf\n", __func__);
893 return;
894 }
895 }
896 memcpy(vcl->other_bvds[i], vd, len);
897 }
898
899 static int load_ddf_local(int fd, struct ddf_super *super,
900 char *devname, int keep)
901 {
902 struct dl *dl;
903 struct stat stb;
904 char *conf;
905 unsigned int i;
906 unsigned int confsec;
907 int vnum;
908 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
909 unsigned long long dsize;
910
911 /* First the local disk info */
912 if (posix_memalign((void**)&dl, 512,
913 sizeof(*dl) +
914 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
915 pr_err("%s could not allocate disk info buffer\n",
916 __func__);
917 return 1;
918 }
919
920 load_section(fd, super, &dl->disk,
921 super->active->data_section_offset,
922 super->active->data_section_length,
923 0);
924 dl->devname = devname ? xstrdup(devname) : NULL;
925
926 fstat(fd, &stb);
927 dl->major = major(stb.st_rdev);
928 dl->minor = minor(stb.st_rdev);
929 dl->next = super->dlist;
930 dl->fd = keep ? fd : -1;
931
932 dl->size = 0;
933 if (get_dev_size(fd, devname, &dsize))
934 dl->size = dsize >> 9;
935 /* If the disks have different sizes, the LBAs will differ
936 * between phys disks.
937 * At this point here, the values in super->active must be valid
938 * for this phys disk. */
939 dl->primary_lba = super->active->primary_lba;
940 dl->secondary_lba = super->active->secondary_lba;
941 dl->workspace_lba = super->active->workspace_lba;
942 dl->spare = NULL;
943 for (i = 0 ; i < super->max_part ; i++)
944 dl->vlist[i] = NULL;
945 super->dlist = dl;
946 dl->pdnum = -1;
947 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
948 if (memcmp(super->phys->entries[i].guid,
949 dl->disk.guid, DDF_GUID_LEN) == 0)
950 dl->pdnum = i;
951
952 /* Now the config list. */
953 /* 'conf' is an array of config entries, some of which are
954 * probably invalid. Those which are good need to be copied into
955 * the conflist
956 */
957
958 conf = load_section(fd, super, NULL,
959 super->active->config_section_offset,
960 super->active->config_section_length,
961 0);
962
963 vnum = 0;
964 for (confsec = 0;
965 confsec < __be32_to_cpu(super->active->config_section_length);
966 confsec += super->conf_rec_len) {
967 struct vd_config *vd =
968 (struct vd_config *)((char*)conf + confsec*512);
969 struct vcl *vcl;
970
971 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
972 if (dl->spare)
973 continue;
974 if (posix_memalign((void**)&dl->spare, 512,
975 super->conf_rec_len*512) != 0) {
976 pr_err("%s could not allocate spare info buf\n",
977 __func__);
978 return 1;
979 }
980
981 memcpy(dl->spare, vd, super->conf_rec_len*512);
982 continue;
983 }
984 if (vd->magic != DDF_VD_CONF_MAGIC)
985 continue;
986 for (vcl = super->conflist; vcl; vcl = vcl->next) {
987 if (memcmp(vcl->conf.guid,
988 vd->guid, DDF_GUID_LEN) == 0)
989 break;
990 }
991
992 if (vcl) {
993 dl->vlist[vnum++] = vcl;
994 if (vcl->other_bvds != NULL &&
995 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
996 add_other_bvd(vcl, vd, super->conf_rec_len*512);
997 continue;
998 }
999 if (__be32_to_cpu(vd->seqnum) <=
1000 __be32_to_cpu(vcl->conf.seqnum))
1001 continue;
1002 } else {
1003 if (posix_memalign((void**)&vcl, 512,
1004 (super->conf_rec_len*512 +
1005 offsetof(struct vcl, conf))) != 0) {
1006 pr_err("%s could not allocate vcl buf\n",
1007 __func__);
1008 return 1;
1009 }
1010 vcl->next = super->conflist;
1011 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
1012 if (vd->sec_elmnt_count > 1)
1013 vcl->other_bvds =
1014 xcalloc(vd->sec_elmnt_count - 1,
1015 sizeof(struct vd_config *));
1016 else
1017 vcl->other_bvds = NULL;
1018 super->conflist = vcl;
1019 dl->vlist[vnum++] = vcl;
1020 }
1021 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
1022 vcl->lba_offset = (__u64*)
1023 &vcl->conf.phys_refnum[super->mppe];
1024
1025 for (i=0; i < max_virt_disks ; i++)
1026 if (memcmp(super->virt->entries[i].guid,
1027 vcl->conf.guid, DDF_GUID_LEN)==0)
1028 break;
1029 if (i < max_virt_disks)
1030 vcl->vcnum = i;
1031 }
1032 free(conf);
1033
1034 return 0;
1035 }
1036
1037 #ifndef MDASSEMBLE
1038 static int load_super_ddf_all(struct supertype *st, int fd,
1039 void **sbp, char *devname);
1040 #endif
1041
1042 static void free_super_ddf(struct supertype *st);
1043
1044 static int load_super_ddf(struct supertype *st, int fd,
1045 char *devname)
1046 {
1047 unsigned long long dsize;
1048 struct ddf_super *super;
1049 int rv;
1050
1051 if (get_dev_size(fd, devname, &dsize) == 0)
1052 return 1;
1053
1054 if (!st->ignore_hw_compat && test_partition(fd))
1055 /* DDF is not allowed on partitions */
1056 return 1;
1057
1058 /* 32M is a lower bound */
1059 if (dsize <= 32*1024*1024) {
1060 if (devname)
1061 pr_err("%s is too small for ddf: "
1062 "size is %llu sectors.\n",
1063 devname, dsize>>9);
1064 return 1;
1065 }
1066 if (dsize & 511) {
1067 if (devname)
1068 pr_err("%s is an odd size for ddf: "
1069 "size is %llu bytes.\n",
1070 devname, dsize);
1071 return 1;
1072 }
1073
1074 free_super_ddf(st);
1075
1076 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
1077 pr_err("malloc of %zu failed.\n",
1078 sizeof(*super));
1079 return 1;
1080 }
1081 memset(super, 0, sizeof(*super));
1082
1083 rv = load_ddf_headers(fd, super, devname);
1084 if (rv) {
1085 free(super);
1086 return rv;
1087 }
1088
1089 /* Have valid headers and have chosen the best. Let's read in the rest*/
1090
1091 rv = load_ddf_global(fd, super, devname);
1092
1093 if (rv) {
1094 if (devname)
1095 pr_err("Failed to load all information "
1096 "sections on %s\n", devname);
1097 free(super);
1098 return rv;
1099 }
1100
1101 rv = load_ddf_local(fd, super, devname, 0);
1102
1103 if (rv) {
1104 if (devname)
1105 pr_err("Failed to load all information "
1106 "sections on %s\n", devname);
1107 free(super);
1108 return rv;
1109 }
1110
1111 /* Should possibly check the sections .... */
1112
1113 st->sb = super;
1114 if (st->ss == NULL) {
1115 st->ss = &super_ddf;
1116 st->minor_version = 0;
1117 st->max_devs = 512;
1118 }
1119 return 0;
1120
1121 }
1122
1123 static void free_super_ddf(struct supertype *st)
1124 {
1125 struct ddf_super *ddf = st->sb;
1126 if (ddf == NULL)
1127 return;
1128 free(ddf->phys);
1129 free(ddf->virt);
1130 while (ddf->conflist) {
1131 struct vcl *v = ddf->conflist;
1132 ddf->conflist = v->next;
1133 if (v->block_sizes)
1134 free(v->block_sizes);
1135 if (v->other_bvds) {
1136 int i;
1137 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
1138 if (v->other_bvds[i] != NULL)
1139 free(v->other_bvds[i]);
1140 free(v->other_bvds);
1141 }
1142 free(v);
1143 }
1144 while (ddf->dlist) {
1145 struct dl *d = ddf->dlist;
1146 ddf->dlist = d->next;
1147 if (d->fd >= 0)
1148 close(d->fd);
1149 if (d->spare)
1150 free(d->spare);
1151 free(d);
1152 }
1153 while (ddf->add_list) {
1154 struct dl *d = ddf->add_list;
1155 ddf->add_list = d->next;
1156 if (d->fd >= 0)
1157 close(d->fd);
1158 if (d->spare)
1159 free(d->spare);
1160 free(d);
1161 }
1162 free(ddf);
1163 st->sb = NULL;
1164 }
1165
1166 static struct supertype *match_metadata_desc_ddf(char *arg)
1167 {
1168 /* 'ddf' only support containers */
1169 struct supertype *st;
1170 if (strcmp(arg, "ddf") != 0 &&
1171 strcmp(arg, "default") != 0
1172 )
1173 return NULL;
1174
1175 st = xcalloc(1, sizeof(*st));
1176 st->ss = &super_ddf;
1177 st->max_devs = 512;
1178 st->minor_version = 0;
1179 st->sb = NULL;
1180 return st;
1181 }
1182
1183 #ifndef MDASSEMBLE
1184
/* Virtual-disk state (low 3 bits of virtual_entry->state) → name */
static mapping_t ddf_state[] = {
	{ "Optimal", 0},
	{ "Degraded", 1},
	{ "Deleted", 2},
	{ "Missing", 3},
	{ "Failed", 4},
	{ "Partially Optimal", 5},
	{ "-reserved-", 6},
	{ "-reserved-", 7},
	{ NULL, 0}
};
1196
/* Virtual-disk init state (virtual_entry->init_state & 3) → name */
static mapping_t ddf_init_state[] = {
	{ "Not Initialised", 0},
	{ "QuickInit in Progress", 1},
	{ "Fully Initialised", 2},
	{ "*UNKNOWN*", 3},
	{ NULL, 0}
};
/* Virtual-disk access policy ((init_state>>6) & 3) → name */
static mapping_t ddf_access[] = {
	{ "Read/Write", 0},
	{ "Reserved", 1},
	{ "Read Only", 2},
	{ "Blocked (no access)", 3},
	{ NULL ,0}
};
1211
/* DDF Primary RAID Level (prl) codes → name */
static mapping_t ddf_level[] = {
	{ "RAID0", DDF_RAID0},
	{ "RAID1", DDF_RAID1},
	{ "RAID3", DDF_RAID3},
	{ "RAID4", DDF_RAID4},
	{ "RAID5", DDF_RAID5},
	{ "RAID1E",DDF_RAID1E},
	{ "JBOD", DDF_JBOD},
	{ "CONCAT",DDF_CONCAT},
	{ "RAID5E",DDF_RAID5E},
	{ "RAID5EE",DDF_RAID5EE},
	{ "RAID6", DDF_RAID6},
	{ NULL, 0}
};
/* DDF Secondary RAID Level (srl) codes → name */
static mapping_t ddf_sec_level[] = {
	{ "Striped", DDF_2STRIPED},
	{ "Mirrored", DDF_2MIRRORED},
	{ "Concat", DDF_2CONCAT},
	{ "Spanned", DDF_2SPANNED},
	{ NULL, 0}
};
1233 #endif
1234
1235 static int all_ff(const char *guid)
1236 {
1237 int i;
1238 for (i = 0; i < DDF_GUID_LEN; i++)
1239 if (guid[i] != (char)0xff)
1240 return 0;
1241 return 1;
1242 }
1243
1244 #ifndef MDASSEMBLE
static void print_guid(char *guid, int tstamp)
{
	/* A GUIDs are part (or all) ASCII and part binary.
	 * They tend to be space padded.
	 * We print the GUID in HEX, then in parentheses add
	 * any initial ASCII sequence, and a possible
	 * time stamp from bytes 16-19
	 */
	int l = DDF_GUID_LEN;
	int i;

	/* hex dump, a ':' every 4 bytes */
	for (i=0 ; i<DDF_GUID_LEN ; i++) {
		if ((i&3)==0 && i != 0) printf(":");
		printf("%02X", guid[i]&255);	/* &255 avoids sign-extension of char */
	}

	printf("\n (");
	/* strip trailing space padding before printing the ASCII prefix */
	while (l && guid[l-1] == ' ')
		l--;
	for (i=0 ; i<l ; i++) {
		/* stop at the first non-printable character */
		if (guid[i] >= 0x20 && guid[i] < 0x7f)
			fputc(guid[i], stdout);
		else
			break;
	}
	if (tstamp) {
		/* bytes 16-19 hold a big-endian DDF-epoch (1980) timestamp.
		 * NOTE(review): the cast assumes guid+16 is suitably aligned
		 * and type-puns through __u32 — confirm callers. */
		time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
		char tbuf[100];
		struct tm *tm;
		tm = localtime(&then);
		strftime(tbuf, 100, " %D %T",tm);
		fputs(tbuf, stdout);
	}
	printf(")");
}
1280
/* Print --examine details for the virtual disk identified by 'guid':
 * member count (with physical-disk indices), chunk size, RAID level,
 * secondary level/position if any, and sizes.  'n' is the VD index
 * used to label the output lines.
 */
static void examine_vd(int n, struct ddf_super *sb, char *guid)
{
	int crl = sb->conf_rec_len;
	struct vcl *vcl;

	for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
		unsigned int i;
		struct vd_config *vc = &vcl->conf;

		/* skip stale or unrelated config records */
		if (calc_crc(vc, crl*512) != vc->crc)
			continue;
		if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
			continue;

		/* Ok, we know about this VD, let's give more details */
		printf(" Raid Devices[%d] : %d (", n,
		       __be16_to_cpu(vc->prim_elmnt_count));
		for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
			int j;
			int cnt = __be16_to_cpu(sb->phys->used_pdes);
			/* map this member's refnum to a physical-disk index */
			for (j=0; j<cnt; j++)
				if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
					break;
			if (i) printf(" ");
			if (j < cnt)
				printf("%d", j);
			else
				printf("--");	/* member has no known phys disk */
		}
		printf(")\n");
		if (vc->chunk_shift != 255)	/* 255 == no chunking */
			printf(" Chunk Size[%d] : %d sectors\n", n,
			       1 << vc->chunk_shift);
		printf(" Raid Level[%d] : %s\n", n,
		       map_num(ddf_level, vc->prl)?:"-unknown-");
		if (vc->sec_elmnt_count != 1) {
			printf(" Secondary Position[%d] : %d of %d\n", n,
			       vc->sec_elmnt_seq, vc->sec_elmnt_count);
			printf(" Secondary Level[%d] : %s\n", n,
			       map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
		}
		/* sizes are stored in sectors; /2 converts to KiB */
		printf(" Device Size[%d] : %llu\n", n,
		       (unsigned long long)__be64_to_cpu(vc->blocks)/2);
		printf(" Array Size[%d] : %llu\n", n,
		       (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
	}
}
1328
/* Print a summary of every populated virtual-disk entry for --examine,
 * delegating the per-VD configuration details to examine_vd().
 */
static void examine_vds(struct ddf_super *sb)
{
	int cnt = __be16_to_cpu(sb->virt->populated_vdes);
	unsigned int i;
	printf(" Virtual Disks : %d\n", cnt);

	for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
		struct virtual_entry *ve = &sb->virt->entries[i];
		/* all-0xff guid marks an unused slot */
		if (all_ff(ve->guid))
			continue;
		printf("\n");
		printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
		printf("\n");
		printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
		/* state: bits 0-2 = state code, bit 3 = morphing,
		 * bit 4 = not-consistent */
		printf(" state[%d] : %s, %s%s\n", i,
		       map_num(ddf_state, ve->state & 7),
		       (ve->state & 8) ? "Morphing, ": "",
		       (ve->state & 16)? "Not Consistent" : "Consistent");
		printf(" init state[%d] : %s\n", i,
		       map_num(ddf_init_state, ve->init_state&3));
		printf(" access[%d] : %s\n", i,
		       map_num(ddf_access, (ve->init_state>>6) & 3));
		printf(" Name[%d] : %.16s\n", i, ve->name);
		examine_vd(i, sb, ve->guid);
	}
	if (cnt) printf("\n");
}
1356
/* Print one line per physical-disk entry for --examine: index, refnum,
 * size, device node (if a matching dl entry is loaded) and type/state
 * flags decoded from the DDF bit-fields.
 */
static void examine_pds(struct ddf_super *sb)
{
	int cnt = __be16_to_cpu(sb->phys->used_pdes);
	int i;
	struct dl *dl;
	printf(" Physical Disks : %d\n", cnt);
	printf(" Number RefNo Size Device Type/State\n");

	for (i=0 ; i<cnt ; i++) {
		struct phys_disk_entry *pd = &sb->phys->entries[i];
		int type = __be16_to_cpu(pd->type);
		int state = __be16_to_cpu(pd->state);

		//printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
		//printf("\n");
		printf(" %3d %08x ", i,
		       __be32_to_cpu(pd->refnum));
		/* config_size is in sectors; >>1 converts to KiB */
		printf("%8lluK ",
		       (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
		/* find the loaded device with the same refnum, if any */
		for (dl = sb->dlist; dl ; dl = dl->next) {
			if (dl->disk.refnum == pd->refnum) {
				char *dv = map_dev(dl->major, dl->minor, 0);
				if (dv) {
					printf("%-15s", dv);
					break;
				}
			}
		}
		if (!dl)
			printf("%15s","");	/* keep columns aligned */
		printf(" %s%s%s%s%s",
		       (type&2) ? "active":"",
		       (type&4) ? "Global-Spare":"",
		       (type&8) ? "spare" : "",
		       (type&16)? ", foreign" : "",
		       (type&32)? "pass-through" : "");
		if (state & DDF_Failed)
			/* This over-rides these three */
			state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
		printf("/%s%s%s%s%s%s%s",
		       (state&1)? "Online": "Offline",
		       (state&2)? ", Failed": "",
		       (state&4)? ", Rebuilding": "",
		       (state&8)? ", in-transition": "",
		       (state&16)? ", SMART-errors": "",
		       (state&32)? ", Unrecovered-Read-Errors": "",
		       (state&64)? ", Missing" : "");
		printf("\n");
	}
}
1407
/* Top-level --examine for a DDF container: header fields, then the
 * virtual and physical disk tables.  'homehost' is unused here.
 */
static void examine_super_ddf(struct supertype *st, char *homehost)
{
	struct ddf_super *sb = st->sb;

	printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
	printf(" Version : %.8s\n", sb->anchor.revision);
	printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
	printf("\n");
	printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
	printf("\n");
	printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
	/* a valid magic in the secondary header means it is in use */
	printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
	       ?"yes" : "no");
	examine_vds(sb);
	examine_pds(sb);
}
1424
1425 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
1426
1427 static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
1428 static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
1429
1430 static unsigned int get_vd_num_of_subarray(struct supertype *st)
1431 {
1432 /*
1433 * Figure out the VD number for this supertype.
1434 * Returns DDF_CONTAINER for the container itself,
1435 * and DDF_NOTFOUND on error.
1436 */
1437 struct ddf_super *ddf = st->sb;
1438 struct mdinfo *sra;
1439 char *sub, *end;
1440 unsigned int vcnum;
1441
1442 if (*st->container_devnm == '\0')
1443 return DDF_CONTAINER;
1444
1445 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1446 if (!sra || sra->array.major_version != -1 ||
1447 sra->array.minor_version != -2 ||
1448 !is_subarray(sra->text_version))
1449 return DDF_NOTFOUND;
1450
1451 sub = strchr(sra->text_version + 1, '/');
1452 if (sub != NULL)
1453 vcnum = strtoul(sub + 1, &end, 10);
1454 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1455 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1456 return DDF_NOTFOUND;
1457
1458 return vcnum;
1459 }
1460
1461 static void brief_examine_super_ddf(struct supertype *st, int verbose)
1462 {
1463 /* We just write a generic DDF ARRAY entry
1464 */
1465 struct mdinfo info;
1466 char nbuf[64];
1467 getinfo_super_ddf(st, &info, NULL);
1468 fname_from_uuid(st, &info, nbuf, ':');
1469
1470 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1471 }
1472
1473 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
1474 {
1475 /* We just write a generic DDF ARRAY entry
1476 */
1477 struct ddf_super *ddf = st->sb;
1478 struct mdinfo info;
1479 unsigned int i;
1480 char nbuf[64];
1481 getinfo_super_ddf(st, &info, NULL);
1482 fname_from_uuid(st, &info, nbuf, ':');
1483
1484 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1485 struct virtual_entry *ve = &ddf->virt->entries[i];
1486 struct vcl vcl;
1487 char nbuf1[64];
1488 if (all_ff(ve->guid))
1489 continue;
1490 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1491 ddf->currentconf =&vcl;
1492 uuid_from_super_ddf(st, info.uuid);
1493 fname_from_uuid(st, &info, nbuf1, ':');
1494 printf("ARRAY container=%s member=%d UUID=%s\n",
1495 nbuf+5, i, nbuf1+5);
1496 }
1497 }
1498
1499 static void export_examine_super_ddf(struct supertype *st)
1500 {
1501 struct mdinfo info;
1502 char nbuf[64];
1503 getinfo_super_ddf(st, &info, NULL);
1504 fname_from_uuid(st, &info, nbuf, ':');
1505 printf("MD_METADATA=ddf\n");
1506 printf("MD_LEVEL=container\n");
1507 printf("MD_UUID=%s\n", nbuf+5);
1508 }
1509
/* Copy all DDF metadata (headers and sections) from fd 'from' to fd
 * 'to' at identical offsets.  Returns 0 on success, 1 on error.
 */
static int copy_metadata_ddf(struct supertype *st, int from, int to)
{
	void *buf;
	unsigned long long dsize, offset;
	int bytes;
	struct ddf_header *ddf;
	int written = 0;

	/* The meta consists of an anchor, a primary, and a secondary.
	 * This all lives at the end of the device.
	 * So it is easiest to find the earliest of primary and
	 * secondary, and copy everything from there.
	 *
	 * Anchor is 512 from end.  It contains primary_lba and secondary_lba;
	 * we choose one of those
	 */

	if (posix_memalign(&buf, 4096, 4096) != 0)
		return 1;

	if (!get_dev_size(from, NULL, &dsize))
		goto err;

	/* read and validate the anchor header in the last sector */
	if (lseek64(from, dsize-512, 0) < 0)
		goto err;
	if (read(from, buf, 512) != 512)
		goto err;
	ddf = buf;
	if (ddf->magic != DDF_HEADER_MAGIC ||
	    calc_crc(ddf, 512) != ddf->crc ||
	    (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
	     memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
		goto err;

	/* start at the lowest of anchor, primary and secondary (LBAs
	 * are sectors, hence <<9) */
	offset = dsize - 512;
	if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
		offset = __be64_to_cpu(ddf->primary_lba) << 9;
	if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
		offset = __be64_to_cpu(ddf->secondary_lba) << 9;

	bytes = dsize - offset;	/* metadata area is small enough for int */

	if (lseek64(from, offset, 0) < 0 ||
	    lseek64(to, offset, 0) < 0)
		goto err;
	/* copy the whole region in 4K chunks */
	while (written < bytes) {
		int n = bytes - written;
		if (n > 4096)
			n = 4096;
		if (read(from, buf, n) != n)
			goto err;
		if (write(to, buf, n) != n)
			goto err;
		written += n;
	}
	free(buf);
	return 0;
err:
	free(buf);
	return 1;
}
1571
/* Intentionally a no-op for now: DDF detail output is not implemented. */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 * Could print DDF GUID
	 * Need to find which array
	 * If whole, briefly list all arrays
	 * If one, give name
	 */
}
1581
/* Print just " UUID=..." for --detail --brief, using the subarray's
 * VD guid when st refers to a member, or the container guid otherwise.
 */
static void brief_detail_super_ddf(struct supertype *st)
{
	struct mdinfo info;
	char nbuf[64];
	struct ddf_super *ddf = st->sb;
	unsigned int vcnum = get_vd_num_of_subarray(st);
	if (vcnum == DDF_CONTAINER)
		uuid_from_super_ddf(st, info.uuid);
	else if (vcnum == DDF_NOTFOUND)
		return;
	else
		uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
	/* NOTE(review): only info.uuid is initialised here; assumes
	 * fname_from_uuid() reads nothing else from 'info' — confirm. */
	fname_from_uuid(st, &info, nbuf,':');
	printf(" UUID=%s", nbuf + 5);
}
1597 #endif
1598
1599 static int match_home_ddf(struct supertype *st, char *homehost)
1600 {
1601 /* It matches 'this' host if the controller is a
1602 * Linux-MD controller with vendor_data matching
1603 * the hostname
1604 */
1605 struct ddf_super *ddf = st->sb;
1606 unsigned int len;
1607
1608 if (!homehost)
1609 return 0;
1610 len = strlen(homehost);
1611
1612 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1613 len < sizeof(ddf->controller.vendor_data) &&
1614 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1615 ddf->controller.vendor_data[len] == 0);
1616 }
1617
1618 #ifndef MDASSEMBLE
1619 static int find_index_in_bvd(const struct ddf_super *ddf,
1620 const struct vd_config *conf, unsigned int n,
1621 unsigned int *n_bvd)
1622 {
1623 /*
1624 * Find the index of the n-th valid physical disk in this BVD
1625 */
1626 unsigned int i, j;
1627 for (i = 0, j = 0; i < ddf->mppe &&
1628 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1629 if (conf->phys_refnum[i] != 0xffffffff) {
1630 if (n == j) {
1631 *n_bvd = i;
1632 return 1;
1633 }
1634 j++;
1635 }
1636 }
1637 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1638 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1639 return 0;
1640 }
1641
1642 static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1643 unsigned int n,
1644 unsigned int *n_bvd, struct vcl **vcl)
1645 {
1646 struct vcl *v;
1647
1648 for (v = ddf->conflist; v; v = v->next) {
1649 unsigned int nsec, ibvd;
1650 struct vd_config *conf;
1651 if (inst != v->vcnum)
1652 continue;
1653 conf = &v->conf;
1654 if (conf->sec_elmnt_count == 1) {
1655 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1656 *vcl = v;
1657 return conf;
1658 } else
1659 goto bad;
1660 }
1661 if (v->other_bvds == NULL) {
1662 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1663 __func__, conf->sec_elmnt_count);
1664 goto bad;
1665 }
1666 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1667 if (conf->sec_elmnt_seq != nsec) {
1668 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1669 if (v->other_bvds[ibvd-1] == NULL)
1670 continue;
1671 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1672 == nsec)
1673 break;
1674 }
1675 if (ibvd == conf->sec_elmnt_count)
1676 goto bad;
1677 conf = v->other_bvds[ibvd-1];
1678 }
1679 if (!find_index_in_bvd(ddf, conf,
1680 n - nsec*conf->sec_elmnt_count, n_bvd))
1681 goto bad;
1682 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1683 , __func__, n, *n_bvd, ibvd-1, inst);
1684 *vcl = v;
1685 return conf;
1686 }
1687 bad:
1688 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
1689 return NULL;
1690 }
1691 #endif
1692
1693 static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
1694 {
1695 /* Find the entry in phys_disk which has the given refnum
1696 * and return it's index
1697 */
1698 unsigned int i;
1699 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
1700 if (ddf->phys->entries[i].refnum == phys_refnum)
1701 return i;
1702 return -1;
1703 }
1704
1705 static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1706 {
1707 char buf[20];
1708 struct sha1_ctx ctx;
1709 sha1_init_ctx(&ctx);
1710 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1711 sha1_finish_ctx(&ctx, buf);
1712 memcpy(uuid, buf, 4*4);
1713 }
1714
1715 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1716 {
1717 /* The uuid returned here is used for:
1718 * uuid to put into bitmap file (Create, Grow)
1719 * uuid for backup header when saving critical section (Grow)
1720 * comparing uuids when re-adding a device into an array
1721 * In these cases the uuid required is that of the data-array,
1722 * not the device-set.
1723 * uuid to recognise same set when adding a missing device back
1724 * to an array. This is a uuid for the device-set.
1725 *
1726 * For each of these we can make do with a truncated
1727 * or hashed uuid rather than the original, as long as
1728 * everyone agrees.
1729 * In the case of SVD we assume the BVD is of interest,
1730 * though that might be the case if a bitmap were made for
1731 * a mirrored SVD - worry about that later.
1732 * So we need to find the VD configuration record for the
1733 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1734 * The first 16 bytes of the sha1 of these is used.
1735 */
1736 struct ddf_super *ddf = st->sb;
1737 struct vcl *vcl = ddf->currentconf;
1738 char *guid;
1739
1740 if (vcl)
1741 guid = vcl->conf.guid;
1742 else
1743 guid = ddf->anchor.guid;
1744 uuid_from_ddf_guid(guid, uuid);
1745 }
1746
1747 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
1748
/* Fill 'info' with container-level information; when a member array is
 * currently selected, delegate to getinfo_super_ddf_bvd().  'map', if
 * non-NULL, receives one byte per disk: 1 = online and not failed.
 */
static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
{
	struct ddf_super *ddf = st->sb;
	int map_disks = info->array.raid_disks;
	__u32 *cptr;

	if (ddf->currentconf) {
		getinfo_super_ddf_bvd(st, info, map);
		return;
	}
	memset(info, 0, sizeof(*info));

	info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
	info->array.level = LEVEL_CONTAINER;
	info->array.layout = 0;
	info->array.md_minor = -1;
	/* creation time lives in guid bytes 16-19, DDF (1980) epoch */
	cptr = (__u32 *)(ddf->anchor.guid + 16);
	info->array.ctime = DECADE + __be32_to_cpu(*cptr);

	info->array.utime = 0;
	info->array.chunk_size = 0;
	info->container_enough = 1;

	info->disk.major = 0;
	info->disk.minor = 0;
	if (ddf->dlist) {
		/* describe the first loaded disk */
		info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
		info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);

		info->data_offset = __be64_to_cpu(ddf->phys->
					  entries[info->disk.raid_disk].
					  config_size);
		info->component_size = ddf->dlist->size - info->data_offset;
	} else {
		info->disk.number = -1;
		info->disk.raid_disk = -1;
//		info->disk.raid_disk = find refnum in the table and use index;
	}
	info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);

	info->recovery_start = MaxSector;
	info->reshape_active = 0;
	info->recovery_blocked = 0;
	info->name[0] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	strcpy(info->text_version, "ddf");
	info->safe_mode_delay = 0;

	uuid_from_super_ddf(st, info->uuid);

	if (map) {
		int i;
		for (i = 0 ; i < map_disks; i++) {
			if (i < info->array.raid_disks &&
			    (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
			    !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
				map[i] = 1;
			else
				map[i] = 0;
		}
	}
}
1813
1814 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
1815 {
1816 struct ddf_super *ddf = st->sb;
1817 struct vcl *vc = ddf->currentconf;
1818 int cd = ddf->currentdev;
1819 int j;
1820 struct dl *dl;
1821 int map_disks = info->array.raid_disks;
1822 __u32 *cptr;
1823
1824 memset(info, 0, sizeof(*info));
1825 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1826 return;
1827 info->array.md_minor = -1;
1828 cptr = (__u32 *)(vc->conf.guid + 16);
1829 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1830 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1831 info->array.chunk_size = 512 << vc->conf.chunk_shift;
1832 info->custom_array_size = 0;
1833
1834 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
1835 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1836 if (vc->block_sizes)
1837 info->component_size = vc->block_sizes[cd];
1838 else
1839 info->component_size = __be64_to_cpu(vc->conf.blocks);
1840 }
1841
1842 for (dl = ddf->dlist; dl ; dl = dl->next)
1843 if (dl->raiddisk == ddf->currentdev)
1844 break;
1845
1846 info->disk.major = 0;
1847 info->disk.minor = 0;
1848 info->disk.state = 0;
1849 if (dl) {
1850 info->disk.major = dl->major;
1851 info->disk.minor = dl->minor;
1852 info->disk.raid_disk = dl->raiddisk;
1853 info->disk.number = dl->pdnum;
1854 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
1855 }
1856
1857 info->container_member = ddf->currentconf->vcnum;
1858
1859 info->recovery_start = MaxSector;
1860 info->resync_start = 0;
1861 info->reshape_active = 0;
1862 info->recovery_blocked = 0;
1863 if (!(ddf->virt->entries[info->container_member].state
1864 & DDF_state_inconsistent) &&
1865 (ddf->virt->entries[info->container_member].init_state
1866 & DDF_initstate_mask)
1867 == DDF_init_full)
1868 info->resync_start = MaxSector;
1869
1870 uuid_from_super_ddf(st, info->uuid);
1871
1872 info->array.major_version = -1;
1873 info->array.minor_version = -2;
1874 sprintf(info->text_version, "/%s/%d",
1875 st->container_devnm,
1876 info->container_member);
1877 info->safe_mode_delay = 200;
1878
1879 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1880 info->name[16]=0;
1881 for(j=0; j<16; j++)
1882 if (info->name[j] == ' ')
1883 info->name[j] = 0;
1884
1885 if (map)
1886 for (j = 0; j < map_disks; j++) {
1887 map[j] = 0;
1888 if (j < info->array.raid_disks) {
1889 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
1890 if (i >= 0 &&
1891 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1892 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1893 map[i] = 1;
1894 }
1895 }
1896 }
1897
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *	  update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *	  be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *	  if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *	  linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  uuid:  Change the uuid of the array to match what is given
	 *  homehost:  update the recorded homehost
	 *  name:  update the name - preserving the homehost
	 *  _reshape_progress: record new reshape_progress position.
	 *
	 * Following are not relevant for this version:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 */
	int rv = 0;
//	struct ddf_super *ddf = st->sb;
//	struct vd_config *vd = find_vdcr(ddf, info->container_member);
//	struct virtual_entry *ve = find_ve(ddf);

	/* we don't need to handle "force-*" or "assemble" as
	 * there is no need to 'trick' the kernel.  We the metadata is
	 * first updated to activate the array, all the implied modifications
	 * will just happen.
	 */

	if (strcmp(update, "grow") == 0) {
		/* FIXME */
	} else if (strcmp(update, "resync") == 0) {
//		info->resync_checkpoint = 0;
	} else if (strcmp(update, "homehost") == 0) {
		/* homehost is stored in controller->vendor_data,
		 * or it is when we are the vendor
		 */
//		if (info->vendor_is_local)
//			strcpy(ddf->controller.vendor_data, homehost);
		rv = -1;	/* not implemented yet */
	} else if (strcmp(update, "name") == 0) {
		/* name is stored in virtual_entry->name */
//		memset(ve->name, ' ', 16);
//		strncpy(ve->name, info->name, 16);
		rv = -1;	/* not implemented yet */
	} else if (strcmp(update, "_reshape_progress") == 0) {
		/* We don't support reshape yet */
	} else if (strcmp(update, "assemble") == 0 ) {
		/* Do nothing, just succeed */
		rv = 0;
	} else
		rv = -1;	/* unknown/unsupported update type */

//	update_all_csum(ddf);

	return rv;
}
1966
1967 static void make_header_guid(char *guid)
1968 {
1969 __u32 stamp;
1970 /* Create a DDF Header of Virtual Disk GUID */
1971
1972 /* 24 bytes of fiction required.
1973 * first 8 are a 'vendor-id' - "Linux-MD"
1974 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1975 * Remaining 8 random number plus timestamp
1976 */
1977 memcpy(guid, T10, sizeof(T10));
1978 stamp = __cpu_to_be32(0xdeadbeef);
1979 memcpy(guid+8, &stamp, 4);
1980 stamp = __cpu_to_be32(0);
1981 memcpy(guid+12, &stamp, 4);
1982 stamp = __cpu_to_be32(time(0) - DECADE);
1983 memcpy(guid+16, &stamp, 4);
1984 stamp = random32();
1985 memcpy(guid+20, &stamp, 4);
1986 }
1987
1988 static unsigned int find_unused_vde(const struct ddf_super *ddf)
1989 {
1990 unsigned int i;
1991 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1992 if (all_ff(ddf->virt->entries[i].guid))
1993 return i;
1994 }
1995 return DDF_NOTFOUND;
1996 }
1997
1998 static unsigned int find_vde_by_name(const struct ddf_super *ddf,
1999 const char *name)
2000 {
2001 unsigned int i;
2002 if (name == NULL)
2003 return DDF_NOTFOUND;
2004 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2005 if (all_ff(ddf->virt->entries[i].guid))
2006 continue;
2007 if (!strncmp(name, ddf->virt->entries[i].name,
2008 sizeof(ddf->virt->entries[i].name)))
2009 return i;
2010 }
2011 return DDF_NOTFOUND;
2012 }
2013
2014 static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2015 const char *guid)
2016 {
2017 unsigned int i;
2018 if (guid == NULL || all_ff(guid))
2019 return DDF_NOTFOUND;
2020 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2021 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2022 return i;
2023 return DDF_NOTFOUND;
2024 }
2025
2026 static int init_super_ddf_bvd(struct supertype *st,
2027 mdu_array_info_t *info,
2028 unsigned long long size,
2029 char *name, char *homehost,
2030 int *uuid, unsigned long long data_offset);
2031
2032 static int init_super_ddf(struct supertype *st,
2033 mdu_array_info_t *info,
2034 unsigned long long size, char *name, char *homehost,
2035 int *uuid, unsigned long long data_offset)
2036 {
2037 /* This is primarily called by Create when creating a new array.
2038 * We will then get add_to_super called for each component, and then
2039 * write_init_super called to write it out to each device.
2040 * For DDF, Create can create on fresh devices or on a pre-existing
2041 * array.
2042 * To create on a pre-existing array a different method will be called.
2043 * This one is just for fresh drives.
2044 *
2045 * We need to create the entire 'ddf' structure which includes:
2046 * DDF headers - these are easy.
2047 * Controller data - a Sector describing this controller .. not that
2048 * this is a controller exactly.
2049 * Physical Disk Record - one entry per device, so
2050 * leave plenty of space.
2051 * Virtual Disk Records - again, just leave plenty of space.
2052 * This just lists VDs, doesn't give details
2053 * Config records - describes the VDs that use this disk
2054 * DiskData - describes 'this' device.
2055 * BadBlockManagement - empty
2056 * Diag Space - empty
2057 * Vendor Logs - Could we put bitmaps here?
2058 *
2059 */
2060 struct ddf_super *ddf;
2061 char hostname[17];
2062 int hostlen;
2063 int max_phys_disks, max_virt_disks;
2064 unsigned long long sector;
2065 int clen;
2066 int i;
2067 int pdsize, vdsize;
2068 struct phys_disk *pd;
2069 struct virtual_disk *vd;
2070
2071 if (data_offset != INVALID_SECTORS) {
2072 pr_err("data-offset not supported by DDF\n");
2073 return 0;
2074 }
2075
2076 if (st->sb)
2077 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2078 data_offset);
2079
2080 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
2081 pr_err("%s could not allocate superblock\n", __func__);
2082 return 0;
2083 }
2084 memset(ddf, 0, sizeof(*ddf));
2085 ddf->dlist = NULL; /* no physical disks yet */
2086 ddf->conflist = NULL; /* No virtual disks yet */
2087 st->sb = ddf;
2088
2089 if (info == NULL) {
2090 /* zeroing superblock */
2091 return 0;
2092 }
2093
2094 /* At least 32MB *must* be reserved for the ddf. So let's just
2095 * start 32MB from the end, and put the primary header there.
2096 * Don't do secondary for now.
2097 * We don't know exactly where that will be yet as it could be
2098 * different on each device. To just set up the lengths.
2099 *
2100 */
2101
2102 ddf->anchor.magic = DDF_HEADER_MAGIC;
2103 make_header_guid(ddf->anchor.guid);
2104
2105 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
2106 ddf->anchor.seq = __cpu_to_be32(1);
2107 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2108 ddf->anchor.openflag = 0xFF;
2109 ddf->anchor.foreignflag = 0;
2110 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2111 ddf->anchor.pad0 = 0xff;
2112 memset(ddf->anchor.pad1, 0xff, 12);
2113 memset(ddf->anchor.header_ext, 0xff, 32);
2114 ddf->anchor.primary_lba = ~(__u64)0;
2115 ddf->anchor.secondary_lba = ~(__u64)0;
2116 ddf->anchor.type = DDF_HEADER_ANCHOR;
2117 memset(ddf->anchor.pad2, 0xff, 3);
2118 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2119 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2120 of 32M reserved.. */
2121 max_phys_disks = 1023; /* Should be enough */
2122 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2123 max_virt_disks = 255;
2124 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2125 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2126 ddf->max_part = 64;
2127 ddf->mppe = 256;
2128 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2129 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2130 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
2131 memset(ddf->anchor.pad3, 0xff, 54);
2132 /* controller sections is one sector long immediately
2133 * after the ddf header */
2134 sector = 1;
2135 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2136 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2137 sector += 1;
2138
2139 /* phys is 8 sectors after that */
2140 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2141 sizeof(struct phys_disk_entry)*max_phys_disks,
2142 512);
2143 switch(pdsize/512) {
2144 case 2: case 8: case 32: case 128: case 512: break;
2145 default: abort();
2146 }
2147 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2148 ddf->anchor.phys_section_length =
2149 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2150 sector += pdsize/512;
2151
2152 /* virt is another 32 sectors */
2153 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2154 sizeof(struct virtual_entry) * max_virt_disks,
2155 512);
2156 switch(vdsize/512) {
2157 case 2: case 8: case 32: case 128: case 512: break;
2158 default: abort();
2159 }
2160 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2161 ddf->anchor.virt_section_length =
2162 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2163 sector += vdsize/512;
2164
2165 clen = ddf->conf_rec_len * (ddf->max_part+1);
2166 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2167 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2168 sector += clen;
2169
2170 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2171 ddf->anchor.data_section_length = __cpu_to_be32(1);
2172 sector += 1;
2173
2174 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2175 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2176 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2177 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2178 ddf->anchor.vendor_length = __cpu_to_be32(0);
2179 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2180
2181 memset(ddf->anchor.pad4, 0xff, 256);
2182
2183 memcpy(&ddf->primary, &ddf->anchor, 512);
2184 memcpy(&ddf->secondary, &ddf->anchor, 512);
2185
2186 ddf->primary.openflag = 1; /* I guess.. */
2187 ddf->primary.type = DDF_HEADER_PRIMARY;
2188
2189 ddf->secondary.openflag = 1; /* I guess.. */
2190 ddf->secondary.type = DDF_HEADER_SECONDARY;
2191
2192 ddf->active = &ddf->primary;
2193
2194 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2195
2196 /* 24 more bytes of fiction required.
2197 * first 8 are a 'vendor-id' - "Linux-MD"
2198 * Remaining 16 are serial number.... maybe a hostname would do?
2199 */
2200 memcpy(ddf->controller.guid, T10, sizeof(T10));
2201 gethostname(hostname, sizeof(hostname));
2202 hostname[sizeof(hostname) - 1] = 0;
2203 hostlen = strlen(hostname);
2204 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2205 for (i = strlen(T10) ; i+hostlen < 24; i++)
2206 ddf->controller.guid[i] = ' ';
2207
2208 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2209 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2210 ddf->controller.type.sub_vendor_id = 0;
2211 ddf->controller.type.sub_device_id = 0;
2212 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2213 memset(ddf->controller.pad, 0xff, 8);
2214 memset(ddf->controller.vendor_data, 0xff, 448);
2215 if (homehost && strlen(homehost) < 440)
2216 strcpy((char*)ddf->controller.vendor_data, homehost);
2217
2218 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
2219 pr_err("%s could not allocate pd\n", __func__);
2220 return 0;
2221 }
2222 ddf->phys = pd;
2223 ddf->pdsize = pdsize;
2224
2225 memset(pd, 0xff, pdsize);
2226 memset(pd, 0, sizeof(*pd));
2227 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2228 pd->used_pdes = __cpu_to_be16(0);
2229 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2230 memset(pd->pad, 0xff, 52);
2231
2232 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
2233 pr_err("%s could not allocate vd\n", __func__);
2234 return 0;
2235 }
2236 ddf->virt = vd;
2237 ddf->vdsize = vdsize;
2238 memset(vd, 0, vdsize);
2239 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2240 vd->populated_vdes = __cpu_to_be16(0);
2241 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2242 memset(vd->pad, 0xff, 52);
2243
2244 for (i=0; i<max_virt_disks; i++)
2245 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2246
2247 st->sb = ddf;
2248 ddf_set_updates_pending(ddf);
2249 return 1;
2250 }
2251
static int chunk_to_shift(int chunksize)
{
	/* Convert a chunk size in bytes to the DDF 'chunk shift':
	 * log2 of the chunk size expressed in 512-byte sectors.
	 */
	int sectors = chunksize / 512;

	return ffs(sectors) - 1;
}
2256
2257 #ifndef MDASSEMBLE
/* A used region on a physical disk: start sector and length in sectors. */
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator: order extents by ascending start sector. */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2271
2272 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
2273 {
2274 /* find a list of used extents on the give physical device
2275 * (dnum) of the given ddf.
2276 * Return a malloced array of 'struct extent'
2277
2278 * FIXME ignore DDF_Legacy devices?
2279
2280 */
2281 struct extent *rv;
2282 int n = 0;
2283 unsigned int i, j;
2284
2285 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
2286
2287 for (i = 0; i < ddf->max_part; i++) {
2288 struct vcl *v = dl->vlist[i];
2289 if (v == NULL)
2290 continue;
2291 for (j = 0; j < v->conf.prim_elmnt_count; j++)
2292 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
2293 /* This device plays role 'j' in 'v'. */
2294 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
2295 rv[n].size = __be64_to_cpu(v->conf.blocks);
2296 n++;
2297 break;
2298 }
2299 }
2300 qsort(rv, n, sizeof(*rv), cmp_extent);
2301
2302 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2303 rv[n].size = 0;
2304 return rv;
2305 }
2306 #endif
2307
static int init_super_ddf_bvd(struct supertype *st,
			      mdu_array_info_t *info,
			      unsigned long long size,
			      char *name, char *homehost,
			      int *uuid, unsigned long long data_offset)
{
	/* We are creating a BVD inside a pre-existing container,
	 * so st->sb is already set.
	 * We need to create a new vd_config and a new virtual_entry.
	 * Returns 1 on success; 0 on duplicate name, no free virtual-disk
	 * slot, allocation failure, or unsupported RAID level/layout.
	 */
	struct ddf_super *ddf = st->sb;
	unsigned int venum;
	struct virtual_entry *ve;
	struct vcl *vcl;
	struct vd_config *vc;

	if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
		pr_err("This ddf already has an array called %s\n", name);
		return 0;
	}
	venum = find_unused_vde(ddf);
	if (venum == DDF_NOTFOUND) {
		pr_err("Cannot find spare slot for virtual disk\n");
		return 0;
	}
	ve = &ddf->virt->entries[venum];

	/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
	 * timestamp, random number
	 */
	make_header_guid(ve->guid);
	ve->unit = __cpu_to_be16(info->md_minor);
	ve->pad0 = 0xFFFF;
	/* NOTE(review): this CRC is computed over the DDF header GUID,
	 * not over the ve->guid generated just above — confirm against
	 * the spec's definition of guid_crc. */
	ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
	ve->type = 0;
	ve->state = DDF_state_degraded; /* Will be modified as devices are added */
	if (info->state & 1) /* clean */
		ve->init_state = DDF_init_full;
	else
		ve->init_state = DDF_init_not;

	memset(ve->pad1, 0xff, 14);
	/* DDF names are fixed-width 16 bytes, space-padded and not
	 * NUL-terminated, so strncpy into the space-filled field is
	 * intentional here. */
	memset(ve->name, ' ', 16);
	if (name)
		strncpy(ve->name, name, 16);
	ddf->virt->populated_vdes =
		__cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);

	/* Now create a new vd_config */
	if (posix_memalign((void**)&vcl, 512,
		           (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
		pr_err("%s could not allocate vd_config\n", __func__);
		return 0;
	}
	/* The LBA offsets live immediately after the mppe phys_refnum
	 * slots inside the on-disk config record. */
	vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
	vcl->vcnum = venum;
	vcl->block_sizes = NULL; /* FIXME not for CONCAT */
	vcl->other_bvds = NULL;

	vc = &vcl->conf;

	vc->magic = DDF_VD_CONF_MAGIC;
	memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
	vc->timestamp = __cpu_to_be32(time(0)-DECADE);
	vc->seqnum = __cpu_to_be32(1);
	memset(vc->pad0, 0xff, 24);
	vc->chunk_shift = chunk_to_shift(info->chunk_size);
	/* layout_md2ddf() fills in prl/rlq/prim_elmnt_count etc. from
	 * the md level/layout; it fails for unsupported combinations. */
	if (layout_md2ddf(info, vc) == -1 ||
		__be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
		pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
		       __func__, info->level, info->layout, info->raid_disks);
		free(vcl);
		return 0;
	}
	vc->sec_elmnt_seq = 0;
	/* info->size * 2: presumably KiB -> 512-byte sectors (standard md
	 * convention) — confirm. */
	vc->blocks = __cpu_to_be64(info->size * 2);
	vc->array_blocks = __cpu_to_be64(
		calc_array_size(info->level, info->raid_disks, info->layout,
				info->chunk_size, info->size*2));
	memset(vc->pad1, 0xff, 8);
	/* No spares assigned yet: all eight refs set to the 'unused' value. */
	vc->spare_refs[0] = 0xffffffff;
	vc->spare_refs[1] = 0xffffffff;
	vc->spare_refs[2] = 0xffffffff;
	vc->spare_refs[3] = 0xffffffff;
	vc->spare_refs[4] = 0xffffffff;
	vc->spare_refs[5] = 0xffffffff;
	vc->spare_refs[6] = 0xffffffff;
	vc->spare_refs[7] = 0xffffffff;
	memset(vc->cache_pol, 0, 8);
	vc->bg_rate = 0x80;
	memset(vc->pad2, 0xff, 3);
	memset(vc->pad3, 0xff, 52);
	memset(vc->pad4, 0xff, 192);
	memset(vc->v0, 0xff, 32);
	memset(vc->v1, 0xff, 32);
	memset(vc->v2, 0xff, 16);
	memset(vc->v3, 0xff, 16);
	memset(vc->vendor, 0xff, 32);

	/* phys_refnum slots: 0xff marks 'unused'; the 8*mppe bytes that
	 * follow are the lba_offset array, zero-filled. */
	memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
	memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);

	/* Publish as the 'current' config so add_to_super can attach disks. */
	vcl->next = ddf->conflist;
	ddf->conflist = vcl;
	ddf->currentconf = vcl;
	ddf_set_updates_pending(ddf);
	return 1;
}
2416
2417 #ifndef MDASSEMBLE
static void add_to_super_ddf_bvd(struct supertype *st,
				 mdu_disk_info_t *dk, int fd, char *devname)
{
	/* fd and devname identify a device with-in the ddf container (st).
	 * dk identifies a location in the new BVD.
	 * We need to find suitable free space in that device and update
	 * the phys_refnum and lba_offset for the newly created vd_config.
	 * We might also want to update the type in the phys_disk
	 * section.
	 *
	 * Alternately: fd == -1 and we have already chosen which device to
	 * use and recorded in dlist->raid_disk;
	 */
	struct dl *dl;
	struct ddf_super *ddf = st->sb;
	struct vd_config *vc;
	__u64 *lba_offset;
	unsigned int working;
	unsigned int i;
	unsigned long long blocks, pos, esize;
	struct extent *ex;

	/* Locate the device: by pre-recorded raid_disk when fd == -1,
	 * otherwise by major/minor. */
	if (fd == -1) {
		for (dl = ddf->dlist; dl ; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		for (dl = ddf->dlist; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}
	/* Silently ignore unknown or out-of-sync devices. */
	if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
		return;

	vc = &ddf->currentconf->conf;
	lba_offset = ddf->currentconf->lba_offset;

	ex = get_extents(ddf, dl);
	if (!ex)
		return;

	i = 0; pos = 0;
	blocks = __be64_to_cpu(vc->blocks);
	if (ddf->currentconf->block_sizes)
		blocks = ddf->currentconf->block_sizes[dk->raid_disk];

	/* First-fit scan of the gaps between used extents; the extent
	 * list is terminated by a size==0 sentinel at config_size. */
	do {
		esize = ex[i].start - pos;
		if (esize >= blocks)
			break;
		pos = ex[i].start + ex[i].size;
		i++;
	} while (ex[i-1].size);

	free(ex);
	if (esize < blocks)
		return;

	/* Record this device's role and start offset in the config. */
	ddf->currentdev = dk->raid_disk;
	vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
	lba_offset[dk->raid_disk] = __cpu_to_be64(pos);

	/* Attach the current config to a free slot in this disk's vlist. */
	for (i = 0; i < ddf->max_part ; i++)
		if (dl->vlist[i] == NULL)
			break;
	if (i == ddf->max_part)
		return;
	dl->vlist[i] = ddf->currentconf;

	if (fd >= 0)
		dl->fd = fd;
	if (devname)
		dl->devname = devname;

	/* Check how many working raid_disks, and if we can mark
	 * array as optimal yet
	 */
	working = 0;

	for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
		if (vc->phys_refnum[i] != 0xffffffff)
			working++;

	/* Find which virtual_entry */
	i = ddf->currentconf->vcnum;
	if (working == __be16_to_cpu(vc->prim_elmnt_count))
		ddf->virt->entries[i].state =
			(ddf->virt->entries[i].state & ~DDF_state_mask)
			| DDF_state_optimal;

	/* RAID6 one short of full is 'partially optimal'. */
	if (vc->prl == DDF_RAID6 &&
	    working+1 == __be16_to_cpu(vc->prim_elmnt_count))
		ddf->virt->entries[i].state =
			(ddf->virt->entries[i].state & ~DDF_state_mask)
			| DDF_state_part_optimal;

	/* The disk is now an active member, not a global spare. */
	ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
	ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
	ddf_set_updates_pending(ddf);
}
2519
2520 /* add a device to a container, either while creating it or while
2521 * expanding a pre-existing container
2522 */
static int add_to_super_ddf(struct supertype *st,
			    mdu_disk_info_t *dk, int fd, char *devname,
			    unsigned long long data_offset)
{
	/* Add the device identified by fd/devname to the container st,
	 * creating a phys_disk entry and a disk_data record for it.
	 * Returns 0 on success, 1 on allocation failure.
	 */
	struct ddf_super *ddf = st->sb;
	struct dl *dd;
	time_t now;
	struct tm *tm;
	unsigned long long size;
	struct phys_disk_entry *pde;
	unsigned int n, i;
	struct stat stb;
	__u32 *tptr;

	/* If a BVD is being assembled inside this container, the disk is
	 * being added to that array instead — delegate. */
	if (ddf->currentconf) {
		add_to_super_ddf_bvd(st, dk, fd, devname);
		return 0;
	}

	/* This is device numbered dk->number. We need to create
	 * a phys_disk entry and a more detailed disk_data entry.
	 */
	/* NOTE(review): fstat() result is unchecked — on failure stb is
	 * uninitialized; confirm fd is always valid here. */
	fstat(fd, &stb);
	if (posix_memalign((void**)&dd, 512,
		           sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
		pr_err("%s could allocate buffer for new disk, aborting\n",
		       __func__);
		return 1;
	}
	dd->major = major(stb.st_rdev);
	dd->minor = minor(stb.st_rdev);
	dd->devname = devname;
	dd->fd = fd;
	dd->spare = NULL;

	/* Forge a PD GUID: "Linux-MD" + date + 8 random bytes. */
	dd->disk.magic = DDF_PHYS_DATA_MAGIC;
	now = time(0);
	tm = localtime(&now);
	sprintf(dd->disk.guid, "%8s%04d%02d%02d",
		T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
	tptr = (__u32 *)(dd->disk.guid + 16);
	*tptr++ = random32();
	*tptr = random32();

	do {
		/* Cannot be bothered finding a CRC of some irrelevant details*/
		dd->disk.refnum = random32();
		/* Re-roll until the refnum collides with no existing entry. */
		for (i = __be16_to_cpu(ddf->active->max_pd_entries);
		     i > 0; i--)
			if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
				break;
	} while (i > 0);

	dd->disk.forced_ref = 1;
	dd->disk.forced_guid = 1;
	memset(dd->disk.vendor, ' ', 32);
	memcpy(dd->disk.vendor, "Linux", 5);
	memset(dd->disk.pad, 0xff, 442);
	for (i = 0; i < ddf->max_part ; i++)
		dd->vlist[i] = NULL;

	n = __be16_to_cpu(ddf->phys->used_pdes);
	pde = &ddf->phys->entries[n];
	dd->pdnum = n;

	if (st->update_tail) {
		/* mdmon will apply this: build a one-entry phys_disk
		 * update instead of editing the live table. */
		int len = (sizeof(struct phys_disk) +
			   sizeof(struct phys_disk_entry));
		struct phys_disk *pd;

		pd = xmalloc(len);
		pd->magic = DDF_PHYS_RECORDS_MAGIC;
		pd->used_pdes = __cpu_to_be16(n);
		pde = &pd->entries[0];
		dd->mdupdate = pd;
	} else {
		/* Direct edit: claim the slot now. */
		n++;
		ddf->phys->used_pdes = __cpu_to_be16(n);
	}

	memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
	pde->refnum = dd->disk.refnum;
	pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
	pde->state = __cpu_to_be16(DDF_Online);
	get_dev_size(fd, NULL, &size);
	/* We are required to reserve 32Meg, and record the size in sectors */
	pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
	sprintf(pde->path, "%17.17s","Information: nil") ;
	memset(pde->pad, 0xff, 6);

	dd->size = size >> 9;
	if (st->update_tail) {
		/* Park on add_list until the update is processed. */
		dd->next = ddf->add_list;
		ddf->add_list = dd;
	} else {
		dd->next = ddf->dlist;
		ddf->dlist = dd;
		ddf_set_updates_pending(ddf);
	}

	return 0;
}
2625
2626 static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2627 {
2628 struct ddf_super *ddf = st->sb;
2629 struct dl *dl;
2630
2631 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2632 * disappeared from the container.
2633 * We need to arrange that it disappears from the metadata and
2634 * internal data structures too.
2635 * Most of the work is done by ddf_process_update which edits
2636 * the metadata and closes the file handle and attaches the memory
2637 * where free_updates will free it.
2638 */
2639 for (dl = ddf->dlist; dl ; dl = dl->next)
2640 if (dl->major == dk->major &&
2641 dl->minor == dk->minor)
2642 break;
2643 if (!dl)
2644 return -1;
2645
2646 if (st->update_tail) {
2647 int len = (sizeof(struct phys_disk) +
2648 sizeof(struct phys_disk_entry));
2649 struct phys_disk *pd;
2650
2651 pd = xmalloc(len);
2652 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2653 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2654 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2655 append_metadata_update(st, pd, len);
2656 }
2657 return 0;
2658 }
2659
2660 /*
2661 * This is the write_init_super method for a ddf container. It is
2662 * called when creating a container or adding another device to a
2663 * container.
2664 */
2665 #define NULL_CONF_SZ 4096
2666
2667 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2668 __u32 refnum, unsigned int nmax,
2669 const struct vd_config **bvd,
2670 unsigned int *idx);
2671
static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
				 char *null_aligned)
{
	/* Write one complete copy of the DDF metadata (header, controller,
	 * phys, virt, all config records, then the disk-data block) to
	 * device 'd', anchored at the primary or secondary header LBA.
	 * 'null_aligned' is a 0xff-filled buffer used to pad unused
	 * config-record slots.  Returns 1 on success, 0 on failure.
	 */
	unsigned long long sector;
	struct ddf_header *header;
	int fd, i, n_config, conf_size;
	int ret = 0;

	fd = d->fd;

	switch (type) {
	case DDF_HEADER_PRIMARY:
		header = &ddf->primary;
		sector = __be64_to_cpu(header->primary_lba);
		break;
	case DDF_HEADER_SECONDARY:
		header = &ddf->secondary;
		sector = __be64_to_cpu(header->secondary_lba);
		break;
	default:
		return 0;
	}

	/* Write first with openflag set; it is re-written 'closed' at the
	 * end so an interrupted write is detectable. */
	header->type = type;
	header->openflag = 1;
	header->crc = calc_crc(header, 512);

	lseek64(fd, sector<<9, 0);
	if (write(fd, header, 512) < 0)
		goto out;

	ddf->controller.crc = calc_crc(&ddf->controller, 512);
	if (write(fd, &ddf->controller, 512) < 0)
		goto out;

	ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
	if (write(fd, ddf->phys, ddf->pdsize) < 0)
		goto out;
	ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
	if (write(fd, ddf->virt, ddf->vdsize) < 0)
		goto out;

	/* Now write lots of config records. */
	n_config = ddf->max_part;
	conf_size = ddf->conf_rec_len * 512;
	for (i = 0 ; i <= n_config ; i++) {
		struct vcl *c;
		struct vd_config *vdc = NULL;
		if (i == n_config) {
			/* The slot after max_part holds the spare record. */
			c = (struct vcl *)d->spare;
			if (c)
				vdc = &c->conf;
		} else {
			unsigned int dummy;
			c = d->vlist[i];
			/* NOTE(review): if get_pd_index_from_refnum() fails
			 * to find the refnum, vdc stays NULL while c is
			 * non-NULL, and the dereference below would crash —
			 * confirm this cannot happen. */
			if (c)
				get_pd_index_from_refnum(
					c, d->disk.refnum,
					ddf->mppe,
					(const struct vd_config **)&vdc,
					&dummy);
		}
		if (c) {
			vdc->seqnum = header->seq;
			vdc->crc = calc_crc(vdc, conf_size);
			if (write(fd, vdc, conf_size) < 0)
				break;
		} else {
			/* Empty slot: pad with 0xff in NULL_CONF_SZ chunks.
			 * NOTE(review): a failed chunk write only breaks the
			 * inner while; the trailing write still runs. */
			unsigned int togo = conf_size;
			while (togo > NULL_CONF_SZ) {
				if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
					break;
				togo -= NULL_CONF_SZ;
			}
			if (write(fd, null_aligned, togo) < 0)
				break;
		}
	}
	/* Loop left early => some config write failed. */
	if (i <= n_config)
		goto out;

	d->disk.crc = calc_crc(&d->disk, 512);
	if (write(fd, &d->disk, 512) < 0)
		goto out;

	ret = 1;
out:
	/* Re-write the header with openflag clear to mark this copy of
	 * the metadata complete and consistent. */
	header->openflag = 0;
	header->crc = calc_crc(header, 512);

	lseek64(fd, sector<<9, 0);
	if (write(fd, header, 512) < 0)
		ret = 0;

	return ret;
}
2768
static int __write_init_super_ddf(struct supertype *st)
{
	/* Write the full DDF metadata (primary copy, secondary copy and
	 * anchor) to every disk in the container that has an open fd.
	 * Returns 0 when every attempted disk succeeded, non-zero otherwise.
	 */
	struct ddf_super *ddf = st->sb;
	struct dl *d;
	int attempts = 0;
	int successes = 0;
	unsigned long long size;
	char *null_aligned;
	__u32 seq;

	pr_state(ddf, __func__);
	/* 0xff filler used to pad unused config-record slots. */
	if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
		return -ENOMEM;
	}
	memset(null_aligned, 0xff, NULL_CONF_SZ);

	/* NOTE(review): seq fields are stored big-endian (set with
	 * __cpu_to_be32 elsewhere); adding 1 to the raw value bumps the
	 * most-significant byte on little-endian hosts — confirm intended. */
	seq = ddf->active->seq + 1;

	/* try to write updated metadata,
	 * if we catch a failure move on to the next disk
	 */
	for (d = ddf->dlist; d; d=d->next) {
		int fd = d->fd;

		if (fd < 0)
			continue;

		attempts++;
		/* We need to fill in the primary, (secondary) and workspace
		 * lba's in the headers, set their checksums,
		 * Also checksum phys, virt....
		 *
		 * Then write everything out, finally the anchor is written.
		 */
		get_dev_size(fd, NULL, &size);
		size /= 512;
		/* Defaults place workspace 32MiB and primary 16MiB before
		 * the end of the device.
		 * NOTE(review): the default secondary_lba equals the default
		 * workspace_lba (size - 32*1024*2) — confirm they are meant
		 * to share that location. */
		if (d->workspace_lba != 0)
			ddf->anchor.workspace_lba = d->workspace_lba;
		else
			ddf->anchor.workspace_lba =
				__cpu_to_be64(size - 32*1024*2);
		if (d->primary_lba != 0)
			ddf->anchor.primary_lba = d->primary_lba;
		else
			ddf->anchor.primary_lba =
				__cpu_to_be64(size - 16*1024*2);
		if (d->secondary_lba != 0)
			ddf->anchor.secondary_lba = d->secondary_lba;
		else
			ddf->anchor.secondary_lba =
				__cpu_to_be64(size - 32*1024*2);
		ddf->anchor.seq = seq;
		memcpy(&ddf->primary, &ddf->anchor, 512);
		memcpy(&ddf->secondary, &ddf->anchor, 512);

		ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
		ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
		ddf->anchor.crc = calc_crc(&ddf->anchor, 512);

		if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
					   null_aligned))
			continue;

		if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
					   null_aligned))
			continue;

		/* The anchor occupies the very last sector of the device. */
		lseek64(fd, (size-1)*512, SEEK_SET);
		if (write(fd, &ddf->anchor, 512) < 0)
			continue;
		successes++;
	}
	free(null_aligned);

	return attempts != successes;
}
2845
static int write_init_super_ddf(struct supertype *st)
{
	/* write_init_super method for a DDF container or a BVD inside one.
	 * With update_tail set (mdmon path) the changes are queued as
	 * metadata updates; otherwise the metadata is written directly.
	 * Returns 0 on success.
	 */
	struct ddf_super *ddf = st->sb;
	struct vcl *currentconf = ddf->currentconf;

	/* we are done with currentconf reset it to point st at the container */
	ddf->currentconf = NULL;

	if (st->update_tail) {
		/* queue the virtual_disk and vd_config as metadata updates */
		struct virtual_disk *vd;
		struct vd_config *vc;
		int len;

		if (!currentconf) {
			/* NOTE: this inner 'len' shadows the outer one. */
			int len = (sizeof(struct phys_disk) +
				   sizeof(struct phys_disk_entry));

			/* adding a disk to the container. */
			if (!ddf->add_list)
				return 0;

			/* Hand the prepared phys_disk update (built in
			 * add_to_super_ddf) to mdmon. */
			append_metadata_update(st, ddf->add_list->mdupdate, len);
			ddf->add_list->mdupdate = NULL;
			return 0;
		}

		/* Newly created VD */

		/* First the virtual disk. We have a slightly fake header:
		 * entries[0] holds the new entry and populated_vdes carries
		 * its index rather than a count. */
		len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
		vd = xmalloc(len);
		*vd = *ddf->virt;
		vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
		vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
		append_metadata_update(st, vd, len);

		/* Then the vd_config */
		len = ddf->conf_rec_len * 512;
		vc = xmalloc(len);
		memcpy(vc, &currentconf->conf, len);
		append_metadata_update(st, vc, len);

		/* FIXME I need to close the fds! */
		return 0;
	} else {
		/* Direct write: wipe any stale superblocks first. */
		struct dl *d;
		for (d = ddf->dlist; d; d=d->next)
			while (Kill(d->devname, NULL, 0, -1, 1) == 0);
		return __write_init_super_ddf(st);
	}
}
2898
2899 #endif
2900
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
			    unsigned long long data_offset)
{
	/* DDF reserves the last 32MiB of every device for metadata
	 * (32*1024*2 sectors); report how much remains for data. */
	const __u64 reserved = 32*1024*2;

	if (devsize <= reserved)
		return 0;
	return devsize - reserved;
}
2909
2910 #ifndef MDASSEMBLE
2911
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	/* Find 'raiddisks' spare extents at least 'size' big (but
	 * only caring about multiples of 'chunk') and remember
	 * them.  If they cannot be found, fail.
	 * With size == 0, choose the largest size available on enough
	 * devices and report it via *freesize.
	 * Records the choice in each dl's raiddisk/esize fields.
	 * Returns 1 on success, 0 on failure.
	 */
	struct dl *dl;
	struct ddf_super *ddf = st->sb;
	int cnt = 0;

	/* Reset any previous reservation state. */
	for (dl = ddf->dlist; dl ; dl=dl->next) {
		dl->raiddisk = -1;
		dl->esize = 0;
	}
	/* Now find largest extent on each device */
	for (dl = ddf->dlist ; dl ; dl=dl->next) {
		struct extent *e = get_extents(ddf, dl);
		unsigned long long pos = 0;
		int i = 0;
		int found = 0;
		/* minsize starts at the requested size and then tracks the
		 * largest qualifying gap found on this device. */
		unsigned long long minsize = size;

		if (size == 0)
			minsize = chunk;

		if (!e)
			continue;
		do {
			unsigned long long esize;
			esize = e[i].start - pos;
			if (esize >= minsize) {
				found = 1;
				minsize = esize;
			}
			pos = e[i].start + e[i].size;
			i++;
		} while (e[i-1].size);
		if (found) {
			cnt++;
			dl->esize = minsize;
		}
		free(e);
	}
	if (cnt < raiddisks) {
		pr_err("not enough devices with space to create array.\n");
		return 0; /* Not enough free spaces large enough */
	}
	if (size == 0) {
		/* choose the largest size of which there are at least 'raiddisk' */
		for (dl = ddf->dlist ; dl ; dl=dl->next) {
			struct dl *dl2;
			if (dl->esize <= size)
				continue;
			/* This is bigger than 'size', see if there are enough */
			cnt = 0;
			for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
				if (dl2->esize >= dl->esize)
					cnt++;
			if (cnt >= raiddisks)
				size = dl->esize;
		}
		/* Round down to a whole number of chunks. */
		if (chunk) {
			size = size / chunk;
			size *= chunk;
		}
		*freesize = size;
		if (size < 32) {
			pr_err("not enough spare devices to create array.\n");
			return 0;
		}
	}
	/* We have a 'size' of which there are enough spaces.
	 * We simply do a first-fit */
	cnt = 0;
	for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
		if (dl->esize < size)
			continue;

		dl->raiddisk = cnt;
		cnt++;
	}
	return 1;
}
2998
2999 static int
3000 validate_geometry_ddf_container(struct supertype *st,
3001 int level, int layout, int raiddisks,
3002 int chunk, unsigned long long size,
3003 unsigned long long data_offset,
3004 char *dev, unsigned long long *freesize,
3005 int verbose);
3006
3007 static int validate_geometry_ddf_bvd(struct supertype *st,
3008 int level, int layout, int raiddisks,
3009 int *chunk, unsigned long long size,
3010 unsigned long long data_offset,
3011 char *dev, unsigned long long *freesize,
3012 int verbose);
3013
static int validate_geometry_ddf(struct supertype *st,
				 int level, int layout, int raiddisks,
				 int *chunk, unsigned long long size,
				 unsigned long long data_offset,
				 char *dev, unsigned long long *freesize,
				 int verbose)
{
	/* Top-level geometry validation: dispatch to the container or BVD
	 * checker depending on what is being created and what 'dev' is.
	 * Returns 1 if the geometry is acceptable, 0 otherwise.
	 */
	int fd;
	struct mdinfo *sra;
	int cfd;

	/* ddf potentially supports lots of things, but it depends on
	 * what devices are offered (and maybe kernel version?)
	 * If given unused devices, we will make a container.
	 * If given devices in a container, we will make a BVD.
	 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
	 */

	if (chunk && *chunk == UnSet)
		*chunk = DEFAULT_CHUNK;

	if (level == -1000000) level = LEVEL_CONTAINER;
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_ddf_container(st, level, layout,
						       raiddisks, chunk?*chunk:0,
						       size, data_offset, dev,
						       freesize,
						       verbose);
	}

	if (!dev) {
		/* No device given: only check that DDF can express the
		 * requested level/layout at all. */
		mdu_array_info_t array = {
			.level = level, .layout = layout,
			.raid_disks = raiddisks
		};
		struct vd_config conf;
		if (layout_md2ddf(&array, &conf) == -1) {
			if (verbose)
				pr_err("DDF does not support level %d arrays\n",
				       level);
			return 0;
		}
		/* Should check layout? etc */

		if (st->sb && freesize) {
			/* --create was given a container to create in.
			 * So we need to check that there are enough
			 * free spaces and return the amount of space.
			 * We may as well remember which drives were
			 * chosen so that add_to_super/getinfo_super
			 * can return them.
			 */
			return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
		}
		return 1;
	}

	if (st->sb) {
		/* A container has already been opened, so we are
		 * creating in there. Maybe a BVD, maybe an SVD.
		 * Should make a distinction one day.
		 */
		return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
						 chunk, size, data_offset, dev,
						 freesize,
						 verbose);
	}
	/* This is the first device for the array.
	 * If it is a container, we read it in and do automagic allocations,
	 * no other devices should be given.
	 * Otherwise it must be a member device of a container, and we
	 * do manual allocation.
	 * Later we should check for a BVD and make an SVD.
	 */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* NOTE(review): sra from sysfs_read() is not freed on these
		 * paths — looks like a leak; confirm sysfs_free() semantics. */
		sra = sysfs_read(fd, NULL, GET_VERSION);
		close(fd);
		if (sra && sra->array.major_version == -1 &&
		    strcmp(sra->text_version, "ddf") == 0) {

			/* load super */
			/* find space for 'n' devices. */
			/* remember the devices */
			/* Somehow return the fact that we have enough */
		}

		if (verbose)
			pr_err("ddf: Cannot create this array "
			       "on device %s - a container is required.\n",
			       dev);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			pr_err("ddf: Cannot open %s: %s\n",
			       dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe a 'ddf' container. */
	cfd = open_container(fd);
	if (cfd < 0) {
		close(fd);
		if (verbose)
			pr_err("ddf: Cannot use %s: %s\n",
			       dev, strerror(EBUSY));
		return 0;
	}
	sra = sysfs_read(cfd, NULL, GET_VERSION);
	close(fd);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "ddf") == 0) {
		/* This is a member of a ddf container. Load the container
		 * and try to create a bvd
		 */
		struct ddf_super *ddf;
		if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
			st->sb = ddf;
			strcpy(st->container_devnm, fd2devnm(cfd));
			close(cfd);
			return validate_geometry_ddf_bvd(st, level, layout,
							 raiddisks, chunk, size,
							 data_offset,
							 dev, freesize,
							 verbose);
		}
		/* NOTE(review): on load failure we fall through to
		 * 'return 1' below — confirm that success result is
		 * intended when the container could not be loaded. */
		close(cfd);
	} else /* device may belong to a different container */
		return 0;

	return 1;
}
3147
3148 static int
3149 validate_geometry_ddf_container(struct supertype *st,
3150 int level, int layout, int raiddisks,
3151 int chunk, unsigned long long size,
3152 unsigned long long data_offset,
3153 char *dev, unsigned long long *freesize,
3154 int verbose)
3155 {
3156 int fd;
3157 unsigned long long ldsize;
3158
3159 if (level != LEVEL_CONTAINER)
3160 return 0;
3161 if (!dev)
3162 return 1;
3163
3164 fd = open(dev, O_RDONLY|O_EXCL, 0);
3165 if (fd < 0) {
3166 if (verbose)
3167 pr_err("ddf: Cannot open %s: %s\n",
3168 dev, strerror(errno));
3169 return 0;
3170 }
3171 if (!get_dev_size(fd, dev, &ldsize)) {
3172 close(fd);
3173 return 0;
3174 }
3175 close(fd);
3176
3177 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
3178 if (*freesize == 0)
3179 return 0;
3180
3181 return 1;
3182 }
3183
3184 static int validate_geometry_ddf_bvd(struct supertype *st,
3185 int level, int layout, int raiddisks,
3186 int *chunk, unsigned long long size,
3187 unsigned long long data_offset,
3188 char *dev, unsigned long long *freesize,
3189 int verbose)
3190 {
3191 struct stat stb;
3192 struct ddf_super *ddf = st->sb;
3193 struct dl *dl;
3194 unsigned long long pos = 0;
3195 unsigned long long maxsize;
3196 struct extent *e;
3197 int i;
3198 /* ddf/bvd supports lots of things, but not containers */
3199 if (level == LEVEL_CONTAINER) {
3200 if (verbose)
3201 pr_err("DDF cannot create a container within an container\n");
3202 return 0;
3203 }
3204 /* We must have the container info already read in. */
3205 if (!ddf)
3206 return 0;
3207
3208 if (!dev) {
3209 /* General test: make sure there is space for
3210 * 'raiddisks' device extents of size 'size'.
3211 */
3212 unsigned long long minsize = size;
3213 int dcnt = 0;
3214 if (minsize == 0)
3215 minsize = 8;
3216 for (dl = ddf->dlist; dl ; dl = dl->next)
3217 {
3218 int found = 0;
3219 pos = 0;
3220
3221 i = 0;
3222 e = get_extents(ddf, dl);
3223 if (!e) continue;
3224 do {
3225 unsigned long long esize;
3226 esize = e[i].start - pos;
3227 if (esize >= minsize)
3228 found = 1;
3229 pos = e[i].start + e[i].size;
3230 i++;
3231 } while (e[i-1].size);
3232 if (found)
3233 dcnt++;
3234 free(e);
3235 }
3236 if (dcnt < raiddisks) {
3237 if (verbose)
3238 pr_err("ddf: Not enough devices with "
3239 "space for this array (%d < %d)\n",
3240 dcnt, raiddisks);
3241 return 0;
3242 }
3243 return 1;
3244 }
3245 /* This device must be a member of the set */
3246 if (stat(dev, &stb) < 0)
3247 return 0;
3248 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3249 return 0;
3250 for (dl = ddf->dlist ; dl ; dl = dl->next) {
3251 if (dl->major == (int)major(stb.st_rdev) &&
3252 dl->minor == (int)minor(stb.st_rdev))
3253 break;
3254 }
3255 if (!dl) {
3256 if (verbose)
3257 pr_err("ddf: %s is not in the "
3258 "same DDF set\n",
3259 dev);
3260 return 0;
3261 }
3262 e = get_extents(ddf, dl);
3263 maxsize = 0;
3264 i = 0;
3265 if (e) do {
3266 unsigned long long esize;
3267 esize = e[i].start - pos;
3268 if (esize >= maxsize)
3269 maxsize = esize;
3270 pos = e[i].start + e[i].size;
3271 i++;
3272 } while (e[i-1].size);
3273 *freesize = maxsize;
3274 // FIXME here I am
3275
3276 return 1;
3277 }
3278
3279 static int load_super_ddf_all(struct supertype *st, int fd,
3280 void **sbp, char *devname)
3281 {
3282 struct mdinfo *sra;
3283 struct ddf_super *super;
3284 struct mdinfo *sd, *best = NULL;
3285 int bestseq = 0;
3286 int seq;
3287 char nm[20];
3288 int dfd;
3289
3290 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
3291 if (!sra)
3292 return 1;
3293 if (sra->array.major_version != -1 ||
3294 sra->array.minor_version != -2 ||
3295 strcmp(sra->text_version, "ddf") != 0)
3296 return 1;
3297
3298 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
3299 return 1;
3300 memset(super, 0, sizeof(*super));
3301
3302 /* first, try each device, and choose the best ddf */
3303 for (sd = sra->devs ; sd ; sd = sd->next) {
3304 int rv;
3305 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3306 dfd = dev_open(nm, O_RDONLY);
3307 if (dfd < 0)
3308 return 2;
3309 rv = load_ddf_headers(dfd, super, NULL);
3310 close(dfd);
3311 if (rv == 0) {
3312 seq = __be32_to_cpu(super->active->seq);
3313 if (super->active->openflag)
3314 seq--;
3315 if (!best || seq > bestseq) {
3316 bestseq = seq;
3317 best = sd;
3318 }
3319 }
3320 }
3321 if (!best)
3322 return 1;
3323 /* OK, load this ddf */
3324 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3325 dfd = dev_open(nm, O_RDONLY);
3326 if (dfd < 0)
3327 return 1;
3328 load_ddf_headers(dfd, super, NULL);
3329 load_ddf_global(dfd, super, NULL);
3330 close(dfd);
3331 /* Now we need the device-local bits */
3332 for (sd = sra->devs ; sd ; sd = sd->next) {
3333 int rv;
3334
3335 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3336 dfd = dev_open(nm, O_RDWR);
3337 if (dfd < 0)
3338 return 2;
3339 rv = load_ddf_headers(dfd, super, NULL);
3340 if (rv == 0)
3341 rv = load_ddf_local(dfd, super, NULL, 1);
3342 if (rv)
3343 return 1;
3344 }
3345
3346 *sbp = super;
3347 if (st->ss == NULL) {
3348 st->ss = &super_ddf;
3349 st->minor_version = 0;
3350 st->max_devs = 512;
3351 }
3352 strcpy(st->container_devnm, fd2devnm(fd));
3353 return 0;
3354 }
3355
/* Load a whole DDF container's metadata into st->sb.
 * Thin wrapper: all work is done by load_super_ddf_all().
 * Returns 0 on success, non-zero on failure (see load_super_ddf_all). */
static int load_container_ddf(struct supertype *st, int fd,
			      char *devname)
{
	return load_super_ddf_all(st, fd, &st->sb, devname);
}
3361
3362 #endif /* MDASSEMBLE */
3363
3364 static int check_secondary(const struct vcl *vc)
3365 {
3366 const struct vd_config *conf = &vc->conf;
3367 int i;
3368
3369 /* The only DDF secondary RAID level md can support is
3370 * RAID 10, if the stripe sizes and Basic volume sizes
3371 * are all equal.
3372 * Other configurations could in theory be supported by exposing
3373 * the BVDs to user space and using device mapper for the secondary
3374 * mapping. So far we don't support that.
3375 */
3376
3377 __u64 sec_elements[4] = {0, 0, 0, 0};
3378 #define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3379 #define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3380
3381 if (vc->other_bvds == NULL) {
3382 pr_err("No BVDs for secondary RAID found\n");
3383 return -1;
3384 }
3385 if (conf->prl != DDF_RAID1) {
3386 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3387 return -1;
3388 }
3389 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3390 pr_err("Secondary RAID level %d is unsupported\n",
3391 conf->srl);
3392 return -1;
3393 }
3394 __set_sec_seen(conf->sec_elmnt_seq);
3395 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3396 const struct vd_config *bvd = vc->other_bvds[i];
3397 if (bvd == NULL)
3398 continue;
3399 if (bvd->srl != conf->srl) {
3400 pr_err("Inconsistent secondary RAID level across BVDs\n");
3401 return -1;
3402 }
3403 if (bvd->prl != conf->prl) {
3404 pr_err("Different RAID levels for BVDs are unsupported\n");
3405 return -1;
3406 }
3407 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3408 pr_err("All BVDs must have the same number of primary elements\n");
3409 return -1;
3410 }
3411 if (bvd->chunk_shift != conf->chunk_shift) {
3412 pr_err("Different strip sizes for BVDs are unsupported\n");
3413 return -1;
3414 }
3415 if (bvd->array_blocks != conf->array_blocks) {
3416 pr_err("Different BVD sizes are unsupported\n");
3417 return -1;
3418 }
3419 __set_sec_seen(bvd->sec_elmnt_seq);
3420 }
3421 for (i = 0; i < conf->sec_elmnt_count; i++) {
3422 if (!__was_sec_seen(i)) {
3423 pr_err("BVD %d is missing\n", i);
3424 return -1;
3425 }
3426 }
3427 return 0;
3428 }
3429
3430 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
3431 __u32 refnum, unsigned int nmax,
3432 const struct vd_config **bvd,
3433 unsigned int *idx)
3434 {
3435 unsigned int i, j, n, sec, cnt;
3436
3437 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3438 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3439
3440 for (i = 0, j = 0 ; i < nmax ; i++) {
3441 /* j counts valid entries for this BVD */
3442 if (vc->conf.phys_refnum[i] != 0xffffffff)
3443 j++;
3444 if (vc->conf.phys_refnum[i] == refnum) {
3445 *bvd = &vc->conf;
3446 *idx = i;
3447 return sec * cnt + j - 1;
3448 }
3449 }
3450 if (vc->other_bvds == NULL)
3451 goto bad;
3452
3453 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3454 struct vd_config *vd = vc->other_bvds[n-1];
3455 if (vd == NULL)
3456 continue;
3457 sec = vd->sec_elmnt_seq;
3458 for (i = 0, j = 0 ; i < nmax ; i++) {
3459 if (vd->phys_refnum[i] != 0xffffffff)
3460 j++;
3461 if (vd->phys_refnum[i] == refnum) {
3462 *bvd = vd;
3463 *idx = i;
3464 return sec * cnt + j - 1;
3465 }
3466 }
3467 }
3468 bad:
3469 *bvd = NULL;
3470 return DDF_NOTFOUND;
3471 }
3472
static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
{
	/* Given a container loaded by load_super_ddf_all,
	 * extract information about all the arrays into
	 * an mdinfo tree.
	 *
	 * For each vcl in conflist: create an mdinfo, fill it in,
	 * then look for matching devices (phys_refnum) in dlist
	 * and create appropriate device mdinfo.
	 *
	 * If 'subarray' is non-NULL, only the VD whose vcnum matches
	 * that (decimal) string is reported.
	 * Returns a linked list of mdinfo (one per VD), or NULL.
	 */
	struct ddf_super *ddf = st->sb;
	struct mdinfo *rest = NULL;
	struct vcl *vc;

	for (vc = ddf->conflist ; vc ; vc=vc->next)
	{
		unsigned int i;
		unsigned int j;
		struct mdinfo *this;
		char *ep;
		__u32 *cptr;
		unsigned int pd;

		/* Skip everything except the requested subarray, if any */
		if (subarray &&
		    (strtoul(subarray, &ep, 10) != vc->vcnum ||
		     *ep != '\0'))
			continue;

		/* Multi-BVD (secondary RAID) configs are only reported
		 * if they are a layout md can express */
		if (vc->conf.sec_elmnt_count > 1) {
			if (check_secondary(vc) != 0)
				continue;
		}

		this = xcalloc(1, sizeof(*this));
		this->next = rest;
		rest = this;

		if (layout_ddf2md(&vc->conf, &this->array))
			continue;
		this->array.md_minor      = -1;
		this->array.major_version = -1;
		this->array.minor_version = -2;
		/* creation time is stored in bytes 16.. of the VD GUID */
		cptr = (__u32 *)(vc->conf.guid + 16);
		this->array.ctime         = DECADE + __be32_to_cpu(*cptr);
		this->array.utime	  = DECADE +
			__be32_to_cpu(vc->conf.timestamp);
		this->array.chunk_size	  = 512 << vc->conf.chunk_shift;

		i = vc->vcnum;
		/* Inconsistent or not-fully-initialised VDs start dirty
		 * with a resync from the beginning */
		if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
		    (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
		    DDF_init_full) {
			this->array.state = 0;
			this->resync_start = 0;
		} else {
			this->array.state = 1;
			this->resync_start = MaxSector;
		}
		/* DDF names are space-padded: truncate at first space */
		memcpy(this->name, ddf->virt->entries[i].name, 16);
		this->name[16]=0;
		for(j=0; j<16; j++)
			if (this->name[j] == ' ')
				this->name[j] = 0;

		memset(this->uuid, 0, sizeof(this->uuid));
		this->component_size = __be64_to_cpu(vc->conf.blocks);
		this->array.size = this->component_size / 2;
		this->container_member = i;

		/* uuid_from_super_ddf() reports on ddf->currentconf */
		ddf->currentconf = vc;
		uuid_from_super_ddf(st, this->uuid);
		ddf->currentconf = NULL;

		sprintf(this->text_version, "/%s/%d",
			st->container_devnm, this->container_member);

		/* Attach one device mdinfo per online member disk */
		for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
			struct mdinfo *dev;
			struct dl *d;
			const struct vd_config *bvd;
			unsigned int iphys;
			__u64 *lba_offset;
			int stt;

			if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
				continue;

			stt = __be16_to_cpu(ddf->phys->entries[pd].state);
			if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
			    != DDF_Online)
				continue;

			i = get_pd_index_from_refnum(
				vc, ddf->phys->entries[pd].refnum,
				ddf->mppe, &bvd, &iphys);
			if (i == DDF_NOTFOUND)
				continue;

			this->array.working_disks++;

			for (d = ddf->dlist; d ; d=d->next)
				if (d->disk.refnum ==
				    ddf->phys->entries[pd].refnum)
					break;
			if (d == NULL)
				/* Haven't found that one yet, maybe there are others */
				continue;

			dev = xcalloc(1, sizeof(*dev));
			dev->next = this->devs;
			this->devs = dev;

			dev->disk.number = __be32_to_cpu(d->disk.refnum);
			dev->disk.major = d->major;
			dev->disk.minor = d->minor;
			dev->disk.raid_disk = i;
			dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
			dev->recovery_start = MaxSector;

			dev->events = __be32_to_cpu(ddf->primary.seq);
			/* the lba_offset array follows phys_refnum[] in
			 * the VD config record */
			lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
			dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
			dev->component_size = __be64_to_cpu(bvd->blocks);
			if (d->devname)
				strcpy(dev->name, d->devname);
		}
	}
	return rest;
}
3602
3603 static int store_super_ddf(struct supertype *st, int fd)
3604 {
3605 struct ddf_super *ddf = st->sb;
3606 unsigned long long dsize;
3607 void *buf;
3608 int rc;
3609
3610 if (!ddf)
3611 return 1;
3612
3613 if (!get_dev_size(fd, NULL, &dsize))
3614 return 1;
3615
3616 if (ddf->dlist || ddf->conflist) {
3617 struct stat sta;
3618 struct dl *dl;
3619 int ofd, ret;
3620
3621 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3622 pr_err("%s: file descriptor for invalid device\n",
3623 __func__);
3624 return 1;
3625 }
3626 for (dl = ddf->dlist; dl; dl = dl->next)
3627 if (dl->major == (int)major(sta.st_rdev) &&
3628 dl->minor == (int)minor(sta.st_rdev))
3629 break;
3630 if (!dl) {
3631 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3632 (int)major(sta.st_rdev),
3633 (int)minor(sta.st_rdev));
3634 return 1;
3635 }
3636 /*
3637 For DDF, writing to just one disk makes no sense.
3638 We would run the risk of writing inconsistent meta data
3639 to the devices. So just call __write_init_super_ddf and
3640 write to all devices, including this one.
3641 Use the fd passed to this function, just in case dl->fd
3642 is invalid.
3643 */
3644 ofd = dl->fd;
3645 dl->fd = fd;
3646 ret = __write_init_super_ddf(st);
3647 dl->fd = ofd;
3648 return ret;
3649 }
3650
3651 if (posix_memalign(&buf, 512, 512) != 0)
3652 return 1;
3653 memset(buf, 0, 512);
3654
3655 lseek64(fd, dsize-512, 0);
3656 rc = write(fd, buf, 512);
3657 free(buf);
3658 if (rc < 0)
3659 return 1;
3660 return 0;
3661 }
3662
3663 static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3664 {
3665 /*
3666 * return:
3667 * 0 same, or first was empty, and second was copied
3668 * 1 second had wrong number
3669 * 2 wrong uuid
3670 * 3 wrong other info
3671 */
3672 struct ddf_super *first = st->sb;
3673 struct ddf_super *second = tst->sb;
3674 struct dl *dl1, *dl2;
3675 struct vcl *vl1, *vl2;
3676 unsigned int max_vds, max_pds, pd, vd;
3677
3678 if (!first) {
3679 st->sb = tst->sb;
3680 tst->sb = NULL;
3681 return 0;
3682 }
3683
3684 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3685 return 2;
3686
3687 if (first->anchor.seq != second->anchor.seq) {
3688 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3689 __be32_to_cpu(first->anchor.seq),
3690 __be32_to_cpu(second->anchor.seq));
3691 return 3;
3692 }
3693 if (first->max_part != second->max_part ||
3694 first->phys->used_pdes != second->phys->used_pdes ||
3695 first->virt->populated_vdes != second->virt->populated_vdes) {
3696 dprintf("%s: PD/VD number mismatch\n", __func__);
3697 return 3;
3698 }
3699
3700 max_pds = __be16_to_cpu(first->phys->used_pdes);
3701 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3702 for (pd = 0; pd < max_pds; pd++)
3703 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3704 break;
3705 if (pd == max_pds) {
3706 dprintf("%s: no match for disk %08x\n", __func__,
3707 __be32_to_cpu(dl2->disk.refnum));
3708 return 3;
3709 }
3710 }
3711
3712 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3713 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3714 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3715 continue;
3716 for (vd = 0; vd < max_vds; vd++)
3717 if (!memcmp(first->virt->entries[vd].guid,
3718 vl2->conf.guid, DDF_GUID_LEN))
3719 break;
3720 if (vd == max_vds) {
3721 dprintf("%s: no match for VD config\n", __func__);
3722 return 3;
3723 }
3724 }
3725 /* FIXME should I look at anything else? */
3726
3727 /*
3728 At this point we are fairly sure that the meta data matches.
3729 But the new disk may contain additional local data.
3730 Add it to the super block.
3731 */
3732 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3733 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3734 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3735 DDF_GUID_LEN))
3736 break;
3737 if (vl1) {
3738 if (vl1->other_bvds != NULL &&
3739 vl1->conf.sec_elmnt_seq !=
3740 vl2->conf.sec_elmnt_seq) {
3741 dprintf("%s: adding BVD %u\n", __func__,
3742 vl2->conf.sec_elmnt_seq);
3743 add_other_bvd(vl1, &vl2->conf,
3744 first->conf_rec_len*512);
3745 }
3746 continue;
3747 }
3748
3749 if (posix_memalign((void **)&vl1, 512,
3750 (first->conf_rec_len*512 +
3751 offsetof(struct vcl, conf))) != 0) {
3752 pr_err("%s could not allocate vcl buf\n",
3753 __func__);
3754 return 3;
3755 }
3756
3757 vl1->next = first->conflist;
3758 vl1->block_sizes = NULL;
3759 if (vl2->conf.sec_elmnt_count > 1) {
3760 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3761 sizeof(struct vd_config *));
3762 } else
3763 vl1->other_bvds = NULL;
3764 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3765 vl1->lba_offset = (__u64 *)
3766 &vl1->conf.phys_refnum[first->mppe];
3767 for (vd = 0; vd < max_vds; vd++)
3768 if (!memcmp(first->virt->entries[vd].guid,
3769 vl1->conf.guid, DDF_GUID_LEN))
3770 break;
3771 vl1->vcnum = vd;
3772 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3773 first->conflist = vl1;
3774 }
3775
3776 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3777 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3778 if (dl1->disk.refnum == dl2->disk.refnum)
3779 break;
3780 if (dl1)
3781 continue;
3782
3783 if (posix_memalign((void **)&dl1, 512,
3784 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3785 != 0) {
3786 pr_err("%s could not allocate disk info buffer\n",
3787 __func__);
3788 return 3;
3789 }
3790 memcpy(dl1, dl2, sizeof(*dl1));
3791 dl1->mdupdate = NULL;
3792 dl1->next = first->dlist;
3793 dl1->fd = -1;
3794 for (pd = 0; pd < max_pds; pd++)
3795 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3796 break;
3797 dl1->pdnum = pd;
3798 if (dl2->spare) {
3799 if (posix_memalign((void **)&dl1->spare, 512,
3800 first->conf_rec_len*512) != 0) {
3801 pr_err("%s could not allocate spare info buf\n",
3802 __func__);
3803 return 3;
3804 }
3805 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3806 }
3807 for (vd = 0 ; vd < first->max_part ; vd++) {
3808 if (!dl2->vlist[vd]) {
3809 dl1->vlist[vd] = NULL;
3810 continue;
3811 }
3812 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3813 if (!memcmp(vl1->conf.guid,
3814 dl2->vlist[vd]->conf.guid,
3815 DDF_GUID_LEN))
3816 break;
3817 dl1->vlist[vd] = vl1;
3818 }
3819 }
3820 first->dlist = dl1;
3821 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3822 dl1->disk.refnum);
3823 }
3824
3825 return 0;
3826 }
3827
3828 #ifndef MDASSEMBLE
3829 /*
3830 * A new array 'a' has been started which claims to be instance 'inst'
3831 * within container 'c'.
3832 * We need to confirm that the array matches the metadata in 'c' so
3833 * that we don't corrupt any metadata.
3834 */
3835 static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
3836 {
3837 struct ddf_super *ddf = c->sb;
3838 int n = atoi(inst);
3839 if (all_ff(ddf->virt->entries[n].guid)) {
3840 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
3841 return -ENODEV;
3842 }
3843 dprintf("ddf: open_new %d\n", n);
3844 a->info.container_member = n;
3845 return 0;
3846 }
3847
3848 /*
3849 * The array 'a' is to be marked clean in the metadata.
3850 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
3851 * clean up to the point (in sectors). If that cannot be recorded in the
3852 * metadata, then leave it as dirty.
3853 *
3854 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3855 * !global! virtual_disk.virtual_entry structure.
3856 */
static int ddf_set_array_state(struct active_array *a, int consistent)
{
	struct ddf_super *ddf = a->container->sb;
	int inst = a->info.container_member;
	int old = ddf->virt->entries[inst].state;
	if (consistent == 2) {
		/* Should check if a recovery should be started FIXME */
		consistent = 1;
		if (!is_resync_complete(&a->info))
			consistent = 0;
	}
	/* Mirror clean/dirty into the global virt entry; only queue a
	 * metadata write if the bit actually changed. */
	if (consistent)
		ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
	else
		ddf->virt->entries[inst].state |= DDF_state_inconsistent;
	if (old != ddf->virt->entries[inst].state)
		ddf_set_updates_pending(ddf);

	/* Record initialisation progress: fully synced, not started,
	 * or somewhere in between ("quick"). */
	old = ddf->virt->entries[inst].init_state;
	ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
	if (is_resync_complete(&a->info))
		ddf->virt->entries[inst].init_state |= DDF_init_full;
	else if (a->info.resync_start == 0)
		ddf->virt->entries[inst].init_state |= DDF_init_not;
	else
		ddf->virt->entries[inst].init_state |= DDF_init_quick;
	if (old != ddf->virt->entries[inst].init_state)
		ddf_set_updates_pending(ddf);

	dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
		a->info.resync_start);
	return consistent;
}
3890
3891 static int get_bvd_state(const struct ddf_super *ddf,
3892 const struct vd_config *vc)
3893 {
3894 unsigned int i, n_bvd, working = 0;
3895 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3896 int pd, st, state;
3897 for (i = 0; i < n_prim; i++) {
3898 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3899 continue;
3900 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3901 if (pd < 0)
3902 continue;
3903 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3904 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3905 == DDF_Online)
3906 working++;
3907 }
3908
3909 state = DDF_state_degraded;
3910 if (working == n_prim)
3911 state = DDF_state_optimal;
3912 else
3913 switch (vc->prl) {
3914 case DDF_RAID0:
3915 case DDF_CONCAT:
3916 case DDF_JBOD:
3917 state = DDF_state_failed;
3918 break;
3919 case DDF_RAID1:
3920 if (working == 0)
3921 state = DDF_state_failed;
3922 else if (working >= 2)
3923 state = DDF_state_part_optimal;
3924 break;
3925 case DDF_RAID4:
3926 case DDF_RAID5:
3927 if (working < n_prim - 1)
3928 state = DDF_state_failed;
3929 break;
3930 case DDF_RAID6:
3931 if (working < n_prim - 2)
3932 state = DDF_state_failed;
3933 else if (working == n_prim - 1)
3934 state = DDF_state_part_optimal;
3935 break;
3936 }
3937 return state;
3938 }
3939
3940 static int secondary_state(int state, int other, int seclevel)
3941 {
3942 if (state == DDF_state_optimal && other == DDF_state_optimal)
3943 return DDF_state_optimal;
3944 if (seclevel == DDF_2MIRRORED) {
3945 if (state == DDF_state_optimal || other == DDF_state_optimal)
3946 return DDF_state_part_optimal;
3947 if (state == DDF_state_failed && other == DDF_state_failed)
3948 return DDF_state_failed;
3949 return DDF_state_degraded;
3950 } else {
3951 if (state == DDF_state_failed || other == DDF_state_failed)
3952 return DDF_state_failed;
3953 if (state == DDF_state_degraded || other == DDF_state_degraded)
3954 return DDF_state_degraded;
3955 return DDF_state_part_optimal;
3956 }
3957 }
3958
3959 static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
3960 {
3961 int state = get_bvd_state(ddf, &vcl->conf);
3962 unsigned int i;
3963 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
3964 state = secondary_state(
3965 state,
3966 get_bvd_state(ddf, vcl->other_bvds[i-1]),
3967 vcl->conf.srl);
3968 }
3969 return state;
3970 }
3971
3972 /*
3973 * The state of each disk is stored in the global phys_disk structure
3974 * in phys_disk.entries[n].state.
3975 * This makes various combinations awkward.
3976 * - When a device fails in any array, it must be failed in all arrays
3977 * that include a part of this device.
3978 * - When a component is rebuilding, we cannot include it officially in the
3979 * array unless this is the only array that uses the device.
3980 *
3981 * So: when transitioning:
3982 * Online -> failed, just set failed flag. monitor will propagate
3983 * spare -> online, the device might need to be added to the array.
3984 * spare -> failed, just set failed. Don't worry if in array or not.
3985 */
static void ddf_set_disk(struct active_array *a, int n, int state)
{
	/* mdmon callback: slot 'n' of array 'a' changed to 'state'
	 * (DS_* flags).  Update the phys-disk entry and then recompute
	 * the VD's overall state. */
	struct ddf_super *ddf = a->container->sb;
	unsigned int inst = a->info.container_member, n_bvd;
	struct vcl *vcl;
	struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
					 &n_bvd, &vcl);
	int pd;
	struct mdinfo *mdi;
	struct dl *dl;

	if (vc == NULL) {
		dprintf("ddf: cannot find instance %d!!\n", inst);
		return;
	}
	/* Find the matching slot in 'info'. */
	for (mdi = a->info.devs; mdi; mdi = mdi->next)
		if (mdi->disk.raid_disk == n)
			break;
	if (!mdi)
		return;

	/* and find the 'dl' entry corresponding to that. */
	for (dl = ddf->dlist; dl; dl = dl->next)
		if (mdi->state_fd >= 0 &&
		    mdi->disk.major == dl->major &&
		    mdi->disk.minor == dl->minor)
			break;
	if (!dl)
		return;

	pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
	if (pd < 0 || pd != dl->pdnum) {
		/* disk doesn't currently exist or has changed.
		 * If it is now in_sync, insert it. */
		dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
			__func__, dl->pdnum, dl->major, dl->minor,
			dl->disk.refnum);
		dprintf("%s: array %u disk %u ref %08x pd %d\n",
			__func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
		if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
			__u64 *lba_offset;
			pd = dl->pdnum; /* FIXME: is this really correct ? */
			vc->phys_refnum[n_bvd] = dl->disk.refnum;
			/* lba_offset[] follows phys_refnum[] in the record */
			lba_offset = (__u64 *)&vc->phys_refnum[ddf->mppe];
			lba_offset[n_bvd] = mdi->data_offset;
			/* no longer a global spare: now part of this VD */
			ddf->phys->entries[pd].type &=
				~__cpu_to_be16(DDF_Global_Spare);
			ddf->phys->entries[pd].type |=
				__cpu_to_be16(DDF_Active_in_VD);
			ddf_set_updates_pending(ddf);
		}
	} else {
		/* disk already known: just update its state bits */
		int old = ddf->phys->entries[pd].state;
		if (state & DS_FAULTY)
			ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
		if (state & DS_INSYNC) {
			ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
			ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
		}
		if (old != ddf->phys->entries[pd].state)
			ddf_set_updates_pending(ddf);
	}

	dprintf("ddf: set_disk %d to %x\n", n, state);

	/* Now we need to check the state of the array and update
	 * virtual_disk.entries[n].state.
	 * It needs to be one of "optimal", "degraded", "failed".
	 * I don't understand 'deleted' or 'missing'.
	 */
	state = get_svd_state(ddf, vcl);

	if (ddf->virt->entries[inst].state !=
	    ((ddf->virt->entries[inst].state & ~DDF_state_mask)
	     | state)) {

		ddf->virt->entries[inst].state =
			(ddf->virt->entries[inst].state & ~DDF_state_mask)
			| state;
		ddf_set_updates_pending(ddf);
	}

}
4070
static void ddf_sync_metadata(struct supertype *st)
{

	/*
	 * Write all data to all devices.
	 * Later, we might be able to track whether only local changes
	 * have been made, or whether any global data has been changed,
	 * but ddf is sufficiently weird that it probably always
	 * changes global data ....
	 */
	struct ddf_super *ddf = st->sb;
	/* nothing to do unless some earlier change queued a write */
	if (!ddf->updates_pending)
		return;
	/* clear the flag before writing so new changes re-arm it */
	ddf->updates_pending = 0;
	__write_init_super_ddf(st);
	dprintf("ddf: sync_metadata\n");
}
4088
static void ddf_process_update(struct supertype *st,
			       struct metadata_update *update)
{
	/* Apply this update to the metadata.
	 * The first 4 bytes are a DDF_*_MAGIC which guides
	 * our actions.
	 * Possible update are:
	 *  DDF_PHYS_RECORDS_MAGIC
	 *    Add a new physical device or remove an old one.
	 *    Changes to this record only happen implicitly.
	 *    used_pdes is the device number.
	 *  DDF_VIRT_RECORDS_MAGIC
	 *    Add a new VD.  Possibly also change the 'access' bits.
	 *    populated_vdes is the entry number.
	 *  DDF_VD_CONF_MAGIC
	 *    New or updated VD.  the VIRT_RECORD must already
	 *    exist.  For an update, phys_refnum and lba_offset
	 *    (at least) are updated, and the VD_CONF must
	 *    be written to precisely those devices listed with
	 *    a phys_refnum.
	 *  DDF_SPARE_ASSIGN_MAGIC
	 *    replacement Spare Assignment Record... but for which device?
	 *
	 * So, e.g.:
	 *  - to create a new array, we send a VIRT_RECORD and
	 *    a VD_CONF.  Then assemble and start the array.
	 *  - to activate a spare we send a VD_CONF to add the phys_refnum
	 *    and offset.  This will also mark the spare as active with
	 *    a spare-assignment record.
	 */
	struct ddf_super *ddf = st->sb;
	__u32 *magic = (__u32*)update->buf;
	struct phys_disk *pd;
	struct virtual_disk *vd;
	struct vd_config *vc;
	struct vcl *vcl;
	struct dl *dl;
	unsigned int mppe;
	unsigned int ent;
	unsigned int pdnum, pd2;

	dprintf("Process update %x\n", *magic);

	switch (*magic) {
	case DDF_PHYS_RECORDS_MAGIC:

		if (update->len != (sizeof(struct phys_disk) +
				    sizeof(struct phys_disk_entry)))
			return;
		pd = (struct phys_disk*)update->buf;

		ent = __be16_to_cpu(pd->used_pdes);
		if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
			return;
		if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
			struct dl **dlp;
			/* removing this disk. */
			ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
			for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
				struct dl *dl = *dlp;
				if (dl->pdnum == (signed)ent) {
					close(dl->fd);
					dl->fd = -1;
					/* FIXME this doesn't free
					 * dl->devname */
					/* park the dl on update->space so
					 * managemon can free it safely */
					update->space = dl;
					*dlp = dl->next;
					break;
				}
			}
			ddf_set_updates_pending(ddf);
			return;
		}
		/* adding: the slot must still be unused (guid all-0xff) */
		if (!all_ff(ddf->phys->entries[ent].guid))
			return;
		ddf->phys->entries[ent] = pd->entries[0];
		ddf->phys->used_pdes = __cpu_to_be16(1 +
						     __be16_to_cpu(ddf->phys->used_pdes));
		ddf_set_updates_pending(ddf);
		if (ddf->add_list) {
			struct active_array *a;
			struct dl *al = ddf->add_list;
			ddf->add_list = al->next;

			al->next = ddf->dlist;
			ddf->dlist = al;

			/* As a device has been added, we should check
			 * for any degraded devices that might make
			 * use of this spare */
			for (a = st->arrays ; a; a=a->next)
				a->check_degraded = 1;
		}
		break;

	case DDF_VIRT_RECORDS_MAGIC:

		if (update->len != (sizeof(struct virtual_disk) +
				    sizeof(struct virtual_entry)))
			return;
		vd = (struct virtual_disk*)update->buf;

		ent = find_unused_vde(ddf);
		if (ent == DDF_NOTFOUND)
			return;
		ddf->virt->entries[ent] = vd->entries[0];
		ddf->virt->populated_vdes = __cpu_to_be16(1 +
							  __be16_to_cpu(ddf->virt->populated_vdes));
		ddf_set_updates_pending(ddf);
		break;

	case DDF_VD_CONF_MAGIC:
		dprintf("len %d %d\n", update->len, ddf->conf_rec_len);

		mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
		if ((unsigned)update->len != ddf->conf_rec_len * 512)
			return;
		vc = (struct vd_config*)update->buf;
		for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
			if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
				break;
		dprintf("vcl = %p\n", vcl);
		if (vcl) {
			/* An update, just copy the phys_refnum and lba_offset
			 * fields
			 */
			memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
			       mppe * (sizeof(__u32) + sizeof(__u64)));
		} else {
			/* A new VD_CONF - use the vcl buffer allocated by
			 * ddf_prepare_update() */
			if (!update->space)
				return;
			vcl = update->space;
			update->space = NULL;
			vcl->next = ddf->conflist;
			memcpy(&vcl->conf, vc, update->len);
			vcl->lba_offset = (__u64*)
				&vcl->conf.phys_refnum[mppe];
			ent = find_vde_by_guid(ddf, vc->guid);
			if (ent == DDF_NOTFOUND)
				return;
			vcl->vcnum = ent;
			ddf->conflist = vcl;
		}
		/* Set DDF_Transition on all Failed devices - to help
		 * us detect those that are no longer in use
		 */
		for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
			if (ddf->phys->entries[pdnum].state
			    & __be16_to_cpu(DDF_Failed))
				ddf->phys->entries[pdnum].state
					|= __be16_to_cpu(DDF_Transition);
		/* Now make sure vlist is correct for each dl. */
		for (dl = ddf->dlist; dl; dl = dl->next) {
			unsigned int dn;
			unsigned int vn = 0;
			int in_degraded = 0;
			for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
				for (dn=0; dn < ddf->mppe ; dn++)
					if (vcl->conf.phys_refnum[dn] ==
					    dl->disk.refnum) {
						int vstate;
						dprintf("dev %d has %p at %d\n",
							dl->pdnum, vcl, vn);
						/* Clear the Transition flag */
						if (ddf->phys->entries[dl->pdnum].state
						    & __be16_to_cpu(DDF_Failed))
							ddf->phys->entries[dl->pdnum].state &=
								~__be16_to_cpu(DDF_Transition);

						dl->vlist[vn++] = vcl;
						vstate = ddf->virt->entries[vcl->vcnum].state
							& DDF_state_mask;
						if (vstate == DDF_state_degraded ||
						    vstate == DDF_state_part_optimal)
							in_degraded = 1;
						break;
					}
			while (vn < ddf->max_part)
				dl->vlist[vn++] = NULL;
			/* re-derive the disk's type bits from whether it is
			 * referenced by any VD and/or has a spare record */
			if (dl->vlist[0]) {
				ddf->phys->entries[dl->pdnum].type &=
					~__cpu_to_be16(DDF_Global_Spare);
				if (!(ddf->phys->entries[dl->pdnum].type &
				      __cpu_to_be16(DDF_Active_in_VD))) {
					ddf->phys->entries[dl->pdnum].type |=
						__cpu_to_be16(DDF_Active_in_VD);
					if (in_degraded)
						ddf->phys->entries[dl->pdnum].state |=
							__cpu_to_be16(DDF_Rebuilding);
				}
			}
			if (dl->spare) {
				ddf->phys->entries[dl->pdnum].type &=
					~__cpu_to_be16(DDF_Global_Spare);
				ddf->phys->entries[dl->pdnum].type |=
					__cpu_to_be16(DDF_Spare);
			}
			if (!dl->vlist[0] && !dl->spare) {
				ddf->phys->entries[dl->pdnum].type |=
					__cpu_to_be16(DDF_Global_Spare);
				ddf->phys->entries[dl->pdnum].type &=
					~__cpu_to_be16(DDF_Spare |
						       DDF_Active_in_VD);
			}
		}

		/* Now remove any 'Failed' devices that are not part
		 * of any VD.  They will have the Transition flag set.
		 * Once done, we need to update all dl->pdnum numbers.
		 */
		pd2 = 0;
		for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
			if ((ddf->phys->entries[pdnum].state
			     & __be16_to_cpu(DDF_Failed))
			    && (ddf->phys->entries[pdnum].state
				& __be16_to_cpu(DDF_Transition)))
				/* skip this one */;
			else if (pdnum == pd2)
				pd2++;
			else {
				/* compact entries down, fixing up pdnum
				 * references in the dlist as we go */
				ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
				for (dl = ddf->dlist; dl; dl = dl->next)
					if (dl->pdnum == (int)pdnum)
						dl->pdnum = pd2;
				pd2++;
			}
		ddf->phys->used_pdes = __cpu_to_be16(pd2);
		/* mark the now-unused trailing entries as free (all-0xff) */
		while (pd2 < pdnum) {
			memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
			pd2++;
		}

		ddf_set_updates_pending(ddf);
		break;
	case DDF_SPARE_ASSIGN_MAGIC:
	default: break;
	}
}
4328
4329 static void ddf_prepare_update(struct supertype *st,
4330 struct metadata_update *update)
4331 {
4332 /* This update arrived at managemon.
4333 * We are about to pass it to monitor.
4334 * If a malloc is needed, do it here.
4335 */
4336 struct ddf_super *ddf = st->sb;
4337 __u32 *magic = (__u32*)update->buf;
4338 if (*magic == DDF_VD_CONF_MAGIC)
4339 if (posix_memalign(&update->space, 512,
4340 offsetof(struct vcl, conf)
4341 + ddf->conf_rec_len * 512) != 0)
4342 update->space = NULL;
4343 }
4344
4345 /*
4346 * Check if the array 'a' is degraded but not failed.
4347 * If it is, find as many spares as are available and needed and
4348 * arrange for their inclusion.
4349 * We only choose devices which are not already in the array,
4350 * and prefer those with a spare-assignment to this array.
4351 * Otherwise we choose global spares - assuming always that
4352 * there is enough room.
4353 * For each spare that we assign, we return an 'mdinfo' which
4354 * describes the position for the device in the array.
4355 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4356 * the new phys_refnum and lba_offset values.
4357 *
4358 * Only worry about BVDs at the moment.
4359 */
4360 static struct mdinfo *ddf_activate_spare(struct active_array *a,
4361 struct metadata_update **updates)
4362 {
4363 int working = 0;
4364 struct mdinfo *d;
4365 struct ddf_super *ddf = a->container->sb;
4366 int global_ok = 0;
4367 struct mdinfo *rv = NULL;
4368 struct mdinfo *di;
4369 struct metadata_update *mu;
4370 struct dl *dl;
4371 int i;
4372 struct vcl *vcl;
4373 struct vd_config *vc;
4374 __u64 *lba;
4375 unsigned int n_bvd;
4376
4377 for (d = a->info.devs ; d ; d = d->next) {
4378 if ((d->curr_state & DS_FAULTY) &&
4379 d->state_fd >= 0)
4380 /* wait for Removal to happen */
4381 return NULL;
4382 if (d->state_fd >= 0)
4383 working ++;
4384 }
4385
4386 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4387 a->info.array.level);
4388 if (working == a->info.array.raid_disks)
4389 return NULL; /* array not degraded */
4390 switch (a->info.array.level) {
4391 case 1:
4392 if (working == 0)
4393 return NULL; /* failed */
4394 break;
4395 case 4:
4396 case 5:
4397 if (working < a->info.array.raid_disks - 1)
4398 return NULL; /* failed */
4399 break;
4400 case 6:
4401 if (working < a->info.array.raid_disks - 2)
4402 return NULL; /* failed */
4403 break;
4404 default: /* concat or stripe */
4405 return NULL; /* failed */
4406 }
4407
4408 /* For each slot, if it is not working, find a spare */
4409 dl = ddf->dlist;
4410 for (i = 0; i < a->info.array.raid_disks; i++) {
4411 for (d = a->info.devs ; d ; d = d->next)
4412 if (d->disk.raid_disk == i)
4413 break;
4414 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
4415 if (d && (d->state_fd >= 0))
4416 continue;
4417
4418 /* OK, this device needs recovery. Find a spare */
4419 again:
4420 for ( ; dl ; dl = dl->next) {
4421 unsigned long long esize;
4422 unsigned long long pos;
4423 struct mdinfo *d2;
4424 int is_global = 0;
4425 int is_dedicated = 0;
4426 struct extent *ex;
4427 unsigned int j;
4428 /* If in this array, skip */
4429 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
4430 if (d2->state_fd >= 0 &&
4431 d2->disk.major == dl->major &&
4432 d2->disk.minor == dl->minor) {
4433 dprintf("%x:%x already in array\n", dl->major, dl->minor);
4434 break;
4435 }
4436 if (d2)
4437 continue;
4438 if (ddf->phys->entries[dl->pdnum].type &
4439 __cpu_to_be16(DDF_Spare)) {
4440 /* Check spare assign record */
4441 if (dl->spare) {
4442 if (dl->spare->type & DDF_spare_dedicated) {
4443 /* check spare_ents for guid */
4444 for (j = 0 ;
4445 j < __be16_to_cpu(dl->spare->populated);
4446 j++) {
4447 if (memcmp(dl->spare->spare_ents[j].guid,
4448 ddf->virt->entries[a->info.container_member].guid,
4449 DDF_GUID_LEN) == 0)
4450 is_dedicated = 1;
4451 }
4452 } else
4453 is_global = 1;
4454 }
4455 } else if (ddf->phys->entries[dl->pdnum].type &
4456 __cpu_to_be16(DDF_Global_Spare)) {
4457 is_global = 1;
4458 } else if (!(ddf->phys->entries[dl->pdnum].state &
4459 __cpu_to_be16(DDF_Failed))) {
4460 /* we can possibly use some of this */
4461 is_global = 1;
4462 }
4463 if ( ! (is_dedicated ||
4464 (is_global && global_ok))) {
4465 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
4466 is_dedicated, is_global);
4467 continue;
4468 }
4469
4470 /* We are allowed to use this device - is there space?
4471 * We need a->info.component_size sectors */
4472 ex = get_extents(ddf, dl);
4473 if (!ex) {
4474 dprintf("cannot get extents\n");
4475 continue;
4476 }
4477 j = 0; pos = 0;
4478 esize = 0;
4479
4480 do {
4481 esize = ex[j].start - pos;
4482 if (esize >= a->info.component_size)
4483 break;
4484 pos = ex[j].start + ex[j].size;
4485 j++;
4486 } while (ex[j-1].size);
4487
4488 free(ex);
4489 if (esize < a->info.component_size) {
4490 dprintf("%x:%x has no room: %llu %llu\n",
4491 dl->major, dl->minor,
4492 esize, a->info.component_size);
4493 /* No room */
4494 continue;
4495 }
4496
4497 /* Cool, we have a device with some space at pos */
4498 di = xcalloc(1, sizeof(*di));
4499 di->disk.number = i;
4500 di->disk.raid_disk = i;
4501 di->disk.major = dl->major;
4502 di->disk.minor = dl->minor;
4503 di->disk.state = 0;
4504 di->recovery_start = 0;
4505 di->data_offset = pos;
4506 di->component_size = a->info.component_size;
4507 di->container_member = dl->pdnum;
4508 di->next = rv;
4509 rv = di;
4510 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4511 i, pos);
4512
4513 break;
4514 }
4515 if (!dl && ! global_ok) {
4516 /* not enough dedicated spares, try global */
4517 global_ok = 1;
4518 dl = ddf->dlist;
4519 goto again;
4520 }
4521 }
4522
4523 if (!rv)
4524 /* No spares found */
4525 return rv;
4526 /* Now 'rv' has a list of devices to return.
4527 * Create a metadata_update record to update the
4528 * phys_refnum and lba_offset values
4529 */
4530 mu = xmalloc(sizeof(*mu));
4531 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
4532 free(mu);
4533 mu = NULL;
4534 }
4535 mu->buf = xmalloc(ddf->conf_rec_len * 512);
4536 mu->len = ddf->conf_rec_len * 512;
4537 mu->space = NULL;
4538 mu->space_list = NULL;
4539 mu->next = *updates;
4540 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4541 &n_bvd, &vcl);
4542 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4543
4544 vc = (struct vd_config*)mu->buf;
4545 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4546 for (di = rv ; di ; di = di->next) {
4547 vc->phys_refnum[di->disk.raid_disk] =
4548 ddf->phys->entries[dl->pdnum].refnum;
4549 lba[di->disk.raid_disk] = di->data_offset;
4550 }
4551 *updates = mu;
4552 return rv;
4553 }
4554 #endif /* MDASSEMBLE */
4555
4556 static int ddf_level_to_layout(int level)
4557 {
4558 switch(level) {
4559 case 0:
4560 case 1:
4561 return 0;
4562 case 5:
4563 return ALGORITHM_LEFT_SYMMETRIC;
4564 case 6:
4565 return ALGORITHM_ROTATING_N_CONTINUE;
4566 case 10:
4567 return 0x102;
4568 default:
4569 return UnSet;
4570 }
4571 }
4572
4573 static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4574 {
4575 if (level && *level == UnSet)
4576 *level = LEVEL_CONTAINER;
4577
4578 if (level && layout && *layout == UnSet)
4579 *layout = ddf_level_to_layout(*level);
4580 }
4581
/* Method table exposing the DDF metadata format to mdadm and mdmon.
 * Sections guarded by MDASSEMBLE are left out of the stripped-down
 * assembly-only build.
 */
struct superswitch super_ddf = {
#ifndef MDASSEMBLE
	/* mdadm command-line side: examine, create and write metadata */
	.examine_super = examine_super_ddf,
	.brief_examine_super = brief_examine_super_ddf,
	.brief_examine_subarrays = brief_examine_subarrays_ddf,
	.export_examine_super = export_examine_super_ddf,
	.detail_super = detail_super_ddf,
	.brief_detail_super = brief_detail_super_ddf,
	.validate_geometry = validate_geometry_ddf,
	.write_init_super = write_init_super_ddf,
	.add_to_super = add_to_super_ddf,
	.remove_from_super = remove_from_super_ddf,
	.load_container = load_container_ddf,
	.copy_metadata = copy_metadata_ddf,
#endif
	/* operations needed for assembly as well */
	.match_home = match_home_ddf,
	.uuid_from_super= uuid_from_super_ddf,
	.getinfo_super = getinfo_super_ddf,
	.update_super = update_super_ddf,

	.avail_size = avail_size_ddf,

	.compare_super = compare_super_ddf,

	.load_super = load_super_ddf,
	.init_super = init_super_ddf,
	.store_super = store_super_ddf,
	.free_super = free_super_ddf,
	.match_metadata_desc = match_metadata_desc_ddf,
	.container_content = container_content_ddf,
	.default_geometry = default_geometry_ddf,

	/* DDF is externally-managed metadata (handled by mdmon) */
	.external = 1,

#ifndef MDASSEMBLE
/* for mdmon */
	.open_new = ddf_open_new,
	.set_array_state= ddf_set_array_state,
	.set_disk = ddf_set_disk,
	.sync_metadata = ddf_sync_metadata,
	.process_update = ddf_process_update,
	.prepare_update = ddf_prepare_update,
	.activate_spare = ddf_activate_spare,
#endif
	.name = "ddf",
};