/*
 * super-ddf.c -- DDF container metadata support for mdadm.
 * (Imported from the mdadm source tree; the change carried by this copy
 *  is "DDF: use LBA_OFFSET macro instead of lba_offset field".)
 */
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF takes from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28 #define HAVE_STDINT_H 1
29 #include "mdadm.h"
30 #include "mdmon.h"
31 #include "sha1.h"
32 #include <values.h>
33
34 /* a non-official T10 name for creation GUIDs */
35 static char T10[] = "Linux-MD";
36
37 /* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41 #define DECADE (3600*24*(365*10+2))
42 unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
47 #define DDF_NOTFOUND (~0U)
48 #define DDF_CONTAINER (DDF_NOTFOUND-1)
49
50 /* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
61 /* Primary Raid Level (PRL) */
62 #define DDF_RAID0 0x00
63 #define DDF_RAID1 0x01
64 #define DDF_RAID3 0x03
65 #define DDF_RAID4 0x04
66 #define DDF_RAID5 0x05
67 #define DDF_RAID1E 0x11
68 #define DDF_JBOD 0x0f
69 #define DDF_CONCAT 0x1f
70 #define DDF_RAID5E 0x15
71 #define DDF_RAID5EE 0x25
72 #define DDF_RAID6 0x06
73
74 /* Raid Level Qualifier (RLQ) */
75 #define DDF_RAID0_SIMPLE 0x00
76 #define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77 #define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78 #define DDF_RAID3_0 0x00 /* parity in first extent */
79 #define DDF_RAID3_N 0x01 /* parity in last extent */
80 #define DDF_RAID4_0 0x00 /* parity in first extent */
81 #define DDF_RAID4_N 0x01 /* parity in last extent */
82 /* these apply to raid5e and raid5ee as well */
83 #define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
84 #define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
85 #define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86 #define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
87
88 #define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89 #define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
90
91 /* Secondary RAID Level (SRL) */
92 #define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93 #define DDF_2MIRRORED 0x01
94 #define DDF_2CONCAT 0x02
95 #define DDF_2SPANNED 0x03 /* This is also weird - be careful */
96
97 /* Magic numbers */
98 #define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99 #define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100 #define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101 #define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102 #define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103 #define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104 #define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105 #define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106 #define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107 #define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
108
109 #define DDF_GUID_LEN 24
110 #define DDF_REVISION_0 "01.00.00"
111 #define DDF_REVISION_2 "01.02.00"
112
/* On-disk DDF header.  Three copies exist per disk: the anchor in the
 * last sector, plus primary and secondary copies at the LBAs the anchor
 * records.  All multi-byte fields are big-endian (see file header note).
 */
struct ddf_header {
	__u32	magic;		/* DDF_HEADER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	char	revision[8];	/* 01.02.00 */
	__u32	seq;		/* starts at '1' */
	__u32	timestamp;
	__u8	openflag;
	__u8	foreignflag;
	__u8	enforcegroups;
	__u8	pad0;		/* 0xff */
	__u8	pad1[12];	/* 12 * 0xff */
	/* 64 bytes so far */
	__u8	header_ext[32];	/* reserved: fill with 0xff */
	__u64	primary_lba;
	__u64	secondary_lba;
	__u8	type;		/* DDF_HEADER_ANCHOR/PRIMARY/SECONDARY */
	__u8	pad2[3];	/* 0xff */
	__u32	workspace_len;	/* sectors for vendor space -
				 * at least 32768(sectors) */
	__u64	workspace_lba;
	__u16	max_pd_entries;	/* one of 15, 63, 255, 1023, 4095 */
	__u16	max_vd_entries;	/* 2^(4,6,8,10,12)-1 : i.e. as above */
	__u16	max_partitions;	/* i.e. max num of configuration
				   record entries per disk */
	__u16	config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
				      *12/512) */
	__u16	max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
	__u8	pad3[54];	/* 0xff */
	/* 192 bytes so far */
	/* Section locations are in sectors, relative to the primary or
	 * secondary header LBA (see load_section()). */
	__u32	controller_section_offset;
	__u32	controller_section_length;
	__u32	phys_section_offset;
	__u32	phys_section_length;
	__u32	virt_section_offset;
	__u32	virt_section_length;
	__u32	config_section_offset;
	__u32	config_section_length;
	__u32	data_section_offset;
	__u32	data_section_length;
	__u32	bbm_section_offset;
	__u32	bbm_section_length;
	__u32	diag_space_offset;
	__u32	diag_space_length;
	__u32	vendor_offset;
	__u32	vendor_length;
	/* 256 bytes so far */
	__u8	pad4[256];	/* 0xff */
};
162
163 /* type field */
164 #define DDF_HEADER_ANCHOR 0x00
165 #define DDF_HEADER_PRIMARY 0x01
166 #define DDF_HEADER_SECONDARY 0x02
167
168 /* The content of the 'controller section' - global scope */
/* Controller identity: PCI-style vendor/device IDs, a product string,
 * and opaque vendor data. */
struct ddf_controller_data {
	__u32	magic;			/* DDF_CONTROLLER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	struct controller_type {
		__u16 vendor_id;
		__u16 device_id;
		__u16 sub_vendor_id;
		__u16 sub_device_id;
	} type;
	char	product_id[16];
	__u8	pad[8];		/* 0xff */
	__u8	vendor_data[448];
};
183
184 /* The content of phys_section - global scope */
/* Table of all physical disks known to the container; entries[] is a
 * trailing variable-length array with max_pdes slots, used_pdes of
 * which are populated. */
struct phys_disk {
	__u32	magic;		/* DDF_PHYS_RECORDS_MAGIC */
	__u32	crc;
	__u16	used_pdes;
	__u16	max_pdes;
	__u8	pad[52];
	struct phys_disk_entry {
		char	guid[DDF_GUID_LEN];
		__u32	refnum;
		__u16	type;	/* bitmap: DDF_Active_in_VD etc., below */
		__u16	state;	/* bitmap: DDF_Online etc., below */
		__u64	config_size; /* DDF structures must be after here */
		char	path[18];	/* another horrible structure really */
		__u8	pad[6];
	} entries[0];
};
201
202 /* phys_disk_entry.type is a bitmap - bigendian remember */
203 #define DDF_Forced_PD_GUID 1
204 #define DDF_Active_in_VD 2
205 #define DDF_Global_Spare 4 /* VD_CONF records are ignored */
206 #define DDF_Spare 8 /* overrides Global_spare */
207 #define DDF_Foreign 16
208 #define DDF_Legacy 32 /* no DDF on this device */
209
210 #define DDF_Interface_mask 0xf00
211 #define DDF_Interface_SCSI 0x100
212 #define DDF_Interface_SAS 0x200
213 #define DDF_Interface_SATA 0x300
214 #define DDF_Interface_FC 0x400
215
216 /* phys_disk_entry.state is a bigendian bitmap */
217 #define DDF_Online 1
218 #define DDF_Failed 2 /* overrides 1,4,8 */
219 #define DDF_Rebuilding 4
220 #define DDF_Transition 8
221 #define DDF_SMART 16
222 #define DDF_ReadErrors 32
223 #define DDF_Missing 64
224
225 /* The content of the virt_section global scope */
/* Table of all virtual disks (arrays) in the container.  Unused
 * entries appear to carry an all-0xff GUID (see the all_ff() checks
 * elsewhere in this file). */
struct virtual_disk {
	__u32	magic;		/* DDF_VIRT_RECORDS_MAGIC */
	__u32	crc;
	__u16	populated_vdes;
	__u16	max_vdes;
	__u8	pad[52];
	struct virtual_entry {
		char	guid[DDF_GUID_LEN];
		__u16	unit;
		__u16	pad0;	/* 0xffff */
		__u16	guid_crc;
		__u16	type;	/* bitmap: DDF_Shared etc., below */
		__u8	state;	/* bitmap: DDF_state_* below */
		__u8	init_state; /* bitmap: DDF_init* / DDF_access* below */
		__u8	pad1[14];
		char	name[16];
	} entries[0];
};
244
245 /* virtual_entry.type is a bitmap - bigendian */
246 #define DDF_Shared 1
247 #define DDF_Enforce_Groups 2
248 #define DDF_Unicode 4
249 #define DDF_Owner_Valid 8
250
251 /* virtual_entry.state is a bigendian bitmap */
252 #define DDF_state_mask 0x7
253 #define DDF_state_optimal 0x0
254 #define DDF_state_degraded 0x1
255 #define DDF_state_deleted 0x2
256 #define DDF_state_missing 0x3
257 #define DDF_state_failed 0x4
258 #define DDF_state_part_optimal 0x5
259
260 #define DDF_state_morphing 0x8
261 #define DDF_state_inconsistent 0x10
262
263 /* virtual_entry.init_state is a bigendian bitmap */
264 #define DDF_initstate_mask 0x03
265 #define DDF_init_not 0x00
266 #define DDF_init_quick 0x01 /* initialisation is progress.
267 * i.e. 'state_inconsistent' */
268 #define DDF_init_full 0x02
269
270 #define DDF_access_mask 0xc0
271 #define DDF_access_rw 0x00
272 #define DDF_access_ro 0x80
273 #define DDF_access_blocked 0xc0
274
275 /* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
/* A virtual-disk configuration record (one per BVD).  Each record is
 * config_record_len sectors long and ends with per-disk tables:
 * phys_refnum[mppe] followed by an implicit __u64 LBA-offset table
 * addressed through the LBA_OFFSET() macro below.
 */
struct vd_config {
	__u32	magic;		/* DDF_VD_CONF_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	timestamp;
	__u32	seqnum;
	__u8	pad0[24];
	__u16	prim_elmnt_count;
	__u8	chunk_shift;	/* 0 == 512, 1==1024 etc */
	__u8	prl;
	__u8	rlq;
	__u8	sec_elmnt_count;
	__u8	sec_elmnt_seq;
	__u8	srl;
	__u64	blocks;		/* blocks per component could be different
				 * on different component devices...(only
				 * for concat I hope) */
	__u64	array_blocks;	/* blocks in array */
	__u8	pad1[8];
	__u32	spare_refs[8];
	__u8	cache_pol[8];
	__u8	bg_rate;
	__u8	pad2[3];
	__u8	pad3[52];
	__u8	pad4[192];
	__u8	v0[32];	/* reserved- 0xff */
	__u8	v1[32];	/* reserved- 0xff */
	__u8	v2[16];	/* reserved- 0xff */
	__u8	v3[16];	/* reserved- 0xff */
	__u8	vendor[32];
	__u32	phys_refnum[0];	/* refnum of each disk in sequence */
	/*__u64	lba_offset[0];  LBA offset in each phys.  Note extents in a
				bvd are always the same size */
};
/* Address the implicit lba_offset[] table that follows phys_refnum[mppe]. */
#define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
315
316 /* vd_config.cache_pol[7] is a bitmap */
317 #define DDF_cache_writeback 1 /* else writethrough */
318 #define DDF_cache_wadaptive 2 /* only applies if writeback */
319 #define DDF_cache_readahead 4
320 #define DDF_cache_radaptive 8 /* only if doing read-ahead */
321 #define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
322 #define DDF_cache_wallowed 32 /* enable write caching */
323 #define DDF_cache_rallowed 64 /* enable read caching */
324
/* A spare-assignment configuration record; lives in the config section
 * alongside vd_config records (distinguished by magic). */
struct spare_assign {
	__u32	magic;		/* DDF_SPARE_ASSIGN_MAGIC */
	__u32	crc;
	__u32	timestamp;
	__u8	reserved[7];
	__u8	type;		/* bitmap: DDF_spare_* below */
	__u16	populated;	/* SAEs used */
	__u16	max;		/* max SAEs */
	__u8	pad[8];
	struct spare_assign_entry {
		char	guid[DDF_GUID_LEN];
		__u16	secondary_element;
		__u8	pad[6];
	} spare_ents[0];
};
340 /* spare_assign.type is a bitmap */
341 #define DDF_spare_dedicated 0x1 /* else global */
342 #define DDF_spare_revertible 0x2 /* else committable */
343 #define DDF_spare_active 0x4 /* else not active */
344 #define DDF_spare_affinity 0x8 /* enclosure affinity */
345
346 /* The data_section contents - local scope */
/* Per-disk identity record: guid/refnum tie this physical disk to its
 * entry in struct phys_disk (see the pdnum lookup in load_ddf_local()). */
struct disk_data {
	__u32	magic;		/* DDF_PHYS_DATA_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	refnum;		/* crc of some magic drive data ... */
	__u8	forced_ref;	/* set when above was not result of magic */
	__u8	forced_guid;	/* set if guid was forced rather than magic */
	__u8	vendor[32];
	__u8	pad[442];
};
357
358 /* bbm_section content */
/* On-disk bad-block remapping table (not interpreted by this code yet,
 * per the overview comment below). */
struct bad_block_log {
	__u32	magic;
	__u32	crc;
	__u16	entry_count;
	__u32	spare_count;
	__u8	pad[10];
	__u64	first_spare;
	struct mapped_block {
		__u64	defective_start;
		__u32	replacement_start;
		__u16	remap_count;
		__u8	pad[2];
	} entries[0];
};
373
374 /* Struct for internally holding ddf structures */
375 /* The DDF structure stored on each device is potentially
376 * quite different, as some data is global and some is local.
377 * The global data is:
378 * - ddf header
379 * - controller_data
380 * - Physical disk records
381 * - Virtual disk records
382 * The local data is:
383 * - Configuration records
384 * - Physical Disk data section
385 * ( and Bad block and vendor which I don't care about yet).
386 *
387 * The local data is parsed into separate lists as it is read
388 * and reconstructed for writing. This means that we only need
389 * to make config changes once and they are automatically
390 * propagated to all devices.
391 * Note that the ddf_super has space of the conf and disk data
392 * for this disk and also for a list of all such data.
393 * The list is only used for the superblock that is being
394 * built in Create or Assemble to describe the whole array.
395 */
/* In-core representation of the DDF metadata: the global sections
 * (headers, controller, phys, virt) plus the per-disk local data
 * parsed into conflist (configuration records) and dlist (disks).
 */
struct ddf_super {
	struct ddf_header anchor, primary, secondary;
	struct ddf_controller_data controller;
	struct ddf_header *active;	/* header copy chosen by load_ddf_headers() */
	struct phys_disk	*phys;
	struct virtual_disk	*virt;
	int pdsize, vdsize;	/* byte sizes of the phys/virt sections */
	/* cached copies of header fields (host byte order) */
	unsigned int max_part, mppe, conf_rec_len;
	int currentdev;
	int updates_pending;
	struct vcl {
		union {
			char space[512];
			struct {
				struct vcl	*next;
				unsigned int	vcnum; /* index into ->virt */
				struct vd_config **other_bvds;
				__u64		*block_sizes; /* NULL if all the same */
			};
		};
		struct vd_config conf;
	} *conflist, *currentconf;
	struct dl {
		union {
			char space[512];
			struct {
				struct dl	*next;
				int major, minor;
				char		*devname;
				int fd;
				unsigned long long size; /* sectors */
				unsigned long long primary_lba; /* sectors */
				unsigned long long secondary_lba; /* sectors */
				unsigned long long workspace_lba; /* sectors */
				int pdnum;	/* index in ->phys */
				struct spare_assign *spare;
				void *mdupdate; /* hold metadata update */

				/* These fields used by auto-layout */
				int raiddisk;	/* slot to fill in autolayout */
				__u64 esize;
			};
		};
		struct disk_data disk;
		struct vcl *vlist[0]; /* max_part in size */
	} *dlist, *add_list;
};
443
/* Fallback definition for toolchains whose headers lack offsetof. */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
447
#if DEBUG
static int all_ff(const char *guid);
/* Debug helper: print state/init_state for every in-use virtual-disk
 * entry (entries whose GUID is all-0xff are skipped as unused). */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int i;
	dprintf("%s/%s: ", __func__, msg);
	for (i = 0; i < __be16_to_cpu(ddf->active->max_vd_entries); i++) {
		if (all_ff(ddf->virt->entries[i].guid))
			continue;
		dprintf("%u(s=%02x i=%02x) ", i,
			ddf->virt->entries[i].state,
			ddf->virt->entries[i].init_state);
	}
	dprintf("\n");
}
#else
/* no-op stand-in when DEBUG is disabled */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
466
/* Mark the metadata dirty and (in DEBUG builds) trace current VD states. */
#define ddf_set_updates_pending(x) \
	do { (x)->updates_pending = 1; pr_state(x, __func__); } while (0)
469
470 static unsigned int calc_crc(void *buf, int len)
471 {
472 /* crcs are always at the same place as in the ddf_header */
473 struct ddf_header *ddf = buf;
474 __u32 oldcrc = ddf->crc;
475 __u32 newcrc;
476 ddf->crc = 0xffffffff;
477
478 newcrc = crc32(0, buf, len);
479 ddf->crc = oldcrc;
480 /* The crc is store (like everything) bigendian, so convert
481 * here for simplicity
482 */
483 return __cpu_to_be32(newcrc);
484 }
485
#define DDF_INVALID_LEVEL 0xff
#define DDF_NO_SECONDARY 0xff
/* Diagnose an md level/layout/disk-count combination that cannot be
 * expressed in DDF; always returns DDF_INVALID_LEVEL. */
static int err_bad_md_layout(const mdu_array_info_t *array)
{
	pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
	       array->level, array->layout, array->raid_disks);
	return DDF_INVALID_LEVEL;
}
494
495 static int layout_md2ddf(const mdu_array_info_t *array,
496 struct vd_config *conf)
497 {
498 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
499 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
500 __u8 sec_elmnt_count = 1;
501 __u8 srl = DDF_NO_SECONDARY;
502
503 switch (array->level) {
504 case LEVEL_LINEAR:
505 prl = DDF_CONCAT;
506 break;
507 case 0:
508 rlq = DDF_RAID0_SIMPLE;
509 prl = DDF_RAID0;
510 break;
511 case 1:
512 switch (array->raid_disks) {
513 case 2:
514 rlq = DDF_RAID1_SIMPLE;
515 break;
516 case 3:
517 rlq = DDF_RAID1_MULTI;
518 break;
519 default:
520 return err_bad_md_layout(array);
521 }
522 prl = DDF_RAID1;
523 break;
524 case 4:
525 if (array->layout != 0)
526 return err_bad_md_layout(array);
527 rlq = DDF_RAID4_N;
528 prl = DDF_RAID4;
529 break;
530 case 5:
531 switch (array->layout) {
532 case ALGORITHM_LEFT_ASYMMETRIC:
533 rlq = DDF_RAID5_N_RESTART;
534 break;
535 case ALGORITHM_RIGHT_ASYMMETRIC:
536 rlq = DDF_RAID5_0_RESTART;
537 break;
538 case ALGORITHM_LEFT_SYMMETRIC:
539 rlq = DDF_RAID5_N_CONTINUE;
540 break;
541 case ALGORITHM_RIGHT_SYMMETRIC:
542 /* not mentioned in standard */
543 default:
544 return err_bad_md_layout(array);
545 }
546 prl = DDF_RAID5;
547 break;
548 case 6:
549 switch (array->layout) {
550 case ALGORITHM_ROTATING_N_RESTART:
551 rlq = DDF_RAID5_N_RESTART;
552 break;
553 case ALGORITHM_ROTATING_ZERO_RESTART:
554 rlq = DDF_RAID6_0_RESTART;
555 break;
556 case ALGORITHM_ROTATING_N_CONTINUE:
557 rlq = DDF_RAID5_N_CONTINUE;
558 break;
559 default:
560 return err_bad_md_layout(array);
561 }
562 prl = DDF_RAID6;
563 break;
564 case 10:
565 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
566 rlq = DDF_RAID1_SIMPLE;
567 prim_elmnt_count = __cpu_to_be16(2);
568 sec_elmnt_count = array->raid_disks / 2;
569 } else if (array->raid_disks % 3 == 0
570 && array->layout == 0x103) {
571 rlq = DDF_RAID1_MULTI;
572 prim_elmnt_count = __cpu_to_be16(3);
573 sec_elmnt_count = array->raid_disks / 3;
574 } else
575 return err_bad_md_layout(array);
576 srl = DDF_2SPANNED;
577 prl = DDF_RAID1;
578 break;
579 default:
580 return err_bad_md_layout(array);
581 }
582 conf->prl = prl;
583 conf->prim_elmnt_count = prim_elmnt_count;
584 conf->rlq = rlq;
585 conf->srl = srl;
586 conf->sec_elmnt_count = sec_elmnt_count;
587 return 0;
588 }
589
/* Diagnose a DDF PRL/RLQ/disk-count combination that md cannot express;
 * always returns -1. */
static int err_bad_ddf_layout(const struct vd_config *conf)
{
	pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
	       conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
	return -1;
}
596
597 static int layout_ddf2md(const struct vd_config *conf,
598 mdu_array_info_t *array)
599 {
600 int level = LEVEL_UNSUPPORTED;
601 int layout = 0;
602 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
603
604 if (conf->sec_elmnt_count > 1) {
605 /* see also check_secondary() */
606 if (conf->prl != DDF_RAID1 ||
607 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
608 pr_err("Unsupported secondary RAID level %u/%u\n",
609 conf->prl, conf->srl);
610 return -1;
611 }
612 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
613 layout = 0x102;
614 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
615 layout = 0x103;
616 else
617 return err_bad_ddf_layout(conf);
618 raiddisks *= conf->sec_elmnt_count;
619 level = 10;
620 goto good;
621 }
622
623 switch (conf->prl) {
624 case DDF_CONCAT:
625 level = LEVEL_LINEAR;
626 break;
627 case DDF_RAID0:
628 if (conf->rlq != DDF_RAID0_SIMPLE)
629 return err_bad_ddf_layout(conf);
630 level = 0;
631 break;
632 case DDF_RAID1:
633 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
634 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
635 return err_bad_ddf_layout(conf);
636 level = 1;
637 break;
638 case DDF_RAID4:
639 if (conf->rlq != DDF_RAID4_N)
640 return err_bad_ddf_layout(conf);
641 level = 4;
642 break;
643 case DDF_RAID5:
644 switch (conf->rlq) {
645 case DDF_RAID5_N_RESTART:
646 layout = ALGORITHM_LEFT_ASYMMETRIC;
647 break;
648 case DDF_RAID5_0_RESTART:
649 layout = ALGORITHM_RIGHT_ASYMMETRIC;
650 break;
651 case DDF_RAID5_N_CONTINUE:
652 layout = ALGORITHM_LEFT_SYMMETRIC;
653 break;
654 default:
655 return err_bad_ddf_layout(conf);
656 }
657 level = 5;
658 break;
659 case DDF_RAID6:
660 switch (conf->rlq) {
661 case DDF_RAID5_N_RESTART:
662 layout = ALGORITHM_ROTATING_N_RESTART;
663 break;
664 case DDF_RAID6_0_RESTART:
665 layout = ALGORITHM_ROTATING_ZERO_RESTART;
666 break;
667 case DDF_RAID5_N_CONTINUE:
668 layout = ALGORITHM_ROTATING_N_CONTINUE;
669 break;
670 default:
671 return err_bad_ddf_layout(conf);
672 }
673 level = 6;
674 break;
675 default:
676 return err_bad_ddf_layout(conf);
677 };
678
679 good:
680 array->level = level;
681 array->layout = layout;
682 array->raid_disks = raiddisks;
683 return 0;
684 }
685
686 static int load_ddf_header(int fd, unsigned long long lba,
687 unsigned long long size,
688 int type,
689 struct ddf_header *hdr, struct ddf_header *anchor)
690 {
691 /* read a ddf header (primary or secondary) from fd/lba
692 * and check that it is consistent with anchor
693 * Need to check:
694 * magic, crc, guid, rev, and LBA's header_type, and
695 * everything after header_type must be the same
696 */
697 if (lba >= size-1)
698 return 0;
699
700 if (lseek64(fd, lba<<9, 0) < 0)
701 return 0;
702
703 if (read(fd, hdr, 512) != 512)
704 return 0;
705
706 if (hdr->magic != DDF_HEADER_MAGIC)
707 return 0;
708 if (calc_crc(hdr, 512) != hdr->crc)
709 return 0;
710 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
711 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
712 anchor->primary_lba != hdr->primary_lba ||
713 anchor->secondary_lba != hdr->secondary_lba ||
714 hdr->type != type ||
715 memcmp(anchor->pad2, hdr->pad2, 512 -
716 offsetof(struct ddf_header, pad2)) != 0)
717 return 0;
718
719 /* Looks good enough to me... */
720 return 1;
721 }
722
723 static void *load_section(int fd, struct ddf_super *super, void *buf,
724 __u32 offset_be, __u32 len_be, int check)
725 {
726 unsigned long long offset = __be32_to_cpu(offset_be);
727 unsigned long long len = __be32_to_cpu(len_be);
728 int dofree = (buf == NULL);
729
730 if (check)
731 if (len != 2 && len != 8 && len != 32
732 && len != 128 && len != 512)
733 return NULL;
734
735 if (len > 1024)
736 return NULL;
737 if (buf) {
738 /* All pre-allocated sections are a single block */
739 if (len != 1)
740 return NULL;
741 } else if (posix_memalign(&buf, 512, len<<9) != 0)
742 buf = NULL;
743
744 if (!buf)
745 return NULL;
746
747 if (super->active->type == 1)
748 offset += __be64_to_cpu(super->active->primary_lba);
749 else
750 offset += __be64_to_cpu(super->active->secondary_lba);
751
752 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
753 if (dofree)
754 free(buf);
755 return NULL;
756 }
757 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
758 if (dofree)
759 free(buf);
760 return NULL;
761 }
762 return buf;
763 }
764
/* Read the DDF anchor from the last sector of the device, validate it,
 * then load the primary and secondary headers it points at and choose
 * the best one as super->active.
 * Returns 0 on success, 1 on I/O failure, 2 when no valid metadata or
 * no usable header copy was found.
 */
static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
{
	unsigned long long dsize;

	get_dev_size(fd, NULL, &dsize);

	if (lseek64(fd, dsize-512, 0) < 0) {
		if (devname)
			pr_err("Cannot seek to anchor block on %s: %s\n",
			       devname, strerror(errno));
		return 1;
	}
	if (read(fd, &super->anchor, 512) != 512) {
		if (devname)
			pr_err("Cannot read anchor block on %s: %s\n",
			       devname, strerror(errno));
		return 1;
	}
	if (super->anchor.magic != DDF_HEADER_MAGIC) {
		if (devname)
			pr_err("no DDF anchor found on %s\n",
			       devname);
		return 2;
	}
	if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
		if (devname)
			pr_err("bad CRC on anchor on %s\n",
			       devname);
		return 2;
	}
	if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
	    memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
		if (devname)
			pr_err("can only support super revision"
			       " %.8s and earlier, not %.8s on %s\n",
			       DDF_REVISION_2, super->anchor.revision,devname);
		return 2;
	}
	super->active = NULL;
	if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
			    dsize >> 9, 1,
			    &super->primary, &super->anchor) == 0) {
		if (devname)
			pr_err("Failed to load primary DDF header "
			       "on %s\n", devname);
	} else
		super->active = &super->primary;
	if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
			    dsize >> 9, 2,
			    &super->secondary, &super->anchor)) {
		/* Prefer the secondary copy when it is strictly newer and
		 * not marked open, or equally new while only the primary
		 * is open, or when no primary was accepted at all. */
		if ((__be32_to_cpu(super->primary.seq)
		     < __be32_to_cpu(super->secondary.seq) &&
		     !super->secondary.openflag)
		    || (__be32_to_cpu(super->primary.seq)
			== __be32_to_cpu(super->secondary.seq) &&
			super->primary.openflag && !super->secondary.openflag)
		    || super->active == NULL
			)
			super->active = &super->secondary;
	} else if (devname)
		pr_err("Failed to load secondary DDF header on %s\n",
		       devname);
	if (super->active == NULL)
		return 2;
	return 0;
}
831
832 static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
833 {
834 void *ok;
835 ok = load_section(fd, super, &super->controller,
836 super->active->controller_section_offset,
837 super->active->controller_section_length,
838 0);
839 super->phys = load_section(fd, super, NULL,
840 super->active->phys_section_offset,
841 super->active->phys_section_length,
842 1);
843 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
844
845 super->virt = load_section(fd, super, NULL,
846 super->active->virt_section_offset,
847 super->active->virt_section_length,
848 1);
849 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
850 if (!ok ||
851 !super->phys ||
852 !super->virt) {
853 free(super->phys);
854 free(super->virt);
855 super->phys = NULL;
856 super->virt = NULL;
857 return 2;
858 }
859 super->conflist = NULL;
860 super->dlist = NULL;
861
862 super->max_part = __be16_to_cpu(super->active->max_partitions);
863 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
864 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
865 return 0;
866 }
867
868 #define DDF_UNUSED_BVD 0xff
869 static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
870 {
871 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
872 unsigned int i, vdsize;
873 void *p;
874 if (n_vds == 0) {
875 vcl->other_bvds = NULL;
876 return 0;
877 }
878 vdsize = ddf->conf_rec_len * 512;
879 if (posix_memalign(&p, 512, n_vds *
880 (vdsize + sizeof(struct vd_config *))) != 0)
881 return -1;
882 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
883 for (i = 0; i < n_vds; i++) {
884 vcl->other_bvds[i] = p + i * vdsize;
885 memset(vcl->other_bvds[i], 0, vdsize);
886 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
887 }
888 return 0;
889 }
890
891 static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
892 unsigned int len)
893 {
894 int i;
895 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
896 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
897 break;
898
899 if (i < vcl->conf.sec_elmnt_count-1) {
900 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
901 return;
902 } else {
903 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
904 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
905 break;
906 if (i == vcl->conf.sec_elmnt_count-1) {
907 pr_err("no space for sec level config %u, count is %u\n",
908 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
909 return;
910 }
911 }
912 memcpy(vcl->other_bvds[i], vd, len);
913 }
914
915 static int load_ddf_local(int fd, struct ddf_super *super,
916 char *devname, int keep)
917 {
918 struct dl *dl;
919 struct stat stb;
920 char *conf;
921 unsigned int i;
922 unsigned int confsec;
923 int vnum;
924 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
925 unsigned long long dsize;
926
927 /* First the local disk info */
928 if (posix_memalign((void**)&dl, 512,
929 sizeof(*dl) +
930 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
931 pr_err("%s could not allocate disk info buffer\n",
932 __func__);
933 return 1;
934 }
935
936 load_section(fd, super, &dl->disk,
937 super->active->data_section_offset,
938 super->active->data_section_length,
939 0);
940 dl->devname = devname ? xstrdup(devname) : NULL;
941
942 fstat(fd, &stb);
943 dl->major = major(stb.st_rdev);
944 dl->minor = minor(stb.st_rdev);
945 dl->next = super->dlist;
946 dl->fd = keep ? fd : -1;
947
948 dl->size = 0;
949 if (get_dev_size(fd, devname, &dsize))
950 dl->size = dsize >> 9;
951 /* If the disks have different sizes, the LBAs will differ
952 * between phys disks.
953 * At this point here, the values in super->active must be valid
954 * for this phys disk. */
955 dl->primary_lba = super->active->primary_lba;
956 dl->secondary_lba = super->active->secondary_lba;
957 dl->workspace_lba = super->active->workspace_lba;
958 dl->spare = NULL;
959 for (i = 0 ; i < super->max_part ; i++)
960 dl->vlist[i] = NULL;
961 super->dlist = dl;
962 dl->pdnum = -1;
963 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
964 if (memcmp(super->phys->entries[i].guid,
965 dl->disk.guid, DDF_GUID_LEN) == 0)
966 dl->pdnum = i;
967
968 /* Now the config list. */
969 /* 'conf' is an array of config entries, some of which are
970 * probably invalid. Those which are good need to be copied into
971 * the conflist
972 */
973
974 conf = load_section(fd, super, NULL,
975 super->active->config_section_offset,
976 super->active->config_section_length,
977 0);
978
979 vnum = 0;
980 for (confsec = 0;
981 confsec < __be32_to_cpu(super->active->config_section_length);
982 confsec += super->conf_rec_len) {
983 struct vd_config *vd =
984 (struct vd_config *)((char*)conf + confsec*512);
985 struct vcl *vcl;
986
987 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
988 if (dl->spare)
989 continue;
990 if (posix_memalign((void**)&dl->spare, 512,
991 super->conf_rec_len*512) != 0) {
992 pr_err("%s could not allocate spare info buf\n",
993 __func__);
994 return 1;
995 }
996
997 memcpy(dl->spare, vd, super->conf_rec_len*512);
998 continue;
999 }
1000 if (vd->magic != DDF_VD_CONF_MAGIC)
1001 continue;
1002 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1003 if (memcmp(vcl->conf.guid,
1004 vd->guid, DDF_GUID_LEN) == 0)
1005 break;
1006 }
1007
1008 if (vcl) {
1009 dl->vlist[vnum++] = vcl;
1010 if (vcl->other_bvds != NULL &&
1011 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1012 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1013 continue;
1014 }
1015 if (__be32_to_cpu(vd->seqnum) <=
1016 __be32_to_cpu(vcl->conf.seqnum))
1017 continue;
1018 } else {
1019 if (posix_memalign((void**)&vcl, 512,
1020 (super->conf_rec_len*512 +
1021 offsetof(struct vcl, conf))) != 0) {
1022 pr_err("%s could not allocate vcl buf\n",
1023 __func__);
1024 return 1;
1025 }
1026 vcl->next = super->conflist;
1027 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
1028 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1029 if (alloc_other_bvds(super, vcl) != 0) {
1030 pr_err("%s could not allocate other bvds\n",
1031 __func__);
1032 free(vcl);
1033 return 1;
1034 };
1035 super->conflist = vcl;
1036 dl->vlist[vnum++] = vcl;
1037 }
1038 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
1039 for (i=0; i < max_virt_disks ; i++)
1040 if (memcmp(super->virt->entries[i].guid,
1041 vcl->conf.guid, DDF_GUID_LEN)==0)
1042 break;
1043 if (i < max_virt_disks)
1044 vcl->vcnum = i;
1045 }
1046 free(conf);
1047
1048 return 0;
1049 }
1050
1051 #ifndef MDASSEMBLE
1052 static int load_super_ddf_all(struct supertype *st, int fd,
1053 void **sbp, char *devname);
1054 #endif
1055
1056 static void free_super_ddf(struct supertype *st);
1057
/* Entry point for loading DDF metadata from a single device: validates
 * the device size, then reads the headers, the global sections and the
 * local sections, and installs the resulting ddf_super as st->sb.
 * Returns 0 on success, or a non-zero code from the failing stage.
 */
static int load_super_ddf(struct supertype *st, int fd,
			  char *devname)
{
	unsigned long long dsize;
	struct ddf_super *super;
	int rv;

	if (get_dev_size(fd, devname, &dsize) == 0)
		return 1;

	if (!st->ignore_hw_compat && test_partition(fd))
		/* DDF is not allowed on partitions */
		return 1;

	/* 32M is a lower bound */
	if (dsize <= 32*1024*1024) {
		if (devname)
			pr_err("%s is too small for ddf: "
			       "size is %llu sectors.\n",
			       devname, dsize>>9);
		return 1;
	}
	if (dsize & 511) {
		if (devname)
			pr_err("%s is an odd size for ddf: "
			       "size is %llu bytes.\n",
			       devname, dsize);
		return 1;
	}

	/* discard any previously loaded metadata */
	free_super_ddf(st);

	if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
		pr_err("malloc of %zu failed.\n",
		       sizeof(*super));
		return 1;
	}
	memset(super, 0, sizeof(*super));

	rv = load_ddf_headers(fd, super, devname);
	if (rv) {
		free(super);
		return rv;
	}

	/* Have valid headers and have chosen the best. Let's read in the rest*/

	rv = load_ddf_global(fd, super, devname);

	if (rv) {
		if (devname)
			pr_err("Failed to load all information "
			       "sections on %s\n", devname);
		/* load_ddf_global frees phys/virt itself on failure */
		free(super);
		return rv;
	}

	rv = load_ddf_local(fd, super, devname, 0);

	if (rv) {
		if (devname)
			pr_err("Failed to load all information "
			       "sections on %s\n", devname);
		/* NOTE(review): super->phys, super->virt and any dlist
		 * entry already allocated by load_ddf_local are leaked
		 * here; a full teardown (as in free_super_ddf) would be
		 * needed - confirm and fix separately. */
		free(super);
		return rv;
	}

	/* Should possibly check the sections .... */

	st->sb = super;
	if (st->ss == NULL) {
		st->ss = &super_ddf;
		st->minor_version = 0;
		st->max_devs = 512;
	}
	return 0;

}
1136
1137 static void free_super_ddf(struct supertype *st)
1138 {
1139 struct ddf_super *ddf = st->sb;
1140 if (ddf == NULL)
1141 return;
1142 free(ddf->phys);
1143 free(ddf->virt);
1144 while (ddf->conflist) {
1145 struct vcl *v = ddf->conflist;
1146 ddf->conflist = v->next;
1147 if (v->block_sizes)
1148 free(v->block_sizes);
1149 if (v->other_bvds)
1150 /*
1151 v->other_bvds[0] points to beginning of buffer,
1152 see alloc_other_bvds()
1153 */
1154 free(v->other_bvds[0]);
1155 free(v);
1156 }
1157 while (ddf->dlist) {
1158 struct dl *d = ddf->dlist;
1159 ddf->dlist = d->next;
1160 if (d->fd >= 0)
1161 close(d->fd);
1162 if (d->spare)
1163 free(d->spare);
1164 free(d);
1165 }
1166 while (ddf->add_list) {
1167 struct dl *d = ddf->add_list;
1168 ddf->add_list = d->next;
1169 if (d->fd >= 0)
1170 close(d->fd);
1171 if (d->spare)
1172 free(d->spare);
1173 free(d);
1174 }
1175 free(ddf);
1176 st->sb = NULL;
1177 }
1178
1179 static struct supertype *match_metadata_desc_ddf(char *arg)
1180 {
1181 /* 'ddf' only support containers */
1182 struct supertype *st;
1183 if (strcmp(arg, "ddf") != 0 &&
1184 strcmp(arg, "default") != 0
1185 )
1186 return NULL;
1187
1188 st = xcalloc(1, sizeof(*st));
1189 st->ss = &super_ddf;
1190 st->max_devs = 512;
1191 st->minor_version = 0;
1192 st->sb = NULL;
1193 return st;
1194 }
1195
1196 #ifndef MDASSEMBLE
1197
/* Names for the low three bits of a virtual disk's 'state' field. */
static mapping_t ddf_state[] = {
	{ "Optimal", 0},
	{ "Degraded", 1},
	{ "Deleted", 2},
	{ "Missing", 3},
	{ "Failed", 4},
	{ "Partially Optimal", 5},
	{ "-reserved-", 6},
	{ "-reserved-", 7},
	{ NULL, 0}
};

/* Names for the low two bits of a virtual disk's 'init_state' field. */
static mapping_t ddf_init_state[] = {
	{ "Not Initialised", 0},
	{ "QuickInit in Progress", 1},
	{ "Fully Initialised", 2},
	{ "*UNKNOWN*", 3},
	{ NULL, 0}
};
/* Names for the access policy kept in bits 6-7 of 'init_state'. */
static mapping_t ddf_access[] = {
	{ "Read/Write", 0},
	{ "Reserved", 1},
	{ "Read Only", 2},
	{ "Blocked (no access)", 3},
	{ NULL ,0}
};

/* Printable names for the DDF primary RAID levels (prl field). */
static mapping_t ddf_level[] = {
	{ "RAID0", DDF_RAID0},
	{ "RAID1", DDF_RAID1},
	{ "RAID3", DDF_RAID3},
	{ "RAID4", DDF_RAID4},
	{ "RAID5", DDF_RAID5},
	{ "RAID1E",DDF_RAID1E},
	{ "JBOD", DDF_JBOD},
	{ "CONCAT",DDF_CONCAT},
	{ "RAID5E",DDF_RAID5E},
	{ "RAID5EE",DDF_RAID5EE},
	{ "RAID6", DDF_RAID6},
	{ NULL, 0}
};
/* Printable names for the DDF secondary RAID levels (srl field). */
static mapping_t ddf_sec_level[] = {
	{ "Striped", DDF_2STRIPED},
	{ "Mirrored", DDF_2MIRRORED},
	{ "Concat", DDF_2CONCAT},
	{ "Spanned", DDF_2SPANNED},
	{ NULL, 0}
};
1246 #endif
1247
1248 static int all_ff(const char *guid)
1249 {
1250 int i;
1251 for (i = 0; i < DDF_GUID_LEN; i++)
1252 if (guid[i] != (char)0xff)
1253 return 0;
1254 return 1;
1255 }
1256
1257 #ifndef MDASSEMBLE
1258 static void print_guid(char *guid, int tstamp)
1259 {
1260 /* A GUIDs are part (or all) ASCII and part binary.
1261 * They tend to be space padded.
1262 * We print the GUID in HEX, then in parentheses add
1263 * any initial ASCII sequence, and a possible
1264 * time stamp from bytes 16-19
1265 */
1266 int l = DDF_GUID_LEN;
1267 int i;
1268
1269 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1270 if ((i&3)==0 && i != 0) printf(":");
1271 printf("%02X", guid[i]&255);
1272 }
1273
1274 printf("\n (");
1275 while (l && guid[l-1] == ' ')
1276 l--;
1277 for (i=0 ; i<l ; i++) {
1278 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1279 fputc(guid[i], stdout);
1280 else
1281 break;
1282 }
1283 if (tstamp) {
1284 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1285 char tbuf[100];
1286 struct tm *tm;
1287 tm = localtime(&then);
1288 strftime(tbuf, 100, " %D %T",tm);
1289 fputs(tbuf, stdout);
1290 }
1291 printf(")");
1292 }
1293
/* Print detailed --examine output for virtual disk number 'n' identified
 * by 'guid': member list, chunk size, RAID level, secondary layout and
 * sizes.  Walks all configuration records looking for matching ones. */
static void examine_vd(int n, struct ddf_super *sb, char *guid)
{
	int crl = sb->conf_rec_len;	/* config record length, in sectors */
	struct vcl *vcl;

	for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
		unsigned int i;
		struct vd_config *vc = &vcl->conf;

		/* skip records with a bad checksum or for a different VD */
		if (calc_crc(vc, crl*512) != vc->crc)
			continue;
		if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
			continue;

		/* Ok, we know about this VD, let's give more details */
		printf(" Raid Devices[%d] : %d (", n,
		       __be16_to_cpu(vc->prim_elmnt_count));
		for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
			int j;
			int cnt = __be16_to_cpu(sb->phys->used_pdes);
			/* map this member's refnum to an index in the
			 * physical disk table; print "--" if absent */
			for (j=0; j<cnt; j++)
				if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
					break;
			if (i) printf(" ");
			if (j < cnt)
				printf("%d", j);
			else
				printf("--");
		}
		printf(")\n");
		/* chunk_shift of 255 apparently means "no chunk size"
		 * (non-striped layout) — TODO confirm against DDF spec */
		if (vc->chunk_shift != 255)
			printf(" Chunk Size[%d] : %d sectors\n", n,
			       1 << vc->chunk_shift);
		printf(" Raid Level[%d] : %s\n", n,
		       map_num(ddf_level, vc->prl)?:"-unknown-");
		if (vc->sec_elmnt_count != 1) {
			printf(" Secondary Position[%d] : %d of %d\n", n,
			       vc->sec_elmnt_seq, vc->sec_elmnt_count);
			printf(" Secondary Level[%d] : %s\n", n,
			       map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
		}
		/* sizes are stored in sectors; divide by 2 for KiB */
		printf(" Device Size[%d] : %llu\n", n,
		       (unsigned long long)__be64_to_cpu(vc->blocks)/2);
		printf(" Array Size[%d] : %llu\n", n,
		       (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
	}
}
1341
/* Print --examine output for every defined virtual disk in the
 * container: GUID, unit, state, init state, access policy and name,
 * followed by the per-VD details from examine_vd(). */
static void examine_vds(struct ddf_super *sb)
{
	int cnt = __be16_to_cpu(sb->virt->populated_vdes);
	unsigned int i;
	printf("  Virtual Disks : %d\n", cnt);

	for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
		struct virtual_entry *ve = &sb->virt->entries[i];
		/* an all-0xff GUID marks an unused slot */
		if (all_ff(ve->guid))
			continue;
		printf("\n");
		printf("      VD GUID[%d] : ", i); print_guid(ve->guid, 1);
		printf("\n");
		printf("         unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
		/* state: bits 0-2 name, bit 3 morphing, bit 4 consistency */
		printf("        state[%d] : %s, %s%s\n", i,
		       map_num(ddf_state, ve->state & 7),
		       (ve->state & 8) ? "Morphing, ": "",
		       (ve->state & 16)? "Not Consistent" : "Consistent");
		printf("   init state[%d] : %s\n", i,
		       map_num(ddf_init_state, ve->init_state&3));
		printf("       access[%d] : %s\n", i,
		       map_num(ddf_access, (ve->init_state>>6) & 3));
		printf("         Name[%d] : %.16s\n", i, ve->name);
		examine_vd(i, sb, ve->guid);
	}
	if (cnt) printf("\n");
}
1369
/* Print --examine output for every used physical disk entry: refnum,
 * size, the matching device node (if any) and the type/state flags. */
static void examine_pds(struct ddf_super *sb)
{
	int cnt = __be16_to_cpu(sb->phys->used_pdes);
	int i;
	struct dl *dl;
	printf(" Physical Disks : %d\n", cnt);
	printf("      Number    RefNo      Size       Device      Type/State\n");

	for (i=0 ; i<cnt ; i++) {
		struct phys_disk_entry *pd = &sb->phys->entries[i];
		int type = __be16_to_cpu(pd->type);
		int state = __be16_to_cpu(pd->state);

		//printf("      PD GUID[%d] : ", i); print_guid(pd->guid, 0);
		//printf("\n");
		printf("       %3d    %08x  ", i,
		       __be32_to_cpu(pd->refnum));
		/* config_size is in sectors; >>1 gives KiB */
		printf("%8lluK ",
		       (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
		/* find a loaded device with the same refnum to name it */
		for (dl = sb->dlist; dl ; dl = dl->next) {
			if (dl->disk.refnum == pd->refnum) {
				char *dv = map_dev(dl->major, dl->minor, 0);
				if (dv) {
					printf("%-15s", dv);
					break;
				}
			}
		}
		if (!dl)
			printf("%15s","");
		printf(" %s%s%s%s%s",
		       (type&2) ? "active":"",
		       (type&4) ? "Global-Spare":"",
		       (type&8) ? "spare" : "",
		       (type&16)? ", foreign" : "",
		       (type&32)? "pass-through" : "");
		if (state & DDF_Failed)
			/* This over-rides these three */
			state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
		printf("/%s%s%s%s%s%s%s",
		       (state&1)? "Online": "Offline",
		       (state&2)? ", Failed": "",
		       (state&4)? ", Rebuilding": "",
		       (state&8)? ", in-transition": "",
		       (state&16)? ", SMART-errors": "",
		       (state&32)? ", Unrecovered-Read-Errors": "",
		       (state&64)? ", Missing" : "");
		printf("\n");
	}
}
1420
/* Top-level --examine: dump the container header fields, then the
 * virtual and physical disk tables.  'homehost' is unused here. */
static void examine_super_ddf(struct supertype *st, char *homehost)
{
	struct ddf_super *sb = st->sb;

	printf("          Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
	printf("        Version : %.8s\n", sb->anchor.revision);
	printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
	printf("\n");
	printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
	printf("\n");
	printf("            Seq : %08x\n", __be32_to_cpu(sb->active->seq));
	/* a valid magic in the secondary header means it is present */
	printf("  Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
	       ?"yes" : "no");
	examine_vds(sb);
	examine_pds(sb);
}
1437
1438 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
1439
1440 static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
1441 static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
1442
1443 static unsigned int get_vd_num_of_subarray(struct supertype *st)
1444 {
1445 /*
1446 * Figure out the VD number for this supertype.
1447 * Returns DDF_CONTAINER for the container itself,
1448 * and DDF_NOTFOUND on error.
1449 */
1450 struct ddf_super *ddf = st->sb;
1451 struct mdinfo *sra;
1452 char *sub, *end;
1453 unsigned int vcnum;
1454
1455 if (*st->container_devnm == '\0')
1456 return DDF_CONTAINER;
1457
1458 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1459 if (!sra || sra->array.major_version != -1 ||
1460 sra->array.minor_version != -2 ||
1461 !is_subarray(sra->text_version))
1462 return DDF_NOTFOUND;
1463
1464 sub = strchr(sra->text_version + 1, '/');
1465 if (sub != NULL)
1466 vcnum = strtoul(sub + 1, &end, 10);
1467 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1468 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1469 return DDF_NOTFOUND;
1470
1471 return vcnum;
1472 }
1473
1474 static void brief_examine_super_ddf(struct supertype *st, int verbose)
1475 {
1476 /* We just write a generic DDF ARRAY entry
1477 */
1478 struct mdinfo info;
1479 char nbuf[64];
1480 getinfo_super_ddf(st, &info, NULL);
1481 fname_from_uuid(st, &info, nbuf, ':');
1482
1483 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1484 }
1485
1486 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
1487 {
1488 /* We just write a generic DDF ARRAY entry
1489 */
1490 struct ddf_super *ddf = st->sb;
1491 struct mdinfo info;
1492 unsigned int i;
1493 char nbuf[64];
1494 getinfo_super_ddf(st, &info, NULL);
1495 fname_from_uuid(st, &info, nbuf, ':');
1496
1497 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1498 struct virtual_entry *ve = &ddf->virt->entries[i];
1499 struct vcl vcl;
1500 char nbuf1[64];
1501 if (all_ff(ve->guid))
1502 continue;
1503 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1504 ddf->currentconf =&vcl;
1505 uuid_from_super_ddf(st, info.uuid);
1506 fname_from_uuid(st, &info, nbuf1, ':');
1507 printf("ARRAY container=%s member=%d UUID=%s\n",
1508 nbuf+5, i, nbuf1+5);
1509 }
1510 }
1511
1512 static void export_examine_super_ddf(struct supertype *st)
1513 {
1514 struct mdinfo info;
1515 char nbuf[64];
1516 getinfo_super_ddf(st, &info, NULL);
1517 fname_from_uuid(st, &info, nbuf, ':');
1518 printf("MD_METADATA=ddf\n");
1519 printf("MD_LEVEL=container\n");
1520 printf("MD_UUID=%s\n", nbuf+5);
1521 }
1522
/* Copy the DDF metadata (anchor plus the primary/secondary headers and
 * everything after them) from device 'from' to device 'to'.
 * Returns 0 on success, 1 on any validation or I/O failure. */
static int copy_metadata_ddf(struct supertype *st, int from, int to)
{
	void *buf;
	unsigned long long dsize, offset;
	int bytes;
	struct ddf_header *ddf;
	int written = 0;

	/* The meta consists of an anchor, a primary, and a secondary.
	 * This all lives at the end of the device.
	 * So it is easiest to find the earliest of primary and
	 * secondary, and copy everything from there.
	 *
	 * Anchor is 512 from end  It contains primary_lba and secondary_lba
	 * we choose one of those
	 */

	if (posix_memalign(&buf, 4096, 4096) != 0)
		return 1;

	if (!get_dev_size(from, NULL, &dsize))
		goto err;

	/* read and validate the anchor (the device's last sector) */
	if (lseek64(from, dsize-512, 0) < 0)
		goto err;
	if (read(from, buf, 512) != 512)
		goto err;
	ddf = buf;
	if (ddf->magic != DDF_HEADER_MAGIC ||
	    calc_crc(ddf, 512) != ddf->crc ||
	    (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
	     memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
		goto err;

	/* start at whichever of anchor/primary/secondary sits lowest
	 * (LBAs are in sectors, hence the << 9) */
	offset = dsize - 512;
	if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
		offset = __be64_to_cpu(ddf->primary_lba) << 9;
	if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
		offset = __be64_to_cpu(ddf->secondary_lba) << 9;

	/* NOTE(review): 'bytes' is an int — assumes the metadata area is
	 * well under 2GB, consistent with the 32M reservation elsewhere */
	bytes = dsize - offset;

	if (lseek64(from, offset, 0) < 0 ||
	    lseek64(to, offset, 0) < 0)
		goto err;
	/* copy the whole region to the same offsets in 4K chunks */
	while (written < bytes) {
		int n = bytes - written;
		if (n > 4096)
			n = 4096;
		if (read(from, buf, n) != n)
			goto err;
		if (write(to, buf, n) != n)
			goto err;
		written += n;
	}
	free(buf);
	return 0;
err:
	free(buf);
	return 1;
}
1584
/* --detail output for DDF: intentionally empty for now. */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 * Could print DDF GUID
	 * Need to find which array
	 * If whole, briefly list all arrays
	 * If one, give name
	 */
}
1594
1595 static void brief_detail_super_ddf(struct supertype *st)
1596 {
1597 struct mdinfo info;
1598 char nbuf[64];
1599 struct ddf_super *ddf = st->sb;
1600 unsigned int vcnum = get_vd_num_of_subarray(st);
1601 if (vcnum == DDF_CONTAINER)
1602 uuid_from_super_ddf(st, info.uuid);
1603 else if (vcnum == DDF_NOTFOUND)
1604 return;
1605 else
1606 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
1607 fname_from_uuid(st, &info, nbuf,':');
1608 printf(" UUID=%s", nbuf + 5);
1609 }
1610 #endif
1611
1612 static int match_home_ddf(struct supertype *st, char *homehost)
1613 {
1614 /* It matches 'this' host if the controller is a
1615 * Linux-MD controller with vendor_data matching
1616 * the hostname
1617 */
1618 struct ddf_super *ddf = st->sb;
1619 unsigned int len;
1620
1621 if (!homehost)
1622 return 0;
1623 len = strlen(homehost);
1624
1625 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1626 len < sizeof(ddf->controller.vendor_data) &&
1627 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1628 ddf->controller.vendor_data[len] == 0);
1629 }
1630
1631 #ifndef MDASSEMBLE
1632 static int find_index_in_bvd(const struct ddf_super *ddf,
1633 const struct vd_config *conf, unsigned int n,
1634 unsigned int *n_bvd)
1635 {
1636 /*
1637 * Find the index of the n-th valid physical disk in this BVD
1638 */
1639 unsigned int i, j;
1640 for (i = 0, j = 0; i < ddf->mppe &&
1641 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1642 if (conf->phys_refnum[i] != 0xffffffff) {
1643 if (n == j) {
1644 *n_bvd = i;
1645 return 1;
1646 }
1647 j++;
1648 }
1649 }
1650 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1651 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1652 return 0;
1653 }
1654
1655 static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1656 unsigned int n,
1657 unsigned int *n_bvd, struct vcl **vcl)
1658 {
1659 struct vcl *v;
1660
1661 for (v = ddf->conflist; v; v = v->next) {
1662 unsigned int nsec, ibvd;
1663 struct vd_config *conf;
1664 if (inst != v->vcnum)
1665 continue;
1666 conf = &v->conf;
1667 if (conf->sec_elmnt_count == 1) {
1668 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1669 *vcl = v;
1670 return conf;
1671 } else
1672 goto bad;
1673 }
1674 if (v->other_bvds == NULL) {
1675 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1676 __func__, conf->sec_elmnt_count);
1677 goto bad;
1678 }
1679 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1680 if (conf->sec_elmnt_seq != nsec) {
1681 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1682 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1683 == nsec)
1684 break;
1685 }
1686 if (ibvd == conf->sec_elmnt_count)
1687 goto bad;
1688 conf = v->other_bvds[ibvd-1];
1689 }
1690 if (!find_index_in_bvd(ddf, conf,
1691 n - nsec*conf->sec_elmnt_count, n_bvd))
1692 goto bad;
1693 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1694 , __func__, n, *n_bvd, ibvd-1, inst);
1695 *vcl = v;
1696 return conf;
1697 }
1698 bad:
1699 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
1700 return NULL;
1701 }
1702 #endif
1703
1704 static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
1705 {
1706 /* Find the entry in phys_disk which has the given refnum
1707 * and return it's index
1708 */
1709 unsigned int i;
1710 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
1711 if (ddf->phys->entries[i].refnum == phys_refnum)
1712 return i;
1713 return -1;
1714 }
1715
1716 static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1717 {
1718 char buf[20];
1719 struct sha1_ctx ctx;
1720 sha1_init_ctx(&ctx);
1721 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1722 sha1_finish_ctx(&ctx, buf);
1723 memcpy(uuid, buf, 4*4);
1724 }
1725
1726 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1727 {
1728 /* The uuid returned here is used for:
1729 * uuid to put into bitmap file (Create, Grow)
1730 * uuid for backup header when saving critical section (Grow)
1731 * comparing uuids when re-adding a device into an array
1732 * In these cases the uuid required is that of the data-array,
1733 * not the device-set.
1734 * uuid to recognise same set when adding a missing device back
1735 * to an array. This is a uuid for the device-set.
1736 *
1737 * For each of these we can make do with a truncated
1738 * or hashed uuid rather than the original, as long as
1739 * everyone agrees.
1740 * In the case of SVD we assume the BVD is of interest,
1741 * though that might be the case if a bitmap were made for
1742 * a mirrored SVD - worry about that later.
1743 * So we need to find the VD configuration record for the
1744 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1745 * The first 16 bytes of the sha1 of these is used.
1746 */
1747 struct ddf_super *ddf = st->sb;
1748 struct vcl *vcl = ddf->currentconf;
1749 char *guid;
1750
1751 if (vcl)
1752 guid = vcl->conf.guid;
1753 else
1754 guid = ddf->anchor.guid;
1755 uuid_from_ddf_guid(guid, uuid);
1756 }
1757
1758 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
1759
/* Fill 'info' with container-level information, or delegate to
 * getinfo_super_ddf_bvd() when a member array is currently selected.
 * 'map', when non-NULL, gets one byte per disk: 1 if the matching
 * physical-disk entry is Online and not Failed. */
static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
{
	struct ddf_super *ddf = st->sb;
	/* raid_disks is preset by the caller; capture the map length
	 * before the memset below clears it */
	int map_disks = info->array.raid_disks;
	__u32 *cptr;

	if (ddf->currentconf) {
		getinfo_super_ddf_bvd(st, info, map);
		return;
	}
	memset(info, 0, sizeof(*info));

	info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
	info->array.level = LEVEL_CONTAINER;
	info->array.layout = 0;
	info->array.md_minor = -1;
	/* bytes 16-19 of the container GUID hold the DDF timestamp
	 * (1980 based); DECADE converts it to a unix ctime */
	cptr = (__u32 *)(ddf->anchor.guid + 16);
	info->array.ctime = DECADE + __be32_to_cpu(*cptr);

	info->array.utime = 0;
	info->array.chunk_size = 0;
	info->container_enough = 1;

	info->disk.major = 0;
	info->disk.minor = 0;
	if (ddf->dlist) {
		info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
		info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);

		/* NOTE(review): find_phys can return -1, which would
		 * index entries[] out of bounds here — confirm that a
		 * loaded disk always has a phys entry */
		info->data_offset = __be64_to_cpu(ddf->phys->
					  entries[info->disk.raid_disk].
					  config_size);
		info->component_size = ddf->dlist->size - info->data_offset;
	} else {
		info->disk.number = -1;
		info->disk.raid_disk = -1;
//		info->disk.raid_disk = find refnum in the table and use index;
	}
	info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);

	info->recovery_start = MaxSector;
	info->reshape_active = 0;
	info->recovery_blocked = 0;
	info->name[0] = 0;

	/* containers report the -1/-2 pseudo version */
	info->array.major_version = -1;
	info->array.minor_version = -2;
	strcpy(info->text_version, "ddf");
	info->safe_mode_delay = 0;

	uuid_from_super_ddf(st, info->uuid);

	if (map) {
		int i;
		for (i = 0 ; i < map_disks; i++) {
			if (i < info->array.raid_disks &&
			    (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
			    !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
				map[i] = 1;
			else
				map[i] = 0;
		}
	}
}
1824
1825 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
1826 {
1827 struct ddf_super *ddf = st->sb;
1828 struct vcl *vc = ddf->currentconf;
1829 int cd = ddf->currentdev;
1830 int j;
1831 struct dl *dl;
1832 int map_disks = info->array.raid_disks;
1833 __u32 *cptr;
1834
1835 memset(info, 0, sizeof(*info));
1836 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1837 return;
1838 info->array.md_minor = -1;
1839 cptr = (__u32 *)(vc->conf.guid + 16);
1840 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1841 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1842 info->array.chunk_size = 512 << vc->conf.chunk_shift;
1843 info->custom_array_size = 0;
1844
1845 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
1846 info->data_offset =
1847 __be64_to_cpu(LBA_OFFSET(ddf, &vc->conf)[cd]);
1848 if (vc->block_sizes)
1849 info->component_size = vc->block_sizes[cd];
1850 else
1851 info->component_size = __be64_to_cpu(vc->conf.blocks);
1852 }
1853
1854 for (dl = ddf->dlist; dl ; dl = dl->next)
1855 if (dl->raiddisk == ddf->currentdev)
1856 break;
1857
1858 info->disk.major = 0;
1859 info->disk.minor = 0;
1860 info->disk.state = 0;
1861 if (dl) {
1862 info->disk.major = dl->major;
1863 info->disk.minor = dl->minor;
1864 info->disk.raid_disk = dl->raiddisk;
1865 info->disk.number = dl->pdnum;
1866 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
1867 }
1868
1869 info->container_member = ddf->currentconf->vcnum;
1870
1871 info->recovery_start = MaxSector;
1872 info->resync_start = 0;
1873 info->reshape_active = 0;
1874 info->recovery_blocked = 0;
1875 if (!(ddf->virt->entries[info->container_member].state
1876 & DDF_state_inconsistent) &&
1877 (ddf->virt->entries[info->container_member].init_state
1878 & DDF_initstate_mask)
1879 == DDF_init_full)
1880 info->resync_start = MaxSector;
1881
1882 uuid_from_super_ddf(st, info->uuid);
1883
1884 info->array.major_version = -1;
1885 info->array.minor_version = -2;
1886 sprintf(info->text_version, "/%s/%d",
1887 st->container_devnm,
1888 info->container_member);
1889 info->safe_mode_delay = 200;
1890
1891 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1892 info->name[16]=0;
1893 for(j=0; j<16; j++)
1894 if (info->name[j] == ' ')
1895 info->name[j] = 0;
1896
1897 if (map)
1898 for (j = 0; j < map_disks; j++) {
1899 map[j] = 0;
1900 if (j < info->array.raid_disks) {
1901 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
1902 if (i >= 0 &&
1903 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1904 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1905 map[i] = 1;
1906 }
1907 }
1908 }
1909
/* Apply an in-place metadata update named by 'update'.
 * Returns 0 when the update was applied (or is a harmless no-op) and
 * -1 when the update is unsupported for DDF. */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *        be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *	   if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *		linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  uuid:  Change the uuid of the array to match what is given
	 *  homehost:  update the recorded homehost
	 *  name:  update the name - preserving the homehost
	 *  _reshape_progress: record new reshape_progress position.
	 *
	 * Following are not relevant for this version:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 */
	int rv = 0;
//	struct ddf_super *ddf = st->sb;
//	struct vd_config *vd = find_vdcr(ddf, info->container_member);
//	struct virtual_entry *ve = find_ve(ddf);

	/* we don't need to handle "force-*" or "assemble" as
	 * there is no need to 'trick' the kernel.  When the metadata is
	 * first updated to activate the array, all the implied
	 * modifications will just happen.
	 */

	if (strcmp(update, "grow") == 0) {
		/* FIXME */
	} else if (strcmp(update, "resync") == 0) {
//		info->resync_checkpoint = 0;
	} else if (strcmp(update, "homehost") == 0) {
		/* homehost is stored in controller->vendor_data,
		 * or it is when we are the vendor
		 */
//		if (info->vendor_is_local)
//			strcpy(ddf->controller.vendor_data, homehost);
		rv = -1;
	} else if (strcmp(update, "name") == 0) {
		/* name is stored in virtual_entry->name */
//		memset(ve->name, ' ', 16);
//		strncpy(ve->name, info->name, 16);
		rv = -1;
	} else if (strcmp(update, "_reshape_progress") == 0) {
		/* We don't support reshape yet */
	} else if (strcmp(update, "assemble") == 0 ) {
		/* Do nothing, just succeed */
		rv = 0;
	} else
		rv = -1;

//	update_all_csum(ddf);

	return rv;
}
1978
1979 static void make_header_guid(char *guid)
1980 {
1981 __u32 stamp;
1982 /* Create a DDF Header of Virtual Disk GUID */
1983
1984 /* 24 bytes of fiction required.
1985 * first 8 are a 'vendor-id' - "Linux-MD"
1986 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1987 * Remaining 8 random number plus timestamp
1988 */
1989 memcpy(guid, T10, sizeof(T10));
1990 stamp = __cpu_to_be32(0xdeadbeef);
1991 memcpy(guid+8, &stamp, 4);
1992 stamp = __cpu_to_be32(0);
1993 memcpy(guid+12, &stamp, 4);
1994 stamp = __cpu_to_be32(time(0) - DECADE);
1995 memcpy(guid+16, &stamp, 4);
1996 stamp = random32();
1997 memcpy(guid+20, &stamp, 4);
1998 }
1999
2000 static unsigned int find_unused_vde(const struct ddf_super *ddf)
2001 {
2002 unsigned int i;
2003 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2004 if (all_ff(ddf->virt->entries[i].guid))
2005 return i;
2006 }
2007 return DDF_NOTFOUND;
2008 }
2009
2010 static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2011 const char *name)
2012 {
2013 unsigned int i;
2014 if (name == NULL)
2015 return DDF_NOTFOUND;
2016 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2017 if (all_ff(ddf->virt->entries[i].guid))
2018 continue;
2019 if (!strncmp(name, ddf->virt->entries[i].name,
2020 sizeof(ddf->virt->entries[i].name)))
2021 return i;
2022 }
2023 return DDF_NOTFOUND;
2024 }
2025
2026 static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2027 const char *guid)
2028 {
2029 unsigned int i;
2030 if (guid == NULL || all_ff(guid))
2031 return DDF_NOTFOUND;
2032 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2033 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2034 return i;
2035 return DDF_NOTFOUND;
2036 }
2037
2038 static int init_super_ddf_bvd(struct supertype *st,
2039 mdu_array_info_t *info,
2040 unsigned long long size,
2041 char *name, char *homehost,
2042 int *uuid, unsigned long long data_offset);
2043
2044 static int init_super_ddf(struct supertype *st,
2045 mdu_array_info_t *info,
2046 unsigned long long size, char *name, char *homehost,
2047 int *uuid, unsigned long long data_offset)
2048 {
2049 /* This is primarily called by Create when creating a new array.
2050 * We will then get add_to_super called for each component, and then
2051 * write_init_super called to write it out to each device.
2052 * For DDF, Create can create on fresh devices or on a pre-existing
2053 * array.
2054 * To create on a pre-existing array a different method will be called.
2055 * This one is just for fresh drives.
2056 *
2057 * We need to create the entire 'ddf' structure which includes:
2058 * DDF headers - these are easy.
2059 * Controller data - a Sector describing this controller .. not that
2060 * this is a controller exactly.
2061 * Physical Disk Record - one entry per device, so
2062 * leave plenty of space.
2063 * Virtual Disk Records - again, just leave plenty of space.
2064 * This just lists VDs, doesn't give details
2065 * Config records - describes the VDs that use this disk
2066 * DiskData - describes 'this' device.
2067 * BadBlockManagement - empty
2068 * Diag Space - empty
2069 * Vendor Logs - Could we put bitmaps here?
2070 *
2071 */
2072 struct ddf_super *ddf;
2073 char hostname[17];
2074 int hostlen;
2075 int max_phys_disks, max_virt_disks;
2076 unsigned long long sector;
2077 int clen;
2078 int i;
2079 int pdsize, vdsize;
2080 struct phys_disk *pd;
2081 struct virtual_disk *vd;
2082
2083 if (data_offset != INVALID_SECTORS) {
2084 pr_err("data-offset not supported by DDF\n");
2085 return 0;
2086 }
2087
2088 if (st->sb)
2089 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2090 data_offset);
2091
2092 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
2093 pr_err("%s could not allocate superblock\n", __func__);
2094 return 0;
2095 }
2096 memset(ddf, 0, sizeof(*ddf));
2097 ddf->dlist = NULL; /* no physical disks yet */
2098 ddf->conflist = NULL; /* No virtual disks yet */
2099 st->sb = ddf;
2100
2101 if (info == NULL) {
2102 /* zeroing superblock */
2103 return 0;
2104 }
2105
2106 /* At least 32MB *must* be reserved for the ddf. So let's just
2107 * start 32MB from the end, and put the primary header there.
2108 * Don't do secondary for now.
2109 * We don't know exactly where that will be yet as it could be
2110 * different on each device. To just set up the lengths.
2111 *
2112 */
2113
2114 ddf->anchor.magic = DDF_HEADER_MAGIC;
2115 make_header_guid(ddf->anchor.guid);
2116
2117 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
2118 ddf->anchor.seq = __cpu_to_be32(1);
2119 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2120 ddf->anchor.openflag = 0xFF;
2121 ddf->anchor.foreignflag = 0;
2122 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2123 ddf->anchor.pad0 = 0xff;
2124 memset(ddf->anchor.pad1, 0xff, 12);
2125 memset(ddf->anchor.header_ext, 0xff, 32);
2126 ddf->anchor.primary_lba = ~(__u64)0;
2127 ddf->anchor.secondary_lba = ~(__u64)0;
2128 ddf->anchor.type = DDF_HEADER_ANCHOR;
2129 memset(ddf->anchor.pad2, 0xff, 3);
2130 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2131 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2132 of 32M reserved.. */
2133 max_phys_disks = 1023; /* Should be enough */
2134 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2135 max_virt_disks = 255;
2136 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2137 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2138 ddf->max_part = 64;
2139 ddf->mppe = 256;
2140 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2141 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2142 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
2143 memset(ddf->anchor.pad3, 0xff, 54);
2144 /* controller sections is one sector long immediately
2145 * after the ddf header */
2146 sector = 1;
2147 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2148 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2149 sector += 1;
2150
2151 /* phys is 8 sectors after that */
2152 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2153 sizeof(struct phys_disk_entry)*max_phys_disks,
2154 512);
2155 switch(pdsize/512) {
2156 case 2: case 8: case 32: case 128: case 512: break;
2157 default: abort();
2158 }
2159 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2160 ddf->anchor.phys_section_length =
2161 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2162 sector += pdsize/512;
2163
2164 /* virt is another 32 sectors */
2165 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2166 sizeof(struct virtual_entry) * max_virt_disks,
2167 512);
2168 switch(vdsize/512) {
2169 case 2: case 8: case 32: case 128: case 512: break;
2170 default: abort();
2171 }
2172 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2173 ddf->anchor.virt_section_length =
2174 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2175 sector += vdsize/512;
2176
2177 clen = ddf->conf_rec_len * (ddf->max_part+1);
2178 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2179 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2180 sector += clen;
2181
2182 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2183 ddf->anchor.data_section_length = __cpu_to_be32(1);
2184 sector += 1;
2185
2186 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2187 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2188 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2189 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2190 ddf->anchor.vendor_length = __cpu_to_be32(0);
2191 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2192
2193 memset(ddf->anchor.pad4, 0xff, 256);
2194
2195 memcpy(&ddf->primary, &ddf->anchor, 512);
2196 memcpy(&ddf->secondary, &ddf->anchor, 512);
2197
2198 ddf->primary.openflag = 1; /* I guess.. */
2199 ddf->primary.type = DDF_HEADER_PRIMARY;
2200
2201 ddf->secondary.openflag = 1; /* I guess.. */
2202 ddf->secondary.type = DDF_HEADER_SECONDARY;
2203
2204 ddf->active = &ddf->primary;
2205
2206 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2207
2208 /* 24 more bytes of fiction required.
2209 * first 8 are a 'vendor-id' - "Linux-MD"
2210 * Remaining 16 are serial number.... maybe a hostname would do?
2211 */
2212 memcpy(ddf->controller.guid, T10, sizeof(T10));
2213 gethostname(hostname, sizeof(hostname));
2214 hostname[sizeof(hostname) - 1] = 0;
2215 hostlen = strlen(hostname);
2216 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2217 for (i = strlen(T10) ; i+hostlen < 24; i++)
2218 ddf->controller.guid[i] = ' ';
2219
2220 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2221 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2222 ddf->controller.type.sub_vendor_id = 0;
2223 ddf->controller.type.sub_device_id = 0;
2224 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2225 memset(ddf->controller.pad, 0xff, 8);
2226 memset(ddf->controller.vendor_data, 0xff, 448);
2227 if (homehost && strlen(homehost) < 440)
2228 strcpy((char*)ddf->controller.vendor_data, homehost);
2229
2230 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
2231 pr_err("%s could not allocate pd\n", __func__);
2232 return 0;
2233 }
2234 ddf->phys = pd;
2235 ddf->pdsize = pdsize;
2236
2237 memset(pd, 0xff, pdsize);
2238 memset(pd, 0, sizeof(*pd));
2239 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2240 pd->used_pdes = __cpu_to_be16(0);
2241 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2242 memset(pd->pad, 0xff, 52);
2243
2244 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
2245 pr_err("%s could not allocate vd\n", __func__);
2246 return 0;
2247 }
2248 ddf->virt = vd;
2249 ddf->vdsize = vdsize;
2250 memset(vd, 0, vdsize);
2251 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2252 vd->populated_vdes = __cpu_to_be16(0);
2253 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2254 memset(vd->pad, 0xff, 52);
2255
2256 for (i=0; i<max_virt_disks; i++)
2257 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2258
2259 st->sb = ddf;
2260 ddf_set_updates_pending(ddf);
2261 return 1;
2262 }
2263
static int chunk_to_shift(int chunksize)
{
	/* Map a chunk size in bytes to the DDF chunk-shift: the (0-based)
	 * position of the lowest set bit of the size in 512-byte sectors.
	 * Returns -1 when the sector count is 0 (same as ffs()-1).
	 */
	int sectors = chunksize / 512;
	int shift = -1;

	while (sectors) {
		shift++;
		if (sectors & 1)
			break;
		sectors >>= 1;
	}
	return shift;
}
2268
2269 #ifndef MDASSEMBLE
struct extent {
	unsigned long long start, size;
};
/* qsort() comparator: order extents by ascending start sector. */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	if (lhs->start == rhs->start)
		return 0;
	return lhs->start < rhs->start ? -1 : 1;
}
2283
2284 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
2285 {
2286 /* find a list of used extents on the give physical device
2287 * (dnum) of the given ddf.
2288 * Return a malloced array of 'struct extent'
2289
2290 * FIXME ignore DDF_Legacy devices?
2291
2292 */
2293 struct extent *rv;
2294 int n = 0;
2295 unsigned int i, j;
2296
2297 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
2298
2299 for (i = 0; i < ddf->max_part; i++) {
2300 struct vcl *v = dl->vlist[i];
2301 if (v == NULL)
2302 continue;
2303 for (j = 0; j < v->conf.prim_elmnt_count; j++)
2304 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
2305 /* This device plays role 'j' in 'v'. */
2306 rv[n].start = __be64_to_cpu(
2307 LBA_OFFSET(ddf, &v->conf)[j]);
2308 rv[n].size = __be64_to_cpu(v->conf.blocks);
2309 n++;
2310 break;
2311 }
2312 }
2313 qsort(rv, n, sizeof(*rv), cmp_extent);
2314
2315 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2316 rv[n].size = 0;
2317 return rv;
2318 }
2319 #endif
2320
static int init_super_ddf_bvd(struct supertype *st,
			      mdu_array_info_t *info,
			      unsigned long long size,
			      char *name, char *homehost,
			      int *uuid, unsigned long long data_offset)
{
	/* We are creating a BVD inside a pre-existing container,
	 * so st->sb is already set.
	 * We need to create a new vd_config and a new virtual_entry.
	 * Returns 1 on success, 0 on failure (duplicate name, no free
	 * VD slot, allocation failure, or unsupported level/layout).
	 */
	struct ddf_super *ddf = st->sb;
	unsigned int venum, i;
	struct virtual_entry *ve;
	struct vcl *vcl;
	struct vd_config *vc;

	/* refuse duplicate array names within this container */
	if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
		pr_err("This ddf already has an array called %s\n", name);
		return 0;
	}
	venum = find_unused_vde(ddf);
	if (venum == DDF_NOTFOUND) {
		pr_err("Cannot find spare slot for virtual disk\n");
		return 0;
	}
	ve = &ddf->virt->entries[venum];

	/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
	 * timestamp, random number
	 */
	make_header_guid(ve->guid);
	ve->unit = __cpu_to_be16(info->md_minor);
	ve->pad0 = 0xFFFF;
	/* NOTE(review): guid_crc is computed over the container header
	 * GUID (anchor.guid), not the newly generated VD GUID above —
	 * verify this is the intended DDF semantic. */
	ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
	ve->type = 0;
	ve->state = DDF_state_degraded; /* Will be modified as devices are added */
	if (info->state & 1) /* clean */
		ve->init_state = DDF_init_full;
	else
		ve->init_state = DDF_init_not;

	memset(ve->pad1, 0xff, 14);
	/* name is a 16-byte space-padded field; strncpy NUL-pads any
	 * remainder over the spaces when 'name' is shorter than 16 */
	memset(ve->name, ' ', 16);
	if (name)
		strncpy(ve->name, name, 16);
	ddf->virt->populated_vdes =
		__cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);

	/* Now create a new vd_config */
	if (posix_memalign((void**)&vcl, 512,
		           (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
		pr_err("%s could not allocate vd_config\n", __func__);
		return 0;
	}
	vcl->vcnum = venum;
	vcl->block_sizes = NULL; /* FIXME not for CONCAT */
	vc = &vcl->conf;

	vc->magic = DDF_VD_CONF_MAGIC;
	memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
	vc->timestamp = __cpu_to_be32(time(0)-DECADE);
	vc->seqnum = __cpu_to_be32(1);
	memset(vc->pad0, 0xff, 24);
	vc->chunk_shift = chunk_to_shift(info->chunk_size);
	/* translate the md level/layout into DDF prl/rlq etc. */
	if (layout_md2ddf(info, vc) == -1 ||
		__be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
		pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
		       __func__, info->level, info->layout, info->raid_disks);
		free(vcl);
		return 0;
	}
	vc->sec_elmnt_seq = 0;
	if (alloc_other_bvds(ddf, vcl) != 0) {
		pr_err("%s could not allocate other bvds\n",
		       __func__);
		free(vcl);
		return 0;
	}
	/* info->size is in KiB; on-disk blocks are 512-byte sectors */
	vc->blocks = __cpu_to_be64(info->size * 2);
	vc->array_blocks = __cpu_to_be64(
		calc_array_size(info->level, info->raid_disks, info->layout,
				info->chunk_size, info->size*2));
	memset(vc->pad1, 0xff, 8);
	vc->spare_refs[0] = 0xffffffff;
	vc->spare_refs[1] = 0xffffffff;
	vc->spare_refs[2] = 0xffffffff;
	vc->spare_refs[3] = 0xffffffff;
	vc->spare_refs[4] = 0xffffffff;
	vc->spare_refs[5] = 0xffffffff;
	vc->spare_refs[6] = 0xffffffff;
	vc->spare_refs[7] = 0xffffffff;
	memset(vc->cache_pol, 0, 8);
	vc->bg_rate = 0x80;
	memset(vc->pad2, 0xff, 3);
	memset(vc->pad3, 0xff, 52);
	memset(vc->pad4, 0xff, 192);
	memset(vc->v0, 0xff, 32);
	memset(vc->v1, 0xff, 32);
	memset(vc->v2, 0xff, 16);
	memset(vc->v3, 0xff, 16);
	memset(vc->vendor, 0xff, 32);

	/* phys_refnum table: mppe 4-byte refnums (init to 'unused'),
	 * followed by mppe 8-byte LBA offsets (init to 0) */
	memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
	memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);

	/* replicate the config for each secondary element (SVD case) */
	for (i = 1; i < vc->sec_elmnt_count; i++) {
		memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
		vcl->other_bvds[i-1]->sec_elmnt_seq = i;
	}

	vcl->next = ddf->conflist;
	ddf->conflist = vcl;
	ddf->currentconf = vcl;
	ddf_set_updates_pending(ddf);
	return 1;
}
2437
2438 #ifndef MDASSEMBLE
static void add_to_super_ddf_bvd(struct supertype *st,
				 mdu_disk_info_t *dk, int fd, char *devname)
{
	/* fd and devname identify a device with-in the ddf container (st).
	 * dk identifies a location in the new BVD.
	 * We need to find suitable free space in that device and update
	 * the phys_refnum and lba_offset for the newly created vd_config.
	 * We might also want to update the type in the phys_disk
	 * section.
	 *
	 * Alternately: fd == -1 and we have already chosen which device to
	 * use and recorded in dlist->raid_disk;
	 *
	 * Returns silently (void) on any failure: unknown device, disk
	 * not in-sync, no extent list, no large-enough gap, or no free
	 * vlist slot.
	 */
	struct dl *dl;
	struct ddf_super *ddf = st->sb;
	struct vd_config *vc;
	__u64 *lba_offset;
	unsigned int working;
	unsigned int i;
	unsigned long long blocks, pos, esize;
	struct extent *ex;

	if (fd == -1) {
		/* device was pre-selected (e.g. by reserve_space) */
		for (dl = ddf->dlist; dl ; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		for (dl = ddf->dlist; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}
	if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
		return;

	vc = &ddf->currentconf->conf;
	lba_offset = LBA_OFFSET(ddf, &ddf->currentconf->conf);

	ex = get_extents(ddf, dl);
	if (!ex)
		return;

	/* first-fit: walk the gaps between used extents looking for one
	 * of at least 'blocks' sectors; the sentinel entry (size==0)
	 * terminates the scan */
	i = 0; pos = 0;
	blocks = __be64_to_cpu(vc->blocks);
	if (ddf->currentconf->block_sizes)
		blocks = ddf->currentconf->block_sizes[dk->raid_disk];

	do {
		esize = ex[i].start - pos;
		if (esize >= blocks)
			break;
		pos = ex[i].start + ex[i].size;
		i++;
	} while (ex[i-1].size);

	free(ex);
	if (esize < blocks)
		return;

	/* record the member's refnum and data start LBA in the config */
	ddf->currentdev = dk->raid_disk;
	vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
	lba_offset[dk->raid_disk] = __cpu_to_be64(pos);

	/* attach this config to the disk's first free vlist slot */
	for (i = 0; i < ddf->max_part ; i++)
		if (dl->vlist[i] == NULL)
			break;
	if (i == ddf->max_part)
		return;
	dl->vlist[i] = ddf->currentconf;

	if (fd >= 0)
		dl->fd = fd;
	if (devname)
		dl->devname = devname;

	/* Check how many working raid_disks, and if we can mark
	 * array as optimal yet
	 */
	working = 0;

	for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
		if (vc->phys_refnum[i] != 0xffffffff)
			working++;

	/* Find which virtual_entry */
	i = ddf->currentconf->vcnum;
	if (working == __be16_to_cpu(vc->prim_elmnt_count))
		ddf->virt->entries[i].state =
			(ddf->virt->entries[i].state & ~DDF_state_mask)
			| DDF_state_optimal;

	/* RAID6 missing exactly one device is still 'part optimal' */
	if (vc->prl == DDF_RAID6 &&
	    working+1 == __be16_to_cpu(vc->prim_elmnt_count))
		ddf->virt->entries[i].state =
			(ddf->virt->entries[i].state & ~DDF_state_mask)
			| DDF_state_part_optimal;

	/* the disk is now an active VD member, not a global spare */
	ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
	ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
	ddf_set_updates_pending(ddf);
}
2540
2541 /* add a device to a container, either while creating it or while
2542 * expanding a pre-existing container
2543 */
static int add_to_super_ddf(struct supertype *st,
			    mdu_disk_info_t *dk, int fd, char *devname,
			    unsigned long long data_offset)
{
	/* Add a device to a container, either while creating it or while
	 * expanding a pre-existing container.
	 * Returns 0 on success, 1 on allocation failure.
	 */
	struct ddf_super *ddf = st->sb;
	struct dl *dd;
	time_t now;
	struct tm *tm;
	unsigned long long size;
	struct phys_disk_entry *pde;
	unsigned int n, i;
	struct stat stb;
	__u32 *tptr;

	/* a BVD is being assembled inside the container: delegate */
	if (ddf->currentconf) {
		add_to_super_ddf_bvd(st, dk, fd, devname);
		return 0;
	}

	/* This is device numbered dk->number. We need to create
	 * a phys_disk entry and a more detailed disk_data entry.
	 */
	fstat(fd, &stb);
	if (posix_memalign((void**)&dd, 512,
		           sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
		pr_err("%s could allocate buffer for new disk, aborting\n",
		       __func__);
		return 1;
	}
	dd->major = major(stb.st_rdev);
	dd->minor = minor(stb.st_rdev);
	dd->devname = devname;
	dd->fd = fd;
	dd->spare = NULL;

	dd->disk.magic = DDF_PHYS_DATA_MAGIC;
	now = time(0);
	tm = localtime(&now);
	/* GUID: 8-byte vendor + 8-byte date, then 8 random bytes.
	 * sprintf's trailing NUL lands at byte 16 but is immediately
	 * overwritten by the random words written through tptr. */
	sprintf(dd->disk.guid, "%8s%04d%02d%02d",
		T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
	tptr = (__u32 *)(dd->disk.guid + 16);
	*tptr++ = random32();
	*tptr = random32();

	/* pick a random refnum, retrying until it collides with no
	 * existing phys_disk entry */
	do {
		/* Cannot be bothered finding a CRC of some irrelevant details*/
		dd->disk.refnum = random32();
		for (i = __be16_to_cpu(ddf->active->max_pd_entries);
		     i > 0; i--)
			if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
				break;
	} while (i > 0);

	dd->disk.forced_ref = 1;
	dd->disk.forced_guid = 1;
	memset(dd->disk.vendor, ' ', 32);
	memcpy(dd->disk.vendor, "Linux", 5);
	memset(dd->disk.pad, 0xff, 442);
	for (i = 0; i < ddf->max_part ; i++)
		dd->vlist[i] = NULL;

	n = __be16_to_cpu(ddf->phys->used_pdes);
	pde = &ddf->phys->entries[n];
	dd->pdnum = n;

	if (st->update_tail) {
		/* Under mdmon: queue a one-entry phys_disk update rather
		 * than modifying the live table.  Here used_pdes carries
		 * the index of the new entry, not a count. */
		int len = (sizeof(struct phys_disk) +
			   sizeof(struct phys_disk_entry));
		struct phys_disk *pd;

		pd = xmalloc(len);
		pd->magic = DDF_PHYS_RECORDS_MAGIC;
		pd->used_pdes = __cpu_to_be16(n);
		pde = &pd->entries[0];
		dd->mdupdate = pd;
	} else {
		n++;
		ddf->phys->used_pdes = __cpu_to_be16(n);
	}

	memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
	pde->refnum = dd->disk.refnum;
	pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
	pde->state = __cpu_to_be16(DDF_Online);
	get_dev_size(fd, NULL, &size);
	/* We are required to reserve 32Meg, and record the size in sectors */
	pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
	sprintf(pde->path, "%17.17s","Information: nil") ;
	memset(pde->pad, 0xff, 6);

	/* remember the device size in sectors */
	dd->size = size >> 9;
	if (st->update_tail) {
		/* deferred: mdmon will apply the queued update later */
		dd->next = ddf->add_list;
		ddf->add_list = dd;
	} else {
		dd->next = ddf->dlist;
		ddf->dlist = dd;
		ddf_set_updates_pending(ddf);
	}

	return 0;
}
2646
2647 static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2648 {
2649 struct ddf_super *ddf = st->sb;
2650 struct dl *dl;
2651
2652 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2653 * disappeared from the container.
2654 * We need to arrange that it disappears from the metadata and
2655 * internal data structures too.
2656 * Most of the work is done by ddf_process_update which edits
2657 * the metadata and closes the file handle and attaches the memory
2658 * where free_updates will free it.
2659 */
2660 for (dl = ddf->dlist; dl ; dl = dl->next)
2661 if (dl->major == dk->major &&
2662 dl->minor == dk->minor)
2663 break;
2664 if (!dl)
2665 return -1;
2666
2667 if (st->update_tail) {
2668 int len = (sizeof(struct phys_disk) +
2669 sizeof(struct phys_disk_entry));
2670 struct phys_disk *pd;
2671
2672 pd = xmalloc(len);
2673 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2674 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2675 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2676 append_metadata_update(st, pd, len);
2677 }
2678 return 0;
2679 }
2680
2681 /*
2682 * This is the write_init_super method for a ddf container. It is
2683 * called when creating a container or adding another device to a
2684 * container.
2685 */
2686 #define NULL_CONF_SZ 4096
2687
2688 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2689 __u32 refnum, unsigned int nmax,
2690 const struct vd_config **bvd,
2691 unsigned int *idx);
2692
static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
				 char *null_aligned)
{
	/* Write one complete copy of the DDF structure to disk 'd':
	 * header, controller data, phys/virt disk tables, all config
	 * records and finally the disk-data block, starting at either
	 * the primary or secondary header LBA depending on 'type'.
	 * The header is written with openflag=1 first and rewritten
	 * with openflag=0 at the end, so a crash mid-write leaves the
	 * copy detectably 'open'.
	 * Returns 1 on success, 0 on any write failure.
	 */
	unsigned long long sector;
	struct ddf_header *header;
	int fd, i, n_config, conf_size;
	int ret = 0;

	fd = d->fd;

	switch (type) {
	case DDF_HEADER_PRIMARY:
		header = &ddf->primary;
		sector = __be64_to_cpu(header->primary_lba);
		break;
	case DDF_HEADER_SECONDARY:
		header = &ddf->secondary;
		sector = __be64_to_cpu(header->secondary_lba);
		break;
	default:
		return 0;
	}

	header->type = type;
	header->openflag = 1;
	header->crc = calc_crc(header, 512);

	lseek64(fd, sector<<9, 0);
	/* NOTE(review): these checks only catch write() errors (<0);
	 * a short write is treated as success — verify acceptable. */
	if (write(fd, header, 512) < 0)
		goto out;

	ddf->controller.crc = calc_crc(&ddf->controller, 512);
	if (write(fd, &ddf->controller, 512) < 0)
		goto out;

	ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
	if (write(fd, ddf->phys, ddf->pdsize) < 0)
		goto out;
	ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
	if (write(fd, ddf->virt, ddf->vdsize) < 0)
		goto out;

	/* Now write lots of config records. */
	n_config = ddf->max_part;
	conf_size = ddf->conf_rec_len * 512;
	for (i = 0 ; i <= n_config ; i++) {
		struct vcl *c;
		struct vd_config *vdc = NULL;
		if (i == n_config) {
			/* the final slot holds the spare assignment record */
			c = (struct vcl *)d->spare;
			if (c)
				vdc = &c->conf;
		} else {
			unsigned int dummy;
			c = d->vlist[i];
			if (c)
				get_pd_index_from_refnum(
					c, d->disk.refnum,
					ddf->mppe,
					(const struct vd_config **)&vdc,
					&dummy);
		}
		if (c) {
			vdc->seqnum = header->seq;
			vdc->crc = calc_crc(vdc, conf_size);
			if (write(fd, vdc, conf_size) < 0)
				break;
		} else {
			/* unused slot: fill with 0xff, NULL_CONF_SZ at a time */
			unsigned int togo = conf_size;
			while (togo > NULL_CONF_SZ) {
				if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
					break;
				togo -= NULL_CONF_SZ;
			}
			if (write(fd, null_aligned, togo) < 0)
				break;
		}
	}
	/* the loop only exits early (i <= n_config) on a write failure */
	if (i <= n_config)
		goto out;

	d->disk.crc = calc_crc(&d->disk, 512);
	if (write(fd, &d->disk, 512) < 0)
		goto out;

	ret = 1;
out:
	/* always clear the open flag and rewrite the header, even after
	 * a failure, so the on-disk copy is not left marked open */
	header->openflag = 0;
	header->crc = calc_crc(header, 512);

	lseek64(fd, sector<<9, 0);
	if (write(fd, header, 512) < 0)
		ret = 0;

	return ret;
}
2789
2790 static int __write_init_super_ddf(struct supertype *st)
2791 {
2792 struct ddf_super *ddf = st->sb;
2793 struct dl *d;
2794 int attempts = 0;
2795 int successes = 0;
2796 unsigned long long size;
2797 char *null_aligned;
2798 __u32 seq;
2799
2800 pr_state(ddf, __func__);
2801 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2802 return -ENOMEM;
2803 }
2804 memset(null_aligned, 0xff, NULL_CONF_SZ);
2805
2806 seq = ddf->active->seq + 1;
2807
2808 /* try to write updated metadata,
2809 * if we catch a failure move on to the next disk
2810 */
2811 for (d = ddf->dlist; d; d=d->next) {
2812 int fd = d->fd;
2813
2814 if (fd < 0)
2815 continue;
2816
2817 attempts++;
2818 /* We need to fill in the primary, (secondary) and workspace
2819 * lba's in the headers, set their checksums,
2820 * Also checksum phys, virt....
2821 *
2822 * Then write everything out, finally the anchor is written.
2823 */
2824 get_dev_size(fd, NULL, &size);
2825 size /= 512;
2826 if (d->workspace_lba != 0)
2827 ddf->anchor.workspace_lba = d->workspace_lba;
2828 else
2829 ddf->anchor.workspace_lba =
2830 __cpu_to_be64(size - 32*1024*2);
2831 if (d->primary_lba != 0)
2832 ddf->anchor.primary_lba = d->primary_lba;
2833 else
2834 ddf->anchor.primary_lba =
2835 __cpu_to_be64(size - 16*1024*2);
2836 if (d->secondary_lba != 0)
2837 ddf->anchor.secondary_lba = d->secondary_lba;
2838 else
2839 ddf->anchor.secondary_lba =
2840 __cpu_to_be64(size - 32*1024*2);
2841 ddf->anchor.seq = seq;
2842 memcpy(&ddf->primary, &ddf->anchor, 512);
2843 memcpy(&ddf->secondary, &ddf->anchor, 512);
2844
2845 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2846 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2847 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2848
2849 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2850 null_aligned))
2851 continue;
2852
2853 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2854 null_aligned))
2855 continue;
2856
2857 lseek64(fd, (size-1)*512, SEEK_SET);
2858 if (write(fd, &ddf->anchor, 512) < 0)
2859 continue;
2860 successes++;
2861 }
2862 free(null_aligned);
2863
2864 return attempts != successes;
2865 }
2866
2867 static int write_init_super_ddf(struct supertype *st)
2868 {
2869 struct ddf_super *ddf = st->sb;
2870 struct vcl *currentconf = ddf->currentconf;
2871
2872 /* we are done with currentconf reset it to point st at the container */
2873 ddf->currentconf = NULL;
2874
2875 if (st->update_tail) {
2876 /* queue the virtual_disk and vd_config as metadata updates */
2877 struct virtual_disk *vd;
2878 struct vd_config *vc;
2879 int len;
2880
2881 if (!currentconf) {
2882 int len = (sizeof(struct phys_disk) +
2883 sizeof(struct phys_disk_entry));
2884
2885 /* adding a disk to the container. */
2886 if (!ddf->add_list)
2887 return 0;
2888
2889 append_metadata_update(st, ddf->add_list->mdupdate, len);
2890 ddf->add_list->mdupdate = NULL;
2891 return 0;
2892 }
2893
2894 /* Newly created VD */
2895
2896 /* First the virtual disk. We have a slightly fake header */
2897 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
2898 vd = xmalloc(len);
2899 *vd = *ddf->virt;
2900 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2901 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
2902 append_metadata_update(st, vd, len);
2903
2904 /* Then the vd_config */
2905 len = ddf->conf_rec_len * 512;
2906 vc = xmalloc(len);
2907 memcpy(vc, &currentconf->conf, len);
2908 append_metadata_update(st, vc, len);
2909
2910 /* FIXME I need to close the fds! */
2911 return 0;
2912 } else {
2913 struct dl *d;
2914 for (d = ddf->dlist; d; d=d->next)
2915 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
2916 return __write_init_super_ddf(st);
2917 }
2918 }
2919
2920 #endif
2921
2922 static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2923 unsigned long long data_offset)
2924 {
2925 /* We must reserve the last 32Meg */
2926 if (devsize <= 32*1024*2)
2927 return 0;
2928 return devsize - 32*1024*2;
2929 }
2930
2931 #ifndef MDASSEMBLE
2932
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	/* Find 'raiddisks' spare extents at least 'size' big (but
	 * only caring about multiples of 'chunk') and remember
	 * them.  If they cannot be found, fail.
	 * Returns 1 on success: the chosen devices are marked via
	 * dl->raiddisk, and when size was 0 the chosen per-device size
	 * is returned through *freesize.  Returns 0 on failure.
	 */
	struct dl *dl;
	struct ddf_super *ddf = st->sb;
	int cnt = 0;

	/* reset any previous reservation */
	for (dl = ddf->dlist; dl ; dl=dl->next) {
		dl->raiddisk = -1;
		dl->esize = 0;
	}
	/* Now find largest extent on each device */
	for (dl = ddf->dlist ; dl ; dl=dl->next) {
		struct extent *e = get_extents(ddf, dl);
		unsigned long long pos = 0;
		int i = 0;
		int found = 0;
		unsigned long long minsize = size;

		if (size == 0)
			minsize = chunk;

		if (!e)
			continue;
		/* walk the gaps between used extents; 'minsize' grows to
		 * track the largest qualifying gap found so far, so after
		 * the loop it holds this device's largest free extent */
		do {
			unsigned long long esize;
			esize = e[i].start - pos;
			if (esize >= minsize) {
				found = 1;
				minsize = esize;
			}
			pos = e[i].start + e[i].size;
			i++;
		} while (e[i-1].size);
		if (found) {
			cnt++;
			dl->esize = minsize;
		}
		free(e);
	}
	if (cnt < raiddisks) {
		pr_err("not enough devices with space to create array.\n");
		return 0; /* No enough free spaces large enough */
	}
	if (size == 0) {
		/* choose the largest size of which there are at least 'raiddisk' */
		for (dl = ddf->dlist ; dl ; dl=dl->next) {
			struct dl *dl2;
			if (dl->esize <= size)
				continue;
			/* This is bigger than 'size', see if there are enough */
			cnt = 0;
			for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
				if (dl2->esize >= dl->esize)
					cnt++;
			if (cnt >= raiddisks)
				size = dl->esize;
		}
		if (chunk) {
			/* round down to a whole number of chunks */
			size = size / chunk;
			size *= chunk;
		}
		*freesize = size;
		if (size < 32) {
			pr_err("not enough spare devices to create array.\n");
			return 0;
		}
	}
	/* We have a 'size' of which there are enough spaces.
	 * We simply do a first-fit */
	cnt = 0;
	for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
		if (dl->esize < size)
			continue;

		dl->raiddisk = cnt;
		cnt++;
	}
	return 1;
}
3019
3020 static int
3021 validate_geometry_ddf_container(struct supertype *st,
3022 int level, int layout, int raiddisks,
3023 int chunk, unsigned long long size,
3024 unsigned long long data_offset,
3025 char *dev, unsigned long long *freesize,
3026 int verbose);
3027
3028 static int validate_geometry_ddf_bvd(struct supertype *st,
3029 int level, int layout, int raiddisks,
3030 int *chunk, unsigned long long size,
3031 unsigned long long data_offset,
3032 char *dev, unsigned long long *freesize,
3033 int verbose);
3034
static int validate_geometry_ddf(struct supertype *st,
				 int level, int layout, int raiddisks,
				 int *chunk, unsigned long long size,
				 unsigned long long data_offset,
				 char *dev, unsigned long long *freesize,
				 int verbose)
{
	/* Top-level geometry validation and dispatch for DDF creation.
	 * Returns 1 if the requested geometry is acceptable (possibly
	 * setting *freesize), 0 otherwise.
	 */
	int fd;
	struct mdinfo *sra;
	int cfd;

	/* ddf potentially supports lots of things, but it depends on
	 * what devices are offered (and maybe kernel version?)
	 * If given unused devices, we will make a container.
	 * If given devices in a container, we will make a BVD.
	 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
	 */

	if (chunk && *chunk == UnSet)
		*chunk = DEFAULT_CHUNK;

	if (level == -1000000) level = LEVEL_CONTAINER;
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_ddf_container(st, level, layout,
						       raiddisks, chunk?*chunk:0,
						       size, data_offset, dev,
						       freesize,
						       verbose);
	}

	if (!dev) {
		/* no specific device named: check that DDF can express
		 * the requested level/layout at all */
		mdu_array_info_t array = {
			.level = level, .layout = layout,
			.raid_disks = raiddisks
		};
		struct vd_config conf;
		if (layout_md2ddf(&array, &conf) == -1) {
			if (verbose)
				pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
				       level, layout, raiddisks);
			return 0;
		}
		/* Should check layout? etc */

		if (st->sb && freesize) {
			/* --create was given a container to create in.
			 * So we need to check that there are enough
			 * free spaces and return the amount of space.
			 * We may as well remember which drives were
			 * chosen so that add_to_super/getinfo_super
			 * can return them.
			 */
			return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
		}
		return 1;
	}

	if (st->sb) {
		/* A container has already been opened, so we are
		 * creating in there.  Maybe a BVD, maybe an SVD.
		 * Should make a distinction one day.
		 */
		return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
						 chunk, size, data_offset, dev,
						 freesize,
						 verbose);
	}
	/* This is the first device for the array.
	 * If it is a container, we read it in and do automagic allocations,
	 * no other devices should be given.
	 * Otherwise it must be a member device of a container, and we
	 * do manual allocation.
	 * Later we should check for a BVD and make an SVD.
	 */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* device is unused: arrays may only be created inside a
		 * container, so this cannot work */
		sra = sysfs_read(fd, NULL, GET_VERSION);
		close(fd);
		if (sra && sra->array.major_version == -1 &&
		    strcmp(sra->text_version, "ddf") == 0) {

			/* load super */
			/* find space for 'n' devices. */
			/* remember the devices */
			/* Somehow return the fact that we have enough */
		}

		if (verbose)
			pr_err("ddf: Cannot create this array "
			       "on device %s - a container is required.\n",
			       dev);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			pr_err("ddf: Cannot open %s: %s\n",
			       dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe a 'ddf' container. */
	cfd = open_container(fd);
	if (cfd < 0) {
		close(fd);
		if (verbose)
			pr_err("ddf: Cannot use %s: %s\n",
			       dev, strerror(EBUSY));
		return 0;
	}
	sra = sysfs_read(cfd, NULL, GET_VERSION);
	close(fd);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "ddf") == 0) {
		/* This is a member of a ddf container.  Load the container
		 * and try to create a bvd
		 */
		struct ddf_super *ddf;
		if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
			st->sb = ddf;
			strcpy(st->container_devnm, fd2devnm(cfd));
			close(cfd);
			return validate_geometry_ddf_bvd(st, level, layout,
							 raiddisks, chunk, size,
							 data_offset,
							 dev, freesize,
							 verbose);
		}
		close(cfd);
	} else /* device may belong to a different container */
		return 0;

	return 1;
}
3168
3169 static int
3170 validate_geometry_ddf_container(struct supertype *st,
3171 int level, int layout, int raiddisks,
3172 int chunk, unsigned long long size,
3173 unsigned long long data_offset,
3174 char *dev, unsigned long long *freesize,
3175 int verbose)
3176 {
3177 int fd;
3178 unsigned long long ldsize;
3179
3180 if (level != LEVEL_CONTAINER)
3181 return 0;
3182 if (!dev)
3183 return 1;
3184
3185 fd = open(dev, O_RDONLY|O_EXCL, 0);
3186 if (fd < 0) {
3187 if (verbose)
3188 pr_err("ddf: Cannot open %s: %s\n",
3189 dev, strerror(errno));
3190 return 0;
3191 }
3192 if (!get_dev_size(fd, dev, &ldsize)) {
3193 close(fd);
3194 return 0;
3195 }
3196 close(fd);
3197
3198 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
3199 if (*freesize == 0)
3200 return 0;
3201
3202 return 1;
3203 }
3204
3205 static int validate_geometry_ddf_bvd(struct supertype *st,
3206 int level, int layout, int raiddisks,
3207 int *chunk, unsigned long long size,
3208 unsigned long long data_offset,
3209 char *dev, unsigned long long *freesize,
3210 int verbose)
3211 {
3212 struct stat stb;
3213 struct ddf_super *ddf = st->sb;
3214 struct dl *dl;
3215 unsigned long long pos = 0;
3216 unsigned long long maxsize;
3217 struct extent *e;
3218 int i;
3219 /* ddf/bvd supports lots of things, but not containers */
3220 if (level == LEVEL_CONTAINER) {
3221 if (verbose)
3222 pr_err("DDF cannot create a container within an container\n");
3223 return 0;
3224 }
3225 /* We must have the container info already read in. */
3226 if (!ddf)
3227 return 0;
3228
3229 if (!dev) {
3230 /* General test: make sure there is space for
3231 * 'raiddisks' device extents of size 'size'.
3232 */
3233 unsigned long long minsize = size;
3234 int dcnt = 0;
3235 if (minsize == 0)
3236 minsize = 8;
3237 for (dl = ddf->dlist; dl ; dl = dl->next)
3238 {
3239 int found = 0;
3240 pos = 0;
3241
3242 i = 0;
3243 e = get_extents(ddf, dl);
3244 if (!e) continue;
3245 do {
3246 unsigned long long esize;
3247 esize = e[i].start - pos;
3248 if (esize >= minsize)
3249 found = 1;
3250 pos = e[i].start + e[i].size;
3251 i++;
3252 } while (e[i-1].size);
3253 if (found)
3254 dcnt++;
3255 free(e);
3256 }
3257 if (dcnt < raiddisks) {
3258 if (verbose)
3259 pr_err("ddf: Not enough devices with "
3260 "space for this array (%d < %d)\n",
3261 dcnt, raiddisks);
3262 return 0;
3263 }
3264 return 1;
3265 }
3266 /* This device must be a member of the set */
3267 if (stat(dev, &stb) < 0)
3268 return 0;
3269 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3270 return 0;
3271 for (dl = ddf->dlist ; dl ; dl = dl->next) {
3272 if (dl->major == (int)major(stb.st_rdev) &&
3273 dl->minor == (int)minor(stb.st_rdev))
3274 break;
3275 }
3276 if (!dl) {
3277 if (verbose)
3278 pr_err("ddf: %s is not in the "
3279 "same DDF set\n",
3280 dev);
3281 return 0;
3282 }
3283 e = get_extents(ddf, dl);
3284 maxsize = 0;
3285 i = 0;
3286 if (e) do {
3287 unsigned long long esize;
3288 esize = e[i].start - pos;
3289 if (esize >= maxsize)
3290 maxsize = esize;
3291 pos = e[i].start + e[i].size;
3292 i++;
3293 } while (e[i-1].size);
3294 *freesize = maxsize;
3295 // FIXME here I am
3296
3297 return 1;
3298 }
3299
/* Load the DDF metadata for every member of the container open on 'fd'.
 * Strategy: read the headers from each member device, pick the one with
 * the highest sequence number as authoritative, load the global records
 * from it, then load the device-local records from every member.
 * On success the new 'struct ddf_super' is stored through *sbp.
 * Returns 0 on success, 1 on most failures, 2 if a member device
 * could not be opened.
 * NOTE(review): on the error returns after posix_memalign, 'super'
 * (and anything load_ddf_global attached to it) is not freed, and
 * 'sra' from sysfs_read is never released on any path — confirm
 * whether callers tolerate these leaks.
 */
static int load_super_ddf_all(struct supertype *st, int fd,
			      void **sbp, char *devname)
{
	struct mdinfo *sra;
	struct ddf_super *super;
	struct mdinfo *sd, *best = NULL;
	int bestseq = 0;
	int seq;
	char nm[20];
	int dfd;

	sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
	if (!sra)
		return 1;
	/* Only a container whose version string is "ddf" qualifies. */
	if (sra->array.major_version != -1 ||
	    sra->array.minor_version != -2 ||
	    strcmp(sra->text_version, "ddf") != 0)
		return 1;

	/* 512-byte alignment so the buffer can be used for O_DIRECT-style
	 * sector I/O. */
	if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
		return 1;
	memset(super, 0, sizeof(*super));

	/* first, try each device, and choose the best ddf */
	for (sd = sra->devs ; sd ; sd = sd->next) {
		int rv;
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		dfd = dev_open(nm, O_RDONLY);
		if (dfd < 0)
			return 2;
		rv = load_ddf_headers(dfd, super, NULL);
		close(dfd);
		if (rv == 0) {
			seq = __be32_to_cpu(super->active->seq);
			/* 'openflag' means the metadata was open for write
			 * when sampled; discount its sequence number. */
			if (super->active->openflag)
				seq--;
			if (!best || seq > bestseq) {
				bestseq = seq;
				best = sd;
			}
		}
	}
	if (!best)
		return 1;
	/* OK, load this ddf */
	sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
	dfd = dev_open(nm, O_RDONLY);
	if (dfd < 0)
		return 1;
	load_ddf_headers(dfd, super, NULL);
	load_ddf_global(dfd, super, NULL);
	close(dfd);
	/* Now we need the device-local bits */
	for (sd = sra->devs ; sd ; sd = sd->next) {
		int rv;

		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		dfd = dev_open(nm, O_RDWR);
		if (dfd < 0)
			return 2;
		rv = load_ddf_headers(dfd, super, NULL);
		if (rv == 0)
			rv = load_ddf_local(dfd, super, NULL, 1);
		/* dfd is intentionally not closed here: the '1' (keep)
		 * argument presumably makes load_ddf_local retain the fd
		 * in the per-device record — TODO confirm. */
		if (rv)
			return 1;
	}

	*sbp = super;
	if (st->ss == NULL) {
		st->ss = &super_ddf;
		st->minor_version = 0;
		st->max_devs = 512;
	}
	strcpy(st->container_devnm, fd2devnm(fd));
	return 0;
}
3376
/* superswitch entry point: load all DDF metadata for the container
 * open on 'fd' into st->sb.  Return values as for load_super_ddf_all
 * (0 success, non-zero failure). */
static int load_container_ddf(struct supertype *st, int fd,
			      char *devname)
{
	return load_super_ddf_all(st, fd, &st->sb, devname);
}
3382
3383 #endif /* MDASSEMBLE */
3384
3385 static int check_secondary(const struct vcl *vc)
3386 {
3387 const struct vd_config *conf = &vc->conf;
3388 int i;
3389
3390 /* The only DDF secondary RAID level md can support is
3391 * RAID 10, if the stripe sizes and Basic volume sizes
3392 * are all equal.
3393 * Other configurations could in theory be supported by exposing
3394 * the BVDs to user space and using device mapper for the secondary
3395 * mapping. So far we don't support that.
3396 */
3397
3398 __u64 sec_elements[4] = {0, 0, 0, 0};
3399 #define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3400 #define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3401
3402 if (vc->other_bvds == NULL) {
3403 pr_err("No BVDs for secondary RAID found\n");
3404 return -1;
3405 }
3406 if (conf->prl != DDF_RAID1) {
3407 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3408 return -1;
3409 }
3410 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3411 pr_err("Secondary RAID level %d is unsupported\n",
3412 conf->srl);
3413 return -1;
3414 }
3415 __set_sec_seen(conf->sec_elmnt_seq);
3416 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3417 const struct vd_config *bvd = vc->other_bvds[i];
3418 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
3419 continue;
3420 if (bvd->srl != conf->srl) {
3421 pr_err("Inconsistent secondary RAID level across BVDs\n");
3422 return -1;
3423 }
3424 if (bvd->prl != conf->prl) {
3425 pr_err("Different RAID levels for BVDs are unsupported\n");
3426 return -1;
3427 }
3428 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3429 pr_err("All BVDs must have the same number of primary elements\n");
3430 return -1;
3431 }
3432 if (bvd->chunk_shift != conf->chunk_shift) {
3433 pr_err("Different strip sizes for BVDs are unsupported\n");
3434 return -1;
3435 }
3436 if (bvd->array_blocks != conf->array_blocks) {
3437 pr_err("Different BVD sizes are unsupported\n");
3438 return -1;
3439 }
3440 __set_sec_seen(bvd->sec_elmnt_seq);
3441 }
3442 for (i = 0; i < conf->sec_elmnt_count; i++) {
3443 if (!__was_sec_seen(i)) {
3444 pr_err("BVD %d is missing\n", i);
3445 return -1;
3446 }
3447 }
3448 return 0;
3449 }
3450
3451 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
3452 __u32 refnum, unsigned int nmax,
3453 const struct vd_config **bvd,
3454 unsigned int *idx)
3455 {
3456 unsigned int i, j, n, sec, cnt;
3457
3458 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3459 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3460
3461 for (i = 0, j = 0 ; i < nmax ; i++) {
3462 /* j counts valid entries for this BVD */
3463 if (vc->conf.phys_refnum[i] != 0xffffffff)
3464 j++;
3465 if (vc->conf.phys_refnum[i] == refnum) {
3466 *bvd = &vc->conf;
3467 *idx = i;
3468 return sec * cnt + j - 1;
3469 }
3470 }
3471 if (vc->other_bvds == NULL)
3472 goto bad;
3473
3474 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3475 struct vd_config *vd = vc->other_bvds[n-1];
3476 sec = vd->sec_elmnt_seq;
3477 if (sec == DDF_UNUSED_BVD)
3478 continue;
3479 for (i = 0, j = 0 ; i < nmax ; i++) {
3480 if (vd->phys_refnum[i] != 0xffffffff)
3481 j++;
3482 if (vd->phys_refnum[i] == refnum) {
3483 *bvd = vd;
3484 *idx = i;
3485 return sec * cnt + j - 1;
3486 }
3487 }
3488 }
3489 bad:
3490 *bvd = NULL;
3491 return DDF_NOTFOUND;
3492 }
3493
static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
{
	/* Given a container loaded by load_super_ddf_all,
	 * extract information about all the arrays into
	 * an mdinfo tree.
	 *
	 * For each vcl in conflist: create an mdinfo, fill it in,
	 * then look for matching devices (phys_refnum) in dlist
	 * and create appropriate device mdinfo.
	 *
	 * If 'subarray' is non-NULL only the virtual disk whose vcnum
	 * matches it is reported.  Returns a linked list of mdinfo
	 * (one per virtual disk), which the caller owns.
	 */
	struct ddf_super *ddf = st->sb;
	struct mdinfo *rest = NULL;
	struct vcl *vc;

	for (vc = ddf->conflist ; vc ; vc=vc->next)
	{
		unsigned int i;
		unsigned int j;
		struct mdinfo *this;
		char *ep;
		__u32 *cptr;
		unsigned int pd;

		/* Skip entries not matching a requested subarray number. */
		if (subarray &&
		    (strtoul(subarray, &ep, 10) != vc->vcnum ||
		     *ep != '\0'))
			continue;

		/* Secondary RAID levels are only reported when they form
		 * a shape md can handle (see check_secondary). */
		if (vc->conf.sec_elmnt_count > 1) {
			if (check_secondary(vc) != 0)
				continue;
		}

		this = xcalloc(1, sizeof(*this));
		this->next = rest;
		rest = this;

		if (layout_ddf2md(&vc->conf, &this->array))
			continue;
		this->array.md_minor      = -1;
		this->array.major_version = -1;
		this->array.minor_version = -2;
		/* Bytes 16..19 of the GUID hold the creation timestamp;
		 * DDF times are 1980-based, hence the DECADE offset. */
		cptr = (__u32 *)(vc->conf.guid + 16);
		this->array.ctime         = DECADE + __be32_to_cpu(*cptr);
		this->array.utime         = DECADE +
			__be32_to_cpu(vc->conf.timestamp);
		this->array.chunk_size	  = 512 << vc->conf.chunk_shift;

		i = vc->vcnum;
		/* Inconsistent or not-fully-initialised arrays are
		 * reported dirty with resync still to do. */
		if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
		    (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
		    DDF_init_full) {
			this->array.state = 0;
			this->resync_start = 0;
		} else {
			this->array.state = 1;
			this->resync_start = MaxSector;
		}
		/* DDF names are space-padded; strip the padding. */
		memcpy(this->name, ddf->virt->entries[i].name, 16);
		this->name[16]=0;
		for(j=0; j<16; j++)
			if (this->name[j] == ' ')
				this->name[j] = 0;

		memset(this->uuid, 0, sizeof(this->uuid));
		this->component_size = __be64_to_cpu(vc->conf.blocks);
		/* array.size is in KiB; component_size in 512-byte sectors. */
		this->array.size = this->component_size / 2;
		this->container_member = i;

		/* uuid_from_super_ddf reports on ddf->currentconf, so point
		 * it at this virtual disk temporarily. */
		ddf->currentconf = vc;
		uuid_from_super_ddf(st, this->uuid);
		ddf->currentconf = NULL;

		sprintf(this->text_version, "/%s/%d",
			st->container_devnm, this->container_member);

		/* Attach one device-mdinfo per online physical disk that
		 * participates in this virtual disk. */
		for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
			struct mdinfo *dev;
			struct dl *d;
			const struct vd_config *bvd;
			unsigned int iphys;
			int stt;

			/* 0xFFFFFFFF marks an unused physical-disk slot. */
			if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
				continue;

			stt = __be16_to_cpu(ddf->phys->entries[pd].state);
			if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
			    != DDF_Online)
				continue;

			i = get_pd_index_from_refnum(
				vc, ddf->phys->entries[pd].refnum,
				ddf->mppe, &bvd, &iphys);
			if (i == DDF_NOTFOUND)
				continue;

			this->array.working_disks++;

			for (d = ddf->dlist; d ; d=d->next)
				if (d->disk.refnum ==
				    ddf->phys->entries[pd].refnum)
					break;
			if (d == NULL)
				/* Haven't found that one yet, maybe there are others */
				continue;

			dev = xcalloc(1, sizeof(*dev));
			dev->next = this->devs;
			this->devs = dev;

			dev->disk.number = __be32_to_cpu(d->disk.refnum);
			dev->disk.major = d->major;
			dev->disk.minor = d->minor;
			dev->disk.raid_disk = i;
			dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
			dev->recovery_start = MaxSector;

			dev->events = __be32_to_cpu(ddf->primary.seq);
			/* Per-member start of this VD's data, from the
			 * LBA offset table that follows phys_refnum. */
			dev->data_offset =
				__be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
			dev->component_size = __be64_to_cpu(bvd->blocks);
			if (d->devname)
				strcpy(dev->name, d->devname);
		}
	}
	return rest;
}
3622
3623 static int store_super_ddf(struct supertype *st, int fd)
3624 {
3625 struct ddf_super *ddf = st->sb;
3626 unsigned long long dsize;
3627 void *buf;
3628 int rc;
3629
3630 if (!ddf)
3631 return 1;
3632
3633 if (!get_dev_size(fd, NULL, &dsize))
3634 return 1;
3635
3636 if (ddf->dlist || ddf->conflist) {
3637 struct stat sta;
3638 struct dl *dl;
3639 int ofd, ret;
3640
3641 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3642 pr_err("%s: file descriptor for invalid device\n",
3643 __func__);
3644 return 1;
3645 }
3646 for (dl = ddf->dlist; dl; dl = dl->next)
3647 if (dl->major == (int)major(sta.st_rdev) &&
3648 dl->minor == (int)minor(sta.st_rdev))
3649 break;
3650 if (!dl) {
3651 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3652 (int)major(sta.st_rdev),
3653 (int)minor(sta.st_rdev));
3654 return 1;
3655 }
3656 /*
3657 For DDF, writing to just one disk makes no sense.
3658 We would run the risk of writing inconsistent meta data
3659 to the devices. So just call __write_init_super_ddf and
3660 write to all devices, including this one.
3661 Use the fd passed to this function, just in case dl->fd
3662 is invalid.
3663 */
3664 ofd = dl->fd;
3665 dl->fd = fd;
3666 ret = __write_init_super_ddf(st);
3667 dl->fd = ofd;
3668 return ret;
3669 }
3670
3671 if (posix_memalign(&buf, 512, 512) != 0)
3672 return 1;
3673 memset(buf, 0, 512);
3674
3675 lseek64(fd, dsize-512, 0);
3676 rc = write(fd, buf, 512);
3677 free(buf);
3678 if (rc < 0)
3679 return 1;
3680 return 0;
3681 }
3682
3683 static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3684 {
3685 /*
3686 * return:
3687 * 0 same, or first was empty, and second was copied
3688 * 1 second had wrong number
3689 * 2 wrong uuid
3690 * 3 wrong other info
3691 */
3692 struct ddf_super *first = st->sb;
3693 struct ddf_super *second = tst->sb;
3694 struct dl *dl1, *dl2;
3695 struct vcl *vl1, *vl2;
3696 unsigned int max_vds, max_pds, pd, vd;
3697
3698 if (!first) {
3699 st->sb = tst->sb;
3700 tst->sb = NULL;
3701 return 0;
3702 }
3703
3704 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3705 return 2;
3706
3707 if (first->anchor.seq != second->anchor.seq) {
3708 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3709 __be32_to_cpu(first->anchor.seq),
3710 __be32_to_cpu(second->anchor.seq));
3711 return 3;
3712 }
3713 if (first->max_part != second->max_part ||
3714 first->phys->used_pdes != second->phys->used_pdes ||
3715 first->virt->populated_vdes != second->virt->populated_vdes) {
3716 dprintf("%s: PD/VD number mismatch\n", __func__);
3717 return 3;
3718 }
3719
3720 max_pds = __be16_to_cpu(first->phys->used_pdes);
3721 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3722 for (pd = 0; pd < max_pds; pd++)
3723 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3724 break;
3725 if (pd == max_pds) {
3726 dprintf("%s: no match for disk %08x\n", __func__,
3727 __be32_to_cpu(dl2->disk.refnum));
3728 return 3;
3729 }
3730 }
3731
3732 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3733 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3734 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3735 continue;
3736 for (vd = 0; vd < max_vds; vd++)
3737 if (!memcmp(first->virt->entries[vd].guid,
3738 vl2->conf.guid, DDF_GUID_LEN))
3739 break;
3740 if (vd == max_vds) {
3741 dprintf("%s: no match for VD config\n", __func__);
3742 return 3;
3743 }
3744 }
3745 /* FIXME should I look at anything else? */
3746
3747 /*
3748 At this point we are fairly sure that the meta data matches.
3749 But the new disk may contain additional local data.
3750 Add it to the super block.
3751 */
3752 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3753 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3754 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3755 DDF_GUID_LEN))
3756 break;
3757 if (vl1) {
3758 if (vl1->other_bvds != NULL &&
3759 vl1->conf.sec_elmnt_seq !=
3760 vl2->conf.sec_elmnt_seq) {
3761 dprintf("%s: adding BVD %u\n", __func__,
3762 vl2->conf.sec_elmnt_seq);
3763 add_other_bvd(vl1, &vl2->conf,
3764 first->conf_rec_len*512);
3765 }
3766 continue;
3767 }
3768
3769 if (posix_memalign((void **)&vl1, 512,
3770 (first->conf_rec_len*512 +
3771 offsetof(struct vcl, conf))) != 0) {
3772 pr_err("%s could not allocate vcl buf\n",
3773 __func__);
3774 return 3;
3775 }
3776
3777 vl1->next = first->conflist;
3778 vl1->block_sizes = NULL;
3779 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3780 if (alloc_other_bvds(first, vl1) != 0) {
3781 pr_err("%s could not allocate other bvds\n",
3782 __func__);
3783 free(vl1);
3784 return 3;
3785 }
3786 for (vd = 0; vd < max_vds; vd++)
3787 if (!memcmp(first->virt->entries[vd].guid,
3788 vl1->conf.guid, DDF_GUID_LEN))
3789 break;
3790 vl1->vcnum = vd;
3791 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3792 first->conflist = vl1;
3793 }
3794
3795 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3796 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3797 if (dl1->disk.refnum == dl2->disk.refnum)
3798 break;
3799 if (dl1)
3800 continue;
3801
3802 if (posix_memalign((void **)&dl1, 512,
3803 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3804 != 0) {
3805 pr_err("%s could not allocate disk info buffer\n",
3806 __func__);
3807 return 3;
3808 }
3809 memcpy(dl1, dl2, sizeof(*dl1));
3810 dl1->mdupdate = NULL;
3811 dl1->next = first->dlist;
3812 dl1->fd = -1;
3813 for (pd = 0; pd < max_pds; pd++)
3814 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3815 break;
3816 dl1->pdnum = pd;
3817 if (dl2->spare) {
3818 if (posix_memalign((void **)&dl1->spare, 512,
3819 first->conf_rec_len*512) != 0) {
3820 pr_err("%s could not allocate spare info buf\n",
3821 __func__);
3822 return 3;
3823 }
3824 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3825 }
3826 for (vd = 0 ; vd < first->max_part ; vd++) {
3827 if (!dl2->vlist[vd]) {
3828 dl1->vlist[vd] = NULL;
3829 continue;
3830 }
3831 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3832 if (!memcmp(vl1->conf.guid,
3833 dl2->vlist[vd]->conf.guid,
3834 DDF_GUID_LEN))
3835 break;
3836 dl1->vlist[vd] = vl1;
3837 }
3838 }
3839 first->dlist = dl1;
3840 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3841 dl1->disk.refnum);
3842 }
3843
3844 return 0;
3845 }
3846
3847 #ifndef MDASSEMBLE
3848 /*
3849 * A new array 'a' has been started which claims to be instance 'inst'
3850 * within container 'c'.
3851 * We need to confirm that the array matches the metadata in 'c' so
3852 * that we don't corrupt any metadata.
3853 */
3854 static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
3855 {
3856 struct ddf_super *ddf = c->sb;
3857 int n = atoi(inst);
3858 if (all_ff(ddf->virt->entries[n].guid)) {
3859 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
3860 return -ENODEV;
3861 }
3862 dprintf("ddf: open_new %d\n", n);
3863 a->info.container_member = n;
3864 return 0;
3865 }
3866
3867 /*
3868 * The array 'a' is to be marked clean in the metadata.
3869 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
3870 * clean up to the point (in sectors). If that cannot be recorded in the
3871 * metadata, then leave it as dirty.
3872 *
3873 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3874 * !global! virtual_disk.virtual_entry structure.
3875 */
3876 static int ddf_set_array_state(struct active_array *a, int consistent)
3877 {
3878 struct ddf_super *ddf = a->container->sb;
3879 int inst = a->info.container_member;
3880 int old = ddf->virt->entries[inst].state;
3881 if (consistent == 2) {
3882 /* Should check if a recovery should be started FIXME */
3883 consistent = 1;
3884 if (!is_resync_complete(&a->info))
3885 consistent = 0;
3886 }
3887 if (consistent)
3888 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3889 else
3890 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
3891 if (old != ddf->virt->entries[inst].state)
3892 ddf_set_updates_pending(ddf);
3893
3894 old = ddf->virt->entries[inst].init_state;
3895 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
3896 if (is_resync_complete(&a->info))
3897 ddf->virt->entries[inst].init_state |= DDF_init_full;
3898 else if (a->info.resync_start == 0)
3899 ddf->virt->entries[inst].init_state |= DDF_init_not;
3900 else
3901 ddf->virt->entries[inst].init_state |= DDF_init_quick;
3902 if (old != ddf->virt->entries[inst].init_state)
3903 ddf_set_updates_pending(ddf);
3904
3905 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
3906 a->info.resync_start);
3907 return consistent;
3908 }
3909
3910 static int get_bvd_state(const struct ddf_super *ddf,
3911 const struct vd_config *vc)
3912 {
3913 unsigned int i, n_bvd, working = 0;
3914 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3915 int pd, st, state;
3916 for (i = 0; i < n_prim; i++) {
3917 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3918 continue;
3919 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3920 if (pd < 0)
3921 continue;
3922 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3923 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3924 == DDF_Online)
3925 working++;
3926 }
3927
3928 state = DDF_state_degraded;
3929 if (working == n_prim)
3930 state = DDF_state_optimal;
3931 else
3932 switch (vc->prl) {
3933 case DDF_RAID0:
3934 case DDF_CONCAT:
3935 case DDF_JBOD:
3936 state = DDF_state_failed;
3937 break;
3938 case DDF_RAID1:
3939 if (working == 0)
3940 state = DDF_state_failed;
3941 else if (working >= 2)
3942 state = DDF_state_part_optimal;
3943 break;
3944 case DDF_RAID4:
3945 case DDF_RAID5:
3946 if (working < n_prim - 1)
3947 state = DDF_state_failed;
3948 break;
3949 case DDF_RAID6:
3950 if (working < n_prim - 2)
3951 state = DDF_state_failed;
3952 else if (working == n_prim - 1)
3953 state = DDF_state_part_optimal;
3954 break;
3955 }
3956 return state;
3957 }
3958
3959 static int secondary_state(int state, int other, int seclevel)
3960 {
3961 if (state == DDF_state_optimal && other == DDF_state_optimal)
3962 return DDF_state_optimal;
3963 if (seclevel == DDF_2MIRRORED) {
3964 if (state == DDF_state_optimal || other == DDF_state_optimal)
3965 return DDF_state_part_optimal;
3966 if (state == DDF_state_failed && other == DDF_state_failed)
3967 return DDF_state_failed;
3968 return DDF_state_degraded;
3969 } else {
3970 if (state == DDF_state_failed || other == DDF_state_failed)
3971 return DDF_state_failed;
3972 if (state == DDF_state_degraded || other == DDF_state_degraded)
3973 return DDF_state_degraded;
3974 return DDF_state_part_optimal;
3975 }
3976 }
3977
3978 static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
3979 {
3980 int state = get_bvd_state(ddf, &vcl->conf);
3981 unsigned int i;
3982 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
3983 state = secondary_state(
3984 state,
3985 get_bvd_state(ddf, vcl->other_bvds[i-1]),
3986 vcl->conf.srl);
3987 }
3988 return state;
3989 }
3990
3991 /*
3992 * The state of each disk is stored in the global phys_disk structure
3993 * in phys_disk.entries[n].state.
3994 * This makes various combinations awkward.
3995 * - When a device fails in any array, it must be failed in all arrays
3996 * that include a part of this device.
3997 * - When a component is rebuilding, we cannot include it officially in the
3998 * array unless this is the only array that uses the device.
3999 *
4000 * So: when transitioning:
4001 * Online -> failed, just set failed flag. monitor will propagate
4002 * spare -> online, the device might need to be added to the array.
4003 * spare -> failed, just set failed. Don't worry if in array or not.
4004 */
/* mdmon callback: slot 'n' of array 'a' changed state (bitmask of
 * DS_* flags).  Propagate the change into the global phys_disk entry
 * and re-derive the virtual disk's overall DDF state.
 */
static void ddf_set_disk(struct active_array *a, int n, int state)
{
	struct ddf_super *ddf = a->container->sb;
	unsigned int inst = a->info.container_member, n_bvd;
	struct vcl *vcl;
	struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
					 &n_bvd, &vcl);
	int pd;
	struct mdinfo *mdi;
	struct dl *dl;

	if (vc == NULL) {
		dprintf("ddf: cannot find instance %d!!\n", inst);
		return;
	}
	/* Find the matching slot in 'info'. */
	for (mdi = a->info.devs; mdi; mdi = mdi->next)
		if (mdi->disk.raid_disk == n)
			break;
	if (!mdi)
		return;

	/* and find the 'dl' entry corresponding to that. */
	for (dl = ddf->dlist; dl; dl = dl->next)
		if (mdi->state_fd >= 0 &&
		    mdi->disk.major == dl->major &&
		    mdi->disk.minor == dl->minor)
			break;
	if (!dl)
		return;

	pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
	if (pd < 0 || pd != dl->pdnum) {
		/* disk doesn't currently exist or has changed.
		 * If it is now in_sync, insert it. */
		dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
			__func__, dl->pdnum, dl->major, dl->minor,
			dl->disk.refnum);
		dprintf("%s: array %u disk %u ref %08x pd %d\n",
			__func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
		if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
			pd = dl->pdnum; /* FIXME: is this really correct ? */
			/* Record the disk and its data offset in the VD
			 * configuration, and mark it active (no longer a
			 * global spare). */
			vc->phys_refnum[n_bvd] = dl->disk.refnum;
			LBA_OFFSET(ddf, vc)[n_bvd] =
				__cpu_to_be64(mdi->data_offset);
			ddf->phys->entries[pd].type &=
				~__cpu_to_be16(DDF_Global_Spare);
			ddf->phys->entries[pd].type |=
				__cpu_to_be16(DDF_Active_in_VD);
			ddf_set_updates_pending(ddf);
		}
	} else {
		/* Known disk: translate the DS_* flags into DDF state
		 * bits, writing metadata only if something changed. */
		int old = ddf->phys->entries[pd].state;
		if (state & DS_FAULTY)
			ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
		if (state & DS_INSYNC) {
			ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
			ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
		}
		if (old != ddf->phys->entries[pd].state)
			ddf_set_updates_pending(ddf);
	}

	dprintf("ddf: set_disk %d to %x\n", n, state);

	/* Now we need to check the state of the array and update
	 * virtual_disk.entries[n].state.
	 * It needs to be one of "optimal", "degraded", "failed".
	 * I don't understand 'deleted' or 'missing'.
	 */
	state = get_svd_state(ddf, vcl);

	if (ddf->virt->entries[inst].state !=
	    ((ddf->virt->entries[inst].state & ~DDF_state_mask)
	     | state)) {

		ddf->virt->entries[inst].state =
			(ddf->virt->entries[inst].state & ~DDF_state_mask)
			| state;
		ddf_set_updates_pending(ddf);
	}

}
4088
4089 static void ddf_sync_metadata(struct supertype *st)
4090 {
4091
4092 /*
4093 * Write all data to all devices.
4094 * Later, we might be able to track whether only local changes
4095 * have been made, or whether any global data has been changed,
4096 * but ddf is sufficiently weird that it probably always
4097 * changes global data ....
4098 */
4099 struct ddf_super *ddf = st->sb;
4100 if (!ddf->updates_pending)
4101 return;
4102 ddf->updates_pending = 0;
4103 __write_init_super_ddf(st);
4104 dprintf("ddf: sync_metadata\n");
4105 }
4106
static void ddf_process_update(struct supertype *st,
			       struct metadata_update *update)
{
	/* Apply this update to the metadata.
	 * The first 4 bytes are a DDF_*_MAGIC which guides
	 * our actions.
	 * Possible update are:
	 *  DDF_PHYS_RECORDS_MAGIC
	 *    Add a new physical device or remove an old one.
	 *    Changes to this record only happen implicitly.
	 *    used_pdes is the device number.
	 *  DDF_VIRT_RECORDS_MAGIC
	 *    Add a new VD.  Possibly also change the 'access' bits.
	 *    populated_vdes is the entry number.
	 *  DDF_VD_CONF_MAGIC
	 *    New or updated VD.  the VIRT_RECORD must already
	 *    exist.  For an update, phys_refnum and lba_offset
	 *    (at least) are updated, and the VD_CONF must
	 *    be written to precisely those devices listed with
	 *    a phys_refnum.
	 *  DDF_SPARE_ASSIGN_MAGIC
	 *    replacement Spare Assignment Record... but for which device?
	 *
	 * So, e.g.:
	 *  - to create a new array, we send a VIRT_RECORD and
	 *    a VD_CONF.  Then assemble and start the array.
	 *  - to activate a spare we send a VD_CONF to add the phys_refnum
	 *    and offset.  This will also mark the spare as active with
	 *    a spare-assignment record.
	 */
	struct ddf_super *ddf = st->sb;
	__u32 *magic = (__u32*)update->buf;
	struct phys_disk *pd;
	struct virtual_disk *vd;
	struct vd_config *vc;
	struct vcl *vcl;
	struct dl *dl;
	unsigned int mppe;
	unsigned int ent;
	unsigned int pdnum, pd2;

	dprintf("Process update %x\n", *magic);

	switch (*magic) {
	case DDF_PHYS_RECORDS_MAGIC:

		/* Exactly one phys_disk header plus one entry expected. */
		if (update->len != (sizeof(struct phys_disk) +
				    sizeof(struct phys_disk_entry)))
			return;
		pd = (struct phys_disk*)update->buf;

		/* For this update type, used_pdes carries the target
		 * entry index (see the comment block above). */
		ent = __be16_to_cpu(pd->used_pdes);
		if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
			return;
		if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
			struct dl **dlp;
			/* removing this disk. */
			ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
			for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
				struct dl *dl = *dlp;
				if (dl->pdnum == (signed)ent) {
					close(dl->fd);
					dl->fd = -1;
					/* FIXME this doesn't free
					 * dl->devname */
					/* Hand the dl back via update->space
					 * so the monitor can free it safely
					 * later. */
					update->space = dl;
					*dlp = dl->next;
					break;
				}
			}
			ddf_set_updates_pending(ddf);
			return;
		}
		/* Adding: the slot must still be unused (all-0xff GUID). */
		if (!all_ff(ddf->phys->entries[ent].guid))
			return;
		ddf->phys->entries[ent] = pd->entries[0];
		ddf->phys->used_pdes = __cpu_to_be16(1 +
				      __be16_to_cpu(ddf->phys->used_pdes));
		ddf_set_updates_pending(ddf);
		if (ddf->add_list) {
			struct active_array *a;
			struct dl *al = ddf->add_list;
			ddf->add_list = al->next;

			al->next = ddf->dlist;
			ddf->dlist = al;

			/* As a device has been added, we should check
			 * for any degraded devices that might make
			 * use of this spare */
			for (a = st->arrays ; a; a=a->next)
				a->check_degraded = 1;
		}
		break;

	case DDF_VIRT_RECORDS_MAGIC:

		/* Exactly one virtual_disk header plus one entry expected. */
		if (update->len != (sizeof(struct virtual_disk) +
				    sizeof(struct virtual_entry)))
			return;
		vd = (struct virtual_disk*)update->buf;

		ent = find_unused_vde(ddf);
		if (ent == DDF_NOTFOUND)
			return;
		ddf->virt->entries[ent] = vd->entries[0];
		ddf->virt->populated_vdes = __cpu_to_be16(1 +
			  __be16_to_cpu(ddf->virt->populated_vdes));
		ddf_set_updates_pending(ddf);
		break;

	case DDF_VD_CONF_MAGIC:
		dprintf("len %d %d\n", update->len, ddf->conf_rec_len);

		mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
		if ((unsigned)update->len != ddf->conf_rec_len * 512)
			return;
		vc = (struct vd_config*)update->buf;
		/* Look up an existing configuration with this GUID. */
		for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
			if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
				break;
		dprintf("vcl = %p\n", vcl);
		if (vcl) {
			/* An update, just copy the phys_refnum and lba_offset
			 * fields
			 */
			memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
			       mppe * (sizeof(__u32) + sizeof(__u64)));
		} else {
			/* A new VD_CONF */
			if (!update->space)
				return;
			/* Take ownership of the pre-allocated vcl the
			 * manager attached to the update. */
			vcl = update->space;
			update->space = NULL;
			vcl->next = ddf->conflist;
			memcpy(&vcl->conf, vc, update->len);
			ent = find_vde_by_guid(ddf, vc->guid);
			if (ent == DDF_NOTFOUND)
				return;
			vcl->vcnum = ent;
			ddf->conflist = vcl;
		}
		/* Set DDF_Transition on all Failed devices - to help
		 * us detect those that are no longer in use
		 */
		/* NOTE: __be16_to_cpu on these constant flags yields the
		 * same value as __cpu_to_be16 (a 16-bit byte swap is its
		 * own inverse), so the mixed usage below is consistent. */
		for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
			if (ddf->phys->entries[pdnum].state
			    & __be16_to_cpu(DDF_Failed))
				ddf->phys->entries[pdnum].state
					|= __be16_to_cpu(DDF_Transition);
		/* Now make sure vlist is correct for each dl. */
		for (dl = ddf->dlist; dl; dl = dl->next) {
			unsigned int dn;
			unsigned int vn = 0;
			int in_degraded = 0;
			for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
				for (dn=0; dn < ddf->mppe ; dn++)
					if (vcl->conf.phys_refnum[dn] ==
					    dl->disk.refnum) {
						int vstate;
						dprintf("dev %d has %p at %d\n",
							dl->pdnum, vcl, vn);
						/* Clear the Transition flag */
						if (ddf->phys->entries[dl->pdnum].state
						    & __be16_to_cpu(DDF_Failed))
							ddf->phys->entries[dl->pdnum].state &=
								~__be16_to_cpu(DDF_Transition);

						dl->vlist[vn++] = vcl;
						vstate = ddf->virt->entries[vcl->vcnum].state
							& DDF_state_mask;
						if (vstate == DDF_state_degraded ||
						    vstate == DDF_state_part_optimal)
							in_degraded = 1;
						break;
					}
			while (vn < ddf->max_part)
				dl->vlist[vn++] = NULL;
			/* Refresh the disk's type bits from what we found:
			 * member of a VD, dedicated spare, or global spare. */
			if (dl->vlist[0]) {
				ddf->phys->entries[dl->pdnum].type &=
					~__cpu_to_be16(DDF_Global_Spare);
				if (!(ddf->phys->entries[dl->pdnum].type &
				      __cpu_to_be16(DDF_Active_in_VD))) {
					ddf->phys->entries[dl->pdnum].type |=
						__cpu_to_be16(DDF_Active_in_VD);
					if (in_degraded)
						ddf->phys->entries[dl->pdnum].state |=
							__cpu_to_be16(DDF_Rebuilding);
				}
			}
			if (dl->spare) {
				ddf->phys->entries[dl->pdnum].type &=
					~__cpu_to_be16(DDF_Global_Spare);
				ddf->phys->entries[dl->pdnum].type |=
					__cpu_to_be16(DDF_Spare);
			}
			if (!dl->vlist[0] && !dl->spare) {
				ddf->phys->entries[dl->pdnum].type |=
					__cpu_to_be16(DDF_Global_Spare);
				ddf->phys->entries[dl->pdnum].type &=
					~__cpu_to_be16(DDF_Spare |
						       DDF_Active_in_VD);
			}
		}

		/* Now remove any 'Failed' devices that are not part
		 * of any VD.  They will have the Transition flag set.
		 * Once done, we need to update all dl->pdnum numbers.
		 */
		/* Compact entries in place: pd2 tracks the next slot to
		 * keep, pdnum scans all used slots. */
		pd2 = 0;
		for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
			if ((ddf->phys->entries[pdnum].state
			     & __be16_to_cpu(DDF_Failed))
			    && (ddf->phys->entries[pdnum].state
				& __be16_to_cpu(DDF_Transition)))
				/* skip this one */;
			else if (pdnum == pd2)
				pd2++;
			else {
				ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
				for (dl = ddf->dlist; dl; dl = dl->next)
					if (dl->pdnum == (int)pdnum)
						dl->pdnum = pd2;
				pd2++;
			}
		ddf->phys->used_pdes = __cpu_to_be16(pd2);
		/* Mark the freed tail slots unused (all-0xff GUID). */
		while (pd2 < pdnum) {
			memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
			pd2++;
		}

		ddf_set_updates_pending(ddf);
		break;
	case DDF_SPARE_ASSIGN_MAGIC:
	default: break;
	}
}
4344
4345 static void ddf_prepare_update(struct supertype *st,
4346 struct metadata_update *update)
4347 {
4348 /* This update arrived at managemon.
4349 * We are about to pass it to monitor.
4350 * If a malloc is needed, do it here.
4351 */
4352 struct ddf_super *ddf = st->sb;
4353 __u32 *magic = (__u32*)update->buf;
4354 if (*magic == DDF_VD_CONF_MAGIC)
4355 if (posix_memalign(&update->space, 512,
4356 offsetof(struct vcl, conf)
4357 + ddf->conf_rec_len * 512) != 0)
4358 update->space = NULL;
4359 }
4360
4361 /*
4362 * Check if the array 'a' is degraded but not failed.
4363 * If it is, find as many spares as are available and needed and
4364 * arrange for their inclusion.
4365 * We only choose devices which are not already in the array,
4366 * and prefer those with a spare-assignment to this array.
 * Otherwise we choose global spares - assuming always that
4368 * there is enough room.
4369 * For each spare that we assign, we return an 'mdinfo' which
4370 * describes the position for the device in the array.
4371 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4372 * the new phys_refnum and lba_offset values.
4373 *
4374 * Only worry about BVDs at the moment.
4375 */
4376 static struct mdinfo *ddf_activate_spare(struct active_array *a,
4377 struct metadata_update **updates)
4378 {
4379 int working = 0;
4380 struct mdinfo *d;
4381 struct ddf_super *ddf = a->container->sb;
4382 int global_ok = 0;
4383 struct mdinfo *rv = NULL;
4384 struct mdinfo *di;
4385 struct metadata_update *mu;
4386 struct dl *dl;
4387 int i;
4388 struct vcl *vcl;
4389 struct vd_config *vc;
4390 unsigned int n_bvd;
4391
4392 for (d = a->info.devs ; d ; d = d->next) {
4393 if ((d->curr_state & DS_FAULTY) &&
4394 d->state_fd >= 0)
4395 /* wait for Removal to happen */
4396 return NULL;
4397 if (d->state_fd >= 0)
4398 working ++;
4399 }
4400
4401 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4402 a->info.array.level);
4403 if (working == a->info.array.raid_disks)
4404 return NULL; /* array not degraded */
4405 switch (a->info.array.level) {
4406 case 1:
4407 if (working == 0)
4408 return NULL; /* failed */
4409 break;
4410 case 4:
4411 case 5:
4412 if (working < a->info.array.raid_disks - 1)
4413 return NULL; /* failed */
4414 break;
4415 case 6:
4416 if (working < a->info.array.raid_disks - 2)
4417 return NULL; /* failed */
4418 break;
4419 default: /* concat or stripe */
4420 return NULL; /* failed */
4421 }
4422
4423 /* For each slot, if it is not working, find a spare */
4424 dl = ddf->dlist;
4425 for (i = 0; i < a->info.array.raid_disks; i++) {
4426 for (d = a->info.devs ; d ; d = d->next)
4427 if (d->disk.raid_disk == i)
4428 break;
4429 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
4430 if (d && (d->state_fd >= 0))
4431 continue;
4432
4433 /* OK, this device needs recovery. Find a spare */
4434 again:
4435 for ( ; dl ; dl = dl->next) {
4436 unsigned long long esize;
4437 unsigned long long pos;
4438 struct mdinfo *d2;
4439 int is_global = 0;
4440 int is_dedicated = 0;
4441 struct extent *ex;
4442 unsigned int j;
4443 /* If in this array, skip */
4444 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
4445 if (d2->state_fd >= 0 &&
4446 d2->disk.major == dl->major &&
4447 d2->disk.minor == dl->minor) {
4448 dprintf("%x:%x already in array\n", dl->major, dl->minor);
4449 break;
4450 }
4451 if (d2)
4452 continue;
4453 if (ddf->phys->entries[dl->pdnum].type &
4454 __cpu_to_be16(DDF_Spare)) {
4455 /* Check spare assign record */
4456 if (dl->spare) {
4457 if (dl->spare->type & DDF_spare_dedicated) {
4458 /* check spare_ents for guid */
4459 for (j = 0 ;
4460 j < __be16_to_cpu(dl->spare->populated);
4461 j++) {
4462 if (memcmp(dl->spare->spare_ents[j].guid,
4463 ddf->virt->entries[a->info.container_member].guid,
4464 DDF_GUID_LEN) == 0)
4465 is_dedicated = 1;
4466 }
4467 } else
4468 is_global = 1;
4469 }
4470 } else if (ddf->phys->entries[dl->pdnum].type &
4471 __cpu_to_be16(DDF_Global_Spare)) {
4472 is_global = 1;
4473 } else if (!(ddf->phys->entries[dl->pdnum].state &
4474 __cpu_to_be16(DDF_Failed))) {
4475 /* we can possibly use some of this */
4476 is_global = 1;
4477 }
4478 if ( ! (is_dedicated ||
4479 (is_global && global_ok))) {
4480 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
4481 is_dedicated, is_global);
4482 continue;
4483 }
4484
4485 /* We are allowed to use this device - is there space?
4486 * We need a->info.component_size sectors */
4487 ex = get_extents(ddf, dl);
4488 if (!ex) {
4489 dprintf("cannot get extents\n");
4490 continue;
4491 }
4492 j = 0; pos = 0;
4493 esize = 0;
4494
4495 do {
4496 esize = ex[j].start - pos;
4497 if (esize >= a->info.component_size)
4498 break;
4499 pos = ex[j].start + ex[j].size;
4500 j++;
4501 } while (ex[j-1].size);
4502
4503 free(ex);
4504 if (esize < a->info.component_size) {
4505 dprintf("%x:%x has no room: %llu %llu\n",
4506 dl->major, dl->minor,
4507 esize, a->info.component_size);
4508 /* No room */
4509 continue;
4510 }
4511
4512 /* Cool, we have a device with some space at pos */
4513 di = xcalloc(1, sizeof(*di));
4514 di->disk.number = i;
4515 di->disk.raid_disk = i;
4516 di->disk.major = dl->major;
4517 di->disk.minor = dl->minor;
4518 di->disk.state = 0;
4519 di->recovery_start = 0;
4520 di->data_offset = pos;
4521 di->component_size = a->info.component_size;
4522 di->container_member = dl->pdnum;
4523 di->next = rv;
4524 rv = di;
4525 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4526 i, pos);
4527
4528 break;
4529 }
4530 if (!dl && ! global_ok) {
4531 /* not enough dedicated spares, try global */
4532 global_ok = 1;
4533 dl = ddf->dlist;
4534 goto again;
4535 }
4536 }
4537
4538 if (!rv)
4539 /* No spares found */
4540 return rv;
4541 /* Now 'rv' has a list of devices to return.
4542 * Create a metadata_update record to update the
4543 * phys_refnum and lba_offset values
4544 */
4545 mu = xmalloc(sizeof(*mu));
4546 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
4547 free(mu);
4548 mu = NULL;
4549 }
4550 mu->buf = xmalloc(ddf->conf_rec_len * 512);
4551 mu->len = ddf->conf_rec_len * 512;
4552 mu->space = NULL;
4553 mu->space_list = NULL;
4554 mu->next = *updates;
4555 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4556 &n_bvd, &vcl);
4557 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4558
4559 vc = (struct vd_config*)mu->buf;
4560 for (di = rv ; di ; di = di->next) {
4561 vc->phys_refnum[di->disk.raid_disk] =
4562 ddf->phys->entries[dl->pdnum].refnum;
4563 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4564 = __cpu_to_be64(di->data_offset);
4565 }
4566 *updates = mu;
4567 return rv;
4568 }
4569 #endif /* MDASSEMBLE */
4570
4571 static int ddf_level_to_layout(int level)
4572 {
4573 switch(level) {
4574 case 0:
4575 case 1:
4576 return 0;
4577 case 5:
4578 return ALGORITHM_LEFT_SYMMETRIC;
4579 case 6:
4580 return ALGORITHM_ROTATING_N_CONTINUE;
4581 case 10:
4582 return 0x102;
4583 default:
4584 return UnSet;
4585 }
4586 }
4587
4588 static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4589 {
4590 if (level && *level == UnSet)
4591 *level = LEVEL_CONTAINER;
4592
4593 if (level && layout && *layout == UnSet)
4594 *layout = ddf_level_to_layout(*level);
4595 }
4596
/* Method table plugging the DDF metadata format into mdadm/mdmon.
 * Handlers above the first #endif are only needed by the full mdadm
 * tool; the minimal mdassemble build omits them.
 */
struct superswitch super_ddf = {
#ifndef MDASSEMBLE
	/* Examination, creation and container-management operations
	 * used by the mdadm command-line tool.
	 */
	.examine_super	= examine_super_ddf,
	.brief_examine_super = brief_examine_super_ddf,
	.brief_examine_subarrays = brief_examine_subarrays_ddf,
	.export_examine_super = export_examine_super_ddf,
	.detail_super	= detail_super_ddf,
	.brief_detail_super = brief_detail_super_ddf,
	.validate_geometry = validate_geometry_ddf,
	.write_init_super = write_init_super_ddf,
	.add_to_super	= add_to_super_ddf,
	.remove_from_super = remove_from_super_ddf,
	.load_container	= load_container_ddf,
	.copy_metadata = copy_metadata_ddf,
#endif
	/* Generic superblock operations available in every build */
	.match_home	= match_home_ddf,
	.uuid_from_super= uuid_from_super_ddf,
	.getinfo_super  = getinfo_super_ddf,
	.update_super	= update_super_ddf,

	.avail_size	= avail_size_ddf,

	.compare_super	= compare_super_ddf,

	.load_super	= load_super_ddf,
	.init_super	= init_super_ddf,
	.store_super	= store_super_ddf,
	.free_super	= free_super_ddf,
	.match_metadata_desc = match_metadata_desc_ddf,
	.container_content = container_content_ddf,
	.default_geometry = default_geometry_ddf,

	/* DDF is an 'external' format: metadata is managed by mdmon,
	 * not by the kernel.
	 */
	.external	= 1,

#ifndef MDASSEMBLE
	/* for mdmon */
	.open_new	= ddf_open_new,
	.set_array_state= ddf_set_array_state,
	.set_disk	= ddf_set_disk,
	.sync_metadata	= ddf_sync_metadata,
	.process_update	= ddf_process_update,
	.prepare_update	= ddf_prepare_update,
	.activate_spare = ddf_activate_spare,
#endif
	.name = "ddf",
};