]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-ddf.c
DDF: getinfo_super_ddf_bvd: fix offset calculation for SVDs
[thirdparty/mdadm.git] / super-ddf.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28 #define HAVE_STDINT_H 1
29 #include "mdadm.h"
30 #include "mdmon.h"
31 #include "sha1.h"
32 #include <values.h>
33
/* A non-official T10 name, used for the GUIDs this code creates. */
static char T10[] = "Linux-MD";

/*
 * DDF timestamps are based on 1980 whereas Linux timestamps are based
 * on 1970.  DECADE is the number of seconds in the seventies (ten
 * years, two of them leap years) and is added to convert a DDF
 * timestamp into a Linux one.
 */
#define DECADE (3600*24*(365*10+2))

/* CRC-32 routine; its definition is not in this part of the file. */
unsigned long crc32(
	unsigned long crc,
	const unsigned char *buf,
	unsigned len);

/* Sentinel values: all-ones, and one below that. */
#define DDF_NOTFOUND (~0U)
#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
50 /* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
/* Primary Raid Level (PRL), listed in numeric order */
#define DDF_RAID0	0x00
#define DDF_RAID1	0x01
#define DDF_RAID3	0x03
#define DDF_RAID4	0x04
#define DDF_RAID5	0x05
#define DDF_RAID6	0x06
#define DDF_JBOD	0x0f
#define DDF_RAID1E	0x11
#define DDF_RAID5E	0x15
#define DDF_CONCAT	0x1f
#define DDF_RAID5EE	0x25

/*
 * Raid Level Qualifier (RLQ).  The meaning of a qualifier value depends
 * on the primary level it is paired with, so values repeat below.
 */
#define DDF_RAID0_SIMPLE	0x00
#define DDF_RAID1_SIMPLE	0x00 /* just 2 devices in this plex */
#define DDF_RAID1_MULTI		0x01 /* exactly 3 devices in this plex */
#define DDF_RAID3_0		0x00 /* parity in first extent */
#define DDF_RAID3_N		0x01 /* parity in last extent */
#define DDF_RAID4_0		0x00 /* parity in first extent */
#define DDF_RAID4_N		0x01 /* parity in last extent */
/* The following also apply to raid5e and raid5ee */
#define DDF_RAID5_0_RESTART	0x00 /* same as 'right asymmetric' - layout 1 */
#define DDF_RAID6_0_RESTART	0x01 /* raid6 different from raid5 here!!! */
#define DDF_RAID5_N_RESTART	0x02 /* same as 'left asymmetric' - layout 0 */
#define DDF_RAID5_N_CONTINUE	0x03 /* same as 'left symmetric' - layout 2 */

#define DDF_RAID1E_ADJACENT	0x00 /* raid10 nearcopies==2 */
#define DDF_RAID1E_OFFSET	0x01 /* raid10 offsetcopies==2 */

/* Secondary RAID Level (SRL) */
#define DDF_2STRIPED	0x00 /* This is weirder than RAID0 !! */
#define DDF_2MIRRORED	0x01
#define DDF_2CONCAT	0x02
#define DDF_2SPANNED	0x03 /* This is also weird - be careful */
/*
 * Magic numbers.  Each is wrapped in __cpu_to_be32() so that it can be
 * compared directly against the big-endian 'magic' field of a record.
 */
#define DDF_HEADER_MAGIC	__cpu_to_be32(0xDE11DE11)
#define DDF_CONTROLLER_MAGIC	__cpu_to_be32(0xAD111111)
#define DDF_PHYS_RECORDS_MAGIC	__cpu_to_be32(0x22222222)
#define DDF_PHYS_DATA_MAGIC	__cpu_to_be32(0x33333333)
#define DDF_VIRT_RECORDS_MAGIC	__cpu_to_be32(0xDDDDDDDD)
#define DDF_VD_CONF_MAGIC	__cpu_to_be32(0xEEEEEEEE)
#define DDF_SPARE_ASSIGN_MAGIC	__cpu_to_be32(0x55555555)
#define DDF_VU_CONF_MAGIC	__cpu_to_be32(0x88888888)
#define DDF_VENDOR_LOG_MAGIC	__cpu_to_be32(0x01dBEEF0)
#define DDF_BBM_LOG_MAGIC	__cpu_to_be32(0xABADB10C)

/* Length in bytes of every GUID field */
#define DDF_GUID_LEN	24
/* Revision strings, 8 characters each (compared with memcmp(.., 8)) */
#define DDF_REVISION_0	"01.00.00"
#define DDF_REVISION_2	"01.02.00"
112
113 struct ddf_header {
114 __u32 magic; /* DDF_HEADER_MAGIC */
115 __u32 crc;
116 char guid[DDF_GUID_LEN];
117 char revision[8]; /* 01.02.00 */
118 __u32 seq; /* starts at '1' */
119 __u32 timestamp;
120 __u8 openflag;
121 __u8 foreignflag;
122 __u8 enforcegroups;
123 __u8 pad0; /* 0xff */
124 __u8 pad1[12]; /* 12 * 0xff */
125 /* 64 bytes so far */
126 __u8 header_ext[32]; /* reserved: fill with 0xff */
127 __u64 primary_lba;
128 __u64 secondary_lba;
129 __u8 type;
130 __u8 pad2[3]; /* 0xff */
131 __u32 workspace_len; /* sectors for vendor space -
132 * at least 32768(sectors) */
133 __u64 workspace_lba;
134 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
135 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
136 __u16 max_partitions; /* i.e. max num of configuration
137 record entries per disk */
138 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
139 *12/512) */
140 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
141 __u8 pad3[54]; /* 0xff */
142 /* 192 bytes so far */
143 __u32 controller_section_offset;
144 __u32 controller_section_length;
145 __u32 phys_section_offset;
146 __u32 phys_section_length;
147 __u32 virt_section_offset;
148 __u32 virt_section_length;
149 __u32 config_section_offset;
150 __u32 config_section_length;
151 __u32 data_section_offset;
152 __u32 data_section_length;
153 __u32 bbm_section_offset;
154 __u32 bbm_section_length;
155 __u32 diag_space_offset;
156 __u32 diag_space_length;
157 __u32 vendor_offset;
158 __u32 vendor_length;
159 /* 256 bytes so far */
160 __u8 pad4[256]; /* 0xff */
161 };
162
/* Values of ddf_header.type */
#define DDF_HEADER_ANCHOR	0x00
#define DDF_HEADER_PRIMARY	0x01
#define DDF_HEADER_SECONDARY	0x02
167
168 /* The content of the 'controller section' - global scope */
169 struct ddf_controller_data {
170 __u32 magic; /* DDF_CONTROLLER_MAGIC */
171 __u32 crc;
172 char guid[DDF_GUID_LEN];
173 struct controller_type {
174 __u16 vendor_id;
175 __u16 device_id;
176 __u16 sub_vendor_id;
177 __u16 sub_device_id;
178 } type;
179 char product_id[16];
180 __u8 pad[8]; /* 0xff */
181 __u8 vendor_data[448];
182 };
183
184 /* The content of phys_section - global scope */
185 struct phys_disk {
186 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
187 __u32 crc;
188 __u16 used_pdes;
189 __u16 max_pdes;
190 __u8 pad[52];
191 struct phys_disk_entry {
192 char guid[DDF_GUID_LEN];
193 __u32 refnum;
194 __u16 type;
195 __u16 state;
196 __u64 config_size; /* DDF structures must be after here */
197 char path[18]; /* another horrible structure really */
198 __u8 pad[6];
199 } entries[0];
200 };
201
/* phys_disk_entry.type is a bitmap - stored big-endian, remember */
#define DDF_Forced_PD_GUID	1
#define DDF_Active_in_VD	2
#define DDF_Global_Spare	4	/* VD_CONF records are ignored */
#define DDF_Spare		8	/* overrides Global_spare */
#define DDF_Foreign		16
#define DDF_Legacy		32	/* no DDF on this device */

/* Interface kind, held in bits 8-11 of phys_disk_entry.type */
#define DDF_Interface_mask	0xf00
#define DDF_Interface_SCSI	0x100
#define DDF_Interface_SAS	0x200
#define DDF_Interface_SATA	0x300
#define DDF_Interface_FC	0x400

/* phys_disk_entry.state is a bitmap too, also stored big-endian */
#define DDF_Online		1
#define DDF_Failed		2	/* overrides 1,4,8 */
#define DDF_Rebuilding		4
#define DDF_Transition		8
#define DDF_SMART		16
#define DDF_ReadErrors		32
#define DDF_Missing		64
224
225 /* The content of the virt_section global scope */
226 struct virtual_disk {
227 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
228 __u32 crc;
229 __u16 populated_vdes;
230 __u16 max_vdes;
231 __u8 pad[52];
232 struct virtual_entry {
233 char guid[DDF_GUID_LEN];
234 __u16 unit;
235 __u16 pad0; /* 0xffff */
236 __u16 guid_crc;
237 __u16 type;
238 __u8 state;
239 __u8 init_state;
240 __u8 pad1[14];
241 char name[16];
242 } entries[0];
243 };
244
/* virtual_entry.type is a bitmap - stored big-endian */
#define DDF_Shared		1
#define DDF_Enforce_Groups	2
#define DDF_Unicode		4
#define DDF_Owner_Valid		8

/*
 * virtual_entry.state is a single byte: the low three bits
 * (DDF_state_mask) hold one of the state values, the bits above
 * them are independent flags.
 */
#define DDF_state_mask		0x7
#define DDF_state_optimal	0x0
#define DDF_state_degraded	0x1
#define DDF_state_deleted	0x2
#define DDF_state_missing	0x3
#define DDF_state_failed	0x4
#define DDF_state_part_optimal	0x5

#define DDF_state_morphing	0x8
#define DDF_state_inconsistent	0x10

/* virtual_entry.init_state is a single byte: low two bits are the init state */
#define DDF_initstate_mask	0x03
#define DDF_init_not		0x00
#define DDF_init_quick		0x01	/* initialisation is in progress.
					 * i.e. 'state_inconsistent' */
#define DDF_init_full		0x02

/* Access bits, occupying the top two bits of a byte */
#define DDF_access_mask		0xc0
#define DDF_access_rw		0x00
#define DDF_access_ro		0x80
#define DDF_access_blocked	0xc0
274
275 /* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
280 struct vd_config {
281 __u32 magic; /* DDF_VD_CONF_MAGIC */
282 __u32 crc;
283 char guid[DDF_GUID_LEN];
284 __u32 timestamp;
285 __u32 seqnum;
286 __u8 pad0[24];
287 __u16 prim_elmnt_count;
288 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
289 __u8 prl;
290 __u8 rlq;
291 __u8 sec_elmnt_count;
292 __u8 sec_elmnt_seq;
293 __u8 srl;
294 __u64 blocks; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks; /* blocks in array */
298 __u8 pad1[8];
299 __u32 spare_refs[8];
300 __u8 cache_pol[8];
301 __u8 bg_rate;
302 __u8 pad2[3];
303 __u8 pad3[52];
304 __u8 pad4[192];
305 __u8 v0[32]; /* reserved- 0xff */
306 __u8 v1[32]; /* reserved- 0xff */
307 __u8 v2[16]; /* reserved- 0xff */
308 __u8 v3[16]; /* reserved- 0xff */
309 __u8 vendor[32];
310 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
313 };
314 #define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
315
/* vd_config.cache_pol[7] is a bitmap of the following flags */
#define DDF_cache_writeback	1	/* else writethrough */
#define DDF_cache_wadaptive	2	/* only applies if writeback */
#define DDF_cache_readahead	4
#define DDF_cache_radaptive	8	/* only if doing read-ahead */
#define DDF_cache_ifnobatt	16	/* even to write cache if battery is poor */
#define DDF_cache_wallowed	32	/* enable write caching */
#define DDF_cache_rallowed	64	/* enable read caching */
324
325 struct spare_assign {
326 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
327 __u32 crc;
328 __u32 timestamp;
329 __u8 reserved[7];
330 __u8 type;
331 __u16 populated; /* SAEs used */
332 __u16 max; /* max SAEs */
333 __u8 pad[8];
334 struct spare_assign_entry {
335 char guid[DDF_GUID_LEN];
336 __u16 secondary_element;
337 __u8 pad[6];
338 } spare_ents[0];
339 };
/* spare_assign.type is a bitmap of the following flags */
#define DDF_spare_dedicated	0x1	/* else global */
#define DDF_spare_revertible	0x2	/* else committable */
#define DDF_spare_active	0x4	/* else not active */
#define DDF_spare_affinity	0x8	/* enclosure affinity */
345
346 /* The data_section contents - local scope */
347 struct disk_data {
348 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
349 __u32 crc;
350 char guid[DDF_GUID_LEN];
351 __u32 refnum; /* crc of some magic drive data ... */
352 __u8 forced_ref; /* set when above was not result of magic */
353 __u8 forced_guid; /* set if guid was forced rather than magic */
354 __u8 vendor[32];
355 __u8 pad[442];
356 };
357
358 /* bbm_section content */
359 struct bad_block_log {
360 __u32 magic;
361 __u32 crc;
362 __u16 entry_count;
363 __u32 spare_count;
364 __u8 pad[10];
365 __u64 first_spare;
366 struct mapped_block {
367 __u64 defective_start;
368 __u32 replacement_start;
369 __u16 remap_count;
370 __u8 pad[2];
371 } entries[0];
372 };
373
374 /* Struct for internally holding ddf structures */
375 /* The DDF structure stored on each device is potentially
376 * quite different, as some data is global and some is local.
377 * The global data is:
378 * - ddf header
379 * - controller_data
380 * - Physical disk records
381 * - Virtual disk records
382 * The local data is:
383 * - Configuration records
384 * - Physical Disk data section
385 * ( and Bad block and vendor which I don't care about yet).
386 *
387 * The local data is parsed into separate lists as it is read
388 * and reconstructed for writing. This means that we only need
389 * to make config changes once and they are automatically
390 * propagated to all devices.
391 * Note that the ddf_super has space of the conf and disk data
392 * for this disk and also for a list of all such data.
393 * The list is only used for the superblock that is being
394 * built in Create or Assemble to describe the whole array.
395 */
396 struct ddf_super {
397 struct ddf_header anchor, primary, secondary;
398 struct ddf_controller_data controller;
399 struct ddf_header *active;
400 struct phys_disk *phys;
401 struct virtual_disk *virt;
402 int pdsize, vdsize;
403 unsigned int max_part, mppe, conf_rec_len;
404 int currentdev;
405 int updates_pending;
406 struct vcl {
407 union {
408 char space[512];
409 struct {
410 struct vcl *next;
411 unsigned int vcnum; /* index into ->virt */
412 struct vd_config **other_bvds;
413 __u64 *block_sizes; /* NULL if all the same */
414 };
415 };
416 struct vd_config conf;
417 } *conflist, *currentconf;
418 struct dl {
419 union {
420 char space[512];
421 struct {
422 struct dl *next;
423 int major, minor;
424 char *devname;
425 int fd;
426 unsigned long long size; /* sectors */
427 unsigned long long primary_lba; /* sectors */
428 unsigned long long secondary_lba; /* sectors */
429 unsigned long long workspace_lba; /* sectors */
430 int pdnum; /* index in ->phys */
431 struct spare_assign *spare;
432 void *mdupdate; /* hold metadata update */
433
434 /* These fields used by auto-layout */
435 int raiddisk; /* slot to fill in autolayout */
436 __u64 esize;
437 };
438 };
439 struct disk_data disk;
440 struct vcl *vlist[0]; /* max_part in size */
441 } *dlist, *add_list;
442 };
443
/* Fallback for environments whose headers did not provide offsetof(). */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
447
#if DEBUG
static int all_ff(const char *guid);

/*
 * Debug aid: print "<func>/<msg>: " followed by the index, state and
 * init_state of every virtual disk entry for which all_ff(guid) is
 * false (presumably the slots that are in use).
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int vd;

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < __be16_to_cpu(ddf->active->max_vd_entries); vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state, ve->init_state);
	}
	dprintf("\n");
}
#else
/* Non-debug builds: nothing to print. */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
466
467 static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
468 {
469 ddf->updates_pending = 1;
470 ddf->active->seq = __cpu_to_be32((__be32_to_cpu(ddf->active->seq)+1));
471 pr_state(ddf, func);
472 }
473
474 #define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
475
476 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
477 __u32 refnum, unsigned int nmax,
478 const struct vd_config **bvd,
479 unsigned int *idx);
480
481 static unsigned int calc_crc(void *buf, int len)
482 {
483 /* crcs are always at the same place as in the ddf_header */
484 struct ddf_header *ddf = buf;
485 __u32 oldcrc = ddf->crc;
486 __u32 newcrc;
487 ddf->crc = 0xffffffff;
488
489 newcrc = crc32(0, buf, len);
490 ddf->crc = oldcrc;
491 /* The crc is store (like everything) bigendian, so convert
492 * here for simplicity
493 */
494 return __cpu_to_be32(newcrc);
495 }
496
497 #define DDF_INVALID_LEVEL 0xff
498 #define DDF_NO_SECONDARY 0xff
499 static int err_bad_md_layout(const mdu_array_info_t *array)
500 {
501 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
502 array->level, array->layout, array->raid_disks);
503 return -1;
504 }
505
506 static int layout_md2ddf(const mdu_array_info_t *array,
507 struct vd_config *conf)
508 {
509 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
510 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
511 __u8 sec_elmnt_count = 1;
512 __u8 srl = DDF_NO_SECONDARY;
513
514 switch (array->level) {
515 case LEVEL_LINEAR:
516 prl = DDF_CONCAT;
517 break;
518 case 0:
519 rlq = DDF_RAID0_SIMPLE;
520 prl = DDF_RAID0;
521 break;
522 case 1:
523 switch (array->raid_disks) {
524 case 2:
525 rlq = DDF_RAID1_SIMPLE;
526 break;
527 case 3:
528 rlq = DDF_RAID1_MULTI;
529 break;
530 default:
531 return err_bad_md_layout(array);
532 }
533 prl = DDF_RAID1;
534 break;
535 case 4:
536 if (array->layout != 0)
537 return err_bad_md_layout(array);
538 rlq = DDF_RAID4_N;
539 prl = DDF_RAID4;
540 break;
541 case 5:
542 switch (array->layout) {
543 case ALGORITHM_LEFT_ASYMMETRIC:
544 rlq = DDF_RAID5_N_RESTART;
545 break;
546 case ALGORITHM_RIGHT_ASYMMETRIC:
547 rlq = DDF_RAID5_0_RESTART;
548 break;
549 case ALGORITHM_LEFT_SYMMETRIC:
550 rlq = DDF_RAID5_N_CONTINUE;
551 break;
552 case ALGORITHM_RIGHT_SYMMETRIC:
553 /* not mentioned in standard */
554 default:
555 return err_bad_md_layout(array);
556 }
557 prl = DDF_RAID5;
558 break;
559 case 6:
560 switch (array->layout) {
561 case ALGORITHM_ROTATING_N_RESTART:
562 rlq = DDF_RAID5_N_RESTART;
563 break;
564 case ALGORITHM_ROTATING_ZERO_RESTART:
565 rlq = DDF_RAID6_0_RESTART;
566 break;
567 case ALGORITHM_ROTATING_N_CONTINUE:
568 rlq = DDF_RAID5_N_CONTINUE;
569 break;
570 default:
571 return err_bad_md_layout(array);
572 }
573 prl = DDF_RAID6;
574 break;
575 case 10:
576 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
577 rlq = DDF_RAID1_SIMPLE;
578 prim_elmnt_count = __cpu_to_be16(2);
579 sec_elmnt_count = array->raid_disks / 2;
580 } else if (array->raid_disks % 3 == 0
581 && array->layout == 0x103) {
582 rlq = DDF_RAID1_MULTI;
583 prim_elmnt_count = __cpu_to_be16(3);
584 sec_elmnt_count = array->raid_disks / 3;
585 } else
586 return err_bad_md_layout(array);
587 srl = DDF_2SPANNED;
588 prl = DDF_RAID1;
589 break;
590 default:
591 return err_bad_md_layout(array);
592 }
593 conf->prl = prl;
594 conf->prim_elmnt_count = prim_elmnt_count;
595 conf->rlq = rlq;
596 conf->srl = srl;
597 conf->sec_elmnt_count = sec_elmnt_count;
598 return 0;
599 }
600
601 static int err_bad_ddf_layout(const struct vd_config *conf)
602 {
603 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
604 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
605 return -1;
606 }
607
608 static int layout_ddf2md(const struct vd_config *conf,
609 mdu_array_info_t *array)
610 {
611 int level = LEVEL_UNSUPPORTED;
612 int layout = 0;
613 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
614
615 if (conf->sec_elmnt_count > 1) {
616 /* see also check_secondary() */
617 if (conf->prl != DDF_RAID1 ||
618 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
619 pr_err("Unsupported secondary RAID level %u/%u\n",
620 conf->prl, conf->srl);
621 return -1;
622 }
623 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
624 layout = 0x102;
625 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
626 layout = 0x103;
627 else
628 return err_bad_ddf_layout(conf);
629 raiddisks *= conf->sec_elmnt_count;
630 level = 10;
631 goto good;
632 }
633
634 switch (conf->prl) {
635 case DDF_CONCAT:
636 level = LEVEL_LINEAR;
637 break;
638 case DDF_RAID0:
639 if (conf->rlq != DDF_RAID0_SIMPLE)
640 return err_bad_ddf_layout(conf);
641 level = 0;
642 break;
643 case DDF_RAID1:
644 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
645 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
646 return err_bad_ddf_layout(conf);
647 level = 1;
648 break;
649 case DDF_RAID4:
650 if (conf->rlq != DDF_RAID4_N)
651 return err_bad_ddf_layout(conf);
652 level = 4;
653 break;
654 case DDF_RAID5:
655 switch (conf->rlq) {
656 case DDF_RAID5_N_RESTART:
657 layout = ALGORITHM_LEFT_ASYMMETRIC;
658 break;
659 case DDF_RAID5_0_RESTART:
660 layout = ALGORITHM_RIGHT_ASYMMETRIC;
661 break;
662 case DDF_RAID5_N_CONTINUE:
663 layout = ALGORITHM_LEFT_SYMMETRIC;
664 break;
665 default:
666 return err_bad_ddf_layout(conf);
667 }
668 level = 5;
669 break;
670 case DDF_RAID6:
671 switch (conf->rlq) {
672 case DDF_RAID5_N_RESTART:
673 layout = ALGORITHM_ROTATING_N_RESTART;
674 break;
675 case DDF_RAID6_0_RESTART:
676 layout = ALGORITHM_ROTATING_ZERO_RESTART;
677 break;
678 case DDF_RAID5_N_CONTINUE:
679 layout = ALGORITHM_ROTATING_N_CONTINUE;
680 break;
681 default:
682 return err_bad_ddf_layout(conf);
683 }
684 level = 6;
685 break;
686 default:
687 return err_bad_ddf_layout(conf);
688 };
689
690 good:
691 array->level = level;
692 array->layout = layout;
693 array->raid_disks = raiddisks;
694 return 0;
695 }
696
697 static int load_ddf_header(int fd, unsigned long long lba,
698 unsigned long long size,
699 int type,
700 struct ddf_header *hdr, struct ddf_header *anchor)
701 {
702 /* read a ddf header (primary or secondary) from fd/lba
703 * and check that it is consistent with anchor
704 * Need to check:
705 * magic, crc, guid, rev, and LBA's header_type, and
706 * everything after header_type must be the same
707 */
708 if (lba >= size-1)
709 return 0;
710
711 if (lseek64(fd, lba<<9, 0) < 0)
712 return 0;
713
714 if (read(fd, hdr, 512) != 512)
715 return 0;
716
717 if (hdr->magic != DDF_HEADER_MAGIC)
718 return 0;
719 if (calc_crc(hdr, 512) != hdr->crc)
720 return 0;
721 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
722 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
723 anchor->primary_lba != hdr->primary_lba ||
724 anchor->secondary_lba != hdr->secondary_lba ||
725 hdr->type != type ||
726 memcmp(anchor->pad2, hdr->pad2, 512 -
727 offsetof(struct ddf_header, pad2)) != 0)
728 return 0;
729
730 /* Looks good enough to me... */
731 return 1;
732 }
733
734 static void *load_section(int fd, struct ddf_super *super, void *buf,
735 __u32 offset_be, __u32 len_be, int check)
736 {
737 unsigned long long offset = __be32_to_cpu(offset_be);
738 unsigned long long len = __be32_to_cpu(len_be);
739 int dofree = (buf == NULL);
740
741 if (check)
742 if (len != 2 && len != 8 && len != 32
743 && len != 128 && len != 512)
744 return NULL;
745
746 if (len > 1024)
747 return NULL;
748 if (buf) {
749 /* All pre-allocated sections are a single block */
750 if (len != 1)
751 return NULL;
752 } else if (posix_memalign(&buf, 512, len<<9) != 0)
753 buf = NULL;
754
755 if (!buf)
756 return NULL;
757
758 if (super->active->type == 1)
759 offset += __be64_to_cpu(super->active->primary_lba);
760 else
761 offset += __be64_to_cpu(super->active->secondary_lba);
762
763 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
764 if (dofree)
765 free(buf);
766 return NULL;
767 }
768 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
769 if (dofree)
770 free(buf);
771 return NULL;
772 }
773 return buf;
774 }
775
776 static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
777 {
778 unsigned long long dsize;
779
780 get_dev_size(fd, NULL, &dsize);
781
782 if (lseek64(fd, dsize-512, 0) < 0) {
783 if (devname)
784 pr_err("Cannot seek to anchor block on %s: %s\n",
785 devname, strerror(errno));
786 return 1;
787 }
788 if (read(fd, &super->anchor, 512) != 512) {
789 if (devname)
790 pr_err("Cannot read anchor block on %s: %s\n",
791 devname, strerror(errno));
792 return 1;
793 }
794 if (super->anchor.magic != DDF_HEADER_MAGIC) {
795 if (devname)
796 pr_err("no DDF anchor found on %s\n",
797 devname);
798 return 2;
799 }
800 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
801 if (devname)
802 pr_err("bad CRC on anchor on %s\n",
803 devname);
804 return 2;
805 }
806 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
807 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
808 if (devname)
809 pr_err("can only support super revision"
810 " %.8s and earlier, not %.8s on %s\n",
811 DDF_REVISION_2, super->anchor.revision,devname);
812 return 2;
813 }
814 super->active = NULL;
815 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
816 dsize >> 9, 1,
817 &super->primary, &super->anchor) == 0) {
818 if (devname)
819 pr_err("Failed to load primary DDF header "
820 "on %s\n", devname);
821 } else
822 super->active = &super->primary;
823 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
824 dsize >> 9, 2,
825 &super->secondary, &super->anchor)) {
826 if (super->active == NULL
827 || (__be32_to_cpu(super->primary.seq)
828 < __be32_to_cpu(super->secondary.seq) &&
829 !super->secondary.openflag)
830 || (__be32_to_cpu(super->primary.seq)
831 == __be32_to_cpu(super->secondary.seq) &&
832 super->primary.openflag && !super->secondary.openflag)
833 )
834 super->active = &super->secondary;
835 } else if (devname)
836 pr_err("Failed to load secondary DDF header on %s\n",
837 devname);
838 if (super->active == NULL)
839 return 2;
840 return 0;
841 }
842
843 static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
844 {
845 void *ok;
846 ok = load_section(fd, super, &super->controller,
847 super->active->controller_section_offset,
848 super->active->controller_section_length,
849 0);
850 super->phys = load_section(fd, super, NULL,
851 super->active->phys_section_offset,
852 super->active->phys_section_length,
853 1);
854 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
855
856 super->virt = load_section(fd, super, NULL,
857 super->active->virt_section_offset,
858 super->active->virt_section_length,
859 1);
860 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
861 if (!ok ||
862 !super->phys ||
863 !super->virt) {
864 free(super->phys);
865 free(super->virt);
866 super->phys = NULL;
867 super->virt = NULL;
868 return 2;
869 }
870 super->conflist = NULL;
871 super->dlist = NULL;
872
873 super->max_part = __be16_to_cpu(super->active->max_partitions);
874 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
875 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
876 return 0;
877 }
878
879 #define DDF_UNUSED_BVD 0xff
880 static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
881 {
882 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
883 unsigned int i, vdsize;
884 void *p;
885 if (n_vds == 0) {
886 vcl->other_bvds = NULL;
887 return 0;
888 }
889 vdsize = ddf->conf_rec_len * 512;
890 if (posix_memalign(&p, 512, n_vds *
891 (vdsize + sizeof(struct vd_config *))) != 0)
892 return -1;
893 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
894 for (i = 0; i < n_vds; i++) {
895 vcl->other_bvds[i] = p + i * vdsize;
896 memset(vcl->other_bvds[i], 0, vdsize);
897 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
898 }
899 return 0;
900 }
901
902 static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
903 unsigned int len)
904 {
905 int i;
906 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
907 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
908 break;
909
910 if (i < vcl->conf.sec_elmnt_count-1) {
911 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
912 return;
913 } else {
914 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
915 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
916 break;
917 if (i == vcl->conf.sec_elmnt_count-1) {
918 pr_err("no space for sec level config %u, count is %u\n",
919 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
920 return;
921 }
922 }
923 memcpy(vcl->other_bvds[i], vd, len);
924 }
925
926 static int load_ddf_local(int fd, struct ddf_super *super,
927 char *devname, int keep)
928 {
929 struct dl *dl;
930 struct stat stb;
931 char *conf;
932 unsigned int i;
933 unsigned int confsec;
934 int vnum;
935 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
936 unsigned long long dsize;
937
938 /* First the local disk info */
939 if (posix_memalign((void**)&dl, 512,
940 sizeof(*dl) +
941 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
942 pr_err("%s could not allocate disk info buffer\n",
943 __func__);
944 return 1;
945 }
946
947 load_section(fd, super, &dl->disk,
948 super->active->data_section_offset,
949 super->active->data_section_length,
950 0);
951 dl->devname = devname ? xstrdup(devname) : NULL;
952
953 fstat(fd, &stb);
954 dl->major = major(stb.st_rdev);
955 dl->minor = minor(stb.st_rdev);
956 dl->next = super->dlist;
957 dl->fd = keep ? fd : -1;
958
959 dl->size = 0;
960 if (get_dev_size(fd, devname, &dsize))
961 dl->size = dsize >> 9;
962 /* If the disks have different sizes, the LBAs will differ
963 * between phys disks.
964 * At this point here, the values in super->active must be valid
965 * for this phys disk. */
966 dl->primary_lba = super->active->primary_lba;
967 dl->secondary_lba = super->active->secondary_lba;
968 dl->workspace_lba = super->active->workspace_lba;
969 dl->spare = NULL;
970 for (i = 0 ; i < super->max_part ; i++)
971 dl->vlist[i] = NULL;
972 super->dlist = dl;
973 dl->pdnum = -1;
974 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
975 if (memcmp(super->phys->entries[i].guid,
976 dl->disk.guid, DDF_GUID_LEN) == 0)
977 dl->pdnum = i;
978
979 /* Now the config list. */
980 /* 'conf' is an array of config entries, some of which are
981 * probably invalid. Those which are good need to be copied into
982 * the conflist
983 */
984
985 conf = load_section(fd, super, NULL,
986 super->active->config_section_offset,
987 super->active->config_section_length,
988 0);
989
990 vnum = 0;
991 for (confsec = 0;
992 confsec < __be32_to_cpu(super->active->config_section_length);
993 confsec += super->conf_rec_len) {
994 struct vd_config *vd =
995 (struct vd_config *)((char*)conf + confsec*512);
996 struct vcl *vcl;
997
998 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
999 if (dl->spare)
1000 continue;
1001 if (posix_memalign((void**)&dl->spare, 512,
1002 super->conf_rec_len*512) != 0) {
1003 pr_err("%s could not allocate spare info buf\n",
1004 __func__);
1005 return 1;
1006 }
1007
1008 memcpy(dl->spare, vd, super->conf_rec_len*512);
1009 continue;
1010 }
1011 if (vd->magic != DDF_VD_CONF_MAGIC)
1012 continue;
1013 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1014 if (memcmp(vcl->conf.guid,
1015 vd->guid, DDF_GUID_LEN) == 0)
1016 break;
1017 }
1018
1019 if (vcl) {
1020 dl->vlist[vnum++] = vcl;
1021 if (vcl->other_bvds != NULL &&
1022 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1023 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1024 continue;
1025 }
1026 if (__be32_to_cpu(vd->seqnum) <=
1027 __be32_to_cpu(vcl->conf.seqnum))
1028 continue;
1029 } else {
1030 if (posix_memalign((void**)&vcl, 512,
1031 (super->conf_rec_len*512 +
1032 offsetof(struct vcl, conf))) != 0) {
1033 pr_err("%s could not allocate vcl buf\n",
1034 __func__);
1035 return 1;
1036 }
1037 vcl->next = super->conflist;
1038 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
1039 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1040 if (alloc_other_bvds(super, vcl) != 0) {
1041 pr_err("%s could not allocate other bvds\n",
1042 __func__);
1043 free(vcl);
1044 return 1;
1045 };
1046 super->conflist = vcl;
1047 dl->vlist[vnum++] = vcl;
1048 }
1049 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
1050 for (i=0; i < max_virt_disks ; i++)
1051 if (memcmp(super->virt->entries[i].guid,
1052 vcl->conf.guid, DDF_GUID_LEN)==0)
1053 break;
1054 if (i < max_virt_disks)
1055 vcl->vcnum = i;
1056 }
1057 free(conf);
1058
1059 return 0;
1060 }
1061
1062 #ifndef MDASSEMBLE
1063 static int load_super_ddf_all(struct supertype *st, int fd,
1064 void **sbp, char *devname);
1065 #endif
1066
1067 static void free_super_ddf(struct supertype *st);
1068
1069 static int load_super_ddf(struct supertype *st, int fd,
1070 char *devname)
1071 {
1072 unsigned long long dsize;
1073 struct ddf_super *super;
1074 int rv;
1075
1076 if (get_dev_size(fd, devname, &dsize) == 0)
1077 return 1;
1078
1079 if (!st->ignore_hw_compat && test_partition(fd))
1080 /* DDF is not allowed on partitions */
1081 return 1;
1082
1083 /* 32M is a lower bound */
1084 if (dsize <= 32*1024*1024) {
1085 if (devname)
1086 pr_err("%s is too small for ddf: "
1087 "size is %llu sectors.\n",
1088 devname, dsize>>9);
1089 return 1;
1090 }
1091 if (dsize & 511) {
1092 if (devname)
1093 pr_err("%s is an odd size for ddf: "
1094 "size is %llu bytes.\n",
1095 devname, dsize);
1096 return 1;
1097 }
1098
1099 free_super_ddf(st);
1100
1101 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
1102 pr_err("malloc of %zu failed.\n",
1103 sizeof(*super));
1104 return 1;
1105 }
1106 memset(super, 0, sizeof(*super));
1107
1108 rv = load_ddf_headers(fd, super, devname);
1109 if (rv) {
1110 free(super);
1111 return rv;
1112 }
1113
1114 /* Have valid headers and have chosen the best. Let's read in the rest*/
1115
1116 rv = load_ddf_global(fd, super, devname);
1117
1118 if (rv) {
1119 if (devname)
1120 pr_err("Failed to load all information "
1121 "sections on %s\n", devname);
1122 free(super);
1123 return rv;
1124 }
1125
1126 rv = load_ddf_local(fd, super, devname, 0);
1127
1128 if (rv) {
1129 if (devname)
1130 pr_err("Failed to load all information "
1131 "sections on %s\n", devname);
1132 free(super);
1133 return rv;
1134 }
1135
1136 /* Should possibly check the sections .... */
1137
1138 st->sb = super;
1139 if (st->ss == NULL) {
1140 st->ss = &super_ddf;
1141 st->minor_version = 0;
1142 st->max_devs = 512;
1143 }
1144 return 0;
1145
1146 }
1147
1148 static void free_super_ddf(struct supertype *st)
1149 {
1150 struct ddf_super *ddf = st->sb;
1151 if (ddf == NULL)
1152 return;
1153 free(ddf->phys);
1154 free(ddf->virt);
1155 while (ddf->conflist) {
1156 struct vcl *v = ddf->conflist;
1157 ddf->conflist = v->next;
1158 if (v->block_sizes)
1159 free(v->block_sizes);
1160 if (v->other_bvds)
1161 /*
1162 v->other_bvds[0] points to beginning of buffer,
1163 see alloc_other_bvds()
1164 */
1165 free(v->other_bvds[0]);
1166 free(v);
1167 }
1168 while (ddf->dlist) {
1169 struct dl *d = ddf->dlist;
1170 ddf->dlist = d->next;
1171 if (d->fd >= 0)
1172 close(d->fd);
1173 if (d->spare)
1174 free(d->spare);
1175 free(d);
1176 }
1177 while (ddf->add_list) {
1178 struct dl *d = ddf->add_list;
1179 ddf->add_list = d->next;
1180 if (d->fd >= 0)
1181 close(d->fd);
1182 if (d->spare)
1183 free(d->spare);
1184 free(d);
1185 }
1186 free(ddf);
1187 st->sb = NULL;
1188 }
1189
1190 static struct supertype *match_metadata_desc_ddf(char *arg)
1191 {
1192 /* 'ddf' only support containers */
1193 struct supertype *st;
1194 if (strcmp(arg, "ddf") != 0 &&
1195 strcmp(arg, "default") != 0
1196 )
1197 return NULL;
1198
1199 st = xcalloc(1, sizeof(*st));
1200 st->ss = &super_ddf;
1201 st->max_devs = 512;
1202 st->minor_version = 0;
1203 st->sb = NULL;
1204 return st;
1205 }
1206
1207 #ifndef MDASSEMBLE
1208
1209 static mapping_t ddf_state[] = {
1210 { "Optimal", 0},
1211 { "Degraded", 1},
1212 { "Deleted", 2},
1213 { "Missing", 3},
1214 { "Failed", 4},
1215 { "Partially Optimal", 5},
1216 { "-reserved-", 6},
1217 { "-reserved-", 7},
1218 { NULL, 0}
1219 };
1220
1221 static mapping_t ddf_init_state[] = {
1222 { "Not Initialised", 0},
1223 { "QuickInit in Progress", 1},
1224 { "Fully Initialised", 2},
1225 { "*UNKNOWN*", 3},
1226 { NULL, 0}
1227 };
1228 static mapping_t ddf_access[] = {
1229 { "Read/Write", 0},
1230 { "Reserved", 1},
1231 { "Read Only", 2},
1232 { "Blocked (no access)", 3},
1233 { NULL ,0}
1234 };
1235
1236 static mapping_t ddf_level[] = {
1237 { "RAID0", DDF_RAID0},
1238 { "RAID1", DDF_RAID1},
1239 { "RAID3", DDF_RAID3},
1240 { "RAID4", DDF_RAID4},
1241 { "RAID5", DDF_RAID5},
1242 { "RAID1E",DDF_RAID1E},
1243 { "JBOD", DDF_JBOD},
1244 { "CONCAT",DDF_CONCAT},
1245 { "RAID5E",DDF_RAID5E},
1246 { "RAID5EE",DDF_RAID5EE},
1247 { "RAID6", DDF_RAID6},
1248 { NULL, 0}
1249 };
1250 static mapping_t ddf_sec_level[] = {
1251 { "Striped", DDF_2STRIPED},
1252 { "Mirrored", DDF_2MIRRORED},
1253 { "Concat", DDF_2CONCAT},
1254 { "Spanned", DDF_2SPANNED},
1255 { NULL, 0}
1256 };
1257 #endif
1258
1259 static int all_ff(const char *guid)
1260 {
1261 int i;
1262 for (i = 0; i < DDF_GUID_LEN; i++)
1263 if (guid[i] != (char)0xff)
1264 return 0;
1265 return 1;
1266 }
1267
1268 #ifndef MDASSEMBLE
1269 static void print_guid(char *guid, int tstamp)
1270 {
1271 /* A GUIDs are part (or all) ASCII and part binary.
1272 * They tend to be space padded.
1273 * We print the GUID in HEX, then in parentheses add
1274 * any initial ASCII sequence, and a possible
1275 * time stamp from bytes 16-19
1276 */
1277 int l = DDF_GUID_LEN;
1278 int i;
1279
1280 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1281 if ((i&3)==0 && i != 0) printf(":");
1282 printf("%02X", guid[i]&255);
1283 }
1284
1285 printf("\n (");
1286 while (l && guid[l-1] == ' ')
1287 l--;
1288 for (i=0 ; i<l ; i++) {
1289 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1290 fputc(guid[i], stdout);
1291 else
1292 break;
1293 }
1294 if (tstamp) {
1295 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1296 char tbuf[100];
1297 struct tm *tm;
1298 tm = localtime(&then);
1299 strftime(tbuf, 100, " %D %T",tm);
1300 fputs(tbuf, stdout);
1301 }
1302 printf(")");
1303 }
1304
1305 static const char *guid_str(const char *guid)
1306 {
1307 static char buf[DDF_GUID_LEN*2+1];
1308 int i;
1309 char *p = buf;
1310 for (i = 0; i < DDF_GUID_LEN; i++) {
1311 unsigned char c = guid[i];
1312 if (c >= 32 && c < 127)
1313 p += sprintf(p, "%c", c);
1314 else
1315 p += sprintf(p, "%02x", c);
1316 }
1317 *p = '\0';
1318 return (const char *) buf;
1319 }
1320
1321 static void examine_vd(int n, struct ddf_super *sb, char *guid)
1322 {
1323 int crl = sb->conf_rec_len;
1324 struct vcl *vcl;
1325
1326 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
1327 unsigned int i;
1328 struct vd_config *vc = &vcl->conf;
1329
1330 if (calc_crc(vc, crl*512) != vc->crc)
1331 continue;
1332 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1333 continue;
1334
1335 /* Ok, we know about this VD, let's give more details */
1336 printf(" Raid Devices[%d] : %d (", n,
1337 __be16_to_cpu(vc->prim_elmnt_count));
1338 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
1339 int j;
1340 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1341 for (j=0; j<cnt; j++)
1342 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1343 break;
1344 if (i) printf(" ");
1345 if (j < cnt)
1346 printf("%d", j);
1347 else
1348 printf("--");
1349 }
1350 printf(")\n");
1351 if (vc->chunk_shift != 255)
1352 printf(" Chunk Size[%d] : %d sectors\n", n,
1353 1 << vc->chunk_shift);
1354 printf(" Raid Level[%d] : %s\n", n,
1355 map_num(ddf_level, vc->prl)?:"-unknown-");
1356 if (vc->sec_elmnt_count != 1) {
1357 printf(" Secondary Position[%d] : %d of %d\n", n,
1358 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1359 printf(" Secondary Level[%d] : %s\n", n,
1360 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1361 }
1362 printf(" Device Size[%d] : %llu\n", n,
1363 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
1364 printf(" Array Size[%d] : %llu\n", n,
1365 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
1366 }
1367 }
1368
1369 static void examine_vds(struct ddf_super *sb)
1370 {
1371 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
1372 unsigned int i;
1373 printf(" Virtual Disks : %d\n", cnt);
1374
1375 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
1376 struct virtual_entry *ve = &sb->virt->entries[i];
1377 if (all_ff(ve->guid))
1378 continue;
1379 printf("\n");
1380 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1381 printf("\n");
1382 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1383 printf(" state[%d] : %s, %s%s\n", i,
1384 map_num(ddf_state, ve->state & 7),
1385 (ve->state & 8) ? "Morphing, ": "",
1386 (ve->state & 16)? "Not Consistent" : "Consistent");
1387 printf(" init state[%d] : %s\n", i,
1388 map_num(ddf_init_state, ve->init_state&3));
1389 printf(" access[%d] : %s\n", i,
1390 map_num(ddf_access, (ve->init_state>>6) & 3));
1391 printf(" Name[%d] : %.16s\n", i, ve->name);
1392 examine_vd(i, sb, ve->guid);
1393 }
1394 if (cnt) printf("\n");
1395 }
1396
1397 static void examine_pds(struct ddf_super *sb)
1398 {
1399 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1400 int i;
1401 struct dl *dl;
1402 printf(" Physical Disks : %d\n", cnt);
1403 printf(" Number RefNo Size Device Type/State\n");
1404
1405 for (i=0 ; i<cnt ; i++) {
1406 struct phys_disk_entry *pd = &sb->phys->entries[i];
1407 int type = __be16_to_cpu(pd->type);
1408 int state = __be16_to_cpu(pd->state);
1409
1410 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1411 //printf("\n");
1412 printf(" %3d %08x ", i,
1413 __be32_to_cpu(pd->refnum));
1414 printf("%8lluK ",
1415 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
1416 for (dl = sb->dlist; dl ; dl = dl->next) {
1417 if (dl->disk.refnum == pd->refnum) {
1418 char *dv = map_dev(dl->major, dl->minor, 0);
1419 if (dv) {
1420 printf("%-15s", dv);
1421 break;
1422 }
1423 }
1424 }
1425 if (!dl)
1426 printf("%15s","");
1427 printf(" %s%s%s%s%s",
1428 (type&2) ? "active":"",
1429 (type&4) ? "Global-Spare":"",
1430 (type&8) ? "spare" : "",
1431 (type&16)? ", foreign" : "",
1432 (type&32)? "pass-through" : "");
1433 if (state & DDF_Failed)
1434 /* This over-rides these three */
1435 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
1436 printf("/%s%s%s%s%s%s%s",
1437 (state&1)? "Online": "Offline",
1438 (state&2)? ", Failed": "",
1439 (state&4)? ", Rebuilding": "",
1440 (state&8)? ", in-transition": "",
1441 (state&16)? ", SMART-errors": "",
1442 (state&32)? ", Unrecovered-Read-Errors": "",
1443 (state&64)? ", Missing" : "");
1444 printf("\n");
1445 }
1446 }
1447
1448 static void examine_super_ddf(struct supertype *st, char *homehost)
1449 {
1450 struct ddf_super *sb = st->sb;
1451
1452 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1453 printf(" Version : %.8s\n", sb->anchor.revision);
1454 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1455 printf("\n");
1456 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
1457 printf("\n");
1458 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1459 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1460 ?"yes" : "no");
1461 examine_vds(sb);
1462 examine_pds(sb);
1463 }
1464
1465 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
1466
1467 static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
1468 static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
1469
1470 static unsigned int get_vd_num_of_subarray(struct supertype *st)
1471 {
1472 /*
1473 * Figure out the VD number for this supertype.
1474 * Returns DDF_CONTAINER for the container itself,
1475 * and DDF_NOTFOUND on error.
1476 */
1477 struct ddf_super *ddf = st->sb;
1478 struct mdinfo *sra;
1479 char *sub, *end;
1480 unsigned int vcnum;
1481
1482 if (*st->container_devnm == '\0')
1483 return DDF_CONTAINER;
1484
1485 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1486 if (!sra || sra->array.major_version != -1 ||
1487 sra->array.minor_version != -2 ||
1488 !is_subarray(sra->text_version))
1489 return DDF_NOTFOUND;
1490
1491 sub = strchr(sra->text_version + 1, '/');
1492 if (sub != NULL)
1493 vcnum = strtoul(sub + 1, &end, 10);
1494 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1495 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1496 return DDF_NOTFOUND;
1497
1498 return vcnum;
1499 }
1500
1501 static void brief_examine_super_ddf(struct supertype *st, int verbose)
1502 {
1503 /* We just write a generic DDF ARRAY entry
1504 */
1505 struct mdinfo info;
1506 char nbuf[64];
1507 getinfo_super_ddf(st, &info, NULL);
1508 fname_from_uuid(st, &info, nbuf, ':');
1509
1510 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1511 }
1512
1513 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
1514 {
1515 /* We just write a generic DDF ARRAY entry
1516 */
1517 struct ddf_super *ddf = st->sb;
1518 struct mdinfo info;
1519 unsigned int i;
1520 char nbuf[64];
1521 getinfo_super_ddf(st, &info, NULL);
1522 fname_from_uuid(st, &info, nbuf, ':');
1523
1524 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1525 struct virtual_entry *ve = &ddf->virt->entries[i];
1526 struct vcl vcl;
1527 char nbuf1[64];
1528 if (all_ff(ve->guid))
1529 continue;
1530 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1531 ddf->currentconf =&vcl;
1532 uuid_from_super_ddf(st, info.uuid);
1533 fname_from_uuid(st, &info, nbuf1, ':');
1534 printf("ARRAY container=%s member=%d UUID=%s\n",
1535 nbuf+5, i, nbuf1+5);
1536 }
1537 }
1538
1539 static void export_examine_super_ddf(struct supertype *st)
1540 {
1541 struct mdinfo info;
1542 char nbuf[64];
1543 getinfo_super_ddf(st, &info, NULL);
1544 fname_from_uuid(st, &info, nbuf, ':');
1545 printf("MD_METADATA=ddf\n");
1546 printf("MD_LEVEL=container\n");
1547 printf("MD_UUID=%s\n", nbuf+5);
1548 }
1549
1550 static int copy_metadata_ddf(struct supertype *st, int from, int to)
1551 {
1552 void *buf;
1553 unsigned long long dsize, offset;
1554 int bytes;
1555 struct ddf_header *ddf;
1556 int written = 0;
1557
1558 /* The meta consists of an anchor, a primary, and a secondary.
1559 * This all lives at the end of the device.
1560 * So it is easiest to find the earliest of primary and
1561 * secondary, and copy everything from there.
1562 *
1563 * Anchor is 512 from end It contains primary_lba and secondary_lba
1564 * we choose one of those
1565 */
1566
1567 if (posix_memalign(&buf, 4096, 4096) != 0)
1568 return 1;
1569
1570 if (!get_dev_size(from, NULL, &dsize))
1571 goto err;
1572
1573 if (lseek64(from, dsize-512, 0) < 0)
1574 goto err;
1575 if (read(from, buf, 512) != 512)
1576 goto err;
1577 ddf = buf;
1578 if (ddf->magic != DDF_HEADER_MAGIC ||
1579 calc_crc(ddf, 512) != ddf->crc ||
1580 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1581 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1582 goto err;
1583
1584 offset = dsize - 512;
1585 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1586 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1587 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1588 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1589
1590 bytes = dsize - offset;
1591
1592 if (lseek64(from, offset, 0) < 0 ||
1593 lseek64(to, offset, 0) < 0)
1594 goto err;
1595 while (written < bytes) {
1596 int n = bytes - written;
1597 if (n > 4096)
1598 n = 4096;
1599 if (read(from, buf, n) != n)
1600 goto err;
1601 if (write(to, buf, n) != n)
1602 goto err;
1603 written += n;
1604 }
1605 free(buf);
1606 return 0;
1607 err:
1608 free(buf);
1609 return 1;
1610 }
1611
/*
 * Placeholder: currently prints nothing.
 *
 * FIXME later, this could:
 *  - print the DDF GUID
 *  - work out which array is meant
 *  - for the whole container, briefly list all arrays
 *  - for a single array, give its name
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
}
1621
1622 static void brief_detail_super_ddf(struct supertype *st)
1623 {
1624 struct mdinfo info;
1625 char nbuf[64];
1626 struct ddf_super *ddf = st->sb;
1627 unsigned int vcnum = get_vd_num_of_subarray(st);
1628 if (vcnum == DDF_CONTAINER)
1629 uuid_from_super_ddf(st, info.uuid);
1630 else if (vcnum == DDF_NOTFOUND)
1631 return;
1632 else
1633 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
1634 fname_from_uuid(st, &info, nbuf,':');
1635 printf(" UUID=%s", nbuf + 5);
1636 }
1637 #endif
1638
1639 static int match_home_ddf(struct supertype *st, char *homehost)
1640 {
1641 /* It matches 'this' host if the controller is a
1642 * Linux-MD controller with vendor_data matching
1643 * the hostname
1644 */
1645 struct ddf_super *ddf = st->sb;
1646 unsigned int len;
1647
1648 if (!homehost)
1649 return 0;
1650 len = strlen(homehost);
1651
1652 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1653 len < sizeof(ddf->controller.vendor_data) &&
1654 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1655 ddf->controller.vendor_data[len] == 0);
1656 }
1657
1658 #ifndef MDASSEMBLE
1659 static int find_index_in_bvd(const struct ddf_super *ddf,
1660 const struct vd_config *conf, unsigned int n,
1661 unsigned int *n_bvd)
1662 {
1663 /*
1664 * Find the index of the n-th valid physical disk in this BVD
1665 */
1666 unsigned int i, j;
1667 for (i = 0, j = 0; i < ddf->mppe &&
1668 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1669 if (conf->phys_refnum[i] != 0xffffffff) {
1670 if (n == j) {
1671 *n_bvd = i;
1672 return 1;
1673 }
1674 j++;
1675 }
1676 }
1677 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1678 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1679 return 0;
1680 }
1681
1682 static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1683 unsigned int n,
1684 unsigned int *n_bvd, struct vcl **vcl)
1685 {
1686 struct vcl *v;
1687
1688 for (v = ddf->conflist; v; v = v->next) {
1689 unsigned int nsec, ibvd;
1690 struct vd_config *conf;
1691 if (inst != v->vcnum)
1692 continue;
1693 conf = &v->conf;
1694 if (conf->sec_elmnt_count == 1) {
1695 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1696 *vcl = v;
1697 return conf;
1698 } else
1699 goto bad;
1700 }
1701 if (v->other_bvds == NULL) {
1702 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1703 __func__, conf->sec_elmnt_count);
1704 goto bad;
1705 }
1706 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1707 if (conf->sec_elmnt_seq != nsec) {
1708 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1709 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1710 == nsec)
1711 break;
1712 }
1713 if (ibvd == conf->sec_elmnt_count)
1714 goto bad;
1715 conf = v->other_bvds[ibvd-1];
1716 }
1717 if (!find_index_in_bvd(ddf, conf,
1718 n - nsec*conf->sec_elmnt_count, n_bvd))
1719 goto bad;
1720 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1721 , __func__, n, *n_bvd, ibvd-1, inst);
1722 *vcl = v;
1723 return conf;
1724 }
1725 bad:
1726 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
1727 return NULL;
1728 }
1729 #endif
1730
1731 static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
1732 {
1733 /* Find the entry in phys_disk which has the given refnum
1734 * and return it's index
1735 */
1736 unsigned int i;
1737 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
1738 if (ddf->phys->entries[i].refnum == phys_refnum)
1739 return i;
1740 return -1;
1741 }
1742
1743 static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1744 {
1745 char buf[20];
1746 struct sha1_ctx ctx;
1747 sha1_init_ctx(&ctx);
1748 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1749 sha1_finish_ctx(&ctx, buf);
1750 memcpy(uuid, buf, 4*4);
1751 }
1752
1753 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1754 {
1755 /* The uuid returned here is used for:
1756 * uuid to put into bitmap file (Create, Grow)
1757 * uuid for backup header when saving critical section (Grow)
1758 * comparing uuids when re-adding a device into an array
1759 * In these cases the uuid required is that of the data-array,
1760 * not the device-set.
1761 * uuid to recognise same set when adding a missing device back
1762 * to an array. This is a uuid for the device-set.
1763 *
1764 * For each of these we can make do with a truncated
1765 * or hashed uuid rather than the original, as long as
1766 * everyone agrees.
1767 * In the case of SVD we assume the BVD is of interest,
1768 * though that might be the case if a bitmap were made for
1769 * a mirrored SVD - worry about that later.
1770 * So we need to find the VD configuration record for the
1771 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1772 * The first 16 bytes of the sha1 of these is used.
1773 */
1774 struct ddf_super *ddf = st->sb;
1775 struct vcl *vcl = ddf->currentconf;
1776 char *guid;
1777
1778 if (vcl)
1779 guid = vcl->conf.guid;
1780 else
1781 guid = ddf->anchor.guid;
1782 uuid_from_ddf_guid(guid, uuid);
1783 }
1784
1785 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
1786
1787 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
1788 {
1789 struct ddf_super *ddf = st->sb;
1790 int map_disks = info->array.raid_disks;
1791 __u32 *cptr;
1792
1793 if (ddf->currentconf) {
1794 getinfo_super_ddf_bvd(st, info, map);
1795 return;
1796 }
1797 memset(info, 0, sizeof(*info));
1798
1799 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1800 info->array.level = LEVEL_CONTAINER;
1801 info->array.layout = 0;
1802 info->array.md_minor = -1;
1803 cptr = (__u32 *)(ddf->anchor.guid + 16);
1804 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1805
1806 info->array.utime = 0;
1807 info->array.chunk_size = 0;
1808 info->container_enough = 1;
1809
1810 info->disk.major = 0;
1811 info->disk.minor = 0;
1812 if (ddf->dlist) {
1813 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
1814 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
1815
1816 info->data_offset = __be64_to_cpu(ddf->phys->
1817 entries[info->disk.raid_disk].
1818 config_size);
1819 info->component_size = ddf->dlist->size - info->data_offset;
1820 } else {
1821 info->disk.number = -1;
1822 info->disk.raid_disk = -1;
1823 // info->disk.raid_disk = find refnum in the table and use index;
1824 }
1825 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
1826
1827 info->recovery_start = MaxSector;
1828 info->reshape_active = 0;
1829 info->recovery_blocked = 0;
1830 info->name[0] = 0;
1831
1832 info->array.major_version = -1;
1833 info->array.minor_version = -2;
1834 strcpy(info->text_version, "ddf");
1835 info->safe_mode_delay = 0;
1836
1837 uuid_from_super_ddf(st, info->uuid);
1838
1839 if (map) {
1840 int i;
1841 for (i = 0 ; i < map_disks; i++) {
1842 if (i < info->array.raid_disks &&
1843 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1844 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1845 map[i] = 1;
1846 else
1847 map[i] = 0;
1848 }
1849 }
1850 }
1851
1852 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
1853 {
1854 struct ddf_super *ddf = st->sb;
1855 struct vcl *vc = ddf->currentconf;
1856 int cd = ddf->currentdev;
1857 int n_prim;
1858 int j;
1859 struct dl *dl;
1860 int map_disks = info->array.raid_disks;
1861 __u32 *cptr;
1862 struct vd_config *conf;
1863
1864 memset(info, 0, sizeof(*info));
1865 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1866 return;
1867 info->array.md_minor = -1;
1868 cptr = (__u32 *)(vc->conf.guid + 16);
1869 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1870 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1871 info->array.chunk_size = 512 << vc->conf.chunk_shift;
1872 info->custom_array_size = 0;
1873
1874 conf = &vc->conf;
1875 n_prim = __be16_to_cpu(conf->prim_elmnt_count);
1876 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1877 int ibvd = cd / n_prim - 1;
1878 cd %= n_prim;
1879 conf = vc->other_bvds[ibvd];
1880 }
1881
1882 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
1883 info->data_offset =
1884 __be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
1885 if (vc->block_sizes)
1886 info->component_size = vc->block_sizes[cd];
1887 else
1888 info->component_size = __be64_to_cpu(conf->blocks);
1889 }
1890
1891 for (dl = ddf->dlist; dl ; dl = dl->next)
1892 if (dl->disk.refnum == conf->phys_refnum[cd])
1893 break;
1894
1895 info->disk.major = 0;
1896 info->disk.minor = 0;
1897 info->disk.state = 0;
1898 if (dl) {
1899 info->disk.major = dl->major;
1900 info->disk.minor = dl->minor;
1901 info->disk.raid_disk = cd + conf->sec_elmnt_seq
1902 * __be16_to_cpu(conf->prim_elmnt_count);
1903 info->disk.number = dl->pdnum;
1904 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
1905 }
1906
1907 info->container_member = ddf->currentconf->vcnum;
1908
1909 info->recovery_start = MaxSector;
1910 info->resync_start = 0;
1911 info->reshape_active = 0;
1912 info->recovery_blocked = 0;
1913 if (!(ddf->virt->entries[info->container_member].state
1914 & DDF_state_inconsistent) &&
1915 (ddf->virt->entries[info->container_member].init_state
1916 & DDF_initstate_mask)
1917 == DDF_init_full)
1918 info->resync_start = MaxSector;
1919
1920 uuid_from_super_ddf(st, info->uuid);
1921
1922 info->array.major_version = -1;
1923 info->array.minor_version = -2;
1924 sprintf(info->text_version, "/%s/%d",
1925 st->container_devnm,
1926 info->container_member);
1927 info->safe_mode_delay = 200;
1928
1929 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1930 info->name[16]=0;
1931 for(j=0; j<16; j++)
1932 if (info->name[j] == ' ')
1933 info->name[j] = 0;
1934
1935 if (map)
1936 for (j = 0; j < map_disks; j++) {
1937 map[j] = 0;
1938 if (j < info->array.raid_disks) {
1939 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
1940 if (i >= 0 &&
1941 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1942 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1943 map[i] = 1;
1944 }
1945 }
1946 }
1947
/*
 * Apply a named update to the metadata.  Nothing is modified at
 * present; the function only reports which update names are accepted.
 *
 * For 'assemble' and 'force' we need to return non-zero if any
 * change was made.  For others, the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *		be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *	   if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *		linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * Returns 0 for "grow", "resync", "_reshape_progress" and "assemble",
 * and -1 for everything else, including "homehost" and "name".
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/*
	 * We don't need to handle "force-*" or "assemble" as there is no
	 * need to 'trick' the kernel.  When the metadata is first updated
	 * to activate the array, all the implied modifications will just
	 * happen.
	 *
	 * Accepted without doing anything:
	 *   grow              - FIXME, not implemented
	 *   resync            - would reset the resync checkpoint
	 *   _reshape_progress - we don't support reshape yet
	 *   assemble          - do nothing, just succeed
	 *
	 * Rejected for now:
	 *   homehost - is stored in controller->vendor_data, or it is
	 *              when we are the vendor
	 *   name     - is stored in virtual_entry->name
	 */
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
2016
2017 static void make_header_guid(char *guid)
2018 {
2019 __u32 stamp;
2020 /* Create a DDF Header of Virtual Disk GUID */
2021
2022 /* 24 bytes of fiction required.
2023 * first 8 are a 'vendor-id' - "Linux-MD"
2024 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2025 * Remaining 8 random number plus timestamp
2026 */
2027 memcpy(guid, T10, sizeof(T10));
2028 stamp = __cpu_to_be32(0xdeadbeef);
2029 memcpy(guid+8, &stamp, 4);
2030 stamp = __cpu_to_be32(0);
2031 memcpy(guid+12, &stamp, 4);
2032 stamp = __cpu_to_be32(time(0) - DECADE);
2033 memcpy(guid+16, &stamp, 4);
2034 stamp = random32();
2035 memcpy(guid+20, &stamp, 4);
2036 }
2037
2038 static unsigned int find_unused_vde(const struct ddf_super *ddf)
2039 {
2040 unsigned int i;
2041 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2042 if (all_ff(ddf->virt->entries[i].guid))
2043 return i;
2044 }
2045 return DDF_NOTFOUND;
2046 }
2047
2048 static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2049 const char *name)
2050 {
2051 unsigned int i;
2052 if (name == NULL)
2053 return DDF_NOTFOUND;
2054 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2055 if (all_ff(ddf->virt->entries[i].guid))
2056 continue;
2057 if (!strncmp(name, ddf->virt->entries[i].name,
2058 sizeof(ddf->virt->entries[i].name)))
2059 return i;
2060 }
2061 return DDF_NOTFOUND;
2062 }
2063
2064 static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2065 const char *guid)
2066 {
2067 unsigned int i;
2068 if (guid == NULL || all_ff(guid))
2069 return DDF_NOTFOUND;
2070 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2071 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2072 return i;
2073 return DDF_NOTFOUND;
2074 }
2075
2076 static int init_super_ddf_bvd(struct supertype *st,
2077 mdu_array_info_t *info,
2078 unsigned long long size,
2079 char *name, char *homehost,
2080 int *uuid, unsigned long long data_offset);
2081
2082 static int init_super_ddf(struct supertype *st,
2083 mdu_array_info_t *info,
2084 unsigned long long size, char *name, char *homehost,
2085 int *uuid, unsigned long long data_offset)
2086 {
2087 /* This is primarily called by Create when creating a new array.
2088 * We will then get add_to_super called for each component, and then
2089 * write_init_super called to write it out to each device.
2090 * For DDF, Create can create on fresh devices or on a pre-existing
2091 * array.
2092 * To create on a pre-existing array a different method will be called.
2093 * This one is just for fresh drives.
2094 *
2095 * We need to create the entire 'ddf' structure which includes:
2096 * DDF headers - these are easy.
2097 * Controller data - a Sector describing this controller .. not that
2098 * this is a controller exactly.
2099 * Physical Disk Record - one entry per device, so
2100 * leave plenty of space.
2101 * Virtual Disk Records - again, just leave plenty of space.
2102 * This just lists VDs, doesn't give details
2103 * Config records - describes the VDs that use this disk
2104 * DiskData - describes 'this' device.
2105 * BadBlockManagement - empty
2106 * Diag Space - empty
2107 * Vendor Logs - Could we put bitmaps here?
2108 *
2109 */
2110 struct ddf_super *ddf;
2111 char hostname[17];
2112 int hostlen;
2113 int max_phys_disks, max_virt_disks;
2114 unsigned long long sector;
2115 int clen;
2116 int i;
2117 int pdsize, vdsize;
2118 struct phys_disk *pd;
2119 struct virtual_disk *vd;
2120
2121 if (data_offset != INVALID_SECTORS) {
2122 pr_err("data-offset not supported by DDF\n");
2123 return 0;
2124 }
2125
2126 if (st->sb)
2127 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2128 data_offset);
2129
2130 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
2131 pr_err("%s could not allocate superblock\n", __func__);
2132 return 0;
2133 }
2134 memset(ddf, 0, sizeof(*ddf));
2135 ddf->dlist = NULL; /* no physical disks yet */
2136 ddf->conflist = NULL; /* No virtual disks yet */
2137 st->sb = ddf;
2138
2139 if (info == NULL) {
2140 /* zeroing superblock */
2141 return 0;
2142 }
2143
2144 /* At least 32MB *must* be reserved for the ddf. So let's just
2145 * start 32MB from the end, and put the primary header there.
2146 * Don't do secondary for now.
2147 * We don't know exactly where that will be yet as it could be
2148 * different on each device. To just set up the lengths.
2149 *
2150 */
2151
2152 ddf->anchor.magic = DDF_HEADER_MAGIC;
2153 make_header_guid(ddf->anchor.guid);
2154
2155 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
2156 ddf->anchor.seq = __cpu_to_be32(1);
2157 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2158 ddf->anchor.openflag = 0xFF;
2159 ddf->anchor.foreignflag = 0;
2160 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2161 ddf->anchor.pad0 = 0xff;
2162 memset(ddf->anchor.pad1, 0xff, 12);
2163 memset(ddf->anchor.header_ext, 0xff, 32);
2164 ddf->anchor.primary_lba = ~(__u64)0;
2165 ddf->anchor.secondary_lba = ~(__u64)0;
2166 ddf->anchor.type = DDF_HEADER_ANCHOR;
2167 memset(ddf->anchor.pad2, 0xff, 3);
2168 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2169 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2170 of 32M reserved.. */
2171 max_phys_disks = 1023; /* Should be enough */
2172 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2173 max_virt_disks = 255;
2174 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2175 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2176 ddf->max_part = 64;
2177 ddf->mppe = 256;
2178 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2179 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2180 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
2181 memset(ddf->anchor.pad3, 0xff, 54);
2182 /* controller sections is one sector long immediately
2183 * after the ddf header */
2184 sector = 1;
2185 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2186 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2187 sector += 1;
2188
2189 /* phys is 8 sectors after that */
2190 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2191 sizeof(struct phys_disk_entry)*max_phys_disks,
2192 512);
2193 switch(pdsize/512) {
2194 case 2: case 8: case 32: case 128: case 512: break;
2195 default: abort();
2196 }
2197 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2198 ddf->anchor.phys_section_length =
2199 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2200 sector += pdsize/512;
2201
2202 /* virt is another 32 sectors */
2203 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2204 sizeof(struct virtual_entry) * max_virt_disks,
2205 512);
2206 switch(vdsize/512) {
2207 case 2: case 8: case 32: case 128: case 512: break;
2208 default: abort();
2209 }
2210 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2211 ddf->anchor.virt_section_length =
2212 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2213 sector += vdsize/512;
2214
2215 clen = ddf->conf_rec_len * (ddf->max_part+1);
2216 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2217 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2218 sector += clen;
2219
2220 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2221 ddf->anchor.data_section_length = __cpu_to_be32(1);
2222 sector += 1;
2223
2224 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2225 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2226 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2227 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2228 ddf->anchor.vendor_length = __cpu_to_be32(0);
2229 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2230
2231 memset(ddf->anchor.pad4, 0xff, 256);
2232
2233 memcpy(&ddf->primary, &ddf->anchor, 512);
2234 memcpy(&ddf->secondary, &ddf->anchor, 512);
2235
2236 ddf->primary.openflag = 1; /* I guess.. */
2237 ddf->primary.type = DDF_HEADER_PRIMARY;
2238
2239 ddf->secondary.openflag = 1; /* I guess.. */
2240 ddf->secondary.type = DDF_HEADER_SECONDARY;
2241
2242 ddf->active = &ddf->primary;
2243
2244 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2245
2246 /* 24 more bytes of fiction required.
2247 * first 8 are a 'vendor-id' - "Linux-MD"
2248 * Remaining 16 are serial number.... maybe a hostname would do?
2249 */
2250 memcpy(ddf->controller.guid, T10, sizeof(T10));
2251 gethostname(hostname, sizeof(hostname));
2252 hostname[sizeof(hostname) - 1] = 0;
2253 hostlen = strlen(hostname);
2254 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2255 for (i = strlen(T10) ; i+hostlen < 24; i++)
2256 ddf->controller.guid[i] = ' ';
2257
2258 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2259 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2260 ddf->controller.type.sub_vendor_id = 0;
2261 ddf->controller.type.sub_device_id = 0;
2262 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2263 memset(ddf->controller.pad, 0xff, 8);
2264 memset(ddf->controller.vendor_data, 0xff, 448);
2265 if (homehost && strlen(homehost) < 440)
2266 strcpy((char*)ddf->controller.vendor_data, homehost);
2267
2268 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
2269 pr_err("%s could not allocate pd\n", __func__);
2270 return 0;
2271 }
2272 ddf->phys = pd;
2273 ddf->pdsize = pdsize;
2274
2275 memset(pd, 0xff, pdsize);
2276 memset(pd, 0, sizeof(*pd));
2277 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2278 pd->used_pdes = __cpu_to_be16(0);
2279 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2280 memset(pd->pad, 0xff, 52);
2281 for (i = 0; i < max_phys_disks; i++)
2282 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
2283
2284 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
2285 pr_err("%s could not allocate vd\n", __func__);
2286 return 0;
2287 }
2288 ddf->virt = vd;
2289 ddf->vdsize = vdsize;
2290 memset(vd, 0, vdsize);
2291 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2292 vd->populated_vdes = __cpu_to_be16(0);
2293 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2294 memset(vd->pad, 0xff, 52);
2295
2296 for (i=0; i<max_virt_disks; i++)
2297 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2298
2299 st->sb = ddf;
2300 ddf_set_updates_pending(ddf);
2301 return 1;
2302 }
2303
/* Convert a chunk size given in bytes into a shift count: the zero-based
 * index of the lowest set bit of the chunk size expressed in 512-byte
 * units (i.e. log2 for power-of-two chunks).
 * Yields -1 when chunksize is below 512, because ffs(0) is 0.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;

	return ffs(sectors) - 1;
}
2308
2309 #ifndef MDASSEMBLE
/* One used region of a physical disk, as reported by get_extents(). */
struct extent {
	unsigned long long start;	/* offset at which the region begins */
	unsigned long long size;	/* length of the region; 0 marks the
					 * terminating entry of an array */
};
2313 static int cmp_extent(const void *av, const void *bv)
2314 {
2315 const struct extent *a = av;
2316 const struct extent *b = bv;
2317 if (a->start < b->start)
2318 return -1;
2319 if (a->start > b->start)
2320 return 1;
2321 return 0;
2322 }
2323
2324 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
2325 {
2326 /* find a list of used extents on the give physical device
2327 * (dnum) of the given ddf.
2328 * Return a malloced array of 'struct extent'
2329
2330 * FIXME ignore DDF_Legacy devices?
2331
2332 */
2333 struct extent *rv;
2334 int n = 0;
2335 unsigned int i;
2336
2337 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
2338
2339 for (i = 0; i < ddf->max_part; i++) {
2340 const struct vd_config *bvd;
2341 unsigned int ibvd;
2342 struct vcl *v = dl->vlist[i];
2343 if (v == NULL ||
2344 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2345 &bvd, &ibvd) == DDF_NOTFOUND)
2346 continue;
2347 rv[n].start = __be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2348 rv[n].size = __be64_to_cpu(bvd->blocks);
2349 n++;
2350 }
2351 qsort(rv, n, sizeof(*rv), cmp_extent);
2352
2353 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2354 rv[n].size = 0;
2355 return rv;
2356 }
2357 #endif
2358
2359 static int init_super_ddf_bvd(struct supertype *st,
2360 mdu_array_info_t *info,
2361 unsigned long long size,
2362 char *name, char *homehost,
2363 int *uuid, unsigned long long data_offset)
2364 {
2365 /* We are creating a BVD inside a pre-existing container.
2366 * so st->sb is already set.
2367 * We need to create a new vd_config and a new virtual_entry
2368 */
2369 struct ddf_super *ddf = st->sb;
2370 unsigned int venum, i;
2371 struct virtual_entry *ve;
2372 struct vcl *vcl;
2373 struct vd_config *vc;
2374
2375 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2376 pr_err("This ddf already has an array called %s\n", name);
2377 return 0;
2378 }
2379 venum = find_unused_vde(ddf);
2380 if (venum == DDF_NOTFOUND) {
2381 pr_err("Cannot find spare slot for virtual disk\n");
2382 return 0;
2383 }
2384 ve = &ddf->virt->entries[venum];
2385
2386 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2387 * timestamp, random number
2388 */
2389 make_header_guid(ve->guid);
2390 ve->unit = __cpu_to_be16(info->md_minor);
2391 ve->pad0 = 0xFFFF;
2392 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2393 ve->type = 0;
2394 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2395 if (info->state & 1) /* clean */
2396 ve->init_state = DDF_init_full;
2397 else
2398 ve->init_state = DDF_init_not;
2399
2400 memset(ve->pad1, 0xff, 14);
2401 memset(ve->name, ' ', 16);
2402 if (name)
2403 strncpy(ve->name, name, 16);
2404 ddf->virt->populated_vdes =
2405 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2406
2407 /* Now create a new vd_config */
2408 if (posix_memalign((void**)&vcl, 512,
2409 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
2410 pr_err("%s could not allocate vd_config\n", __func__);
2411 return 0;
2412 }
2413 vcl->vcnum = venum;
2414 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
2415 vc = &vcl->conf;
2416
2417 vc->magic = DDF_VD_CONF_MAGIC;
2418 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2419 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2420 vc->seqnum = __cpu_to_be32(1);
2421 memset(vc->pad0, 0xff, 24);
2422 vc->chunk_shift = chunk_to_shift(info->chunk_size);
2423 if (layout_md2ddf(info, vc) == -1 ||
2424 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2425 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2426 __func__, info->level, info->layout, info->raid_disks);
2427 free(vcl);
2428 return 0;
2429 }
2430 vc->sec_elmnt_seq = 0;
2431 if (alloc_other_bvds(ddf, vcl) != 0) {
2432 pr_err("%s could not allocate other bvds\n",
2433 __func__);
2434 free(vcl);
2435 return 0;
2436 }
2437 vc->blocks = __cpu_to_be64(info->size * 2);
2438 vc->array_blocks = __cpu_to_be64(
2439 calc_array_size(info->level, info->raid_disks, info->layout,
2440 info->chunk_size, info->size*2));
2441 memset(vc->pad1, 0xff, 8);
2442 vc->spare_refs[0] = 0xffffffff;
2443 vc->spare_refs[1] = 0xffffffff;
2444 vc->spare_refs[2] = 0xffffffff;
2445 vc->spare_refs[3] = 0xffffffff;
2446 vc->spare_refs[4] = 0xffffffff;
2447 vc->spare_refs[5] = 0xffffffff;
2448 vc->spare_refs[6] = 0xffffffff;
2449 vc->spare_refs[7] = 0xffffffff;
2450 memset(vc->cache_pol, 0, 8);
2451 vc->bg_rate = 0x80;
2452 memset(vc->pad2, 0xff, 3);
2453 memset(vc->pad3, 0xff, 52);
2454 memset(vc->pad4, 0xff, 192);
2455 memset(vc->v0, 0xff, 32);
2456 memset(vc->v1, 0xff, 32);
2457 memset(vc->v2, 0xff, 16);
2458 memset(vc->v3, 0xff, 16);
2459 memset(vc->vendor, 0xff, 32);
2460
2461 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
2462 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
2463
2464 for (i = 1; i < vc->sec_elmnt_count; i++) {
2465 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2466 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2467 }
2468
2469 vcl->next = ddf->conflist;
2470 ddf->conflist = vcl;
2471 ddf->currentconf = vcl;
2472 ddf_set_updates_pending(ddf);
2473 return 1;
2474 }
2475
2476 static int get_svd_state(const struct ddf_super *, const struct vcl *);
2477
2478 #ifndef MDASSEMBLE
2479 static void add_to_super_ddf_bvd(struct supertype *st,
2480 mdu_disk_info_t *dk, int fd, char *devname)
2481 {
2482 /* fd and devname identify a device with-in the ddf container (st).
2483 * dk identifies a location in the new BVD.
2484 * We need to find suitable free space in that device and update
2485 * the phys_refnum and lba_offset for the newly created vd_config.
2486 * We might also want to update the type in the phys_disk
2487 * section.
2488 *
2489 * Alternately: fd == -1 and we have already chosen which device to
2490 * use and recorded in dlist->raid_disk;
2491 */
2492 struct dl *dl;
2493 struct ddf_super *ddf = st->sb;
2494 struct vd_config *vc;
2495 unsigned int i;
2496 unsigned long long blocks, pos, esize;
2497 struct extent *ex;
2498 unsigned int raid_disk = dk->raid_disk;
2499
2500 if (fd == -1) {
2501 for (dl = ddf->dlist; dl ; dl = dl->next)
2502 if (dl->raiddisk == dk->raid_disk)
2503 break;
2504 } else {
2505 for (dl = ddf->dlist; dl ; dl = dl->next)
2506 if (dl->major == dk->major &&
2507 dl->minor == dk->minor)
2508 break;
2509 }
2510 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2511 return;
2512
2513 vc = &ddf->currentconf->conf;
2514 if (vc->sec_elmnt_count > 1) {
2515 unsigned int n = __be16_to_cpu(vc->prim_elmnt_count);
2516 if (raid_disk >= n)
2517 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2518 raid_disk %= n;
2519 }
2520
2521 ex = get_extents(ddf, dl);
2522 if (!ex)
2523 return;
2524
2525 i = 0; pos = 0;
2526 blocks = __be64_to_cpu(vc->blocks);
2527 if (ddf->currentconf->block_sizes)
2528 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
2529
2530 do {
2531 esize = ex[i].start - pos;
2532 if (esize >= blocks)
2533 break;
2534 pos = ex[i].start + ex[i].size;
2535 i++;
2536 } while (ex[i-1].size);
2537
2538 free(ex);
2539 if (esize < blocks)
2540 return;
2541
2542 ddf->currentdev = dk->raid_disk;
2543 vc->phys_refnum[raid_disk] = dl->disk.refnum;
2544 LBA_OFFSET(ddf, vc)[raid_disk] = __cpu_to_be64(pos);
2545
2546 for (i = 0; i < ddf->max_part ; i++)
2547 if (dl->vlist[i] == NULL)
2548 break;
2549 if (i == ddf->max_part)
2550 return;
2551 dl->vlist[i] = ddf->currentconf;
2552
2553 if (fd >= 0)
2554 dl->fd = fd;
2555 if (devname)
2556 dl->devname = devname;
2557
2558 /* Check if we can mark array as optimal yet */
2559 i = ddf->currentconf->vcnum;
2560 ddf->virt->entries[i].state =
2561 (ddf->virt->entries[i].state & ~DDF_state_mask)
2562 | get_svd_state(ddf, ddf->currentconf);
2563 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2564 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
2565 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
2566 __func__, dl->pdnum, __be32_to_cpu(dl->disk.refnum),
2567 ddf->currentconf->vcnum, guid_str(vc->guid),
2568 dk->raid_disk);
2569 ddf_set_updates_pending(ddf);
2570 }
2571
2572 static unsigned int find_unused_pde(const struct ddf_super *ddf)
2573 {
2574 unsigned int i;
2575 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++) {
2576 if (all_ff(ddf->phys->entries[i].guid))
2577 return i;
2578 }
2579 return DDF_NOTFOUND;
2580 }
2581
2582 /* add a device to a container, either while creating it or while
2583 * expanding a pre-existing container
2584 */
2585 static int add_to_super_ddf(struct supertype *st,
2586 mdu_disk_info_t *dk, int fd, char *devname,
2587 unsigned long long data_offset)
2588 {
2589 struct ddf_super *ddf = st->sb;
2590 struct dl *dd;
2591 time_t now;
2592 struct tm *tm;
2593 unsigned long long size;
2594 struct phys_disk_entry *pde;
2595 unsigned int n, i;
2596 struct stat stb;
2597 __u32 *tptr;
2598
2599 if (ddf->currentconf) {
2600 add_to_super_ddf_bvd(st, dk, fd, devname);
2601 return 0;
2602 }
2603
2604 /* This is device numbered dk->number. We need to create
2605 * a phys_disk entry and a more detailed disk_data entry.
2606 */
2607 fstat(fd, &stb);
2608 n = find_unused_pde(ddf);
2609 if (n == DDF_NOTFOUND) {
2610 pr_err("%s: No free slot in array, cannot add disk\n",
2611 __func__);
2612 return 1;
2613 }
2614 pde = &ddf->phys->entries[n];
2615 get_dev_size(fd, NULL, &size);
2616 if (size <= 32*1024*1024) {
2617 pr_err("%s: device size must be at least 32MB\n",
2618 __func__);
2619 return 1;
2620 }
2621 size >>= 9;
2622
2623 if (posix_memalign((void**)&dd, 512,
2624 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
2625 pr_err("%s could allocate buffer for new disk, aborting\n",
2626 __func__);
2627 return 1;
2628 }
2629 dd->major = major(stb.st_rdev);
2630 dd->minor = minor(stb.st_rdev);
2631 dd->devname = devname;
2632 dd->fd = fd;
2633 dd->spare = NULL;
2634
2635 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2636 now = time(0);
2637 tm = localtime(&now);
2638 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2639 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
2640 tptr = (__u32 *)(dd->disk.guid + 16);
2641 *tptr++ = random32();
2642 *tptr = random32();
2643
2644 do {
2645 /* Cannot be bothered finding a CRC of some irrelevant details*/
2646 dd->disk.refnum = random32();
2647 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2648 i > 0; i--)
2649 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
2650 break;
2651 } while (i > 0);
2652
2653 dd->disk.forced_ref = 1;
2654 dd->disk.forced_guid = 1;
2655 memset(dd->disk.vendor, ' ', 32);
2656 memcpy(dd->disk.vendor, "Linux", 5);
2657 memset(dd->disk.pad, 0xff, 442);
2658 for (i = 0; i < ddf->max_part ; i++)
2659 dd->vlist[i] = NULL;
2660
2661 dd->pdnum = n;
2662
2663 if (st->update_tail) {
2664 int len = (sizeof(struct phys_disk) +
2665 sizeof(struct phys_disk_entry));
2666 struct phys_disk *pd;
2667
2668 pd = xmalloc(len);
2669 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2670 pd->used_pdes = __cpu_to_be16(n);
2671 pde = &pd->entries[0];
2672 dd->mdupdate = pd;
2673 } else
2674 ddf->phys->used_pdes = __cpu_to_be16(
2675 1 + __be16_to_cpu(ddf->phys->used_pdes));
2676
2677 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2678 pde->refnum = dd->disk.refnum;
2679 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
2680 pde->state = __cpu_to_be16(DDF_Online);
2681 dd->size = size;
2682 /*
2683 * If there is already a device in dlist, try to reserve the same
2684 * amount of workspace. Otherwise, use 32MB.
2685 * We checked disk size above already.
2686 */
2687 #define __calc_lba(new, old, lba, mb) do { \
2688 unsigned long long dif; \
2689 if ((old) != NULL) \
2690 dif = (old)->size - __be64_to_cpu((old)->lba); \
2691 else \
2692 dif = (new)->size; \
2693 if ((new)->size > dif) \
2694 (new)->lba = __cpu_to_be64((new)->size - dif); \
2695 else \
2696 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2697 } while (0)
2698 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2699 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2700 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2701 pde->config_size = dd->workspace_lba;
2702
2703 sprintf(pde->path, "%17.17s","Information: nil") ;
2704 memset(pde->pad, 0xff, 6);
2705
2706 if (st->update_tail) {
2707 dd->next = ddf->add_list;
2708 ddf->add_list = dd;
2709 } else {
2710 dd->next = ddf->dlist;
2711 ddf->dlist = dd;
2712 ddf_set_updates_pending(ddf);
2713 }
2714
2715 return 0;
2716 }
2717
2718 static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2719 {
2720 struct ddf_super *ddf = st->sb;
2721 struct dl *dl;
2722
2723 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2724 * disappeared from the container.
2725 * We need to arrange that it disappears from the metadata and
2726 * internal data structures too.
2727 * Most of the work is done by ddf_process_update which edits
2728 * the metadata and closes the file handle and attaches the memory
2729 * where free_updates will free it.
2730 */
2731 for (dl = ddf->dlist; dl ; dl = dl->next)
2732 if (dl->major == dk->major &&
2733 dl->minor == dk->minor)
2734 break;
2735 if (!dl)
2736 return -1;
2737
2738 if (st->update_tail) {
2739 int len = (sizeof(struct phys_disk) +
2740 sizeof(struct phys_disk_entry));
2741 struct phys_disk *pd;
2742
2743 pd = xmalloc(len);
2744 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2745 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2746 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2747 append_metadata_update(st, pd, len);
2748 }
2749 return 0;
2750 }
2751
2752 /*
2753 * This is the write_init_super method for a ddf container. It is
2754 * called when creating a container or adding another device to a
2755 * container.
2756 */
/* Size in bytes of the filler buffer 'null_aligned' below. */
#define NULL_CONF_SZ 4096

/* Filler buffer of NULL_CONF_SZ bytes, all 0xff.  It is allocated on first
 * use by __write_ddf_structure(), which writes it out in place of unused
 * config records.
 */
static char *null_aligned;
2760 static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
2761 {
2762 unsigned long long sector;
2763 struct ddf_header *header;
2764 int fd, i, n_config, conf_size;
2765 int ret = 0;
2766
2767 if (null_aligned == NULL) {
2768 if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
2769 != 0)
2770 return 0;
2771 memset(null_aligned, 0xff, NULL_CONF_SZ);
2772 }
2773
2774 fd = d->fd;
2775
2776 switch (type) {
2777 case DDF_HEADER_PRIMARY:
2778 header = &ddf->primary;
2779 sector = __be64_to_cpu(header->primary_lba);
2780 break;
2781 case DDF_HEADER_SECONDARY:
2782 header = &ddf->secondary;
2783 sector = __be64_to_cpu(header->secondary_lba);
2784 break;
2785 default:
2786 return 0;
2787 }
2788
2789 header->type = type;
2790 header->openflag = 1;
2791 header->crc = calc_crc(header, 512);
2792
2793 lseek64(fd, sector<<9, 0);
2794 if (write(fd, header, 512) < 0)
2795 goto out;
2796
2797 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2798 if (write(fd, &ddf->controller, 512) < 0)
2799 goto out;
2800
2801 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2802 if (write(fd, ddf->phys, ddf->pdsize) < 0)
2803 goto out;
2804 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2805 if (write(fd, ddf->virt, ddf->vdsize) < 0)
2806 goto out;
2807
2808 /* Now write lots of config records. */
2809 n_config = ddf->max_part;
2810 conf_size = ddf->conf_rec_len * 512;
2811 for (i = 0 ; i <= n_config ; i++) {
2812 struct vcl *c;
2813 struct vd_config *vdc = NULL;
2814 if (i == n_config) {
2815 c = (struct vcl *)d->spare;
2816 if (c)
2817 vdc = &c->conf;
2818 } else {
2819 unsigned int dummy;
2820 c = d->vlist[i];
2821 if (c)
2822 get_pd_index_from_refnum(
2823 c, d->disk.refnum,
2824 ddf->mppe,
2825 (const struct vd_config **)&vdc,
2826 &dummy);
2827 }
2828 if (c) {
2829 dprintf("writing conf record %i on disk %08x for %s/%u\n",
2830 i, __be32_to_cpu(d->disk.refnum),
2831 guid_str(vdc->guid),
2832 vdc->sec_elmnt_seq);
2833 vdc->seqnum = header->seq;
2834 vdc->crc = calc_crc(vdc, conf_size);
2835 if (write(fd, vdc, conf_size) < 0)
2836 break;
2837 } else {
2838 unsigned int togo = conf_size;
2839 while (togo > NULL_CONF_SZ) {
2840 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2841 break;
2842 togo -= NULL_CONF_SZ;
2843 }
2844 if (write(fd, null_aligned, togo) < 0)
2845 break;
2846 }
2847 }
2848 if (i <= n_config)
2849 goto out;
2850
2851 d->disk.crc = calc_crc(&d->disk, 512);
2852 if (write(fd, &d->disk, 512) < 0)
2853 goto out;
2854
2855 ret = 1;
2856 out:
2857 header->openflag = 0;
2858 header->crc = calc_crc(header, 512);
2859
2860 lseek64(fd, sector<<9, 0);
2861 if (write(fd, header, 512) < 0)
2862 ret = 0;
2863
2864 return ret;
2865 }
2866
2867 static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
2868 {
2869 unsigned long long size;
2870 int fd = d->fd;
2871 if (fd < 0)
2872 return 0;
2873
2874 /* We need to fill in the primary, (secondary) and workspace
2875 * lba's in the headers, set their checksums,
2876 * Also checksum phys, virt....
2877 *
2878 * Then write everything out, finally the anchor is written.
2879 */
2880 get_dev_size(fd, NULL, &size);
2881 size /= 512;
2882 if (d->workspace_lba != 0)
2883 ddf->anchor.workspace_lba = d->workspace_lba;
2884 else
2885 ddf->anchor.workspace_lba =
2886 __cpu_to_be64(size - 32*1024*2);
2887 if (d->primary_lba != 0)
2888 ddf->anchor.primary_lba = d->primary_lba;
2889 else
2890 ddf->anchor.primary_lba =
2891 __cpu_to_be64(size - 16*1024*2);
2892 if (d->secondary_lba != 0)
2893 ddf->anchor.secondary_lba = d->secondary_lba;
2894 else
2895 ddf->anchor.secondary_lba =
2896 __cpu_to_be64(size - 32*1024*2);
2897 ddf->anchor.seq = ddf->active->seq;
2898 memcpy(&ddf->primary, &ddf->anchor, 512);
2899 memcpy(&ddf->secondary, &ddf->anchor, 512);
2900
2901 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2902 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2903 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2904
2905 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
2906 return 0;
2907
2908 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
2909 return 0;
2910
2911 lseek64(fd, (size-1)*512, SEEK_SET);
2912 if (write(fd, &ddf->anchor, 512) < 0)
2913 return 0;
2914
2915 return 1;
2916 }
2917
2918 static int __write_init_super_ddf(struct supertype *st)
2919 {
2920 struct ddf_super *ddf = st->sb;
2921 struct dl *d;
2922 int attempts = 0;
2923 int successes = 0;
2924
2925 pr_state(ddf, __func__);
2926
2927 /* try to write updated metadata,
2928 * if we catch a failure move on to the next disk
2929 */
2930 for (d = ddf->dlist; d; d=d->next) {
2931 attempts++;
2932 successes += _write_super_to_disk(ddf, d);
2933 }
2934
2935 return attempts != successes;
2936 }
2937
2938 static int write_init_super_ddf(struct supertype *st)
2939 {
2940 struct ddf_super *ddf = st->sb;
2941 struct vcl *currentconf = ddf->currentconf;
2942
2943 /* we are done with currentconf reset it to point st at the container */
2944 ddf->currentconf = NULL;
2945
2946 if (st->update_tail) {
2947 /* queue the virtual_disk and vd_config as metadata updates */
2948 struct virtual_disk *vd;
2949 struct vd_config *vc;
2950 int len, tlen;
2951 unsigned int i;
2952
2953 if (!currentconf) {
2954 int len = (sizeof(struct phys_disk) +
2955 sizeof(struct phys_disk_entry));
2956
2957 /* adding a disk to the container. */
2958 if (!ddf->add_list)
2959 return 0;
2960
2961 append_metadata_update(st, ddf->add_list->mdupdate, len);
2962 ddf->add_list->mdupdate = NULL;
2963 return 0;
2964 }
2965
2966 /* Newly created VD */
2967
2968 /* First the virtual disk. We have a slightly fake header */
2969 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
2970 vd = xmalloc(len);
2971 *vd = *ddf->virt;
2972 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2973 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
2974 append_metadata_update(st, vd, len);
2975
2976 /* Then the vd_config */
2977 len = ddf->conf_rec_len * 512;
2978 tlen = len * currentconf->conf.sec_elmnt_count;
2979 vc = xmalloc(tlen);
2980 memcpy(vc, &currentconf->conf, len);
2981 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
2982 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
2983 len);
2984 append_metadata_update(st, vc, tlen);
2985
2986 /* FIXME I need to close the fds! */
2987 return 0;
2988 } else {
2989 struct dl *d;
2990 if (!currentconf)
2991 for (d = ddf->dlist; d; d=d->next)
2992 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
2993 return __write_init_super_ddf(st);
2994 }
2995 }
2996
2997 #endif
2998
/* Return how much of a device of 'devsize' remains once the metadata
 * reservation is taken off the end, or 0 when the device is not larger
 * than the reservation.  st and data_offset are not used.
 */
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
			    unsigned long long data_offset)
{
	/* We must reserve the last 32Meg */
	const __u64 reserved = 32*1024*2;

	return devsize > reserved ? devsize - reserved : 0;
}
3007
3008 #ifndef MDASSEMBLE
3009
3010 static int reserve_space(struct supertype *st, int raiddisks,
3011 unsigned long long size, int chunk,
3012 unsigned long long *freesize)
3013 {
3014 /* Find 'raiddisks' spare extents at least 'size' big (but
3015 * only caring about multiples of 'chunk') and remember
3016 * them.
3017 * If the cannot be found, fail.
3018 */
3019 struct dl *dl;
3020 struct ddf_super *ddf = st->sb;
3021 int cnt = 0;
3022
3023 for (dl = ddf->dlist; dl ; dl=dl->next) {
3024 dl->raiddisk = -1;
3025 dl->esize = 0;
3026 }
3027 /* Now find largest extent on each device */
3028 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3029 struct extent *e = get_extents(ddf, dl);
3030 unsigned long long pos = 0;
3031 int i = 0;
3032 int found = 0;
3033 unsigned long long minsize = size;
3034
3035 if (size == 0)
3036 minsize = chunk;
3037
3038 if (!e)
3039 continue;
3040 do {
3041 unsigned long long esize;
3042 esize = e[i].start - pos;
3043 if (esize >= minsize) {
3044 found = 1;
3045 minsize = esize;
3046 }
3047 pos = e[i].start + e[i].size;
3048 i++;
3049 } while (e[i-1].size);
3050 if (found) {
3051 cnt++;
3052 dl->esize = minsize;
3053 }
3054 free(e);
3055 }
3056 if (cnt < raiddisks) {
3057 pr_err("not enough devices with space to create array.\n");
3058 return 0; /* No enough free spaces large enough */
3059 }
3060 if (size == 0) {
3061 /* choose the largest size of which there are at least 'raiddisk' */
3062 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3063 struct dl *dl2;
3064 if (dl->esize <= size)
3065 continue;
3066 /* This is bigger than 'size', see if there are enough */
3067 cnt = 0;
3068 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
3069 if (dl2->esize >= dl->esize)
3070 cnt++;
3071 if (cnt >= raiddisks)
3072 size = dl->esize;
3073 }
3074 if (chunk) {
3075 size = size / chunk;
3076 size *= chunk;
3077 }
3078 *freesize = size;
3079 if (size < 32) {
3080 pr_err("not enough spare devices to create array.\n");
3081 return 0;
3082 }
3083 }
3084 /* We have a 'size' of which there are enough spaces.
3085 * We simply do a first-fit */
3086 cnt = 0;
3087 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3088 if (dl->esize < size)
3089 continue;
3090
3091 dl->raiddisk = cnt;
3092 cnt++;
3093 }
3094 return 1;
3095 }
3096
3097 static int
3098 validate_geometry_ddf_container(struct supertype *st,
3099 int level, int layout, int raiddisks,
3100 int chunk, unsigned long long size,
3101 unsigned long long data_offset,
3102 char *dev, unsigned long long *freesize,
3103 int verbose);
3104
3105 static int validate_geometry_ddf_bvd(struct supertype *st,
3106 int level, int layout, int raiddisks,
3107 int *chunk, unsigned long long size,
3108 unsigned long long data_offset,
3109 char *dev, unsigned long long *freesize,
3110 int verbose);
3111
3112 static int validate_geometry_ddf(struct supertype *st,
3113 int level, int layout, int raiddisks,
3114 int *chunk, unsigned long long size,
3115 unsigned long long data_offset,
3116 char *dev, unsigned long long *freesize,
3117 int verbose)
3118 {
3119 int fd;
3120 struct mdinfo *sra;
3121 int cfd;
3122
3123 /* ddf potentially supports lots of things, but it depends on
3124 * what devices are offered (and maybe kernel version?)
3125 * If given unused devices, we will make a container.
3126 * If given devices in a container, we will make a BVD.
3127 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3128 */
3129
3130 if (chunk && *chunk == UnSet)
3131 *chunk = DEFAULT_CHUNK;
3132
3133 if (level == -1000000) level = LEVEL_CONTAINER;
3134 if (level == LEVEL_CONTAINER) {
3135 /* Must be a fresh device to add to a container */
3136 return validate_geometry_ddf_container(st, level, layout,
3137 raiddisks, chunk?*chunk:0,
3138 size, data_offset, dev,
3139 freesize,
3140 verbose);
3141 }
3142
3143 if (!dev) {
3144 mdu_array_info_t array = {
3145 .level = level, .layout = layout,
3146 .raid_disks = raiddisks
3147 };
3148 struct vd_config conf;
3149 if (layout_md2ddf(&array, &conf) == -1) {
3150 if (verbose)
3151 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3152 level, layout, raiddisks);
3153 return 0;
3154 }
3155 /* Should check layout? etc */
3156
3157 if (st->sb && freesize) {
3158 /* --create was given a container to create in.
3159 * So we need to check that there are enough
3160 * free spaces and return the amount of space.
3161 * We may as well remember which drives were
3162 * chosen so that add_to_super/getinfo_super
3163 * can return them.
3164 */
3165 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
3166 }
3167 return 1;
3168 }
3169
3170 if (st->sb) {
3171 /* A container has already been opened, so we are
3172 * creating in there. Maybe a BVD, maybe an SVD.
3173 * Should make a distinction one day.
3174 */
3175 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
3176 chunk, size, data_offset, dev,
3177 freesize,
3178 verbose);
3179 }
3180 /* This is the first device for the array.
3181 * If it is a container, we read it in and do automagic allocations,
3182 * no other devices should be given.
3183 * Otherwise it must be a member device of a container, and we
3184 * do manual allocation.
3185 * Later we should check for a BVD and make an SVD.
3186 */
3187 fd = open(dev, O_RDONLY|O_EXCL, 0);
3188 if (fd >= 0) {
3189 sra = sysfs_read(fd, NULL, GET_VERSION);
3190 close(fd);
3191 if (sra && sra->array.major_version == -1 &&
3192 strcmp(sra->text_version, "ddf") == 0) {
3193
3194 /* load super */
3195 /* find space for 'n' devices. */
3196 /* remember the devices */
3197 /* Somehow return the fact that we have enough */
3198 }
3199
3200 if (verbose)
3201 pr_err("ddf: Cannot create this array "
3202 "on device %s - a container is required.\n",
3203 dev);
3204 return 0;
3205 }
3206 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
3207 if (verbose)
3208 pr_err("ddf: Cannot open %s: %s\n",
3209 dev, strerror(errno));
3210 return 0;
3211 }
3212 /* Well, it is in use by someone, maybe a 'ddf' container. */
3213 cfd = open_container(fd);
3214 if (cfd < 0) {
3215 close(fd);
3216 if (verbose)
3217 pr_err("ddf: Cannot use %s: %s\n",
3218 dev, strerror(EBUSY));
3219 return 0;
3220 }
3221 sra = sysfs_read(cfd, NULL, GET_VERSION);
3222 close(fd);
3223 if (sra && sra->array.major_version == -1 &&
3224 strcmp(sra->text_version, "ddf") == 0) {
3225 /* This is a member of a ddf container. Load the container
3226 * and try to create a bvd
3227 */
3228 struct ddf_super *ddf;
3229 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
3230 st->sb = ddf;
3231 strcpy(st->container_devnm, fd2devnm(cfd));
3232 close(cfd);
3233 return validate_geometry_ddf_bvd(st, level, layout,
3234 raiddisks, chunk, size,
3235 data_offset,
3236 dev, freesize,
3237 verbose);
3238 }
3239 close(cfd);
3240 } else /* device may belong to a different container */
3241 return 0;
3242
3243 return 1;
3244 }
3245
3246 static int
3247 validate_geometry_ddf_container(struct supertype *st,
3248 int level, int layout, int raiddisks,
3249 int chunk, unsigned long long size,
3250 unsigned long long data_offset,
3251 char *dev, unsigned long long *freesize,
3252 int verbose)
3253 {
3254 int fd;
3255 unsigned long long ldsize;
3256
3257 if (level != LEVEL_CONTAINER)
3258 return 0;
3259 if (!dev)
3260 return 1;
3261
3262 fd = open(dev, O_RDONLY|O_EXCL, 0);
3263 if (fd < 0) {
3264 if (verbose)
3265 pr_err("ddf: Cannot open %s: %s\n",
3266 dev, strerror(errno));
3267 return 0;
3268 }
3269 if (!get_dev_size(fd, dev, &ldsize)) {
3270 close(fd);
3271 return 0;
3272 }
3273 close(fd);
3274
3275 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
3276 if (*freesize == 0)
3277 return 0;
3278
3279 return 1;
3280 }
3281
3282 static int validate_geometry_ddf_bvd(struct supertype *st,
3283 int level, int layout, int raiddisks,
3284 int *chunk, unsigned long long size,
3285 unsigned long long data_offset,
3286 char *dev, unsigned long long *freesize,
3287 int verbose)
3288 {
3289 struct stat stb;
3290 struct ddf_super *ddf = st->sb;
3291 struct dl *dl;
3292 unsigned long long pos = 0;
3293 unsigned long long maxsize;
3294 struct extent *e;
3295 int i;
3296 /* ddf/bvd supports lots of things, but not containers */
3297 if (level == LEVEL_CONTAINER) {
3298 if (verbose)
3299 pr_err("DDF cannot create a container within an container\n");
3300 return 0;
3301 }
3302 /* We must have the container info already read in. */
3303 if (!ddf)
3304 return 0;
3305
3306 if (!dev) {
3307 /* General test: make sure there is space for
3308 * 'raiddisks' device extents of size 'size'.
3309 */
3310 unsigned long long minsize = size;
3311 int dcnt = 0;
3312 if (minsize == 0)
3313 minsize = 8;
3314 for (dl = ddf->dlist; dl ; dl = dl->next)
3315 {
3316 int found = 0;
3317 pos = 0;
3318
3319 i = 0;
3320 e = get_extents(ddf, dl);
3321 if (!e) continue;
3322 do {
3323 unsigned long long esize;
3324 esize = e[i].start - pos;
3325 if (esize >= minsize)
3326 found = 1;
3327 pos = e[i].start + e[i].size;
3328 i++;
3329 } while (e[i-1].size);
3330 if (found)
3331 dcnt++;
3332 free(e);
3333 }
3334 if (dcnt < raiddisks) {
3335 if (verbose)
3336 pr_err("ddf: Not enough devices with "
3337 "space for this array (%d < %d)\n",
3338 dcnt, raiddisks);
3339 return 0;
3340 }
3341 return 1;
3342 }
3343 /* This device must be a member of the set */
3344 if (stat(dev, &stb) < 0)
3345 return 0;
3346 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3347 return 0;
3348 for (dl = ddf->dlist ; dl ; dl = dl->next) {
3349 if (dl->major == (int)major(stb.st_rdev) &&
3350 dl->minor == (int)minor(stb.st_rdev))
3351 break;
3352 }
3353 if (!dl) {
3354 if (verbose)
3355 pr_err("ddf: %s is not in the "
3356 "same DDF set\n",
3357 dev);
3358 return 0;
3359 }
3360 e = get_extents(ddf, dl);
3361 maxsize = 0;
3362 i = 0;
3363 if (e) do {
3364 unsigned long long esize;
3365 esize = e[i].start - pos;
3366 if (esize >= maxsize)
3367 maxsize = esize;
3368 pos = e[i].start + e[i].size;
3369 i++;
3370 } while (e[i-1].size);
3371 *freesize = maxsize;
3372 // FIXME here I am
3373
3374 return 1;
3375 }
3376
3377 static int load_super_ddf_all(struct supertype *st, int fd,
3378 void **sbp, char *devname)
3379 {
3380 struct mdinfo *sra;
3381 struct ddf_super *super;
3382 struct mdinfo *sd, *best = NULL;
3383 int bestseq = 0;
3384 int seq;
3385 char nm[20];
3386 int dfd;
3387
3388 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
3389 if (!sra)
3390 return 1;
3391 if (sra->array.major_version != -1 ||
3392 sra->array.minor_version != -2 ||
3393 strcmp(sra->text_version, "ddf") != 0)
3394 return 1;
3395
3396 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
3397 return 1;
3398 memset(super, 0, sizeof(*super));
3399
3400 /* first, try each device, and choose the best ddf */
3401 for (sd = sra->devs ; sd ; sd = sd->next) {
3402 int rv;
3403 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3404 dfd = dev_open(nm, O_RDONLY);
3405 if (dfd < 0)
3406 return 2;
3407 rv = load_ddf_headers(dfd, super, NULL);
3408 close(dfd);
3409 if (rv == 0) {
3410 seq = __be32_to_cpu(super->active->seq);
3411 if (super->active->openflag)
3412 seq--;
3413 if (!best || seq > bestseq) {
3414 bestseq = seq;
3415 best = sd;
3416 }
3417 }
3418 }
3419 if (!best)
3420 return 1;
3421 /* OK, load this ddf */
3422 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3423 dfd = dev_open(nm, O_RDONLY);
3424 if (dfd < 0)
3425 return 1;
3426 load_ddf_headers(dfd, super, NULL);
3427 load_ddf_global(dfd, super, NULL);
3428 close(dfd);
3429 /* Now we need the device-local bits */
3430 for (sd = sra->devs ; sd ; sd = sd->next) {
3431 int rv;
3432
3433 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3434 dfd = dev_open(nm, O_RDWR);
3435 if (dfd < 0)
3436 return 2;
3437 rv = load_ddf_headers(dfd, super, NULL);
3438 if (rv == 0)
3439 rv = load_ddf_local(dfd, super, NULL, 1);
3440 if (rv)
3441 return 1;
3442 }
3443
3444 *sbp = super;
3445 if (st->ss == NULL) {
3446 st->ss = &super_ddf;
3447 st->minor_version = 0;
3448 st->max_devs = 512;
3449 }
3450 strcpy(st->container_devnm, fd2devnm(fd));
3451 return 0;
3452 }
3453
3454 static int load_container_ddf(struct supertype *st, int fd,
3455 char *devname)
3456 {
3457 return load_super_ddf_all(st, fd, &st->sb, devname);
3458 }
3459
3460 #endif /* MDASSEMBLE */
3461
3462 static int check_secondary(const struct vcl *vc)
3463 {
3464 const struct vd_config *conf = &vc->conf;
3465 int i;
3466
3467 /* The only DDF secondary RAID level md can support is
3468 * RAID 10, if the stripe sizes and Basic volume sizes
3469 * are all equal.
3470 * Other configurations could in theory be supported by exposing
3471 * the BVDs to user space and using device mapper for the secondary
3472 * mapping. So far we don't support that.
3473 */
3474
3475 __u64 sec_elements[4] = {0, 0, 0, 0};
3476 #define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3477 #define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3478
3479 if (vc->other_bvds == NULL) {
3480 pr_err("No BVDs for secondary RAID found\n");
3481 return -1;
3482 }
3483 if (conf->prl != DDF_RAID1) {
3484 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3485 return -1;
3486 }
3487 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3488 pr_err("Secondary RAID level %d is unsupported\n",
3489 conf->srl);
3490 return -1;
3491 }
3492 __set_sec_seen(conf->sec_elmnt_seq);
3493 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3494 const struct vd_config *bvd = vc->other_bvds[i];
3495 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
3496 continue;
3497 if (bvd->srl != conf->srl) {
3498 pr_err("Inconsistent secondary RAID level across BVDs\n");
3499 return -1;
3500 }
3501 if (bvd->prl != conf->prl) {
3502 pr_err("Different RAID levels for BVDs are unsupported\n");
3503 return -1;
3504 }
3505 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3506 pr_err("All BVDs must have the same number of primary elements\n");
3507 return -1;
3508 }
3509 if (bvd->chunk_shift != conf->chunk_shift) {
3510 pr_err("Different strip sizes for BVDs are unsupported\n");
3511 return -1;
3512 }
3513 if (bvd->array_blocks != conf->array_blocks) {
3514 pr_err("Different BVD sizes are unsupported\n");
3515 return -1;
3516 }
3517 __set_sec_seen(bvd->sec_elmnt_seq);
3518 }
3519 for (i = 0; i < conf->sec_elmnt_count; i++) {
3520 if (!__was_sec_seen(i)) {
3521 pr_err("BVD %d is missing\n", i);
3522 return -1;
3523 }
3524 }
3525 return 0;
3526 }
3527
3528 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
3529 __u32 refnum, unsigned int nmax,
3530 const struct vd_config **bvd,
3531 unsigned int *idx)
3532 {
3533 unsigned int i, j, n, sec, cnt;
3534
3535 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3536 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3537
3538 for (i = 0, j = 0 ; i < nmax ; i++) {
3539 /* j counts valid entries for this BVD */
3540 if (vc->conf.phys_refnum[i] != 0xffffffff)
3541 j++;
3542 if (vc->conf.phys_refnum[i] == refnum) {
3543 *bvd = &vc->conf;
3544 *idx = i;
3545 return sec * cnt + j - 1;
3546 }
3547 }
3548 if (vc->other_bvds == NULL)
3549 goto bad;
3550
3551 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3552 struct vd_config *vd = vc->other_bvds[n-1];
3553 sec = vd->sec_elmnt_seq;
3554 if (sec == DDF_UNUSED_BVD)
3555 continue;
3556 for (i = 0, j = 0 ; i < nmax ; i++) {
3557 if (vd->phys_refnum[i] != 0xffffffff)
3558 j++;
3559 if (vd->phys_refnum[i] == refnum) {
3560 *bvd = vd;
3561 *idx = i;
3562 return sec * cnt + j - 1;
3563 }
3564 }
3565 }
3566 bad:
3567 *bvd = NULL;
3568 return DDF_NOTFOUND;
3569 }
3570
3571 static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
3572 {
3573 /* Given a container loaded by load_super_ddf_all,
3574 * extract information about all the arrays into
3575 * an mdinfo tree.
3576 *
3577 * For each vcl in conflist: create an mdinfo, fill it in,
3578 * then look for matching devices (phys_refnum) in dlist
3579 * and create appropriate device mdinfo.
3580 */
3581 struct ddf_super *ddf = st->sb;
3582 struct mdinfo *rest = NULL;
3583 struct vcl *vc;
3584
3585 for (vc = ddf->conflist ; vc ; vc=vc->next)
3586 {
3587 unsigned int i;
3588 unsigned int j;
3589 struct mdinfo *this;
3590 char *ep;
3591 __u32 *cptr;
3592 unsigned int pd;
3593
3594 if (subarray &&
3595 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3596 *ep != '\0'))
3597 continue;
3598
3599 if (vc->conf.sec_elmnt_count > 1) {
3600 if (check_secondary(vc) != 0)
3601 continue;
3602 }
3603
3604 this = xcalloc(1, sizeof(*this));
3605 this->next = rest;
3606 rest = this;
3607
3608 if (layout_ddf2md(&vc->conf, &this->array))
3609 continue;
3610 this->array.md_minor = -1;
3611 this->array.major_version = -1;
3612 this->array.minor_version = -2;
3613 cptr = (__u32 *)(vc->conf.guid + 16);
3614 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
3615 this->array.utime = DECADE +
3616 __be32_to_cpu(vc->conf.timestamp);
3617 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3618
3619 i = vc->vcnum;
3620 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3621 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
3622 DDF_init_full) {
3623 this->array.state = 0;
3624 this->resync_start = 0;
3625 } else {
3626 this->array.state = 1;
3627 this->resync_start = MaxSector;
3628 }
3629 memcpy(this->name, ddf->virt->entries[i].name, 16);
3630 this->name[16]=0;
3631 for(j=0; j<16; j++)
3632 if (this->name[j] == ' ')
3633 this->name[j] = 0;
3634
3635 memset(this->uuid, 0, sizeof(this->uuid));
3636 this->component_size = __be64_to_cpu(vc->conf.blocks);
3637 this->array.size = this->component_size / 2;
3638 this->container_member = i;
3639
3640 ddf->currentconf = vc;
3641 uuid_from_super_ddf(st, this->uuid);
3642 if (!subarray)
3643 ddf->currentconf = NULL;
3644
3645 sprintf(this->text_version, "/%s/%d",
3646 st->container_devnm, this->container_member);
3647
3648 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
3649 struct mdinfo *dev;
3650 struct dl *d;
3651 const struct vd_config *bvd;
3652 unsigned int iphys;
3653 int stt;
3654
3655 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
3656 continue;
3657
3658 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
3659 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3660 != DDF_Online)
3661 continue;
3662
3663 i = get_pd_index_from_refnum(
3664 vc, ddf->phys->entries[pd].refnum,
3665 ddf->mppe, &bvd, &iphys);
3666 if (i == DDF_NOTFOUND)
3667 continue;
3668
3669 this->array.working_disks++;
3670
3671 for (d = ddf->dlist; d ; d=d->next)
3672 if (d->disk.refnum ==
3673 ddf->phys->entries[pd].refnum)
3674 break;
3675 if (d == NULL)
3676 /* Haven't found that one yet, maybe there are others */
3677 continue;
3678
3679 dev = xcalloc(1, sizeof(*dev));
3680 dev->next = this->devs;
3681 this->devs = dev;
3682
3683 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3684 dev->disk.major = d->major;
3685 dev->disk.minor = d->minor;
3686 dev->disk.raid_disk = i;
3687 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
3688 dev->recovery_start = MaxSector;
3689
3690 dev->events = __be32_to_cpu(ddf->primary.seq);
3691 dev->data_offset =
3692 __be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
3693 dev->component_size = __be64_to_cpu(bvd->blocks);
3694 if (d->devname)
3695 strcpy(dev->name, d->devname);
3696 }
3697 }
3698 return rest;
3699 }
3700
3701 static int store_super_ddf(struct supertype *st, int fd)
3702 {
3703 struct ddf_super *ddf = st->sb;
3704 unsigned long long dsize;
3705 void *buf;
3706 int rc;
3707
3708 if (!ddf)
3709 return 1;
3710
3711 if (!get_dev_size(fd, NULL, &dsize))
3712 return 1;
3713
3714 if (ddf->dlist || ddf->conflist) {
3715 struct stat sta;
3716 struct dl *dl;
3717 int ofd, ret;
3718
3719 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3720 pr_err("%s: file descriptor for invalid device\n",
3721 __func__);
3722 return 1;
3723 }
3724 for (dl = ddf->dlist; dl; dl = dl->next)
3725 if (dl->major == (int)major(sta.st_rdev) &&
3726 dl->minor == (int)minor(sta.st_rdev))
3727 break;
3728 if (!dl) {
3729 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3730 (int)major(sta.st_rdev),
3731 (int)minor(sta.st_rdev));
3732 return 1;
3733 }
3734 ofd = dl->fd;
3735 dl->fd = fd;
3736 ret = (_write_super_to_disk(ddf, dl) != 1);
3737 dl->fd = ofd;
3738 return ret;
3739 }
3740
3741 if (posix_memalign(&buf, 512, 512) != 0)
3742 return 1;
3743 memset(buf, 0, 512);
3744
3745 lseek64(fd, dsize-512, 0);
3746 rc = write(fd, buf, 512);
3747 free(buf);
3748 if (rc < 0)
3749 return 1;
3750 return 0;
3751 }
3752
3753 static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3754 {
3755 /*
3756 * return:
3757 * 0 same, or first was empty, and second was copied
3758 * 1 second had wrong number
3759 * 2 wrong uuid
3760 * 3 wrong other info
3761 */
3762 struct ddf_super *first = st->sb;
3763 struct ddf_super *second = tst->sb;
3764 struct dl *dl1, *dl2;
3765 struct vcl *vl1, *vl2;
3766 unsigned int max_vds, max_pds, pd, vd;
3767
3768 if (!first) {
3769 st->sb = tst->sb;
3770 tst->sb = NULL;
3771 return 0;
3772 }
3773
3774 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3775 return 2;
3776
3777 if (first->anchor.seq != second->anchor.seq) {
3778 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3779 __be32_to_cpu(first->anchor.seq),
3780 __be32_to_cpu(second->anchor.seq));
3781 return 3;
3782 }
3783 if (first->max_part != second->max_part ||
3784 first->phys->used_pdes != second->phys->used_pdes ||
3785 first->virt->populated_vdes != second->virt->populated_vdes) {
3786 dprintf("%s: PD/VD number mismatch\n", __func__);
3787 return 3;
3788 }
3789
3790 max_pds = __be16_to_cpu(first->phys->used_pdes);
3791 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3792 for (pd = 0; pd < max_pds; pd++)
3793 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3794 break;
3795 if (pd == max_pds) {
3796 dprintf("%s: no match for disk %08x\n", __func__,
3797 __be32_to_cpu(dl2->disk.refnum));
3798 return 3;
3799 }
3800 }
3801
3802 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3803 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3804 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3805 continue;
3806 for (vd = 0; vd < max_vds; vd++)
3807 if (!memcmp(first->virt->entries[vd].guid,
3808 vl2->conf.guid, DDF_GUID_LEN))
3809 break;
3810 if (vd == max_vds) {
3811 dprintf("%s: no match for VD config\n", __func__);
3812 return 3;
3813 }
3814 }
3815 /* FIXME should I look at anything else? */
3816
3817 /*
3818 At this point we are fairly sure that the meta data matches.
3819 But the new disk may contain additional local data.
3820 Add it to the super block.
3821 */
3822 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3823 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3824 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3825 DDF_GUID_LEN))
3826 break;
3827 if (vl1) {
3828 if (vl1->other_bvds != NULL &&
3829 vl1->conf.sec_elmnt_seq !=
3830 vl2->conf.sec_elmnt_seq) {
3831 dprintf("%s: adding BVD %u\n", __func__,
3832 vl2->conf.sec_elmnt_seq);
3833 add_other_bvd(vl1, &vl2->conf,
3834 first->conf_rec_len*512);
3835 }
3836 continue;
3837 }
3838
3839 if (posix_memalign((void **)&vl1, 512,
3840 (first->conf_rec_len*512 +
3841 offsetof(struct vcl, conf))) != 0) {
3842 pr_err("%s could not allocate vcl buf\n",
3843 __func__);
3844 return 3;
3845 }
3846
3847 vl1->next = first->conflist;
3848 vl1->block_sizes = NULL;
3849 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3850 if (alloc_other_bvds(first, vl1) != 0) {
3851 pr_err("%s could not allocate other bvds\n",
3852 __func__);
3853 free(vl1);
3854 return 3;
3855 }
3856 for (vd = 0; vd < max_vds; vd++)
3857 if (!memcmp(first->virt->entries[vd].guid,
3858 vl1->conf.guid, DDF_GUID_LEN))
3859 break;
3860 vl1->vcnum = vd;
3861 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3862 first->conflist = vl1;
3863 }
3864
3865 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3866 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3867 if (dl1->disk.refnum == dl2->disk.refnum)
3868 break;
3869 if (dl1)
3870 continue;
3871
3872 if (posix_memalign((void **)&dl1, 512,
3873 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3874 != 0) {
3875 pr_err("%s could not allocate disk info buffer\n",
3876 __func__);
3877 return 3;
3878 }
3879 memcpy(dl1, dl2, sizeof(*dl1));
3880 dl1->mdupdate = NULL;
3881 dl1->next = first->dlist;
3882 dl1->fd = -1;
3883 for (pd = 0; pd < max_pds; pd++)
3884 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3885 break;
3886 dl1->pdnum = pd;
3887 if (dl2->spare) {
3888 if (posix_memalign((void **)&dl1->spare, 512,
3889 first->conf_rec_len*512) != 0) {
3890 pr_err("%s could not allocate spare info buf\n",
3891 __func__);
3892 return 3;
3893 }
3894 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3895 }
3896 for (vd = 0 ; vd < first->max_part ; vd++) {
3897 if (!dl2->vlist[vd]) {
3898 dl1->vlist[vd] = NULL;
3899 continue;
3900 }
3901 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3902 if (!memcmp(vl1->conf.guid,
3903 dl2->vlist[vd]->conf.guid,
3904 DDF_GUID_LEN))
3905 break;
3906 dl1->vlist[vd] = vl1;
3907 }
3908 }
3909 first->dlist = dl1;
3910 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3911 __be32_to_cpu(dl1->disk.refnum));
3912 }
3913
3914 return 0;
3915 }
3916
3917 #ifndef MDASSEMBLE
3918 /*
3919 * A new array 'a' has been started which claims to be instance 'inst'
3920 * within container 'c'.
3921 * We need to confirm that the array matches the metadata in 'c' so
3922 * that we don't corrupt any metadata.
3923 */
3924 static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
3925 {
3926 struct ddf_super *ddf = c->sb;
3927 int n = atoi(inst);
3928 if (all_ff(ddf->virt->entries[n].guid)) {
3929 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
3930 return -ENODEV;
3931 }
3932 dprintf("ddf: open_new %d\n", n);
3933 a->info.container_member = n;
3934 return 0;
3935 }
3936
3937 /*
3938 * The array 'a' is to be marked clean in the metadata.
3939 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
3940 * clean up to the point (in sectors). If that cannot be recorded in the
3941 * metadata, then leave it as dirty.
3942 *
3943 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3944 * !global! virtual_disk.virtual_entry structure.
3945 */
3946 static int ddf_set_array_state(struct active_array *a, int consistent)
3947 {
3948 struct ddf_super *ddf = a->container->sb;
3949 int inst = a->info.container_member;
3950 int old = ddf->virt->entries[inst].state;
3951 if (consistent == 2) {
3952 /* Should check if a recovery should be started FIXME */
3953 consistent = 1;
3954 if (!is_resync_complete(&a->info))
3955 consistent = 0;
3956 }
3957 if (consistent)
3958 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3959 else
3960 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
3961 if (old != ddf->virt->entries[inst].state)
3962 ddf_set_updates_pending(ddf);
3963
3964 old = ddf->virt->entries[inst].init_state;
3965 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
3966 if (is_resync_complete(&a->info))
3967 ddf->virt->entries[inst].init_state |= DDF_init_full;
3968 else if (a->info.resync_start == 0)
3969 ddf->virt->entries[inst].init_state |= DDF_init_not;
3970 else
3971 ddf->virt->entries[inst].init_state |= DDF_init_quick;
3972 if (old != ddf->virt->entries[inst].init_state)
3973 ddf_set_updates_pending(ddf);
3974
3975 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
3976 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
3977 consistent?"clean":"dirty",
3978 a->info.resync_start);
3979 return consistent;
3980 }
3981
3982 static int get_bvd_state(const struct ddf_super *ddf,
3983 const struct vd_config *vc)
3984 {
3985 unsigned int i, n_bvd, working = 0;
3986 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3987 int pd, st, state;
3988 for (i = 0; i < n_prim; i++) {
3989 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3990 continue;
3991 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3992 if (pd < 0)
3993 continue;
3994 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3995 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3996 == DDF_Online)
3997 working++;
3998 }
3999
4000 state = DDF_state_degraded;
4001 if (working == n_prim)
4002 state = DDF_state_optimal;
4003 else
4004 switch (vc->prl) {
4005 case DDF_RAID0:
4006 case DDF_CONCAT:
4007 case DDF_JBOD:
4008 state = DDF_state_failed;
4009 break;
4010 case DDF_RAID1:
4011 if (working == 0)
4012 state = DDF_state_failed;
4013 else if (working >= 2)
4014 state = DDF_state_part_optimal;
4015 break;
4016 case DDF_RAID4:
4017 case DDF_RAID5:
4018 if (working < n_prim - 1)
4019 state = DDF_state_failed;
4020 break;
4021 case DDF_RAID6:
4022 if (working < n_prim - 2)
4023 state = DDF_state_failed;
4024 else if (working == n_prim - 1)
4025 state = DDF_state_part_optimal;
4026 break;
4027 }
4028 return state;
4029 }
4030
4031 static int secondary_state(int state, int other, int seclevel)
4032 {
4033 if (state == DDF_state_optimal && other == DDF_state_optimal)
4034 return DDF_state_optimal;
4035 if (seclevel == DDF_2MIRRORED) {
4036 if (state == DDF_state_optimal || other == DDF_state_optimal)
4037 return DDF_state_part_optimal;
4038 if (state == DDF_state_failed && other == DDF_state_failed)
4039 return DDF_state_failed;
4040 return DDF_state_degraded;
4041 } else {
4042 if (state == DDF_state_failed || other == DDF_state_failed)
4043 return DDF_state_failed;
4044 if (state == DDF_state_degraded || other == DDF_state_degraded)
4045 return DDF_state_degraded;
4046 return DDF_state_part_optimal;
4047 }
4048 }
4049
4050 static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4051 {
4052 int state = get_bvd_state(ddf, &vcl->conf);
4053 unsigned int i;
4054 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4055 state = secondary_state(
4056 state,
4057 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4058 vcl->conf.srl);
4059 }
4060 return state;
4061 }
4062
4063 /*
4064 * The state of each disk is stored in the global phys_disk structure
4065 * in phys_disk.entries[n].state.
4066 * This makes various combinations awkward.
4067 * - When a device fails in any array, it must be failed in all arrays
4068 * that include a part of this device.
4069 * - When a component is rebuilding, we cannot include it officially in the
4070 * array unless this is the only array that uses the device.
4071 *
4072 * So: when transitioning:
4073 * Online -> failed, just set failed flag. monitor will propagate
4074 * spare -> online, the device might need to be added to the array.
4075 * spare -> failed, just set failed. Don't worry if in array or not.
4076 */
4077 static void ddf_set_disk(struct active_array *a, int n, int state)
4078 {
4079 struct ddf_super *ddf = a->container->sb;
4080 unsigned int inst = a->info.container_member, n_bvd;
4081 struct vcl *vcl;
4082 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4083 &n_bvd, &vcl);
4084 int pd;
4085 struct mdinfo *mdi;
4086 struct dl *dl;
4087
4088 if (vc == NULL) {
4089 dprintf("ddf: cannot find instance %d!!\n", inst);
4090 return;
4091 }
4092 /* Find the matching slot in 'info'. */
4093 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4094 if (mdi->disk.raid_disk == n)
4095 break;
4096 if (!mdi)
4097 return;
4098
4099 /* and find the 'dl' entry corresponding to that. */
4100 for (dl = ddf->dlist; dl; dl = dl->next)
4101 if (mdi->state_fd >= 0 &&
4102 mdi->disk.major == dl->major &&
4103 mdi->disk.minor == dl->minor)
4104 break;
4105 if (!dl)
4106 return;
4107
4108 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4109 if (pd < 0 || pd != dl->pdnum) {
4110 /* disk doesn't currently exist or has changed.
4111 * If it is now in_sync, insert it. */
4112 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4113 __func__, dl->pdnum, dl->major, dl->minor,
4114 __be32_to_cpu(dl->disk.refnum));
4115 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4116 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
4117 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
4118 pd = dl->pdnum; /* FIXME: is this really correct ? */
4119 vc->phys_refnum[n_bvd] = dl->disk.refnum;
4120 LBA_OFFSET(ddf, vc)[n_bvd] =
4121 __cpu_to_be64(mdi->data_offset);
4122 ddf->phys->entries[pd].type &=
4123 ~__cpu_to_be16(DDF_Global_Spare);
4124 ddf->phys->entries[pd].type |=
4125 __cpu_to_be16(DDF_Active_in_VD);
4126 ddf_set_updates_pending(ddf);
4127 }
4128 } else {
4129 int old = ddf->phys->entries[pd].state;
4130 if (state & DS_FAULTY)
4131 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4132 if (state & DS_INSYNC) {
4133 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4134 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4135 }
4136 if (old != ddf->phys->entries[pd].state)
4137 ddf_set_updates_pending(ddf);
4138 }
4139
4140 dprintf("ddf: set_disk %d to %x\n", n, state);
4141
4142 /* Now we need to check the state of the array and update
4143 * virtual_disk.entries[n].state.
4144 * It needs to be one of "optimal", "degraded", "failed".
4145 * I don't understand 'deleted' or 'missing'.
4146 */
4147 state = get_svd_state(ddf, vcl);
4148
4149 if (ddf->virt->entries[inst].state !=
4150 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4151 | state)) {
4152
4153 ddf->virt->entries[inst].state =
4154 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4155 | state;
4156 ddf_set_updates_pending(ddf);
4157 }
4158
4159 }
4160
4161 static void ddf_sync_metadata(struct supertype *st)
4162 {
4163
4164 /*
4165 * Write all data to all devices.
4166 * Later, we might be able to track whether only local changes
4167 * have been made, or whether any global data has been changed,
4168 * but ddf is sufficiently weird that it probably always
4169 * changes global data ....
4170 */
4171 struct ddf_super *ddf = st->sb;
4172 if (!ddf->updates_pending)
4173 return;
4174 ddf->updates_pending = 0;
4175 __write_init_super_ddf(st);
4176 dprintf("ddf: sync_metadata\n");
4177 }
4178
4179 static int del_from_conflist(struct vcl **list, const char *guid)
4180 {
4181 struct vcl **p;
4182 int found = 0;
4183 for (p = list; p && *p; p = &((*p)->next))
4184 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4185 found = 1;
4186 *p = (*p)->next;
4187 }
4188 return found;
4189 }
4190
4191 static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4192 {
4193 struct dl *dl;
4194 unsigned int vdnum, i;
4195 vdnum = find_vde_by_guid(ddf, guid);
4196 if (vdnum == DDF_NOTFOUND) {
4197 pr_err("%s: could not find VD %s\n", __func__,
4198 guid_str(guid));
4199 return -1;
4200 }
4201 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4202 pr_err("%s: could not find conf %s\n", __func__,
4203 guid_str(guid));
4204 return -1;
4205 }
4206 for (dl = ddf->dlist; dl; dl = dl->next)
4207 for (i = 0; i < ddf->max_part; i++)
4208 if (dl->vlist[i] != NULL &&
4209 !memcmp(dl->vlist[i]->conf.guid, guid,
4210 DDF_GUID_LEN))
4211 dl->vlist[i] = NULL;
4212 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4213 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4214 return 0;
4215 }
4216
4217 static int kill_subarray_ddf(struct supertype *st)
4218 {
4219 struct ddf_super *ddf = st->sb;
4220 /*
4221 * currentconf is set in container_content_ddf,
4222 * called with subarray arg
4223 */
4224 struct vcl *victim = ddf->currentconf;
4225 struct vd_config *conf;
4226 ddf->currentconf = NULL;
4227 unsigned int vdnum;
4228 if (!victim) {
4229 pr_err("%s: nothing to kill\n", __func__);
4230 return -1;
4231 }
4232 conf = &victim->conf;
4233 vdnum = find_vde_by_guid(ddf, conf->guid);
4234 if (vdnum == DDF_NOTFOUND) {
4235 pr_err("%s: could not find VD %s\n", __func__,
4236 guid_str(conf->guid));
4237 return -1;
4238 }
4239 if (st->update_tail) {
4240 struct virtual_disk *vd;
4241 int len = sizeof(struct virtual_disk)
4242 + sizeof(struct virtual_entry);
4243 vd = xmalloc(len);
4244 if (vd == NULL) {
4245 pr_err("%s: failed to allocate %d bytes\n", __func__,
4246 len);
4247 return -1;
4248 }
4249 memset(vd, 0 , len);
4250 vd->magic = DDF_VIRT_RECORDS_MAGIC;
4251 vd->populated_vdes = 0;
4252 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4253 /* we use DDF_state_deleted as marker */
4254 vd->entries[0].state = DDF_state_deleted;
4255 append_metadata_update(st, vd, len);
4256 } else {
4257 _kill_subarray_ddf(ddf, conf->guid);
4258 ddf_set_updates_pending(ddf);
4259 ddf_sync_metadata(st);
4260 }
4261 return 0;
4262 }
4263
4264 static void copy_matching_bvd(struct ddf_super *ddf,
4265 struct vd_config *conf,
4266 const struct metadata_update *update)
4267 {
4268 unsigned int mppe =
4269 __be16_to_cpu(ddf->anchor.max_primary_element_entries);
4270 unsigned int len = ddf->conf_rec_len * 512;
4271 char *p;
4272 struct vd_config *vc;
4273 for (p = update->buf; p < update->buf + update->len; p += len) {
4274 vc = (struct vd_config *) p;
4275 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4276 memcpy(conf->phys_refnum, vc->phys_refnum,
4277 mppe * (sizeof(__u32) + sizeof(__u64)));
4278 return;
4279 }
4280 }
4281 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4282 conf->sec_elmnt_seq, guid_str(conf->guid));
4283 }
4284
4285 static void ddf_process_update(struct supertype *st,
4286 struct metadata_update *update)
4287 {
4288 /* Apply this update to the metadata.
4289 * The first 4 bytes are a DDF_*_MAGIC which guides
4290 * our actions.
4291 * Possible update are:
4292 * DDF_PHYS_RECORDS_MAGIC
4293 * Add a new physical device or remove an old one.
4294 * Changes to this record only happen implicitly.
4295 * used_pdes is the device number.
4296 * DDF_VIRT_RECORDS_MAGIC
4297 * Add a new VD. Possibly also change the 'access' bits.
4298 * populated_vdes is the entry number.
4299 * DDF_VD_CONF_MAGIC
4300 * New or updated VD. the VIRT_RECORD must already
4301 * exist. For an update, phys_refnum and lba_offset
4302 * (at least) are updated, and the VD_CONF must
4303 * be written to precisely those devices listed with
4304 * a phys_refnum.
4305 * DDF_SPARE_ASSIGN_MAGIC
4306 * replacement Spare Assignment Record... but for which device?
4307 *
4308 * So, e.g.:
4309 * - to create a new array, we send a VIRT_RECORD and
4310 * a VD_CONF. Then assemble and start the array.
4311 * - to activate a spare we send a VD_CONF to add the phys_refnum
4312 * and offset. This will also mark the spare as active with
4313 * a spare-assignment record.
4314 */
4315 struct ddf_super *ddf = st->sb;
4316 __u32 *magic = (__u32*)update->buf;
4317 struct phys_disk *pd;
4318 struct virtual_disk *vd;
4319 struct vd_config *vc;
4320 struct vcl *vcl;
4321 struct dl *dl;
4322 unsigned int ent;
4323 unsigned int pdnum, pd2, len;
4324
4325 dprintf("Process update %x\n", *magic);
4326
4327 switch (*magic) {
4328 case DDF_PHYS_RECORDS_MAGIC:
4329
4330 if (update->len != (sizeof(struct phys_disk) +
4331 sizeof(struct phys_disk_entry)))
4332 return;
4333 pd = (struct phys_disk*)update->buf;
4334
4335 ent = __be16_to_cpu(pd->used_pdes);
4336 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4337 return;
4338 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4339 struct dl **dlp;
4340 /* removing this disk. */
4341 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4342 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4343 struct dl *dl = *dlp;
4344 if (dl->pdnum == (signed)ent) {
4345 close(dl->fd);
4346 dl->fd = -1;
4347 /* FIXME this doesn't free
4348 * dl->devname */
4349 update->space = dl;
4350 *dlp = dl->next;
4351 break;
4352 }
4353 }
4354 ddf_set_updates_pending(ddf);
4355 return;
4356 }
4357 if (!all_ff(ddf->phys->entries[ent].guid))
4358 return;
4359 ddf->phys->entries[ent] = pd->entries[0];
4360 ddf->phys->used_pdes = __cpu_to_be16(1 +
4361 __be16_to_cpu(ddf->phys->used_pdes));
4362 ddf_set_updates_pending(ddf);
4363 if (ddf->add_list) {
4364 struct active_array *a;
4365 struct dl *al = ddf->add_list;
4366 ddf->add_list = al->next;
4367
4368 al->next = ddf->dlist;
4369 ddf->dlist = al;
4370
4371 /* As a device has been added, we should check
4372 * for any degraded devices that might make
4373 * use of this spare */
4374 for (a = st->arrays ; a; a=a->next)
4375 a->check_degraded = 1;
4376 }
4377 break;
4378
4379 case DDF_VIRT_RECORDS_MAGIC:
4380
4381 if (update->len != (sizeof(struct virtual_disk) +
4382 sizeof(struct virtual_entry)))
4383 return;
4384 vd = (struct virtual_disk*)update->buf;
4385
4386 if (vd->entries[0].state == DDF_state_deleted) {
4387 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4388 return;
4389 } else {
4390
4391 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4392 if (ent != DDF_NOTFOUND) {
4393 dprintf("%s: VD %s exists already in slot %d\n",
4394 __func__, guid_str(vd->entries[0].guid),
4395 ent);
4396 return;
4397 }
4398 ent = find_unused_vde(ddf);
4399 if (ent == DDF_NOTFOUND)
4400 return;
4401 ddf->virt->entries[ent] = vd->entries[0];
4402 ddf->virt->populated_vdes =
4403 __cpu_to_be16(
4404 1 + __be16_to_cpu(
4405 ddf->virt->populated_vdes));
4406 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4407 __func__, guid_str(vd->entries[0].guid), ent,
4408 ddf->virt->entries[ent].state,
4409 ddf->virt->entries[ent].init_state);
4410 }
4411 ddf_set_updates_pending(ddf);
4412 break;
4413
4414 case DDF_VD_CONF_MAGIC:
4415 vc = (struct vd_config*)update->buf;
4416 len = ddf->conf_rec_len * 512;
4417 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4418 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4419 __func__, guid_str(vc->guid), update->len,
4420 vc->sec_elmnt_count);
4421 return;
4422 }
4423 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4424 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4425 break;
4426 dprintf("%s: conf update for %s (%s)\n", __func__,
4427 guid_str(vc->guid), (vcl ? "old" : "new"));
4428 if (vcl) {
4429 /* An update, just copy the phys_refnum and lba_offset
4430 * fields
4431 */
4432 unsigned int i;
4433 copy_matching_bvd(ddf, &vcl->conf, update);
4434 for (i = 1; i < vc->sec_elmnt_count; i++)
4435 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4436 update);
4437 } else {
4438 /* A new VD_CONF */
4439 unsigned int i;
4440 if (!update->space)
4441 return;
4442 vcl = update->space;
4443 update->space = NULL;
4444 vcl->next = ddf->conflist;
4445 memcpy(&vcl->conf, vc, len);
4446 ent = find_vde_by_guid(ddf, vc->guid);
4447 if (ent == DDF_NOTFOUND)
4448 return;
4449 vcl->vcnum = ent;
4450 ddf->conflist = vcl;
4451 for (i = 1; i < vc->sec_elmnt_count; i++)
4452 memcpy(vcl->other_bvds[i-1],
4453 update->buf + len * i, len);
4454 }
4455 /* Set DDF_Transition on all Failed devices - to help
4456 * us detect those that are no longer in use
4457 */
4458 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4459 if (ddf->phys->entries[pdnum].state
4460 & __be16_to_cpu(DDF_Failed))
4461 ddf->phys->entries[pdnum].state
4462 |= __be16_to_cpu(DDF_Transition);
4463 /* Now make sure vlist is correct for each dl. */
4464 for (dl = ddf->dlist; dl; dl = dl->next) {
4465 unsigned int vn = 0;
4466 int in_degraded = 0;
4467 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4468 unsigned int dn, ibvd;
4469 const struct vd_config *conf;
4470 int vstate;
4471 dn = get_pd_index_from_refnum(vcl,
4472 dl->disk.refnum,
4473 ddf->mppe,
4474 &conf, &ibvd);
4475 if (dn == DDF_NOTFOUND)
4476 continue;
4477 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
4478 dl->pdnum,
4479 __be32_to_cpu(dl->disk.refnum),
4480 guid_str(conf->guid),
4481 conf->sec_elmnt_seq, vn);
4482 /* Clear the Transition flag */
4483 if (ddf->phys->entries[dl->pdnum].state
4484 & __be16_to_cpu(DDF_Failed))
4485 ddf->phys->entries[dl->pdnum].state &=
4486 ~__be16_to_cpu(DDF_Transition);
4487 dl->vlist[vn++] = vcl;
4488 vstate = ddf->virt->entries[vcl->vcnum].state
4489 & DDF_state_mask;
4490 if (vstate == DDF_state_degraded ||
4491 vstate == DDF_state_part_optimal)
4492 in_degraded = 1;
4493 }
4494 while (vn < ddf->max_part)
4495 dl->vlist[vn++] = NULL;
4496 if (dl->vlist[0]) {
4497 ddf->phys->entries[dl->pdnum].type &=
4498 ~__cpu_to_be16(DDF_Global_Spare);
4499 if (!(ddf->phys->entries[dl->pdnum].type &
4500 __cpu_to_be16(DDF_Active_in_VD))) {
4501 ddf->phys->entries[dl->pdnum].type |=
4502 __cpu_to_be16(DDF_Active_in_VD);
4503 if (in_degraded)
4504 ddf->phys->entries[dl->pdnum].state |=
4505 __cpu_to_be16(DDF_Rebuilding);
4506 }
4507 }
4508 if (dl->spare) {
4509 ddf->phys->entries[dl->pdnum].type &=
4510 ~__cpu_to_be16(DDF_Global_Spare);
4511 ddf->phys->entries[dl->pdnum].type |=
4512 __cpu_to_be16(DDF_Spare);
4513 }
4514 if (!dl->vlist[0] && !dl->spare) {
4515 ddf->phys->entries[dl->pdnum].type |=
4516 __cpu_to_be16(DDF_Global_Spare);
4517 ddf->phys->entries[dl->pdnum].type &=
4518 ~__cpu_to_be16(DDF_Spare |
4519 DDF_Active_in_VD);
4520 }
4521 }
4522
4523 /* Now remove any 'Failed' devices that are not part
4524 * of any VD. They will have the Transition flag set.
4525 * Once done, we need to update all dl->pdnum numbers.
4526 */
4527 pd2 = 0;
4528 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4529 if ((ddf->phys->entries[pdnum].state
4530 & __be16_to_cpu(DDF_Failed))
4531 && (ddf->phys->entries[pdnum].state
4532 & __be16_to_cpu(DDF_Transition)))
4533 /* skip this one */;
4534 else if (pdnum == pd2)
4535 pd2++;
4536 else {
4537 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4538 for (dl = ddf->dlist; dl; dl = dl->next)
4539 if (dl->pdnum == (int)pdnum)
4540 dl->pdnum = pd2;
4541 pd2++;
4542 }
4543 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4544 while (pd2 < pdnum) {
4545 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4546 pd2++;
4547 }
4548
4549 ddf_set_updates_pending(ddf);
4550 break;
4551 case DDF_SPARE_ASSIGN_MAGIC:
4552 default: break;
4553 }
4554 }
4555
4556 static void ddf_prepare_update(struct supertype *st,
4557 struct metadata_update *update)
4558 {
4559 /* This update arrived at managemon.
4560 * We are about to pass it to monitor.
4561 * If a malloc is needed, do it here.
4562 */
4563 struct ddf_super *ddf = st->sb;
4564 __u32 *magic = (__u32*)update->buf;
4565 if (*magic == DDF_VD_CONF_MAGIC) {
4566 struct vcl *vcl;
4567 struct vd_config *conf = (struct vd_config *) update->buf;
4568 if (posix_memalign(&update->space, 512,
4569 offsetof(struct vcl, conf)
4570 + ddf->conf_rec_len * 512) != 0) {
4571 update->space = NULL;
4572 return;
4573 }
4574 vcl = update->space;
4575 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4576 if (alloc_other_bvds(ddf, vcl) != 0) {
4577 free(update->space);
4578 update->space = NULL;
4579 }
4580 }
4581 }
4582
4583 /*
4584 * Check if the array 'a' is degraded but not failed.
4585 * If it is, find as many spares as are available and needed and
4586 * arrange for their inclusion.
4587 * We only choose devices which are not already in the array,
4588 * and prefer those with a spare-assignment to this array.
4589 * otherwise we choose global spares - assuming always that
4590 * there is enough room.
4591 * For each spare that we assign, we return an 'mdinfo' which
4592 * describes the position for the device in the array.
4593 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4594 * the new phys_refnum and lba_offset values.
4595 *
4596 * Only worry about BVDs at the moment.
4597 */
4598 static struct mdinfo *ddf_activate_spare(struct active_array *a,
4599 struct metadata_update **updates)
4600 {
4601 int working = 0;
4602 struct mdinfo *d;
4603 struct ddf_super *ddf = a->container->sb;
4604 int global_ok = 0;
4605 struct mdinfo *rv = NULL;
4606 struct mdinfo *di;
4607 struct metadata_update *mu;
4608 struct dl *dl;
4609 int i;
4610 struct vcl *vcl;
4611 struct vd_config *vc;
4612 unsigned int n_bvd;
4613
4614 for (d = a->info.devs ; d ; d = d->next) {
4615 if ((d->curr_state & DS_FAULTY) &&
4616 d->state_fd >= 0)
4617 /* wait for Removal to happen */
4618 return NULL;
4619 if (d->state_fd >= 0)
4620 working ++;
4621 }
4622
4623 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4624 a->info.array.level);
4625 if (working == a->info.array.raid_disks)
4626 return NULL; /* array not degraded */
4627 switch (a->info.array.level) {
4628 case 1:
4629 if (working == 0)
4630 return NULL; /* failed */
4631 break;
4632 case 4:
4633 case 5:
4634 if (working < a->info.array.raid_disks - 1)
4635 return NULL; /* failed */
4636 break;
4637 case 6:
4638 if (working < a->info.array.raid_disks - 2)
4639 return NULL; /* failed */
4640 break;
4641 default: /* concat or stripe */
4642 return NULL; /* failed */
4643 }
4644
4645 /* For each slot, if it is not working, find a spare */
4646 dl = ddf->dlist;
4647 for (i = 0; i < a->info.array.raid_disks; i++) {
4648 for (d = a->info.devs ; d ; d = d->next)
4649 if (d->disk.raid_disk == i)
4650 break;
4651 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
4652 if (d && (d->state_fd >= 0))
4653 continue;
4654
4655 /* OK, this device needs recovery. Find a spare */
4656 again:
4657 for ( ; dl ; dl = dl->next) {
4658 unsigned long long esize;
4659 unsigned long long pos;
4660 struct mdinfo *d2;
4661 int is_global = 0;
4662 int is_dedicated = 0;
4663 struct extent *ex;
4664 unsigned int j;
4665 /* If in this array, skip */
4666 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
4667 if (d2->state_fd >= 0 &&
4668 d2->disk.major == dl->major &&
4669 d2->disk.minor == dl->minor) {
4670 dprintf("%x:%x already in array\n", dl->major, dl->minor);
4671 break;
4672 }
4673 if (d2)
4674 continue;
4675 if (ddf->phys->entries[dl->pdnum].type &
4676 __cpu_to_be16(DDF_Spare)) {
4677 /* Check spare assign record */
4678 if (dl->spare) {
4679 if (dl->spare->type & DDF_spare_dedicated) {
4680 /* check spare_ents for guid */
4681 for (j = 0 ;
4682 j < __be16_to_cpu(dl->spare->populated);
4683 j++) {
4684 if (memcmp(dl->spare->spare_ents[j].guid,
4685 ddf->virt->entries[a->info.container_member].guid,
4686 DDF_GUID_LEN) == 0)
4687 is_dedicated = 1;
4688 }
4689 } else
4690 is_global = 1;
4691 }
4692 } else if (ddf->phys->entries[dl->pdnum].type &
4693 __cpu_to_be16(DDF_Global_Spare)) {
4694 is_global = 1;
4695 } else if (!(ddf->phys->entries[dl->pdnum].state &
4696 __cpu_to_be16(DDF_Failed))) {
4697 /* we can possibly use some of this */
4698 is_global = 1;
4699 }
4700 if ( ! (is_dedicated ||
4701 (is_global && global_ok))) {
4702 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
4703 is_dedicated, is_global);
4704 continue;
4705 }
4706
4707 /* We are allowed to use this device - is there space?
4708 * We need a->info.component_size sectors */
4709 ex = get_extents(ddf, dl);
4710 if (!ex) {
4711 dprintf("cannot get extents\n");
4712 continue;
4713 }
4714 j = 0; pos = 0;
4715 esize = 0;
4716
4717 do {
4718 esize = ex[j].start - pos;
4719 if (esize >= a->info.component_size)
4720 break;
4721 pos = ex[j].start + ex[j].size;
4722 j++;
4723 } while (ex[j-1].size);
4724
4725 free(ex);
4726 if (esize < a->info.component_size) {
4727 dprintf("%x:%x has no room: %llu %llu\n",
4728 dl->major, dl->minor,
4729 esize, a->info.component_size);
4730 /* No room */
4731 continue;
4732 }
4733
4734 /* Cool, we have a device with some space at pos */
4735 di = xcalloc(1, sizeof(*di));
4736 di->disk.number = i;
4737 di->disk.raid_disk = i;
4738 di->disk.major = dl->major;
4739 di->disk.minor = dl->minor;
4740 di->disk.state = 0;
4741 di->recovery_start = 0;
4742 di->data_offset = pos;
4743 di->component_size = a->info.component_size;
4744 di->container_member = dl->pdnum;
4745 di->next = rv;
4746 rv = di;
4747 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4748 i, pos);
4749
4750 break;
4751 }
4752 if (!dl && ! global_ok) {
4753 /* not enough dedicated spares, try global */
4754 global_ok = 1;
4755 dl = ddf->dlist;
4756 goto again;
4757 }
4758 }
4759
4760 if (!rv)
4761 /* No spares found */
4762 return rv;
4763 /* Now 'rv' has a list of devices to return.
4764 * Create a metadata_update record to update the
4765 * phys_refnum and lba_offset values
4766 */
4767 mu = xmalloc(sizeof(*mu));
4768 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
4769 free(mu);
4770 mu = NULL;
4771 }
4772 mu->buf = xmalloc(ddf->conf_rec_len * 512);
4773 mu->len = ddf->conf_rec_len * 512;
4774 mu->space = NULL;
4775 mu->space_list = NULL;
4776 mu->next = *updates;
4777 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4778 &n_bvd, &vcl);
4779 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4780
4781 vc = (struct vd_config*)mu->buf;
4782 for (di = rv ; di ; di = di->next) {
4783 vc->phys_refnum[di->disk.raid_disk] =
4784 ddf->phys->entries[dl->pdnum].refnum;
4785 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4786 = __cpu_to_be64(di->data_offset);
4787 }
4788 *updates = mu;
4789 return rv;
4790 }
4791 #endif /* MDASSEMBLE */
4792
4793 static int ddf_level_to_layout(int level)
4794 {
4795 switch(level) {
4796 case 0:
4797 case 1:
4798 return 0;
4799 case 5:
4800 return ALGORITHM_LEFT_SYMMETRIC;
4801 case 6:
4802 return ALGORITHM_ROTATING_N_CONTINUE;
4803 case 10:
4804 return 0x102;
4805 default:
4806 return UnSet;
4807 }
4808 }
4809
4810 static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4811 {
4812 if (level && *level == UnSet)
4813 *level = LEVEL_CONTAINER;
4814
4815 if (level && layout && *layout == UnSet)
4816 *layout = ddf_level_to_layout(*level);
4817 }
4818
4819 struct superswitch super_ddf = {
4820 #ifndef MDASSEMBLE
4821 .examine_super = examine_super_ddf,
4822 .brief_examine_super = brief_examine_super_ddf,
4823 .brief_examine_subarrays = brief_examine_subarrays_ddf,
4824 .export_examine_super = export_examine_super_ddf,
4825 .detail_super = detail_super_ddf,
4826 .brief_detail_super = brief_detail_super_ddf,
4827 .validate_geometry = validate_geometry_ddf,
4828 .write_init_super = write_init_super_ddf,
4829 .add_to_super = add_to_super_ddf,
4830 .remove_from_super = remove_from_super_ddf,
4831 .load_container = load_container_ddf,
4832 .copy_metadata = copy_metadata_ddf,
4833 #endif
4834 .match_home = match_home_ddf,
4835 .uuid_from_super= uuid_from_super_ddf,
4836 .getinfo_super = getinfo_super_ddf,
4837 .update_super = update_super_ddf,
4838
4839 .avail_size = avail_size_ddf,
4840
4841 .compare_super = compare_super_ddf,
4842
4843 .load_super = load_super_ddf,
4844 .init_super = init_super_ddf,
4845 .store_super = store_super_ddf,
4846 .free_super = free_super_ddf,
4847 .match_metadata_desc = match_metadata_desc_ddf,
4848 .container_content = container_content_ddf,
4849 .default_geometry = default_geometry_ddf,
4850 .kill_subarray = kill_subarray_ddf,
4851
4852 .external = 1,
4853
4854 #ifndef MDASSEMBLE
4855 /* for mdmon */
4856 .open_new = ddf_open_new,
4857 .set_array_state= ddf_set_array_state,
4858 .set_disk = ddf_set_disk,
4859 .sync_metadata = ddf_sync_metadata,
4860 .process_update = ddf_process_update,
4861 .prepare_update = ddf_prepare_update,
4862 .activate_spare = ddf_activate_spare,
4863 #endif
4864 .name = "ddf",
4865 };