]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-ddf.c
DDF: add endian-safe typedefs
[thirdparty/mdadm.git] / super-ddf.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28 #define HAVE_STDINT_H 1
29 #include "mdadm.h"
30 #include "mdmon.h"
31 #include "sha1.h"
32 #include <values.h>
33
/* a non-official T10 name for creation GUIDs */
static char T10[] = "Linux-MD";

/* DDF timestamps are 1980 based, so we need to add the number of
 * seconds in the decade of the seventies to convert to linux
 * timestamps.
 * 10 years with 2 leap years.
 */
#define DECADE (3600*24*(365*10+2))
/* CRC routine used by calc_crc(); only declared here, defined elsewhere. */
unsigned long crc32(
	unsigned long crc,
	const unsigned char *buf,
	unsigned len);

/* Sentinel values: DDF_NOTFOUND is all-ones, DDF_CONTAINER is one less. */
#define DDF_NOTFOUND (~0U)
#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
50 /* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
/* Endian-safe wrapper types.
 * Each big-endian quantity is wrapped in a single-member struct so
 * that it cannot be assigned to, or compared with, a plain integer
 * without going through one of the conversion helpers.
 * The beNN_eq() macros compare two wrapped values for equality
 * without converting them.
 */
typedef struct __be16 {
	__u16 _v16;
} be16;
#define be16_eq(x, y) ((x)._v16 == (y)._v16)

typedef struct __be32 {
	__u32 _v32;
} be32;
#define be32_eq(x, y) ((x)._v32 == (y)._v32)

typedef struct __be64 {
	__u64 _v64;
} be64;
#define be64_eq(x, y) ((x)._v64 == (y)._v64)
75
76 #define be16_to_cpu(be) __be16_to_cpu((be)._v16)
77 static inline be16 cpu_to_be16(__u16 x)
78 {
79 be16 be = { ._v16 = __cpu_to_be16(x) };
80 return be;
81 }
82
83 #define be32_to_cpu(be) __be32_to_cpu((be)._v32)
84 static inline be32 cpu_to_be32(__u32 x)
85 {
86 be32 be = { ._v32 = __cpu_to_be32(x) };
87 return be;
88 }
89
90 #define be64_to_cpu(be) __be64_to_cpu((be)._v64)
91 static inline be64 cpu_to_be64(__u64 x)
92 {
93 be64 be = { ._v64 = __cpu_to_be64(x) };
94 return be;
95 }
96
/* Primary Raid Level (PRL) - stored in vd_config.prl */
#define DDF_RAID0 0x00
#define DDF_RAID1 0x01
#define DDF_RAID3 0x03
#define DDF_RAID4 0x04
#define DDF_RAID5 0x05
#define DDF_RAID1E 0x11
#define DDF_JBOD 0x0f
#define DDF_CONCAT 0x1f
#define DDF_RAID5E 0x15
#define DDF_RAID5EE 0x25
#define DDF_RAID6 0x06

/* Raid Level Qualifier (RLQ) - stored in vd_config.rlq.
 * The meaning of a value depends on the PRL it is used with.
 */
#define DDF_RAID0_SIMPLE 0x00
#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
#define DDF_RAID3_0 0x00 /* parity in first extent */
#define DDF_RAID3_N 0x01 /* parity in last extent */
#define DDF_RAID4_0 0x00 /* parity in first extent */
#define DDF_RAID4_N 0x01 /* parity in last extent */
/* these apply to raid5e and raid5ee as well */
#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */

#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */

/* Secondary RAID Level (SRL) - stored in vd_config.srl */
#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
#define DDF_2MIRRORED 0x01
#define DDF_2CONCAT 0x02
#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
132
/* Magic numbers.
 * These are already converted to big-endian so that they can be
 * compared directly against the raw 'magic' field of a section.
 */
#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)

/* Length of every GUID field, and the header revision strings that
 * load_ddf_headers() accepts.
 */
#define DDF_GUID_LEN 24
#define DDF_REVISION_0 "01.00.00"
#define DDF_REVISION_2 "01.02.00"
148
/* On-disk DDF header.  The same layout is used for the anchor and for
 * the primary and secondary headers; the 'type' field tells them apart.
 * Multibyte fields are held in their raw (big-endian) on-disk form.
 */
struct ddf_header {
	__u32 magic; /* DDF_HEADER_MAGIC */
	__u32 crc;
	char guid[DDF_GUID_LEN];
	char revision[8]; /* 01.02.00 */
	__u32 seq; /* starts at '1' */
	__u32 timestamp;
	__u8 openflag;
	__u8 foreignflag;
	__u8 enforcegroups;
	__u8 pad0; /* 0xff */
	__u8 pad1[12]; /* 12 * 0xff */
	/* 64 bytes so far */
	__u8 header_ext[32]; /* reserved: fill with 0xff */
	__u64 primary_lba;
	__u64 secondary_lba;
	__u8 type;
	__u8 pad2[3]; /* 0xff */
	__u32 workspace_len; /* sectors for vendor space -
			      * at least 32768(sectors) */
	__u64 workspace_lba;
	__u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
	__u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
	__u16 max_partitions; /* i.e. max num of configuration
				 record entries per disk */
	__u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
				    *12/512) */
	__u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
	__u8 pad3[54]; /* 0xff */
	/* 192 bytes so far */
	/* Section locations: offsets are relative to the LBA of the
	 * active header (see load_section()), lengths are in sectors.
	 */
	__u32 controller_section_offset;
	__u32 controller_section_length;
	__u32 phys_section_offset;
	__u32 phys_section_length;
	__u32 virt_section_offset;
	__u32 virt_section_length;
	__u32 config_section_offset;
	__u32 config_section_length;
	__u32 data_section_offset;
	__u32 data_section_length;
	__u32 bbm_section_offset;
	__u32 bbm_section_length;
	__u32 diag_space_offset;
	__u32 diag_space_length;
	__u32 vendor_offset;
	__u32 vendor_length;
	/* 256 bytes so far */
	__u8 pad4[256]; /* 0xff */
};

/* type field */
#define DDF_HEADER_ANCHOR 0x00
#define DDF_HEADER_PRIMARY 0x01
#define DDF_HEADER_SECONDARY 0x02
203
/* The content of the 'controller section' - global scope.
 * This is read as a single 512-byte block straight into
 * ddf_super.controller by load_ddf_global().
 */
struct ddf_controller_data {
	__u32 magic; /* DDF_CONTROLLER_MAGIC */
	__u32 crc;
	char guid[DDF_GUID_LEN];
	struct controller_type {
		__u16 vendor_id;
		__u16 device_id;
		__u16 sub_vendor_id;
		__u16 sub_device_id;
	} type;
	char product_id[16];
	__u8 pad[8]; /* 0xff */
	__u8 vendor_data[448];
};
219
/* The content of phys_section - global scope.
 * A fixed header followed by a variable number of per-disk entries.
 */
struct phys_disk {
	__u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
	__u32 crc;
	__u16 used_pdes;
	__u16 max_pdes;
	__u8 pad[52];
	struct phys_disk_entry {
		char guid[DDF_GUID_LEN];
		__u32 refnum;
		__u16 type;
		__u16 state;
		__u64 config_size; /* DDF structures must be after here */
		char path[18]; /* another horrible structure really */
		__u8 pad[6];
	} entries[0];
};

/* phys_disk_entry.type is a bitmap - bigendian remember */
#define DDF_Forced_PD_GUID 1
#define DDF_Active_in_VD 2
#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
#define DDF_Spare 8 /* overrides Global_spare */
#define DDF_Foreign 16
#define DDF_Legacy 32 /* no DDF on this device */

#define DDF_Interface_mask 0xf00
#define DDF_Interface_SCSI 0x100
#define DDF_Interface_SAS 0x200
#define DDF_Interface_SATA 0x300
#define DDF_Interface_FC 0x400

/* phys_disk_entry.state is a bigendian bitmap */
#define DDF_Online 1
#define DDF_Failed 2 /* overrides 1,4,8 */
#define DDF_Rebuilding 4
#define DDF_Transition 8
#define DDF_SMART 16
#define DDF_ReadErrors 32
#define DDF_Missing 64
260
/* The content of the virt_section - global scope.
 * A fixed header followed by a variable number of virtual disk entries.
 */
struct virtual_disk {
	__u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
	__u32 crc;
	__u16 populated_vdes;
	__u16 max_vdes;
	__u8 pad[52];
	struct virtual_entry {
		char guid[DDF_GUID_LEN];
		__u16 unit;
		__u16 pad0; /* 0xffff */
		__u16 guid_crc;
		__u16 type;
		__u8 state;
		__u8 init_state;
		__u8 pad1[14];
		char name[16];
	} entries[0];
};

/* virtual_entry.type is a bitmap - bigendian */
#define DDF_Shared 1
#define DDF_Enforce_Groups 2
#define DDF_Unicode 4
#define DDF_Owner_Valid 8

/* virtual_entry.state is a single-byte bitmap */
#define DDF_state_mask 0x7
#define DDF_state_optimal 0x0
#define DDF_state_degraded 0x1
#define DDF_state_deleted 0x2
#define DDF_state_missing 0x3
#define DDF_state_failed 0x4
#define DDF_state_part_optimal 0x5

#define DDF_state_morphing 0x8
#define DDF_state_inconsistent 0x10

/* virtual_entry.init_state is a single-byte bitmap */
#define DDF_initstate_mask 0x03
#define DDF_init_not 0x00
#define DDF_init_quick 0x01 /* initialisation is in progress.
			     * i.e. 'state_inconsistent' */
#define DDF_init_full 0x02

#define DDF_access_mask 0xc0
#define DDF_access_rw 0x00
#define DDF_access_ro 0x80
#define DDF_access_blocked 0xc0
310
/* The content of the config_section - local scope
 * It has multiple records each config_record_len sectors
 * They can be vd_config or spare_assign
 */

/* One virtual disk configuration record.  The fixed part below is
 * followed by two variable-length arrays: phys_refnum[] and then the
 * LBA offsets, which are reached through LBA_OFFSET().
 */
struct vd_config {
	__u32 magic; /* DDF_VD_CONF_MAGIC */
	__u32 crc;
	char guid[DDF_GUID_LEN];
	__u32 timestamp;
	__u32 seqnum;
	__u8 pad0[24];
	__u16 prim_elmnt_count;
	__u8 chunk_shift; /* 0 == 512, 1==1024 etc */
	__u8 prl;
	__u8 rlq;
	__u8 sec_elmnt_count;
	__u8 sec_elmnt_seq;
	__u8 srl;
	__u64 blocks; /* blocks per component could be different
		       * on different component devices...(only
		       * for concat I hope) */
	__u64 array_blocks; /* blocks in array */
	__u8 pad1[8];
	__u32 spare_refs[8];
	__u8 cache_pol[8];
	__u8 bg_rate;
	__u8 pad2[3];
	__u8 pad3[52];
	__u8 pad4[192];
	__u8 v0[32]; /* reserved- 0xff */
	__u8 v1[32]; /* reserved- 0xff */
	__u8 v2[16]; /* reserved- 0xff */
	__u8 v3[16]; /* reserved- 0xff */
	__u8 vendor[32];
	__u32 phys_refnum[0]; /* refnum of each disk in sequence */
	/*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
	  bvd are always the same size */
};
/* The LBA offset array starts right after the (ddf)->mppe entries
 * of phys_refnum[].
 */
#define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])

/* vd_config.cache_pol[7] is a bitmap */
#define DDF_cache_writeback 1 /* else writethrough */
#define DDF_cache_wadaptive 2 /* only applies if writeback */
#define DDF_cache_readahead 4
#define DDF_cache_radaptive 8 /* only if doing read-ahead */
#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
#define DDF_cache_wallowed 32 /* enable write caching */
#define DDF_cache_rallowed 64 /* enable read caching */
360
/* Spare assignment record.  It lives in the config section alongside
 * vd_config records and is recognised by DDF_SPARE_ASSIGN_MAGIC.
 */
struct spare_assign {
	__u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
	__u32 crc;
	__u32 timestamp;
	__u8 reserved[7];
	__u8 type;
	__u16 populated; /* SAEs used */
	__u16 max; /* max SAEs */
	__u8 pad[8];
	struct spare_assign_entry {
		char guid[DDF_GUID_LEN];
		__u16 secondary_element;
		__u8 pad[6];
	} spare_ents[0];
};
/* spare_assign.type is a bitmap */
#define DDF_spare_dedicated 0x1 /* else global */
#define DDF_spare_revertible 0x2 /* else committable */
#define DDF_spare_active 0x4 /* else not active */
#define DDF_spare_affinity 0x8 /* enclosure affinity */
381
/* The data_section contents - local scope.
 * Read as a single block into dl->disk by load_ddf_local(); its guid
 * is then matched against the phys_disk entries to find dl->pdnum.
 */
struct disk_data {
	__u32 magic; /* DDF_PHYS_DATA_MAGIC */
	__u32 crc;
	char guid[DDF_GUID_LEN];
	__u32 refnum; /* crc of some magic drive data ... */
	__u8 forced_ref; /* set when above was not result of magic */
	__u8 forced_guid; /* set if guid was forced rather than magic */
	__u8 vendor[32];
	__u8 pad[442];
};
393
/* bbm_section content.
 * Nothing in the part of the file visible here reads or writes this
 * structure.
 * NOTE(review): this struct is not packed, so a compiler will
 * normally insert 2 bytes of padding between entry_count and
 * spare_count - confirm against the on-disk layout before using it.
 */
struct bad_block_log {
	__u32 magic;
	__u32 crc;
	__u16 entry_count;
	__u32 spare_count;
	__u8 pad[10];
	__u64 first_spare;
	struct mapped_block {
		__u64 defective_start;
		__u32 replacement_start;
		__u16 remap_count;
		__u8 pad[2];
	} entries[0];
};
409
/* Struct for internally holding ddf structures */
/* The DDF structure stored on each device is potentially
 * quite different, as some data is global and some is local.
 * The global data is:
 * - ddf header
 * - controller_data
 * - Physical disk records
 * - Virtual disk records
 * The local data is:
 * - Configuration records
 * - Physical Disk data section
 * ( and Bad block and vendor which I don't care about yet).
 *
 * The local data is parsed into separate lists as it is read
 * and reconstructed for writing. This means that we only need
 * to make config changes once and they are automatically
 * propagated to all devices.
 * Note that the ddf_super has space for the conf and disk data
 * for this disk and also for a list of all such data.
 * The list is only used for the superblock that is being
 * built in Create or Assemble to describe the whole array.
 *
 * In both 'struct vcl' and 'struct dl' the bookkeeping fields share a
 * union with a 512-byte 'space' array, so the on-disk record that
 * follows ('conf' / 'disk') starts at offset 512 within the struct.
 */
struct ddf_super {
	struct ddf_header anchor, primary, secondary;
	struct ddf_controller_data controller;
	struct ddf_header *active; /* points at primary or secondary */
	struct phys_disk *phys;
	struct virtual_disk *virt;
	int pdsize, vdsize; /* byte sizes of the phys and virt sections */
	unsigned int max_part, mppe, conf_rec_len;
	int currentdev;
	int updates_pending;
	struct vcl {
		union {
			char space[512];
			struct {
				struct vcl *next;
				unsigned int vcnum; /* index into ->virt */
				struct vd_config **other_bvds;
				__u64 *block_sizes; /* NULL if all the same */
			};
		};
		struct vd_config conf;
	} *conflist, *currentconf;
	struct dl {
		union {
			char space[512];
			struct {
				struct dl *next;
				int major, minor;
				char *devname;
				int fd;
				unsigned long long size; /* sectors */
				unsigned long long primary_lba; /* sectors */
				unsigned long long secondary_lba; /* sectors */
				unsigned long long workspace_lba; /* sectors */
				int pdnum; /* index in ->phys */
				struct spare_assign *spare;
				void *mdupdate; /* hold metadata update */

				/* These fields used by auto-layout */
				int raiddisk; /* slot to fill in autolayout */
				__u64 esize;
			};
		};
		struct disk_data disk;
		struct vcl *vlist[0]; /* max_part in size */
	} *dlist, *add_list;
};
479
/* Fallback for builds where no header has already defined offsetof(). */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
483
#if DEBUG
static int all_ff(const char *guid);
/* Debug aid: print state and init_state of every virtual disk entry
 * for which all_ff() does not report the GUID, prefixed with the
 * caller-supplied tag.  Compiles to an empty function without DEBUG.
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int n;

	dprintf("%s/%s: ", __func__, msg);
	for (n = 0; n < __be16_to_cpu(ddf->active->max_vd_entries); n++) {
		const struct virtual_entry *ve = &ddf->virt->entries[n];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", n,
				ve->state,
				ve->init_state);
	}
	dprintf("\n");
}
#else
static void pr_state(const struct ddf_super *ddf, const char *msg)
{
}
#endif
502
503 static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
504 {
505 ddf->updates_pending = 1;
506 ddf->active->seq = __cpu_to_be32((__be32_to_cpu(ddf->active->seq)+1));
507 pr_state(ddf, func);
508 }
509
510 #define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
511
512 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
513 __u32 refnum, unsigned int nmax,
514 const struct vd_config **bvd,
515 unsigned int *idx);
516
517 static unsigned int calc_crc(void *buf, int len)
518 {
519 /* crcs are always at the same place as in the ddf_header */
520 struct ddf_header *ddf = buf;
521 __u32 oldcrc = ddf->crc;
522 __u32 newcrc;
523 ddf->crc = 0xffffffff;
524
525 newcrc = crc32(0, buf, len);
526 ddf->crc = oldcrc;
527 /* The crc is store (like everything) bigendian, so convert
528 * here for simplicity
529 */
530 return __cpu_to_be32(newcrc);
531 }
532
533 #define DDF_INVALID_LEVEL 0xff
534 #define DDF_NO_SECONDARY 0xff
535 static int err_bad_md_layout(const mdu_array_info_t *array)
536 {
537 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
538 array->level, array->layout, array->raid_disks);
539 return -1;
540 }
541
542 static int layout_md2ddf(const mdu_array_info_t *array,
543 struct vd_config *conf)
544 {
545 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
546 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
547 __u8 sec_elmnt_count = 1;
548 __u8 srl = DDF_NO_SECONDARY;
549
550 switch (array->level) {
551 case LEVEL_LINEAR:
552 prl = DDF_CONCAT;
553 break;
554 case 0:
555 rlq = DDF_RAID0_SIMPLE;
556 prl = DDF_RAID0;
557 break;
558 case 1:
559 switch (array->raid_disks) {
560 case 2:
561 rlq = DDF_RAID1_SIMPLE;
562 break;
563 case 3:
564 rlq = DDF_RAID1_MULTI;
565 break;
566 default:
567 return err_bad_md_layout(array);
568 }
569 prl = DDF_RAID1;
570 break;
571 case 4:
572 if (array->layout != 0)
573 return err_bad_md_layout(array);
574 rlq = DDF_RAID4_N;
575 prl = DDF_RAID4;
576 break;
577 case 5:
578 switch (array->layout) {
579 case ALGORITHM_LEFT_ASYMMETRIC:
580 rlq = DDF_RAID5_N_RESTART;
581 break;
582 case ALGORITHM_RIGHT_ASYMMETRIC:
583 rlq = DDF_RAID5_0_RESTART;
584 break;
585 case ALGORITHM_LEFT_SYMMETRIC:
586 rlq = DDF_RAID5_N_CONTINUE;
587 break;
588 case ALGORITHM_RIGHT_SYMMETRIC:
589 /* not mentioned in standard */
590 default:
591 return err_bad_md_layout(array);
592 }
593 prl = DDF_RAID5;
594 break;
595 case 6:
596 switch (array->layout) {
597 case ALGORITHM_ROTATING_N_RESTART:
598 rlq = DDF_RAID5_N_RESTART;
599 break;
600 case ALGORITHM_ROTATING_ZERO_RESTART:
601 rlq = DDF_RAID6_0_RESTART;
602 break;
603 case ALGORITHM_ROTATING_N_CONTINUE:
604 rlq = DDF_RAID5_N_CONTINUE;
605 break;
606 default:
607 return err_bad_md_layout(array);
608 }
609 prl = DDF_RAID6;
610 break;
611 case 10:
612 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
613 rlq = DDF_RAID1_SIMPLE;
614 prim_elmnt_count = __cpu_to_be16(2);
615 sec_elmnt_count = array->raid_disks / 2;
616 } else if (array->raid_disks % 3 == 0
617 && array->layout == 0x103) {
618 rlq = DDF_RAID1_MULTI;
619 prim_elmnt_count = __cpu_to_be16(3);
620 sec_elmnt_count = array->raid_disks / 3;
621 } else
622 return err_bad_md_layout(array);
623 srl = DDF_2SPANNED;
624 prl = DDF_RAID1;
625 break;
626 default:
627 return err_bad_md_layout(array);
628 }
629 conf->prl = prl;
630 conf->prim_elmnt_count = prim_elmnt_count;
631 conf->rlq = rlq;
632 conf->srl = srl;
633 conf->sec_elmnt_count = sec_elmnt_count;
634 return 0;
635 }
636
637 static int err_bad_ddf_layout(const struct vd_config *conf)
638 {
639 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
640 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
641 return -1;
642 }
643
644 static int layout_ddf2md(const struct vd_config *conf,
645 mdu_array_info_t *array)
646 {
647 int level = LEVEL_UNSUPPORTED;
648 int layout = 0;
649 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
650
651 if (conf->sec_elmnt_count > 1) {
652 /* see also check_secondary() */
653 if (conf->prl != DDF_RAID1 ||
654 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
655 pr_err("Unsupported secondary RAID level %u/%u\n",
656 conf->prl, conf->srl);
657 return -1;
658 }
659 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
660 layout = 0x102;
661 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
662 layout = 0x103;
663 else
664 return err_bad_ddf_layout(conf);
665 raiddisks *= conf->sec_elmnt_count;
666 level = 10;
667 goto good;
668 }
669
670 switch (conf->prl) {
671 case DDF_CONCAT:
672 level = LEVEL_LINEAR;
673 break;
674 case DDF_RAID0:
675 if (conf->rlq != DDF_RAID0_SIMPLE)
676 return err_bad_ddf_layout(conf);
677 level = 0;
678 break;
679 case DDF_RAID1:
680 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
681 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
682 return err_bad_ddf_layout(conf);
683 level = 1;
684 break;
685 case DDF_RAID4:
686 if (conf->rlq != DDF_RAID4_N)
687 return err_bad_ddf_layout(conf);
688 level = 4;
689 break;
690 case DDF_RAID5:
691 switch (conf->rlq) {
692 case DDF_RAID5_N_RESTART:
693 layout = ALGORITHM_LEFT_ASYMMETRIC;
694 break;
695 case DDF_RAID5_0_RESTART:
696 layout = ALGORITHM_RIGHT_ASYMMETRIC;
697 break;
698 case DDF_RAID5_N_CONTINUE:
699 layout = ALGORITHM_LEFT_SYMMETRIC;
700 break;
701 default:
702 return err_bad_ddf_layout(conf);
703 }
704 level = 5;
705 break;
706 case DDF_RAID6:
707 switch (conf->rlq) {
708 case DDF_RAID5_N_RESTART:
709 layout = ALGORITHM_ROTATING_N_RESTART;
710 break;
711 case DDF_RAID6_0_RESTART:
712 layout = ALGORITHM_ROTATING_ZERO_RESTART;
713 break;
714 case DDF_RAID5_N_CONTINUE:
715 layout = ALGORITHM_ROTATING_N_CONTINUE;
716 break;
717 default:
718 return err_bad_ddf_layout(conf);
719 }
720 level = 6;
721 break;
722 default:
723 return err_bad_ddf_layout(conf);
724 };
725
726 good:
727 array->level = level;
728 array->layout = layout;
729 array->raid_disks = raiddisks;
730 return 0;
731 }
732
733 static int load_ddf_header(int fd, unsigned long long lba,
734 unsigned long long size,
735 int type,
736 struct ddf_header *hdr, struct ddf_header *anchor)
737 {
738 /* read a ddf header (primary or secondary) from fd/lba
739 * and check that it is consistent with anchor
740 * Need to check:
741 * magic, crc, guid, rev, and LBA's header_type, and
742 * everything after header_type must be the same
743 */
744 if (lba >= size-1)
745 return 0;
746
747 if (lseek64(fd, lba<<9, 0) < 0)
748 return 0;
749
750 if (read(fd, hdr, 512) != 512)
751 return 0;
752
753 if (hdr->magic != DDF_HEADER_MAGIC)
754 return 0;
755 if (calc_crc(hdr, 512) != hdr->crc)
756 return 0;
757 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
758 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
759 anchor->primary_lba != hdr->primary_lba ||
760 anchor->secondary_lba != hdr->secondary_lba ||
761 hdr->type != type ||
762 memcmp(anchor->pad2, hdr->pad2, 512 -
763 offsetof(struct ddf_header, pad2)) != 0)
764 return 0;
765
766 /* Looks good enough to me... */
767 return 1;
768 }
769
770 static void *load_section(int fd, struct ddf_super *super, void *buf,
771 __u32 offset_be, __u32 len_be, int check)
772 {
773 unsigned long long offset = __be32_to_cpu(offset_be);
774 unsigned long long len = __be32_to_cpu(len_be);
775 int dofree = (buf == NULL);
776
777 if (check)
778 if (len != 2 && len != 8 && len != 32
779 && len != 128 && len != 512)
780 return NULL;
781
782 if (len > 1024)
783 return NULL;
784 if (buf) {
785 /* All pre-allocated sections are a single block */
786 if (len != 1)
787 return NULL;
788 } else if (posix_memalign(&buf, 512, len<<9) != 0)
789 buf = NULL;
790
791 if (!buf)
792 return NULL;
793
794 if (super->active->type == 1)
795 offset += __be64_to_cpu(super->active->primary_lba);
796 else
797 offset += __be64_to_cpu(super->active->secondary_lba);
798
799 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
800 if (dofree)
801 free(buf);
802 return NULL;
803 }
804 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
805 if (dofree)
806 free(buf);
807 return NULL;
808 }
809 return buf;
810 }
811
812 static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
813 {
814 unsigned long long dsize;
815
816 get_dev_size(fd, NULL, &dsize);
817
818 if (lseek64(fd, dsize-512, 0) < 0) {
819 if (devname)
820 pr_err("Cannot seek to anchor block on %s: %s\n",
821 devname, strerror(errno));
822 return 1;
823 }
824 if (read(fd, &super->anchor, 512) != 512) {
825 if (devname)
826 pr_err("Cannot read anchor block on %s: %s\n",
827 devname, strerror(errno));
828 return 1;
829 }
830 if (super->anchor.magic != DDF_HEADER_MAGIC) {
831 if (devname)
832 pr_err("no DDF anchor found on %s\n",
833 devname);
834 return 2;
835 }
836 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
837 if (devname)
838 pr_err("bad CRC on anchor on %s\n",
839 devname);
840 return 2;
841 }
842 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
843 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
844 if (devname)
845 pr_err("can only support super revision"
846 " %.8s and earlier, not %.8s on %s\n",
847 DDF_REVISION_2, super->anchor.revision,devname);
848 return 2;
849 }
850 super->active = NULL;
851 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
852 dsize >> 9, 1,
853 &super->primary, &super->anchor) == 0) {
854 if (devname)
855 pr_err("Failed to load primary DDF header "
856 "on %s\n", devname);
857 } else
858 super->active = &super->primary;
859 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
860 dsize >> 9, 2,
861 &super->secondary, &super->anchor)) {
862 if (super->active == NULL
863 || (__be32_to_cpu(super->primary.seq)
864 < __be32_to_cpu(super->secondary.seq) &&
865 !super->secondary.openflag)
866 || (__be32_to_cpu(super->primary.seq)
867 == __be32_to_cpu(super->secondary.seq) &&
868 super->primary.openflag && !super->secondary.openflag)
869 )
870 super->active = &super->secondary;
871 } else if (devname)
872 pr_err("Failed to load secondary DDF header on %s\n",
873 devname);
874 if (super->active == NULL)
875 return 2;
876 return 0;
877 }
878
879 static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
880 {
881 void *ok;
882 ok = load_section(fd, super, &super->controller,
883 super->active->controller_section_offset,
884 super->active->controller_section_length,
885 0);
886 super->phys = load_section(fd, super, NULL,
887 super->active->phys_section_offset,
888 super->active->phys_section_length,
889 1);
890 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
891
892 super->virt = load_section(fd, super, NULL,
893 super->active->virt_section_offset,
894 super->active->virt_section_length,
895 1);
896 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
897 if (!ok ||
898 !super->phys ||
899 !super->virt) {
900 free(super->phys);
901 free(super->virt);
902 super->phys = NULL;
903 super->virt = NULL;
904 return 2;
905 }
906 super->conflist = NULL;
907 super->dlist = NULL;
908
909 super->max_part = __be16_to_cpu(super->active->max_partitions);
910 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
911 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
912 return 0;
913 }
914
915 #define DDF_UNUSED_BVD 0xff
916 static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
917 {
918 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
919 unsigned int i, vdsize;
920 void *p;
921 if (n_vds == 0) {
922 vcl->other_bvds = NULL;
923 return 0;
924 }
925 vdsize = ddf->conf_rec_len * 512;
926 if (posix_memalign(&p, 512, n_vds *
927 (vdsize + sizeof(struct vd_config *))) != 0)
928 return -1;
929 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
930 for (i = 0; i < n_vds; i++) {
931 vcl->other_bvds[i] = p + i * vdsize;
932 memset(vcl->other_bvds[i], 0, vdsize);
933 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
934 }
935 return 0;
936 }
937
938 static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
939 unsigned int len)
940 {
941 int i;
942 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
943 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
944 break;
945
946 if (i < vcl->conf.sec_elmnt_count-1) {
947 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
948 return;
949 } else {
950 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
951 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
952 break;
953 if (i == vcl->conf.sec_elmnt_count-1) {
954 pr_err("no space for sec level config %u, count is %u\n",
955 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
956 return;
957 }
958 }
959 memcpy(vcl->other_bvds[i], vd, len);
960 }
961
962 static int load_ddf_local(int fd, struct ddf_super *super,
963 char *devname, int keep)
964 {
965 struct dl *dl;
966 struct stat stb;
967 char *conf;
968 unsigned int i;
969 unsigned int confsec;
970 int vnum;
971 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
972 unsigned long long dsize;
973
974 /* First the local disk info */
975 if (posix_memalign((void**)&dl, 512,
976 sizeof(*dl) +
977 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
978 pr_err("%s could not allocate disk info buffer\n",
979 __func__);
980 return 1;
981 }
982
983 load_section(fd, super, &dl->disk,
984 super->active->data_section_offset,
985 super->active->data_section_length,
986 0);
987 dl->devname = devname ? xstrdup(devname) : NULL;
988
989 fstat(fd, &stb);
990 dl->major = major(stb.st_rdev);
991 dl->minor = minor(stb.st_rdev);
992 dl->next = super->dlist;
993 dl->fd = keep ? fd : -1;
994
995 dl->size = 0;
996 if (get_dev_size(fd, devname, &dsize))
997 dl->size = dsize >> 9;
998 /* If the disks have different sizes, the LBAs will differ
999 * between phys disks.
1000 * At this point here, the values in super->active must be valid
1001 * for this phys disk. */
1002 dl->primary_lba = super->active->primary_lba;
1003 dl->secondary_lba = super->active->secondary_lba;
1004 dl->workspace_lba = super->active->workspace_lba;
1005 dl->spare = NULL;
1006 for (i = 0 ; i < super->max_part ; i++)
1007 dl->vlist[i] = NULL;
1008 super->dlist = dl;
1009 dl->pdnum = -1;
1010 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
1011 if (memcmp(super->phys->entries[i].guid,
1012 dl->disk.guid, DDF_GUID_LEN) == 0)
1013 dl->pdnum = i;
1014
1015 /* Now the config list. */
1016 /* 'conf' is an array of config entries, some of which are
1017 * probably invalid. Those which are good need to be copied into
1018 * the conflist
1019 */
1020
1021 conf = load_section(fd, super, NULL,
1022 super->active->config_section_offset,
1023 super->active->config_section_length,
1024 0);
1025
1026 vnum = 0;
1027 for (confsec = 0;
1028 confsec < __be32_to_cpu(super->active->config_section_length);
1029 confsec += super->conf_rec_len) {
1030 struct vd_config *vd =
1031 (struct vd_config *)((char*)conf + confsec*512);
1032 struct vcl *vcl;
1033
1034 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
1035 if (dl->spare)
1036 continue;
1037 if (posix_memalign((void**)&dl->spare, 512,
1038 super->conf_rec_len*512) != 0) {
1039 pr_err("%s could not allocate spare info buf\n",
1040 __func__);
1041 return 1;
1042 }
1043
1044 memcpy(dl->spare, vd, super->conf_rec_len*512);
1045 continue;
1046 }
1047 if (vd->magic != DDF_VD_CONF_MAGIC)
1048 continue;
1049 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1050 if (memcmp(vcl->conf.guid,
1051 vd->guid, DDF_GUID_LEN) == 0)
1052 break;
1053 }
1054
1055 if (vcl) {
1056 dl->vlist[vnum++] = vcl;
1057 if (vcl->other_bvds != NULL &&
1058 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1059 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1060 continue;
1061 }
1062 if (__be32_to_cpu(vd->seqnum) <=
1063 __be32_to_cpu(vcl->conf.seqnum))
1064 continue;
1065 } else {
1066 if (posix_memalign((void**)&vcl, 512,
1067 (super->conf_rec_len*512 +
1068 offsetof(struct vcl, conf))) != 0) {
1069 pr_err("%s could not allocate vcl buf\n",
1070 __func__);
1071 return 1;
1072 }
1073 vcl->next = super->conflist;
1074 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
1075 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1076 if (alloc_other_bvds(super, vcl) != 0) {
1077 pr_err("%s could not allocate other bvds\n",
1078 __func__);
1079 free(vcl);
1080 return 1;
1081 };
1082 super->conflist = vcl;
1083 dl->vlist[vnum++] = vcl;
1084 }
1085 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
1086 for (i=0; i < max_virt_disks ; i++)
1087 if (memcmp(super->virt->entries[i].guid,
1088 vcl->conf.guid, DDF_GUID_LEN)==0)
1089 break;
1090 if (i < max_virt_disks)
1091 vcl->vcnum = i;
1092 }
1093 free(conf);
1094
1095 return 0;
1096 }
1097
1098 #ifndef MDASSEMBLE
1099 static int load_super_ddf_all(struct supertype *st, int fd,
1100 void **sbp, char *devname);
1101 #endif
1102
1103 static void free_super_ddf(struct supertype *st);
1104
1105 static int load_super_ddf(struct supertype *st, int fd,
1106 char *devname)
1107 {
1108 unsigned long long dsize;
1109 struct ddf_super *super;
1110 int rv;
1111
1112 if (get_dev_size(fd, devname, &dsize) == 0)
1113 return 1;
1114
1115 if (!st->ignore_hw_compat && test_partition(fd))
1116 /* DDF is not allowed on partitions */
1117 return 1;
1118
1119 /* 32M is a lower bound */
1120 if (dsize <= 32*1024*1024) {
1121 if (devname)
1122 pr_err("%s is too small for ddf: "
1123 "size is %llu sectors.\n",
1124 devname, dsize>>9);
1125 return 1;
1126 }
1127 if (dsize & 511) {
1128 if (devname)
1129 pr_err("%s is an odd size for ddf: "
1130 "size is %llu bytes.\n",
1131 devname, dsize);
1132 return 1;
1133 }
1134
1135 free_super_ddf(st);
1136
1137 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
1138 pr_err("malloc of %zu failed.\n",
1139 sizeof(*super));
1140 return 1;
1141 }
1142 memset(super, 0, sizeof(*super));
1143
1144 rv = load_ddf_headers(fd, super, devname);
1145 if (rv) {
1146 free(super);
1147 return rv;
1148 }
1149
1150 /* Have valid headers and have chosen the best. Let's read in the rest*/
1151
1152 rv = load_ddf_global(fd, super, devname);
1153
1154 if (rv) {
1155 if (devname)
1156 pr_err("Failed to load all information "
1157 "sections on %s\n", devname);
1158 free(super);
1159 return rv;
1160 }
1161
1162 rv = load_ddf_local(fd, super, devname, 0);
1163
1164 if (rv) {
1165 if (devname)
1166 pr_err("Failed to load all information "
1167 "sections on %s\n", devname);
1168 free(super);
1169 return rv;
1170 }
1171
1172 /* Should possibly check the sections .... */
1173
1174 st->sb = super;
1175 if (st->ss == NULL) {
1176 st->ss = &super_ddf;
1177 st->minor_version = 0;
1178 st->max_devs = 512;
1179 }
1180 return 0;
1181
1182 }
1183
1184 static void free_super_ddf(struct supertype *st)
1185 {
1186 struct ddf_super *ddf = st->sb;
1187 if (ddf == NULL)
1188 return;
1189 free(ddf->phys);
1190 free(ddf->virt);
1191 while (ddf->conflist) {
1192 struct vcl *v = ddf->conflist;
1193 ddf->conflist = v->next;
1194 if (v->block_sizes)
1195 free(v->block_sizes);
1196 if (v->other_bvds)
1197 /*
1198 v->other_bvds[0] points to beginning of buffer,
1199 see alloc_other_bvds()
1200 */
1201 free(v->other_bvds[0]);
1202 free(v);
1203 }
1204 while (ddf->dlist) {
1205 struct dl *d = ddf->dlist;
1206 ddf->dlist = d->next;
1207 if (d->fd >= 0)
1208 close(d->fd);
1209 if (d->spare)
1210 free(d->spare);
1211 free(d);
1212 }
1213 while (ddf->add_list) {
1214 struct dl *d = ddf->add_list;
1215 ddf->add_list = d->next;
1216 if (d->fd >= 0)
1217 close(d->fd);
1218 if (d->spare)
1219 free(d->spare);
1220 free(d);
1221 }
1222 free(ddf);
1223 st->sb = NULL;
1224 }
1225
1226 static struct supertype *match_metadata_desc_ddf(char *arg)
1227 {
1228 /* 'ddf' only support containers */
1229 struct supertype *st;
1230 if (strcmp(arg, "ddf") != 0 &&
1231 strcmp(arg, "default") != 0
1232 )
1233 return NULL;
1234
1235 st = xcalloc(1, sizeof(*st));
1236 st->ss = &super_ddf;
1237 st->max_devs = 512;
1238 st->minor_version = 0;
1239 st->sb = NULL;
1240 return st;
1241 }
1242
1243 #ifndef MDASSEMBLE
1244
/*
 * Names for the low three bits of a virtual entry's 'state' byte
 * (examine_vds() looks these up with 'state & 7').  All eight values are
 * listed so the lookup always finds a name.
 */
static mapping_t ddf_state[] = {
	{ "Optimal", 0},
	{ "Degraded", 1},
	{ "Deleted", 2},
	{ "Missing", 3},
	{ "Failed", 4},
	{ "Partially Optimal", 5},
	{ "-reserved-", 6},
	{ "-reserved-", 7},
	{ NULL, 0}	/* end of table */
};
1256
/*
 * Names for the low two bits of a virtual entry's 'init_state' byte
 * (examine_vds() looks these up with 'init_state & 3').
 */
static mapping_t ddf_init_state[] = {
	{ "Not Initialised", 0},
	{ "QuickInit in Progress", 1},
	{ "Fully Initialised", 2},
	{ "*UNKNOWN*", 3},
	{ NULL, 0}	/* end of table */
};
/*
 * Names for the access mode held in the top two bits of a virtual
 * entry's 'init_state' byte (examine_vds() uses '(init_state>>6) & 3').
 */
static mapping_t ddf_access[] = {
	{ "Read/Write", 0},
	{ "Reserved", 1},
	{ "Read Only", 2},
	{ "Blocked (no access)", 3},
	{ NULL ,0}	/* end of table */
};
1271
/*
 * Names for the primary RAID level of a VD configuration record
 * (examine_vd() looks up 'vc->prl' here).
 */
static mapping_t ddf_level[] = {
	{ "RAID0", DDF_RAID0},
	{ "RAID1", DDF_RAID1},
	{ "RAID3", DDF_RAID3},
	{ "RAID4", DDF_RAID4},
	{ "RAID5", DDF_RAID5},
	{ "RAID1E",DDF_RAID1E},
	{ "JBOD", DDF_JBOD},
	{ "CONCAT",DDF_CONCAT},
	{ "RAID5E",DDF_RAID5E},
	{ "RAID5EE",DDF_RAID5EE},
	{ "RAID6", DDF_RAID6},
	{ NULL, 0}	/* end of table */
};
/*
 * Names for the secondary RAID level of a VD configuration record
 * (examine_vd() looks up 'vc->srl' here when there is more than one
 * secondary element).
 */
static mapping_t ddf_sec_level[] = {
	{ "Striped", DDF_2STRIPED},
	{ "Mirrored", DDF_2MIRRORED},
	{ "Concat", DDF_2CONCAT},
	{ "Spanned", DDF_2SPANNED},
	{ NULL, 0}	/* end of table */
};
1293 #endif
1294
1295 static int all_ff(const char *guid)
1296 {
1297 int i;
1298 for (i = 0; i < DDF_GUID_LEN; i++)
1299 if (guid[i] != (char)0xff)
1300 return 0;
1301 return 1;
1302 }
1303
1304 #ifndef MDASSEMBLE
1305 static void print_guid(char *guid, int tstamp)
1306 {
1307 /* A GUIDs are part (or all) ASCII and part binary.
1308 * They tend to be space padded.
1309 * We print the GUID in HEX, then in parentheses add
1310 * any initial ASCII sequence, and a possible
1311 * time stamp from bytes 16-19
1312 */
1313 int l = DDF_GUID_LEN;
1314 int i;
1315
1316 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1317 if ((i&3)==0 && i != 0) printf(":");
1318 printf("%02X", guid[i]&255);
1319 }
1320
1321 printf("\n (");
1322 while (l && guid[l-1] == ' ')
1323 l--;
1324 for (i=0 ; i<l ; i++) {
1325 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1326 fputc(guid[i], stdout);
1327 else
1328 break;
1329 }
1330 if (tstamp) {
1331 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1332 char tbuf[100];
1333 struct tm *tm;
1334 tm = localtime(&then);
1335 strftime(tbuf, 100, " %D %T",tm);
1336 fputs(tbuf, stdout);
1337 }
1338 printf(")");
1339 }
1340
1341 static const char *guid_str(const char *guid)
1342 {
1343 static char buf[DDF_GUID_LEN*2+1];
1344 int i;
1345 char *p = buf;
1346 for (i = 0; i < DDF_GUID_LEN; i++) {
1347 unsigned char c = guid[i];
1348 if (c >= 32 && c < 127)
1349 p += sprintf(p, "%c", c);
1350 else
1351 p += sprintf(p, "%02x", c);
1352 }
1353 *p = '\0';
1354 return (const char *) buf;
1355 }
1356
1357 static void examine_vd(int n, struct ddf_super *sb, char *guid)
1358 {
1359 int crl = sb->conf_rec_len;
1360 struct vcl *vcl;
1361
1362 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
1363 unsigned int i;
1364 struct vd_config *vc = &vcl->conf;
1365
1366 if (calc_crc(vc, crl*512) != vc->crc)
1367 continue;
1368 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1369 continue;
1370
1371 /* Ok, we know about this VD, let's give more details */
1372 printf(" Raid Devices[%d] : %d (", n,
1373 __be16_to_cpu(vc->prim_elmnt_count));
1374 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
1375 int j;
1376 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1377 for (j=0; j<cnt; j++)
1378 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1379 break;
1380 if (i) printf(" ");
1381 if (j < cnt)
1382 printf("%d", j);
1383 else
1384 printf("--");
1385 }
1386 printf(")\n");
1387 if (vc->chunk_shift != 255)
1388 printf(" Chunk Size[%d] : %d sectors\n", n,
1389 1 << vc->chunk_shift);
1390 printf(" Raid Level[%d] : %s\n", n,
1391 map_num(ddf_level, vc->prl)?:"-unknown-");
1392 if (vc->sec_elmnt_count != 1) {
1393 printf(" Secondary Position[%d] : %d of %d\n", n,
1394 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1395 printf(" Secondary Level[%d] : %s\n", n,
1396 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1397 }
1398 printf(" Device Size[%d] : %llu\n", n,
1399 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
1400 printf(" Array Size[%d] : %llu\n", n,
1401 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
1402 }
1403 }
1404
1405 static void examine_vds(struct ddf_super *sb)
1406 {
1407 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
1408 unsigned int i;
1409 printf(" Virtual Disks : %d\n", cnt);
1410
1411 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
1412 struct virtual_entry *ve = &sb->virt->entries[i];
1413 if (all_ff(ve->guid))
1414 continue;
1415 printf("\n");
1416 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1417 printf("\n");
1418 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1419 printf(" state[%d] : %s, %s%s\n", i,
1420 map_num(ddf_state, ve->state & 7),
1421 (ve->state & 8) ? "Morphing, ": "",
1422 (ve->state & 16)? "Not Consistent" : "Consistent");
1423 printf(" init state[%d] : %s\n", i,
1424 map_num(ddf_init_state, ve->init_state&3));
1425 printf(" access[%d] : %s\n", i,
1426 map_num(ddf_access, (ve->init_state>>6) & 3));
1427 printf(" Name[%d] : %.16s\n", i, ve->name);
1428 examine_vd(i, sb, ve->guid);
1429 }
1430 if (cnt) printf("\n");
1431 }
1432
1433 static void examine_pds(struct ddf_super *sb)
1434 {
1435 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1436 int i;
1437 struct dl *dl;
1438 printf(" Physical Disks : %d\n", cnt);
1439 printf(" Number RefNo Size Device Type/State\n");
1440
1441 for (i=0 ; i<cnt ; i++) {
1442 struct phys_disk_entry *pd = &sb->phys->entries[i];
1443 int type = __be16_to_cpu(pd->type);
1444 int state = __be16_to_cpu(pd->state);
1445
1446 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1447 //printf("\n");
1448 printf(" %3d %08x ", i,
1449 __be32_to_cpu(pd->refnum));
1450 printf("%8lluK ",
1451 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
1452 for (dl = sb->dlist; dl ; dl = dl->next) {
1453 if (dl->disk.refnum == pd->refnum) {
1454 char *dv = map_dev(dl->major, dl->minor, 0);
1455 if (dv) {
1456 printf("%-15s", dv);
1457 break;
1458 }
1459 }
1460 }
1461 if (!dl)
1462 printf("%15s","");
1463 printf(" %s%s%s%s%s",
1464 (type&2) ? "active":"",
1465 (type&4) ? "Global-Spare":"",
1466 (type&8) ? "spare" : "",
1467 (type&16)? ", foreign" : "",
1468 (type&32)? "pass-through" : "");
1469 if (state & DDF_Failed)
1470 /* This over-rides these three */
1471 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
1472 printf("/%s%s%s%s%s%s%s",
1473 (state&1)? "Online": "Offline",
1474 (state&2)? ", Failed": "",
1475 (state&4)? ", Rebuilding": "",
1476 (state&8)? ", in-transition": "",
1477 (state&16)? ", SMART-errors": "",
1478 (state&32)? ", Unrecovered-Read-Errors": "",
1479 (state&64)? ", Missing" : "");
1480 printf("\n");
1481 }
1482 }
1483
1484 static void examine_super_ddf(struct supertype *st, char *homehost)
1485 {
1486 struct ddf_super *sb = st->sb;
1487
1488 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1489 printf(" Version : %.8s\n", sb->anchor.revision);
1490 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1491 printf("\n");
1492 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
1493 printf("\n");
1494 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1495 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1496 ?"yes" : "no");
1497 examine_vds(sb);
1498 examine_pds(sb);
1499 }
1500
1501 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
1502
1503 static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
1504 static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
1505
1506 static unsigned int get_vd_num_of_subarray(struct supertype *st)
1507 {
1508 /*
1509 * Figure out the VD number for this supertype.
1510 * Returns DDF_CONTAINER for the container itself,
1511 * and DDF_NOTFOUND on error.
1512 */
1513 struct ddf_super *ddf = st->sb;
1514 struct mdinfo *sra;
1515 char *sub, *end;
1516 unsigned int vcnum;
1517
1518 if (*st->container_devnm == '\0')
1519 return DDF_CONTAINER;
1520
1521 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1522 if (!sra || sra->array.major_version != -1 ||
1523 sra->array.minor_version != -2 ||
1524 !is_subarray(sra->text_version))
1525 return DDF_NOTFOUND;
1526
1527 sub = strchr(sra->text_version + 1, '/');
1528 if (sub != NULL)
1529 vcnum = strtoul(sub + 1, &end, 10);
1530 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1531 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1532 return DDF_NOTFOUND;
1533
1534 return vcnum;
1535 }
1536
1537 static void brief_examine_super_ddf(struct supertype *st, int verbose)
1538 {
1539 /* We just write a generic DDF ARRAY entry
1540 */
1541 struct mdinfo info;
1542 char nbuf[64];
1543 getinfo_super_ddf(st, &info, NULL);
1544 fname_from_uuid(st, &info, nbuf, ':');
1545
1546 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1547 }
1548
1549 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
1550 {
1551 /* We just write a generic DDF ARRAY entry
1552 */
1553 struct ddf_super *ddf = st->sb;
1554 struct mdinfo info;
1555 unsigned int i;
1556 char nbuf[64];
1557 getinfo_super_ddf(st, &info, NULL);
1558 fname_from_uuid(st, &info, nbuf, ':');
1559
1560 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1561 struct virtual_entry *ve = &ddf->virt->entries[i];
1562 struct vcl vcl;
1563 char nbuf1[64];
1564 if (all_ff(ve->guid))
1565 continue;
1566 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1567 ddf->currentconf =&vcl;
1568 uuid_from_super_ddf(st, info.uuid);
1569 fname_from_uuid(st, &info, nbuf1, ':');
1570 printf("ARRAY container=%s member=%d UUID=%s\n",
1571 nbuf+5, i, nbuf1+5);
1572 }
1573 }
1574
1575 static void export_examine_super_ddf(struct supertype *st)
1576 {
1577 struct mdinfo info;
1578 char nbuf[64];
1579 getinfo_super_ddf(st, &info, NULL);
1580 fname_from_uuid(st, &info, nbuf, ':');
1581 printf("MD_METADATA=ddf\n");
1582 printf("MD_LEVEL=container\n");
1583 printf("MD_UUID=%s\n", nbuf+5);
1584 }
1585
1586 static int copy_metadata_ddf(struct supertype *st, int from, int to)
1587 {
1588 void *buf;
1589 unsigned long long dsize, offset;
1590 int bytes;
1591 struct ddf_header *ddf;
1592 int written = 0;
1593
1594 /* The meta consists of an anchor, a primary, and a secondary.
1595 * This all lives at the end of the device.
1596 * So it is easiest to find the earliest of primary and
1597 * secondary, and copy everything from there.
1598 *
1599 * Anchor is 512 from end It contains primary_lba and secondary_lba
1600 * we choose one of those
1601 */
1602
1603 if (posix_memalign(&buf, 4096, 4096) != 0)
1604 return 1;
1605
1606 if (!get_dev_size(from, NULL, &dsize))
1607 goto err;
1608
1609 if (lseek64(from, dsize-512, 0) < 0)
1610 goto err;
1611 if (read(from, buf, 512) != 512)
1612 goto err;
1613 ddf = buf;
1614 if (ddf->magic != DDF_HEADER_MAGIC ||
1615 calc_crc(ddf, 512) != ddf->crc ||
1616 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1617 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1618 goto err;
1619
1620 offset = dsize - 512;
1621 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1622 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1623 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1624 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1625
1626 bytes = dsize - offset;
1627
1628 if (lseek64(from, offset, 0) < 0 ||
1629 lseek64(to, offset, 0) < 0)
1630 goto err;
1631 while (written < bytes) {
1632 int n = bytes - written;
1633 if (n > 4096)
1634 n = 4096;
1635 if (read(from, buf, n) != n)
1636 goto err;
1637 if (write(to, buf, n) != n)
1638 goto err;
1639 written += n;
1640 }
1641 free(buf);
1642 return 0;
1643 err:
1644 free(buf);
1645 return 1;
1646 }
1647
/*
 * Detail hook for DDF.  Intentionally empty for now: nothing is printed
 * and neither argument is used.
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 * Could print DDF GUID
	 * Need to find which array
	 * If whole, briefly list all arrays
	 * If one, give name
	 */
}
1657
1658 static void brief_detail_super_ddf(struct supertype *st)
1659 {
1660 struct mdinfo info;
1661 char nbuf[64];
1662 struct ddf_super *ddf = st->sb;
1663 unsigned int vcnum = get_vd_num_of_subarray(st);
1664 if (vcnum == DDF_CONTAINER)
1665 uuid_from_super_ddf(st, info.uuid);
1666 else if (vcnum == DDF_NOTFOUND)
1667 return;
1668 else
1669 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
1670 fname_from_uuid(st, &info, nbuf,':');
1671 printf(" UUID=%s", nbuf + 5);
1672 }
1673 #endif
1674
1675 static int match_home_ddf(struct supertype *st, char *homehost)
1676 {
1677 /* It matches 'this' host if the controller is a
1678 * Linux-MD controller with vendor_data matching
1679 * the hostname
1680 */
1681 struct ddf_super *ddf = st->sb;
1682 unsigned int len;
1683
1684 if (!homehost)
1685 return 0;
1686 len = strlen(homehost);
1687
1688 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1689 len < sizeof(ddf->controller.vendor_data) &&
1690 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1691 ddf->controller.vendor_data[len] == 0);
1692 }
1693
1694 #ifndef MDASSEMBLE
1695 static int find_index_in_bvd(const struct ddf_super *ddf,
1696 const struct vd_config *conf, unsigned int n,
1697 unsigned int *n_bvd)
1698 {
1699 /*
1700 * Find the index of the n-th valid physical disk in this BVD
1701 */
1702 unsigned int i, j;
1703 for (i = 0, j = 0; i < ddf->mppe &&
1704 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1705 if (conf->phys_refnum[i] != 0xffffffff) {
1706 if (n == j) {
1707 *n_bvd = i;
1708 return 1;
1709 }
1710 j++;
1711 }
1712 }
1713 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1714 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1715 return 0;
1716 }
1717
1718 static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1719 unsigned int n,
1720 unsigned int *n_bvd, struct vcl **vcl)
1721 {
1722 struct vcl *v;
1723
1724 for (v = ddf->conflist; v; v = v->next) {
1725 unsigned int nsec, ibvd;
1726 struct vd_config *conf;
1727 if (inst != v->vcnum)
1728 continue;
1729 conf = &v->conf;
1730 if (conf->sec_elmnt_count == 1) {
1731 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1732 *vcl = v;
1733 return conf;
1734 } else
1735 goto bad;
1736 }
1737 if (v->other_bvds == NULL) {
1738 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1739 __func__, conf->sec_elmnt_count);
1740 goto bad;
1741 }
1742 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1743 if (conf->sec_elmnt_seq != nsec) {
1744 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1745 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1746 == nsec)
1747 break;
1748 }
1749 if (ibvd == conf->sec_elmnt_count)
1750 goto bad;
1751 conf = v->other_bvds[ibvd-1];
1752 }
1753 if (!find_index_in_bvd(ddf, conf,
1754 n - nsec*conf->sec_elmnt_count, n_bvd))
1755 goto bad;
1756 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1757 , __func__, n, *n_bvd, ibvd-1, inst);
1758 *vcl = v;
1759 return conf;
1760 }
1761 bad:
1762 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
1763 return NULL;
1764 }
1765 #endif
1766
1767 static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
1768 {
1769 /* Find the entry in phys_disk which has the given refnum
1770 * and return it's index
1771 */
1772 unsigned int i;
1773 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
1774 if (ddf->phys->entries[i].refnum == phys_refnum)
1775 return i;
1776 return -1;
1777 }
1778
1779 static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1780 {
1781 char buf[20];
1782 struct sha1_ctx ctx;
1783 sha1_init_ctx(&ctx);
1784 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1785 sha1_finish_ctx(&ctx, buf);
1786 memcpy(uuid, buf, 4*4);
1787 }
1788
1789 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1790 {
1791 /* The uuid returned here is used for:
1792 * uuid to put into bitmap file (Create, Grow)
1793 * uuid for backup header when saving critical section (Grow)
1794 * comparing uuids when re-adding a device into an array
1795 * In these cases the uuid required is that of the data-array,
1796 * not the device-set.
1797 * uuid to recognise same set when adding a missing device back
1798 * to an array. This is a uuid for the device-set.
1799 *
1800 * For each of these we can make do with a truncated
1801 * or hashed uuid rather than the original, as long as
1802 * everyone agrees.
1803 * In the case of SVD we assume the BVD is of interest,
1804 * though that might be the case if a bitmap were made for
1805 * a mirrored SVD - worry about that later.
1806 * So we need to find the VD configuration record for the
1807 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1808 * The first 16 bytes of the sha1 of these is used.
1809 */
1810 struct ddf_super *ddf = st->sb;
1811 struct vcl *vcl = ddf->currentconf;
1812 char *guid;
1813
1814 if (vcl)
1815 guid = vcl->conf.guid;
1816 else
1817 guid = ddf->anchor.guid;
1818 uuid_from_ddf_guid(guid, uuid);
1819 }
1820
1821 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
1822
1823 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
1824 {
1825 struct ddf_super *ddf = st->sb;
1826 int map_disks = info->array.raid_disks;
1827 __u32 *cptr;
1828
1829 if (ddf->currentconf) {
1830 getinfo_super_ddf_bvd(st, info, map);
1831 return;
1832 }
1833 memset(info, 0, sizeof(*info));
1834
1835 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1836 info->array.level = LEVEL_CONTAINER;
1837 info->array.layout = 0;
1838 info->array.md_minor = -1;
1839 cptr = (__u32 *)(ddf->anchor.guid + 16);
1840 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1841
1842 info->array.utime = 0;
1843 info->array.chunk_size = 0;
1844 info->container_enough = 1;
1845
1846 info->disk.major = 0;
1847 info->disk.minor = 0;
1848 if (ddf->dlist) {
1849 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
1850 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
1851
1852 info->data_offset = __be64_to_cpu(ddf->phys->
1853 entries[info->disk.raid_disk].
1854 config_size);
1855 info->component_size = ddf->dlist->size - info->data_offset;
1856 } else {
1857 info->disk.number = -1;
1858 info->disk.raid_disk = -1;
1859 // info->disk.raid_disk = find refnum in the table and use index;
1860 }
1861 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
1862
1863 info->recovery_start = MaxSector;
1864 info->reshape_active = 0;
1865 info->recovery_blocked = 0;
1866 info->name[0] = 0;
1867
1868 info->array.major_version = -1;
1869 info->array.minor_version = -2;
1870 strcpy(info->text_version, "ddf");
1871 info->safe_mode_delay = 0;
1872
1873 uuid_from_super_ddf(st, info->uuid);
1874
1875 if (map) {
1876 int i;
1877 for (i = 0 ; i < map_disks; i++) {
1878 if (i < info->array.raid_disks &&
1879 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1880 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1881 map[i] = 1;
1882 else
1883 map[i] = 0;
1884 }
1885 }
1886 }
1887
1888 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
1889 {
1890 struct ddf_super *ddf = st->sb;
1891 struct vcl *vc = ddf->currentconf;
1892 int cd = ddf->currentdev;
1893 int n_prim;
1894 int j;
1895 struct dl *dl;
1896 int map_disks = info->array.raid_disks;
1897 __u32 *cptr;
1898 struct vd_config *conf;
1899
1900 memset(info, 0, sizeof(*info));
1901 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1902 return;
1903 info->array.md_minor = -1;
1904 cptr = (__u32 *)(vc->conf.guid + 16);
1905 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1906 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1907 info->array.chunk_size = 512 << vc->conf.chunk_shift;
1908 info->custom_array_size = 0;
1909
1910 conf = &vc->conf;
1911 n_prim = __be16_to_cpu(conf->prim_elmnt_count);
1912 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1913 int ibvd = cd / n_prim - 1;
1914 cd %= n_prim;
1915 conf = vc->other_bvds[ibvd];
1916 }
1917
1918 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
1919 info->data_offset =
1920 __be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
1921 if (vc->block_sizes)
1922 info->component_size = vc->block_sizes[cd];
1923 else
1924 info->component_size = __be64_to_cpu(conf->blocks);
1925 }
1926
1927 for (dl = ddf->dlist; dl ; dl = dl->next)
1928 if (dl->disk.refnum == conf->phys_refnum[cd])
1929 break;
1930
1931 info->disk.major = 0;
1932 info->disk.minor = 0;
1933 info->disk.state = 0;
1934 if (dl) {
1935 info->disk.major = dl->major;
1936 info->disk.minor = dl->minor;
1937 info->disk.raid_disk = cd + conf->sec_elmnt_seq
1938 * __be16_to_cpu(conf->prim_elmnt_count);
1939 info->disk.number = dl->pdnum;
1940 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
1941 }
1942
1943 info->container_member = ddf->currentconf->vcnum;
1944
1945 info->recovery_start = MaxSector;
1946 info->resync_start = 0;
1947 info->reshape_active = 0;
1948 info->recovery_blocked = 0;
1949 if (!(ddf->virt->entries[info->container_member].state
1950 & DDF_state_inconsistent) &&
1951 (ddf->virt->entries[info->container_member].init_state
1952 & DDF_initstate_mask)
1953 == DDF_init_full)
1954 info->resync_start = MaxSector;
1955
1956 uuid_from_super_ddf(st, info->uuid);
1957
1958 info->array.major_version = -1;
1959 info->array.minor_version = -2;
1960 sprintf(info->text_version, "/%s/%d",
1961 st->container_devnm,
1962 info->container_member);
1963 info->safe_mode_delay = 200;
1964
1965 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1966 info->name[16]=0;
1967 for(j=0; j<16; j++)
1968 if (info->name[j] == ' ')
1969 info->name[j] = 0;
1970
1971 if (map)
1972 for (j = 0; j < map_disks; j++) {
1973 map[j] = 0;
1974 if (j < info->array.raid_disks) {
1975 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
1976 if (i >= 0 &&
1977 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1978 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1979 map[i] = 1;
1980 }
1981 }
1982 }
1983
/*
 * Apply the metadata update named by 'update'.
 *
 * For 'assemble' and 'force' we need to return non-zero if any change
 * was made.  For others, the return value is ignored.  The generic
 * update options are:
 *   force-one : This device looks a bit old but needs to be included,
 *               update age info appropriately.
 *   assemble:   clear any 'faulty' flag to allow this device to
 *               be assembled.
 *   force-array: Array is degraded but being forced, mark it clean
 *               if that will be needed to assemble it.
 *   newdev:     not used ????
 *   grow:       Array has gained a new device - this is currently for
 *               linear only
 *   resync:     mark as dirty so a resync will happen.
 *   uuid:       Change the uuid of the array to match what is given
 *   homehost:   update the recorded homehost
 *   name:       update the name - preserving the homehost
 *   _reshape_progress: record new reshape_progress position.
 * (sparc2.2, super-minor and summaries are not relevant for this
 * version.)
 *
 * We don't need to handle "force-*" or "assemble" as there is no need
 * to 'trick' the kernel: when the metadata is first updated to activate
 * the array, all the implied modifications will just happen.
 *
 * As implemented nothing is modified.  "grow", "resync",
 * "_reshape_progress" (reshape is not supported yet) and "assemble" are
 * accepted as no-ops and return 0; every other name, including
 * "homehost" (would live in controller vendor_data) and "name" (would
 * live in the virtual entry), returns -1.  Only 'update' is examined.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* updates that succeed without touching the metadata */
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
2052
2053 static void make_header_guid(char *guid)
2054 {
2055 __u32 stamp;
2056 /* Create a DDF Header of Virtual Disk GUID */
2057
2058 /* 24 bytes of fiction required.
2059 * first 8 are a 'vendor-id' - "Linux-MD"
2060 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2061 * Remaining 8 random number plus timestamp
2062 */
2063 memcpy(guid, T10, sizeof(T10));
2064 stamp = __cpu_to_be32(0xdeadbeef);
2065 memcpy(guid+8, &stamp, 4);
2066 stamp = __cpu_to_be32(0);
2067 memcpy(guid+12, &stamp, 4);
2068 stamp = __cpu_to_be32(time(0) - DECADE);
2069 memcpy(guid+16, &stamp, 4);
2070 stamp = random32();
2071 memcpy(guid+20, &stamp, 4);
2072 }
2073
2074 static unsigned int find_unused_vde(const struct ddf_super *ddf)
2075 {
2076 unsigned int i;
2077 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2078 if (all_ff(ddf->virt->entries[i].guid))
2079 return i;
2080 }
2081 return DDF_NOTFOUND;
2082 }
2083
2084 static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2085 const char *name)
2086 {
2087 unsigned int i;
2088 if (name == NULL)
2089 return DDF_NOTFOUND;
2090 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2091 if (all_ff(ddf->virt->entries[i].guid))
2092 continue;
2093 if (!strncmp(name, ddf->virt->entries[i].name,
2094 sizeof(ddf->virt->entries[i].name)))
2095 return i;
2096 }
2097 return DDF_NOTFOUND;
2098 }
2099
2100 static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2101 const char *guid)
2102 {
2103 unsigned int i;
2104 if (guid == NULL || all_ff(guid))
2105 return DDF_NOTFOUND;
2106 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2107 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2108 return i;
2109 return DDF_NOTFOUND;
2110 }
2111
2112 static int init_super_ddf_bvd(struct supertype *st,
2113 mdu_array_info_t *info,
2114 unsigned long long size,
2115 char *name, char *homehost,
2116 int *uuid, unsigned long long data_offset);
2117
2118 static int init_super_ddf(struct supertype *st,
2119 mdu_array_info_t *info,
2120 unsigned long long size, char *name, char *homehost,
2121 int *uuid, unsigned long long data_offset)
2122 {
2123 /* This is primarily called by Create when creating a new array.
2124 * We will then get add_to_super called for each component, and then
2125 * write_init_super called to write it out to each device.
2126 * For DDF, Create can create on fresh devices or on a pre-existing
2127 * array.
2128 * To create on a pre-existing array a different method will be called.
2129 * This one is just for fresh drives.
2130 *
2131 * We need to create the entire 'ddf' structure which includes:
2132 * DDF headers - these are easy.
2133 * Controller data - a Sector describing this controller .. not that
2134 * this is a controller exactly.
2135 * Physical Disk Record - one entry per device, so
2136 * leave plenty of space.
2137 * Virtual Disk Records - again, just leave plenty of space.
2138 * This just lists VDs, doesn't give details
2139 * Config records - describes the VDs that use this disk
2140 * DiskData - describes 'this' device.
2141 * BadBlockManagement - empty
2142 * Diag Space - empty
2143 * Vendor Logs - Could we put bitmaps here?
2144 *
2145 */
2146 struct ddf_super *ddf;
2147 char hostname[17];
2148 int hostlen;
2149 int max_phys_disks, max_virt_disks;
2150 unsigned long long sector;
2151 int clen;
2152 int i;
2153 int pdsize, vdsize;
2154 struct phys_disk *pd;
2155 struct virtual_disk *vd;
2156
2157 if (data_offset != INVALID_SECTORS) {
2158 pr_err("data-offset not supported by DDF\n");
2159 return 0;
2160 }
2161
2162 if (st->sb)
2163 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2164 data_offset);
2165
2166 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
2167 pr_err("%s could not allocate superblock\n", __func__);
2168 return 0;
2169 }
2170 memset(ddf, 0, sizeof(*ddf));
2171 ddf->dlist = NULL; /* no physical disks yet */
2172 ddf->conflist = NULL; /* No virtual disks yet */
2173 st->sb = ddf;
2174
2175 if (info == NULL) {
2176 /* zeroing superblock */
2177 return 0;
2178 }
2179
2180 /* At least 32MB *must* be reserved for the ddf. So let's just
2181 * start 32MB from the end, and put the primary header there.
2182 * Don't do secondary for now.
2183 * We don't know exactly where that will be yet as it could be
2184 * different on each device. To just set up the lengths.
2185 *
2186 */
2187
2188 ddf->anchor.magic = DDF_HEADER_MAGIC;
2189 make_header_guid(ddf->anchor.guid);
2190
2191 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
2192 ddf->anchor.seq = __cpu_to_be32(1);
2193 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2194 ddf->anchor.openflag = 0xFF;
2195 ddf->anchor.foreignflag = 0;
2196 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2197 ddf->anchor.pad0 = 0xff;
2198 memset(ddf->anchor.pad1, 0xff, 12);
2199 memset(ddf->anchor.header_ext, 0xff, 32);
2200 ddf->anchor.primary_lba = ~(__u64)0;
2201 ddf->anchor.secondary_lba = ~(__u64)0;
2202 ddf->anchor.type = DDF_HEADER_ANCHOR;
2203 memset(ddf->anchor.pad2, 0xff, 3);
2204 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2205 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2206 of 32M reserved.. */
2207 max_phys_disks = 1023; /* Should be enough */
2208 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2209 max_virt_disks = 255;
2210 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2211 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2212 ddf->max_part = 64;
2213 ddf->mppe = 256;
2214 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2215 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2216 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
2217 memset(ddf->anchor.pad3, 0xff, 54);
2218 /* controller sections is one sector long immediately
2219 * after the ddf header */
2220 sector = 1;
2221 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2222 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2223 sector += 1;
2224
2225 /* phys is 8 sectors after that */
2226 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2227 sizeof(struct phys_disk_entry)*max_phys_disks,
2228 512);
2229 switch(pdsize/512) {
2230 case 2: case 8: case 32: case 128: case 512: break;
2231 default: abort();
2232 }
2233 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2234 ddf->anchor.phys_section_length =
2235 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2236 sector += pdsize/512;
2237
2238 /* virt is another 32 sectors */
2239 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2240 sizeof(struct virtual_entry) * max_virt_disks,
2241 512);
2242 switch(vdsize/512) {
2243 case 2: case 8: case 32: case 128: case 512: break;
2244 default: abort();
2245 }
2246 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2247 ddf->anchor.virt_section_length =
2248 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2249 sector += vdsize/512;
2250
2251 clen = ddf->conf_rec_len * (ddf->max_part+1);
2252 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2253 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2254 sector += clen;
2255
2256 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2257 ddf->anchor.data_section_length = __cpu_to_be32(1);
2258 sector += 1;
2259
2260 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2261 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2262 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2263 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2264 ddf->anchor.vendor_length = __cpu_to_be32(0);
2265 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2266
2267 memset(ddf->anchor.pad4, 0xff, 256);
2268
2269 memcpy(&ddf->primary, &ddf->anchor, 512);
2270 memcpy(&ddf->secondary, &ddf->anchor, 512);
2271
2272 ddf->primary.openflag = 1; /* I guess.. */
2273 ddf->primary.type = DDF_HEADER_PRIMARY;
2274
2275 ddf->secondary.openflag = 1; /* I guess.. */
2276 ddf->secondary.type = DDF_HEADER_SECONDARY;
2277
2278 ddf->active = &ddf->primary;
2279
2280 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2281
2282 /* 24 more bytes of fiction required.
2283 * first 8 are a 'vendor-id' - "Linux-MD"
2284 * Remaining 16 are serial number.... maybe a hostname would do?
2285 */
2286 memcpy(ddf->controller.guid, T10, sizeof(T10));
2287 gethostname(hostname, sizeof(hostname));
2288 hostname[sizeof(hostname) - 1] = 0;
2289 hostlen = strlen(hostname);
2290 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2291 for (i = strlen(T10) ; i+hostlen < 24; i++)
2292 ddf->controller.guid[i] = ' ';
2293
2294 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2295 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2296 ddf->controller.type.sub_vendor_id = 0;
2297 ddf->controller.type.sub_device_id = 0;
2298 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2299 memset(ddf->controller.pad, 0xff, 8);
2300 memset(ddf->controller.vendor_data, 0xff, 448);
2301 if (homehost && strlen(homehost) < 440)
2302 strcpy((char*)ddf->controller.vendor_data, homehost);
2303
2304 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
2305 pr_err("%s could not allocate pd\n", __func__);
2306 return 0;
2307 }
2308 ddf->phys = pd;
2309 ddf->pdsize = pdsize;
2310
2311 memset(pd, 0xff, pdsize);
2312 memset(pd, 0, sizeof(*pd));
2313 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2314 pd->used_pdes = __cpu_to_be16(0);
2315 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2316 memset(pd->pad, 0xff, 52);
2317 for (i = 0; i < max_phys_disks; i++)
2318 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
2319
2320 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
2321 pr_err("%s could not allocate vd\n", __func__);
2322 return 0;
2323 }
2324 ddf->virt = vd;
2325 ddf->vdsize = vdsize;
2326 memset(vd, 0, vdsize);
2327 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2328 vd->populated_vdes = __cpu_to_be16(0);
2329 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2330 memset(vd->pad, 0xff, 52);
2331
2332 for (i=0; i<max_virt_disks; i++)
2333 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2334
2335 st->sb = ddf;
2336 ddf_set_updates_pending(ddf);
2337 return 1;
2338 }
2339
/* Convert a chunk size in bytes into a shift count in units of
 * 512-byte sectors: the position of the lowest set bit of
 * chunksize/512, or -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);

	return lowest_bit - 1;
}
2344
2345 #ifndef MDASSEMBLE
/* A contiguous region of a device: first sector and length. */
struct extent {
	unsigned long long start;
	unsigned long long size;
};

/* qsort() comparator ordering extents by ascending start.
 * Only 'start' takes part in the comparison.
 */
static int cmp_extent(const void *av, const void *bv)
{
	unsigned long long lhs = ((const struct extent *)av)->start;
	unsigned long long rhs = ((const struct extent *)bv)->start;

	return (lhs > rhs) - (lhs < rhs);
}
2359
2360 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
2361 {
2362 /* find a list of used extents on the give physical device
2363 * (dnum) of the given ddf.
2364 * Return a malloced array of 'struct extent'
2365
2366 * FIXME ignore DDF_Legacy devices?
2367
2368 */
2369 struct extent *rv;
2370 int n = 0;
2371 unsigned int i;
2372
2373 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
2374
2375 for (i = 0; i < ddf->max_part; i++) {
2376 const struct vd_config *bvd;
2377 unsigned int ibvd;
2378 struct vcl *v = dl->vlist[i];
2379 if (v == NULL ||
2380 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2381 &bvd, &ibvd) == DDF_NOTFOUND)
2382 continue;
2383 rv[n].start = __be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2384 rv[n].size = __be64_to_cpu(bvd->blocks);
2385 n++;
2386 }
2387 qsort(rv, n, sizeof(*rv), cmp_extent);
2388
2389 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2390 rv[n].size = 0;
2391 return rv;
2392 }
2393 #endif
2394
2395 static int init_super_ddf_bvd(struct supertype *st,
2396 mdu_array_info_t *info,
2397 unsigned long long size,
2398 char *name, char *homehost,
2399 int *uuid, unsigned long long data_offset)
2400 {
2401 /* We are creating a BVD inside a pre-existing container.
2402 * so st->sb is already set.
2403 * We need to create a new vd_config and a new virtual_entry
2404 */
2405 struct ddf_super *ddf = st->sb;
2406 unsigned int venum, i;
2407 struct virtual_entry *ve;
2408 struct vcl *vcl;
2409 struct vd_config *vc;
2410
2411 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2412 pr_err("This ddf already has an array called %s\n", name);
2413 return 0;
2414 }
2415 venum = find_unused_vde(ddf);
2416 if (venum == DDF_NOTFOUND) {
2417 pr_err("Cannot find spare slot for virtual disk\n");
2418 return 0;
2419 }
2420 ve = &ddf->virt->entries[venum];
2421
2422 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2423 * timestamp, random number
2424 */
2425 make_header_guid(ve->guid);
2426 ve->unit = __cpu_to_be16(info->md_minor);
2427 ve->pad0 = 0xFFFF;
2428 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2429 ve->type = 0;
2430 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2431 if (info->state & 1) /* clean */
2432 ve->init_state = DDF_init_full;
2433 else
2434 ve->init_state = DDF_init_not;
2435
2436 memset(ve->pad1, 0xff, 14);
2437 memset(ve->name, ' ', 16);
2438 if (name)
2439 strncpy(ve->name, name, 16);
2440 ddf->virt->populated_vdes =
2441 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2442
2443 /* Now create a new vd_config */
2444 if (posix_memalign((void**)&vcl, 512,
2445 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
2446 pr_err("%s could not allocate vd_config\n", __func__);
2447 return 0;
2448 }
2449 vcl->vcnum = venum;
2450 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
2451 vc = &vcl->conf;
2452
2453 vc->magic = DDF_VD_CONF_MAGIC;
2454 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2455 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2456 vc->seqnum = __cpu_to_be32(1);
2457 memset(vc->pad0, 0xff, 24);
2458 vc->chunk_shift = chunk_to_shift(info->chunk_size);
2459 if (layout_md2ddf(info, vc) == -1 ||
2460 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2461 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2462 __func__, info->level, info->layout, info->raid_disks);
2463 free(vcl);
2464 return 0;
2465 }
2466 vc->sec_elmnt_seq = 0;
2467 if (alloc_other_bvds(ddf, vcl) != 0) {
2468 pr_err("%s could not allocate other bvds\n",
2469 __func__);
2470 free(vcl);
2471 return 0;
2472 }
2473 vc->blocks = __cpu_to_be64(info->size * 2);
2474 vc->array_blocks = __cpu_to_be64(
2475 calc_array_size(info->level, info->raid_disks, info->layout,
2476 info->chunk_size, info->size*2));
2477 memset(vc->pad1, 0xff, 8);
2478 vc->spare_refs[0] = 0xffffffff;
2479 vc->spare_refs[1] = 0xffffffff;
2480 vc->spare_refs[2] = 0xffffffff;
2481 vc->spare_refs[3] = 0xffffffff;
2482 vc->spare_refs[4] = 0xffffffff;
2483 vc->spare_refs[5] = 0xffffffff;
2484 vc->spare_refs[6] = 0xffffffff;
2485 vc->spare_refs[7] = 0xffffffff;
2486 memset(vc->cache_pol, 0, 8);
2487 vc->bg_rate = 0x80;
2488 memset(vc->pad2, 0xff, 3);
2489 memset(vc->pad3, 0xff, 52);
2490 memset(vc->pad4, 0xff, 192);
2491 memset(vc->v0, 0xff, 32);
2492 memset(vc->v1, 0xff, 32);
2493 memset(vc->v2, 0xff, 16);
2494 memset(vc->v3, 0xff, 16);
2495 memset(vc->vendor, 0xff, 32);
2496
2497 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
2498 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
2499
2500 for (i = 1; i < vc->sec_elmnt_count; i++) {
2501 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2502 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2503 }
2504
2505 vcl->next = ddf->conflist;
2506 ddf->conflist = vcl;
2507 ddf->currentconf = vcl;
2508 ddf_set_updates_pending(ddf);
2509 return 1;
2510 }
2511
2512 static int get_svd_state(const struct ddf_super *, const struct vcl *);
2513
2514 #ifndef MDASSEMBLE
2515 static void add_to_super_ddf_bvd(struct supertype *st,
2516 mdu_disk_info_t *dk, int fd, char *devname)
2517 {
2518 /* fd and devname identify a device with-in the ddf container (st).
2519 * dk identifies a location in the new BVD.
2520 * We need to find suitable free space in that device and update
2521 * the phys_refnum and lba_offset for the newly created vd_config.
2522 * We might also want to update the type in the phys_disk
2523 * section.
2524 *
2525 * Alternately: fd == -1 and we have already chosen which device to
2526 * use and recorded in dlist->raid_disk;
2527 */
2528 struct dl *dl;
2529 struct ddf_super *ddf = st->sb;
2530 struct vd_config *vc;
2531 unsigned int i;
2532 unsigned long long blocks, pos, esize;
2533 struct extent *ex;
2534 unsigned int raid_disk = dk->raid_disk;
2535
2536 if (fd == -1) {
2537 for (dl = ddf->dlist; dl ; dl = dl->next)
2538 if (dl->raiddisk == dk->raid_disk)
2539 break;
2540 } else {
2541 for (dl = ddf->dlist; dl ; dl = dl->next)
2542 if (dl->major == dk->major &&
2543 dl->minor == dk->minor)
2544 break;
2545 }
2546 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2547 return;
2548
2549 vc = &ddf->currentconf->conf;
2550 if (vc->sec_elmnt_count > 1) {
2551 unsigned int n = __be16_to_cpu(vc->prim_elmnt_count);
2552 if (raid_disk >= n)
2553 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2554 raid_disk %= n;
2555 }
2556
2557 ex = get_extents(ddf, dl);
2558 if (!ex)
2559 return;
2560
2561 i = 0; pos = 0;
2562 blocks = __be64_to_cpu(vc->blocks);
2563 if (ddf->currentconf->block_sizes)
2564 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
2565
2566 do {
2567 esize = ex[i].start - pos;
2568 if (esize >= blocks)
2569 break;
2570 pos = ex[i].start + ex[i].size;
2571 i++;
2572 } while (ex[i-1].size);
2573
2574 free(ex);
2575 if (esize < blocks)
2576 return;
2577
2578 ddf->currentdev = dk->raid_disk;
2579 vc->phys_refnum[raid_disk] = dl->disk.refnum;
2580 LBA_OFFSET(ddf, vc)[raid_disk] = __cpu_to_be64(pos);
2581
2582 for (i = 0; i < ddf->max_part ; i++)
2583 if (dl->vlist[i] == NULL)
2584 break;
2585 if (i == ddf->max_part)
2586 return;
2587 dl->vlist[i] = ddf->currentconf;
2588
2589 if (fd >= 0)
2590 dl->fd = fd;
2591 if (devname)
2592 dl->devname = devname;
2593
2594 /* Check if we can mark array as optimal yet */
2595 i = ddf->currentconf->vcnum;
2596 ddf->virt->entries[i].state =
2597 (ddf->virt->entries[i].state & ~DDF_state_mask)
2598 | get_svd_state(ddf, ddf->currentconf);
2599 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2600 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
2601 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
2602 __func__, dl->pdnum, __be32_to_cpu(dl->disk.refnum),
2603 ddf->currentconf->vcnum, guid_str(vc->guid),
2604 dk->raid_disk);
2605 ddf_set_updates_pending(ddf);
2606 }
2607
2608 static unsigned int find_unused_pde(const struct ddf_super *ddf)
2609 {
2610 unsigned int i;
2611 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++) {
2612 if (all_ff(ddf->phys->entries[i].guid))
2613 return i;
2614 }
2615 return DDF_NOTFOUND;
2616 }
2617
2618 /* add a device to a container, either while creating it or while
2619 * expanding a pre-existing container
2620 */
2621 static int add_to_super_ddf(struct supertype *st,
2622 mdu_disk_info_t *dk, int fd, char *devname,
2623 unsigned long long data_offset)
2624 {
2625 struct ddf_super *ddf = st->sb;
2626 struct dl *dd;
2627 time_t now;
2628 struct tm *tm;
2629 unsigned long long size;
2630 struct phys_disk_entry *pde;
2631 unsigned int n, i;
2632 struct stat stb;
2633 __u32 *tptr;
2634
2635 if (ddf->currentconf) {
2636 add_to_super_ddf_bvd(st, dk, fd, devname);
2637 return 0;
2638 }
2639
2640 /* This is device numbered dk->number. We need to create
2641 * a phys_disk entry and a more detailed disk_data entry.
2642 */
2643 fstat(fd, &stb);
2644 n = find_unused_pde(ddf);
2645 if (n == DDF_NOTFOUND) {
2646 pr_err("%s: No free slot in array, cannot add disk\n",
2647 __func__);
2648 return 1;
2649 }
2650 pde = &ddf->phys->entries[n];
2651 get_dev_size(fd, NULL, &size);
2652 if (size <= 32*1024*1024) {
2653 pr_err("%s: device size must be at least 32MB\n",
2654 __func__);
2655 return 1;
2656 }
2657 size >>= 9;
2658
2659 if (posix_memalign((void**)&dd, 512,
2660 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
2661 pr_err("%s could allocate buffer for new disk, aborting\n",
2662 __func__);
2663 return 1;
2664 }
2665 dd->major = major(stb.st_rdev);
2666 dd->minor = minor(stb.st_rdev);
2667 dd->devname = devname;
2668 dd->fd = fd;
2669 dd->spare = NULL;
2670
2671 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2672 now = time(0);
2673 tm = localtime(&now);
2674 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2675 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
2676 tptr = (__u32 *)(dd->disk.guid + 16);
2677 *tptr++ = random32();
2678 *tptr = random32();
2679
2680 do {
2681 /* Cannot be bothered finding a CRC of some irrelevant details*/
2682 dd->disk.refnum = random32();
2683 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2684 i > 0; i--)
2685 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
2686 break;
2687 } while (i > 0);
2688
2689 dd->disk.forced_ref = 1;
2690 dd->disk.forced_guid = 1;
2691 memset(dd->disk.vendor, ' ', 32);
2692 memcpy(dd->disk.vendor, "Linux", 5);
2693 memset(dd->disk.pad, 0xff, 442);
2694 for (i = 0; i < ddf->max_part ; i++)
2695 dd->vlist[i] = NULL;
2696
2697 dd->pdnum = n;
2698
2699 if (st->update_tail) {
2700 int len = (sizeof(struct phys_disk) +
2701 sizeof(struct phys_disk_entry));
2702 struct phys_disk *pd;
2703
2704 pd = xmalloc(len);
2705 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2706 pd->used_pdes = __cpu_to_be16(n);
2707 pde = &pd->entries[0];
2708 dd->mdupdate = pd;
2709 } else
2710 ddf->phys->used_pdes = __cpu_to_be16(
2711 1 + __be16_to_cpu(ddf->phys->used_pdes));
2712
2713 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2714 pde->refnum = dd->disk.refnum;
2715 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
2716 pde->state = __cpu_to_be16(DDF_Online);
2717 dd->size = size;
2718 /*
2719 * If there is already a device in dlist, try to reserve the same
2720 * amount of workspace. Otherwise, use 32MB.
2721 * We checked disk size above already.
2722 */
2723 #define __calc_lba(new, old, lba, mb) do { \
2724 unsigned long long dif; \
2725 if ((old) != NULL) \
2726 dif = (old)->size - __be64_to_cpu((old)->lba); \
2727 else \
2728 dif = (new)->size; \
2729 if ((new)->size > dif) \
2730 (new)->lba = __cpu_to_be64((new)->size - dif); \
2731 else \
2732 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2733 } while (0)
2734 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2735 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2736 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2737 pde->config_size = dd->workspace_lba;
2738
2739 sprintf(pde->path, "%17.17s","Information: nil") ;
2740 memset(pde->pad, 0xff, 6);
2741
2742 if (st->update_tail) {
2743 dd->next = ddf->add_list;
2744 ddf->add_list = dd;
2745 } else {
2746 dd->next = ddf->dlist;
2747 ddf->dlist = dd;
2748 ddf_set_updates_pending(ddf);
2749 }
2750
2751 return 0;
2752 }
2753
2754 static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2755 {
2756 struct ddf_super *ddf = st->sb;
2757 struct dl *dl;
2758
2759 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2760 * disappeared from the container.
2761 * We need to arrange that it disappears from the metadata and
2762 * internal data structures too.
2763 * Most of the work is done by ddf_process_update which edits
2764 * the metadata and closes the file handle and attaches the memory
2765 * where free_updates will free it.
2766 */
2767 for (dl = ddf->dlist; dl ; dl = dl->next)
2768 if (dl->major == dk->major &&
2769 dl->minor == dk->minor)
2770 break;
2771 if (!dl)
2772 return -1;
2773
2774 if (st->update_tail) {
2775 int len = (sizeof(struct phys_disk) +
2776 sizeof(struct phys_disk_entry));
2777 struct phys_disk *pd;
2778
2779 pd = xmalloc(len);
2780 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2781 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2782 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2783 append_metadata_update(st, pd, len);
2784 }
2785 return 0;
2786 }
2787
2788 /*
2789 * This is the write_init_super method for a ddf container. It is
2790 * called when creating a container or adding another device to a
2791 * container.
2792 */
2793 #define NULL_CONF_SZ 4096
2794
2795 static char *null_aligned;
2796 static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
2797 {
2798 unsigned long long sector;
2799 struct ddf_header *header;
2800 int fd, i, n_config, conf_size;
2801 int ret = 0;
2802
2803 if (null_aligned == NULL) {
2804 if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
2805 != 0)
2806 return 0;
2807 memset(null_aligned, 0xff, NULL_CONF_SZ);
2808 }
2809
2810 fd = d->fd;
2811
2812 switch (type) {
2813 case DDF_HEADER_PRIMARY:
2814 header = &ddf->primary;
2815 sector = __be64_to_cpu(header->primary_lba);
2816 break;
2817 case DDF_HEADER_SECONDARY:
2818 header = &ddf->secondary;
2819 sector = __be64_to_cpu(header->secondary_lba);
2820 break;
2821 default:
2822 return 0;
2823 }
2824
2825 header->type = type;
2826 header->openflag = 1;
2827 header->crc = calc_crc(header, 512);
2828
2829 lseek64(fd, sector<<9, 0);
2830 if (write(fd, header, 512) < 0)
2831 goto out;
2832
2833 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2834 if (write(fd, &ddf->controller, 512) < 0)
2835 goto out;
2836
2837 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2838 if (write(fd, ddf->phys, ddf->pdsize) < 0)
2839 goto out;
2840 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2841 if (write(fd, ddf->virt, ddf->vdsize) < 0)
2842 goto out;
2843
2844 /* Now write lots of config records. */
2845 n_config = ddf->max_part;
2846 conf_size = ddf->conf_rec_len * 512;
2847 for (i = 0 ; i <= n_config ; i++) {
2848 struct vcl *c;
2849 struct vd_config *vdc = NULL;
2850 if (i == n_config) {
2851 c = (struct vcl *)d->spare;
2852 if (c)
2853 vdc = &c->conf;
2854 } else {
2855 unsigned int dummy;
2856 c = d->vlist[i];
2857 if (c)
2858 get_pd_index_from_refnum(
2859 c, d->disk.refnum,
2860 ddf->mppe,
2861 (const struct vd_config **)&vdc,
2862 &dummy);
2863 }
2864 if (c) {
2865 dprintf("writing conf record %i on disk %08x for %s/%u\n",
2866 i, __be32_to_cpu(d->disk.refnum),
2867 guid_str(vdc->guid),
2868 vdc->sec_elmnt_seq);
2869 vdc->seqnum = header->seq;
2870 vdc->crc = calc_crc(vdc, conf_size);
2871 if (write(fd, vdc, conf_size) < 0)
2872 break;
2873 } else {
2874 unsigned int togo = conf_size;
2875 while (togo > NULL_CONF_SZ) {
2876 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2877 break;
2878 togo -= NULL_CONF_SZ;
2879 }
2880 if (write(fd, null_aligned, togo) < 0)
2881 break;
2882 }
2883 }
2884 if (i <= n_config)
2885 goto out;
2886
2887 d->disk.crc = calc_crc(&d->disk, 512);
2888 if (write(fd, &d->disk, 512) < 0)
2889 goto out;
2890
2891 ret = 1;
2892 out:
2893 header->openflag = 0;
2894 header->crc = calc_crc(header, 512);
2895
2896 lseek64(fd, sector<<9, 0);
2897 if (write(fd, header, 512) < 0)
2898 ret = 0;
2899
2900 return ret;
2901 }
2902
2903 static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
2904 {
2905 unsigned long long size;
2906 int fd = d->fd;
2907 if (fd < 0)
2908 return 0;
2909
2910 /* We need to fill in the primary, (secondary) and workspace
2911 * lba's in the headers, set their checksums,
2912 * Also checksum phys, virt....
2913 *
2914 * Then write everything out, finally the anchor is written.
2915 */
2916 get_dev_size(fd, NULL, &size);
2917 size /= 512;
2918 if (d->workspace_lba != 0)
2919 ddf->anchor.workspace_lba = d->workspace_lba;
2920 else
2921 ddf->anchor.workspace_lba =
2922 __cpu_to_be64(size - 32*1024*2);
2923 if (d->primary_lba != 0)
2924 ddf->anchor.primary_lba = d->primary_lba;
2925 else
2926 ddf->anchor.primary_lba =
2927 __cpu_to_be64(size - 16*1024*2);
2928 if (d->secondary_lba != 0)
2929 ddf->anchor.secondary_lba = d->secondary_lba;
2930 else
2931 ddf->anchor.secondary_lba =
2932 __cpu_to_be64(size - 32*1024*2);
2933 ddf->anchor.seq = ddf->active->seq;
2934 memcpy(&ddf->primary, &ddf->anchor, 512);
2935 memcpy(&ddf->secondary, &ddf->anchor, 512);
2936
2937 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2938 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2939 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2940
2941 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
2942 return 0;
2943
2944 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
2945 return 0;
2946
2947 lseek64(fd, (size-1)*512, SEEK_SET);
2948 if (write(fd, &ddf->anchor, 512) < 0)
2949 return 0;
2950
2951 return 1;
2952 }
2953
2954 static int __write_init_super_ddf(struct supertype *st)
2955 {
2956 struct ddf_super *ddf = st->sb;
2957 struct dl *d;
2958 int attempts = 0;
2959 int successes = 0;
2960
2961 pr_state(ddf, __func__);
2962
2963 /* try to write updated metadata,
2964 * if we catch a failure move on to the next disk
2965 */
2966 for (d = ddf->dlist; d; d=d->next) {
2967 attempts++;
2968 successes += _write_super_to_disk(ddf, d);
2969 }
2970
2971 return attempts != successes;
2972 }
2973
2974 static int write_init_super_ddf(struct supertype *st)
2975 {
2976 struct ddf_super *ddf = st->sb;
2977 struct vcl *currentconf = ddf->currentconf;
2978
2979 /* we are done with currentconf reset it to point st at the container */
2980 ddf->currentconf = NULL;
2981
2982 if (st->update_tail) {
2983 /* queue the virtual_disk and vd_config as metadata updates */
2984 struct virtual_disk *vd;
2985 struct vd_config *vc;
2986 int len, tlen;
2987 unsigned int i;
2988
2989 if (!currentconf) {
2990 int len = (sizeof(struct phys_disk) +
2991 sizeof(struct phys_disk_entry));
2992
2993 /* adding a disk to the container. */
2994 if (!ddf->add_list)
2995 return 0;
2996
2997 append_metadata_update(st, ddf->add_list->mdupdate, len);
2998 ddf->add_list->mdupdate = NULL;
2999 return 0;
3000 }
3001
3002 /* Newly created VD */
3003
3004 /* First the virtual disk. We have a slightly fake header */
3005 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
3006 vd = xmalloc(len);
3007 *vd = *ddf->virt;
3008 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
3009 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
3010 append_metadata_update(st, vd, len);
3011
3012 /* Then the vd_config */
3013 len = ddf->conf_rec_len * 512;
3014 tlen = len * currentconf->conf.sec_elmnt_count;
3015 vc = xmalloc(tlen);
3016 memcpy(vc, &currentconf->conf, len);
3017 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
3018 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
3019 len);
3020 append_metadata_update(st, vc, tlen);
3021
3022 /* FIXME I need to close the fds! */
3023 return 0;
3024 } else {
3025 struct dl *d;
3026 if (!currentconf)
3027 for (d = ddf->dlist; d; d=d->next)
3028 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
3029 return __write_init_super_ddf(st);
3030 }
3031 }
3032
3033 #endif
3034
/* Usable size of a device of 'devsize' sectors: everything except the
 * last 32Meg, which must stay reserved.  Devices no larger than the
 * reservation yield 0.  'st' and 'data_offset' are not consulted.
 */
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
			    unsigned long long data_offset)
{
	const __u64 reserved = 32*1024*2;

	return devsize > reserved ? devsize - reserved : 0;
}
3043
3044 #ifndef MDASSEMBLE
3045
3046 static int reserve_space(struct supertype *st, int raiddisks,
3047 unsigned long long size, int chunk,
3048 unsigned long long *freesize)
3049 {
3050 /* Find 'raiddisks' spare extents at least 'size' big (but
3051 * only caring about multiples of 'chunk') and remember
3052 * them.
3053 * If the cannot be found, fail.
3054 */
3055 struct dl *dl;
3056 struct ddf_super *ddf = st->sb;
3057 int cnt = 0;
3058
3059 for (dl = ddf->dlist; dl ; dl=dl->next) {
3060 dl->raiddisk = -1;
3061 dl->esize = 0;
3062 }
3063 /* Now find largest extent on each device */
3064 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3065 struct extent *e = get_extents(ddf, dl);
3066 unsigned long long pos = 0;
3067 int i = 0;
3068 int found = 0;
3069 unsigned long long minsize = size;
3070
3071 if (size == 0)
3072 minsize = chunk;
3073
3074 if (!e)
3075 continue;
3076 do {
3077 unsigned long long esize;
3078 esize = e[i].start - pos;
3079 if (esize >= minsize) {
3080 found = 1;
3081 minsize = esize;
3082 }
3083 pos = e[i].start + e[i].size;
3084 i++;
3085 } while (e[i-1].size);
3086 if (found) {
3087 cnt++;
3088 dl->esize = minsize;
3089 }
3090 free(e);
3091 }
3092 if (cnt < raiddisks) {
3093 pr_err("not enough devices with space to create array.\n");
3094 return 0; /* No enough free spaces large enough */
3095 }
3096 if (size == 0) {
3097 /* choose the largest size of which there are at least 'raiddisk' */
3098 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3099 struct dl *dl2;
3100 if (dl->esize <= size)
3101 continue;
3102 /* This is bigger than 'size', see if there are enough */
3103 cnt = 0;
3104 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
3105 if (dl2->esize >= dl->esize)
3106 cnt++;
3107 if (cnt >= raiddisks)
3108 size = dl->esize;
3109 }
3110 if (chunk) {
3111 size = size / chunk;
3112 size *= chunk;
3113 }
3114 *freesize = size;
3115 if (size < 32) {
3116 pr_err("not enough spare devices to create array.\n");
3117 return 0;
3118 }
3119 }
3120 /* We have a 'size' of which there are enough spaces.
3121 * We simply do a first-fit */
3122 cnt = 0;
3123 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3124 if (dl->esize < size)
3125 continue;
3126
3127 dl->raiddisk = cnt;
3128 cnt++;
3129 }
3130 return 1;
3131 }
3132
3133 static int
3134 validate_geometry_ddf_container(struct supertype *st,
3135 int level, int layout, int raiddisks,
3136 int chunk, unsigned long long size,
3137 unsigned long long data_offset,
3138 char *dev, unsigned long long *freesize,
3139 int verbose);
3140
3141 static int validate_geometry_ddf_bvd(struct supertype *st,
3142 int level, int layout, int raiddisks,
3143 int *chunk, unsigned long long size,
3144 unsigned long long data_offset,
3145 char *dev, unsigned long long *freesize,
3146 int verbose);
3147
3148 static int validate_geometry_ddf(struct supertype *st,
3149 int level, int layout, int raiddisks,
3150 int *chunk, unsigned long long size,
3151 unsigned long long data_offset,
3152 char *dev, unsigned long long *freesize,
3153 int verbose)
3154 {
3155 int fd;
3156 struct mdinfo *sra;
3157 int cfd;
3158
3159 /* ddf potentially supports lots of things, but it depends on
3160 * what devices are offered (and maybe kernel version?)
3161 * If given unused devices, we will make a container.
3162 * If given devices in a container, we will make a BVD.
3163 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3164 */
3165
3166 if (chunk && *chunk == UnSet)
3167 *chunk = DEFAULT_CHUNK;
3168
3169 if (level == -1000000) level = LEVEL_CONTAINER;
3170 if (level == LEVEL_CONTAINER) {
3171 /* Must be a fresh device to add to a container */
3172 return validate_geometry_ddf_container(st, level, layout,
3173 raiddisks, chunk?*chunk:0,
3174 size, data_offset, dev,
3175 freesize,
3176 verbose);
3177 }
3178
3179 if (!dev) {
3180 mdu_array_info_t array = {
3181 .level = level, .layout = layout,
3182 .raid_disks = raiddisks
3183 };
3184 struct vd_config conf;
3185 if (layout_md2ddf(&array, &conf) == -1) {
3186 if (verbose)
3187 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3188 level, layout, raiddisks);
3189 return 0;
3190 }
3191 /* Should check layout? etc */
3192
3193 if (st->sb && freesize) {
3194 /* --create was given a container to create in.
3195 * So we need to check that there are enough
3196 * free spaces and return the amount of space.
3197 * We may as well remember which drives were
3198 * chosen so that add_to_super/getinfo_super
3199 * can return them.
3200 */
3201 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
3202 }
3203 return 1;
3204 }
3205
3206 if (st->sb) {
3207 /* A container has already been opened, so we are
3208 * creating in there. Maybe a BVD, maybe an SVD.
3209 * Should make a distinction one day.
3210 */
3211 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
3212 chunk, size, data_offset, dev,
3213 freesize,
3214 verbose);
3215 }
3216 /* This is the first device for the array.
3217 * If it is a container, we read it in and do automagic allocations,
3218 * no other devices should be given.
3219 * Otherwise it must be a member device of a container, and we
3220 * do manual allocation.
3221 * Later we should check for a BVD and make an SVD.
3222 */
3223 fd = open(dev, O_RDONLY|O_EXCL, 0);
3224 if (fd >= 0) {
3225 sra = sysfs_read(fd, NULL, GET_VERSION);
3226 close(fd);
3227 if (sra && sra->array.major_version == -1 &&
3228 strcmp(sra->text_version, "ddf") == 0) {
3229
3230 /* load super */
3231 /* find space for 'n' devices. */
3232 /* remember the devices */
3233 /* Somehow return the fact that we have enough */
3234 }
3235
3236 if (verbose)
3237 pr_err("ddf: Cannot create this array "
3238 "on device %s - a container is required.\n",
3239 dev);
3240 return 0;
3241 }
3242 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
3243 if (verbose)
3244 pr_err("ddf: Cannot open %s: %s\n",
3245 dev, strerror(errno));
3246 return 0;
3247 }
3248 /* Well, it is in use by someone, maybe a 'ddf' container. */
3249 cfd = open_container(fd);
3250 if (cfd < 0) {
3251 close(fd);
3252 if (verbose)
3253 pr_err("ddf: Cannot use %s: %s\n",
3254 dev, strerror(EBUSY));
3255 return 0;
3256 }
3257 sra = sysfs_read(cfd, NULL, GET_VERSION);
3258 close(fd);
3259 if (sra && sra->array.major_version == -1 &&
3260 strcmp(sra->text_version, "ddf") == 0) {
3261 /* This is a member of a ddf container. Load the container
3262 * and try to create a bvd
3263 */
3264 struct ddf_super *ddf;
3265 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
3266 st->sb = ddf;
3267 strcpy(st->container_devnm, fd2devnm(cfd));
3268 close(cfd);
3269 return validate_geometry_ddf_bvd(st, level, layout,
3270 raiddisks, chunk, size,
3271 data_offset,
3272 dev, freesize,
3273 verbose);
3274 }
3275 close(cfd);
3276 } else /* device may belong to a different container */
3277 return 0;
3278
3279 return 1;
3280 }
3281
3282 static int
3283 validate_geometry_ddf_container(struct supertype *st,
3284 int level, int layout, int raiddisks,
3285 int chunk, unsigned long long size,
3286 unsigned long long data_offset,
3287 char *dev, unsigned long long *freesize,
3288 int verbose)
3289 {
3290 int fd;
3291 unsigned long long ldsize;
3292
3293 if (level != LEVEL_CONTAINER)
3294 return 0;
3295 if (!dev)
3296 return 1;
3297
3298 fd = open(dev, O_RDONLY|O_EXCL, 0);
3299 if (fd < 0) {
3300 if (verbose)
3301 pr_err("ddf: Cannot open %s: %s\n",
3302 dev, strerror(errno));
3303 return 0;
3304 }
3305 if (!get_dev_size(fd, dev, &ldsize)) {
3306 close(fd);
3307 return 0;
3308 }
3309 close(fd);
3310
3311 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
3312 if (*freesize == 0)
3313 return 0;
3314
3315 return 1;
3316 }
3317
3318 static int validate_geometry_ddf_bvd(struct supertype *st,
3319 int level, int layout, int raiddisks,
3320 int *chunk, unsigned long long size,
3321 unsigned long long data_offset,
3322 char *dev, unsigned long long *freesize,
3323 int verbose)
3324 {
3325 struct stat stb;
3326 struct ddf_super *ddf = st->sb;
3327 struct dl *dl;
3328 unsigned long long pos = 0;
3329 unsigned long long maxsize;
3330 struct extent *e;
3331 int i;
3332 /* ddf/bvd supports lots of things, but not containers */
3333 if (level == LEVEL_CONTAINER) {
3334 if (verbose)
3335 pr_err("DDF cannot create a container within an container\n");
3336 return 0;
3337 }
3338 /* We must have the container info already read in. */
3339 if (!ddf)
3340 return 0;
3341
3342 if (!dev) {
3343 /* General test: make sure there is space for
3344 * 'raiddisks' device extents of size 'size'.
3345 */
3346 unsigned long long minsize = size;
3347 int dcnt = 0;
3348 if (minsize == 0)
3349 minsize = 8;
3350 for (dl = ddf->dlist; dl ; dl = dl->next)
3351 {
3352 int found = 0;
3353 pos = 0;
3354
3355 i = 0;
3356 e = get_extents(ddf, dl);
3357 if (!e) continue;
3358 do {
3359 unsigned long long esize;
3360 esize = e[i].start - pos;
3361 if (esize >= minsize)
3362 found = 1;
3363 pos = e[i].start + e[i].size;
3364 i++;
3365 } while (e[i-1].size);
3366 if (found)
3367 dcnt++;
3368 free(e);
3369 }
3370 if (dcnt < raiddisks) {
3371 if (verbose)
3372 pr_err("ddf: Not enough devices with "
3373 "space for this array (%d < %d)\n",
3374 dcnt, raiddisks);
3375 return 0;
3376 }
3377 return 1;
3378 }
3379 /* This device must be a member of the set */
3380 if (stat(dev, &stb) < 0)
3381 return 0;
3382 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3383 return 0;
3384 for (dl = ddf->dlist ; dl ; dl = dl->next) {
3385 if (dl->major == (int)major(stb.st_rdev) &&
3386 dl->minor == (int)minor(stb.st_rdev))
3387 break;
3388 }
3389 if (!dl) {
3390 if (verbose)
3391 pr_err("ddf: %s is not in the "
3392 "same DDF set\n",
3393 dev);
3394 return 0;
3395 }
3396 e = get_extents(ddf, dl);
3397 maxsize = 0;
3398 i = 0;
3399 if (e) do {
3400 unsigned long long esize;
3401 esize = e[i].start - pos;
3402 if (esize >= maxsize)
3403 maxsize = esize;
3404 pos = e[i].start + e[i].size;
3405 i++;
3406 } while (e[i-1].size);
3407 *freesize = maxsize;
3408 // FIXME here I am
3409
3410 return 1;
3411 }
3412
3413 static int load_super_ddf_all(struct supertype *st, int fd,
3414 void **sbp, char *devname)
3415 {
3416 struct mdinfo *sra;
3417 struct ddf_super *super;
3418 struct mdinfo *sd, *best = NULL;
3419 int bestseq = 0;
3420 int seq;
3421 char nm[20];
3422 int dfd;
3423
3424 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
3425 if (!sra)
3426 return 1;
3427 if (sra->array.major_version != -1 ||
3428 sra->array.minor_version != -2 ||
3429 strcmp(sra->text_version, "ddf") != 0)
3430 return 1;
3431
3432 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
3433 return 1;
3434 memset(super, 0, sizeof(*super));
3435
3436 /* first, try each device, and choose the best ddf */
3437 for (sd = sra->devs ; sd ; sd = sd->next) {
3438 int rv;
3439 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3440 dfd = dev_open(nm, O_RDONLY);
3441 if (dfd < 0)
3442 return 2;
3443 rv = load_ddf_headers(dfd, super, NULL);
3444 close(dfd);
3445 if (rv == 0) {
3446 seq = __be32_to_cpu(super->active->seq);
3447 if (super->active->openflag)
3448 seq--;
3449 if (!best || seq > bestseq) {
3450 bestseq = seq;
3451 best = sd;
3452 }
3453 }
3454 }
3455 if (!best)
3456 return 1;
3457 /* OK, load this ddf */
3458 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3459 dfd = dev_open(nm, O_RDONLY);
3460 if (dfd < 0)
3461 return 1;
3462 load_ddf_headers(dfd, super, NULL);
3463 load_ddf_global(dfd, super, NULL);
3464 close(dfd);
3465 /* Now we need the device-local bits */
3466 for (sd = sra->devs ; sd ; sd = sd->next) {
3467 int rv;
3468
3469 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3470 dfd = dev_open(nm, O_RDWR);
3471 if (dfd < 0)
3472 return 2;
3473 rv = load_ddf_headers(dfd, super, NULL);
3474 if (rv == 0)
3475 rv = load_ddf_local(dfd, super, NULL, 1);
3476 if (rv)
3477 return 1;
3478 }
3479
3480 *sbp = super;
3481 if (st->ss == NULL) {
3482 st->ss = &super_ddf;
3483 st->minor_version = 0;
3484 st->max_devs = 512;
3485 }
3486 strcpy(st->container_devnm, fd2devnm(fd));
3487 return 0;
3488 }
3489
3490 static int load_container_ddf(struct supertype *st, int fd,
3491 char *devname)
3492 {
3493 return load_super_ddf_all(st, fd, &st->sb, devname);
3494 }
3495
3496 #endif /* MDASSEMBLE */
3497
3498 static int check_secondary(const struct vcl *vc)
3499 {
3500 const struct vd_config *conf = &vc->conf;
3501 int i;
3502
3503 /* The only DDF secondary RAID level md can support is
3504 * RAID 10, if the stripe sizes and Basic volume sizes
3505 * are all equal.
3506 * Other configurations could in theory be supported by exposing
3507 * the BVDs to user space and using device mapper for the secondary
3508 * mapping. So far we don't support that.
3509 */
3510
3511 __u64 sec_elements[4] = {0, 0, 0, 0};
3512 #define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3513 #define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3514
3515 if (vc->other_bvds == NULL) {
3516 pr_err("No BVDs for secondary RAID found\n");
3517 return -1;
3518 }
3519 if (conf->prl != DDF_RAID1) {
3520 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3521 return -1;
3522 }
3523 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3524 pr_err("Secondary RAID level %d is unsupported\n",
3525 conf->srl);
3526 return -1;
3527 }
3528 __set_sec_seen(conf->sec_elmnt_seq);
3529 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3530 const struct vd_config *bvd = vc->other_bvds[i];
3531 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
3532 continue;
3533 if (bvd->srl != conf->srl) {
3534 pr_err("Inconsistent secondary RAID level across BVDs\n");
3535 return -1;
3536 }
3537 if (bvd->prl != conf->prl) {
3538 pr_err("Different RAID levels for BVDs are unsupported\n");
3539 return -1;
3540 }
3541 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3542 pr_err("All BVDs must have the same number of primary elements\n");
3543 return -1;
3544 }
3545 if (bvd->chunk_shift != conf->chunk_shift) {
3546 pr_err("Different strip sizes for BVDs are unsupported\n");
3547 return -1;
3548 }
3549 if (bvd->array_blocks != conf->array_blocks) {
3550 pr_err("Different BVD sizes are unsupported\n");
3551 return -1;
3552 }
3553 __set_sec_seen(bvd->sec_elmnt_seq);
3554 }
3555 for (i = 0; i < conf->sec_elmnt_count; i++) {
3556 if (!__was_sec_seen(i)) {
3557 pr_err("BVD %d is missing\n", i);
3558 return -1;
3559 }
3560 }
3561 return 0;
3562 }
3563
3564 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
3565 __u32 refnum, unsigned int nmax,
3566 const struct vd_config **bvd,
3567 unsigned int *idx)
3568 {
3569 unsigned int i, j, n, sec, cnt;
3570
3571 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3572 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3573
3574 for (i = 0, j = 0 ; i < nmax ; i++) {
3575 /* j counts valid entries for this BVD */
3576 if (vc->conf.phys_refnum[i] != 0xffffffff)
3577 j++;
3578 if (vc->conf.phys_refnum[i] == refnum) {
3579 *bvd = &vc->conf;
3580 *idx = i;
3581 return sec * cnt + j - 1;
3582 }
3583 }
3584 if (vc->other_bvds == NULL)
3585 goto bad;
3586
3587 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3588 struct vd_config *vd = vc->other_bvds[n-1];
3589 sec = vd->sec_elmnt_seq;
3590 if (sec == DDF_UNUSED_BVD)
3591 continue;
3592 for (i = 0, j = 0 ; i < nmax ; i++) {
3593 if (vd->phys_refnum[i] != 0xffffffff)
3594 j++;
3595 if (vd->phys_refnum[i] == refnum) {
3596 *bvd = vd;
3597 *idx = i;
3598 return sec * cnt + j - 1;
3599 }
3600 }
3601 }
3602 bad:
3603 *bvd = NULL;
3604 return DDF_NOTFOUND;
3605 }
3606
3607 static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
3608 {
3609 /* Given a container loaded by load_super_ddf_all,
3610 * extract information about all the arrays into
3611 * an mdinfo tree.
3612 *
3613 * For each vcl in conflist: create an mdinfo, fill it in,
3614 * then look for matching devices (phys_refnum) in dlist
3615 * and create appropriate device mdinfo.
3616 */
3617 struct ddf_super *ddf = st->sb;
3618 struct mdinfo *rest = NULL;
3619 struct vcl *vc;
3620
3621 for (vc = ddf->conflist ; vc ; vc=vc->next)
3622 {
3623 unsigned int i;
3624 unsigned int j;
3625 struct mdinfo *this;
3626 char *ep;
3627 __u32 *cptr;
3628 unsigned int pd;
3629
3630 if (subarray &&
3631 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3632 *ep != '\0'))
3633 continue;
3634
3635 if (vc->conf.sec_elmnt_count > 1) {
3636 if (check_secondary(vc) != 0)
3637 continue;
3638 }
3639
3640 this = xcalloc(1, sizeof(*this));
3641 this->next = rest;
3642 rest = this;
3643
3644 if (layout_ddf2md(&vc->conf, &this->array))
3645 continue;
3646 this->array.md_minor = -1;
3647 this->array.major_version = -1;
3648 this->array.minor_version = -2;
3649 cptr = (__u32 *)(vc->conf.guid + 16);
3650 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
3651 this->array.utime = DECADE +
3652 __be32_to_cpu(vc->conf.timestamp);
3653 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3654
3655 i = vc->vcnum;
3656 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3657 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
3658 DDF_init_full) {
3659 this->array.state = 0;
3660 this->resync_start = 0;
3661 } else {
3662 this->array.state = 1;
3663 this->resync_start = MaxSector;
3664 }
3665 memcpy(this->name, ddf->virt->entries[i].name, 16);
3666 this->name[16]=0;
3667 for(j=0; j<16; j++)
3668 if (this->name[j] == ' ')
3669 this->name[j] = 0;
3670
3671 memset(this->uuid, 0, sizeof(this->uuid));
3672 this->component_size = __be64_to_cpu(vc->conf.blocks);
3673 this->array.size = this->component_size / 2;
3674 this->container_member = i;
3675
3676 ddf->currentconf = vc;
3677 uuid_from_super_ddf(st, this->uuid);
3678 if (!subarray)
3679 ddf->currentconf = NULL;
3680
3681 sprintf(this->text_version, "/%s/%d",
3682 st->container_devnm, this->container_member);
3683
3684 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
3685 struct mdinfo *dev;
3686 struct dl *d;
3687 const struct vd_config *bvd;
3688 unsigned int iphys;
3689 int stt;
3690
3691 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
3692 continue;
3693
3694 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
3695 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3696 != DDF_Online)
3697 continue;
3698
3699 i = get_pd_index_from_refnum(
3700 vc, ddf->phys->entries[pd].refnum,
3701 ddf->mppe, &bvd, &iphys);
3702 if (i == DDF_NOTFOUND)
3703 continue;
3704
3705 this->array.working_disks++;
3706
3707 for (d = ddf->dlist; d ; d=d->next)
3708 if (d->disk.refnum ==
3709 ddf->phys->entries[pd].refnum)
3710 break;
3711 if (d == NULL)
3712 /* Haven't found that one yet, maybe there are others */
3713 continue;
3714
3715 dev = xcalloc(1, sizeof(*dev));
3716 dev->next = this->devs;
3717 this->devs = dev;
3718
3719 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3720 dev->disk.major = d->major;
3721 dev->disk.minor = d->minor;
3722 dev->disk.raid_disk = i;
3723 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
3724 dev->recovery_start = MaxSector;
3725
3726 dev->events = __be32_to_cpu(ddf->primary.seq);
3727 dev->data_offset =
3728 __be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
3729 dev->component_size = __be64_to_cpu(bvd->blocks);
3730 if (d->devname)
3731 strcpy(dev->name, d->devname);
3732 }
3733 }
3734 return rest;
3735 }
3736
3737 static int store_super_ddf(struct supertype *st, int fd)
3738 {
3739 struct ddf_super *ddf = st->sb;
3740 unsigned long long dsize;
3741 void *buf;
3742 int rc;
3743
3744 if (!ddf)
3745 return 1;
3746
3747 if (!get_dev_size(fd, NULL, &dsize))
3748 return 1;
3749
3750 if (ddf->dlist || ddf->conflist) {
3751 struct stat sta;
3752 struct dl *dl;
3753 int ofd, ret;
3754
3755 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3756 pr_err("%s: file descriptor for invalid device\n",
3757 __func__);
3758 return 1;
3759 }
3760 for (dl = ddf->dlist; dl; dl = dl->next)
3761 if (dl->major == (int)major(sta.st_rdev) &&
3762 dl->minor == (int)minor(sta.st_rdev))
3763 break;
3764 if (!dl) {
3765 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3766 (int)major(sta.st_rdev),
3767 (int)minor(sta.st_rdev));
3768 return 1;
3769 }
3770 ofd = dl->fd;
3771 dl->fd = fd;
3772 ret = (_write_super_to_disk(ddf, dl) != 1);
3773 dl->fd = ofd;
3774 return ret;
3775 }
3776
3777 if (posix_memalign(&buf, 512, 512) != 0)
3778 return 1;
3779 memset(buf, 0, 512);
3780
3781 lseek64(fd, dsize-512, 0);
3782 rc = write(fd, buf, 512);
3783 free(buf);
3784 if (rc < 0)
3785 return 1;
3786 return 0;
3787 }
3788
3789 static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3790 {
3791 /*
3792 * return:
3793 * 0 same, or first was empty, and second was copied
3794 * 1 second had wrong number
3795 * 2 wrong uuid
3796 * 3 wrong other info
3797 */
3798 struct ddf_super *first = st->sb;
3799 struct ddf_super *second = tst->sb;
3800 struct dl *dl1, *dl2;
3801 struct vcl *vl1, *vl2;
3802 unsigned int max_vds, max_pds, pd, vd;
3803
3804 if (!first) {
3805 st->sb = tst->sb;
3806 tst->sb = NULL;
3807 return 0;
3808 }
3809
3810 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3811 return 2;
3812
3813 if (first->anchor.seq != second->anchor.seq) {
3814 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3815 __be32_to_cpu(first->anchor.seq),
3816 __be32_to_cpu(second->anchor.seq));
3817 return 3;
3818 }
3819 if (first->max_part != second->max_part ||
3820 first->phys->used_pdes != second->phys->used_pdes ||
3821 first->virt->populated_vdes != second->virt->populated_vdes) {
3822 dprintf("%s: PD/VD number mismatch\n", __func__);
3823 return 3;
3824 }
3825
3826 max_pds = __be16_to_cpu(first->phys->used_pdes);
3827 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3828 for (pd = 0; pd < max_pds; pd++)
3829 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3830 break;
3831 if (pd == max_pds) {
3832 dprintf("%s: no match for disk %08x\n", __func__,
3833 __be32_to_cpu(dl2->disk.refnum));
3834 return 3;
3835 }
3836 }
3837
3838 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3839 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3840 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3841 continue;
3842 for (vd = 0; vd < max_vds; vd++)
3843 if (!memcmp(first->virt->entries[vd].guid,
3844 vl2->conf.guid, DDF_GUID_LEN))
3845 break;
3846 if (vd == max_vds) {
3847 dprintf("%s: no match for VD config\n", __func__);
3848 return 3;
3849 }
3850 }
3851 /* FIXME should I look at anything else? */
3852
3853 /*
3854 At this point we are fairly sure that the meta data matches.
3855 But the new disk may contain additional local data.
3856 Add it to the super block.
3857 */
3858 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3859 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3860 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3861 DDF_GUID_LEN))
3862 break;
3863 if (vl1) {
3864 if (vl1->other_bvds != NULL &&
3865 vl1->conf.sec_elmnt_seq !=
3866 vl2->conf.sec_elmnt_seq) {
3867 dprintf("%s: adding BVD %u\n", __func__,
3868 vl2->conf.sec_elmnt_seq);
3869 add_other_bvd(vl1, &vl2->conf,
3870 first->conf_rec_len*512);
3871 }
3872 continue;
3873 }
3874
3875 if (posix_memalign((void **)&vl1, 512,
3876 (first->conf_rec_len*512 +
3877 offsetof(struct vcl, conf))) != 0) {
3878 pr_err("%s could not allocate vcl buf\n",
3879 __func__);
3880 return 3;
3881 }
3882
3883 vl1->next = first->conflist;
3884 vl1->block_sizes = NULL;
3885 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3886 if (alloc_other_bvds(first, vl1) != 0) {
3887 pr_err("%s could not allocate other bvds\n",
3888 __func__);
3889 free(vl1);
3890 return 3;
3891 }
3892 for (vd = 0; vd < max_vds; vd++)
3893 if (!memcmp(first->virt->entries[vd].guid,
3894 vl1->conf.guid, DDF_GUID_LEN))
3895 break;
3896 vl1->vcnum = vd;
3897 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3898 first->conflist = vl1;
3899 }
3900
3901 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3902 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3903 if (dl1->disk.refnum == dl2->disk.refnum)
3904 break;
3905 if (dl1)
3906 continue;
3907
3908 if (posix_memalign((void **)&dl1, 512,
3909 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3910 != 0) {
3911 pr_err("%s could not allocate disk info buffer\n",
3912 __func__);
3913 return 3;
3914 }
3915 memcpy(dl1, dl2, sizeof(*dl1));
3916 dl1->mdupdate = NULL;
3917 dl1->next = first->dlist;
3918 dl1->fd = -1;
3919 for (pd = 0; pd < max_pds; pd++)
3920 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3921 break;
3922 dl1->pdnum = pd;
3923 if (dl2->spare) {
3924 if (posix_memalign((void **)&dl1->spare, 512,
3925 first->conf_rec_len*512) != 0) {
3926 pr_err("%s could not allocate spare info buf\n",
3927 __func__);
3928 return 3;
3929 }
3930 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3931 }
3932 for (vd = 0 ; vd < first->max_part ; vd++) {
3933 if (!dl2->vlist[vd]) {
3934 dl1->vlist[vd] = NULL;
3935 continue;
3936 }
3937 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3938 if (!memcmp(vl1->conf.guid,
3939 dl2->vlist[vd]->conf.guid,
3940 DDF_GUID_LEN))
3941 break;
3942 dl1->vlist[vd] = vl1;
3943 }
3944 }
3945 first->dlist = dl1;
3946 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3947 __be32_to_cpu(dl1->disk.refnum));
3948 }
3949
3950 return 0;
3951 }
3952
3953 #ifndef MDASSEMBLE
3954 /*
3955 * A new array 'a' has been started which claims to be instance 'inst'
3956 * within container 'c'.
3957 * We need to confirm that the array matches the metadata in 'c' so
3958 * that we don't corrupt any metadata.
3959 */
3960 static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
3961 {
3962 struct ddf_super *ddf = c->sb;
3963 int n = atoi(inst);
3964 if (all_ff(ddf->virt->entries[n].guid)) {
3965 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
3966 return -ENODEV;
3967 }
3968 dprintf("ddf: open_new %d\n", n);
3969 a->info.container_member = n;
3970 return 0;
3971 }
3972
3973 /*
3974 * The array 'a' is to be marked clean in the metadata.
3975 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
3976 * clean up to the point (in sectors). If that cannot be recorded in the
3977 * metadata, then leave it as dirty.
3978 *
3979 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3980 * !global! virtual_disk.virtual_entry structure.
3981 */
3982 static int ddf_set_array_state(struct active_array *a, int consistent)
3983 {
3984 struct ddf_super *ddf = a->container->sb;
3985 int inst = a->info.container_member;
3986 int old = ddf->virt->entries[inst].state;
3987 if (consistent == 2) {
3988 /* Should check if a recovery should be started FIXME */
3989 consistent = 1;
3990 if (!is_resync_complete(&a->info))
3991 consistent = 0;
3992 }
3993 if (consistent)
3994 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3995 else
3996 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
3997 if (old != ddf->virt->entries[inst].state)
3998 ddf_set_updates_pending(ddf);
3999
4000 old = ddf->virt->entries[inst].init_state;
4001 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
4002 if (is_resync_complete(&a->info))
4003 ddf->virt->entries[inst].init_state |= DDF_init_full;
4004 else if (a->info.resync_start == 0)
4005 ddf->virt->entries[inst].init_state |= DDF_init_not;
4006 else
4007 ddf->virt->entries[inst].init_state |= DDF_init_quick;
4008 if (old != ddf->virt->entries[inst].init_state)
4009 ddf_set_updates_pending(ddf);
4010
4011 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
4012 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
4013 consistent?"clean":"dirty",
4014 a->info.resync_start);
4015 return consistent;
4016 }
4017
4018 static int get_bvd_state(const struct ddf_super *ddf,
4019 const struct vd_config *vc)
4020 {
4021 unsigned int i, n_bvd, working = 0;
4022 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
4023 int pd, st, state;
4024 for (i = 0; i < n_prim; i++) {
4025 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
4026 continue;
4027 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4028 if (pd < 0)
4029 continue;
4030 st = __be16_to_cpu(ddf->phys->entries[pd].state);
4031 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
4032 == DDF_Online)
4033 working++;
4034 }
4035
4036 state = DDF_state_degraded;
4037 if (working == n_prim)
4038 state = DDF_state_optimal;
4039 else
4040 switch (vc->prl) {
4041 case DDF_RAID0:
4042 case DDF_CONCAT:
4043 case DDF_JBOD:
4044 state = DDF_state_failed;
4045 break;
4046 case DDF_RAID1:
4047 if (working == 0)
4048 state = DDF_state_failed;
4049 else if (working >= 2)
4050 state = DDF_state_part_optimal;
4051 break;
4052 case DDF_RAID4:
4053 case DDF_RAID5:
4054 if (working < n_prim - 1)
4055 state = DDF_state_failed;
4056 break;
4057 case DDF_RAID6:
4058 if (working < n_prim - 2)
4059 state = DDF_state_failed;
4060 else if (working == n_prim - 1)
4061 state = DDF_state_part_optimal;
4062 break;
4063 }
4064 return state;
4065 }
4066
4067 static int secondary_state(int state, int other, int seclevel)
4068 {
4069 if (state == DDF_state_optimal && other == DDF_state_optimal)
4070 return DDF_state_optimal;
4071 if (seclevel == DDF_2MIRRORED) {
4072 if (state == DDF_state_optimal || other == DDF_state_optimal)
4073 return DDF_state_part_optimal;
4074 if (state == DDF_state_failed && other == DDF_state_failed)
4075 return DDF_state_failed;
4076 return DDF_state_degraded;
4077 } else {
4078 if (state == DDF_state_failed || other == DDF_state_failed)
4079 return DDF_state_failed;
4080 if (state == DDF_state_degraded || other == DDF_state_degraded)
4081 return DDF_state_degraded;
4082 return DDF_state_part_optimal;
4083 }
4084 }
4085
4086 static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4087 {
4088 int state = get_bvd_state(ddf, &vcl->conf);
4089 unsigned int i;
4090 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4091 state = secondary_state(
4092 state,
4093 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4094 vcl->conf.srl);
4095 }
4096 return state;
4097 }
4098
4099 /*
4100 * The state of each disk is stored in the global phys_disk structure
4101 * in phys_disk.entries[n].state.
4102 * This makes various combinations awkward.
4103 * - When a device fails in any array, it must be failed in all arrays
4104 * that include a part of this device.
4105 * - When a component is rebuilding, we cannot include it officially in the
4106 * array unless this is the only array that uses the device.
4107 *
4108 * So: when transitioning:
4109 * Online -> failed, just set failed flag. monitor will propagate
4110 * spare -> online, the device might need to be added to the array.
4111 * spare -> failed, just set failed. Don't worry if in array or not.
4112 */
4113 static void ddf_set_disk(struct active_array *a, int n, int state)
4114 {
4115 struct ddf_super *ddf = a->container->sb;
4116 unsigned int inst = a->info.container_member, n_bvd;
4117 struct vcl *vcl;
4118 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4119 &n_bvd, &vcl);
4120 int pd;
4121 struct mdinfo *mdi;
4122 struct dl *dl;
4123
4124 if (vc == NULL) {
4125 dprintf("ddf: cannot find instance %d!!\n", inst);
4126 return;
4127 }
4128 /* Find the matching slot in 'info'. */
4129 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4130 if (mdi->disk.raid_disk == n)
4131 break;
4132 if (!mdi)
4133 return;
4134
4135 /* and find the 'dl' entry corresponding to that. */
4136 for (dl = ddf->dlist; dl; dl = dl->next)
4137 if (mdi->state_fd >= 0 &&
4138 mdi->disk.major == dl->major &&
4139 mdi->disk.minor == dl->minor)
4140 break;
4141 if (!dl)
4142 return;
4143
4144 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4145 if (pd < 0 || pd != dl->pdnum) {
4146 /* disk doesn't currently exist or has changed.
4147 * If it is now in_sync, insert it. */
4148 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4149 __func__, dl->pdnum, dl->major, dl->minor,
4150 __be32_to_cpu(dl->disk.refnum));
4151 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4152 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
4153 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
4154 pd = dl->pdnum; /* FIXME: is this really correct ? */
4155 vc->phys_refnum[n_bvd] = dl->disk.refnum;
4156 LBA_OFFSET(ddf, vc)[n_bvd] =
4157 __cpu_to_be64(mdi->data_offset);
4158 ddf->phys->entries[pd].type &=
4159 ~__cpu_to_be16(DDF_Global_Spare);
4160 ddf->phys->entries[pd].type |=
4161 __cpu_to_be16(DDF_Active_in_VD);
4162 ddf_set_updates_pending(ddf);
4163 }
4164 } else {
4165 int old = ddf->phys->entries[pd].state;
4166 if (state & DS_FAULTY)
4167 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4168 if (state & DS_INSYNC) {
4169 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4170 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4171 }
4172 if (old != ddf->phys->entries[pd].state)
4173 ddf_set_updates_pending(ddf);
4174 }
4175
4176 dprintf("ddf: set_disk %d to %x\n", n, state);
4177
4178 /* Now we need to check the state of the array and update
4179 * virtual_disk.entries[n].state.
4180 * It needs to be one of "optimal", "degraded", "failed".
4181 * I don't understand 'deleted' or 'missing'.
4182 */
4183 state = get_svd_state(ddf, vcl);
4184
4185 if (ddf->virt->entries[inst].state !=
4186 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4187 | state)) {
4188
4189 ddf->virt->entries[inst].state =
4190 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4191 | state;
4192 ddf_set_updates_pending(ddf);
4193 }
4194
4195 }
4196
4197 static void ddf_sync_metadata(struct supertype *st)
4198 {
4199
4200 /*
4201 * Write all data to all devices.
4202 * Later, we might be able to track whether only local changes
4203 * have been made, or whether any global data has been changed,
4204 * but ddf is sufficiently weird that it probably always
4205 * changes global data ....
4206 */
4207 struct ddf_super *ddf = st->sb;
4208 if (!ddf->updates_pending)
4209 return;
4210 ddf->updates_pending = 0;
4211 __write_init_super_ddf(st);
4212 dprintf("ddf: sync_metadata\n");
4213 }
4214
4215 static int del_from_conflist(struct vcl **list, const char *guid)
4216 {
4217 struct vcl **p;
4218 int found = 0;
4219 for (p = list; p && *p; p = &((*p)->next))
4220 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4221 found = 1;
4222 *p = (*p)->next;
4223 }
4224 return found;
4225 }
4226
4227 static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4228 {
4229 struct dl *dl;
4230 unsigned int vdnum, i;
4231 vdnum = find_vde_by_guid(ddf, guid);
4232 if (vdnum == DDF_NOTFOUND) {
4233 pr_err("%s: could not find VD %s\n", __func__,
4234 guid_str(guid));
4235 return -1;
4236 }
4237 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4238 pr_err("%s: could not find conf %s\n", __func__,
4239 guid_str(guid));
4240 return -1;
4241 }
4242 for (dl = ddf->dlist; dl; dl = dl->next)
4243 for (i = 0; i < ddf->max_part; i++)
4244 if (dl->vlist[i] != NULL &&
4245 !memcmp(dl->vlist[i]->conf.guid, guid,
4246 DDF_GUID_LEN))
4247 dl->vlist[i] = NULL;
4248 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4249 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4250 return 0;
4251 }
4252
4253 static int kill_subarray_ddf(struct supertype *st)
4254 {
4255 struct ddf_super *ddf = st->sb;
4256 /*
4257 * currentconf is set in container_content_ddf,
4258 * called with subarray arg
4259 */
4260 struct vcl *victim = ddf->currentconf;
4261 struct vd_config *conf;
4262 ddf->currentconf = NULL;
4263 unsigned int vdnum;
4264 if (!victim) {
4265 pr_err("%s: nothing to kill\n", __func__);
4266 return -1;
4267 }
4268 conf = &victim->conf;
4269 vdnum = find_vde_by_guid(ddf, conf->guid);
4270 if (vdnum == DDF_NOTFOUND) {
4271 pr_err("%s: could not find VD %s\n", __func__,
4272 guid_str(conf->guid));
4273 return -1;
4274 }
4275 if (st->update_tail) {
4276 struct virtual_disk *vd;
4277 int len = sizeof(struct virtual_disk)
4278 + sizeof(struct virtual_entry);
4279 vd = xmalloc(len);
4280 if (vd == NULL) {
4281 pr_err("%s: failed to allocate %d bytes\n", __func__,
4282 len);
4283 return -1;
4284 }
4285 memset(vd, 0 , len);
4286 vd->magic = DDF_VIRT_RECORDS_MAGIC;
4287 vd->populated_vdes = 0;
4288 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4289 /* we use DDF_state_deleted as marker */
4290 vd->entries[0].state = DDF_state_deleted;
4291 append_metadata_update(st, vd, len);
4292 } else {
4293 _kill_subarray_ddf(ddf, conf->guid);
4294 ddf_set_updates_pending(ddf);
4295 ddf_sync_metadata(st);
4296 }
4297 return 0;
4298 }
4299
4300 static void copy_matching_bvd(struct ddf_super *ddf,
4301 struct vd_config *conf,
4302 const struct metadata_update *update)
4303 {
4304 unsigned int mppe =
4305 __be16_to_cpu(ddf->anchor.max_primary_element_entries);
4306 unsigned int len = ddf->conf_rec_len * 512;
4307 char *p;
4308 struct vd_config *vc;
4309 for (p = update->buf; p < update->buf + update->len; p += len) {
4310 vc = (struct vd_config *) p;
4311 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4312 memcpy(conf->phys_refnum, vc->phys_refnum,
4313 mppe * (sizeof(__u32) + sizeof(__u64)));
4314 return;
4315 }
4316 }
4317 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4318 conf->sec_elmnt_seq, guid_str(conf->guid));
4319 }
4320
4321 static void ddf_process_update(struct supertype *st,
4322 struct metadata_update *update)
4323 {
4324 /* Apply this update to the metadata.
4325 * The first 4 bytes are a DDF_*_MAGIC which guides
4326 * our actions.
4327 * Possible update are:
4328 * DDF_PHYS_RECORDS_MAGIC
4329 * Add a new physical device or remove an old one.
4330 * Changes to this record only happen implicitly.
4331 * used_pdes is the device number.
4332 * DDF_VIRT_RECORDS_MAGIC
4333 * Add a new VD. Possibly also change the 'access' bits.
4334 * populated_vdes is the entry number.
4335 * DDF_VD_CONF_MAGIC
4336 * New or updated VD. the VIRT_RECORD must already
4337 * exist. For an update, phys_refnum and lba_offset
4338 * (at least) are updated, and the VD_CONF must
4339 * be written to precisely those devices listed with
4340 * a phys_refnum.
4341 * DDF_SPARE_ASSIGN_MAGIC
4342 * replacement Spare Assignment Record... but for which device?
4343 *
4344 * So, e.g.:
4345 * - to create a new array, we send a VIRT_RECORD and
4346 * a VD_CONF. Then assemble and start the array.
4347 * - to activate a spare we send a VD_CONF to add the phys_refnum
4348 * and offset. This will also mark the spare as active with
4349 * a spare-assignment record.
4350 */
4351 struct ddf_super *ddf = st->sb;
4352 __u32 *magic = (__u32*)update->buf;
4353 struct phys_disk *pd;
4354 struct virtual_disk *vd;
4355 struct vd_config *vc;
4356 struct vcl *vcl;
4357 struct dl *dl;
4358 unsigned int ent;
4359 unsigned int pdnum, pd2, len;
4360
4361 dprintf("Process update %x\n", *magic);
4362
4363 switch (*magic) {
4364 case DDF_PHYS_RECORDS_MAGIC:
4365
4366 if (update->len != (sizeof(struct phys_disk) +
4367 sizeof(struct phys_disk_entry)))
4368 return;
4369 pd = (struct phys_disk*)update->buf;
4370
4371 ent = __be16_to_cpu(pd->used_pdes);
4372 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4373 return;
4374 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4375 struct dl **dlp;
4376 /* removing this disk. */
4377 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4378 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4379 struct dl *dl = *dlp;
4380 if (dl->pdnum == (signed)ent) {
4381 close(dl->fd);
4382 dl->fd = -1;
4383 /* FIXME this doesn't free
4384 * dl->devname */
4385 update->space = dl;
4386 *dlp = dl->next;
4387 break;
4388 }
4389 }
4390 ddf_set_updates_pending(ddf);
4391 return;
4392 }
4393 if (!all_ff(ddf->phys->entries[ent].guid))
4394 return;
4395 ddf->phys->entries[ent] = pd->entries[0];
4396 ddf->phys->used_pdes = __cpu_to_be16(1 +
4397 __be16_to_cpu(ddf->phys->used_pdes));
4398 ddf_set_updates_pending(ddf);
4399 if (ddf->add_list) {
4400 struct active_array *a;
4401 struct dl *al = ddf->add_list;
4402 ddf->add_list = al->next;
4403
4404 al->next = ddf->dlist;
4405 ddf->dlist = al;
4406
4407 /* As a device has been added, we should check
4408 * for any degraded devices that might make
4409 * use of this spare */
4410 for (a = st->arrays ; a; a=a->next)
4411 a->check_degraded = 1;
4412 }
4413 break;
4414
4415 case DDF_VIRT_RECORDS_MAGIC:
4416
4417 if (update->len != (sizeof(struct virtual_disk) +
4418 sizeof(struct virtual_entry)))
4419 return;
4420 vd = (struct virtual_disk*)update->buf;
4421
4422 if (vd->entries[0].state == DDF_state_deleted) {
4423 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4424 return;
4425 } else {
4426
4427 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4428 if (ent != DDF_NOTFOUND) {
4429 dprintf("%s: VD %s exists already in slot %d\n",
4430 __func__, guid_str(vd->entries[0].guid),
4431 ent);
4432 return;
4433 }
4434 ent = find_unused_vde(ddf);
4435 if (ent == DDF_NOTFOUND)
4436 return;
4437 ddf->virt->entries[ent] = vd->entries[0];
4438 ddf->virt->populated_vdes =
4439 __cpu_to_be16(
4440 1 + __be16_to_cpu(
4441 ddf->virt->populated_vdes));
4442 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4443 __func__, guid_str(vd->entries[0].guid), ent,
4444 ddf->virt->entries[ent].state,
4445 ddf->virt->entries[ent].init_state);
4446 }
4447 ddf_set_updates_pending(ddf);
4448 break;
4449
4450 case DDF_VD_CONF_MAGIC:
4451 vc = (struct vd_config*)update->buf;
4452 len = ddf->conf_rec_len * 512;
4453 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4454 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4455 __func__, guid_str(vc->guid), update->len,
4456 vc->sec_elmnt_count);
4457 return;
4458 }
4459 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4460 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4461 break;
4462 dprintf("%s: conf update for %s (%s)\n", __func__,
4463 guid_str(vc->guid), (vcl ? "old" : "new"));
4464 if (vcl) {
4465 /* An update, just copy the phys_refnum and lba_offset
4466 * fields
4467 */
4468 unsigned int i;
4469 copy_matching_bvd(ddf, &vcl->conf, update);
4470 for (i = 1; i < vc->sec_elmnt_count; i++)
4471 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4472 update);
4473 } else {
4474 /* A new VD_CONF */
4475 unsigned int i;
4476 if (!update->space)
4477 return;
4478 vcl = update->space;
4479 update->space = NULL;
4480 vcl->next = ddf->conflist;
4481 memcpy(&vcl->conf, vc, len);
4482 ent = find_vde_by_guid(ddf, vc->guid);
4483 if (ent == DDF_NOTFOUND)
4484 return;
4485 vcl->vcnum = ent;
4486 ddf->conflist = vcl;
4487 for (i = 1; i < vc->sec_elmnt_count; i++)
4488 memcpy(vcl->other_bvds[i-1],
4489 update->buf + len * i, len);
4490 }
4491 /* Set DDF_Transition on all Failed devices - to help
4492 * us detect those that are no longer in use
4493 */
4494 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4495 if (ddf->phys->entries[pdnum].state
4496 & __be16_to_cpu(DDF_Failed))
4497 ddf->phys->entries[pdnum].state
4498 |= __be16_to_cpu(DDF_Transition);
4499 /* Now make sure vlist is correct for each dl. */
4500 for (dl = ddf->dlist; dl; dl = dl->next) {
4501 unsigned int vn = 0;
4502 int in_degraded = 0;
4503 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4504 unsigned int dn, ibvd;
4505 const struct vd_config *conf;
4506 int vstate;
4507 dn = get_pd_index_from_refnum(vcl,
4508 dl->disk.refnum,
4509 ddf->mppe,
4510 &conf, &ibvd);
4511 if (dn == DDF_NOTFOUND)
4512 continue;
4513 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
4514 dl->pdnum,
4515 __be32_to_cpu(dl->disk.refnum),
4516 guid_str(conf->guid),
4517 conf->sec_elmnt_seq, vn);
4518 /* Clear the Transition flag */
4519 if (ddf->phys->entries[dl->pdnum].state
4520 & __be16_to_cpu(DDF_Failed))
4521 ddf->phys->entries[dl->pdnum].state &=
4522 ~__be16_to_cpu(DDF_Transition);
4523 dl->vlist[vn++] = vcl;
4524 vstate = ddf->virt->entries[vcl->vcnum].state
4525 & DDF_state_mask;
4526 if (vstate == DDF_state_degraded ||
4527 vstate == DDF_state_part_optimal)
4528 in_degraded = 1;
4529 }
4530 while (vn < ddf->max_part)
4531 dl->vlist[vn++] = NULL;
4532 if (dl->vlist[0]) {
4533 ddf->phys->entries[dl->pdnum].type &=
4534 ~__cpu_to_be16(DDF_Global_Spare);
4535 if (!(ddf->phys->entries[dl->pdnum].type &
4536 __cpu_to_be16(DDF_Active_in_VD))) {
4537 ddf->phys->entries[dl->pdnum].type |=
4538 __cpu_to_be16(DDF_Active_in_VD);
4539 if (in_degraded)
4540 ddf->phys->entries[dl->pdnum].state |=
4541 __cpu_to_be16(DDF_Rebuilding);
4542 }
4543 }
4544 if (dl->spare) {
4545 ddf->phys->entries[dl->pdnum].type &=
4546 ~__cpu_to_be16(DDF_Global_Spare);
4547 ddf->phys->entries[dl->pdnum].type |=
4548 __cpu_to_be16(DDF_Spare);
4549 }
4550 if (!dl->vlist[0] && !dl->spare) {
4551 ddf->phys->entries[dl->pdnum].type |=
4552 __cpu_to_be16(DDF_Global_Spare);
4553 ddf->phys->entries[dl->pdnum].type &=
4554 ~__cpu_to_be16(DDF_Spare |
4555 DDF_Active_in_VD);
4556 }
4557 }
4558
4559 /* Now remove any 'Failed' devices that are not part
4560 * of any VD. They will have the Transition flag set.
4561 * Once done, we need to update all dl->pdnum numbers.
4562 */
4563 pd2 = 0;
4564 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4565 if ((ddf->phys->entries[pdnum].state
4566 & __be16_to_cpu(DDF_Failed))
4567 && (ddf->phys->entries[pdnum].state
4568 & __be16_to_cpu(DDF_Transition)))
4569 /* skip this one */;
4570 else if (pdnum == pd2)
4571 pd2++;
4572 else {
4573 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4574 for (dl = ddf->dlist; dl; dl = dl->next)
4575 if (dl->pdnum == (int)pdnum)
4576 dl->pdnum = pd2;
4577 pd2++;
4578 }
4579 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4580 while (pd2 < pdnum) {
4581 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4582 pd2++;
4583 }
4584
4585 ddf_set_updates_pending(ddf);
4586 break;
4587 case DDF_SPARE_ASSIGN_MAGIC:
4588 default: break;
4589 }
4590 }
4591
4592 static void ddf_prepare_update(struct supertype *st,
4593 struct metadata_update *update)
4594 {
4595 /* This update arrived at managemon.
4596 * We are about to pass it to monitor.
4597 * If a malloc is needed, do it here.
4598 */
4599 struct ddf_super *ddf = st->sb;
4600 __u32 *magic = (__u32*)update->buf;
4601 if (*magic == DDF_VD_CONF_MAGIC) {
4602 struct vcl *vcl;
4603 struct vd_config *conf = (struct vd_config *) update->buf;
4604 if (posix_memalign(&update->space, 512,
4605 offsetof(struct vcl, conf)
4606 + ddf->conf_rec_len * 512) != 0) {
4607 update->space = NULL;
4608 return;
4609 }
4610 vcl = update->space;
4611 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4612 if (alloc_other_bvds(ddf, vcl) != 0) {
4613 free(update->space);
4614 update->space = NULL;
4615 }
4616 }
4617 }
4618
4619 /*
4620 * Check if the array 'a' is degraded but not failed.
4621 * If it is, find as many spares as are available and needed and
4622 * arrange for their inclusion.
4623 * We only choose devices which are not already in the array,
4624 * and prefer those with a spare-assignment to this array.
4625 * otherwise we choose global spares - assuming always that
4626 * there is enough room.
4627 * For each spare that we assign, we return an 'mdinfo' which
4628 * describes the position for the device in the array.
4629 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4630 * the new phys_refnum and lba_offset values.
4631 *
4632 * Only worry about BVDs at the moment.
4633 */
4634 static struct mdinfo *ddf_activate_spare(struct active_array *a,
4635 struct metadata_update **updates)
4636 {
4637 int working = 0;
4638 struct mdinfo *d;
4639 struct ddf_super *ddf = a->container->sb;
4640 int global_ok = 0;
4641 struct mdinfo *rv = NULL;
4642 struct mdinfo *di;
4643 struct metadata_update *mu;
4644 struct dl *dl;
4645 int i;
4646 struct vcl *vcl;
4647 struct vd_config *vc;
4648 unsigned int n_bvd;
4649
4650 for (d = a->info.devs ; d ; d = d->next) {
4651 if ((d->curr_state & DS_FAULTY) &&
4652 d->state_fd >= 0)
4653 /* wait for Removal to happen */
4654 return NULL;
4655 if (d->state_fd >= 0)
4656 working ++;
4657 }
4658
4659 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4660 a->info.array.level);
4661 if (working == a->info.array.raid_disks)
4662 return NULL; /* array not degraded */
4663 switch (a->info.array.level) {
4664 case 1:
4665 if (working == 0)
4666 return NULL; /* failed */
4667 break;
4668 case 4:
4669 case 5:
4670 if (working < a->info.array.raid_disks - 1)
4671 return NULL; /* failed */
4672 break;
4673 case 6:
4674 if (working < a->info.array.raid_disks - 2)
4675 return NULL; /* failed */
4676 break;
4677 default: /* concat or stripe */
4678 return NULL; /* failed */
4679 }
4680
4681 /* For each slot, if it is not working, find a spare */
4682 dl = ddf->dlist;
4683 for (i = 0; i < a->info.array.raid_disks; i++) {
4684 for (d = a->info.devs ; d ; d = d->next)
4685 if (d->disk.raid_disk == i)
4686 break;
4687 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
4688 if (d && (d->state_fd >= 0))
4689 continue;
4690
4691 /* OK, this device needs recovery. Find a spare */
4692 again:
4693 for ( ; dl ; dl = dl->next) {
4694 unsigned long long esize;
4695 unsigned long long pos;
4696 struct mdinfo *d2;
4697 int is_global = 0;
4698 int is_dedicated = 0;
4699 struct extent *ex;
4700 unsigned int j;
4701 /* If in this array, skip */
4702 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
4703 if (d2->state_fd >= 0 &&
4704 d2->disk.major == dl->major &&
4705 d2->disk.minor == dl->minor) {
4706 dprintf("%x:%x already in array\n", dl->major, dl->minor);
4707 break;
4708 }
4709 if (d2)
4710 continue;
4711 if (ddf->phys->entries[dl->pdnum].type &
4712 __cpu_to_be16(DDF_Spare)) {
4713 /* Check spare assign record */
4714 if (dl->spare) {
4715 if (dl->spare->type & DDF_spare_dedicated) {
4716 /* check spare_ents for guid */
4717 for (j = 0 ;
4718 j < __be16_to_cpu(dl->spare->populated);
4719 j++) {
4720 if (memcmp(dl->spare->spare_ents[j].guid,
4721 ddf->virt->entries[a->info.container_member].guid,
4722 DDF_GUID_LEN) == 0)
4723 is_dedicated = 1;
4724 }
4725 } else
4726 is_global = 1;
4727 }
4728 } else if (ddf->phys->entries[dl->pdnum].type &
4729 __cpu_to_be16(DDF_Global_Spare)) {
4730 is_global = 1;
4731 } else if (!(ddf->phys->entries[dl->pdnum].state &
4732 __cpu_to_be16(DDF_Failed))) {
4733 /* we can possibly use some of this */
4734 is_global = 1;
4735 }
4736 if ( ! (is_dedicated ||
4737 (is_global && global_ok))) {
4738 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
4739 is_dedicated, is_global);
4740 continue;
4741 }
4742
4743 /* We are allowed to use this device - is there space?
4744 * We need a->info.component_size sectors */
4745 ex = get_extents(ddf, dl);
4746 if (!ex) {
4747 dprintf("cannot get extents\n");
4748 continue;
4749 }
4750 j = 0; pos = 0;
4751 esize = 0;
4752
4753 do {
4754 esize = ex[j].start - pos;
4755 if (esize >= a->info.component_size)
4756 break;
4757 pos = ex[j].start + ex[j].size;
4758 j++;
4759 } while (ex[j-1].size);
4760
4761 free(ex);
4762 if (esize < a->info.component_size) {
4763 dprintf("%x:%x has no room: %llu %llu\n",
4764 dl->major, dl->minor,
4765 esize, a->info.component_size);
4766 /* No room */
4767 continue;
4768 }
4769
4770 /* Cool, we have a device with some space at pos */
4771 di = xcalloc(1, sizeof(*di));
4772 di->disk.number = i;
4773 di->disk.raid_disk = i;
4774 di->disk.major = dl->major;
4775 di->disk.minor = dl->minor;
4776 di->disk.state = 0;
4777 di->recovery_start = 0;
4778 di->data_offset = pos;
4779 di->component_size = a->info.component_size;
4780 di->container_member = dl->pdnum;
4781 di->next = rv;
4782 rv = di;
4783 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4784 i, pos);
4785
4786 break;
4787 }
4788 if (!dl && ! global_ok) {
4789 /* not enough dedicated spares, try global */
4790 global_ok = 1;
4791 dl = ddf->dlist;
4792 goto again;
4793 }
4794 }
4795
4796 if (!rv)
4797 /* No spares found */
4798 return rv;
4799 /* Now 'rv' has a list of devices to return.
4800 * Create a metadata_update record to update the
4801 * phys_refnum and lba_offset values
4802 */
4803 mu = xmalloc(sizeof(*mu));
4804 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
4805 free(mu);
4806 mu = NULL;
4807 }
4808 mu->buf = xmalloc(ddf->conf_rec_len * 512);
4809 mu->len = ddf->conf_rec_len * 512;
4810 mu->space = NULL;
4811 mu->space_list = NULL;
4812 mu->next = *updates;
4813 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4814 &n_bvd, &vcl);
4815 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4816
4817 vc = (struct vd_config*)mu->buf;
4818 for (di = rv ; di ; di = di->next) {
4819 vc->phys_refnum[di->disk.raid_disk] =
4820 ddf->phys->entries[dl->pdnum].refnum;
4821 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4822 = __cpu_to_be64(di->data_offset);
4823 }
4824 *updates = mu;
4825 return rv;
4826 }
4827 #endif /* MDASSEMBLE */
4828
4829 static int ddf_level_to_layout(int level)
4830 {
4831 switch(level) {
4832 case 0:
4833 case 1:
4834 return 0;
4835 case 5:
4836 return ALGORITHM_LEFT_SYMMETRIC;
4837 case 6:
4838 return ALGORITHM_ROTATING_N_CONTINUE;
4839 case 10:
4840 return 0x102;
4841 default:
4842 return UnSet;
4843 }
4844 }
4845
4846 static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4847 {
4848 if (level && *level == UnSet)
4849 *level = LEVEL_CONTAINER;
4850
4851 if (level && layout && *layout == UnSet)
4852 *layout = ddf_level_to_layout(*level);
4853 }
4854
4855 struct superswitch super_ddf = {
4856 #ifndef MDASSEMBLE
4857 .examine_super = examine_super_ddf,
4858 .brief_examine_super = brief_examine_super_ddf,
4859 .brief_examine_subarrays = brief_examine_subarrays_ddf,
4860 .export_examine_super = export_examine_super_ddf,
4861 .detail_super = detail_super_ddf,
4862 .brief_detail_super = brief_detail_super_ddf,
4863 .validate_geometry = validate_geometry_ddf,
4864 .write_init_super = write_init_super_ddf,
4865 .add_to_super = add_to_super_ddf,
4866 .remove_from_super = remove_from_super_ddf,
4867 .load_container = load_container_ddf,
4868 .copy_metadata = copy_metadata_ddf,
4869 #endif
4870 .match_home = match_home_ddf,
4871 .uuid_from_super= uuid_from_super_ddf,
4872 .getinfo_super = getinfo_super_ddf,
4873 .update_super = update_super_ddf,
4874
4875 .avail_size = avail_size_ddf,
4876
4877 .compare_super = compare_super_ddf,
4878
4879 .load_super = load_super_ddf,
4880 .init_super = init_super_ddf,
4881 .store_super = store_super_ddf,
4882 .free_super = free_super_ddf,
4883 .match_metadata_desc = match_metadata_desc_ddf,
4884 .container_content = container_content_ddf,
4885 .default_geometry = default_geometry_ddf,
4886 .kill_subarray = kill_subarray_ddf,
4887
4888 .external = 1,
4889
4890 #ifndef MDASSEMBLE
4891 /* for mdmon */
4892 .open_new = ddf_open_new,
4893 .set_array_state= ddf_set_array_state,
4894 .set_disk = ddf_set_disk,
4895 .sync_metadata = ddf_sync_metadata,
4896 .process_update = ddf_process_update,
4897 .prepare_update = ddf_prepare_update,
4898 .activate_spare = ddf_activate_spare,
4899 #endif
4900 .name = "ddf",
4901 };