2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neil@brown.name>
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
28 #define HAVE_STDINT_H 1
34 /* a non-official T10 name for creation GUIDs */
35 static char T10
[] = "Linux-MD";
37 /* DDF timestamps are 1980 based, so we need to add
38 * seconds-in-decade-of-seventies to convert to Linux timestamps.
39 * 10 years with 2 leap years.
41 #define DECADE (3600*24*(365*10+2))
44 const unsigned char *buf
,
47 #define DDF_NOTFOUND (~0U)
48 #define DDF_CONTAINER (DDF_NOTFOUND-1)
50 /* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
61 /* Primary Raid Level (PRL) */
62 #define DDF_RAID0 0x00
63 #define DDF_RAID1 0x01
64 #define DDF_RAID3 0x03
65 #define DDF_RAID4 0x04
66 #define DDF_RAID5 0x05
67 #define DDF_RAID1E 0x11
69 #define DDF_CONCAT 0x1f
70 #define DDF_RAID5E 0x15
71 #define DDF_RAID5EE 0x25
72 #define DDF_RAID6 0x06
74 /* Raid Level Qualifier (RLQ) */
75 #define DDF_RAID0_SIMPLE 0x00
76 #define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77 #define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78 #define DDF_RAID3_0 0x00 /* parity in first extent */
79 #define DDF_RAID3_N 0x01 /* parity in last extent */
80 #define DDF_RAID4_0 0x00 /* parity in first extent */
81 #define DDF_RAID4_N 0x01 /* parity in last extent */
82 /* these apply to raid5e and raid5ee as well */
83 #define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
84 #define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
85 #define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86 #define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
88 #define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89 #define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
91 /* Secondary RAID Level (SRL) */
92 #define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93 #define DDF_2MIRRORED 0x01
94 #define DDF_2CONCAT 0x02
95 #define DDF_2SPANNED 0x03 /* This is also weird - be careful */
98 #define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99 #define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100 #define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101 #define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102 #define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103 #define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104 #define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105 #define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106 #define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107 #define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
109 #define DDF_GUID_LEN 24
110 #define DDF_REVISION_0 "01.00.00"
111 #define DDF_REVISION_2 "01.02.00"
114 __u32 magic
; /* DDF_HEADER_MAGIC */
116 char guid
[DDF_GUID_LEN
];
117 char revision
[8]; /* 01.02.00 */
118 __u32 seq
; /* starts at '1' */
123 __u8 pad0
; /* 0xff */
124 __u8 pad1
[12]; /* 12 * 0xff */
125 /* 64 bytes so far */
126 __u8 header_ext
[32]; /* reserved: fill with 0xff */
130 __u8 pad2
[3]; /* 0xff */
131 __u32 workspace_len
; /* sectors for vendor space -
132 * at least 32768(sectors) */
134 __u16 max_pd_entries
; /* one of 15, 63, 255, 1023, 4095 */
135 __u16 max_vd_entries
; /* 2^(4,6,8,10,12)-1 : i.e. as above */
136 __u16 max_partitions
; /* i.e. max num of configuration
137 record entries per disk */
138 __u16 config_record_len
; /* 1 +ROUNDUP(max_primary_element_entries
140 __u16 max_primary_element_entries
; /* 16, 64, 256, 1024, or 4096 */
141 __u8 pad3
[54]; /* 0xff */
142 /* 192 bytes so far */
143 __u32 controller_section_offset
;
144 __u32 controller_section_length
;
145 __u32 phys_section_offset
;
146 __u32 phys_section_length
;
147 __u32 virt_section_offset
;
148 __u32 virt_section_length
;
149 __u32 config_section_offset
;
150 __u32 config_section_length
;
151 __u32 data_section_offset
;
152 __u32 data_section_length
;
153 __u32 bbm_section_offset
;
154 __u32 bbm_section_length
;
155 __u32 diag_space_offset
;
156 __u32 diag_space_length
;
159 /* 256 bytes so far */
160 __u8 pad4
[256]; /* 0xff */
164 #define DDF_HEADER_ANCHOR 0x00
165 #define DDF_HEADER_PRIMARY 0x01
166 #define DDF_HEADER_SECONDARY 0x02
168 /* The content of the 'controller section' - global scope */
169 struct ddf_controller_data
{
170 __u32 magic
; /* DDF_CONTROLLER_MAGIC */
172 char guid
[DDF_GUID_LEN
];
173 struct controller_type
{
180 __u8 pad
[8]; /* 0xff */
181 __u8 vendor_data
[448];
184 /* The content of phys_section - global scope */
186 __u32 magic
; /* DDF_PHYS_RECORDS_MAGIC */
191 struct phys_disk_entry
{
192 char guid
[DDF_GUID_LEN
];
196 __u64 config_size
; /* DDF structures must be after here */
197 char path
[18]; /* another horrible structure really */
202 /* phys_disk_entry.type is a bitmap - bigendian remember */
203 #define DDF_Forced_PD_GUID 1
204 #define DDF_Active_in_VD 2
205 #define DDF_Global_Spare 4 /* VD_CONF records are ignored */
206 #define DDF_Spare 8 /* overrides Global_spare */
207 #define DDF_Foreign 16
208 #define DDF_Legacy 32 /* no DDF on this device */
210 #define DDF_Interface_mask 0xf00
211 #define DDF_Interface_SCSI 0x100
212 #define DDF_Interface_SAS 0x200
213 #define DDF_Interface_SATA 0x300
214 #define DDF_Interface_FC 0x400
216 /* phys_disk_entry.state is a bigendian bitmap */
218 #define DDF_Failed 2 /* overrides 1,4,8 */
219 #define DDF_Rebuilding 4
220 #define DDF_Transition 8
222 #define DDF_ReadErrors 32
223 #define DDF_Missing 64
225 /* The content of the virt_section global scope */
226 struct virtual_disk
{
227 __u32 magic
; /* DDF_VIRT_RECORDS_MAGIC */
229 __u16 populated_vdes
;
232 struct virtual_entry
{
233 char guid
[DDF_GUID_LEN
];
235 __u16 pad0
; /* 0xffff */
245 /* virtual_entry.type is a bitmap - bigendian */
247 #define DDF_Enforce_Groups 2
248 #define DDF_Unicode 4
249 #define DDF_Owner_Valid 8
251 /* virtual_entry.state is a bigendian bitmap */
252 #define DDF_state_mask 0x7
253 #define DDF_state_optimal 0x0
254 #define DDF_state_degraded 0x1
255 #define DDF_state_deleted 0x2
256 #define DDF_state_missing 0x3
257 #define DDF_state_failed 0x4
258 #define DDF_state_part_optimal 0x5
260 #define DDF_state_morphing 0x8
261 #define DDF_state_inconsistent 0x10
263 /* virtual_entry.init_state is a bigendian bitmap */
264 #define DDF_initstate_mask 0x03
265 #define DDF_init_not 0x00
266 #define DDF_init_quick 0x01 /* initialisation in progress.
267 * i.e. 'state_inconsistent' */
268 #define DDF_init_full 0x02
270 #define DDF_access_mask 0xc0
271 #define DDF_access_rw 0x00
272 #define DDF_access_ro 0x80
273 #define DDF_access_blocked 0xc0
275 /* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
281 __u32 magic
; /* DDF_VD_CONF_MAGIC */
283 char guid
[DDF_GUID_LEN
];
287 __u16 prim_elmnt_count
;
288 __u8 chunk_shift
; /* 0 == 512, 1==1024 etc */
291 __u8 sec_elmnt_count
;
294 __u64 blocks
; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks
; /* blocks in array */
305 __u8 v0
[32]; /* reserved- 0xff */
306 __u8 v1
[32]; /* reserved- 0xff */
307 __u8 v2
[16]; /* reserved- 0xff */
308 __u8 v3
[16]; /* reserved- 0xff */
310 __u32 phys_refnum
[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
314 #define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
316 /* vd_config.cache_pol[7] is a bitmap */
317 #define DDF_cache_writeback 1 /* else writethrough */
318 #define DDF_cache_wadaptive 2 /* only applies if writeback */
319 #define DDF_cache_readahead 4
320 #define DDF_cache_radaptive 8 /* only if doing read-ahead */
321 #define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
322 #define DDF_cache_wallowed 32 /* enable write caching */
323 #define DDF_cache_rallowed 64 /* enable read caching */
325 struct spare_assign
{
326 __u32 magic
; /* DDF_SPARE_ASSIGN_MAGIC */
331 __u16 populated
; /* SAEs used */
332 __u16 max
; /* max SAEs */
334 struct spare_assign_entry
{
335 char guid
[DDF_GUID_LEN
];
336 __u16 secondary_element
;
340 /* spare_assign.type is a bitmap */
341 #define DDF_spare_dedicated 0x1 /* else global */
342 #define DDF_spare_revertible 0x2 /* else committable */
343 #define DDF_spare_active 0x4 /* else not active */
344 #define DDF_spare_affinity 0x8 /* enclosure affinity */
346 /* The data_section contents - local scope */
348 __u32 magic
; /* DDF_PHYS_DATA_MAGIC */
350 char guid
[DDF_GUID_LEN
];
351 __u32 refnum
; /* crc of some magic drive data ... */
352 __u8 forced_ref
; /* set when above was not result of magic */
353 __u8 forced_guid
; /* set if guid was forced rather than magic */
358 /* bbm_section content */
359 struct bad_block_log
{
366 struct mapped_block
{
367 __u64 defective_start
;
368 __u32 replacement_start
;
374 /* Struct for internally holding ddf structures */
375 /* The DDF structure stored on each device is potentially
376 * quite different, as some data is global and some is local.
377 * The global data is:
380 * - Physical disk records
381 * - Virtual disk records
383 * - Configuration records
384 * - Physical Disk data section
385 * ( and Bad block and vendor which I don't care about yet).
387 * The local data is parsed into separate lists as it is read
388 * and reconstructed for writing. This means that we only need
389 * to make config changes once and they are automatically
390 * propagated to all devices.
391 * Note that the ddf_super has space for the conf and disk data
392 * for this disk and also for a list of all such data.
393 * The list is only used for the superblock that is being
394 * built in Create or Assemble to describe the whole array.
397 struct ddf_header anchor
, primary
, secondary
;
398 struct ddf_controller_data controller
;
399 struct ddf_header
*active
;
400 struct phys_disk
*phys
;
401 struct virtual_disk
*virt
;
403 unsigned int max_part
, mppe
, conf_rec_len
;
411 unsigned int vcnum
; /* index into ->virt */
412 struct vd_config
**other_bvds
;
413 __u64
*block_sizes
; /* NULL if all the same */
416 struct vd_config conf
;
417 } *conflist
, *currentconf
;
426 unsigned long long size
; /* sectors */
427 unsigned long long primary_lba
; /* sectors */
428 unsigned long long secondary_lba
; /* sectors */
429 unsigned long long workspace_lba
; /* sectors */
430 int pdnum
; /* index in ->phys */
431 struct spare_assign
*spare
;
432 void *mdupdate
; /* hold metadata update */
434 /* These fields used by auto-layout */
435 int raiddisk
; /* slot to fill in autolayout */
439 struct disk_data disk
;
440 struct vcl
*vlist
[0]; /* max_part in size */
445 #define offsetof(t,f) ((size_t)&(((t*)0)->f))
449 static int all_ff(const char *guid
);
450 static void pr_state(struct ddf_super
*ddf
, const char *msg
)
453 dprintf("%s/%s: ", __func__
, msg
);
454 for (i
= 0; i
< __be16_to_cpu(ddf
->active
->max_vd_entries
); i
++) {
455 if (all_ff(ddf
->virt
->entries
[i
].guid
))
457 dprintf("%u(s=%02x i=%02x) ", i
,
458 ddf
->virt
->entries
[i
].state
,
459 ddf
->virt
->entries
[i
].init_state
);
464 static void pr_state(const struct ddf_super
*ddf
, const char *msg
) {}
467 static void _ddf_set_updates_pending(struct ddf_super
*ddf
, const char *func
)
469 ddf
->updates_pending
= 1;
470 ddf
->active
->seq
= __cpu_to_be32((__be32_to_cpu(ddf
->active
->seq
)+1));
474 #define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
476 static unsigned int get_pd_index_from_refnum(const struct vcl
*vc
,
477 __u32 refnum
, unsigned int nmax
,
478 const struct vd_config
**bvd
,
481 static unsigned int calc_crc(void *buf
, int len
)
483 /* crcs are always at the same place as in the ddf_header */
484 struct ddf_header
*ddf
= buf
;
485 __u32 oldcrc
= ddf
->crc
;
487 ddf
->crc
= 0xffffffff;
489 newcrc
= crc32(0, buf
, len
);
491 /* The crc is store (like everything) bigendian, so convert
492 * here for simplicity
494 return __cpu_to_be32(newcrc
);
497 #define DDF_INVALID_LEVEL 0xff
498 #define DDF_NO_SECONDARY 0xff
499 static int err_bad_md_layout(const mdu_array_info_t
*array
)
501 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
502 array
->level
, array
->layout
, array
->raid_disks
);
506 static int layout_md2ddf(const mdu_array_info_t
*array
,
507 struct vd_config
*conf
)
509 __u16 prim_elmnt_count
= __cpu_to_be16(array
->raid_disks
);
510 __u8 prl
= DDF_INVALID_LEVEL
, rlq
= 0;
511 __u8 sec_elmnt_count
= 1;
512 __u8 srl
= DDF_NO_SECONDARY
;
514 switch (array
->level
) {
519 rlq
= DDF_RAID0_SIMPLE
;
523 switch (array
->raid_disks
) {
525 rlq
= DDF_RAID1_SIMPLE
;
528 rlq
= DDF_RAID1_MULTI
;
531 return err_bad_md_layout(array
);
536 if (array
->layout
!= 0)
537 return err_bad_md_layout(array
);
542 switch (array
->layout
) {
543 case ALGORITHM_LEFT_ASYMMETRIC
:
544 rlq
= DDF_RAID5_N_RESTART
;
546 case ALGORITHM_RIGHT_ASYMMETRIC
:
547 rlq
= DDF_RAID5_0_RESTART
;
549 case ALGORITHM_LEFT_SYMMETRIC
:
550 rlq
= DDF_RAID5_N_CONTINUE
;
552 case ALGORITHM_RIGHT_SYMMETRIC
:
553 /* not mentioned in standard */
555 return err_bad_md_layout(array
);
560 switch (array
->layout
) {
561 case ALGORITHM_ROTATING_N_RESTART
:
562 rlq
= DDF_RAID5_N_RESTART
;
564 case ALGORITHM_ROTATING_ZERO_RESTART
:
565 rlq
= DDF_RAID6_0_RESTART
;
567 case ALGORITHM_ROTATING_N_CONTINUE
:
568 rlq
= DDF_RAID5_N_CONTINUE
;
571 return err_bad_md_layout(array
);
576 if (array
->raid_disks
% 2 == 0 && array
->layout
== 0x102) {
577 rlq
= DDF_RAID1_SIMPLE
;
578 prim_elmnt_count
= __cpu_to_be16(2);
579 sec_elmnt_count
= array
->raid_disks
/ 2;
580 } else if (array
->raid_disks
% 3 == 0
581 && array
->layout
== 0x103) {
582 rlq
= DDF_RAID1_MULTI
;
583 prim_elmnt_count
= __cpu_to_be16(3);
584 sec_elmnt_count
= array
->raid_disks
/ 3;
586 return err_bad_md_layout(array
);
591 return err_bad_md_layout(array
);
594 conf
->prim_elmnt_count
= prim_elmnt_count
;
597 conf
->sec_elmnt_count
= sec_elmnt_count
;
601 static int err_bad_ddf_layout(const struct vd_config
*conf
)
603 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
604 conf
->prl
, conf
->rlq
, __be16_to_cpu(conf
->prim_elmnt_count
));
608 static int layout_ddf2md(const struct vd_config
*conf
,
609 mdu_array_info_t
*array
)
611 int level
= LEVEL_UNSUPPORTED
;
613 int raiddisks
= __be16_to_cpu(conf
->prim_elmnt_count
);
615 if (conf
->sec_elmnt_count
> 1) {
616 /* see also check_secondary() */
617 if (conf
->prl
!= DDF_RAID1
||
618 (conf
->srl
!= DDF_2STRIPED
&& conf
->srl
!= DDF_2SPANNED
)) {
619 pr_err("Unsupported secondary RAID level %u/%u\n",
620 conf
->prl
, conf
->srl
);
623 if (raiddisks
== 2 && conf
->rlq
== DDF_RAID1_SIMPLE
)
625 else if (raiddisks
== 3 && conf
->rlq
== DDF_RAID1_MULTI
)
628 return err_bad_ddf_layout(conf
);
629 raiddisks
*= conf
->sec_elmnt_count
;
636 level
= LEVEL_LINEAR
;
639 if (conf
->rlq
!= DDF_RAID0_SIMPLE
)
640 return err_bad_ddf_layout(conf
);
644 if (!((conf
->rlq
== DDF_RAID1_SIMPLE
&& raiddisks
== 2) ||
645 (conf
->rlq
== DDF_RAID1_MULTI
&& raiddisks
== 3)))
646 return err_bad_ddf_layout(conf
);
650 if (conf
->rlq
!= DDF_RAID4_N
)
651 return err_bad_ddf_layout(conf
);
656 case DDF_RAID5_N_RESTART
:
657 layout
= ALGORITHM_LEFT_ASYMMETRIC
;
659 case DDF_RAID5_0_RESTART
:
660 layout
= ALGORITHM_RIGHT_ASYMMETRIC
;
662 case DDF_RAID5_N_CONTINUE
:
663 layout
= ALGORITHM_LEFT_SYMMETRIC
;
666 return err_bad_ddf_layout(conf
);
672 case DDF_RAID5_N_RESTART
:
673 layout
= ALGORITHM_ROTATING_N_RESTART
;
675 case DDF_RAID6_0_RESTART
:
676 layout
= ALGORITHM_ROTATING_ZERO_RESTART
;
678 case DDF_RAID5_N_CONTINUE
:
679 layout
= ALGORITHM_ROTATING_N_CONTINUE
;
682 return err_bad_ddf_layout(conf
);
687 return err_bad_ddf_layout(conf
);
691 array
->level
= level
;
692 array
->layout
= layout
;
693 array
->raid_disks
= raiddisks
;
697 static int load_ddf_header(int fd
, unsigned long long lba
,
698 unsigned long long size
,
700 struct ddf_header
*hdr
, struct ddf_header
*anchor
)
702 /* read a ddf header (primary or secondary) from fd/lba
703 * and check that it is consistent with anchor
705 * magic, crc, guid, rev, and LBA's header_type, and
706 * everything after header_type must be the same
711 if (lseek64(fd
, lba
<<9, 0) < 0)
714 if (read(fd
, hdr
, 512) != 512)
717 if (hdr
->magic
!= DDF_HEADER_MAGIC
)
719 if (calc_crc(hdr
, 512) != hdr
->crc
)
721 if (memcmp(anchor
->guid
, hdr
->guid
, DDF_GUID_LEN
) != 0 ||
722 memcmp(anchor
->revision
, hdr
->revision
, 8) != 0 ||
723 anchor
->primary_lba
!= hdr
->primary_lba
||
724 anchor
->secondary_lba
!= hdr
->secondary_lba
||
726 memcmp(anchor
->pad2
, hdr
->pad2
, 512 -
727 offsetof(struct ddf_header
, pad2
)) != 0)
730 /* Looks good enough to me... */
734 static void *load_section(int fd
, struct ddf_super
*super
, void *buf
,
735 __u32 offset_be
, __u32 len_be
, int check
)
737 unsigned long long offset
= __be32_to_cpu(offset_be
);
738 unsigned long long len
= __be32_to_cpu(len_be
);
739 int dofree
= (buf
== NULL
);
742 if (len
!= 2 && len
!= 8 && len
!= 32
743 && len
!= 128 && len
!= 512)
749 /* All pre-allocated sections are a single block */
752 } else if (posix_memalign(&buf
, 512, len
<<9) != 0)
758 if (super
->active
->type
== 1)
759 offset
+= __be64_to_cpu(super
->active
->primary_lba
);
761 offset
+= __be64_to_cpu(super
->active
->secondary_lba
);
763 if ((unsigned long long)lseek64(fd
, offset
<<9, 0) != (offset
<<9)) {
768 if ((unsigned long long)read(fd
, buf
, len
<<9) != (len
<<9)) {
776 static int load_ddf_headers(int fd
, struct ddf_super
*super
, char *devname
)
778 unsigned long long dsize
;
780 get_dev_size(fd
, NULL
, &dsize
);
782 if (lseek64(fd
, dsize
-512, 0) < 0) {
784 pr_err("Cannot seek to anchor block on %s: %s\n",
785 devname
, strerror(errno
));
788 if (read(fd
, &super
->anchor
, 512) != 512) {
790 pr_err("Cannot read anchor block on %s: %s\n",
791 devname
, strerror(errno
));
794 if (super
->anchor
.magic
!= DDF_HEADER_MAGIC
) {
796 pr_err("no DDF anchor found on %s\n",
800 if (calc_crc(&super
->anchor
, 512) != super
->anchor
.crc
) {
802 pr_err("bad CRC on anchor on %s\n",
806 if (memcmp(super
->anchor
.revision
, DDF_REVISION_0
, 8) != 0 &&
807 memcmp(super
->anchor
.revision
, DDF_REVISION_2
, 8) != 0) {
809 pr_err("can only support super revision"
810 " %.8s and earlier, not %.8s on %s\n",
811 DDF_REVISION_2
, super
->anchor
.revision
,devname
);
814 super
->active
= NULL
;
815 if (load_ddf_header(fd
, __be64_to_cpu(super
->anchor
.primary_lba
),
817 &super
->primary
, &super
->anchor
) == 0) {
819 pr_err("Failed to load primary DDF header "
822 super
->active
= &super
->primary
;
823 if (load_ddf_header(fd
, __be64_to_cpu(super
->anchor
.secondary_lba
),
825 &super
->secondary
, &super
->anchor
)) {
826 if (super
->active
== NULL
827 || (__be32_to_cpu(super
->primary
.seq
)
828 < __be32_to_cpu(super
->secondary
.seq
) &&
829 !super
->secondary
.openflag
)
830 || (__be32_to_cpu(super
->primary
.seq
)
831 == __be32_to_cpu(super
->secondary
.seq
) &&
832 super
->primary
.openflag
&& !super
->secondary
.openflag
)
834 super
->active
= &super
->secondary
;
836 pr_err("Failed to load secondary DDF header on %s\n",
838 if (super
->active
== NULL
)
843 static int load_ddf_global(int fd
, struct ddf_super
*super
, char *devname
)
846 ok
= load_section(fd
, super
, &super
->controller
,
847 super
->active
->controller_section_offset
,
848 super
->active
->controller_section_length
,
850 super
->phys
= load_section(fd
, super
, NULL
,
851 super
->active
->phys_section_offset
,
852 super
->active
->phys_section_length
,
854 super
->pdsize
= __be32_to_cpu(super
->active
->phys_section_length
) * 512;
856 super
->virt
= load_section(fd
, super
, NULL
,
857 super
->active
->virt_section_offset
,
858 super
->active
->virt_section_length
,
860 super
->vdsize
= __be32_to_cpu(super
->active
->virt_section_length
) * 512;
870 super
->conflist
= NULL
;
873 super
->max_part
= __be16_to_cpu(super
->active
->max_partitions
);
874 super
->mppe
= __be16_to_cpu(super
->active
->max_primary_element_entries
);
875 super
->conf_rec_len
= __be16_to_cpu(super
->active
->config_record_len
);
879 #define DDF_UNUSED_BVD 0xff
880 static int alloc_other_bvds(const struct ddf_super
*ddf
, struct vcl
*vcl
)
882 unsigned int n_vds
= vcl
->conf
.sec_elmnt_count
- 1;
883 unsigned int i
, vdsize
;
886 vcl
->other_bvds
= NULL
;
889 vdsize
= ddf
->conf_rec_len
* 512;
890 if (posix_memalign(&p
, 512, n_vds
*
891 (vdsize
+ sizeof(struct vd_config
*))) != 0)
893 vcl
->other_bvds
= (struct vd_config
**) (p
+ n_vds
* vdsize
);
894 for (i
= 0; i
< n_vds
; i
++) {
895 vcl
->other_bvds
[i
] = p
+ i
* vdsize
;
896 memset(vcl
->other_bvds
[i
], 0, vdsize
);
897 vcl
->other_bvds
[i
]->sec_elmnt_seq
= DDF_UNUSED_BVD
;
902 static void add_other_bvd(struct vcl
*vcl
, struct vd_config
*vd
,
906 for (i
= 0; i
< vcl
->conf
.sec_elmnt_count
-1; i
++)
907 if (vcl
->other_bvds
[i
]->sec_elmnt_seq
== vd
->sec_elmnt_seq
)
910 if (i
< vcl
->conf
.sec_elmnt_count
-1) {
911 if (vd
->seqnum
<= vcl
->other_bvds
[i
]->seqnum
)
914 for (i
= 0; i
< vcl
->conf
.sec_elmnt_count
-1; i
++)
915 if (vcl
->other_bvds
[i
]->sec_elmnt_seq
== DDF_UNUSED_BVD
)
917 if (i
== vcl
->conf
.sec_elmnt_count
-1) {
918 pr_err("no space for sec level config %u, count is %u\n",
919 vd
->sec_elmnt_seq
, vcl
->conf
.sec_elmnt_count
);
923 memcpy(vcl
->other_bvds
[i
], vd
, len
);
926 static int load_ddf_local(int fd
, struct ddf_super
*super
,
927 char *devname
, int keep
)
933 unsigned int confsec
;
935 unsigned int max_virt_disks
= __be16_to_cpu(super
->active
->max_vd_entries
);
936 unsigned long long dsize
;
938 /* First the local disk info */
939 if (posix_memalign((void**)&dl
, 512,
941 (super
->max_part
) * sizeof(dl
->vlist
[0])) != 0) {
942 pr_err("%s could not allocate disk info buffer\n",
947 load_section(fd
, super
, &dl
->disk
,
948 super
->active
->data_section_offset
,
949 super
->active
->data_section_length
,
951 dl
->devname
= devname
? xstrdup(devname
) : NULL
;
954 dl
->major
= major(stb
.st_rdev
);
955 dl
->minor
= minor(stb
.st_rdev
);
956 dl
->next
= super
->dlist
;
957 dl
->fd
= keep
? fd
: -1;
960 if (get_dev_size(fd
, devname
, &dsize
))
961 dl
->size
= dsize
>> 9;
962 /* If the disks have different sizes, the LBAs will differ
963 * between phys disks.
964 * At this point here, the values in super->active must be valid
965 * for this phys disk. */
966 dl
->primary_lba
= super
->active
->primary_lba
;
967 dl
->secondary_lba
= super
->active
->secondary_lba
;
968 dl
->workspace_lba
= super
->active
->workspace_lba
;
970 for (i
= 0 ; i
< super
->max_part
; i
++)
974 for (i
= 0; i
< __be16_to_cpu(super
->active
->max_pd_entries
); i
++)
975 if (memcmp(super
->phys
->entries
[i
].guid
,
976 dl
->disk
.guid
, DDF_GUID_LEN
) == 0)
979 /* Now the config list. */
980 /* 'conf' is an array of config entries, some of which are
981 * probably invalid. Those which are good need to be copied into
985 conf
= load_section(fd
, super
, NULL
,
986 super
->active
->config_section_offset
,
987 super
->active
->config_section_length
,
992 confsec
< __be32_to_cpu(super
->active
->config_section_length
);
993 confsec
+= super
->conf_rec_len
) {
994 struct vd_config
*vd
=
995 (struct vd_config
*)((char*)conf
+ confsec
*512);
998 if (vd
->magic
== DDF_SPARE_ASSIGN_MAGIC
) {
1001 if (posix_memalign((void**)&dl
->spare
, 512,
1002 super
->conf_rec_len
*512) != 0) {
1003 pr_err("%s could not allocate spare info buf\n",
1008 memcpy(dl
->spare
, vd
, super
->conf_rec_len
*512);
1011 if (vd
->magic
!= DDF_VD_CONF_MAGIC
)
1013 for (vcl
= super
->conflist
; vcl
; vcl
= vcl
->next
) {
1014 if (memcmp(vcl
->conf
.guid
,
1015 vd
->guid
, DDF_GUID_LEN
) == 0)
1020 dl
->vlist
[vnum
++] = vcl
;
1021 if (vcl
->other_bvds
!= NULL
&&
1022 vcl
->conf
.sec_elmnt_seq
!= vd
->sec_elmnt_seq
) {
1023 add_other_bvd(vcl
, vd
, super
->conf_rec_len
*512);
1026 if (__be32_to_cpu(vd
->seqnum
) <=
1027 __be32_to_cpu(vcl
->conf
.seqnum
))
1030 if (posix_memalign((void**)&vcl
, 512,
1031 (super
->conf_rec_len
*512 +
1032 offsetof(struct vcl
, conf
))) != 0) {
1033 pr_err("%s could not allocate vcl buf\n",
1037 vcl
->next
= super
->conflist
;
1038 vcl
->block_sizes
= NULL
; /* FIXME not for CONCAT */
1039 vcl
->conf
.sec_elmnt_count
= vd
->sec_elmnt_count
;
1040 if (alloc_other_bvds(super
, vcl
) != 0) {
1041 pr_err("%s could not allocate other bvds\n",
1046 super
->conflist
= vcl
;
1047 dl
->vlist
[vnum
++] = vcl
;
1049 memcpy(&vcl
->conf
, vd
, super
->conf_rec_len
*512);
1050 for (i
=0; i
< max_virt_disks
; i
++)
1051 if (memcmp(super
->virt
->entries
[i
].guid
,
1052 vcl
->conf
.guid
, DDF_GUID_LEN
)==0)
1054 if (i
< max_virt_disks
)
1063 static int load_super_ddf_all(struct supertype
*st
, int fd
,
1064 void **sbp
, char *devname
);
1067 static void free_super_ddf(struct supertype
*st
);
1069 static int load_super_ddf(struct supertype
*st
, int fd
,
1072 unsigned long long dsize
;
1073 struct ddf_super
*super
;
1076 if (get_dev_size(fd
, devname
, &dsize
) == 0)
1079 if (!st
->ignore_hw_compat
&& test_partition(fd
))
1080 /* DDF is not allowed on partitions */
1083 /* 32M is a lower bound */
1084 if (dsize
<= 32*1024*1024) {
1086 pr_err("%s is too small for ddf: "
1087 "size is %llu sectors.\n",
1093 pr_err("%s is an odd size for ddf: "
1094 "size is %llu bytes.\n",
1101 if (posix_memalign((void**)&super
, 512, sizeof(*super
))!= 0) {
1102 pr_err("malloc of %zu failed.\n",
1106 memset(super
, 0, sizeof(*super
));
1108 rv
= load_ddf_headers(fd
, super
, devname
);
1114 /* Have valid headers and have chosen the best. Let's read in the rest*/
1116 rv
= load_ddf_global(fd
, super
, devname
);
1120 pr_err("Failed to load all information "
1121 "sections on %s\n", devname
);
1126 rv
= load_ddf_local(fd
, super
, devname
, 0);
1130 pr_err("Failed to load all information "
1131 "sections on %s\n", devname
);
1136 /* Should possibly check the sections .... */
1139 if (st
->ss
== NULL
) {
1140 st
->ss
= &super_ddf
;
1141 st
->minor_version
= 0;
1148 static void free_super_ddf(struct supertype
*st
)
1150 struct ddf_super
*ddf
= st
->sb
;
1155 while (ddf
->conflist
) {
1156 struct vcl
*v
= ddf
->conflist
;
1157 ddf
->conflist
= v
->next
;
1159 free(v
->block_sizes
);
1162 v->other_bvds[0] points to beginning of buffer,
1163 see alloc_other_bvds()
1165 free(v
->other_bvds
[0]);
1168 while (ddf
->dlist
) {
1169 struct dl
*d
= ddf
->dlist
;
1170 ddf
->dlist
= d
->next
;
1177 while (ddf
->add_list
) {
1178 struct dl
*d
= ddf
->add_list
;
1179 ddf
->add_list
= d
->next
;
1190 static struct supertype
*match_metadata_desc_ddf(char *arg
)
1192 /* 'ddf' only supports containers */
1193 struct supertype
*st
;
1194 if (strcmp(arg
, "ddf") != 0 &&
1195 strcmp(arg
, "default") != 0
1199 st
= xcalloc(1, sizeof(*st
));
1200 st
->ss
= &super_ddf
;
1202 st
->minor_version
= 0;
1209 static mapping_t ddf_state
[] = {
1215 { "Partially Optimal", 5},
1221 static mapping_t ddf_init_state
[] = {
1222 { "Not Initialised", 0},
1223 { "QuickInit in Progress", 1},
1224 { "Fully Initialised", 2},
1228 static mapping_t ddf_access
[] = {
1232 { "Blocked (no access)", 3},
1236 static mapping_t ddf_level
[] = {
1237 { "RAID0", DDF_RAID0
},
1238 { "RAID1", DDF_RAID1
},
1239 { "RAID3", DDF_RAID3
},
1240 { "RAID4", DDF_RAID4
},
1241 { "RAID5", DDF_RAID5
},
1242 { "RAID1E",DDF_RAID1E
},
1243 { "JBOD", DDF_JBOD
},
1244 { "CONCAT",DDF_CONCAT
},
1245 { "RAID5E",DDF_RAID5E
},
1246 { "RAID5EE",DDF_RAID5EE
},
1247 { "RAID6", DDF_RAID6
},
1250 static mapping_t ddf_sec_level
[] = {
1251 { "Striped", DDF_2STRIPED
},
1252 { "Mirrored", DDF_2MIRRORED
},
1253 { "Concat", DDF_2CONCAT
},
1254 { "Spanned", DDF_2SPANNED
},
1259 static int all_ff(const char *guid
)
1262 for (i
= 0; i
< DDF_GUID_LEN
; i
++)
1263 if (guid
[i
] != (char)0xff)
1269 static void print_guid(char *guid
, int tstamp
)
1271 /* GUIDs are part (or all) ASCII and part binary.
1272 * They tend to be space padded.
1273 * We print the GUID in HEX, then in parentheses add
1274 * any initial ASCII sequence, and a possible
1275 * time stamp from bytes 16-19
1277 int l
= DDF_GUID_LEN
;
1280 for (i
=0 ; i
<DDF_GUID_LEN
; i
++) {
1281 if ((i
&3)==0 && i
!= 0) printf(":");
1282 printf("%02X", guid
[i
]&255);
1286 while (l
&& guid
[l
-1] == ' ')
1288 for (i
=0 ; i
<l
; i
++) {
1289 if (guid
[i
] >= 0x20 && guid
[i
] < 0x7f)
1290 fputc(guid
[i
], stdout
);
1295 time_t then
= __be32_to_cpu(*(__u32
*)(guid
+16)) + DECADE
;
1298 tm
= localtime(&then
);
1299 strftime(tbuf
, 100, " %D %T",tm
);
1300 fputs(tbuf
, stdout
);
1305 static const char *guid_str(const char *guid
)
1307 static char buf
[DDF_GUID_LEN
*2+1];
1310 for (i
= 0; i
< DDF_GUID_LEN
; i
++) {
1311 unsigned char c
= guid
[i
];
1312 if (c
>= 32 && c
< 127)
1313 p
+= sprintf(p
, "%c", c
);
1315 p
+= sprintf(p
, "%02x", c
);
1318 return (const char *) buf
;
1321 static void examine_vd(int n
, struct ddf_super
*sb
, char *guid
)
1323 int crl
= sb
->conf_rec_len
;
1326 for (vcl
= sb
->conflist
; vcl
; vcl
= vcl
->next
) {
1328 struct vd_config
*vc
= &vcl
->conf
;
1330 if (calc_crc(vc
, crl
*512) != vc
->crc
)
1332 if (memcmp(vc
->guid
, guid
, DDF_GUID_LEN
) != 0)
1335 /* Ok, we know about this VD, let's give more details */
1336 printf(" Raid Devices[%d] : %d (", n
,
1337 __be16_to_cpu(vc
->prim_elmnt_count
));
1338 for (i
= 0; i
< __be16_to_cpu(vc
->prim_elmnt_count
); i
++) {
1340 int cnt
= __be16_to_cpu(sb
->phys
->used_pdes
);
1341 for (j
=0; j
<cnt
; j
++)
1342 if (vc
->phys_refnum
[i
] == sb
->phys
->entries
[j
].refnum
)
1351 if (vc
->chunk_shift
!= 255)
1352 printf(" Chunk Size[%d] : %d sectors\n", n
,
1353 1 << vc
->chunk_shift
);
1354 printf(" Raid Level[%d] : %s\n", n
,
1355 map_num(ddf_level
, vc
->prl
)?:"-unknown-");
1356 if (vc
->sec_elmnt_count
!= 1) {
1357 printf(" Secondary Position[%d] : %d of %d\n", n
,
1358 vc
->sec_elmnt_seq
, vc
->sec_elmnt_count
);
1359 printf(" Secondary Level[%d] : %s\n", n
,
1360 map_num(ddf_sec_level
, vc
->srl
) ?: "-unknown-");
1362 printf(" Device Size[%d] : %llu\n", n
,
1363 (unsigned long long)__be64_to_cpu(vc
->blocks
)/2);
1364 printf(" Array Size[%d] : %llu\n", n
,
1365 (unsigned long long)__be64_to_cpu(vc
->array_blocks
)/2);
1369 static void examine_vds(struct ddf_super
*sb
)
1371 int cnt
= __be16_to_cpu(sb
->virt
->populated_vdes
);
1373 printf(" Virtual Disks : %d\n", cnt
);
1375 for (i
= 0; i
< __be16_to_cpu(sb
->virt
->max_vdes
); i
++) {
1376 struct virtual_entry
*ve
= &sb
->virt
->entries
[i
];
1377 if (all_ff(ve
->guid
))
1380 printf(" VD GUID[%d] : ", i
); print_guid(ve
->guid
, 1);
1382 printf(" unit[%d] : %d\n", i
, __be16_to_cpu(ve
->unit
));
1383 printf(" state[%d] : %s, %s%s\n", i
,
1384 map_num(ddf_state
, ve
->state
& 7),
1385 (ve
->state
& 8) ? "Morphing, ": "",
1386 (ve
->state
& 16)? "Not Consistent" : "Consistent");
1387 printf(" init state[%d] : %s\n", i
,
1388 map_num(ddf_init_state
, ve
->init_state
&3));
1389 printf(" access[%d] : %s\n", i
,
1390 map_num(ddf_access
, (ve
->init_state
>>6) & 3));
1391 printf(" Name[%d] : %.16s\n", i
, ve
->name
);
1392 examine_vd(i
, sb
, ve
->guid
);
1394 if (cnt
) printf("\n");
1397 static void examine_pds(struct ddf_super
*sb
)
1399 int cnt
= __be16_to_cpu(sb
->phys
->used_pdes
);
1402 printf(" Physical Disks : %d\n", cnt
);
1403 printf(" Number RefNo Size Device Type/State\n");
1405 for (i
=0 ; i
<cnt
; i
++) {
1406 struct phys_disk_entry
*pd
= &sb
->phys
->entries
[i
];
1407 int type
= __be16_to_cpu(pd
->type
);
1408 int state
= __be16_to_cpu(pd
->state
);
1410 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1412 printf(" %3d %08x ", i
,
1413 __be32_to_cpu(pd
->refnum
));
1415 (unsigned long long)__be64_to_cpu(pd
->config_size
)>>1);
1416 for (dl
= sb
->dlist
; dl
; dl
= dl
->next
) {
1417 if (dl
->disk
.refnum
== pd
->refnum
) {
1418 char *dv
= map_dev(dl
->major
, dl
->minor
, 0);
1420 printf("%-15s", dv
);
1427 printf(" %s%s%s%s%s",
1428 (type
&2) ? "active":"",
1429 (type
&4) ? "Global-Spare":"",
1430 (type
&8) ? "spare" : "",
1431 (type
&16)? ", foreign" : "",
1432 (type
&32)? "pass-through" : "");
1433 if (state
& DDF_Failed
)
1434 /* This over-rides these three */
1435 state
&= ~(DDF_Online
|DDF_Rebuilding
|DDF_Transition
);
1436 printf("/%s%s%s%s%s%s%s",
1437 (state
&1)? "Online": "Offline",
1438 (state
&2)? ", Failed": "",
1439 (state
&4)? ", Rebuilding": "",
1440 (state
&8)? ", in-transition": "",
1441 (state
&16)? ", SMART-errors": "",
1442 (state
&32)? ", Unrecovered-Read-Errors": "",
1443 (state
&64)? ", Missing" : "");
1448 static void examine_super_ddf(struct supertype
*st
, char *homehost
)
1450 struct ddf_super
*sb
= st
->sb
;
1452 printf(" Magic : %08x\n", __be32_to_cpu(sb
->anchor
.magic
));
1453 printf(" Version : %.8s\n", sb
->anchor
.revision
);
1454 printf("Controller GUID : "); print_guid(sb
->controller
.guid
, 0);
1456 printf(" Container GUID : "); print_guid(sb
->anchor
.guid
, 1);
1458 printf(" Seq : %08x\n", __be32_to_cpu(sb
->active
->seq
));
1459 printf(" Redundant hdr : %s\n", sb
->secondary
.magic
== DDF_HEADER_MAGIC
1465 static void getinfo_super_ddf(struct supertype
*st
, struct mdinfo
*info
, char *map
);
1467 static void uuid_from_ddf_guid(const char *guid
, int uuid
[4]);
1468 static void uuid_from_super_ddf(struct supertype
*st
, int uuid
[4]);
1470 static unsigned int get_vd_num_of_subarray(struct supertype
*st
)
1473 * Figure out the VD number for this supertype.
1474 * Returns DDF_CONTAINER for the container itself,
1475 * and DDF_NOTFOUND on error.
1477 struct ddf_super
*ddf
= st
->sb
;
1482 if (*st
->container_devnm
== '\0')
1483 return DDF_CONTAINER
;
1485 sra
= sysfs_read(-1, st
->devnm
, GET_VERSION
);
1486 if (!sra
|| sra
->array
.major_version
!= -1 ||
1487 sra
->array
.minor_version
!= -2 ||
1488 !is_subarray(sra
->text_version
))
1489 return DDF_NOTFOUND
;
1491 sub
= strchr(sra
->text_version
+ 1, '/');
1493 vcnum
= strtoul(sub
+ 1, &end
, 10);
1494 if (sub
== NULL
|| *sub
== '\0' || *end
!= '\0' ||
1495 vcnum
>= __be16_to_cpu(ddf
->active
->max_vd_entries
))
1496 return DDF_NOTFOUND
;
1501 static void brief_examine_super_ddf(struct supertype
*st
, int verbose
)
1503 /* We just write a generic DDF ARRAY entry
1507 getinfo_super_ddf(st
, &info
, NULL
);
1508 fname_from_uuid(st
, &info
, nbuf
, ':');
1510 printf("ARRAY metadata=ddf UUID=%s\n", nbuf
+ 5);
1513 static void brief_examine_subarrays_ddf(struct supertype
*st
, int verbose
)
1515 /* We just write a generic DDF ARRAY entry
1517 struct ddf_super
*ddf
= st
->sb
;
1521 getinfo_super_ddf(st
, &info
, NULL
);
1522 fname_from_uuid(st
, &info
, nbuf
, ':');
1524 for (i
= 0; i
< __be16_to_cpu(ddf
->virt
->max_vdes
); i
++) {
1525 struct virtual_entry
*ve
= &ddf
->virt
->entries
[i
];
1528 if (all_ff(ve
->guid
))
1530 memcpy(vcl
.conf
.guid
, ve
->guid
, DDF_GUID_LEN
);
1531 ddf
->currentconf
=&vcl
;
1532 uuid_from_super_ddf(st
, info
.uuid
);
1533 fname_from_uuid(st
, &info
, nbuf1
, ':');
1534 printf("ARRAY container=%s member=%d UUID=%s\n",
1535 nbuf
+5, i
, nbuf1
+5);
1539 static void export_examine_super_ddf(struct supertype
*st
)
1543 getinfo_super_ddf(st
, &info
, NULL
);
1544 fname_from_uuid(st
, &info
, nbuf
, ':');
1545 printf("MD_METADATA=ddf\n");
1546 printf("MD_LEVEL=container\n");
1547 printf("MD_UUID=%s\n", nbuf
+5);
1550 static int copy_metadata_ddf(struct supertype
*st
, int from
, int to
)
1553 unsigned long long dsize
, offset
;
1555 struct ddf_header
*ddf
;
1558 /* The meta consists of an anchor, a primary, and a secondary.
1559 * This all lives at the end of the device.
1560 * So it is easiest to find the earliest of primary and
1561 * secondary, and copy everything from there.
1563 * Anchor is 512 from end It contains primary_lba and secondary_lba
1564 * we choose one of those
1567 if (posix_memalign(&buf
, 4096, 4096) != 0)
1570 if (!get_dev_size(from
, NULL
, &dsize
))
1573 if (lseek64(from
, dsize
-512, 0) < 0)
1575 if (read(from
, buf
, 512) != 512)
1578 if (ddf
->magic
!= DDF_HEADER_MAGIC
||
1579 calc_crc(ddf
, 512) != ddf
->crc
||
1580 (memcmp(ddf
->revision
, DDF_REVISION_0
, 8) != 0 &&
1581 memcmp(ddf
->revision
, DDF_REVISION_2
, 8) != 0))
1584 offset
= dsize
- 512;
1585 if ((__be64_to_cpu(ddf
->primary_lba
) << 9) < offset
)
1586 offset
= __be64_to_cpu(ddf
->primary_lba
) << 9;
1587 if ((__be64_to_cpu(ddf
->secondary_lba
) << 9) < offset
)
1588 offset
= __be64_to_cpu(ddf
->secondary_lba
) << 9;
1590 bytes
= dsize
- offset
;
1592 if (lseek64(from
, offset
, 0) < 0 ||
1593 lseek64(to
, offset
, 0) < 0)
1595 while (written
< bytes
) {
1596 int n
= bytes
- written
;
1599 if (read(from
, buf
, n
) != n
)
1601 if (write(to
, buf
, n
) != n
)
/*
 * Placeholder: currently prints nothing.
 *
 * FIXME later
 * Could print DDF GUID
 * Need to find which array
 *  If whole, briefly list all arrays
 *  If one, give name
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
}
1622 static void brief_detail_super_ddf(struct supertype
*st
)
1626 struct ddf_super
*ddf
= st
->sb
;
1627 unsigned int vcnum
= get_vd_num_of_subarray(st
);
1628 if (vcnum
== DDF_CONTAINER
)
1629 uuid_from_super_ddf(st
, info
.uuid
);
1630 else if (vcnum
== DDF_NOTFOUND
)
1633 uuid_from_ddf_guid(ddf
->virt
->entries
[vcnum
].guid
, info
.uuid
);
1634 fname_from_uuid(st
, &info
, nbuf
,':');
1635 printf(" UUID=%s", nbuf
+ 5);
1639 static int match_home_ddf(struct supertype
*st
, char *homehost
)
1641 /* It matches 'this' host if the controller is a
1642 * Linux-MD controller with vendor_data matching
1645 struct ddf_super
*ddf
= st
->sb
;
1650 len
= strlen(homehost
);
1652 return (memcmp(ddf
->controller
.guid
, T10
, 8) == 0 &&
1653 len
< sizeof(ddf
->controller
.vendor_data
) &&
1654 memcmp(ddf
->controller
.vendor_data
, homehost
,len
) == 0 &&
1655 ddf
->controller
.vendor_data
[len
] == 0);
1659 static int find_index_in_bvd(const struct ddf_super
*ddf
,
1660 const struct vd_config
*conf
, unsigned int n
,
1661 unsigned int *n_bvd
)
1664 * Find the index of the n-th valid physical disk in this BVD
1667 for (i
= 0, j
= 0; i
< ddf
->mppe
&&
1668 j
< __be16_to_cpu(conf
->prim_elmnt_count
); i
++) {
1669 if (conf
->phys_refnum
[i
] != 0xffffffff) {
1677 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1678 __func__
, n
, __be16_to_cpu(conf
->prim_elmnt_count
));
1682 static struct vd_config
*find_vdcr(struct ddf_super
*ddf
, unsigned int inst
,
1684 unsigned int *n_bvd
, struct vcl
**vcl
)
1688 for (v
= ddf
->conflist
; v
; v
= v
->next
) {
1689 unsigned int nsec
, ibvd
;
1690 struct vd_config
*conf
;
1691 if (inst
!= v
->vcnum
)
1694 if (conf
->sec_elmnt_count
== 1) {
1695 if (find_index_in_bvd(ddf
, conf
, n
, n_bvd
)) {
1701 if (v
->other_bvds
== NULL
) {
1702 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1703 __func__
, conf
->sec_elmnt_count
);
1706 nsec
= n
/ __be16_to_cpu(conf
->prim_elmnt_count
);
1707 if (conf
->sec_elmnt_seq
!= nsec
) {
1708 for (ibvd
= 1; ibvd
< conf
->sec_elmnt_count
; ibvd
++) {
1709 if (v
->other_bvds
[ibvd
-1]->sec_elmnt_seq
1713 if (ibvd
== conf
->sec_elmnt_count
)
1715 conf
= v
->other_bvds
[ibvd
-1];
1717 if (!find_index_in_bvd(ddf
, conf
,
1718 n
- nsec
*conf
->sec_elmnt_count
, n_bvd
))
1720 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1721 , __func__
, n
, *n_bvd
, ibvd
-1, inst
);
1726 pr_err("%s: Could't find disk %d in array %u\n", __func__
, n
, inst
);
1731 static int find_phys(const struct ddf_super
*ddf
, __u32 phys_refnum
)
1733 /* Find the entry in phys_disk which has the given refnum
1734 * and return it's index
1737 for (i
= 0; i
< __be16_to_cpu(ddf
->phys
->max_pdes
); i
++)
1738 if (ddf
->phys
->entries
[i
].refnum
== phys_refnum
)
1743 static void uuid_from_ddf_guid(const char *guid
, int uuid
[4])
1746 struct sha1_ctx ctx
;
1747 sha1_init_ctx(&ctx
);
1748 sha1_process_bytes(guid
, DDF_GUID_LEN
, &ctx
);
1749 sha1_finish_ctx(&ctx
, buf
);
1750 memcpy(uuid
, buf
, 4*4);
1753 static void uuid_from_super_ddf(struct supertype
*st
, int uuid
[4])
1755 /* The uuid returned here is used for:
1756 * uuid to put into bitmap file (Create, Grow)
1757 * uuid for backup header when saving critical section (Grow)
1758 * comparing uuids when re-adding a device into an array
1759 * In these cases the uuid required is that of the data-array,
1760 * not the device-set.
1761 * uuid to recognise same set when adding a missing device back
1762 * to an array. This is a uuid for the device-set.
1764 * For each of these we can make do with a truncated
1765 * or hashed uuid rather than the original, as long as
1767 * In the case of SVD we assume the BVD is of interest,
1768 * though that might be the case if a bitmap were made for
1769 * a mirrored SVD - worry about that later.
1770 * So we need to find the VD configuration record for the
1771 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1772 * The first 16 bytes of the sha1 of these is used.
1774 struct ddf_super
*ddf
= st
->sb
;
1775 struct vcl
*vcl
= ddf
->currentconf
;
1779 guid
= vcl
->conf
.guid
;
1781 guid
= ddf
->anchor
.guid
;
1782 uuid_from_ddf_guid(guid
, uuid
);
1785 static void getinfo_super_ddf_bvd(struct supertype
*st
, struct mdinfo
*info
, char *map
);
1787 static void getinfo_super_ddf(struct supertype
*st
, struct mdinfo
*info
, char *map
)
1789 struct ddf_super
*ddf
= st
->sb
;
1790 int map_disks
= info
->array
.raid_disks
;
1793 if (ddf
->currentconf
) {
1794 getinfo_super_ddf_bvd(st
, info
, map
);
1797 memset(info
, 0, sizeof(*info
));
1799 info
->array
.raid_disks
= __be16_to_cpu(ddf
->phys
->used_pdes
);
1800 info
->array
.level
= LEVEL_CONTAINER
;
1801 info
->array
.layout
= 0;
1802 info
->array
.md_minor
= -1;
1803 cptr
= (__u32
*)(ddf
->anchor
.guid
+ 16);
1804 info
->array
.ctime
= DECADE
+ __be32_to_cpu(*cptr
);
1806 info
->array
.utime
= 0;
1807 info
->array
.chunk_size
= 0;
1808 info
->container_enough
= 1;
1810 info
->disk
.major
= 0;
1811 info
->disk
.minor
= 0;
1813 info
->disk
.number
= __be32_to_cpu(ddf
->dlist
->disk
.refnum
);
1814 info
->disk
.raid_disk
= find_phys(ddf
, ddf
->dlist
->disk
.refnum
);
1816 info
->data_offset
= __be64_to_cpu(ddf
->phys
->
1817 entries
[info
->disk
.raid_disk
].
1819 info
->component_size
= ddf
->dlist
->size
- info
->data_offset
;
1821 info
->disk
.number
= -1;
1822 info
->disk
.raid_disk
= -1;
1823 // info->disk.raid_disk = find refnum in the table and use index;
1825 info
->disk
.state
= (1 << MD_DISK_SYNC
) | (1 << MD_DISK_ACTIVE
);
1827 info
->recovery_start
= MaxSector
;
1828 info
->reshape_active
= 0;
1829 info
->recovery_blocked
= 0;
1832 info
->array
.major_version
= -1;
1833 info
->array
.minor_version
= -2;
1834 strcpy(info
->text_version
, "ddf");
1835 info
->safe_mode_delay
= 0;
1837 uuid_from_super_ddf(st
, info
->uuid
);
1841 for (i
= 0 ; i
< map_disks
; i
++) {
1842 if (i
< info
->array
.raid_disks
&&
1843 (__be16_to_cpu(ddf
->phys
->entries
[i
].state
) & DDF_Online
) &&
1844 !(__be16_to_cpu(ddf
->phys
->entries
[i
].state
) & DDF_Failed
))
1852 static void getinfo_super_ddf_bvd(struct supertype
*st
, struct mdinfo
*info
, char *map
)
1854 struct ddf_super
*ddf
= st
->sb
;
1855 struct vcl
*vc
= ddf
->currentconf
;
1856 int cd
= ddf
->currentdev
;
1860 int map_disks
= info
->array
.raid_disks
;
1862 struct vd_config
*conf
;
1864 memset(info
, 0, sizeof(*info
));
1865 if (layout_ddf2md(&vc
->conf
, &info
->array
) == -1)
1867 info
->array
.md_minor
= -1;
1868 cptr
= (__u32
*)(vc
->conf
.guid
+ 16);
1869 info
->array
.ctime
= DECADE
+ __be32_to_cpu(*cptr
);
1870 info
->array
.utime
= DECADE
+ __be32_to_cpu(vc
->conf
.timestamp
);
1871 info
->array
.chunk_size
= 512 << vc
->conf
.chunk_shift
;
1872 info
->custom_array_size
= 0;
1875 n_prim
= __be16_to_cpu(conf
->prim_elmnt_count
);
1876 if (conf
->sec_elmnt_count
> 1 && cd
>= n_prim
) {
1877 int ibvd
= cd
/ n_prim
- 1;
1879 conf
= vc
->other_bvds
[ibvd
];
1882 if (cd
>= 0 && (unsigned)cd
< ddf
->mppe
) {
1884 __be64_to_cpu(LBA_OFFSET(ddf
, &vc
->conf
)[cd
]);
1885 if (vc
->block_sizes
)
1886 info
->component_size
= vc
->block_sizes
[cd
];
1888 info
->component_size
= __be64_to_cpu(vc
->conf
.blocks
);
1891 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
1892 if (dl
->disk
.refnum
== conf
->phys_refnum
[cd
])
1895 info
->disk
.major
= 0;
1896 info
->disk
.minor
= 0;
1897 info
->disk
.state
= 0;
1899 info
->disk
.major
= dl
->major
;
1900 info
->disk
.minor
= dl
->minor
;
1901 info
->disk
.raid_disk
= cd
+ conf
->sec_elmnt_seq
1902 * __be16_to_cpu(conf
->prim_elmnt_count
);
1903 info
->disk
.number
= dl
->pdnum
;
1904 info
->disk
.state
= (1<<MD_DISK_SYNC
)|(1<<MD_DISK_ACTIVE
);
1907 info
->container_member
= ddf
->currentconf
->vcnum
;
1909 info
->recovery_start
= MaxSector
;
1910 info
->resync_start
= 0;
1911 info
->reshape_active
= 0;
1912 info
->recovery_blocked
= 0;
1913 if (!(ddf
->virt
->entries
[info
->container_member
].state
1914 & DDF_state_inconsistent
) &&
1915 (ddf
->virt
->entries
[info
->container_member
].init_state
1916 & DDF_initstate_mask
)
1918 info
->resync_start
= MaxSector
;
1920 uuid_from_super_ddf(st
, info
->uuid
);
1922 info
->array
.major_version
= -1;
1923 info
->array
.minor_version
= -2;
1924 sprintf(info
->text_version
, "/%s/%d",
1925 st
->container_devnm
,
1926 info
->container_member
);
1927 info
->safe_mode_delay
= 200;
1929 memcpy(info
->name
, ddf
->virt
->entries
[info
->container_member
].name
, 16);
1932 if (info
->name
[j
] == ' ')
1936 for (j
= 0; j
< map_disks
; j
++) {
1938 if (j
< info
->array
.raid_disks
) {
1939 int i
= find_phys(ddf
, vc
->conf
.phys_refnum
[j
]);
1941 (__be16_to_cpu(ddf
->phys
->entries
[i
].state
) & DDF_Online
) &&
1942 !(__be16_to_cpu(ddf
->phys
->entries
[i
].state
) & DDF_Failed
))
/*
 * Handle a metadata update request.
 *
 * For 'assemble' and 'force' callers need a non-zero return if any change
 * was made; for others the return value is ignored.  Nothing is modified
 * here: "force-*" and "assemble" need no handling because there is no
 * need to 'trick' the kernel - when the metadata is first updated to
 * activate the array, all the implied modifications will just happen.
 *
 * Returns 0 for the requests that are accepted as no-ops
 *   grow              (FIXME: not implemented)
 *   resync
 *   _reshape_progress (reshape is not supported yet)
 *   assemble          (do nothing, just succeed)
 * and -1 for everything else, including "homehost" (would live in
 * controller->vendor_data) and "name" (would live in
 * virtual_entry->name), which are not implemented.
 *
 * Only 'update' is examined; the other parameters are unused.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	size_t k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
2017 static void make_header_guid(char *guid
)
2020 /* Create a DDF Header of Virtual Disk GUID */
2022 /* 24 bytes of fiction required.
2023 * first 8 are a 'vendor-id' - "Linux-MD"
2024 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2025 * Remaining 8 random number plus timestamp
2027 memcpy(guid
, T10
, sizeof(T10
));
2028 stamp
= __cpu_to_be32(0xdeadbeef);
2029 memcpy(guid
+8, &stamp
, 4);
2030 stamp
= __cpu_to_be32(0);
2031 memcpy(guid
+12, &stamp
, 4);
2032 stamp
= __cpu_to_be32(time(0) - DECADE
);
2033 memcpy(guid
+16, &stamp
, 4);
2035 memcpy(guid
+20, &stamp
, 4);
2038 static unsigned int find_unused_vde(const struct ddf_super
*ddf
)
2041 for (i
= 0; i
< __be16_to_cpu(ddf
->virt
->max_vdes
); i
++) {
2042 if (all_ff(ddf
->virt
->entries
[i
].guid
))
2045 return DDF_NOTFOUND
;
2048 static unsigned int find_vde_by_name(const struct ddf_super
*ddf
,
2053 return DDF_NOTFOUND
;
2054 for (i
= 0; i
< __be16_to_cpu(ddf
->virt
->max_vdes
); i
++) {
2055 if (all_ff(ddf
->virt
->entries
[i
].guid
))
2057 if (!strncmp(name
, ddf
->virt
->entries
[i
].name
,
2058 sizeof(ddf
->virt
->entries
[i
].name
)))
2061 return DDF_NOTFOUND
;
2064 static unsigned int find_vde_by_guid(const struct ddf_super
*ddf
,
2068 if (guid
== NULL
|| all_ff(guid
))
2069 return DDF_NOTFOUND
;
2070 for (i
= 0; i
< __be16_to_cpu(ddf
->virt
->max_vdes
); i
++)
2071 if (!memcmp(ddf
->virt
->entries
[i
].guid
, guid
, DDF_GUID_LEN
))
2073 return DDF_NOTFOUND
;
2076 static int init_super_ddf_bvd(struct supertype
*st
,
2077 mdu_array_info_t
*info
,
2078 unsigned long long size
,
2079 char *name
, char *homehost
,
2080 int *uuid
, unsigned long long data_offset
);
2082 static int init_super_ddf(struct supertype
*st
,
2083 mdu_array_info_t
*info
,
2084 unsigned long long size
, char *name
, char *homehost
,
2085 int *uuid
, unsigned long long data_offset
)
2087 /* This is primarily called by Create when creating a new array.
2088 * We will then get add_to_super called for each component, and then
2089 * write_init_super called to write it out to each device.
2090 * For DDF, Create can create on fresh devices or on a pre-existing
2092 * To create on a pre-existing array a different method will be called.
2093 * This one is just for fresh drives.
2095 * We need to create the entire 'ddf' structure which includes:
2096 * DDF headers - these are easy.
2097 * Controller data - a Sector describing this controller .. not that
2098 * this is a controller exactly.
2099 * Physical Disk Record - one entry per device, so
2100 * leave plenty of space.
2101 * Virtual Disk Records - again, just leave plenty of space.
2102 * This just lists VDs, doesn't give details
2103 * Config records - describes the VDs that use this disk
2104 * DiskData - describes 'this' device.
2105 * BadBlockManagement - empty
2106 * Diag Space - empty
2107 * Vendor Logs - Could we put bitmaps here?
2110 struct ddf_super
*ddf
;
2113 int max_phys_disks
, max_virt_disks
;
2114 unsigned long long sector
;
2118 struct phys_disk
*pd
;
2119 struct virtual_disk
*vd
;
2121 if (data_offset
!= INVALID_SECTORS
) {
2122 pr_err("data-offset not supported by DDF\n");
2127 return init_super_ddf_bvd(st
, info
, size
, name
, homehost
, uuid
,
2130 if (posix_memalign((void**)&ddf
, 512, sizeof(*ddf
)) != 0) {
2131 pr_err("%s could not allocate superblock\n", __func__
);
2134 memset(ddf
, 0, sizeof(*ddf
));
2135 ddf
->dlist
= NULL
; /* no physical disks yet */
2136 ddf
->conflist
= NULL
; /* No virtual disks yet */
2140 /* zeroing superblock */
2144 /* At least 32MB *must* be reserved for the ddf. So let's just
2145 * start 32MB from the end, and put the primary header there.
2146 * Don't do secondary for now.
2147 * We don't know exactly where that will be yet as it could be
2148 * different on each device. To just set up the lengths.
2152 ddf
->anchor
.magic
= DDF_HEADER_MAGIC
;
2153 make_header_guid(ddf
->anchor
.guid
);
2155 memcpy(ddf
->anchor
.revision
, DDF_REVISION_2
, 8);
2156 ddf
->anchor
.seq
= __cpu_to_be32(1);
2157 ddf
->anchor
.timestamp
= __cpu_to_be32(time(0) - DECADE
);
2158 ddf
->anchor
.openflag
= 0xFF;
2159 ddf
->anchor
.foreignflag
= 0;
2160 ddf
->anchor
.enforcegroups
= 0; /* Is this best?? */
2161 ddf
->anchor
.pad0
= 0xff;
2162 memset(ddf
->anchor
.pad1
, 0xff, 12);
2163 memset(ddf
->anchor
.header_ext
, 0xff, 32);
2164 ddf
->anchor
.primary_lba
= ~(__u64
)0;
2165 ddf
->anchor
.secondary_lba
= ~(__u64
)0;
2166 ddf
->anchor
.type
= DDF_HEADER_ANCHOR
;
2167 memset(ddf
->anchor
.pad2
, 0xff, 3);
2168 ddf
->anchor
.workspace_len
= __cpu_to_be32(32768); /* Must be reserved */
2169 ddf
->anchor
.workspace_lba
= ~(__u64
)0; /* Put this at bottom
2170 of 32M reserved.. */
2171 max_phys_disks
= 1023; /* Should be enough */
2172 ddf
->anchor
.max_pd_entries
= __cpu_to_be16(max_phys_disks
);
2173 max_virt_disks
= 255;
2174 ddf
->anchor
.max_vd_entries
= __cpu_to_be16(max_virt_disks
); /* ?? */
2175 ddf
->anchor
.max_partitions
= __cpu_to_be16(64); /* ?? */
2178 ddf
->conf_rec_len
= 1 + ROUND_UP(ddf
->mppe
* (4+8), 512)/512;
2179 ddf
->anchor
.config_record_len
= __cpu_to_be16(ddf
->conf_rec_len
);
2180 ddf
->anchor
.max_primary_element_entries
= __cpu_to_be16(ddf
->mppe
);
2181 memset(ddf
->anchor
.pad3
, 0xff, 54);
2182 /* controller sections is one sector long immediately
2183 * after the ddf header */
2185 ddf
->anchor
.controller_section_offset
= __cpu_to_be32(sector
);
2186 ddf
->anchor
.controller_section_length
= __cpu_to_be32(1);
2189 /* phys is 8 sectors after that */
2190 pdsize
= ROUND_UP(sizeof(struct phys_disk
) +
2191 sizeof(struct phys_disk_entry
)*max_phys_disks
,
2193 switch(pdsize
/512) {
2194 case 2: case 8: case 32: case 128: case 512: break;
2197 ddf
->anchor
.phys_section_offset
= __cpu_to_be32(sector
);
2198 ddf
->anchor
.phys_section_length
=
2199 __cpu_to_be32(pdsize
/512); /* max_primary_element_entries/8 */
2200 sector
+= pdsize
/512;
2202 /* virt is another 32 sectors */
2203 vdsize
= ROUND_UP(sizeof(struct virtual_disk
) +
2204 sizeof(struct virtual_entry
) * max_virt_disks
,
2206 switch(vdsize
/512) {
2207 case 2: case 8: case 32: case 128: case 512: break;
2210 ddf
->anchor
.virt_section_offset
= __cpu_to_be32(sector
);
2211 ddf
->anchor
.virt_section_length
=
2212 __cpu_to_be32(vdsize
/512); /* max_vd_entries/8 */
2213 sector
+= vdsize
/512;
2215 clen
= ddf
->conf_rec_len
* (ddf
->max_part
+1);
2216 ddf
->anchor
.config_section_offset
= __cpu_to_be32(sector
);
2217 ddf
->anchor
.config_section_length
= __cpu_to_be32(clen
);
2220 ddf
->anchor
.data_section_offset
= __cpu_to_be32(sector
);
2221 ddf
->anchor
.data_section_length
= __cpu_to_be32(1);
2224 ddf
->anchor
.bbm_section_length
= __cpu_to_be32(0);
2225 ddf
->anchor
.bbm_section_offset
= __cpu_to_be32(0xFFFFFFFF);
2226 ddf
->anchor
.diag_space_length
= __cpu_to_be32(0);
2227 ddf
->anchor
.diag_space_offset
= __cpu_to_be32(0xFFFFFFFF);
2228 ddf
->anchor
.vendor_length
= __cpu_to_be32(0);
2229 ddf
->anchor
.vendor_offset
= __cpu_to_be32(0xFFFFFFFF);
2231 memset(ddf
->anchor
.pad4
, 0xff, 256);
2233 memcpy(&ddf
->primary
, &ddf
->anchor
, 512);
2234 memcpy(&ddf
->secondary
, &ddf
->anchor
, 512);
2236 ddf
->primary
.openflag
= 1; /* I guess.. */
2237 ddf
->primary
.type
= DDF_HEADER_PRIMARY
;
2239 ddf
->secondary
.openflag
= 1; /* I guess.. */
2240 ddf
->secondary
.type
= DDF_HEADER_SECONDARY
;
2242 ddf
->active
= &ddf
->primary
;
2244 ddf
->controller
.magic
= DDF_CONTROLLER_MAGIC
;
2246 /* 24 more bytes of fiction required.
2247 * first 8 are a 'vendor-id' - "Linux-MD"
2248 * Remaining 16 are serial number.... maybe a hostname would do?
2250 memcpy(ddf
->controller
.guid
, T10
, sizeof(T10
));
2251 gethostname(hostname
, sizeof(hostname
));
2252 hostname
[sizeof(hostname
) - 1] = 0;
2253 hostlen
= strlen(hostname
);
2254 memcpy(ddf
->controller
.guid
+ 24 - hostlen
, hostname
, hostlen
);
2255 for (i
= strlen(T10
) ; i
+hostlen
< 24; i
++)
2256 ddf
->controller
.guid
[i
] = ' ';
2258 ddf
->controller
.type
.vendor_id
= __cpu_to_be16(0xDEAD);
2259 ddf
->controller
.type
.device_id
= __cpu_to_be16(0xBEEF);
2260 ddf
->controller
.type
.sub_vendor_id
= 0;
2261 ddf
->controller
.type
.sub_device_id
= 0;
2262 memcpy(ddf
->controller
.product_id
, "What Is My PID??", 16);
2263 memset(ddf
->controller
.pad
, 0xff, 8);
2264 memset(ddf
->controller
.vendor_data
, 0xff, 448);
2265 if (homehost
&& strlen(homehost
) < 440)
2266 strcpy((char*)ddf
->controller
.vendor_data
, homehost
);
2268 if (posix_memalign((void**)&pd
, 512, pdsize
) != 0) {
2269 pr_err("%s could not allocate pd\n", __func__
);
2273 ddf
->pdsize
= pdsize
;
2275 memset(pd
, 0xff, pdsize
);
2276 memset(pd
, 0, sizeof(*pd
));
2277 pd
->magic
= DDF_PHYS_RECORDS_MAGIC
;
2278 pd
->used_pdes
= __cpu_to_be16(0);
2279 pd
->max_pdes
= __cpu_to_be16(max_phys_disks
);
2280 memset(pd
->pad
, 0xff, 52);
2281 for (i
= 0; i
< max_phys_disks
; i
++)
2282 memset(pd
->entries
[i
].guid
, 0xff, DDF_GUID_LEN
);
2284 if (posix_memalign((void**)&vd
, 512, vdsize
) != 0) {
2285 pr_err("%s could not allocate vd\n", __func__
);
2289 ddf
->vdsize
= vdsize
;
2290 memset(vd
, 0, vdsize
);
2291 vd
->magic
= DDF_VIRT_RECORDS_MAGIC
;
2292 vd
->populated_vdes
= __cpu_to_be16(0);
2293 vd
->max_vdes
= __cpu_to_be16(max_virt_disks
);
2294 memset(vd
->pad
, 0xff, 52);
2296 for (i
=0; i
<max_virt_disks
; i
++)
2297 memset(&vd
->entries
[i
], 0xff, sizeof(struct virtual_entry
));
2300 ddf_set_updates_pending(ddf
);
/*
 * Convert a chunk size in bytes into the shift applied to 512-byte
 * sectors: the position of the lowest set bit of chunksize/512.
 * Returns -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	unsigned int sectors = (unsigned int)(chunksize / 512);
	int shift = 0;

	if (sectors == 0)
		return -1;

	while ((sectors & 1u) == 0) {
		sectors >>= 1;
		shift++;
	}
	return shift;
}
/* A used region of a physical disk, in sectors. */
struct extent {
	unsigned long long start, size;
};

/*
 * qsort() comparator ordering extents by ascending start.
 * Returns -1, 0 or 1.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2324 static struct extent
*get_extents(struct ddf_super
*ddf
, struct dl
*dl
)
2326 /* find a list of used extents on the give physical device
2327 * (dnum) of the given ddf.
2328 * Return a malloced array of 'struct extent'
2330 * FIXME ignore DDF_Legacy devices?
2337 rv
= xmalloc(sizeof(struct extent
) * (ddf
->max_part
+ 2));
2339 for (i
= 0; i
< ddf
->max_part
; i
++) {
2340 const struct vd_config
*bvd
;
2342 struct vcl
*v
= dl
->vlist
[i
];
2344 get_pd_index_from_refnum(v
, dl
->disk
.refnum
, ddf
->mppe
,
2345 &bvd
, &ibvd
) == DDF_NOTFOUND
)
2347 rv
[n
].start
= __be64_to_cpu(LBA_OFFSET(ddf
, bvd
)[ibvd
]);
2348 rv
[n
].size
= __be64_to_cpu(bvd
->blocks
);
2351 qsort(rv
, n
, sizeof(*rv
), cmp_extent
);
2353 rv
[n
].start
= __be64_to_cpu(ddf
->phys
->entries
[dl
->pdnum
].config_size
);
2359 static int init_super_ddf_bvd(struct supertype
*st
,
2360 mdu_array_info_t
*info
,
2361 unsigned long long size
,
2362 char *name
, char *homehost
,
2363 int *uuid
, unsigned long long data_offset
)
2365 /* We are creating a BVD inside a pre-existing container.
2366 * so st->sb is already set.
2367 * We need to create a new vd_config and a new virtual_entry
2369 struct ddf_super
*ddf
= st
->sb
;
2370 unsigned int venum
, i
;
2371 struct virtual_entry
*ve
;
2373 struct vd_config
*vc
;
2375 if (find_vde_by_name(ddf
, name
) != DDF_NOTFOUND
) {
2376 pr_err("This ddf already has an array called %s\n", name
);
2379 venum
= find_unused_vde(ddf
);
2380 if (venum
== DDF_NOTFOUND
) {
2381 pr_err("Cannot find spare slot for virtual disk\n");
2384 ve
= &ddf
->virt
->entries
[venum
];
2386 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2387 * timestamp, random number
2389 make_header_guid(ve
->guid
);
2390 ve
->unit
= __cpu_to_be16(info
->md_minor
);
2392 ve
->guid_crc
= crc32(0, (unsigned char*)ddf
->anchor
.guid
, DDF_GUID_LEN
);
2394 ve
->state
= DDF_state_degraded
; /* Will be modified as devices are added */
2395 if (info
->state
& 1) /* clean */
2396 ve
->init_state
= DDF_init_full
;
2398 ve
->init_state
= DDF_init_not
;
2400 memset(ve
->pad1
, 0xff, 14);
2401 memset(ve
->name
, ' ', 16);
2403 strncpy(ve
->name
, name
, 16);
2404 ddf
->virt
->populated_vdes
=
2405 __cpu_to_be16(__be16_to_cpu(ddf
->virt
->populated_vdes
)+1);
2407 /* Now create a new vd_config */
2408 if (posix_memalign((void**)&vcl
, 512,
2409 (offsetof(struct vcl
, conf
) + ddf
->conf_rec_len
* 512)) != 0) {
2410 pr_err("%s could not allocate vd_config\n", __func__
);
2414 vcl
->block_sizes
= NULL
; /* FIXME not for CONCAT */
2417 vc
->magic
= DDF_VD_CONF_MAGIC
;
2418 memcpy(vc
->guid
, ve
->guid
, DDF_GUID_LEN
);
2419 vc
->timestamp
= __cpu_to_be32(time(0)-DECADE
);
2420 vc
->seqnum
= __cpu_to_be32(1);
2421 memset(vc
->pad0
, 0xff, 24);
2422 vc
->chunk_shift
= chunk_to_shift(info
->chunk_size
);
2423 if (layout_md2ddf(info
, vc
) == -1 ||
2424 __be16_to_cpu(vc
->prim_elmnt_count
) > ddf
->mppe
) {
2425 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2426 __func__
, info
->level
, info
->layout
, info
->raid_disks
);
2430 vc
->sec_elmnt_seq
= 0;
2431 if (alloc_other_bvds(ddf
, vcl
) != 0) {
2432 pr_err("%s could not allocate other bvds\n",
2437 vc
->blocks
= __cpu_to_be64(info
->size
* 2);
2438 vc
->array_blocks
= __cpu_to_be64(
2439 calc_array_size(info
->level
, info
->raid_disks
, info
->layout
,
2440 info
->chunk_size
, info
->size
*2));
2441 memset(vc
->pad1
, 0xff, 8);
2442 vc
->spare_refs
[0] = 0xffffffff;
2443 vc
->spare_refs
[1] = 0xffffffff;
2444 vc
->spare_refs
[2] = 0xffffffff;
2445 vc
->spare_refs
[3] = 0xffffffff;
2446 vc
->spare_refs
[4] = 0xffffffff;
2447 vc
->spare_refs
[5] = 0xffffffff;
2448 vc
->spare_refs
[6] = 0xffffffff;
2449 vc
->spare_refs
[7] = 0xffffffff;
2450 memset(vc
->cache_pol
, 0, 8);
2452 memset(vc
->pad2
, 0xff, 3);
2453 memset(vc
->pad3
, 0xff, 52);
2454 memset(vc
->pad4
, 0xff, 192);
2455 memset(vc
->v0
, 0xff, 32);
2456 memset(vc
->v1
, 0xff, 32);
2457 memset(vc
->v2
, 0xff, 16);
2458 memset(vc
->v3
, 0xff, 16);
2459 memset(vc
->vendor
, 0xff, 32);
2461 memset(vc
->phys_refnum
, 0xff, 4*ddf
->mppe
);
2462 memset(vc
->phys_refnum
+ddf
->mppe
, 0x00, 8*ddf
->mppe
);
2464 for (i
= 1; i
< vc
->sec_elmnt_count
; i
++) {
2465 memcpy(vcl
->other_bvds
[i
-1], vc
, ddf
->conf_rec_len
* 512);
2466 vcl
->other_bvds
[i
-1]->sec_elmnt_seq
= i
;
2469 vcl
->next
= ddf
->conflist
;
2470 ddf
->conflist
= vcl
;
2471 ddf
->currentconf
= vcl
;
2472 ddf_set_updates_pending(ddf
);
2476 static int get_svd_state(const struct ddf_super
*, const struct vcl
*);
/* Attach one physical disk to the BVD that is currently being created
 * (ddf->currentconf).
 *
 * The disk is looked up in ddf->dlist, either by dl->raiddisk or by
 * major/minor (both loops are visible below).  The guard
 * "!dl || !(dk->state & (1<<MD_DISK_SYNC))" covers a disk that was not
 * found or is not marked in-sync.
 * When vc->sec_elmnt_count > 1 the vd_config may be taken from
 * currentconf->other_bvds[raid_disk / prim_elmnt_count - 1].
 * The extent list from get_extents() is walked for a gap of at least
 * 'blocks' (vc->blocks, or block_sizes[dk->raid_disk] when block_sizes is
 * set).  The chosen position is stored through LBA_OFFSET() and the disk's
 * refnum in vc->phys_refnum[raid_disk].
 * The vcl is then recorded in the first NULL slot of dl->vlist[], the
 * state bits of the virtual entry are refreshed from get_svd_state(), the
 * phys entry type loses DDF_Global_Spare and gains DDF_Active_in_VD, and
 * ddf_set_updates_pending() is called.
 */
2479 static void add_to_super_ddf_bvd(struct supertype
*st
,
2480 mdu_disk_info_t
*dk
, int fd
, char *devname
)
2482 /* fd and devname identify a device with-in the ddf container (st).
2483 * dk identifies a location in the new BVD.
2484 * We need to find suitable free space in that device and update
2485 * the phys_refnum and lba_offset for the newly created vd_config.
2486 * We might also want to update the type in the phys_disk
2489 * Alternately: fd == -1 and we have already chosen which device to
2490 * use and recorded in dlist->raid_disk;
2493 struct ddf_super
*ddf
= st
->sb
;
2494 struct vd_config
*vc
;
2496 unsigned long long blocks
, pos
, esize
;
2498 unsigned int raid_disk
= dk
->raid_disk
;
2501 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
2502 if (dl
->raiddisk
== dk
->raid_disk
)
2505 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
2506 if (dl
->major
== dk
->major
&&
2507 dl
->minor
== dk
->minor
)
2510 if (!dl
|| ! (dk
->state
& (1<<MD_DISK_SYNC
)))
2513 vc
= &ddf
->currentconf
->conf
;
2514 if (vc
->sec_elmnt_count
> 1) {
2515 unsigned int n
= __be16_to_cpu(vc
->prim_elmnt_count
);
2517 vc
= ddf
->currentconf
->other_bvds
[raid_disk
/ n
- 1];
2521 ex
= get_extents(ddf
, dl
);
2526 blocks
= __be64_to_cpu(vc
->blocks
);
2527 if (ddf
->currentconf
->block_sizes
)
2528 blocks
= ddf
->currentconf
->block_sizes
[dk
->raid_disk
];
2531 esize
= ex
[i
].start
- pos
;
2532 if (esize
>= blocks
)
2534 pos
= ex
[i
].start
+ ex
[i
].size
;
2536 } while (ex
[i
-1].size
);
2542 ddf
->currentdev
= dk
->raid_disk
;
2543 vc
->phys_refnum
[raid_disk
] = dl
->disk
.refnum
;
2544 LBA_OFFSET(ddf
, vc
)[raid_disk
] = __cpu_to_be64(pos
);
2546 for (i
= 0; i
< ddf
->max_part
; i
++)
2547 if (dl
->vlist
[i
] == NULL
)
2549 if (i
== ddf
->max_part
)
2551 dl
->vlist
[i
] = ddf
->currentconf
;
2556 dl
->devname
= devname
;
2558 /* Check if we can mark array as optimal yet */
2559 i
= ddf
->currentconf
->vcnum
;
2560 ddf
->virt
->entries
[i
].state
=
2561 (ddf
->virt
->entries
[i
].state
& ~DDF_state_mask
)
2562 | get_svd_state(ddf
, ddf
->currentconf
);
2563 ddf
->phys
->entries
[dl
->pdnum
].type
&= ~__cpu_to_be16(DDF_Global_Spare
);
2564 ddf
->phys
->entries
[dl
->pdnum
].type
|= __cpu_to_be16(DDF_Active_in_VD
);
2565 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
2566 __func__
, dl
->pdnum
, __be32_to_cpu(dl
->disk
.refnum
),
2567 ddf
->currentconf
->vcnum
, guid_str(vc
->guid
),
2569 ddf_set_updates_pending(ddf
);
/* Scan ddf->phys->entries[0 .. max_pdes) for an entry whose guid makes
 * all_ff() true (presumably an all-0xff guid marking an unused physical
 * disk entry).
 * Returns DDF_NOTFOUND when the scan does not find one.
 */
2572 static unsigned int find_unused_pde(const struct ddf_super
*ddf
)
2575 for (i
= 0; i
< __be16_to_cpu(ddf
->phys
->max_pdes
); i
++) {
2576 if (all_ff(ddf
->phys
->entries
[i
].guid
))
2579 return DDF_NOTFOUND
;
2582 /* add a device to a container, either while creating it or while
2583 * expanding a pre-existing container
/* Add a device to the container.
 *
 * If a virtual disk is being created (ddf->currentconf is set) the work is
 * handed to add_to_super_ddf_bvd().  Otherwise:
 *  - a free phys_disk entry index 'n' is obtained from find_unused_pde();
 *  - the device size from get_dev_size() is compared with 32*1024*1024;
 *  - a struct dl ('dd') with room for ddf->max_part vlist pointers is
 *    allocated 512-byte aligned with posix_memalign();
 *  - dd->disk gets DDF_PHYS_DATA_MAGIC, a guid made of T10 plus the
 *    current local date followed by random32() data, a random32() refnum
 *    that is compared against the refnums already in ddf->phys, the
 *    forced_ref/forced_guid flags, vendor "Linux" (space padded) and 0xff
 *    padding;
 *  - the phys_disk_entry receives the guid and refnum, type
 *    DDF_Forced_PD_GUID | DDF_Global_Spare and state DDF_Online.
 * With st->update_tail set, a struct phys_disk 'pd' holding one entry is
 * prepared (used_pdes = n, pde redirected to pd->entries[0]) and dd is
 * chained onto ddf->add_list; the other visible assignment chains dd onto
 * ddf->dlist and ddf->phys->used_pdes is incremented.
 * __calc_lba() derives workspace_lba, primary_lba and secondary_lba for
 * dd, reusing the distance from the end of the device found on the first
 * dlist member when there is one.
 *
 * data_offset is not referenced in the lines visible here.
 * NOTE(review): the message "%s could allocate buffer for new disk,
 * aborting" reads as if "not" is missing.
 * NOTE(review): the results of get_dev_size() and localtime() are used
 * without a visible check - confirm against the full source.
 */
2585 static int add_to_super_ddf(struct supertype
*st
,
2586 mdu_disk_info_t
*dk
, int fd
, char *devname
,
2587 unsigned long long data_offset
)
2589 struct ddf_super
*ddf
= st
->sb
;
2593 unsigned long long size
;
2594 struct phys_disk_entry
*pde
;
2599 if (ddf
->currentconf
) {
2600 add_to_super_ddf_bvd(st
, dk
, fd
, devname
);
2604 /* This is device numbered dk->number. We need to create
2605 * a phys_disk entry and a more detailed disk_data entry.
2608 n
= find_unused_pde(ddf
);
2609 if (n
== DDF_NOTFOUND
) {
2610 pr_err("%s: No free slot in array, cannot add disk\n",
2614 pde
= &ddf
->phys
->entries
[n
];
2615 get_dev_size(fd
, NULL
, &size
);
2616 if (size
<= 32*1024*1024) {
2617 pr_err("%s: device size must be at least 32MB\n",
2623 if (posix_memalign((void**)&dd
, 512,
2624 sizeof(*dd
) + sizeof(dd
->vlist
[0]) * ddf
->max_part
) != 0) {
2625 pr_err("%s could allocate buffer for new disk, aborting\n",
2629 dd
->major
= major(stb
.st_rdev
);
2630 dd
->minor
= minor(stb
.st_rdev
);
2631 dd
->devname
= devname
;
2635 dd
->disk
.magic
= DDF_PHYS_DATA_MAGIC
;
2637 tm
= localtime(&now
);
2638 sprintf(dd
->disk
.guid
, "%8s%04d%02d%02d",
2639 T10
, tm
->tm_year
+1900, tm
->tm_mon
+1, tm
->tm_mday
);
2640 tptr
= (__u32
*)(dd
->disk
.guid
+ 16);
2641 *tptr
++ = random32();
2645 /* Cannot be bothered finding a CRC of some irrelevant details*/
2646 dd
->disk
.refnum
= random32();
2647 for (i
= __be16_to_cpu(ddf
->active
->max_pd_entries
);
2649 if (ddf
->phys
->entries
[i
-1].refnum
== dd
->disk
.refnum
)
2653 dd
->disk
.forced_ref
= 1;
2654 dd
->disk
.forced_guid
= 1;
2655 memset(dd
->disk
.vendor
, ' ', 32);
2656 memcpy(dd
->disk
.vendor
, "Linux", 5);
2657 memset(dd
->disk
.pad
, 0xff, 442);
2658 for (i
= 0; i
< ddf
->max_part
; i
++)
2659 dd
->vlist
[i
] = NULL
;
2663 if (st
->update_tail
) {
2664 int len
= (sizeof(struct phys_disk
) +
2665 sizeof(struct phys_disk_entry
));
2666 struct phys_disk
*pd
;
2669 pd
->magic
= DDF_PHYS_RECORDS_MAGIC
;
2670 pd
->used_pdes
= __cpu_to_be16(n
);
2671 pde
= &pd
->entries
[0];
2674 ddf
->phys
->used_pdes
= __cpu_to_be16(
2675 1 + __be16_to_cpu(ddf
->phys
->used_pdes
));
2677 memcpy(pde
->guid
, dd
->disk
.guid
, DDF_GUID_LEN
);
2678 pde
->refnum
= dd
->disk
.refnum
;
2679 pde
->type
= __cpu_to_be16(DDF_Forced_PD_GUID
| DDF_Global_Spare
);
2680 pde
->state
= __cpu_to_be16(DDF_Online
);
2683 * If there is already a device in dlist, try to reserve the same
2684 * amount of workspace. Otherwise, use 32MB.
2685 * We checked disk size above already.
2687 #define __calc_lba(new, old, lba, mb) do { \
2688 unsigned long long dif; \
2689 if ((old) != NULL) \
2690 dif = (old)->size - __be64_to_cpu((old)->lba); \
2692 dif = (new)->size; \
2693 if ((new)->size > dif) \
2694 (new)->lba = __cpu_to_be64((new)->size - dif); \
2696 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2698 __calc_lba(dd
, ddf
->dlist
, workspace_lba
, 32);
2699 __calc_lba(dd
, ddf
->dlist
, primary_lba
, 16);
2700 __calc_lba(dd
, ddf
->dlist
, secondary_lba
, 32);
2701 pde
->config_size
= dd
->workspace_lba
;
2703 sprintf(pde
->path
, "%17.17s","Information: nil") ;
2704 memset(pde
->pad
, 0xff, 6);
2706 if (st
->update_tail
) {
2707 dd
->next
= ddf
->add_list
;
2710 dd
->next
= ddf
->dlist
;
2712 ddf_set_updates_pending(ddf
);
/* React to disk dk->major/dk->minor having disappeared from the container.
 *
 * The disk is searched in ddf->dlist by major/minor.  With
 * st->update_tail set, a struct phys_disk carrying one entry is queued
 * through append_metadata_update(): its used_pdes field holds dl->pdnum
 * and entries[0].state is DDF_Missing.  Per the original comment the
 * remaining work is done when ddf_process_update handles that update.
 */
2718 static int remove_from_super_ddf(struct supertype
*st
, mdu_disk_info_t
*dk
)
2720 struct ddf_super
*ddf
= st
->sb
;
2723 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2724 * disappeared from the container.
2725 * We need to arrange that it disappears from the metadata and
2726 * internal data structures too.
2727 * Most of the work is done by ddf_process_update which edits
2728 * the metadata and closes the file handle and attaches the memory
2729 * where free_updates will free it.
2731 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
2732 if (dl
->major
== dk
->major
&&
2733 dl
->minor
== dk
->minor
)
2738 if (st
->update_tail
) {
2739 int len
= (sizeof(struct phys_disk
) +
2740 sizeof(struct phys_disk_entry
));
2741 struct phys_disk
*pd
;
2744 pd
->magic
= DDF_PHYS_RECORDS_MAGIC
;
2745 pd
->used_pdes
= __cpu_to_be16(dl
->pdnum
);
2746 pd
->entries
[0].state
= __cpu_to_be16(DDF_Missing
);
2747 append_metadata_update(st
, pd
, len
);
2753 * This is the write_init_super method for a ddf container. It is
2754 * called when creating a container or adding another device to a
/* Write one copy of the DDF structure to disk 'd'; 'type' selects the
 * primary (DDF_HEADER_PRIMARY) or secondary (DDF_HEADER_SECONDARY) header
 * and with it the start sector (primary_lba / secondary_lba).
 *
 * Order of the writes visible below:
 *   1. the header with openflag = 1,
 *   2. the controller data,
 *   3. the physical disk records (ddf->pdsize bytes),
 *   4. the virtual disk records (ddf->vdsize bytes),
 *   5. ddf->max_part + 1 config record slots of conf_rec_len * 512 bytes;
 *      the extra last slot uses d->spare.  A slot is written either from a
 *      vd_config (seqnum taken from header->seq) or in NULL_CONF_SZ pieces
 *      from null_aligned - presumably the latter when there is no config
 *      for the slot,
 *   6. the disk_data of 'd',
 *   7. the header once more, now with openflag = 0.
 * The CRC of each section is recomputed with calc_crc() just before it is
 * written.  null_aligned is a 4096-aligned NULL_CONF_SZ byte buffer filled
 * with 0xff and allocated on first use.
 *
 * NOTE(review): every write() is only tested for "< 0", so a short write
 * would go unnoticed, and the lseek64() results are not checked.
 */
2757 #define NULL_CONF_SZ 4096
2759 static char *null_aligned
;
2760 static int __write_ddf_structure(struct dl
*d
, struct ddf_super
*ddf
, __u8 type
)
2762 unsigned long long sector
;
2763 struct ddf_header
*header
;
2764 int fd
, i
, n_config
, conf_size
;
2767 if (null_aligned
== NULL
) {
2768 if (posix_memalign((void **)&null_aligned
, 4096, NULL_CONF_SZ
)
2771 memset(null_aligned
, 0xff, NULL_CONF_SZ
);
2777 case DDF_HEADER_PRIMARY
:
2778 header
= &ddf
->primary
;
2779 sector
= __be64_to_cpu(header
->primary_lba
);
2781 case DDF_HEADER_SECONDARY
:
2782 header
= &ddf
->secondary
;
2783 sector
= __be64_to_cpu(header
->secondary_lba
);
2789 header
->type
= type
;
2790 header
->openflag
= 1;
2791 header
->crc
= calc_crc(header
, 512);
2793 lseek64(fd
, sector
<<9, 0);
2794 if (write(fd
, header
, 512) < 0)
2797 ddf
->controller
.crc
= calc_crc(&ddf
->controller
, 512);
2798 if (write(fd
, &ddf
->controller
, 512) < 0)
2801 ddf
->phys
->crc
= calc_crc(ddf
->phys
, ddf
->pdsize
);
2802 if (write(fd
, ddf
->phys
, ddf
->pdsize
) < 0)
2804 ddf
->virt
->crc
= calc_crc(ddf
->virt
, ddf
->vdsize
);
2805 if (write(fd
, ddf
->virt
, ddf
->vdsize
) < 0)
2808 /* Now write lots of config records. */
2809 n_config
= ddf
->max_part
;
2810 conf_size
= ddf
->conf_rec_len
* 512;
2811 for (i
= 0 ; i
<= n_config
; i
++) {
2813 struct vd_config
*vdc
= NULL
;
2814 if (i
== n_config
) {
2815 c
= (struct vcl
*)d
->spare
;
2822 get_pd_index_from_refnum(
2825 (const struct vd_config
**)&vdc
,
2829 dprintf("writing conf record %i on disk %08x for %s/%u\n",
2830 i
, __be32_to_cpu(d
->disk
.refnum
),
2831 guid_str(vdc
->guid
),
2832 vdc
->sec_elmnt_seq
);
2833 vdc
->seqnum
= header
->seq
;
2834 vdc
->crc
= calc_crc(vdc
, conf_size
);
2835 if (write(fd
, vdc
, conf_size
) < 0)
2838 unsigned int togo
= conf_size
;
2839 while (togo
> NULL_CONF_SZ
) {
2840 if (write(fd
, null_aligned
, NULL_CONF_SZ
) < 0)
2842 togo
-= NULL_CONF_SZ
;
2844 if (write(fd
, null_aligned
, togo
) < 0)
2851 d
->disk
.crc
= calc_crc(&d
->disk
, 512);
2852 if (write(fd
, &d
->disk
, 512) < 0)
2857 header
->openflag
= 0;
2858 header
->crc
= calc_crc(header
, 512);
2860 lseek64(fd
, sector
<<9, 0);
2861 if (write(fd
, header
, 512) < 0)
/* Write the complete DDF metadata to the single disk 'd'.
 *
 * The anchor's workspace_lba, primary_lba and secondary_lba are taken
 * from 'd' when the corresponding field there is non-zero; the other
 * visible assignments place them 32*1024*2, 16*1024*2 and 32*1024*2 below
 * 'size'.  The anchor's seq is set from ddf->active->seq and the anchor is
 * copied into ddf->primary and ddf->secondary; afterwards the anchor
 * itself is marked with openflag 0xFF and seq 0xFFFFFFFF and checksummed.
 * Both structure copies go out through __write_ddf_structure() before the
 * anchor is written at byte offset (size-1)*512.
 *
 * The callers visible in this file add the return value to a success
 * counter and compare it with 1, so presumably 1 means success.
 */
2867 static int _write_super_to_disk(struct ddf_super
*ddf
, struct dl
*d
)
2869 unsigned long long size
;
2874 /* We need to fill in the primary, (secondary) and workspace
2875 * lba's in the headers, set their checksums,
2876 * Also checksum phys, virt....
2878 * Then write everything out, finally the anchor is written.
2880 get_dev_size(fd
, NULL
, &size
);
2882 if (d
->workspace_lba
!= 0)
2883 ddf
->anchor
.workspace_lba
= d
->workspace_lba
;
2885 ddf
->anchor
.workspace_lba
=
2886 __cpu_to_be64(size
- 32*1024*2);
2887 if (d
->primary_lba
!= 0)
2888 ddf
->anchor
.primary_lba
= d
->primary_lba
;
2890 ddf
->anchor
.primary_lba
=
2891 __cpu_to_be64(size
- 16*1024*2);
2892 if (d
->secondary_lba
!= 0)
2893 ddf
->anchor
.secondary_lba
= d
->secondary_lba
;
2895 ddf
->anchor
.secondary_lba
=
2896 __cpu_to_be64(size
- 32*1024*2);
2897 ddf
->anchor
.seq
= ddf
->active
->seq
;
2898 memcpy(&ddf
->primary
, &ddf
->anchor
, 512);
2899 memcpy(&ddf
->secondary
, &ddf
->anchor
, 512);
2901 ddf
->anchor
.openflag
= 0xFF; /* 'open' means nothing */
2902 ddf
->anchor
.seq
= 0xFFFFFFFF; /* no sequencing in anchor */
2903 ddf
->anchor
.crc
= calc_crc(&ddf
->anchor
, 512);
2905 if (!__write_ddf_structure(d
, ddf
, DDF_HEADER_PRIMARY
))
2908 if (!__write_ddf_structure(d
, ddf
, DDF_HEADER_SECONDARY
))
2911 lseek64(fd
, (size
-1)*512, SEEK_SET
);
2912 if (write(fd
, &ddf
->anchor
, 512) < 0)
/* Write the metadata to every disk on ddf->dlist through
 * _write_super_to_disk(), adding its result to 'successes'; a failure on
 * one disk does not stop the loop (see the original comment).
 * Returns the value of "attempts != successes", i.e. 0 when the two
 * counters agree.
 */
2918 static int __write_init_super_ddf(struct supertype
*st
)
2920 struct ddf_super
*ddf
= st
->sb
;
2925 pr_state(ddf
, __func__
);
2927 /* try to write updated metadata,
2928 * if we catch a failure move on to the next disk
2930 for (d
= ddf
->dlist
; d
; d
=d
->next
) {
2932 successes
+= _write_super_to_disk(ddf
, d
);
2935 return attempts
!= successes
;
/* write_init_super method for DDF.
 *
 * ddf->currentconf is saved in a local and then reset to NULL.
 * With st->update_tail set the changes are queued through
 * append_metadata_update() rather than written here:
 *  - when a disk is being added to the container, the mdupdate buffer of
 *    ddf->add_list is queued and the pointer cleared;
 *  - for a newly created VD, a virtual_disk record with a single entry
 *    (populated_vdes carries currentconf->vcnum) is queued, followed by one
 *    buffer of conf_rec_len * 512 * sec_elmnt_count bytes holding
 *    currentconf->conf and then each of currentconf->other_bvds.
 * The remaining visible path calls Kill() on every dlist device for as
 * long as it returns 0 and then returns __write_init_super_ddf(st).
 */
2938 static int write_init_super_ddf(struct supertype
*st
)
2940 struct ddf_super
*ddf
= st
->sb
;
2941 struct vcl
*currentconf
= ddf
->currentconf
;
2943 /* we are done with currentconf reset it to point st at the container */
2944 ddf
->currentconf
= NULL
;
2946 if (st
->update_tail
) {
2947 /* queue the virtual_disk and vd_config as metadata updates */
2948 struct virtual_disk
*vd
;
2949 struct vd_config
*vc
;
2954 int len
= (sizeof(struct phys_disk
) +
2955 sizeof(struct phys_disk_entry
));
2957 /* adding a disk to the container. */
2961 append_metadata_update(st
, ddf
->add_list
->mdupdate
, len
);
2962 ddf
->add_list
->mdupdate
= NULL
;
2966 /* Newly created VD */
2968 /* First the virtual disk. We have a slightly fake header */
2969 len
= sizeof(struct virtual_disk
) + sizeof(struct virtual_entry
);
2972 vd
->entries
[0] = ddf
->virt
->entries
[currentconf
->vcnum
];
2973 vd
->populated_vdes
= __cpu_to_be16(currentconf
->vcnum
);
2974 append_metadata_update(st
, vd
, len
);
2976 /* Then the vd_config */
2977 len
= ddf
->conf_rec_len
* 512;
2978 tlen
= len
* currentconf
->conf
.sec_elmnt_count
;
2980 memcpy(vc
, ¤tconf
->conf
, len
);
2981 for (i
= 1; i
< currentconf
->conf
.sec_elmnt_count
; i
++)
2982 memcpy((char *)vc
+ i
*len
, currentconf
->other_bvds
[i
-1],
2984 append_metadata_update(st
, vc
, tlen
);
2986 /* FIXME I need to close the fds! */
2991 for (d
= ddf
->dlist
; d
; d
=d
->next
)
2992 while (Kill(d
->devname
, NULL
, 0, -1, 1) == 0);
2993 return __write_init_super_ddf(st
);
/* Space left on a device of 'devsize' once the trailing metadata area has
 * been reserved: 32*1024*2 units, which is the "32Meg" of the original
 * comment if devsize is counted in 512-byte sectors.  A device no larger
 * than the reservation is handled separately by the first test.
 * st and data_offset are not referenced in the visible lines.
 */
2999 static __u64
avail_size_ddf(struct supertype
*st
, __u64 devsize
,
3000 unsigned long long data_offset
)
3002 /* We must reserve the last 32Meg */
3003 if (devsize
<= 32*1024*2)
3005 return devsize
- 32*1024*2;
/* Choose 'raiddisks' devices from ddf->dlist that each have a free extent
 * of at least 'size'.
 *
 * For every device the gaps between the extents returned by get_extents()
 * are compared with 'minsize' and a size is remembered in dl->esize.  If
 * fewer than 'raiddisks' devices qualify an error is printed and 0 is
 * returned.  The loop commented "choose the largest size ..." counts, for
 * each dl->esize above 'size', how many devices offer at least that much;
 * 'size' is also divided by 'chunk' there (the original comment speaks of
 * multiples of 'chunk').  The final loop is a first-fit pass over the
 * devices whose esize is at least 'size'.
 */
3010 static int reserve_space(struct supertype
*st
, int raiddisks
,
3011 unsigned long long size
, int chunk
,
3012 unsigned long long *freesize
)
3014 /* Find 'raiddisks' spare extents at least 'size' big (but
3015 * only caring about multiples of 'chunk') and remember
3017 * If they cannot be found, fail.
3020 struct ddf_super
*ddf
= st
->sb
;
3023 for (dl
= ddf
->dlist
; dl
; dl
=dl
->next
) {
3027 /* Now find largest extent on each device */
3028 for (dl
= ddf
->dlist
; dl
; dl
=dl
->next
) {
3029 struct extent
*e
= get_extents(ddf
, dl
);
3030 unsigned long long pos
= 0;
3033 unsigned long long minsize
= size
;
3041 unsigned long long esize
;
3042 esize
= e
[i
].start
- pos
;
3043 if (esize
>= minsize
) {
3047 pos
= e
[i
].start
+ e
[i
].size
;
3049 } while (e
[i
-1].size
);
3052 dl
->esize
= minsize
;
3056 if (cnt
< raiddisks
) {
3057 pr_err("not enough devices with space to create array.\n");
3058 return 0; /* Not enough free spaces large enough */
3061 /* choose the largest size of which there are at least 'raiddisk' */
3062 for (dl
= ddf
->dlist
; dl
; dl
=dl
->next
) {
3064 if (dl
->esize
<= size
)
3066 /* This is bigger than 'size', see if there are enough */
3068 for (dl2
= ddf
->dlist
; dl2
; dl2
=dl2
->next
)
3069 if (dl2
->esize
>= dl
->esize
)
3071 if (cnt
>= raiddisks
)
3075 size
= size
/ chunk
;
3080 pr_err("not enough spare devices to create array.\n");
3084 /* We have a 'size' of which there are enough spaces.
3085 * We simply do a first-fit */
3087 for (dl
= ddf
->dlist
; dl
&& cnt
< raiddisks
; dl
=dl
->next
) {
3088 if (dl
->esize
< size
)
/* Forward declarations of the two helpers that validate_geometry_ddf()
 * below dispatches to.  Note the difference in the 'chunk' parameter:
 * the container variant takes it by value, the BVD variant by pointer.
 */
3098 validate_geometry_ddf_container(struct supertype
*st
,
3099 int level
, int layout
, int raiddisks
,
3100 int chunk
, unsigned long long size
,
3101 unsigned long long data_offset
,
3102 char *dev
, unsigned long long *freesize
,
3105 static int validate_geometry_ddf_bvd(struct supertype
*st
,
3106 int level
, int layout
, int raiddisks
,
3107 int *chunk
, unsigned long long size
,
3108 unsigned long long data_offset
,
3109 char *dev
, unsigned long long *freesize
,
/* Top-level geometry check for DDF.
 *
 *  - An UnSet *chunk becomes DEFAULT_CHUNK and level -1000000 is treated
 *    as LEVEL_CONTAINER.
 *  - LEVEL_CONTAINER requests are passed to
 *    validate_geometry_ddf_container().
 *  - Other requests are first run through layout_md2ddf(); a result of -1
 *    is reported as an unsupported level/layout/disk-count combination.
 *  - With a container already loaded (st->sb) and 'freesize' supplied,
 *    space is reserved with reserve_space(); another visible path hands
 *    the request to validate_geometry_ddf_bvd().
 *  - Otherwise 'dev' is opened with O_RDONLY|O_EXCL.  If that works,
 *    sysfs_read() is used to see whether it is itself a "ddf" container;
 *    the message "a container is required" belongs to this path.  If the
 *    open fails with EBUSY the device is reopened without O_EXCL,
 *    open_container() locates its container, and when that container is
 *    "ddf" it is loaded with load_super_ddf_all(), st->container_devnm is
 *    filled in and validate_geometry_ddf_bvd() is tried.
 */
3112 static int validate_geometry_ddf(struct supertype
*st
,
3113 int level
, int layout
, int raiddisks
,
3114 int *chunk
, unsigned long long size
,
3115 unsigned long long data_offset
,
3116 char *dev
, unsigned long long *freesize
,
3123 /* ddf potentially supports lots of things, but it depends on
3124 * what devices are offered (and maybe kernel version?)
3125 * If given unused devices, we will make a container.
3126 * If given devices in a container, we will make a BVD.
3127 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3130 if (chunk
&& *chunk
== UnSet
)
3131 *chunk
= DEFAULT_CHUNK
;
3133 if (level
== -1000000) level
= LEVEL_CONTAINER
;
3134 if (level
== LEVEL_CONTAINER
) {
3135 /* Must be a fresh device to add to a container */
3136 return validate_geometry_ddf_container(st
, level
, layout
,
3137 raiddisks
, chunk
?*chunk
:0,
3138 size
, data_offset
, dev
,
3144 mdu_array_info_t array
= {
3145 .level
= level
, .layout
= layout
,
3146 .raid_disks
= raiddisks
3148 struct vd_config conf
;
3149 if (layout_md2ddf(&array
, &conf
) == -1) {
3151 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3152 level
, layout
, raiddisks
);
3155 /* Should check layout? etc */
3157 if (st
->sb
&& freesize
) {
3158 /* --create was given a container to create in.
3159 * So we need to check that there are enough
3160 * free spaces and return the amount of space.
3161 * We may as well remember which drives were
3162 * chosen so that add_to_super/getinfo_super
3165 return reserve_space(st
, raiddisks
, size
, chunk
?*chunk
:0, freesize
);
3171 /* A container has already been opened, so we are
3172 * creating in there. Maybe a BVD, maybe an SVD.
3173 * Should make a distinction one day.
3175 return validate_geometry_ddf_bvd(st
, level
, layout
, raiddisks
,
3176 chunk
, size
, data_offset
, dev
,
3180 /* This is the first device for the array.
3181 * If it is a container, we read it in and do automagic allocations,
3182 * no other devices should be given.
3183 * Otherwise it must be a member device of a container, and we
3184 * do manual allocation.
3185 * Later we should check for a BVD and make an SVD.
3187 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
3189 sra
= sysfs_read(fd
, NULL
, GET_VERSION
);
3191 if (sra
&& sra
->array
.major_version
== -1 &&
3192 strcmp(sra
->text_version
, "ddf") == 0) {
3195 /* find space for 'n' devices. */
3196 /* remember the devices */
3197 /* Somehow return the fact that we have enough */
3201 pr_err("ddf: Cannot create this array "
3202 "on device %s - a container is required.\n",
3206 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
3208 pr_err("ddf: Cannot open %s: %s\n",
3209 dev
, strerror(errno
));
3212 /* Well, it is in use by someone, maybe a 'ddf' container. */
3213 cfd
= open_container(fd
);
3217 pr_err("ddf: Cannot use %s: %s\n",
3218 dev
, strerror(EBUSY
));
3221 sra
= sysfs_read(cfd
, NULL
, GET_VERSION
);
3223 if (sra
&& sra
->array
.major_version
== -1 &&
3224 strcmp(sra
->text_version
, "ddf") == 0) {
3225 /* This is a member of a ddf container. Load the container
3226 * and try to create a bvd
3228 struct ddf_super
*ddf
;
3229 if (load_super_ddf_all(st
, cfd
, (void **)&ddf
, NULL
) == 0) {
3231 strcpy(st
->container_devnm
, fd2devnm(cfd
));
3233 return validate_geometry_ddf_bvd(st
, level
, layout
,
3234 raiddisks
, chunk
, size
,
3240 } else /* device may belong to a different container */
/* Check whether 'dev' can be used as a member of a new DDF container.
 *
 * The level must be LEVEL_CONTAINER.  The device is opened with
 * O_RDONLY|O_EXCL (a failure is reported with strerror(errno)), its size
 * is read with get_dev_size(), and *freesize is set to avail_size_ddf()
 * of that size shifted right by 9, i.e. expressed in 512-byte units.
 */
3247 validate_geometry_ddf_container(struct supertype
*st
,
3248 int level
, int layout
, int raiddisks
,
3249 int chunk
, unsigned long long size
,
3250 unsigned long long data_offset
,
3251 char *dev
, unsigned long long *freesize
,
3255 unsigned long long ldsize
;
3257 if (level
!= LEVEL_CONTAINER
)
3262 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
3265 pr_err("ddf: Cannot open %s: %s\n",
3266 dev
, strerror(errno
));
3269 if (!get_dev_size(fd
, dev
, &ldsize
)) {
3275 *freesize
= avail_size_ddf(st
, ldsize
>> 9, INVALID_SECTORS
);
/* Geometry check for creating a BVD inside an already loaded container
 * (st->sb).  LEVEL_CONTAINER is rejected with an error message.
 *
 * Two scans are visible:
 *  - a general one over all of ddf->dlist that walks each device's extent
 *    list looking for a gap of at least 'minsize'; the resulting device
 *    count 'dcnt' is compared with 'raiddisks';
 *  - one for the specific device 'dev': it is stat()ed, must be a block
 *    device whose major/minor is found in ddf->dlist, and its extent list
 *    is walked comparing each gap with 'maxsize', which is finally stored
 *    in *freesize (presumably the largest gap found).
 *
 * NOTE(review): the message "DDF cannot create a container within an
 * container" has an article typo ("an container").
 */
3282 static int validate_geometry_ddf_bvd(struct supertype
*st
,
3283 int level
, int layout
, int raiddisks
,
3284 int *chunk
, unsigned long long size
,
3285 unsigned long long data_offset
,
3286 char *dev
, unsigned long long *freesize
,
3290 struct ddf_super
*ddf
= st
->sb
;
3292 unsigned long long pos
= 0;
3293 unsigned long long maxsize
;
3296 /* ddf/bvd supports lots of things, but not containers */
3297 if (level
== LEVEL_CONTAINER
) {
3299 pr_err("DDF cannot create a container within an container\n");
3302 /* We must have the container info already read in. */
3307 /* General test: make sure there is space for
3308 * 'raiddisks' device extents of size 'size'.
3310 unsigned long long minsize
= size
;
3314 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
3320 e
= get_extents(ddf
, dl
);
3323 unsigned long long esize
;
3324 esize
= e
[i
].start
- pos
;
3325 if (esize
>= minsize
)
3327 pos
= e
[i
].start
+ e
[i
].size
;
3329 } while (e
[i
-1].size
);
3334 if (dcnt
< raiddisks
) {
3336 pr_err("ddf: Not enough devices with "
3337 "space for this array (%d < %d)\n",
3343 /* This device must be a member of the set */
3344 if (stat(dev
, &stb
) < 0)
3346 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
3348 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
) {
3349 if (dl
->major
== (int)major(stb
.st_rdev
) &&
3350 dl
->minor
== (int)minor(stb
.st_rdev
))
3355 pr_err("ddf: %s is not in the "
3360 e
= get_extents(ddf
, dl
);
3364 unsigned long long esize
;
3365 esize
= e
[i
].start
- pos
;
3366 if (esize
>= maxsize
)
3368 pos
= e
[i
].start
+ e
[i
].size
;
3370 } while (e
[i
-1].size
);
3371 *freesize
= maxsize
;
/* Load the DDF metadata of a whole container, given an fd on the
 * container device.
 *
 * sysfs_read() supplies level, version, member devices and state; the
 * array must report major_version -1, minor_version -2 and text_version
 * "ddf".  A zeroed, 512-byte aligned struct ddf_super is allocated.
 * Pass 1: each member is opened read-only and its headers loaded; the
 *         active header's seq is compared with 'bestseq' to choose 'best'
 *         (openflag is also tested; its effect is not visible here).
 * Pass 2: headers and the global sections are loaded from 'best'.
 * Pass 3: every member is opened O_RDWR and its headers and device-local
 *         sections are loaded (load_ddf_local with last argument 1).
 * Finally st->ss is set to &super_ddf (with minor_version 0) if it was
 * NULL, and st->container_devnm is filled from fd2devnm(fd).
 */
3377 static int load_super_ddf_all(struct supertype
*st
, int fd
,
3378 void **sbp
, char *devname
)
3381 struct ddf_super
*super
;
3382 struct mdinfo
*sd
, *best
= NULL
;
3388 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3391 if (sra
->array
.major_version
!= -1 ||
3392 sra
->array
.minor_version
!= -2 ||
3393 strcmp(sra
->text_version
, "ddf") != 0)
3396 if (posix_memalign((void**)&super
, 512, sizeof(*super
)) != 0)
3398 memset(super
, 0, sizeof(*super
));
3400 /* first, try each device, and choose the best ddf */
3401 for (sd
= sra
->devs
; sd
; sd
= sd
->next
) {
3403 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3404 dfd
= dev_open(nm
, O_RDONLY
);
3407 rv
= load_ddf_headers(dfd
, super
, NULL
);
3410 seq
= __be32_to_cpu(super
->active
->seq
);
3411 if (super
->active
->openflag
)
3413 if (!best
|| seq
> bestseq
) {
3421 /* OK, load this ddf */
3422 sprintf(nm
, "%d:%d", best
->disk
.major
, best
->disk
.minor
);
3423 dfd
= dev_open(nm
, O_RDONLY
);
3426 load_ddf_headers(dfd
, super
, NULL
);
3427 load_ddf_global(dfd
, super
, NULL
);
3429 /* Now we need the device-local bits */
3430 for (sd
= sra
->devs
; sd
; sd
= sd
->next
) {
3433 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3434 dfd
= dev_open(nm
, O_RDWR
);
3437 rv
= load_ddf_headers(dfd
, super
, NULL
);
3439 rv
= load_ddf_local(dfd
, super
, NULL
, 1);
3445 if (st
->ss
== NULL
) {
3446 st
->ss
= &super_ddf
;
3447 st
->minor_version
= 0;
3450 strcpy(st
->container_devnm
, fd2devnm(fd
));
/* Thin wrapper: load the container behind 'fd' into st->sb by calling
 * load_super_ddf_all() and return its result.
 */
3454 static int load_container_ddf(struct supertype
*st
, int fd
,
3457 return load_super_ddf_all(st
, fd
, &st
->sb
, devname
);
3460 #endif /* MDASSEMBLE */
3462 static int check_secondary(const struct vcl
*vc
)
3464 const struct vd_config
*conf
= &vc
->conf
;
3467 /* The only DDF secondary RAID level md can support is
3468 * RAID 10, if the stripe sizes and Basic volume sizes
3470 * Other configurations could in theory be supported by exposing
3471 * the BVDs to user space and using device mapper for the secondary
3472 * mapping. So far we don't support that.
3475 __u64 sec_elements
[4] = {0, 0, 0, 0};
3476 #define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3477 #define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3479 if (vc
->other_bvds
== NULL
) {
3480 pr_err("No BVDs for secondary RAID found\n");
3483 if (conf
->prl
!= DDF_RAID1
) {
3484 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3487 if (conf
->srl
!= DDF_2STRIPED
&& conf
->srl
!= DDF_2SPANNED
) {
3488 pr_err("Secondary RAID level %d is unsupported\n",
3492 __set_sec_seen(conf
->sec_elmnt_seq
);
3493 for (i
= 0; i
< conf
->sec_elmnt_count
-1; i
++) {
3494 const struct vd_config
*bvd
= vc
->other_bvds
[i
];
3495 if (bvd
->sec_elmnt_seq
== DDF_UNUSED_BVD
)
3497 if (bvd
->srl
!= conf
->srl
) {
3498 pr_err("Inconsistent secondary RAID level across BVDs\n");
3501 if (bvd
->prl
!= conf
->prl
) {
3502 pr_err("Different RAID levels for BVDs are unsupported\n");
3505 if (bvd
->prim_elmnt_count
!= conf
->prim_elmnt_count
) {
3506 pr_err("All BVDs must have the same number of primary elements\n");
3509 if (bvd
->chunk_shift
!= conf
->chunk_shift
) {
3510 pr_err("Different strip sizes for BVDs are unsupported\n");
3513 if (bvd
->array_blocks
!= conf
->array_blocks
) {
3514 pr_err("Different BVD sizes are unsupported\n");
3517 __set_sec_seen(bvd
->sec_elmnt_seq
);
3519 for (i
= 0; i
< conf
->sec_elmnt_count
; i
++) {
3520 if (!__was_sec_seen(i
)) {
3521 pr_err("BVD %d is missing\n", i
);
/* Look up the physical disk 'refnum' in the phys_refnum[] arrays of a
 * virtual disk and return its index within the array.
 *
 * vc->conf is searched first, then every entry of vc->other_bvds; for
 * those a sec_elmnt_seq of DDF_UNUSED_BVD is tested before the search.
 * Only the first 'nmax' slots of each array are examined.
 * On a match the result is sec * cnt + j - 1 where
 *   cnt = prim_elmnt_count of vc->conf,
 *   sec = sequence number of the BVD (0 when sec_elmnt_count is 1),
 *   j   = counter of valid (non-0xffffffff) slots of that BVD, see the
 *         original comment in the first loop.
 * 'bvd' is an output parameter; the call in container_content_ddf() passes
 * one more output argument after it and later uses both to index
 * LBA_OFFSET() of the BVD.
 * Returns DDF_NOTFOUND at the end of the function, when no match was
 * returned earlier.
 */
3528 static unsigned int get_pd_index_from_refnum(const struct vcl
*vc
,
3529 __u32 refnum
, unsigned int nmax
,
3530 const struct vd_config
**bvd
,
3533 unsigned int i
, j
, n
, sec
, cnt
;
3535 cnt
= __be16_to_cpu(vc
->conf
.prim_elmnt_count
);
3536 sec
= (vc
->conf
.sec_elmnt_count
== 1 ? 0 : vc
->conf
.sec_elmnt_seq
);
3538 for (i
= 0, j
= 0 ; i
< nmax
; i
++) {
3539 /* j counts valid entries for this BVD */
3540 if (vc
->conf
.phys_refnum
[i
] != 0xffffffff)
3542 if (vc
->conf
.phys_refnum
[i
] == refnum
) {
3545 return sec
* cnt
+ j
- 1;
3548 if (vc
->other_bvds
== NULL
)
3551 for (n
= 1; n
< vc
->conf
.sec_elmnt_count
; n
++) {
3552 struct vd_config
*vd
= vc
->other_bvds
[n
-1];
3553 sec
= vd
->sec_elmnt_seq
;
3554 if (sec
== DDF_UNUSED_BVD
)
3556 for (i
= 0, j
= 0 ; i
< nmax
; i
++) {
3557 if (vd
->phys_refnum
[i
] != 0xffffffff)
3559 if (vd
->phys_refnum
[i
] == refnum
) {
3562 return sec
* cnt
+ j
- 1;
3568 return DDF_NOTFOUND
;
/* Build mdinfo records describing the arrays found in a loaded container.
 *
 * For every vcl on ddf->conflist (a 'subarray' string, parsed with
 * strtoul(), is compared with vc->vcnum as a filter):
 *  - configs with sec_elmnt_count > 1 are run through check_secondary();
 *  - an mdinfo is allocated with xcalloc() and the array geometry is
 *    converted with layout_ddf2md();
 *  - version is set to -1/-2, ctime comes from the 32-bit value at guid+16
 *    and utime from conf.timestamp, both offset by DECADE, and chunk_size
 *    is 512 << chunk_shift;
 *  - array.state / resync_start are set to 0/0 or 1/MaxSector depending on
 *    DDF_state_inconsistent and the init state of the virtual entry;
 *  - name (16 bytes from the virtual entry), uuid (uuid_from_super_ddf()
 *    with ddf->currentconf temporarily pointing at vc), component_size
 *    (conf.blocks), array.size (half of that), container_member and
 *    text_version "/<container_devnm>/<member>" are filled in.
 * Then each of the used_pdes physical entries is examined: its state bits
 * (DDF_Online/DDF_Failed/DDF_Rebuilding) are tested,
 * get_pd_index_from_refnum() maps the refnum to a raid slot,
 * working_disks is incremented, and the matching dl in ddf->dlist (same
 * refnum) is described by a per-device mdinfo linked into this->devs:
 * major/minor, raid_disk, state SYNC|ACTIVE, recovery_start MaxSector,
 * events from the primary header seq, an offset read through LBA_OFFSET(),
 * component_size and the device name.
 */
3571 static struct mdinfo
*container_content_ddf(struct supertype
*st
, char *subarray
)
3573 /* Given a container loaded by load_super_ddf_all,
3574 * extract information about all the arrays into
3577 * For each vcl in conflist: create an mdinfo, fill it in,
3578 * then look for matching devices (phys_refnum) in dlist
3579 * and create appropriate device mdinfo.
3581 struct ddf_super
*ddf
= st
->sb
;
3582 struct mdinfo
*rest
= NULL
;
3585 for (vc
= ddf
->conflist
; vc
; vc
=vc
->next
)
3589 struct mdinfo
*this;
3595 (strtoul(subarray
, &ep
, 10) != vc
->vcnum
||
3599 if (vc
->conf
.sec_elmnt_count
> 1) {
3600 if (check_secondary(vc
) != 0)
3604 this = xcalloc(1, sizeof(*this));
3608 if (layout_ddf2md(&vc
->conf
, &this->array
))
3610 this->array
.md_minor
= -1;
3611 this->array
.major_version
= -1;
3612 this->array
.minor_version
= -2;
3613 cptr
= (__u32
*)(vc
->conf
.guid
+ 16);
3614 this->array
.ctime
= DECADE
+ __be32_to_cpu(*cptr
);
3615 this->array
.utime
= DECADE
+
3616 __be32_to_cpu(vc
->conf
.timestamp
);
3617 this->array
.chunk_size
= 512 << vc
->conf
.chunk_shift
;
3620 if ((ddf
->virt
->entries
[i
].state
& DDF_state_inconsistent
) ||
3621 (ddf
->virt
->entries
[i
].init_state
& DDF_initstate_mask
) !=
3623 this->array
.state
= 0;
3624 this->resync_start
= 0;
3626 this->array
.state
= 1;
3627 this->resync_start
= MaxSector
;
3629 memcpy(this->name
, ddf
->virt
->entries
[i
].name
, 16);
3632 if (this->name
[j
] == ' ')
3635 memset(this->uuid
, 0, sizeof(this->uuid
));
3636 this->component_size
= __be64_to_cpu(vc
->conf
.blocks
);
3637 this->array
.size
= this->component_size
/ 2;
3638 this->container_member
= i
;
3640 ddf
->currentconf
= vc
;
3641 uuid_from_super_ddf(st
, this->uuid
);
3643 ddf
->currentconf
= NULL
;
3645 sprintf(this->text_version
, "/%s/%d",
3646 st
->container_devnm
, this->container_member
);
3648 for (pd
= 0; pd
< __be16_to_cpu(ddf
->phys
->used_pdes
); pd
++) {
3651 const struct vd_config
*bvd
;
3655 if (ddf
->phys
->entries
[pd
].refnum
== 0xFFFFFFFF)
3658 stt
= __be16_to_cpu(ddf
->phys
->entries
[pd
].state
);
3659 if ((stt
& (DDF_Online
|DDF_Failed
|DDF_Rebuilding
))
3663 i
= get_pd_index_from_refnum(
3664 vc
, ddf
->phys
->entries
[pd
].refnum
,
3665 ddf
->mppe
, &bvd
, &iphys
);
3666 if (i
== DDF_NOTFOUND
)
3669 this->array
.working_disks
++;
3671 for (d
= ddf
->dlist
; d
; d
=d
->next
)
3672 if (d
->disk
.refnum
==
3673 ddf
->phys
->entries
[pd
].refnum
)
3676 /* Haven't found that one yet, maybe there are others */
3679 dev
= xcalloc(1, sizeof(*dev
));
3680 dev
->next
= this->devs
;
3683 dev
->disk
.number
= __be32_to_cpu(d
->disk
.refnum
);
3684 dev
->disk
.major
= d
->major
;
3685 dev
->disk
.minor
= d
->minor
;
3686 dev
->disk
.raid_disk
= i
;
3687 dev
->disk
.state
= (1<<MD_DISK_SYNC
)|(1<<MD_DISK_ACTIVE
);
3688 dev
->recovery_start
= MaxSector
;
3690 dev
->events
= __be32_to_cpu(ddf
->primary
.seq
);
3692 __be64_to_cpu(LBA_OFFSET(ddf
, bvd
)[iphys
]);
3693 dev
->component_size
= __be64_to_cpu(bvd
->blocks
);
3695 strcpy(dev
->name
, d
->devname
);
/* store_super method: write metadata to the device behind 'fd'.
 *
 * The device size is read with get_dev_size().
 * When the super block knows about disks or configs (ddf->dlist or
 * ddf->conflist is set), fd must refer to a block device (fstat +
 * S_ISBLK) whose major/minor is found in ddf->dlist; the metadata is then
 * written with _write_super_to_disk() and 'ret' is 0 only if that call
 * returned 1.
 * The other visible path allocates a zeroed, 512-byte aligned buffer of
 * 512 bytes and writes it at offset dsize-512, i.e. over the last 512
 * bytes of the device.
 */
3701 static int store_super_ddf(struct supertype
*st
, int fd
)
3703 struct ddf_super
*ddf
= st
->sb
;
3704 unsigned long long dsize
;
3711 if (!get_dev_size(fd
, NULL
, &dsize
))
3714 if (ddf
->dlist
|| ddf
->conflist
) {
3719 if (fstat(fd
, &sta
) == -1 || !S_ISBLK(sta
.st_mode
)) {
3720 pr_err("%s: file descriptor for invalid device\n",
3724 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
3725 if (dl
->major
== (int)major(sta
.st_rdev
) &&
3726 dl
->minor
== (int)minor(sta
.st_rdev
))
3729 pr_err("%s: couldn't find disk %d/%d\n", __func__
,
3730 (int)major(sta
.st_rdev
),
3731 (int)minor(sta
.st_rdev
));
3736 ret
= (_write_super_to_disk(ddf
, dl
) != 1);
3741 if (posix_memalign(&buf
, 512, 512) != 0)
3743 memset(buf
, 0, 512);
3745 lseek64(fd
, dsize
-512, 0);
3746 rc
= write(fd
, buf
, 512);
/* Compare the metadata loaded from two devices ('st' = first, 'tst' =
 * second) and, when they match, merge the device-local data of the second
 * into the first.  The meaning of the return codes is listed in the
 * original comment below.
 *
 * Comparisons visible here:
 *  - anchor guid (memcmp over DDF_GUID_LEN) and anchor sequence number;
 *  - max_part, phys->used_pdes and virt->populated_vdes;
 *  - every disk on second->dlist must have its refnum among the first
 *    used_pdes entries of first->phys;
 *  - every config on second->conflist (its magic is tested against
 *    DDF_VD_CONF_MAGIC first) must have its guid among the first
 *    max_vd_entries entries of first->virt.
 * Merge step:
 *  - for a config whose guid already exists on first->conflist, a BVD with
 *    a different sec_elmnt_seq is added through add_other_bvd();
 *  - for an unknown config a new vcl of conf_rec_len*512 +
 *    offsetof(struct vcl, conf) bytes is allocated, copied from the second
 *    super, given other_bvds through alloc_other_bvds(), matched against
 *    first->virt by guid and pushed onto first->conflist;
 *  - a disk whose refnum is not yet on first->dlist is duplicated
 *    (struct dl, its 'spare' record, and vlist[] remapped by config guid
 *    to the vcl objects of 'first') and linked in front of first->dlist.
 */
3753 static int compare_super_ddf(struct supertype
*st
, struct supertype
*tst
)
3757 * 0 same, or first was empty, and second was copied
3758 * 1 second had wrong number
3760 * 3 wrong other info
3762 struct ddf_super
*first
= st
->sb
;
3763 struct ddf_super
*second
= tst
->sb
;
3764 struct dl
*dl1
, *dl2
;
3765 struct vcl
*vl1
, *vl2
;
3766 unsigned int max_vds
, max_pds
, pd
, vd
;
3774 if (memcmp(first
->anchor
.guid
, second
->anchor
.guid
, DDF_GUID_LEN
) != 0)
3777 if (first
->anchor
.seq
!= second
->anchor
.seq
) {
3778 dprintf("%s: sequence number mismatch %u/%u\n", __func__
,
3779 __be32_to_cpu(first
->anchor
.seq
),
3780 __be32_to_cpu(second
->anchor
.seq
));
3783 if (first
->max_part
!= second
->max_part
||
3784 first
->phys
->used_pdes
!= second
->phys
->used_pdes
||
3785 first
->virt
->populated_vdes
!= second
->virt
->populated_vdes
) {
3786 dprintf("%s: PD/VD number mismatch\n", __func__
);
3790 max_pds
= __be16_to_cpu(first
->phys
->used_pdes
);
3791 for (dl2
= second
->dlist
; dl2
; dl2
= dl2
->next
) {
3792 for (pd
= 0; pd
< max_pds
; pd
++)
3793 if (first
->phys
->entries
[pd
].refnum
== dl2
->disk
.refnum
)
3795 if (pd
== max_pds
) {
3796 dprintf("%s: no match for disk %08x\n", __func__
,
3797 __be32_to_cpu(dl2
->disk
.refnum
));
3802 max_vds
= __be16_to_cpu(first
->active
->max_vd_entries
);
3803 for (vl2
= second
->conflist
; vl2
; vl2
= vl2
->next
) {
3804 if (vl2
->conf
.magic
!= DDF_VD_CONF_MAGIC
)
3806 for (vd
= 0; vd
< max_vds
; vd
++)
3807 if (!memcmp(first
->virt
->entries
[vd
].guid
,
3808 vl2
->conf
.guid
, DDF_GUID_LEN
))
3810 if (vd
== max_vds
) {
3811 dprintf("%s: no match for VD config\n", __func__
);
3815 /* FIXME should I look at anything else? */
3818 At this point we are fairly sure that the meta data matches.
3819 But the new disk may contain additional local data.
3820 Add it to the super block.
3822 for (vl2
= second
->conflist
; vl2
; vl2
= vl2
->next
) {
3823 for (vl1
= first
->conflist
; vl1
; vl1
= vl1
->next
)
3824 if (!memcmp(vl1
->conf
.guid
, vl2
->conf
.guid
,
3828 if (vl1
->other_bvds
!= NULL
&&
3829 vl1
->conf
.sec_elmnt_seq
!=
3830 vl2
->conf
.sec_elmnt_seq
) {
3831 dprintf("%s: adding BVD %u\n", __func__
,
3832 vl2
->conf
.sec_elmnt_seq
);
3833 add_other_bvd(vl1
, &vl2
->conf
,
3834 first
->conf_rec_len
*512);
3839 if (posix_memalign((void **)&vl1
, 512,
3840 (first
->conf_rec_len
*512 +
3841 offsetof(struct vcl
, conf
))) != 0) {
3842 pr_err("%s could not allocate vcl buf\n",
3847 vl1
->next
= first
->conflist
;
3848 vl1
->block_sizes
= NULL
;
3849 memcpy(&vl1
->conf
, &vl2
->conf
, first
->conf_rec_len
*512);
3850 if (alloc_other_bvds(first
, vl1
) != 0) {
3851 pr_err("%s could not allocate other bvds\n",
3856 for (vd
= 0; vd
< max_vds
; vd
++)
3857 if (!memcmp(first
->virt
->entries
[vd
].guid
,
3858 vl1
->conf
.guid
, DDF_GUID_LEN
))
3861 dprintf("%s: added config for VD %u\n", __func__
, vl1
->vcnum
);
3862 first
->conflist
= vl1
;
3865 for (dl2
= second
->dlist
; dl2
; dl2
= dl2
->next
) {
3866 for (dl1
= first
->dlist
; dl1
; dl1
= dl1
->next
)
3867 if (dl1
->disk
.refnum
== dl2
->disk
.refnum
)
3872 if (posix_memalign((void **)&dl1
, 512,
3873 sizeof(*dl1
) + (first
->max_part
) * sizeof(dl1
->vlist
[0]))
3875 pr_err("%s could not allocate disk info buffer\n",
3879 memcpy(dl1
, dl2
, sizeof(*dl1
));
3880 dl1
->mdupdate
= NULL
;
3881 dl1
->next
= first
->dlist
;
3883 for (pd
= 0; pd
< max_pds
; pd
++)
3884 if (first
->phys
->entries
[pd
].refnum
== dl1
->disk
.refnum
)
3888 if (posix_memalign((void **)&dl1
->spare
, 512,
3889 first
->conf_rec_len
*512) != 0) {
3890 pr_err("%s could not allocate spare info buf\n",
3894 memcpy(dl1
->spare
, dl2
->spare
, first
->conf_rec_len
*512);
3896 for (vd
= 0 ; vd
< first
->max_part
; vd
++) {
3897 if (!dl2
->vlist
[vd
]) {
3898 dl1
->vlist
[vd
] = NULL
;
3901 for (vl1
= first
->conflist
; vl1
; vl1
= vl1
->next
) {
3902 if (!memcmp(vl1
->conf
.guid
,
3903 dl2
->vlist
[vd
]->conf
.guid
,
3906 dl1
->vlist
[vd
] = vl1
;
3910 dprintf("%s: added disk %d: %08x\n", __func__
, dl1
->pdnum
,
3911 __be32_to_cpu(dl1
->disk
.refnum
));
3919 * A new array 'a' has been started which claims to be instance 'inst'
3920 * within container 'c'.
3921 * We need to confirm that the array matches the metadata in 'c' so
3922 * that we don't corrupt any metadata.
3924 static int ddf_open_new(struct supertype
*c
, struct active_array
*a
, char *inst
)
3926 struct ddf_super
*ddf
= c
->sb
;
3928 if (all_ff(ddf
->virt
->entries
[n
].guid
)) {
3929 pr_err("%s: subarray %d doesn't exist\n", __func__
, n
);
3932 dprintf("ddf: open_new %d\n", n
);
3933 a
->info
.container_member
= n
;
3938 * The array 'a' is to be marked clean in the metadata.
3939 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
3940 * clean up to the point (in sectors). If that cannot be recorded in the
3941 * metadata, then leave it as dirty.
3943 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3944 * !global! virtual_disk.virtual_entry structure.
3946 static int ddf_set_array_state(struct active_array
*a
, int consistent
)
3948 struct ddf_super
*ddf
= a
->container
->sb
;
3949 int inst
= a
->info
.container_member
;
3950 int old
= ddf
->virt
->entries
[inst
].state
;
3951 if (consistent
== 2) {
3952 /* Should check if a recovery should be started FIXME */
3954 if (!is_resync_complete(&a
->info
))
3958 ddf
->virt
->entries
[inst
].state
&= ~DDF_state_inconsistent
;
3960 ddf
->virt
->entries
[inst
].state
|= DDF_state_inconsistent
;
3961 if (old
!= ddf
->virt
->entries
[inst
].state
)
3962 ddf_set_updates_pending(ddf
);
3964 old
= ddf
->virt
->entries
[inst
].init_state
;
3965 ddf
->virt
->entries
[inst
].init_state
&= ~DDF_initstate_mask
;
3966 if (is_resync_complete(&a
->info
))
3967 ddf
->virt
->entries
[inst
].init_state
|= DDF_init_full
;
3968 else if (a
->info
.resync_start
== 0)
3969 ddf
->virt
->entries
[inst
].init_state
|= DDF_init_not
;
3971 ddf
->virt
->entries
[inst
].init_state
|= DDF_init_quick
;
3972 if (old
!= ddf
->virt
->entries
[inst
].init_state
)
3973 ddf_set_updates_pending(ddf
);
3975 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst
,
3976 guid_str(ddf
->virt
->entries
[inst
].guid
), a
->curr_state
,
3977 consistent
?"clean":"dirty",
3978 a
->info
.resync_start
);
3982 static int get_bvd_state(const struct ddf_super
*ddf
,
3983 const struct vd_config
*vc
)
3985 unsigned int i
, n_bvd
, working
= 0;
3986 unsigned int n_prim
= __be16_to_cpu(vc
->prim_elmnt_count
);
3988 for (i
= 0; i
< n_prim
; i
++) {
3989 if (!find_index_in_bvd(ddf
, vc
, i
, &n_bvd
))
3991 pd
= find_phys(ddf
, vc
->phys_refnum
[n_bvd
]);
3994 st
= __be16_to_cpu(ddf
->phys
->entries
[pd
].state
);
3995 if ((st
& (DDF_Online
|DDF_Failed
|DDF_Rebuilding
))
4000 state
= DDF_state_degraded
;
4001 if (working
== n_prim
)
4002 state
= DDF_state_optimal
;
4008 state
= DDF_state_failed
;
4012 state
= DDF_state_failed
;
4013 else if (working
>= 2)
4014 state
= DDF_state_part_optimal
;
4018 if (working
< n_prim
- 1)
4019 state
= DDF_state_failed
;
4022 if (working
< n_prim
- 2)
4023 state
= DDF_state_failed
;
4024 else if (working
== n_prim
- 1)
4025 state
= DDF_state_part_optimal
;
4031 static int secondary_state(int state
, int other
, int seclevel
)
4033 if (state
== DDF_state_optimal
&& other
== DDF_state_optimal
)
4034 return DDF_state_optimal
;
4035 if (seclevel
== DDF_2MIRRORED
) {
4036 if (state
== DDF_state_optimal
|| other
== DDF_state_optimal
)
4037 return DDF_state_part_optimal
;
4038 if (state
== DDF_state_failed
&& other
== DDF_state_failed
)
4039 return DDF_state_failed
;
4040 return DDF_state_degraded
;
4042 if (state
== DDF_state_failed
|| other
== DDF_state_failed
)
4043 return DDF_state_failed
;
4044 if (state
== DDF_state_degraded
|| other
== DDF_state_degraded
)
4045 return DDF_state_degraded
;
4046 return DDF_state_part_optimal
;
4050 static int get_svd_state(const struct ddf_super
*ddf
, const struct vcl
*vcl
)
4052 int state
= get_bvd_state(ddf
, &vcl
->conf
);
4054 for (i
= 1; i
< vcl
->conf
.sec_elmnt_count
; i
++) {
4055 state
= secondary_state(
4057 get_bvd_state(ddf
, vcl
->other_bvds
[i
-1]),
4064 * The state of each disk is stored in the global phys_disk structure
4065 * in phys_disk.entries[n].state.
4066 * This makes various combinations awkward.
4067 * - When a device fails in any array, it must be failed in all arrays
4068 * that include a part of this device.
4069 * - When a component is rebuilding, we cannot include it officially in the
4070 * array unless this is the only array that uses the device.
4072 * So: when transitioning:
4073 * Online -> failed, just set failed flag. monitor will propagate
4074 * spare -> online, the device might need to be added to the array.
4075 * spare -> failed, just set failed. Don't worry if in array or not.
4077 static void ddf_set_disk(struct active_array
*a
, int n
, int state
)
4079 struct ddf_super
*ddf
= a
->container
->sb
;
4080 unsigned int inst
= a
->info
.container_member
, n_bvd
;
4082 struct vd_config
*vc
= find_vdcr(ddf
, inst
, (unsigned int)n
,
4089 dprintf("ddf: cannot find instance %d!!\n", inst
);
4092 /* Find the matching slot in 'info'. */
4093 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
4094 if (mdi
->disk
.raid_disk
== n
)
4099 /* and find the 'dl' entry corresponding to that. */
4100 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
4101 if (mdi
->state_fd
>= 0 &&
4102 mdi
->disk
.major
== dl
->major
&&
4103 mdi
->disk
.minor
== dl
->minor
)
4108 pd
= find_phys(ddf
, vc
->phys_refnum
[n_bvd
]);
4109 if (pd
< 0 || pd
!= dl
->pdnum
) {
4110 /* disk doesn't currently exist or has changed.
4111 * If it is now in_sync, insert it. */
4112 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4113 __func__
, dl
->pdnum
, dl
->major
, dl
->minor
,
4114 __be32_to_cpu(dl
->disk
.refnum
));
4115 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4116 __func__
, inst
, n_bvd
, vc
->phys_refnum
[n_bvd
], pd
);
4117 if ((state
& DS_INSYNC
) && ! (state
& DS_FAULTY
)) {
4118 pd
= dl
->pdnum
; /* FIXME: is this really correct ? */
4119 vc
->phys_refnum
[n_bvd
] = dl
->disk
.refnum
;
4120 LBA_OFFSET(ddf
, vc
)[n_bvd
] =
4121 __cpu_to_be64(mdi
->data_offset
);
4122 ddf
->phys
->entries
[pd
].type
&=
4123 ~__cpu_to_be16(DDF_Global_Spare
);
4124 ddf
->phys
->entries
[pd
].type
|=
4125 __cpu_to_be16(DDF_Active_in_VD
);
4126 ddf_set_updates_pending(ddf
);
4129 int old
= ddf
->phys
->entries
[pd
].state
;
4130 if (state
& DS_FAULTY
)
4131 ddf
->phys
->entries
[pd
].state
|= __cpu_to_be16(DDF_Failed
);
4132 if (state
& DS_INSYNC
) {
4133 ddf
->phys
->entries
[pd
].state
|= __cpu_to_be16(DDF_Online
);
4134 ddf
->phys
->entries
[pd
].state
&= __cpu_to_be16(~DDF_Rebuilding
);
4136 if (old
!= ddf
->phys
->entries
[pd
].state
)
4137 ddf_set_updates_pending(ddf
);
4140 dprintf("ddf: set_disk %d to %x\n", n
, state
);
4142 /* Now we need to check the state of the array and update
4143 * virtual_disk.entries[n].state.
4144 * It needs to be one of "optimal", "degraded", "failed".
4145 * I don't understand 'deleted' or 'missing'.
4147 state
= get_svd_state(ddf
, vcl
);
4149 if (ddf
->virt
->entries
[inst
].state
!=
4150 ((ddf
->virt
->entries
[inst
].state
& ~DDF_state_mask
)
4153 ddf
->virt
->entries
[inst
].state
=
4154 (ddf
->virt
->entries
[inst
].state
& ~DDF_state_mask
)
4156 ddf_set_updates_pending(ddf
);
4161 static void ddf_sync_metadata(struct supertype
*st
)
4165 * Write all data to all devices.
4166 * Later, we might be able to track whether only local changes
4167 * have been made, or whether any global data has been changed,
4168 * but ddf is sufficiently weird that it probably always
4169 * changes global data ....
4171 struct ddf_super
*ddf
= st
->sb
;
4172 if (!ddf
->updates_pending
)
4174 ddf
->updates_pending
= 0;
4175 __write_init_super_ddf(st
);
4176 dprintf("ddf: sync_metadata\n");
4179 static int del_from_conflist(struct vcl
**list
, const char *guid
)
4183 for (p
= list
; p
&& *p
; p
= &((*p
)->next
))
4184 if (!memcmp((*p
)->conf
.guid
, guid
, DDF_GUID_LEN
)) {
4191 static int _kill_subarray_ddf(struct ddf_super
*ddf
, const char *guid
)
4194 unsigned int vdnum
, i
;
4195 vdnum
= find_vde_by_guid(ddf
, guid
);
4196 if (vdnum
== DDF_NOTFOUND
) {
4197 pr_err("%s: could not find VD %s\n", __func__
,
4201 if (del_from_conflist(&ddf
->conflist
, guid
) == 0) {
4202 pr_err("%s: could not find conf %s\n", __func__
,
4206 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
4207 for (i
= 0; i
< ddf
->max_part
; i
++)
4208 if (dl
->vlist
[i
] != NULL
&&
4209 !memcmp(dl
->vlist
[i
]->conf
.guid
, guid
,
4211 dl
->vlist
[i
] = NULL
;
4212 memset(ddf
->virt
->entries
[vdnum
].guid
, 0xff, DDF_GUID_LEN
);
4213 dprintf("%s: deleted %s\n", __func__
, guid_str(guid
));
4217 static int kill_subarray_ddf(struct supertype
*st
)
4219 struct ddf_super
*ddf
= st
->sb
;
4221 * currentconf is set in container_content_ddf,
4222 * called with subarray arg
4224 struct vcl
*victim
= ddf
->currentconf
;
4225 struct vd_config
*conf
;
4226 ddf
->currentconf
= NULL
;
4229 pr_err("%s: nothing to kill\n", __func__
);
4232 conf
= &victim
->conf
;
4233 vdnum
= find_vde_by_guid(ddf
, conf
->guid
);
4234 if (vdnum
== DDF_NOTFOUND
) {
4235 pr_err("%s: could not find VD %s\n", __func__
,
4236 guid_str(conf
->guid
));
4239 if (st
->update_tail
) {
4240 struct virtual_disk
*vd
;
4241 int len
= sizeof(struct virtual_disk
)
4242 + sizeof(struct virtual_entry
);
4245 pr_err("%s: failed to allocate %d bytes\n", __func__
,
4249 memset(vd
, 0 , len
);
4250 vd
->magic
= DDF_VIRT_RECORDS_MAGIC
;
4251 vd
->populated_vdes
= 0;
4252 memcpy(vd
->entries
[0].guid
, conf
->guid
, DDF_GUID_LEN
);
4253 /* we use DDF_state_deleted as marker */
4254 vd
->entries
[0].state
= DDF_state_deleted
;
4255 append_metadata_update(st
, vd
, len
);
4257 _kill_subarray_ddf(ddf
, conf
->guid
);
4258 ddf_set_updates_pending(ddf
);
4259 ddf_sync_metadata(st
);
4264 static void copy_matching_bvd(struct ddf_super
*ddf
,
4265 struct vd_config
*conf
,
4266 const struct metadata_update
*update
)
4269 __be16_to_cpu(ddf
->anchor
.max_primary_element_entries
);
4270 unsigned int len
= ddf
->conf_rec_len
* 512;
4272 struct vd_config
*vc
;
4273 for (p
= update
->buf
; p
< update
->buf
+ update
->len
; p
+= len
) {
4274 vc
= (struct vd_config
*) p
;
4275 if (vc
->sec_elmnt_seq
== conf
->sec_elmnt_seq
) {
4276 memcpy(conf
->phys_refnum
, vc
->phys_refnum
,
4277 mppe
* (sizeof(__u32
) + sizeof(__u64
)));
4281 pr_err("%s: no match for BVD %d of %s in update\n", __func__
,
4282 conf
->sec_elmnt_seq
, guid_str(conf
->guid
));
4285 static void ddf_process_update(struct supertype
*st
,
4286 struct metadata_update
*update
)
4288 /* Apply this update to the metadata.
4289 * The first 4 bytes are a DDF_*_MAGIC which guides
4291 * Possible update are:
4292 * DDF_PHYS_RECORDS_MAGIC
4293 * Add a new physical device or remove an old one.
4294 * Changes to this record only happen implicitly.
4295 * used_pdes is the device number.
4296 * DDF_VIRT_RECORDS_MAGIC
4297 * Add a new VD. Possibly also change the 'access' bits.
4298 * populated_vdes is the entry number.
4300 * New or updated VD. the VIRT_RECORD must already
4301 * exist. For an update, phys_refnum and lba_offset
4302 * (at least) are updated, and the VD_CONF must
4303 * be written to precisely those devices listed with
4305 * DDF_SPARE_ASSIGN_MAGIC
4306 * replacement Spare Assignment Record... but for which device?
4309 * - to create a new array, we send a VIRT_RECORD and
4310 * a VD_CONF. Then assemble and start the array.
4311 * - to activate a spare we send a VD_CONF to add the phys_refnum
4312 * and offset. This will also mark the spare as active with
4313 * a spare-assignment record.
4315 struct ddf_super
*ddf
= st
->sb
;
4316 __u32
*magic
= (__u32
*)update
->buf
;
4317 struct phys_disk
*pd
;
4318 struct virtual_disk
*vd
;
4319 struct vd_config
*vc
;
4323 unsigned int pdnum
, pd2
, len
;
4325 dprintf("Process update %x\n", *magic
);
4328 case DDF_PHYS_RECORDS_MAGIC
:
4330 if (update
->len
!= (sizeof(struct phys_disk
) +
4331 sizeof(struct phys_disk_entry
)))
4333 pd
= (struct phys_disk
*)update
->buf
;
4335 ent
= __be16_to_cpu(pd
->used_pdes
);
4336 if (ent
>= __be16_to_cpu(ddf
->phys
->max_pdes
))
4338 if (pd
->entries
[0].state
& __cpu_to_be16(DDF_Missing
)) {
4340 /* removing this disk. */
4341 ddf
->phys
->entries
[ent
].state
|= __cpu_to_be16(DDF_Missing
);
4342 for (dlp
= &ddf
->dlist
; *dlp
; dlp
= &(*dlp
)->next
) {
4343 struct dl
*dl
= *dlp
;
4344 if (dl
->pdnum
== (signed)ent
) {
4347 /* FIXME this doesn't free
4354 ddf_set_updates_pending(ddf
);
4357 if (!all_ff(ddf
->phys
->entries
[ent
].guid
))
4359 ddf
->phys
->entries
[ent
] = pd
->entries
[0];
4360 ddf
->phys
->used_pdes
= __cpu_to_be16(1 +
4361 __be16_to_cpu(ddf
->phys
->used_pdes
));
4362 ddf_set_updates_pending(ddf
);
4363 if (ddf
->add_list
) {
4364 struct active_array
*a
;
4365 struct dl
*al
= ddf
->add_list
;
4366 ddf
->add_list
= al
->next
;
4368 al
->next
= ddf
->dlist
;
4371 /* As a device has been added, we should check
4372 * for any degraded devices that might make
4373 * use of this spare */
4374 for (a
= st
->arrays
; a
; a
=a
->next
)
4375 a
->check_degraded
= 1;
4379 case DDF_VIRT_RECORDS_MAGIC
:
4381 if (update
->len
!= (sizeof(struct virtual_disk
) +
4382 sizeof(struct virtual_entry
)))
4384 vd
= (struct virtual_disk
*)update
->buf
;
4386 if (vd
->entries
[0].state
== DDF_state_deleted
) {
4387 if (_kill_subarray_ddf(ddf
, vd
->entries
[0].guid
))
4391 ent
= find_vde_by_guid(ddf
, vd
->entries
[0].guid
);
4392 if (ent
!= DDF_NOTFOUND
) {
4393 dprintf("%s: VD %s exists already in slot %d\n",
4394 __func__
, guid_str(vd
->entries
[0].guid
),
4398 ent
= find_unused_vde(ddf
);
4399 if (ent
== DDF_NOTFOUND
)
4401 ddf
->virt
->entries
[ent
] = vd
->entries
[0];
4402 ddf
->virt
->populated_vdes
=
4405 ddf
->virt
->populated_vdes
));
4406 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4407 __func__
, guid_str(vd
->entries
[0].guid
), ent
,
4408 ddf
->virt
->entries
[ent
].state
,
4409 ddf
->virt
->entries
[ent
].init_state
);
4411 ddf_set_updates_pending(ddf
);
4414 case DDF_VD_CONF_MAGIC
:
4415 vc
= (struct vd_config
*)update
->buf
;
4416 len
= ddf
->conf_rec_len
* 512;
4417 if ((unsigned int)update
->len
!= len
* vc
->sec_elmnt_count
) {
4418 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4419 __func__
, guid_str(vc
->guid
), update
->len
,
4420 vc
->sec_elmnt_count
);
4423 for (vcl
= ddf
->conflist
; vcl
; vcl
= vcl
->next
)
4424 if (memcmp(vcl
->conf
.guid
, vc
->guid
, DDF_GUID_LEN
) == 0)
4426 dprintf("%s: conf update for %s (%s)\n", __func__
,
4427 guid_str(vc
->guid
), (vcl
? "old" : "new"));
4429 /* An update, just copy the phys_refnum and lba_offset
4433 copy_matching_bvd(ddf
, &vcl
->conf
, update
);
4434 for (i
= 1; i
< vc
->sec_elmnt_count
; i
++)
4435 copy_matching_bvd(ddf
, vcl
->other_bvds
[i
-1],
4442 vcl
= update
->space
;
4443 update
->space
= NULL
;
4444 vcl
->next
= ddf
->conflist
;
4445 memcpy(&vcl
->conf
, vc
, len
);
4446 ent
= find_vde_by_guid(ddf
, vc
->guid
);
4447 if (ent
== DDF_NOTFOUND
)
4450 ddf
->conflist
= vcl
;
4451 for (i
= 1; i
< vc
->sec_elmnt_count
; i
++)
4452 memcpy(vcl
->other_bvds
[i
-1],
4453 update
->buf
+ len
* i
, len
);
4455 /* Set DDF_Transition on all Failed devices - to help
4456 * us detect those that are no longer in use
4458 for (pdnum
= 0; pdnum
< __be16_to_cpu(ddf
->phys
->used_pdes
); pdnum
++)
4459 if (ddf
->phys
->entries
[pdnum
].state
4460 & __be16_to_cpu(DDF_Failed
))
4461 ddf
->phys
->entries
[pdnum
].state
4462 |= __be16_to_cpu(DDF_Transition
);
4463 /* Now make sure vlist is correct for each dl. */
4464 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
) {
4465 unsigned int vn
= 0;
4466 int in_degraded
= 0;
4467 for (vcl
= ddf
->conflist
; vcl
; vcl
= vcl
->next
) {
4468 unsigned int dn
, ibvd
;
4469 const struct vd_config
*conf
;
4471 dn
= get_pd_index_from_refnum(vcl
,
4475 if (dn
== DDF_NOTFOUND
)
4477 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
4479 __be32_to_cpu(dl
->disk
.refnum
),
4480 guid_str(conf
->guid
),
4481 conf
->sec_elmnt_seq
, vn
);
4482 /* Clear the Transition flag */
4483 if (ddf
->phys
->entries
[dl
->pdnum
].state
4484 & __be16_to_cpu(DDF_Failed
))
4485 ddf
->phys
->entries
[dl
->pdnum
].state
&=
4486 ~__be16_to_cpu(DDF_Transition
);
4487 dl
->vlist
[vn
++] = vcl
;
4488 vstate
= ddf
->virt
->entries
[vcl
->vcnum
].state
4490 if (vstate
== DDF_state_degraded
||
4491 vstate
== DDF_state_part_optimal
)
4494 while (vn
< ddf
->max_part
)
4495 dl
->vlist
[vn
++] = NULL
;
4497 ddf
->phys
->entries
[dl
->pdnum
].type
&=
4498 ~__cpu_to_be16(DDF_Global_Spare
);
4499 if (!(ddf
->phys
->entries
[dl
->pdnum
].type
&
4500 __cpu_to_be16(DDF_Active_in_VD
))) {
4501 ddf
->phys
->entries
[dl
->pdnum
].type
|=
4502 __cpu_to_be16(DDF_Active_in_VD
);
4504 ddf
->phys
->entries
[dl
->pdnum
].state
|=
4505 __cpu_to_be16(DDF_Rebuilding
);
4509 ddf
->phys
->entries
[dl
->pdnum
].type
&=
4510 ~__cpu_to_be16(DDF_Global_Spare
);
4511 ddf
->phys
->entries
[dl
->pdnum
].type
|=
4512 __cpu_to_be16(DDF_Spare
);
4514 if (!dl
->vlist
[0] && !dl
->spare
) {
4515 ddf
->phys
->entries
[dl
->pdnum
].type
|=
4516 __cpu_to_be16(DDF_Global_Spare
);
4517 ddf
->phys
->entries
[dl
->pdnum
].type
&=
4518 ~__cpu_to_be16(DDF_Spare
|
4523 /* Now remove any 'Failed' devices that are not part
4524 * of any VD. They will have the Transition flag set.
4525 * Once done, we need to update all dl->pdnum numbers.
4528 for (pdnum
= 0; pdnum
< __be16_to_cpu(ddf
->phys
->used_pdes
); pdnum
++)
4529 if ((ddf
->phys
->entries
[pdnum
].state
4530 & __be16_to_cpu(DDF_Failed
))
4531 && (ddf
->phys
->entries
[pdnum
].state
4532 & __be16_to_cpu(DDF_Transition
)))
4533 /* skip this one */;
4534 else if (pdnum
== pd2
)
4537 ddf
->phys
->entries
[pd2
] = ddf
->phys
->entries
[pdnum
];
4538 for (dl
= ddf
->dlist
; dl
; dl
= dl
->next
)
4539 if (dl
->pdnum
== (int)pdnum
)
4543 ddf
->phys
->used_pdes
= __cpu_to_be16(pd2
);
4544 while (pd2
< pdnum
) {
4545 memset(ddf
->phys
->entries
[pd2
].guid
, 0xff, DDF_GUID_LEN
);
4549 ddf_set_updates_pending(ddf
);
4551 case DDF_SPARE_ASSIGN_MAGIC
:
4556 static void ddf_prepare_update(struct supertype
*st
,
4557 struct metadata_update
*update
)
4559 /* This update arrived at managemon.
4560 * We are about to pass it to monitor.
4561 * If a malloc is needed, do it here.
4563 struct ddf_super
*ddf
= st
->sb
;
4564 __u32
*magic
= (__u32
*)update
->buf
;
4565 if (*magic
== DDF_VD_CONF_MAGIC
) {
4567 struct vd_config
*conf
= (struct vd_config
*) update
->buf
;
4568 if (posix_memalign(&update
->space
, 512,
4569 offsetof(struct vcl
, conf
)
4570 + ddf
->conf_rec_len
* 512) != 0) {
4571 update
->space
= NULL
;
4574 vcl
= update
->space
;
4575 vcl
->conf
.sec_elmnt_count
= conf
->sec_elmnt_count
;
4576 if (alloc_other_bvds(ddf
, vcl
) != 0) {
4577 free(update
->space
);
4578 update
->space
= NULL
;
4584 * Check if the array 'a' is degraded but not failed.
4585 * If it is, find as many spares as are available and needed and
4586 * arrange for their inclusion.
4587 * We only choose devices which are not already in the array,
4588 * and prefer those with a spare-assignment to this array.
4589 * otherwise we choose global spares - assuming always that
4590 * there is enough room.
4591 * For each spare that we assign, we return an 'mdinfo' which
4592 * describes the position for the device in the array.
4593 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4594 * the new phys_refnum and lba_offset values.
4596 * Only worry about BVDs at the moment.
4598 static struct mdinfo
*ddf_activate_spare(struct active_array
*a
,
4599 struct metadata_update
**updates
)
4603 struct ddf_super
*ddf
= a
->container
->sb
;
4605 struct mdinfo
*rv
= NULL
;
4607 struct metadata_update
*mu
;
4611 struct vd_config
*vc
;
4614 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
4615 if ((d
->curr_state
& DS_FAULTY
) &&
4617 /* wait for Removal to happen */
4619 if (d
->state_fd
>= 0)
4623 dprintf("ddf_activate: working=%d (%d) level=%d\n", working
, a
->info
.array
.raid_disks
,
4624 a
->info
.array
.level
);
4625 if (working
== a
->info
.array
.raid_disks
)
4626 return NULL
; /* array not degraded */
4627 switch (a
->info
.array
.level
) {
4630 return NULL
; /* failed */
4634 if (working
< a
->info
.array
.raid_disks
- 1)
4635 return NULL
; /* failed */
4638 if (working
< a
->info
.array
.raid_disks
- 2)
4639 return NULL
; /* failed */
4641 default: /* concat or stripe */
4642 return NULL
; /* failed */
4645 /* For each slot, if it is not working, find a spare */
4647 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
4648 for (d
= a
->info
.devs
; d
; d
= d
->next
)
4649 if (d
->disk
.raid_disk
== i
)
4651 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
4652 if (d
&& (d
->state_fd
>= 0))
4655 /* OK, this device needs recovery. Find a spare */
4657 for ( ; dl
; dl
= dl
->next
) {
4658 unsigned long long esize
;
4659 unsigned long long pos
;
4662 int is_dedicated
= 0;
4665 /* If in this array, skip */
4666 for (d2
= a
->info
.devs
; d2
; d2
= d2
->next
)
4667 if (d2
->state_fd
>= 0 &&
4668 d2
->disk
.major
== dl
->major
&&
4669 d2
->disk
.minor
== dl
->minor
) {
4670 dprintf("%x:%x already in array\n", dl
->major
, dl
->minor
);
4675 if (ddf
->phys
->entries
[dl
->pdnum
].type
&
4676 __cpu_to_be16(DDF_Spare
)) {
4677 /* Check spare assign record */
4679 if (dl
->spare
->type
& DDF_spare_dedicated
) {
4680 /* check spare_ents for guid */
4682 j
< __be16_to_cpu(dl
->spare
->populated
);
4684 if (memcmp(dl
->spare
->spare_ents
[j
].guid
,
4685 ddf
->virt
->entries
[a
->info
.container_member
].guid
,
4692 } else if (ddf
->phys
->entries
[dl
->pdnum
].type
&
4693 __cpu_to_be16(DDF_Global_Spare
)) {
4695 } else if (!(ddf
->phys
->entries
[dl
->pdnum
].state
&
4696 __cpu_to_be16(DDF_Failed
))) {
4697 /* we can possibly use some of this */
4700 if ( ! (is_dedicated
||
4701 (is_global
&& global_ok
))) {
4702 dprintf("%x:%x not suitable: %d %d\n", dl
->major
, dl
->minor
,
4703 is_dedicated
, is_global
);
4707 /* We are allowed to use this device - is there space?
4708 * We need a->info.component_size sectors */
4709 ex
= get_extents(ddf
, dl
);
4711 dprintf("cannot get extents\n");
4718 esize
= ex
[j
].start
- pos
;
4719 if (esize
>= a
->info
.component_size
)
4721 pos
= ex
[j
].start
+ ex
[j
].size
;
4723 } while (ex
[j
-1].size
);
4726 if (esize
< a
->info
.component_size
) {
4727 dprintf("%x:%x has no room: %llu %llu\n",
4728 dl
->major
, dl
->minor
,
4729 esize
, a
->info
.component_size
);
4734 /* Cool, we have a device with some space at pos */
4735 di
= xcalloc(1, sizeof(*di
));
4736 di
->disk
.number
= i
;
4737 di
->disk
.raid_disk
= i
;
4738 di
->disk
.major
= dl
->major
;
4739 di
->disk
.minor
= dl
->minor
;
4741 di
->recovery_start
= 0;
4742 di
->data_offset
= pos
;
4743 di
->component_size
= a
->info
.component_size
;
4744 di
->container_member
= dl
->pdnum
;
4747 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
4752 if (!dl
&& ! global_ok
) {
4753 /* not enough dedicated spares, try global */
4761 /* No spares found */
4763 /* Now 'rv' has a list of devices to return.
4764 * Create a metadata_update record to update the
4765 * phys_refnum and lba_offset values
4767 mu
= xmalloc(sizeof(*mu
));
4768 if (posix_memalign(&mu
->space
, 512, sizeof(struct vcl
)) != 0) {
4772 mu
->buf
= xmalloc(ddf
->conf_rec_len
* 512);
4773 mu
->len
= ddf
->conf_rec_len
* 512;
4775 mu
->space_list
= NULL
;
4776 mu
->next
= *updates
;
4777 vc
= find_vdcr(ddf
, a
->info
.container_member
, di
->disk
.raid_disk
,
4779 memcpy(mu
->buf
, vc
, ddf
->conf_rec_len
* 512);
4781 vc
= (struct vd_config
*)mu
->buf
;
4782 for (di
= rv
; di
; di
= di
->next
) {
4783 vc
->phys_refnum
[di
->disk
.raid_disk
] =
4784 ddf
->phys
->entries
[dl
->pdnum
].refnum
;
4785 LBA_OFFSET(ddf
, vc
)[di
->disk
.raid_disk
]
4786 = __cpu_to_be64(di
->data_offset
);
4791 #endif /* MDASSEMBLE */
4793 static int ddf_level_to_layout(int level
)
4800 return ALGORITHM_LEFT_SYMMETRIC
;
4802 return ALGORITHM_ROTATING_N_CONTINUE
;
4810 static void default_geometry_ddf(struct supertype
*st
, int *level
, int *layout
, int *chunk
)
4812 if (level
&& *level
== UnSet
)
4813 *level
= LEVEL_CONTAINER
;
4815 if (level
&& layout
&& *layout
== UnSet
)
4816 *layout
= ddf_level_to_layout(*level
);
4819 struct superswitch super_ddf
= {
4821 .examine_super
= examine_super_ddf
,
4822 .brief_examine_super
= brief_examine_super_ddf
,
4823 .brief_examine_subarrays
= brief_examine_subarrays_ddf
,
4824 .export_examine_super
= export_examine_super_ddf
,
4825 .detail_super
= detail_super_ddf
,
4826 .brief_detail_super
= brief_detail_super_ddf
,
4827 .validate_geometry
= validate_geometry_ddf
,
4828 .write_init_super
= write_init_super_ddf
,
4829 .add_to_super
= add_to_super_ddf
,
4830 .remove_from_super
= remove_from_super_ddf
,
4831 .load_container
= load_container_ddf
,
4832 .copy_metadata
= copy_metadata_ddf
,
4834 .match_home
= match_home_ddf
,
4835 .uuid_from_super
= uuid_from_super_ddf
,
4836 .getinfo_super
= getinfo_super_ddf
,
4837 .update_super
= update_super_ddf
,
4839 .avail_size
= avail_size_ddf
,
4841 .compare_super
= compare_super_ddf
,
4843 .load_super
= load_super_ddf
,
4844 .init_super
= init_super_ddf
,
4845 .store_super
= store_super_ddf
,
4846 .free_super
= free_super_ddf
,
4847 .match_metadata_desc
= match_metadata_desc_ddf
,
4848 .container_content
= container_content_ddf
,
4849 .default_geometry
= default_geometry_ddf
,
4850 .kill_subarray
= kill_subarray_ddf
,
4856 .open_new
= ddf_open_new
,
4857 .set_array_state
= ddf_set_array_state
,
4858 .set_disk
= ddf_set_disk
,
4859 .sync_metadata
= ddf_sync_metadata
,
4860 .process_update
= ddf_process_update
,
4861 .prepare_update
= ddf_prepare_update
,
4862 .activate_spare
= ddf_activate_spare
,