2 * mdadm - Intel(R) Matrix Storage Manager Support
4 * Copyright (C) 2002-2008 Intel Corporation
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 #define HAVE_STDINT_H 1
24 #include "platform-intel.h"
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
54 #define MPB_SECTOR_CNT 418
55 #define IMSM_RESERVED_SECTORS 4096
56 #define SECT_PER_MB_SHIFT 11
58 /* Disk configuration info. */
59 #define IMSM_MAX_DEVICES 255
61 __u8 serial
[MAX_RAID_SERIAL_LEN
];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks
; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id
; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status
; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num
; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler
[IMSM_DISK_FILLERS
]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
73 /* RAID map configuration infos. */
75 __u32 pba_of_lba0
; /* start address of partition */
76 __u32 blocks_per_member
;/* blocks per member */
77 __u32 num_data_stripes
; /* number of data stripes */
78 __u16 blocks_per_strip
;
79 __u8 map_state
; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members
; /* number of member disks */
89 __u8 num_domains
; /* number of parity domains */
90 __u8 failed_disk_num
; /* valid only when state is degraded */
92 __u32 filler
[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl
[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
97 } __attribute__ ((packed
));
100 __u32 curr_migr_unit
;
101 __u32 checkpoint_id
; /* id to access curr_migr_unit */
102 __u8 migr_state
; /* Normal or Migrating */
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type
; /* Initializing, Rebuilding, ... */
111 __u8 fs_state
; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors
; /* number of mismatches */
113 __u16 bad_blocks
; /* number of bad blocks during verify */
115 struct imsm_map map
[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed
));
120 __u8 volume
[MAX_RAID_SERIAL_LEN
];
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status
; /* Persistent RaidDev status */
137 __u32 reserved_blocks
; /* Reserved blocks at beginning of volume */
141 __u8 cng_master_disk
;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler
[IMSM_DEV_FILLERS
];
148 } __attribute__ ((packed
));
151 __u8 sig
[MAX_SIGNATURE_LENGTH
]; /* 0x00 - 0x1F */
152 __u32 check_sum
; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size
; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num
; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num
; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size
; /* 0x30 - 0x33 in bytes */
157 __u32 attributes
; /* 0x34 - 0x37 */
158 __u8 num_disks
; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs
; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos
; /* 0x3A */
161 __u8 fill
[1]; /* 0x3B */
162 __u32 cache_size
; /* 0x3C - 0x3F in mb */
163 __u32 orig_family_num
; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count
; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size
; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler
[IMSM_FILLERS
]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk
[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed
));
173 #define BBM_LOG_MAX_ENTRIES 254
175 struct bbm_log_entry
{
176 __u64 defective_block_start
;
177 #define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset
;
179 __u16 remapped_marked_count
;
181 } __attribute__ ((__packed__
));
184 __u32 signature
; /* 0xABADB10C */
186 __u32 reserved_spare_block_count
; /* 0 */
187 __u32 reserved
; /* 0xFFFF */
188 __u64 first_spare_lba
;
189 struct bbm_log_entry mapped_block_entries
[BBM_LOG_MAX_ENTRIES
];
190 } __attribute__ ((__packed__
));
194 static char *map_state_str
[] = { "normal", "uninitialized", "degraded", "failed" };
197 static __u8
migr_type(struct imsm_dev
*dev
)
199 if (dev
->vol
.migr_type
== MIGR_VERIFY
&&
200 dev
->status
& DEV_VERIFY_AND_FIX
)
203 return dev
->vol
.migr_type
;
206 static void set_migr_type(struct imsm_dev
*dev
, __u8 migr_type
)
208 /* for compatibility with older oroms convert MIGR_REPAIR, into
209 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
211 if (migr_type
== MIGR_REPAIR
) {
212 dev
->vol
.migr_type
= MIGR_VERIFY
;
213 dev
->status
|= DEV_VERIFY_AND_FIX
;
215 dev
->vol
.migr_type
= migr_type
;
216 dev
->status
&= ~DEV_VERIFY_AND_FIX
;
/*
 * sector_count() - number of 512-byte sectors needed to hold @bytes bytes.
 * @bytes: byte count to convert
 *
 * Rounds up, so a partially filled trailing sector is counted.  The result
 * is built from the quotient and remainder rather than by adding 511 first:
 * that addition wraps around in 32-bit arithmetic for byte counts within
 * 511 of the maximum and would then report far too few sectors.
 *
 * Returns the sector count (0 when @bytes is 0).
 */
static unsigned int sector_count(__u32 bytes)
{
	return bytes / 512 + (bytes % 512 != 0);
}
225 static unsigned int mpb_sectors(struct imsm_super
*mpb
)
227 return sector_count(__le32_to_cpu(mpb
->mpb_size
));
231 struct imsm_dev
*dev
;
232 struct intel_dev
*next
;
237 enum sys_dev_type type
;
240 struct intel_hba
*next
;
247 /* internal representation of IMSM metadata */
250 void *buf
; /* O_DIRECT buffer for reading/writing metadata */
251 struct imsm_super
*anchor
; /* immovable parameters */
253 size_t len
; /* size of the 'buf' allocation */
254 void *next_buf
; /* for realloc'ing buf from the manager */
256 int updates_pending
; /* count of pending updates for mdmon */
257 int current_vol
; /* index of raid device undergoing creation */
258 __u32 create_offset
; /* common start for 'current_vol' */
259 __u32 random
; /* random data for seeding new family numbers */
260 struct intel_dev
*devlist
;
264 __u8 serial
[MAX_RAID_SERIAL_LEN
];
267 struct imsm_disk disk
;
270 struct extent
*e
; /* for determining freespace @ create */
271 int raiddisk
; /* slot to fill in autolayout */
274 struct dl
*disk_mgmt_list
; /* list of disks to add/remove while mdmon
276 struct dl
*missing
; /* disks removed while we weren't looking */
277 struct bbm_log
*bbm_log
;
278 struct intel_hba
*hba
; /* device path of the raid controller for this metadata */
279 const struct imsm_orom
*orom
; /* platform firmware support */
280 struct intel_super
*next
; /* (temp) list for disambiguating family_num */
284 struct imsm_disk disk
;
285 #define IMSM_UNKNOWN_OWNER (-1)
287 struct intel_disk
*next
;
291 unsigned long long start
, size
;
294 /* definitions of reshape process types */
295 enum imsm_reshape_type
{
300 /* definition of messages passed to imsm_process_update */
301 enum imsm_update_type
{
302 update_activate_spare
,
306 update_add_remove_disk
,
307 update_reshape_container_disks
,
311 struct imsm_update_activate_spare
{
312 enum imsm_update_type type
;
316 struct imsm_update_activate_spare
*next
;
329 enum takeover_direction
{
333 struct imsm_update_takeover
{
334 enum imsm_update_type type
;
336 enum takeover_direction direction
;
339 struct imsm_update_reshape
{
340 enum imsm_update_type type
;
343 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
347 __u8 serial
[MAX_RAID_SERIAL_LEN
];
350 struct imsm_update_create_array
{
351 enum imsm_update_type type
;
356 struct imsm_update_kill_array
{
357 enum imsm_update_type type
;
361 struct imsm_update_rename_array
{
362 enum imsm_update_type type
;
363 __u8 name
[MAX_RAID_SERIAL_LEN
];
367 struct imsm_update_add_remove_disk
{
368 enum imsm_update_type type
;
372 static const char *_sys_dev_type
[] = {
373 [SYS_DEV_UNKNOWN
] = "Unknown",
374 [SYS_DEV_SAS
] = "SAS",
375 [SYS_DEV_SATA
] = "SATA"
378 const char *get_sys_dev_type(enum sys_dev_type type
)
380 if (type
>= SYS_DEV_MAX
)
381 type
= SYS_DEV_UNKNOWN
;
383 return _sys_dev_type
[type
];
386 static struct intel_hba
* alloc_intel_hba(struct sys_dev
*device
)
388 struct intel_hba
*result
= malloc(sizeof(*result
));
390 result
->type
= device
->type
;
391 result
->path
= strdup(device
->path
);
393 if (result
->path
&& (result
->pci_id
= strrchr(result
->path
, '/')) != NULL
)
399 static struct intel_hba
* find_intel_hba(struct intel_hba
*hba
, struct sys_dev
*device
)
401 struct intel_hba
*result
=NULL
;
402 for (result
= hba
; result
; result
= result
->next
) {
403 if (result
->type
== device
->type
&& strcmp(result
->path
, device
->path
) == 0)
409 static int attach_hba_to_super(struct intel_super
*super
, struct sys_dev
*device
)
411 struct intel_hba
*hba
;
413 /* check if disk attached to Intel HBA */
414 hba
= find_intel_hba(super
->hba
, device
);
417 /* Check if HBA is already attached to super */
418 if (super
->hba
== NULL
) {
419 super
->hba
= alloc_intel_hba(device
);
424 /* Intel metadata allows for all disks attached to the same type HBA.
425 * Mixing of different HBA types is not supported.
427 if (device
->type
!= hba
->type
)
433 hba
->next
= alloc_intel_hba(device
);
437 static struct sys_dev
* find_disk_attached_hba(int fd
, const char *devname
)
439 struct sys_dev
*list
, *elem
, *prev
;
442 if ((list
= find_intel_devices()) == NULL
)
446 disk_path
= (char *) devname
;
448 disk_path
= diskfd_to_devpath(fd
);
455 for (prev
= NULL
, elem
= list
; elem
; prev
= elem
, elem
= elem
->next
) {
456 if (path_attached_to_hba(disk_path
, elem
->path
)) {
460 prev
->next
= elem
->next
;
462 if (disk_path
!= devname
)
468 if (disk_path
!= devname
)
476 static int find_intel_hba_capability(int fd
, struct intel_super
*super
,
479 static struct supertype
*match_metadata_desc_imsm(char *arg
)
481 struct supertype
*st
;
483 if (strcmp(arg
, "imsm") != 0 &&
484 strcmp(arg
, "default") != 0
488 st
= malloc(sizeof(*st
));
491 memset(st
, 0, sizeof(*st
));
492 st
->container_dev
= NoMdDev
;
493 st
->ss
= &super_imsm
;
494 st
->max_devs
= IMSM_MAX_DEVICES
;
495 st
->minor_version
= 0;
501 static __u8
*get_imsm_version(struct imsm_super
*mpb
)
503 return &mpb
->sig
[MPB_SIG_LEN
];
507 /* retrieve a disk directly from the anchor when the anchor is known to be
508 * up-to-date, currently only at load time
510 static struct imsm_disk
*__get_imsm_disk(struct imsm_super
*mpb
, __u8 index
)
512 if (index
>= mpb
->num_disks
)
514 return &mpb
->disk
[index
];
517 /* retrieve the disk description based on an index of the disk
520 static struct dl
*get_imsm_dl_disk(struct intel_super
*super
, __u8 index
)
524 for (d
= super
->disks
; d
; d
= d
->next
)
525 if (d
->index
== index
)
530 /* retrieve a disk from the parsed metadata */
531 static struct imsm_disk
*get_imsm_disk(struct intel_super
*super
, __u8 index
)
535 dl
= get_imsm_dl_disk(super
, index
);
542 /* generate a checksum directly from the anchor when the anchor is known to be
543 * up-to-date, currently only at load or write_super after coalescing
545 static __u32
__gen_imsm_checksum(struct imsm_super
*mpb
)
547 __u32 end
= mpb
->mpb_size
/ sizeof(end
);
548 __u32
*p
= (__u32
*) mpb
;
552 sum
+= __le32_to_cpu(*p
);
556 return sum
- __le32_to_cpu(mpb
->check_sum
);
559 static size_t sizeof_imsm_map(struct imsm_map
*map
)
561 return sizeof(struct imsm_map
) + sizeof(__u32
) * (map
->num_members
- 1);
564 struct imsm_map
*get_imsm_map(struct imsm_dev
*dev
, int second_map
)
566 /* A device can have 2 maps if it is in the middle of a migration.
568 * 0 - we return the first map
569 * 1 - we return the second map if it exists, else NULL
570 * -1 - we return the second map if it exists, else the first
572 struct imsm_map
*map
= &dev
->vol
.map
[0];
574 if (second_map
== 1 && !dev
->vol
.migr_state
)
576 else if (second_map
== 1 ||
577 (second_map
< 0 && dev
->vol
.migr_state
)) {
580 return ptr
+ sizeof_imsm_map(map
);
586 /* return the size of the device.
587 * migr_state increases the returned size if map[0] were to be duplicated
589 static size_t sizeof_imsm_dev(struct imsm_dev
*dev
, int migr_state
)
591 size_t size
= sizeof(*dev
) - sizeof(struct imsm_map
) +
592 sizeof_imsm_map(get_imsm_map(dev
, 0));
594 /* migrating means an additional map */
595 if (dev
->vol
.migr_state
)
596 size
+= sizeof_imsm_map(get_imsm_map(dev
, 1));
598 size
+= sizeof_imsm_map(get_imsm_map(dev
, 0));
604 /* retrieve disk serial number list from a metadata update */
605 static struct disk_info
*get_disk_info(struct imsm_update_create_array
*update
)
608 struct disk_info
*inf
;
610 inf
= u
+ sizeof(*update
) - sizeof(struct imsm_dev
) +
611 sizeof_imsm_dev(&update
->dev
, 0);
617 static struct imsm_dev
*__get_imsm_dev(struct imsm_super
*mpb
, __u8 index
)
623 if (index
>= mpb
->num_raid_devs
)
626 /* devices start after all disks */
627 offset
= ((void *) &mpb
->disk
[mpb
->num_disks
]) - _mpb
;
629 for (i
= 0; i
<= index
; i
++)
631 return _mpb
+ offset
;
633 offset
+= sizeof_imsm_dev(_mpb
+ offset
, 0);
638 static struct imsm_dev
*get_imsm_dev(struct intel_super
*super
, __u8 index
)
640 struct intel_dev
*dv
;
642 if (index
>= super
->anchor
->num_raid_devs
)
644 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
645 if (dv
->index
== index
)
653 * == 1 get second map
654 * == -1 then get map according to the current migr_state
656 static __u32
get_imsm_ord_tbl_ent(struct imsm_dev
*dev
,
660 struct imsm_map
*map
;
662 map
= get_imsm_map(dev
, second_map
);
664 /* top byte identifies disk under rebuild */
665 return __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
668 #define ord_to_idx(ord) (((ord) << 8) >> 8)
/* Disk index held in the given slot of the selected map: the ordinal table
 * entry for that slot passed through ord_to_idx().
 */
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
	return ord_to_idx(get_imsm_ord_tbl_ent(dev, slot, second_map));
}
676 static void set_imsm_ord_tbl_ent(struct imsm_map
*map
, int slot
, __u32 ord
)
678 map
->disk_ord_tbl
[slot
] = __cpu_to_le32(ord
);
681 static int get_imsm_disk_slot(struct imsm_map
*map
, unsigned idx
)
686 for (slot
= 0; slot
< map
->num_members
; slot
++) {
687 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
688 if (ord_to_idx(ord
) == idx
)
695 static int get_imsm_raid_level(struct imsm_map
*map
)
697 if (map
->raid_level
== 1) {
698 if (map
->num_members
== 2)
704 return map
->raid_level
;
707 static int cmp_extent(const void *av
, const void *bv
)
709 const struct extent
*a
= av
;
710 const struct extent
*b
= bv
;
711 if (a
->start
< b
->start
)
713 if (a
->start
> b
->start
)
718 static int count_memberships(struct dl
*dl
, struct intel_super
*super
)
723 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
724 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
725 struct imsm_map
*map
= get_imsm_map(dev
, 0);
727 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
734 static struct extent
*get_extents(struct intel_super
*super
, struct dl
*dl
)
736 /* find a list of used extents on the given physical device */
737 struct extent
*rv
, *e
;
739 int memberships
= count_memberships(dl
, super
);
740 __u32 reservation
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
742 rv
= malloc(sizeof(struct extent
) * (memberships
+ 1));
747 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
748 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
749 struct imsm_map
*map
= get_imsm_map(dev
, 0);
751 if (get_imsm_disk_slot(map
, dl
->index
) >= 0) {
752 e
->start
= __le32_to_cpu(map
->pba_of_lba0
);
753 e
->size
= __le32_to_cpu(map
->blocks_per_member
);
757 qsort(rv
, memberships
, sizeof(*rv
), cmp_extent
);
759 /* determine the start of the metadata
760 * when no raid devices are defined use the default
761 * ...otherwise allow the metadata to truncate the value
762 * as is the case with older versions of imsm
765 struct extent
*last
= &rv
[memberships
- 1];
768 remainder
= __le32_to_cpu(dl
->disk
.total_blocks
) -
769 (last
->start
+ last
->size
);
770 /* round down to 1k block to satisfy precision of the kernel
774 /* make sure remainder is still sane */
775 if (remainder
< (unsigned)ROUND_UP(super
->len
, 512) >> 9)
776 remainder
= ROUND_UP(super
->len
, 512) >> 9;
777 if (reservation
> remainder
)
778 reservation
= remainder
;
780 e
->start
= __le32_to_cpu(dl
->disk
.total_blocks
) - reservation
;
785 /* try to determine how much space is reserved for metadata from
786 * the last get_extents() entry, otherwise fallback to the
789 static __u32
imsm_reserved_sectors(struct intel_super
*super
, struct dl
*dl
)
795 /* for spares just return a minimal reservation which will grow
796 * once the spare is picked up by an array
799 return MPB_SECTOR_CNT
;
801 e
= get_extents(super
, dl
);
803 return MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
805 /* scroll to last entry */
806 for (i
= 0; e
[i
].size
; i
++)
809 rv
= __le32_to_cpu(dl
->disk
.total_blocks
) - e
[i
].start
;
816 static int is_spare(struct imsm_disk
*disk
)
818 return (disk
->status
& SPARE_DISK
) == SPARE_DISK
;
821 static int is_configured(struct imsm_disk
*disk
)
823 return (disk
->status
& CONFIGURED_DISK
) == CONFIGURED_DISK
;
826 static int is_failed(struct imsm_disk
*disk
)
828 return (disk
->status
& FAILED_DISK
) == FAILED_DISK
;
831 /* Return minimum size of a spare that can be used in this array*/
832 static unsigned long long min_acceptable_spare_size_imsm(struct supertype
*st
)
834 struct intel_super
*super
= st
->sb
;
838 unsigned long long rv
= 0;
842 /* find first active disk in array */
844 while (dl
&& (is_failed(&dl
->disk
) || dl
->index
== -1))
848 /* find last lba used by subarrays */
849 e
= get_extents(super
, dl
);
852 for (i
= 0; e
[i
].size
; i
++)
855 rv
= e
[i
-1].start
+ e
[i
-1].size
;
857 /* add the amount of space needed for metadata */
858 rv
= rv
+ MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
863 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
);
865 static void print_imsm_dev(struct imsm_dev
*dev
, char *uuid
, int disk_idx
)
869 struct imsm_map
*map
= get_imsm_map(dev
, 0);
870 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
874 printf("[%.16s]:\n", dev
->volume
);
875 printf(" UUID : %s\n", uuid
);
876 printf(" RAID Level : %d", get_imsm_raid_level(map
));
878 printf(" <-- %d", get_imsm_raid_level(map2
));
880 printf(" Members : %d", map
->num_members
);
882 printf(" <-- %d", map2
->num_members
);
884 printf(" Slots : [");
885 for (i
= 0; i
< map
->num_members
; i
++) {
886 ord
= get_imsm_ord_tbl_ent(dev
, i
, 0);
887 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
892 for (i
= 0; i
< map2
->num_members
; i
++) {
893 ord
= get_imsm_ord_tbl_ent(dev
, i
, 1);
894 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
899 printf(" Failed disk : ");
900 if (map
->failed_disk_num
== 0xff)
903 printf("%i", map
->failed_disk_num
);
905 slot
= get_imsm_disk_slot(map
, disk_idx
);
907 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
908 printf(" This Slot : %d%s\n", slot
,
909 ord
& IMSM_ORD_REBUILD
? " (out-of-sync)" : "");
911 printf(" This Slot : ?\n");
912 sz
= __le32_to_cpu(dev
->size_high
);
914 sz
+= __le32_to_cpu(dev
->size_low
);
915 printf(" Array Size : %llu%s\n", (unsigned long long)sz
,
916 human_size(sz
* 512));
917 sz
= __le32_to_cpu(map
->blocks_per_member
);
918 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz
,
919 human_size(sz
* 512));
920 printf(" Sector Offset : %u\n",
921 __le32_to_cpu(map
->pba_of_lba0
));
922 printf(" Num Stripes : %u\n",
923 __le32_to_cpu(map
->num_data_stripes
));
924 printf(" Chunk Size : %u KiB",
925 __le16_to_cpu(map
->blocks_per_strip
) / 2);
927 printf(" <-- %u KiB",
928 __le16_to_cpu(map2
->blocks_per_strip
) / 2);
930 printf(" Reserved : %d\n", __le32_to_cpu(dev
->reserved_blocks
));
931 printf(" Migrate State : ");
932 if (dev
->vol
.migr_state
) {
933 if (migr_type(dev
) == MIGR_INIT
)
934 printf("initialize\n");
935 else if (migr_type(dev
) == MIGR_REBUILD
)
937 else if (migr_type(dev
) == MIGR_VERIFY
)
939 else if (migr_type(dev
) == MIGR_GEN_MIGR
)
940 printf("general migration\n");
941 else if (migr_type(dev
) == MIGR_STATE_CHANGE
)
942 printf("state change\n");
943 else if (migr_type(dev
) == MIGR_REPAIR
)
946 printf("<unknown:%d>\n", migr_type(dev
));
949 printf(" Map State : %s", map_state_str
[map
->map_state
]);
950 if (dev
->vol
.migr_state
) {
951 struct imsm_map
*map
= get_imsm_map(dev
, 1);
953 printf(" <-- %s", map_state_str
[map
->map_state
]);
954 printf("\n Checkpoint : %u (%llu)",
955 __le32_to_cpu(dev
->vol
.curr_migr_unit
),
956 (unsigned long long)blocks_per_migr_unit(dev
));
959 printf(" Dirty State : %s\n", dev
->vol
.dirty
? "dirty" : "clean");
962 static void print_imsm_disk(struct imsm_super
*mpb
, int index
, __u32 reserved
)
964 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, index
);
965 char str
[MAX_RAID_SERIAL_LEN
+ 1];
968 if (index
< 0 || !disk
)
972 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
973 printf(" Disk%02d Serial : %s\n", index
, str
);
974 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
975 is_configured(disk
) ? " active" : "",
976 is_failed(disk
) ? " failed" : "");
977 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
978 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
979 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
980 human_size(sz
* 512));
983 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
);
985 static void examine_super_imsm(struct supertype
*st
, char *homehost
)
987 struct intel_super
*super
= st
->sb
;
988 struct imsm_super
*mpb
= super
->anchor
;
989 char str
[MAX_SIGNATURE_LENGTH
];
994 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
997 snprintf(str
, MPB_SIG_LEN
, "%s", mpb
->sig
);
998 printf(" Magic : %s\n", str
);
999 snprintf(str
, strlen(MPB_VERSION_RAID0
), "%s", get_imsm_version(mpb
));
1000 printf(" Version : %s\n", get_imsm_version(mpb
));
1001 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb
->orig_family_num
));
1002 printf(" Family : %08x\n", __le32_to_cpu(mpb
->family_num
));
1003 printf(" Generation : %08x\n", __le32_to_cpu(mpb
->generation_num
));
1004 getinfo_super_imsm(st
, &info
, NULL
);
1005 fname_from_uuid(st
, &info
, nbuf
, ':');
1006 printf(" UUID : %s\n", nbuf
+ 5);
1007 sum
= __le32_to_cpu(mpb
->check_sum
);
1008 printf(" Checksum : %08x %s\n", sum
,
1009 __gen_imsm_checksum(mpb
) == sum
? "correct" : "incorrect");
1010 printf(" MPB Sectors : %d\n", mpb_sectors(mpb
));
1011 printf(" Disks : %d\n", mpb
->num_disks
);
1012 printf(" RAID Devices : %d\n", mpb
->num_raid_devs
);
1013 print_imsm_disk(mpb
, super
->disks
->index
, reserved
);
1014 if (super
->bbm_log
) {
1015 struct bbm_log
*log
= super
->bbm_log
;
1018 printf("Bad Block Management Log:\n");
1019 printf(" Log Size : %d\n", __le32_to_cpu(mpb
->bbm_log_size
));
1020 printf(" Signature : %x\n", __le32_to_cpu(log
->signature
));
1021 printf(" Entry Count : %d\n", __le32_to_cpu(log
->entry_count
));
1022 printf(" Spare Blocks : %d\n", __le32_to_cpu(log
->reserved_spare_block_count
));
1023 printf(" First Spare : %llx\n",
1024 (unsigned long long) __le64_to_cpu(log
->first_spare_lba
));
1026 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1028 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1030 super
->current_vol
= i
;
1031 getinfo_super_imsm(st
, &info
, NULL
);
1032 fname_from_uuid(st
, &info
, nbuf
, ':');
1033 print_imsm_dev(dev
, nbuf
+ 5, super
->disks
->index
);
1035 for (i
= 0; i
< mpb
->num_disks
; i
++) {
1036 if (i
== super
->disks
->index
)
1038 print_imsm_disk(mpb
, i
, reserved
);
1040 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1041 struct imsm_disk
*disk
;
1042 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1050 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1051 printf(" Disk Serial : %s\n", str
);
1052 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1053 is_configured(disk
) ? " active" : "",
1054 is_failed(disk
) ? " failed" : "");
1055 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1056 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1057 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1058 human_size(sz
* 512));
1062 static void brief_examine_super_imsm(struct supertype
*st
, int verbose
)
1064 /* We just write a generic IMSM ARRAY entry */
1067 struct intel_super
*super
= st
->sb
;
1069 if (!super
->anchor
->num_raid_devs
) {
1070 printf("ARRAY metadata=imsm\n");
1074 getinfo_super_imsm(st
, &info
, NULL
);
1075 fname_from_uuid(st
, &info
, nbuf
, ':');
1076 printf("ARRAY metadata=imsm UUID=%s\n", nbuf
+ 5);
1079 static void brief_examine_subarrays_imsm(struct supertype
*st
, int verbose
)
1081 /* We just write a generic IMSM ARRAY entry */
1085 struct intel_super
*super
= st
->sb
;
1088 if (!super
->anchor
->num_raid_devs
)
1091 getinfo_super_imsm(st
, &info
, NULL
);
1092 fname_from_uuid(st
, &info
, nbuf
, ':');
1093 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
1094 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1096 super
->current_vol
= i
;
1097 getinfo_super_imsm(st
, &info
, NULL
);
1098 fname_from_uuid(st
, &info
, nbuf1
, ':');
1099 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1100 dev
->volume
, nbuf
+ 5, i
, nbuf1
+ 5);
1104 static void export_examine_super_imsm(struct supertype
*st
)
1106 struct intel_super
*super
= st
->sb
;
1107 struct imsm_super
*mpb
= super
->anchor
;
1111 getinfo_super_imsm(st
, &info
, NULL
);
1112 fname_from_uuid(st
, &info
, nbuf
, ':');
1113 printf("MD_METADATA=imsm\n");
1114 printf("MD_LEVEL=container\n");
1115 printf("MD_UUID=%s\n", nbuf
+5);
1116 printf("MD_DEVICES=%u\n", mpb
->num_disks
);
1119 static void detail_super_imsm(struct supertype
*st
, char *homehost
)
1124 getinfo_super_imsm(st
, &info
, NULL
);
1125 fname_from_uuid(st
, &info
, nbuf
, ':');
1126 printf("\n UUID : %s\n", nbuf
+ 5);
1129 static void brief_detail_super_imsm(struct supertype
*st
)
1133 getinfo_super_imsm(st
, &info
, NULL
);
1134 fname_from_uuid(st
, &info
, nbuf
, ':');
1135 printf(" UUID=%s", nbuf
+ 5);
1138 static int imsm_read_serial(int fd
, char *devname
, __u8
*serial
);
1139 static void fd2devname(int fd
, char *name
);
1141 static int ahci_enumerate_ports(const char *hba_path
, int port_count
, int host_base
, int verbose
)
1143 /* dump an unsorted list of devices attached to AHCI Intel storage
1144 * controller, as well as non-connected ports
1146 int hba_len
= strlen(hba_path
) + 1;
1151 unsigned long port_mask
= (1 << port_count
) - 1;
1153 if (port_count
> (int)sizeof(port_mask
) * 8) {
1155 fprintf(stderr
, Name
": port_count %d out of range\n", port_count
);
1159 /* scroll through /sys/dev/block looking for devices attached to
1162 dir
= opendir("/sys/dev/block");
1163 for (ent
= dir
? readdir(dir
) : NULL
; ent
; ent
= readdir(dir
)) {
1174 if (sscanf(ent
->d_name
, "%d:%d", &major
, &minor
) != 2)
1176 path
= devt_to_devpath(makedev(major
, minor
));
1179 if (!path_attached_to_hba(path
, hba_path
)) {
1185 /* retrieve the scsi device type */
1186 if (asprintf(&device
, "/sys/dev/block/%d:%d/device/xxxxxxx", major
, minor
) < 0) {
1188 fprintf(stderr
, Name
": failed to allocate 'device'\n");
1192 sprintf(device
, "/sys/dev/block/%d:%d/device/type", major
, minor
);
1193 if (load_sys(device
, buf
) != 0) {
1195 fprintf(stderr
, Name
": failed to read device type for %s\n",
1201 type
= strtoul(buf
, NULL
, 10);
1203 /* if it's not a disk print the vendor and model */
1204 if (!(type
== 0 || type
== 7 || type
== 14)) {
1207 sprintf(device
, "/sys/dev/block/%d:%d/device/vendor", major
, minor
);
1208 if (load_sys(device
, buf
) == 0) {
1209 strncpy(vendor
, buf
, sizeof(vendor
));
1210 vendor
[sizeof(vendor
) - 1] = '\0';
1211 c
= (char *) &vendor
[sizeof(vendor
) - 1];
1212 while (isspace(*c
) || *c
== '\0')
1216 sprintf(device
, "/sys/dev/block/%d:%d/device/model", major
, minor
);
1217 if (load_sys(device
, buf
) == 0) {
1218 strncpy(model
, buf
, sizeof(model
));
1219 model
[sizeof(model
) - 1] = '\0';
1220 c
= (char *) &model
[sizeof(model
) - 1];
1221 while (isspace(*c
) || *c
== '\0')
1225 if (vendor
[0] && model
[0])
1226 sprintf(buf
, "%.64s %.64s", vendor
, model
);
1228 switch (type
) { /* numbers from hald/linux/device.c */
1229 case 1: sprintf(buf
, "tape"); break;
1230 case 2: sprintf(buf
, "printer"); break;
1231 case 3: sprintf(buf
, "processor"); break;
1233 case 5: sprintf(buf
, "cdrom"); break;
1234 case 6: sprintf(buf
, "scanner"); break;
1235 case 8: sprintf(buf
, "media_changer"); break;
1236 case 9: sprintf(buf
, "comm"); break;
1237 case 12: sprintf(buf
, "raid"); break;
1238 default: sprintf(buf
, "unknown");
1244 /* chop device path to 'host%d' and calculate the port number */
1245 c
= strchr(&path
[hba_len
], '/');
1248 fprintf(stderr
, Name
": %s - invalid path name\n", path
+ hba_len
);
1253 if (sscanf(&path
[hba_len
], "host%d", &port
) == 1)
1257 *c
= '/'; /* repair the full string */
1258 fprintf(stderr
, Name
": failed to determine port number for %s\n",
1265 /* mark this port as used */
1266 port_mask
&= ~(1 << port
);
1268 /* print out the device information */
1270 printf(" Port%d : - non-disk device (%s) -\n", port
, buf
);
1274 fd
= dev_open(ent
->d_name
, O_RDONLY
);
1276 printf(" Port%d : - disk info unavailable -\n", port
);
1278 fd2devname(fd
, buf
);
1279 printf(" Port%d : %s", port
, buf
);
1280 if (imsm_read_serial(fd
, NULL
, (__u8
*) buf
) == 0)
1281 printf(" (%s)\n", buf
);
1296 for (i
= 0; i
< port_count
; i
++)
1297 if (port_mask
& (1 << i
))
1298 printf(" Port%d : - no device attached -\n", i
);
1306 static void print_found_intel_controllers(struct sys_dev
*elem
)
1308 for (; elem
; elem
= elem
->next
) {
1309 fprintf(stderr
, Name
": found Intel(R) ");
1310 if (elem
->type
== SYS_DEV_SATA
)
1311 fprintf(stderr
, "SATA ");
1312 else if (elem
->type
== SYS_DEV_SAS
)
1313 fprintf(stderr
, "SAS ");
1314 fprintf(stderr
, "RAID controller");
1316 fprintf(stderr
, " at %s", elem
->pci_id
);
1317 fprintf(stderr
, ".\n");
1322 static int ahci_get_port_count(const char *hba_path
, int *port_count
)
1329 if ((dir
= opendir(hba_path
)) == NULL
)
1332 for (ent
= readdir(dir
); ent
; ent
= readdir(dir
)) {
1335 if (sscanf(ent
->d_name
, "host%d", &host
) != 1)
1337 if (*port_count
== 0)
1339 else if (host
< host_base
)
1342 if (host
+ 1 > *port_count
+ host_base
)
1343 *port_count
= host
+ 1 - host_base
;
1349 static void print_imsm_capability(const struct imsm_orom
*orom
)
1351 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1352 printf(" Version : %d.%d.%d.%d\n", orom
->major_ver
, orom
->minor_ver
,
1353 orom
->hotfix_ver
, orom
->build
);
1354 printf(" RAID Levels :%s%s%s%s%s\n",
1355 imsm_orom_has_raid0(orom
) ? " raid0" : "",
1356 imsm_orom_has_raid1(orom
) ? " raid1" : "",
1357 imsm_orom_has_raid1e(orom
) ? " raid1e" : "",
1358 imsm_orom_has_raid10(orom
) ? " raid10" : "",
1359 imsm_orom_has_raid5(orom
) ? " raid5" : "");
1360 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1361 imsm_orom_has_chunk(orom
, 2) ? " 2k" : "",
1362 imsm_orom_has_chunk(orom
, 4) ? " 4k" : "",
1363 imsm_orom_has_chunk(orom
, 8) ? " 8k" : "",
1364 imsm_orom_has_chunk(orom
, 16) ? " 16k" : "",
1365 imsm_orom_has_chunk(orom
, 32) ? " 32k" : "",
1366 imsm_orom_has_chunk(orom
, 64) ? " 64k" : "",
1367 imsm_orom_has_chunk(orom
, 128) ? " 128k" : "",
1368 imsm_orom_has_chunk(orom
, 256) ? " 256k" : "",
1369 imsm_orom_has_chunk(orom
, 512) ? " 512k" : "",
1370 imsm_orom_has_chunk(orom
, 1024*1) ? " 1M" : "",
1371 imsm_orom_has_chunk(orom
, 1024*2) ? " 2M" : "",
1372 imsm_orom_has_chunk(orom
, 1024*4) ? " 4M" : "",
1373 imsm_orom_has_chunk(orom
, 1024*8) ? " 8M" : "",
1374 imsm_orom_has_chunk(orom
, 1024*16) ? " 16M" : "",
1375 imsm_orom_has_chunk(orom
, 1024*32) ? " 32M" : "",
1376 imsm_orom_has_chunk(orom
, 1024*64) ? " 64M" : "");
1377 printf(" Max Disks : %d\n", orom
->tds
);
1378 printf(" Max Volumes : %d\n", orom
->vpa
);
1382 static int detail_platform_imsm(int verbose
, int enumerate_only
)
1384 /* There are two components to imsm platform support, the ahci SATA
1385 * controller and the option-rom. To find the SATA controller we
1386 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1387 * controller with the Intel vendor id is present. This approach
1388 * allows mdadm to leverage the kernel's ahci detection logic, with the
1389 * caveat that if ahci.ko is not loaded mdadm will not be able to
1390 * detect platform raid capabilities. The option-rom resides in a
1391 * platform "Adapter ROM". We scan for its signature to retrieve the
1392 * platform capabilities. If raid support is disabled in the BIOS the
1393 * option-rom capability structure will not be available.
1395 const struct imsm_orom
*orom
;
1396 struct sys_dev
*list
, *hba
;
1401 if (enumerate_only
) {
1402 if (check_env("IMSM_NO_PLATFORM"))
1404 list
= find_intel_devices();
1407 for (hba
= list
; hba
; hba
= hba
->next
) {
1408 orom
= find_imsm_capability(hba
->type
);
1414 free_sys_dev(&list
);
1418 list
= find_intel_devices();
1421 fprintf(stderr
, Name
": no active Intel(R) RAID "
1422 "controller found.\n");
1423 free_sys_dev(&list
);
1426 print_found_intel_controllers(list
);
1428 for (hba
= list
; hba
; hba
= hba
->next
) {
1429 orom
= find_imsm_capability(hba
->type
);
1431 fprintf(stderr
, Name
": imsm capabilities not found for controller: %s (type %s)\n",
1432 hba
->path
, get_sys_dev_type(hba
->type
));
1434 print_imsm_capability(orom
);
1437 for (hba
= list
; hba
; hba
= hba
->next
) {
1438 printf(" I/O Controller : %s (%s)\n",
1439 hba
->path
, get_sys_dev_type(hba
->type
));
1441 if (hba
->type
== SYS_DEV_SATA
) {
1442 host_base
= ahci_get_port_count(hba
->path
, &port_count
);
1443 if (ahci_enumerate_ports(hba
->path
, port_count
, host_base
, verbose
)) {
1445 fprintf(stderr
, Name
": failed to enumerate "
1446 "ports on SATA controller at %s.", hba
->pci_id
);
1452 free_sys_dev(&list
);
/* Tell the caller whether this array is "meant" for homehost.
 *
 * The imsm metadata format carries no host identification, so the question
 * can be neither confirmed nor denied and the answer is always -1.
 * Foreign member disks are excluded by compare_super and the 'family_num'
 * fields, and mdadm.conf names the arrays that should be assembled;
 * auto-assembly may still pick up "foreign" arrays.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* neither argument can influence the result */
	(void) st;
	(void) homehost;

	return -1;
}
1471 static void uuid_from_super_imsm(struct supertype
*st
, int uuid
[4])
1473 /* The uuid returned here is used for:
1474 * uuid to put into bitmap file (Create, Grow)
1475 * uuid for backup header when saving critical section (Grow)
1476 * comparing uuids when re-adding a device into an array
1477 * In these cases the uuid required is that of the data-array,
1478 * not the device-set.
1479 * uuid to recognise same set when adding a missing device back
1480 * to an array. This is a uuid for the device-set.
1482 * For each of these we can make do with a truncated
1483 * or hashed uuid rather than the original, as long as
1485 * In each case the uuid required is that of the data-array,
1486 * not the device-set.
1488 /* imsm does not track uuid's so we synthesis one using sha1 on
1489 * - The signature (Which is constant for all imsm array, but no matter)
1490 * - the orig_family_num of the container
1491 * - the index number of the volume
1492 * - the 'serial' number of the volume.
1493 * Hopefully these are all constant.
1495 struct intel_super
*super
= st
->sb
;
1498 struct sha1_ctx ctx
;
1499 struct imsm_dev
*dev
= NULL
;
1502 /* some mdadm versions failed to set ->orig_family_num, in which
1503 * case fall back to ->family_num. orig_family_num will be
1504 * fixed up with the first metadata update.
1506 family_num
= super
->anchor
->orig_family_num
;
1507 if (family_num
== 0)
1508 family_num
= super
->anchor
->family_num
;
1509 sha1_init_ctx(&ctx
);
1510 sha1_process_bytes(super
->anchor
->sig
, MPB_SIG_LEN
, &ctx
);
1511 sha1_process_bytes(&family_num
, sizeof(__u32
), &ctx
);
1512 if (super
->current_vol
>= 0)
1513 dev
= get_imsm_dev(super
, super
->current_vol
);
1515 __u32 vol
= super
->current_vol
;
1516 sha1_process_bytes(&vol
, sizeof(vol
), &ctx
);
1517 sha1_process_bytes(dev
->volume
, MAX_RAID_SERIAL_LEN
, &ctx
);
1519 sha1_finish_ctx(&ctx
, buf
);
1520 memcpy(uuid
, buf
, 4*4);
1525 get_imsm_numerical_version(struct imsm_super
*mpb
, int *m
, int *p
)
1527 __u8
*v
= get_imsm_version(mpb
);
1528 __u8
*end
= mpb
->sig
+ MAX_SIGNATURE_LENGTH
;
1529 char major
[] = { 0, 0, 0 };
1530 char minor
[] = { 0 ,0, 0 };
1531 char patch
[] = { 0, 0, 0 };
1532 char *ver_parse
[] = { major
, minor
, patch
};
1536 while (*v
!= '\0' && v
< end
) {
1537 if (*v
!= '.' && j
< 2)
1538 ver_parse
[i
][j
++] = *v
;
1546 *m
= strtol(minor
, NULL
, 0);
1547 *p
= strtol(patch
, NULL
, 0);
1551 static __u32
migr_strip_blocks_resync(struct imsm_dev
*dev
)
1553 /* migr_strip_size when repairing or initializing parity */
1554 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1555 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1557 switch (get_imsm_raid_level(map
)) {
1562 return 128*1024 >> 9;
1566 static __u32
migr_strip_blocks_rebuild(struct imsm_dev
*dev
)
1568 /* migr_strip_size when rebuilding a degraded disk, no idea why
1569 * this is different than migr_strip_size_resync(), but it's good
1572 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1573 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1575 switch (get_imsm_raid_level(map
)) {
1578 if (map
->num_members
% map
->num_domains
== 0)
1579 return 128*1024 >> 9;
1583 return max((__u32
) 64*1024 >> 9, chunk
);
1585 return 128*1024 >> 9;
1589 static __u32
num_stripes_per_unit_resync(struct imsm_dev
*dev
)
1591 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1592 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1593 __u32 lo_chunk
= __le32_to_cpu(lo
->blocks_per_strip
);
1594 __u32 hi_chunk
= __le32_to_cpu(hi
->blocks_per_strip
);
1596 return max((__u32
) 1, hi_chunk
/ lo_chunk
);
1599 static __u32
num_stripes_per_unit_rebuild(struct imsm_dev
*dev
)
1601 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1602 int level
= get_imsm_raid_level(lo
);
1604 if (level
== 1 || level
== 10) {
1605 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1607 return hi
->num_domains
;
1609 return num_stripes_per_unit_resync(dev
);
1612 static __u8
imsm_num_data_members(struct imsm_dev
*dev
, int second_map
)
1614 /* named 'imsm_' because raid0, raid1 and raid10
1615 * counter-intuitively have the same number of data disks
1617 struct imsm_map
*map
= get_imsm_map(dev
, second_map
);
1619 switch (get_imsm_raid_level(map
)) {
1623 return map
->num_members
;
1625 return map
->num_members
- 1;
1627 dprintf("%s: unsupported raid level\n", __func__
);
1632 static __u32
parity_segment_depth(struct imsm_dev
*dev
)
1634 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1635 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1637 switch(get_imsm_raid_level(map
)) {
1640 return chunk
* map
->num_domains
;
1642 return chunk
* map
->num_members
;
1648 static __u32
map_migr_block(struct imsm_dev
*dev
, __u32 block
)
1650 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1651 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1652 __u32 strip
= block
/ chunk
;
1654 switch (get_imsm_raid_level(map
)) {
1657 __u32 vol_strip
= (strip
* map
->num_domains
) + 1;
1658 __u32 vol_stripe
= vol_strip
/ map
->num_members
;
1660 return vol_stripe
* chunk
+ block
% chunk
;
1662 __u32 stripe
= strip
/ (map
->num_members
- 1);
1664 return stripe
* chunk
+ block
% chunk
;
1671 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
)
1673 /* calculate the conversion factor between per member 'blocks'
1674 * (md/{resync,rebuild}_start) and imsm migration units, return
1675 * 0 for the 'not migrating' and 'unsupported migration' cases
1677 if (!dev
->vol
.migr_state
)
1680 switch (migr_type(dev
)) {
1685 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1686 __u32 stripes_per_unit
;
1687 __u32 blocks_per_unit
;
1696 /* yes, this is really the translation of migr_units to
1697 * per-member blocks in the 'resync' case
1699 stripes_per_unit
= num_stripes_per_unit_resync(dev
);
1700 migr_chunk
= migr_strip_blocks_resync(dev
);
1701 disks
= imsm_num_data_members(dev
, 0);
1702 blocks_per_unit
= stripes_per_unit
* migr_chunk
* disks
;
1703 stripe
= __le32_to_cpu(map
->blocks_per_strip
) * disks
;
1704 segment
= blocks_per_unit
/ stripe
;
1705 block_rel
= blocks_per_unit
- segment
* stripe
;
1706 parity_depth
= parity_segment_depth(dev
);
1707 block_map
= map_migr_block(dev
, block_rel
);
1708 return block_map
+ parity_depth
* segment
;
1710 case MIGR_REBUILD
: {
1711 __u32 stripes_per_unit
;
1714 stripes_per_unit
= num_stripes_per_unit_rebuild(dev
);
1715 migr_chunk
= migr_strip_blocks_rebuild(dev
);
1716 return migr_chunk
* stripes_per_unit
;
1718 case MIGR_STATE_CHANGE
:
1724 static int imsm_level_to_layout(int level
)
1732 return ALGORITHM_LEFT_ASYMMETRIC
;
1739 static void getinfo_super_imsm_volume(struct supertype
*st
, struct mdinfo
*info
, char *dmap
)
1741 struct intel_super
*super
= st
->sb
;
1742 struct imsm_dev
*dev
= get_imsm_dev(super
, super
->current_vol
);
1743 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1744 struct imsm_map
*prev_map
= get_imsm_map(dev
, 1);
1745 struct imsm_map
*map_to_analyse
= map
;
1748 unsigned int component_size_alligment
;
1749 int map_disks
= info
->array
.raid_disks
;
1752 map_to_analyse
= prev_map
;
1754 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
1755 if (dl
->raiddisk
== info
->disk
.raid_disk
)
1757 info
->container_member
= super
->current_vol
;
1758 info
->array
.raid_disks
= map_to_analyse
->num_members
;
1759 info
->array
.level
= get_imsm_raid_level(map_to_analyse
);
1760 info
->array
.layout
= imsm_level_to_layout(info
->array
.level
);
1761 info
->array
.md_minor
= -1;
1762 info
->array
.ctime
= 0;
1763 info
->array
.utime
= 0;
1764 info
->array
.chunk_size
=
1765 __le16_to_cpu(map_to_analyse
->blocks_per_strip
) << 9;
1766 info
->array
.state
= !dev
->vol
.dirty
;
1767 info
->custom_array_size
= __le32_to_cpu(dev
->size_high
);
1768 info
->custom_array_size
<<= 32;
1769 info
->custom_array_size
|= __le32_to_cpu(dev
->size_low
);
1770 if (prev_map
&& map
->map_state
== prev_map
->map_state
) {
1771 info
->reshape_active
= 1;
1772 info
->new_level
= get_imsm_raid_level(map
);
1773 info
->new_layout
= imsm_level_to_layout(info
->new_level
);
1774 info
->new_chunk
= __le16_to_cpu(map
->blocks_per_strip
) << 9;
1775 info
->delta_disks
= map
->num_members
- prev_map
->num_members
;
1776 if (info
->delta_disks
) {
1777 /* this needs to be applied to every array
1780 info
->reshape_active
= 2;
1782 /* We shape information that we give to md might have to be
1783 * modify to cope with md's requirement for reshaping arrays.
1784 * For example, when reshaping a RAID0, md requires it to be
1785 * presented as a degraded RAID4.
1786 * Also if a RAID0 is migrating to a RAID5 we need to specify
1787 * the array as already being RAID5, but the 'before' layout
1788 * is a RAID4-like layout.
1790 switch (info
->array
.level
) {
1792 switch(info
->new_level
) {
1794 /* conversion is happening as RAID4 */
1795 info
->array
.level
= 4;
1796 info
->array
.raid_disks
+= 1;
1799 /* conversion is happening as RAID5 */
1800 info
->array
.level
= 5;
1801 info
->array
.layout
= ALGORITHM_PARITY_N
;
1802 info
->array
.raid_disks
+= 1;
1803 info
->delta_disks
-= 1;
1806 /* FIXME error message */
1807 info
->array
.level
= UnSet
;
1813 info
->new_level
= UnSet
;
1814 info
->new_layout
= UnSet
;
1815 info
->new_chunk
= info
->array
.chunk_size
;
1816 info
->delta_disks
= 0;
1818 info
->disk
.major
= 0;
1819 info
->disk
.minor
= 0;
1821 info
->disk
.major
= dl
->major
;
1822 info
->disk
.minor
= dl
->minor
;
1825 info
->data_offset
= __le32_to_cpu(map_to_analyse
->pba_of_lba0
);
1826 info
->component_size
=
1827 __le32_to_cpu(map_to_analyse
->blocks_per_member
);
1829 /* check component size aligment
1831 component_size_alligment
=
1832 info
->component_size
% (info
->array
.chunk_size
/512);
1834 if (component_size_alligment
&&
1835 (info
->array
.level
!= 1) && (info
->array
.level
!= UnSet
)) {
1836 dprintf("imsm: reported component size alligned from %llu ",
1837 info
->component_size
);
1838 info
->component_size
-= component_size_alligment
;
1839 dprintf("to %llu (%i).\n",
1840 info
->component_size
, component_size_alligment
);
1843 memset(info
->uuid
, 0, sizeof(info
->uuid
));
1844 info
->recovery_start
= MaxSector
;
1846 info
->reshape_progress
= 0;
1847 info
->resync_start
= MaxSector
;
1848 if (map_to_analyse
->map_state
== IMSM_T_STATE_UNINITIALIZED
||
1850 info
->resync_start
= 0;
1852 if (dev
->vol
.migr_state
) {
1853 switch (migr_type(dev
)) {
1856 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
1857 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
1859 info
->resync_start
= blocks_per_unit
* units
;
1862 case MIGR_GEN_MIGR
: {
1863 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
1864 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
1865 unsigned long long array_blocks
;
1868 info
->reshape_progress
= blocks_per_unit
* units
;
1870 /* checkpoint is written per disks unit
1871 * recalculate it to reshape position
1873 used_disks
= imsm_num_data_members(dev
, 0);
1874 info
->reshape_progress
*= used_disks
;
1875 dprintf("IMSM: General Migration checkpoint : %llu "
1876 "(%llu) -> read reshape progress : %llu\n",
1877 units
, blocks_per_unit
, info
->reshape_progress
);
1879 used_disks
= imsm_num_data_members(dev
, 1);
1880 if (used_disks
> 0) {
1881 array_blocks
= map
->blocks_per_member
*
1883 /* round array size down to closest MB
1885 info
->custom_array_size
= (array_blocks
1886 >> SECT_PER_MB_SHIFT
)
1887 << SECT_PER_MB_SHIFT
;
1891 /* we could emulate the checkpointing of
1892 * 'sync_action=check' migrations, but for now
1893 * we just immediately complete them
1896 /* this is handled by container_content_imsm() */
1897 case MIGR_STATE_CHANGE
:
1898 /* FIXME handle other migrations */
1900 /* we are not dirty, so... */
1901 info
->resync_start
= MaxSector
;
1905 strncpy(info
->name
, (char *) dev
->volume
, MAX_RAID_SERIAL_LEN
);
1906 info
->name
[MAX_RAID_SERIAL_LEN
] = 0;
1908 info
->array
.major_version
= -1;
1909 info
->array
.minor_version
= -2;
1910 devname
= devnum2devname(st
->container_dev
);
1911 *info
->text_version
= '\0';
1913 sprintf(info
->text_version
, "/%s/%d", devname
, info
->container_member
);
1915 info
->safe_mode_delay
= 4000; /* 4 secs like the Matrix driver */
1916 uuid_from_super_imsm(st
, info
->uuid
);
1920 for (i
=0; i
<map_disks
; i
++) {
1922 if (i
< info
->array
.raid_disks
) {
1923 struct imsm_disk
*dsk
;
1924 j
= get_imsm_disk_idx(dev
, i
, -1);
1925 dsk
= get_imsm_disk(super
, j
);
1926 if (dsk
&& (dsk
->status
& CONFIGURED_DISK
))
1933 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
);
1934 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
);
1936 static struct imsm_disk
*get_imsm_missing(struct intel_super
*super
, __u8 index
)
1940 for (d
= super
->missing
; d
; d
= d
->next
)
1941 if (d
->index
== index
)
1946 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
)
1948 struct intel_super
*super
= st
->sb
;
1949 struct imsm_disk
*disk
;
1950 int map_disks
= info
->array
.raid_disks
;
1951 int max_enough
= -1;
1953 struct imsm_super
*mpb
;
1955 if (super
->current_vol
>= 0) {
1956 getinfo_super_imsm_volume(st
, info
, map
);
1960 /* Set raid_disks to zero so that Assemble will always pull in valid
1963 info
->array
.raid_disks
= 0;
1964 info
->array
.level
= LEVEL_CONTAINER
;
1965 info
->array
.layout
= 0;
1966 info
->array
.md_minor
= -1;
1967 info
->array
.ctime
= 0; /* N/A for imsm */
1968 info
->array
.utime
= 0;
1969 info
->array
.chunk_size
= 0;
1971 info
->disk
.major
= 0;
1972 info
->disk
.minor
= 0;
1973 info
->disk
.raid_disk
= -1;
1974 info
->reshape_active
= 0;
1975 info
->array
.major_version
= -1;
1976 info
->array
.minor_version
= -2;
1977 strcpy(info
->text_version
, "imsm");
1978 info
->safe_mode_delay
= 0;
1979 info
->disk
.number
= -1;
1980 info
->disk
.state
= 0;
1982 info
->recovery_start
= MaxSector
;
1984 /* do we have the all the insync disks that we expect? */
1985 mpb
= super
->anchor
;
1987 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1988 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1989 int failed
, enough
, j
, missing
= 0;
1990 struct imsm_map
*map
;
1993 failed
= imsm_count_failed(super
, dev
);
1994 state
= imsm_check_degraded(super
, dev
, failed
);
1995 map
= get_imsm_map(dev
, dev
->vol
.migr_state
);
1997 /* any newly missing disks?
1998 * (catches single-degraded vs double-degraded)
2000 for (j
= 0; j
< map
->num_members
; j
++) {
2001 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
2002 __u32 idx
= ord_to_idx(ord
);
2004 if (!(ord
& IMSM_ORD_REBUILD
) &&
2005 get_imsm_missing(super
, idx
)) {
2011 if (state
== IMSM_T_STATE_FAILED
)
2013 else if (state
== IMSM_T_STATE_DEGRADED
&&
2014 (state
!= map
->map_state
|| missing
))
2016 else /* we're normal, or already degraded */
2019 /* in the missing/failed disk case check to see
2020 * if at least one array is runnable
2022 max_enough
= max(max_enough
, enough
);
2024 dprintf("%s: enough: %d\n", __func__
, max_enough
);
2025 info
->container_enough
= max_enough
;
2028 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
2030 disk
= &super
->disks
->disk
;
2031 info
->data_offset
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
2032 info
->component_size
= reserved
;
2033 info
->disk
.state
= is_configured(disk
) ? (1 << MD_DISK_ACTIVE
) : 0;
2034 /* we don't change info->disk.raid_disk here because
2035 * this state will be finalized in mdmon after we have
2036 * found the 'most fresh' version of the metadata
2038 info
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2039 info
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2042 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2043 * ->compare_super may have updated the 'num_raid_devs' field for spares
2045 if (info
->disk
.state
& (1 << MD_DISK_SYNC
) || super
->anchor
->num_raid_devs
)
2046 uuid_from_super_imsm(st
, info
->uuid
);
2048 memcpy(info
->uuid
, uuid_zero
, sizeof(uuid_zero
));
2050 /* I don't know how to compute 'map' on imsm, so use safe default */
2053 for (i
= 0; i
< map_disks
; i
++)
2059 /* allocates memory and fills disk in mdinfo structure
2060 * for each disk in array */
2061 struct mdinfo
*getinfo_super_disks_imsm(struct supertype
*st
)
2063 struct mdinfo
*mddev
= NULL
;
2064 struct intel_super
*super
= st
->sb
;
2065 struct imsm_disk
*disk
;
2068 if (!super
|| !super
->disks
)
2071 mddev
= malloc(sizeof(*mddev
));
2073 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2076 memset(mddev
, 0, sizeof(*mddev
));
2080 tmp
= malloc(sizeof(*tmp
));
2082 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2087 memset(tmp
, 0, sizeof(*tmp
));
2089 tmp
->next
= mddev
->devs
;
2091 tmp
->disk
.number
= count
++;
2092 tmp
->disk
.major
= dl
->major
;
2093 tmp
->disk
.minor
= dl
->minor
;
2094 tmp
->disk
.state
= is_configured(disk
) ?
2095 (1 << MD_DISK_ACTIVE
) : 0;
2096 tmp
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2097 tmp
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2098 tmp
->disk
.raid_disk
= -1;
2104 static int update_super_imsm(struct supertype
*st
, struct mdinfo
*info
,
2105 char *update
, char *devname
, int verbose
,
2106 int uuid_set
, char *homehost
)
2108 /* For 'assemble' and 'force' we need to return non-zero if any
2109 * change was made. For others, the return value is ignored.
2110 * Update options are:
2111 * force-one : This device looks a bit old but needs to be included,
2112 * update age info appropriately.
2113 * assemble: clear any 'faulty' flag to allow this device to
2115 * force-array: Array is degraded but being forced, mark it clean
2116 * if that will be needed to assemble it.
2118 * newdev: not used ????
2119 * grow: Array has gained a new device - this is currently for
2121 * resync: mark as dirty so a resync will happen.
2122 * name: update the name - preserving the homehost
2123 * uuid: Change the uuid of the array to match watch is given
2125 * Following are not relevant for this imsm:
2126 * sparc2.2 : update from old dodgey metadata
2127 * super-minor: change the preferred_minor number
2128 * summaries: update redundant counters.
2129 * homehost: update the recorded homehost
2130 * _reshape_progress: record new reshape_progress position.
2133 struct intel_super
*super
= st
->sb
;
2134 struct imsm_super
*mpb
;
2136 /* we can only update container info */
2137 if (!super
|| super
->current_vol
>= 0 || !super
->anchor
)
2140 mpb
= super
->anchor
;
2142 if (strcmp(update
, "uuid") == 0 && uuid_set
&& !info
->update_private
)
2144 else if (strcmp(update
, "uuid") == 0 && uuid_set
&& info
->update_private
) {
2145 mpb
->orig_family_num
= *((__u32
*) info
->update_private
);
2147 } else if (strcmp(update
, "uuid") == 0) {
2148 __u32
*new_family
= malloc(sizeof(*new_family
));
2150 /* update orig_family_number with the incoming random
2151 * data, report the new effective uuid, and store the
2152 * new orig_family_num for future updates.
2155 memcpy(&mpb
->orig_family_num
, info
->uuid
, sizeof(__u32
));
2156 uuid_from_super_imsm(st
, info
->uuid
);
2157 *new_family
= mpb
->orig_family_num
;
2158 info
->update_private
= new_family
;
2161 } else if (strcmp(update
, "assemble") == 0)
2166 /* successful update? recompute checksum */
2168 mpb
->check_sum
= __le32_to_cpu(__gen_imsm_checksum(mpb
));
2173 static size_t disks_to_mpb_size(int disks
)
2177 size
= sizeof(struct imsm_super
);
2178 size
+= (disks
- 1) * sizeof(struct imsm_disk
);
2179 size
+= 2 * sizeof(struct imsm_dev
);
2180 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2181 size
+= (4 - 2) * sizeof(struct imsm_map
);
2182 /* 4 possible disk_ord_tbl's */
2183 size
+= 4 * (disks
- 1) * sizeof(__u32
);
2188 static __u64
avail_size_imsm(struct supertype
*st
, __u64 devsize
)
2190 if (devsize
< (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
))
2193 return devsize
- (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
);
2196 static void free_devlist(struct intel_super
*super
)
2198 struct intel_dev
*dv
;
2200 while (super
->devlist
) {
2201 dv
= super
->devlist
->next
;
2202 free(super
->devlist
->dev
);
2203 free(super
->devlist
);
2204 super
->devlist
= dv
;
/* Copy src into dest; the byte count is sizeof_imsm_dev(src, 0), so dest
 * must provide at least that much room.
 * NOTE(review): the 0 presumably selects the size without a migration
 * map - confirm against sizeof_imsm_dev().
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
2213 static int compare_super_imsm(struct supertype
*st
, struct supertype
*tst
)
2217 * 0 same, or first was empty, and second was copied
2218 * 1 second had wrong number
2220 * 3 wrong other info
2222 struct intel_super
*first
= st
->sb
;
2223 struct intel_super
*sec
= tst
->sb
;
2230 /* in platform dependent environment test if the disks
2231 * use the same Intel hba
2233 if (!check_env("IMSM_NO_PLATFORM")) {
2234 if (!first
->hba
|| !sec
->hba
||
2235 (first
->hba
->type
!= sec
->hba
->type
)) {
2237 "HBAs of devices does not match %s != %s\n",
2238 first
->hba
? get_sys_dev_type(first
->hba
->type
) : NULL
,
2239 sec
->hba
? get_sys_dev_type(sec
->hba
->type
) : NULL
);
2244 /* if an anchor does not have num_raid_devs set then it is a free
2247 if (first
->anchor
->num_raid_devs
> 0 &&
2248 sec
->anchor
->num_raid_devs
> 0) {
2249 /* Determine if these disks might ever have been
2250 * related. Further disambiguation can only take place
2251 * in load_super_imsm_all
2253 __u32 first_family
= first
->anchor
->orig_family_num
;
2254 __u32 sec_family
= sec
->anchor
->orig_family_num
;
2256 if (memcmp(first
->anchor
->sig
, sec
->anchor
->sig
,
2257 MAX_SIGNATURE_LENGTH
) != 0)
2260 if (first_family
== 0)
2261 first_family
= first
->anchor
->family_num
;
2262 if (sec_family
== 0)
2263 sec_family
= sec
->anchor
->family_num
;
2265 if (first_family
!= sec_family
)
2271 /* if 'first' is a spare promote it to a populated mpb with sec's
2274 if (first
->anchor
->num_raid_devs
== 0 &&
2275 sec
->anchor
->num_raid_devs
> 0) {
2277 struct intel_dev
*dv
;
2278 struct imsm_dev
*dev
;
2280 /* we need to copy raid device info from sec if an allocation
2281 * fails here we don't associate the spare
2283 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++) {
2284 dv
= malloc(sizeof(*dv
));
2287 dev
= malloc(sizeof_imsm_dev(get_imsm_dev(sec
, i
), 1));
2294 dv
->next
= first
->devlist
;
2295 first
->devlist
= dv
;
2297 if (i
< sec
->anchor
->num_raid_devs
) {
2298 /* allocation failure */
2299 free_devlist(first
);
2300 fprintf(stderr
, "imsm: failed to associate spare\n");
2303 first
->anchor
->num_raid_devs
= sec
->anchor
->num_raid_devs
;
2304 first
->anchor
->orig_family_num
= sec
->anchor
->orig_family_num
;
2305 first
->anchor
->family_num
= sec
->anchor
->family_num
;
2306 memcpy(first
->anchor
->sig
, sec
->anchor
->sig
, MAX_SIGNATURE_LENGTH
);
2307 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++)
2308 imsm_copy_dev(get_imsm_dev(first
, i
), get_imsm_dev(sec
, i
));
2314 static void fd2devname(int fd
, char *name
)
2318 char dname
[PATH_MAX
];
2323 if (fstat(fd
, &st
) != 0)
2325 sprintf(path
, "/sys/dev/block/%d:%d",
2326 major(st
.st_rdev
), minor(st
.st_rdev
));
2328 rv
= readlink(path
, dname
, sizeof(dname
));
2333 nm
= strrchr(dname
, '/');
2335 snprintf(name
, MAX_RAID_SERIAL_LEN
, "/dev/%s", nm
);
2338 extern int scsi_get_serial(int fd
, void *buf
, size_t buf_len
);
2340 static int imsm_read_serial(int fd
, char *devname
,
2341 __u8 serial
[MAX_RAID_SERIAL_LEN
])
2343 unsigned char scsi_serial
[255];
2352 memset(scsi_serial
, 0, sizeof(scsi_serial
));
2354 rv
= scsi_get_serial(fd
, scsi_serial
, sizeof(scsi_serial
));
2356 if (rv
&& check_env("IMSM_DEVNAME_AS_SERIAL")) {
2357 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2358 fd2devname(fd
, (char *) serial
);
2365 Name
": Failed to retrieve serial for %s\n",
2370 rsp_len
= scsi_serial
[3];
2374 Name
": Failed to retrieve serial for %s\n",
2378 rsp_buf
= (char *) &scsi_serial
[4];
2380 /* trim all whitespace and non-printable characters and convert
2383 for (i
= 0, dest
= rsp_buf
; i
< rsp_len
; i
++) {
2386 /* ':' is reserved for use in placeholder serial
2387 * numbers for missing disks
2395 len
= dest
- rsp_buf
;
2398 /* truncate leading characters */
2399 if (len
> MAX_RAID_SERIAL_LEN
) {
2400 dest
+= len
- MAX_RAID_SERIAL_LEN
;
2401 len
= MAX_RAID_SERIAL_LEN
;
2404 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2405 memcpy(serial
, dest
, len
);
2410 static int serialcmp(__u8
*s1
, __u8
*s2
)
2412 return strncmp((char *) s1
, (char *) s2
, MAX_RAID_SERIAL_LEN
);
2415 static void serialcpy(__u8
*dest
, __u8
*src
)
2417 strncpy((char *) dest
, (char *) src
, MAX_RAID_SERIAL_LEN
);
2421 static struct dl
*serial_to_dl(__u8
*serial
, struct intel_super
*super
)
2425 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2426 if (serialcmp(dl
->serial
, serial
) == 0)
2433 static struct imsm_disk
*
2434 __serial_to_disk(__u8
*serial
, struct imsm_super
*mpb
, int *idx
)
2438 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2439 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
2441 if (serialcmp(disk
->serial
, serial
) == 0) {
2452 load_imsm_disk(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2454 struct imsm_disk
*disk
;
2459 __u8 serial
[MAX_RAID_SERIAL_LEN
];
2461 rv
= imsm_read_serial(fd
, devname
, serial
);
2466 dl
= calloc(1, sizeof(*dl
));
2470 Name
": failed to allocate disk buffer for %s\n",
2476 dl
->major
= major(stb
.st_rdev
);
2477 dl
->minor
= minor(stb
.st_rdev
);
2478 dl
->next
= super
->disks
;
2479 dl
->fd
= keep_fd
? fd
: -1;
2480 assert(super
->disks
== NULL
);
2482 serialcpy(dl
->serial
, serial
);
2485 fd2devname(fd
, name
);
2487 dl
->devname
= strdup(devname
);
2489 dl
->devname
= strdup(name
);
2491 /* look up this disk's index in the current anchor */
2492 disk
= __serial_to_disk(dl
->serial
, super
->anchor
, &dl
->index
);
2495 /* only set index on disks that are a member of a
2496 * populated contianer, i.e. one with raid_devs
2498 if (is_failed(&dl
->disk
))
2500 else if (is_spare(&dl
->disk
))
2508 /* When migrating map0 contains the 'destination' state while map1
2509 * contains the current state. When not migrating map0 contains the
2510 * current state. This routine assumes that map[0].map_state is set to
2511 * the current array state before being called.
2513 * Migration is indicated by one of the following states
2514 * 1/ Idle (migr_state=0 map0state=normal||uninitialized||degraded||failed)
2515 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2516 * map1state=uninitialized)
2517 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2519 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2520 * map1state=degraded)
2522 static void migrate(struct imsm_dev
*dev
, __u8 to_state
, int migr_type
)
2524 struct imsm_map
*dest
;
2525 struct imsm_map
*src
= get_imsm_map(dev
, 0);
2527 dev
->vol
.migr_state
= 1;
2528 set_migr_type(dev
, migr_type
);
2529 dev
->vol
.curr_migr_unit
= 0;
2530 dest
= get_imsm_map(dev
, 1);
2532 /* duplicate and then set the target end state in map[0] */
2533 memcpy(dest
, src
, sizeof_imsm_map(src
));
2534 if ((migr_type
== MIGR_REBUILD
) ||
2535 (migr_type
== MIGR_GEN_MIGR
)) {
2539 for (i
= 0; i
< src
->num_members
; i
++) {
2540 ord
= __le32_to_cpu(src
->disk_ord_tbl
[i
]);
2541 set_imsm_ord_tbl_ent(src
, i
, ord_to_idx(ord
));
2545 src
->map_state
= to_state
;
/* end_migration - leave the migrating state on @dev.
 *
 * 'prev' is the map selected by the current vol.migr_state, 'map' is
 * map 0.  For every pair of entries whose ord_to_idx() values match,
 * the previous ordinal is OR-ed into the map 0 entry so that flag bits
 * set there are carried over.  Afterwards migr_state, migr_type and
 * curr_migr_unit are cleared and map 0 takes @map_state.
 */
2548 static void end_migration(struct imsm_dev
*dev
, __u8 map_state
)
2550 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2551 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2554 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2555 * completed in the last migration.
2557 * FIXME add support for raid-level-migration
2559 for (i
= 0; i
< prev
->num_members
; i
++)
2560 for (j
= 0; j
< map
->num_members
; j
++)
2561 /* during online capacity expansion
2562 * disks position can be changed if takeover is used
2564 if (ord_to_idx(map
->disk_ord_tbl
[j
]) ==
2565 ord_to_idx(prev
->disk_ord_tbl
[i
])) {
2566 map
->disk_ord_tbl
[j
] |= prev
->disk_ord_tbl
[i
];
2570 dev
->vol
.migr_state
= 0;
2571 dev
->vol
.migr_type
= 0;
2572 dev
->vol
.curr_migr_unit
= 0;
2573 map
->map_state
= map_state
;
/* parse_raid_devices - build super->devlist from the anchor.
 *
 * Each raid device found in super->anchor is copied with
 * imsm_copy_dev() into a separately malloc'ed buffer of max_len bytes
 * and its intel_dev wrapper is pushed on the head of super->devlist.
 * space_needed accumulates the difference between
 * sizeof_imsm_dev(dev, 1) and sizeof_imsm_dev(dev, 0), plus any excess
 * of max_len over len_migr.  When mpb_size + space_needed exceeds
 * super->len a larger 512-byte aligned buffer is allocated, filled
 * from super->buf and zero padded.
 */
2577 static int parse_raid_devices(struct intel_super
*super
)
2580 struct imsm_dev
*dev_new
;
2581 size_t len
, len_migr
;
2583 size_t space_needed
= 0;
2584 struct imsm_super
*mpb
= super
->anchor
;
2586 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2587 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2588 struct intel_dev
*dv
;
2590 len
= sizeof_imsm_dev(dev_iter
, 0);
2591 len_migr
= sizeof_imsm_dev(dev_iter
, 1);
2593 space_needed
+= len_migr
- len
;
2595 dv
= malloc(sizeof(*dv
));
2598 if (max_len
< len_migr
)
2600 if (max_len
> len_migr
)
2601 space_needed
+= max_len
- len_migr
;
2602 dev_new
= malloc(max_len
);
2607 imsm_copy_dev(dev_new
, dev_iter
);
2610 dv
->next
= super
->devlist
;
2611 super
->devlist
= dv
;
2614 /* ensure that super->buf is large enough when all raid devices
2617 if (__le32_to_cpu(mpb
->mpb_size
) + space_needed
> super
->len
) {
2620 len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + space_needed
, 512);
2621 if (posix_memalign(&buf
, 512, len
) != 0)
2624 memcpy(buf
, super
->buf
, super
->len
);
2625 memset(buf
+ super
->len
, 0, len
- super
->len
);
2634 /* retrieve a pointer to the bbm log which starts after all raid devices */
/* The offset is only applied when bbm_log_size is non-zero; it is
 * computed as mpb_size minus bbm_log_size.
 * NOTE(review): mpb->mpb_size is used here without __le32_to_cpu()
 * while bbm_log_size is converted; looks inconsistent on big-endian
 * hosts, confirm.
 */
2635 struct bbm_log
*__get_imsm_bbm_log(struct imsm_super
*mpb
)
2639 if (__le32_to_cpu(mpb
->bbm_log_size
)) {
2641 ptr
+= mpb
->mpb_size
- __le32_to_cpu(mpb
->bbm_log_size
);
2647 static void __free_imsm(struct intel_super
*super
, int free_disks
);
2649 /* load_imsm_mpb - read matrix metadata
2650 * allocates super->mpb to be freed by free_imsm
/* Read the anchor and the extended mpb from @fd into @super.
 *
 * The anchor is the 512-byte block found at dsize - 1024; it is read
 * into a 512-byte aligned scratch buffer and its signature is compared
 * with MPB_SIGNATURE.  The old contents of @super are then dropped
 * with __free_imsm(super, 0), the hba/capability lookup is repeated,
 * and super->buf is allocated with super->len bytes before the anchor
 * is copied in.  The remaining mpb_sectors(anchor) - 1 sectors are
 * read from dsize - 512 * (2 + sectors) into super->buf + 512.
 * __gen_imsm_checksum() is compared with the stored check_sum at two
 * points: after the anchor copy and after the extended read.  Last,
 * super->bbm_log is set.  @devname only appears in diagnostics.
 *
 * NOTE(review): anchor->mpb_size is passed to ROUND_UP() without
 * __le32_to_cpu(), unlike other mpb_size readers in this file, confirm
 * behaviour on big-endian hosts.
 */
2652 static int load_imsm_mpb(int fd
, struct intel_super
*super
, char *devname
)
2654 unsigned long long dsize
;
2655 unsigned long long sectors
;
2657 struct imsm_super
*anchor
;
2660 get_dev_size(fd
, NULL
, &dsize
);
2664 Name
": %s: device to small for imsm\n",
2669 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0) {
2672 Name
": Cannot seek to anchor block on %s: %s\n",
2673 devname
, strerror(errno
));
2677 if (posix_memalign((void**)&anchor
, 512, 512) != 0) {
2680 Name
": Failed to allocate imsm anchor buffer"
2681 " on %s\n", devname
);
2684 if (read(fd
, anchor
, 512) != 512) {
2687 Name
": Cannot read anchor block on %s: %s\n",
2688 devname
, strerror(errno
));
2693 if (strncmp((char *) anchor
->sig
, MPB_SIGNATURE
, MPB_SIG_LEN
) != 0) {
2696 Name
": no IMSM anchor on %s\n", devname
);
2701 __free_imsm(super
, 0);
2702 /* reload capability and hba */
2704 /* capability and hba must be updated with new super allocation */
2705 find_intel_hba_capability(fd
, super
, devname
);
2706 super
->len
= ROUND_UP(anchor
->mpb_size
, 512);
2707 if (posix_memalign(&super
->buf
, 512, super
->len
) != 0) {
2710 Name
": unable to allocate %zu byte mpb buffer\n",
2715 memcpy(super
->buf
, anchor
, 512);
2717 sectors
= mpb_sectors(anchor
) - 1;
2720 check_sum
= __gen_imsm_checksum(super
->anchor
);
2721 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2724 Name
": IMSM checksum %x != %x on %s\n",
2726 __le32_to_cpu(super
->anchor
->check_sum
),
2734 /* read the extended mpb */
2735 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0) {
2738 Name
": Cannot seek to extended mpb on %s: %s\n",
2739 devname
, strerror(errno
));
2743 if ((unsigned)read(fd
, super
->buf
+ 512, super
->len
- 512) != super
->len
- 512) {
2746 Name
": Cannot read extended mpb on %s: %s\n",
2747 devname
, strerror(errno
));
2751 check_sum
= __gen_imsm_checksum(super
->anchor
);
2752 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2755 Name
": IMSM checksum %x != %x on %s\n",
2756 check_sum
, __le32_to_cpu(super
->anchor
->check_sum
),
2761 /* FIXME the BBM log is disk specific so we cannot use this global
2762 * buffer for all disks. Ok for now since we only look at the global
2763 * bbm_log_size parameter to gate assembly
2765 super
->bbm_log
= __get_imsm_bbm_log(super
->anchor
);
/* Run the three load stages for one device, in this order:
 * load_imsm_mpb(), load_imsm_disk() (with @keep_fd passed through)
 * and parse_raid_devices().  Each result is stored in err.
 */
2771 load_and_parse_mpb(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2775 err
= load_imsm_mpb(fd
, super
, devname
);
2778 err
= load_imsm_disk(fd
, super
, devname
, keep_fd
);
2781 err
= parse_raid_devices(super
);
/* Per-entry release helper: free_imsm_disks() calls this for every
 * struct dl it unlinks from one of the disk lists.
 */
2786 static void __free_imsm_disk(struct dl
*d
)
/* Empty the three disk lists hanging off @super (disks,
 * disk_mgmt_list and missing): each head entry is unlinked and handed
 * to __free_imsm_disk() until the list is empty.
 */
2798 static void free_imsm_disks(struct intel_super
*super
)
2802 while (super
->disks
) {
2804 super
->disks
= d
->next
;
2805 __free_imsm_disk(d
);
2807 while (super
->disk_mgmt_list
) {
2808 d
= super
->disk_mgmt_list
;
2809 super
->disk_mgmt_list
= d
->next
;
2810 __free_imsm_disk(d
);
2812 while (super
->missing
) {
2814 super
->missing
= d
->next
;
2815 __free_imsm_disk(d
);
2820 /* free all the pieces hanging off of a super pointer */
/* Calls free_imsm_disks() and free_devlist() and frees the path of
 * struct intel_hba entries.
 * NOTE(review): presumably @free_disks gates the free_imsm_disks()
 * call (load_imsm_mpb() passes 0, free_imsm() passes 1), confirm.
 */
2821 static void __free_imsm(struct intel_super
*super
, int free_disks
)
2823 struct intel_hba
*elem
, *next
;
2829 /* unlink capability description */
2832 free_imsm_disks(super
);
2833 free_devlist(super
);
2837 free((void *)elem
->path
);
/* Release what hangs off @super via __free_imsm() with free_disks
 * set to 1.
 */
2845 static void free_imsm(struct intel_super
*super
)
2847 __free_imsm(super
, 1);
/* supertype hook working on the intel_super stored in st->sb.
 * NOTE(review): presumably releases it and clears st->sb, confirm.
 */
2851 static void free_super_imsm(struct supertype
*st
)
2853 struct intel_super
*super
= st
->sb
;
/* Allocate an intel_super with malloc() and zero it.  Two fields get
 * non-zero defaults: current_vol is -1 and create_offset is all ones
 * (~(__u32) 0).  Callers in this file test the result against NULL.
 */
2862 static struct intel_super
*alloc_super(void)
2864 struct intel_super
*super
= malloc(sizeof(*super
));
2867 memset(super
, 0, sizeof(*super
));
2868 super
->current_vol
= -1;
2869 super
->create_offset
= ~((__u32
) 0);
2875 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
/* Find the controller behind @fd and record it in @super.
 *
 * A separate branch handles fd < 0 or IMSM_NO_PLATFORM being set in
 * the environment.  Otherwise find_disk_attached_hba() looks up the
 * controller and attach_hba_to_super() links it to @super.  One
 * diagnostic reports a device that is not attached to an Intel RAID
 * controller; another reports a device attached to a different
 * controller than the one the container is assigned to, listing the
 * pci ids already attached.  super->orom is taken from
 * find_imsm_capability(hba_name->type).  hba_name is released with
 * free_sys_dev() in both visible places.
 */
2877 static int find_intel_hba_capability(int fd
, struct intel_super
*super
, char *devname
)
2879 struct sys_dev
*hba_name
;
2882 if ((fd
< 0) || check_env("IMSM_NO_PLATFORM")) {
2887 hba_name
= find_disk_attached_hba(fd
, NULL
);
2891 Name
": %s is not attached to Intel(R) RAID controller.\n",
2895 rv
= attach_hba_to_super(super
, hba_name
);
2898 struct intel_hba
*hba
= super
->hba
;
2900 fprintf(stderr
, Name
": %s is attached to Intel(R) %s RAID "
2901 "controller (%s),\n"
2902 " but the container is assigned to Intel(R) "
2903 "%s RAID controller (",
2906 hba_name
->pci_id
? : "Err!",
2907 get_sys_dev_type(hba_name
->type
));
2910 fprintf(stderr
, "%s", hba
->pci_id
? : "Err!");
2912 fprintf(stderr
, ", ");
2916 fprintf(stderr
, ").\n"
2917 " Mixing devices attached to different controllers "
2918 "is not allowed.\n");
2920 free_sys_dev(&hba_name
);
2923 super
->orom
= find_imsm_capability(hba_name
->type
);
2924 free_sys_dev(&hba_name
);
2931 /* find_missing - helper routine for load_super_imsm_all that identifies
2932 * disks that have disappeared from the system. This routine relies on
2933 * the mpb being up to date, which it is at load time.
/* Walk the anchor's disk table and look each serial up with
 * serial_to_dl().  A placeholder struct dl is allocated, named
 * "missing", given the disk's serial and pushed on the head of
 * super->missing.
 * NOTE(review): presumably the placeholder is only created when the
 * lookup finds nothing, confirm.
 */
2935 static int find_missing(struct intel_super
*super
)
2938 struct imsm_super
*mpb
= super
->anchor
;
2940 struct imsm_disk
*disk
;
2942 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2943 disk
= __get_imsm_disk(mpb
, i
);
2944 dl
= serial_to_dl(disk
->serial
, super
);
2948 dl
= malloc(sizeof(*dl
));
2954 dl
->devname
= strdup("missing");
2956 serialcpy(dl
->serial
, disk
->serial
);
2959 dl
->next
= super
->missing
;
2960 super
->missing
= dl
;
/* Linear search of @disk_list: entries are visited through ->next and
 * compared with serialcmp(); a result of 0 means the entry's disk
 * serial equals @serial.
 */
2966 static struct intel_disk
*disk_list_get(__u8
*serial
, struct intel_disk
*disk_list
)
2968 struct intel_disk
*idisk
= disk_list
;
2971 if (serialcmp(idisk
->disk
.serial
, serial
) == 0)
2973 idisk
= idisk
->next
;
/* Fold one loaded @super into the candidate @table and the merged
 * @disk_list.
 *
 * @super is compared with each of the first @tbl_size table entries:
 * entries with the same family_num are matched either exactly (same
 * check_sum) or by generation_num together with the configured state
 * of the two owning disks, and failed/configured status is copied
 * across generations in both directions.  A super can also be appended
 * with table[tbl_size++].  After that every imsm_disk in @super's mpb
 * is merged into *disk_list: an existing entry gets the status bits
 * OR-ed in (SPARE_DISK is cleared once the entry is configured or
 * failed); otherwise a new entry is calloc'ed with owner
 * IMSM_UNKNOWN_OWNER and a copy of the disk record.
 * The caller stores the return value back into its tbl_size.
 */
2979 static int __prep_thunderdome(struct intel_super
**table
, int tbl_size
,
2980 struct intel_super
*super
,
2981 struct intel_disk
**disk_list
)
2983 struct imsm_disk
*d
= &super
->disks
->disk
;
2984 struct imsm_super
*mpb
= super
->anchor
;
2987 for (i
= 0; i
< tbl_size
; i
++) {
2988 struct imsm_super
*tbl_mpb
= table
[i
]->anchor
;
2989 struct imsm_disk
*tbl_d
= &table
[i
]->disks
->disk
;
2991 if (tbl_mpb
->family_num
== mpb
->family_num
) {
2992 if (tbl_mpb
->check_sum
== mpb
->check_sum
) {
2993 dprintf("%s: mpb from %d:%d matches %d:%d\n",
2994 __func__
, super
->disks
->major
,
2995 super
->disks
->minor
,
2996 table
[i
]->disks
->major
,
2997 table
[i
]->disks
->minor
);
3001 if (((is_configured(d
) && !is_configured(tbl_d
)) ||
3002 is_configured(d
) == is_configured(tbl_d
)) &&
3003 tbl_mpb
->generation_num
< mpb
->generation_num
) {
3004 /* current version of the mpb is a
3005 * better candidate than the one in
3006 * super_table, but copy over "cross
3007 * generational" status
3009 struct intel_disk
*idisk
;
3011 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3012 __func__
, super
->disks
->major
,
3013 super
->disks
->minor
,
3014 table
[i
]->disks
->major
,
3015 table
[i
]->disks
->minor
);
3017 idisk
= disk_list_get(tbl_d
->serial
, *disk_list
);
3018 if (idisk
&& is_failed(&idisk
->disk
))
3019 tbl_d
->status
|= FAILED_DISK
;
3022 struct intel_disk
*idisk
;
3023 struct imsm_disk
*disk
;
3025 /* tbl_mpb is more up to date, but copy
3026 * over cross generational status before
3029 disk
= __serial_to_disk(d
->serial
, mpb
, NULL
);
3030 if (disk
&& is_failed(disk
))
3031 d
->status
|= FAILED_DISK
;
3033 idisk
= disk_list_get(d
->serial
, *disk_list
);
3036 if (disk
&& is_configured(disk
))
3037 idisk
->disk
.status
|= CONFIGURED_DISK
;
3040 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3041 __func__
, super
->disks
->major
,
3042 super
->disks
->minor
,
3043 table
[i
]->disks
->major
,
3044 table
[i
]->disks
->minor
);
3052 table
[tbl_size
++] = super
;
3056 /* update/extend the merged list of imsm_disk records */
3057 for (j
= 0; j
< mpb
->num_disks
; j
++) {
3058 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, j
);
3059 struct intel_disk
*idisk
;
3061 idisk
= disk_list_get(disk
->serial
, *disk_list
);
3063 idisk
->disk
.status
|= disk
->status
;
3064 if (is_configured(&idisk
->disk
) ||
3065 is_failed(&idisk
->disk
))
3066 idisk
->disk
.status
&= ~(SPARE_DISK
);
3068 idisk
= calloc(1, sizeof(*idisk
));
3071 idisk
->owner
= IMSM_UNKNOWN_OWNER
;
3072 idisk
->disk
= *disk
;
3073 idisk
->next
= *disk_list
;
3077 if (serialcmp(idisk
->disk
.serial
, d
->serial
) == 0)
/* Check every disk in @super's anchor against the merged @disk_list.
 * An entry is accepted when its owner equals 'owner' or is still
 * IMSM_UNKNOWN_OWNER; owner mismatches and serials that are not in
 * the list are reported with dprintf().  The count of accepted disks
 * (ok_count) is finally compared with mpb->num_disks.
 */
3084 static struct intel_super
*
3085 validate_members(struct intel_super
*super
, struct intel_disk
*disk_list
,
3088 struct imsm_super
*mpb
= super
->anchor
;
3092 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3093 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
3094 struct intel_disk
*idisk
;
3096 idisk
= disk_list_get(disk
->serial
, disk_list
);
3098 if (idisk
->owner
== owner
||
3099 idisk
->owner
== IMSM_UNKNOWN_OWNER
)
3102 dprintf("%s: '%.16s' owner %d != %d\n",
3103 __func__
, disk
->serial
, idisk
->owner
,
3106 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3107 __func__
, __le32_to_cpu(mpb
->family_num
), i
,
3113 if (ok_count
== mpb
->num_disks
)
/* Walk @super_list and compare @family_num with each entry's anchor
 * family_num.  The conflict message printed to stderr names the family
 * (converted with __le32_to_cpu()) and the devname of the entry's
 * first disk.
 * NOTE(review): presumably entries with a different family_num are
 * skipped before the message, confirm.
 */
3118 static void show_conflicts(__u32 family_num
, struct intel_super
*super_list
)
3120 struct intel_super
*s
;
3122 for (s
= super_list
; s
; s
= s
->next
) {
3123 if (family_num
!= s
->anchor
->family_num
)
3125 fprintf(stderr
, "Conflict, offlining family %#x on '%s'\n",
3126 __le32_to_cpu(family_num
), s
->disks
->devname
);
/* Reduce the @len supers on *super_list to a single 'champion'.
 *
 * Visible stages:
 *  - every list entry is folded into the on-stack super_table and the
 *    merged disk_list by __prep_thunderdome();
 *  - each table entry whose own disk is in disk_list with owner equal
 *    to the table index goes through validate_members(); families
 *    that do not pass are reported as marked offline;
 *  - the table is scanned again, treating entries with
 *    num_raid_devs == 0 separately and reporting conflicts through
 *    show_conflicts();
 *  - all struct dl entries are moved onto champion->disks, with their
 *    index looked up in the champion's mpb;
 *  - the champion is unlinked from *super_list and disk_list is walked
 *    to its end.
 */
3130 static struct intel_super
*
3131 imsm_thunderdome(struct intel_super
**super_list
, int len
)
3133 struct intel_super
*super_table
[len
];
3134 struct intel_disk
*disk_list
= NULL
;
3135 struct intel_super
*champion
, *spare
;
3136 struct intel_super
*s
, **del
;
3141 memset(super_table
, 0, sizeof(super_table
));
3142 for (s
= *super_list
; s
; s
= s
->next
)
3143 tbl_size
= __prep_thunderdome(super_table
, tbl_size
, s
, &disk_list
);
3145 for (i
= 0; i
< tbl_size
; i
++) {
3146 struct imsm_disk
*d
;
3147 struct intel_disk
*idisk
;
3148 struct imsm_super
*mpb
= super_table
[i
]->anchor
;
3151 d
= &s
->disks
->disk
;
3153 /* 'd' must appear in merged disk list for its
3154 * configuration to be valid
3156 idisk
= disk_list_get(d
->serial
, disk_list
);
3157 if (idisk
&& idisk
->owner
== i
)
3158 s
= validate_members(s
, disk_list
, i
);
3163 dprintf("%s: marking family: %#x from %d:%d offline\n",
3164 __func__
, mpb
->family_num
,
3165 super_table
[i
]->disks
->major
,
3166 super_table
[i
]->disks
->minor
);
3170 /* This is where the mdadm implementation differs from the Windows
3171 * driver which has no strict concept of a container. We can only
3172 * assemble one family from a container, so when returning a prodigal
3173 * array member to this system the code will not be able to disambiguate
3174 * the container contents that should be assembled ("foreign" versus
3175 * "local"). It requires user intervention to set the orig_family_num
3176 * to a new value to establish a new container. The Windows driver in
3177 * this situation fixes up the volume name in place and manages the
3178 * foreign array as an independent entity.
3183 for (i
= 0; i
< tbl_size
; i
++) {
3184 struct intel_super
*tbl_ent
= super_table
[i
];
3190 if (tbl_ent
->anchor
->num_raid_devs
== 0) {
3195 if (s
&& !is_spare
) {
3196 show_conflicts(tbl_ent
->anchor
->family_num
, *super_list
);
3198 } else if (!s
&& !is_spare
)
3211 fprintf(stderr
, "Chose family %#x on '%s', "
3212 "assemble conflicts to new container with '--update=uuid'\n",
3213 __le32_to_cpu(s
->anchor
->family_num
), s
->disks
->devname
);
3215 /* collect all dl's onto 'champion', and update them to
3216 * champion's version of the status
3218 for (s
= *super_list
; s
; s
= s
->next
) {
3219 struct imsm_super
*mpb
= champion
->anchor
;
3220 struct dl
*dl
= s
->disks
;
3225 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3226 struct imsm_disk
*disk
;
3228 disk
= __serial_to_disk(dl
->serial
, mpb
, &dl
->index
);
3231 /* only set index on disks that are a member of
3232 * a populated contianer, i.e. one with
3235 if (is_failed(&dl
->disk
))
3237 else if (is_spare(&dl
->disk
))
3243 if (i
>= mpb
->num_disks
) {
3244 struct intel_disk
*idisk
;
3246 idisk
= disk_list_get(dl
->serial
, disk_list
);
3247 if (idisk
&& is_spare(&idisk
->disk
) &&
3248 !is_failed(&idisk
->disk
) && !is_configured(&idisk
->disk
))
3256 dl
->next
= champion
->disks
;
3257 champion
->disks
= dl
;
3261 /* delete 'champion' from super_list */
3262 for (del
= super_list
; *del
; ) {
3263 if (*del
== champion
) {
3264 *del
= (*del
)->next
;
3267 del
= &(*del
)->next
;
3269 champion
->next
= NULL
;
3273 struct intel_disk
*idisk
= disk_list
;
3275 disk_list
= disk_list
->next
;
/* Load the metadata of every member of the container open on @fd.
 *
 * sysfs_read() supplies the member list; the array has to report
 * major_version -1, minor_version -2 and text_version "imsm".  For
 * each member a fresh intel_super is allocated and linked on
 * super_list, the device is opened by "major:minor", its controller
 * is checked with find_intel_hba_capability() and the metadata is
 * read with load_and_parse_mpb() (keep_fd = 1).  A load that fails
 * with err == 3 while mdmon is running is retried up to three times.
 * imsm_thunderdome() then picks the super to keep, find_missing()
 * adds placeholders for absent disks and the remaining list entries
 * are walked.  st->container_dev is set to the device number, and
 * when err is 0 and st->ss is still NULL, st is pointed at
 * super_imsm with its defaults.
 */
3282 static int load_super_imsm_all(struct supertype
*st
, int fd
, void **sbp
,
3286 struct intel_super
*super_list
= NULL
;
3287 struct intel_super
*super
= NULL
;
3288 int devnum
= fd2devnum(fd
);
3294 /* check if 'fd' an opened container */
3295 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3299 if (sra
->array
.major_version
!= -1 ||
3300 sra
->array
.minor_version
!= -2 ||
3301 strcmp(sra
->text_version
, "imsm") != 0) {
3306 for (sd
= sra
->devs
, i
= 0; sd
; sd
= sd
->next
, i
++) {
3307 struct intel_super
*s
= alloc_super();
3315 s
->next
= super_list
;
3319 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3320 dfd
= dev_open(nm
, O_RDWR
);
3324 rv
= find_intel_hba_capability(dfd
, s
, devname
);
3325 /* no orom/efi or non-intel hba of the disk */
3329 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3331 /* retry the load if we might have raced against mdmon */
3332 if (err
== 3 && mdmon_running(devnum
))
3333 for (retry
= 0; retry
< 3; retry
++) {
3335 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3343 /* all mpbs enter, maybe one leaves */
3344 super
= imsm_thunderdome(&super_list
, i
);
3350 if (find_missing(super
) != 0) {
3358 while (super_list
) {
3359 struct intel_super
*s
= super_list
;
3361 super_list
= super_list
->next
;
3370 st
->container_dev
= devnum
;
3371 if (err
== 0 && st
->ss
== NULL
) {
3372 st
->ss
= &super_imsm
;
3373 st
->minor_version
= 0;
3374 st
->max_devs
= IMSM_MAX_DEVICES
;
/* Thin wrapper: forwards to load_super_imsm_all() with &st->sb as the
 * place to store the loaded super.
 */
3379 static int load_container_imsm(struct supertype
*st
, int fd
, char *devname
)
3381 return load_super_imsm_all(st
, fd
, &st
->sb
, devname
);
/* Load imsm metadata from the single device open on @fd.
 *
 * test_partition() is consulted first (imsm is not allowed on
 * partitions), any super already attached to @st is released with
 * free_super_imsm(), and a new one is allocated.  A non-zero result
 * from find_intel_hba_capability() is only reported as missing
 * OROM/EFI properties when st->ignore_hw_compat is 0.  The metadata
 * is read with load_and_parse_mpb() (keep_fd = 0).  When st->ss is
 * still NULL, st is pointed at super_imsm with its defaults.
 */
3385 static int load_super_imsm(struct supertype
*st
, int fd
, char *devname
)
3387 struct intel_super
*super
;
3390 if (test_partition(fd
))
3391 /* IMSM not allowed on partitions */
3394 free_super_imsm(st
);
3396 super
= alloc_super();
3399 Name
": malloc of %zu failed.\n",
3403 /* Load hba and capabilities if they exist.
3404 * But do not preclude loading metadata in case capabilities or hba are
3405 * non-compliant and ignore_hw_compat is set.
3407 rv
= find_intel_hba_capability(fd
, super
, devname
);
3408 /* no orom/efi or non-intel hba of the disk */
3409 if ((rv
!= 0) && (st
->ignore_hw_compat
== 0)) {
3412 Name
": No OROM/EFI properties for %s\n", devname
);
3416 rv
= load_and_parse_mpb(fd
, super
, devname
, 0);
3421 Name
": Failed to load all information "
3422 "sections on %s\n", devname
);
3428 if (st
->ss
== NULL
) {
3429 st
->ss
= &super_imsm
;
3430 st
->minor_version
= 0;
3431 st
->max_devs
= IMSM_MAX_DEVICES
;
/* Strip size for @info: chunk_size >> 9 (chunk_size divided by 512).
 * Level 1 is special-cased ahead of that computation.
 */
3436 static __u16
info_to_blocks_per_strip(mdu_array_info_t
*info
)
3438 if (info
->level
== 1)
3440 return info
->chunk_size
>> 9;
/* Stripe count for @info: (info->size * 2) divided by the strip size
 * from info_to_blocks_per_strip(), then divided by @num_domains.
 */
3443 static __u32
info_to_num_data_stripes(mdu_array_info_t
*info
, int num_domains
)
3447 num_stripes
= (info
->size
* 2) / info_to_blocks_per_strip(info
);
3448 num_stripes
/= num_domains
;
/* Per-member size for @info.  Level 1 returns info->size * 2
 * unchanged; other levels mask that value with ~(strip - 1), which
 * rounds it down to a strip multiple when the strip size is a power
 * of two.
 */
3453 static __u32
info_to_blocks_per_member(mdu_array_info_t
*info
)
3455 if (info
->level
== 1)
3456 return info
->size
* 2;
3458 return (info
->size
* 2) & ~(info_to_blocks_per_strip(info
) - 1);
/* Refresh mpb->attributes and the version string stored after the
 * signature in mpb->sig.
 *
 * The loop visits every raid device; a non-zero size_high sets
 * MPB_ATTRIB_2TB.  With more than one raid device, or attributes
 * other than exactly MPB_ATTRIB_CHECKSUM_VERIFY, the version becomes
 * MPB_VERSION_ATTRIBS and the attribute bit for the raid level is
 * OR-ed in.  The remaining ladder picks an older version string from
 * member count, DEV_CLONE_N_GO status and raid level.
 * NOTE(review): presumably that ladder is the else branch of the
 * test above, confirm.  'dev' and 'map' after the loop refer to the
 * last device visited.
 */
3461 static void imsm_update_version_info(struct intel_super
*super
)
3463 /* update the version and attributes */
3464 struct imsm_super
*mpb
= super
->anchor
;
3466 struct imsm_dev
*dev
;
3467 struct imsm_map
*map
;
3470 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3471 dev
= get_imsm_dev(super
, i
);
3472 map
= get_imsm_map(dev
, 0);
3473 if (__le32_to_cpu(dev
->size_high
) > 0)
3474 mpb
->attributes
|= MPB_ATTRIB_2TB
;
3476 /* FIXME detect when an array spans a port multiplier */
3478 mpb
->attributes
|= MPB_ATTRIB_PM
;
3481 if (mpb
->num_raid_devs
> 1 ||
3482 mpb
->attributes
!= MPB_ATTRIB_CHECKSUM_VERIFY
) {
3483 version
= MPB_VERSION_ATTRIBS
;
3484 switch (get_imsm_raid_level(map
)) {
3485 case 0: mpb
->attributes
|= MPB_ATTRIB_RAID0
; break;
3486 case 1: mpb
->attributes
|= MPB_ATTRIB_RAID1
; break;
3487 case 10: mpb
->attributes
|= MPB_ATTRIB_RAID10
; break;
3488 case 5: mpb
->attributes
|= MPB_ATTRIB_RAID5
; break;
3491 if (map
->num_members
>= 5)
3492 version
= MPB_VERSION_5OR6_DISK_ARRAY
;
3493 else if (dev
->status
== DEV_CLONE_N_GO
)
3494 version
= MPB_VERSION_CNG
;
3495 else if (get_imsm_raid_level(map
) == 5)
3496 version
= MPB_VERSION_RAID5
;
3497 else if (map
->num_members
>= 3)
3498 version
= MPB_VERSION_3OR4_DISK_ARRAY
;
3499 else if (get_imsm_raid_level(map
) == 1)
3500 version
= MPB_VERSION_RAID1
;
3502 version
= MPB_VERSION_RAID0
;
3504 strcpy(((char *) mpb
->sig
) + strlen(MPB_SIGNATURE
), version
);
/* Validate @name as a new volume name for @super.
 *
 * Two problems are detected: a name longer than MAX_RAID_SERIAL_LEN
 * ("must be 16 characters or less") and a name that compares equal,
 * over MAX_RAID_SERIAL_LEN bytes, to an existing volume ("already
 * exists").  The reason is printed to stderr unless @quiet is set.
 * init_super_imsm_volume() treats a zero return as rejection.
 */
3508 static int check_name(struct intel_super
*super
, char *name
, int quiet
)
3510 struct imsm_super
*mpb
= super
->anchor
;
3511 char *reason
= NULL
;
3514 if (strlen(name
) > MAX_RAID_SERIAL_LEN
)
3515 reason
= "must be 16 characters or less";
3517 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3518 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
3520 if (strncmp((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
) == 0) {
3521 reason
= "already exists";
3526 if (reason
&& !quiet
)
3527 fprintf(stderr
, Name
": imsm volume name %s\n", reason
);
/* Create a new volume description inside the container held in st->sb.
 *
 * Visible steps:
 *  - refuse when the platform limit super->orom->vpa is reached;
 *  - grow the mpb buffer when disks_to_mpb_size(info->nr_disks)
 *    exceeds the current mpb_size (new 512-byte aligned buffer, old
 *    contents copied, tail zeroed);
 *  - select the new volume (current_vol = old num_raid_devs) and
 *    validate @name with check_name();
 *  - allocate the intel_dev wrapper and an imsm_dev sized for
 *    info->raid_disks ordinals, and copy in @name;
 *  - compute the array size (rounded down to a whole MB) and store it
 *    in size_low / size_high;
 *  - fill map 0: offset from super->create_offset, per-member and
 *    per-strip sizes, failed_disk_num = ~0, map_state uninitialized
 *    for levels other than 0, raid_level (10 is stored as 1 with
 *    raid_disks / 2 domains), num_data_stripes, num_members, and
 *    every ordinal preset to IMSM_ORD_REBUILD;
 *  - bump num_raid_devs, link the wrapper on super->devlist and call
 *    imsm_update_version_info().
 *
 * NOTE(review): the raid1 error message is built from
 * "...more than 2 disks" and "in a raid1 volume\n" with no space in
 * between, so it prints "disksin", confirm and fix.
 */
3532 static int init_super_imsm_volume(struct supertype
*st
, mdu_array_info_t
*info
,
3533 unsigned long long size
, char *name
,
3534 char *homehost
, int *uuid
)
3536 /* We are creating a volume inside a pre-existing container.
3537 * so st->sb is already set.
3539 struct intel_super
*super
= st
->sb
;
3540 struct imsm_super
*mpb
= super
->anchor
;
3541 struct intel_dev
*dv
;
3542 struct imsm_dev
*dev
;
3543 struct imsm_vol
*vol
;
3544 struct imsm_map
*map
;
3545 int idx
= mpb
->num_raid_devs
;
3547 unsigned long long array_blocks
;
3548 size_t size_old
, size_new
;
3549 __u32 num_data_stripes
;
3551 if (super
->orom
&& mpb
->num_raid_devs
>= super
->orom
->vpa
) {
3552 fprintf(stderr
, Name
": This imsm-container already has the "
3553 "maximum of %d volumes\n", super
->orom
->vpa
);
3557 /* ensure the mpb is large enough for the new data */
3558 size_old
= __le32_to_cpu(mpb
->mpb_size
);
3559 size_new
= disks_to_mpb_size(info
->nr_disks
);
3560 if (size_new
> size_old
) {
3562 size_t size_round
= ROUND_UP(size_new
, 512);
3564 if (posix_memalign(&mpb_new
, 512, size_round
) != 0) {
3565 fprintf(stderr
, Name
": could not allocate new mpb\n");
3568 memcpy(mpb_new
, mpb
, size_old
);
3571 super
->anchor
= mpb_new
;
3572 mpb
->mpb_size
= __cpu_to_le32(size_new
);
3573 memset(mpb_new
+ size_old
, 0, size_round
- size_old
);
3575 super
->current_vol
= idx
;
3576 /* when creating the first raid device in this container set num_disks
3577 * to zero, i.e. delete this spare and add raid member devices in
3578 * add_to_super_imsm_volume()
3580 if (super
->current_vol
== 0)
3583 if (!check_name(super
, name
, 0))
3585 dv
= malloc(sizeof(*dv
));
3587 fprintf(stderr
, Name
": failed to allocate device list entry\n");
3590 dev
= calloc(1, sizeof(*dev
) + sizeof(__u32
) * (info
->raid_disks
- 1));
3593 fprintf(stderr
, Name
": could not allocate raid device\n");
3597 strncpy((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
);
3598 if (info
->level
== 1)
3599 array_blocks
= info_to_blocks_per_member(info
);
3601 array_blocks
= calc_array_size(info
->level
, info
->raid_disks
,
3602 info
->layout
, info
->chunk_size
,
3604 /* round array size down to closest MB */
3605 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
3607 dev
->size_low
= __cpu_to_le32((__u32
) array_blocks
);
3608 dev
->size_high
= __cpu_to_le32((__u32
) (array_blocks
>> 32));
3609 dev
->status
= (DEV_READ_COALESCING
| DEV_WRITE_COALESCING
);
3611 vol
->migr_state
= 0;
3612 set_migr_type(dev
, MIGR_INIT
);
3614 vol
->curr_migr_unit
= 0;
3615 map
= get_imsm_map(dev
, 0);
3616 map
->pba_of_lba0
= __cpu_to_le32(super
->create_offset
);
3617 map
->blocks_per_member
= __cpu_to_le32(info_to_blocks_per_member(info
));
3618 map
->blocks_per_strip
= __cpu_to_le16(info_to_blocks_per_strip(info
));
3619 map
->failed_disk_num
= ~0;
3620 map
->map_state
= info
->level
? IMSM_T_STATE_UNINITIALIZED
:
3621 IMSM_T_STATE_NORMAL
;
3624 if (info
->level
== 1 && info
->raid_disks
> 2) {
3627 fprintf(stderr
, Name
": imsm does not support more than 2 disks"
3628 "in a raid1 volume\n");
3632 map
->raid_level
= info
->level
;
3633 if (info
->level
== 10) {
3634 map
->raid_level
= 1;
3635 map
->num_domains
= info
->raid_disks
/ 2;
3636 } else if (info
->level
== 1)
3637 map
->num_domains
= info
->raid_disks
;
3639 map
->num_domains
= 1;
3641 num_data_stripes
= info_to_num_data_stripes(info
, map
->num_domains
);
3642 map
->num_data_stripes
= __cpu_to_le32(num_data_stripes
);
3644 map
->num_members
= info
->raid_disks
;
3645 for (i
= 0; i
< map
->num_members
; i
++) {
3646 /* initialized in add_to_super */
3647 set_imsm_ord_tbl_ent(map
, i
, IMSM_ORD_REBUILD
);
3649 mpb
->num_raid_devs
++;
3652 dv
->index
= super
->current_vol
;
3653 dv
->next
= super
->devlist
;
3654 super
->devlist
= dv
;
3656 imsm_update_version_info(super
);
/* Create hook for a brand new container.
 *
 * One path hands the whole request to init_super_imsm_volume().
 * NOTE(review): presumably taken when st->sb already holds a
 * container, confirm.
 * Otherwise the mpb size is derived from info->nr_disks with
 * disks_to_mpb_size(), an intel_super and a 512-byte aligned buffer of
 * that size are allocated, the buffer is zeroed, mpb_size and
 * attributes (MPB_ATTRIB_CHECKSUM_VERIFY) are set, and mpb->sig is
 * filled with MPB_SIGNATURE followed by MPB_VERSION_RAID0.
 */
3661 static int init_super_imsm(struct supertype
*st
, mdu_array_info_t
*info
,
3662 unsigned long long size
, char *name
,
3663 char *homehost
, int *uuid
)
3665 /* This is primarily called by Create when creating a new array.
3666 * We will then get add_to_super called for each component, and then
3667 * write_init_super called to write it out to each device.
3668 * For IMSM, Create can create on fresh devices or on a pre-existing
3670 * To create on a pre-existing array a different method will be called.
3671 * This one is just for fresh drives.
3673 struct intel_super
*super
;
3674 struct imsm_super
*mpb
;
3679 return init_super_imsm_volume(st
, info
, size
, name
, homehost
, uuid
);
3682 mpb_size
= disks_to_mpb_size(info
->nr_disks
);
3686 super
= alloc_super();
3687 if (super
&& posix_memalign(&super
->buf
, 512, mpb_size
) != 0) {
3692 fprintf(stderr
, Name
3693 ": %s could not allocate superblock\n", __func__
);
3696 memset(super
->buf
, 0, mpb_size
);
3698 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
3702 /* zeroing superblock */
3706 mpb
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
3708 version
= (char *) mpb
->sig
;
3709 strcpy(version
, MPB_SIGNATURE
);
3710 version
+= strlen(MPB_SIGNATURE
);
3711 strcpy(version
, MPB_VERSION_RAID0
);
/* Bind the disk described by @dk to the volume under construction
 * (super->current_vol).
 *
 * The disk must carry the MD_DISK_SYNC state bit; spares are refused.
 * Its struct dl is found on super->disks either by the pre-marked
 * raiddisk (autolayout) or by major/minor; a device that is not on
 * the list is reported as not belonging to the container.  A dl with
 * a negative index gets the next free index and num_disks is bumped.
 * get_imsm_disk_slot() and the IMSM_ORD_REBUILD bit are used to
 * detect a disk added twice.  The ordinal at dk->number is then set
 * to dl->index and the disk status becomes CONFIGURED_DISK.  For the
 * first volume, family_num is derived from __gen_imsm_checksum() and
 * copied to orig_family_num.
 */
3717 static int add_to_super_imsm_volume(struct supertype
*st
, mdu_disk_info_t
*dk
,
3718 int fd
, char *devname
)
3720 struct intel_super
*super
= st
->sb
;
3721 struct imsm_super
*mpb
= super
->anchor
;
3723 struct imsm_dev
*dev
;
3724 struct imsm_map
*map
;
3727 dev
= get_imsm_dev(super
, super
->current_vol
);
3728 map
= get_imsm_map(dev
, 0);
3730 if (! (dk
->state
& (1<<MD_DISK_SYNC
))) {
3731 fprintf(stderr
, Name
": %s: Cannot add spare devices to IMSM volume\n",
3737 /* we're doing autolayout so grab the pre-marked (in
3738 * validate_geometry) raid_disk
3740 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
3741 if (dl
->raiddisk
== dk
->raid_disk
)
3744 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
3745 if (dl
->major
== dk
->major
&&
3746 dl
->minor
== dk
->minor
)
3751 fprintf(stderr
, Name
": %s is not a member of the same container\n", devname
);
3755 /* add a pristine spare to the metadata */
3756 if (dl
->index
< 0) {
3757 dl
->index
= super
->anchor
->num_disks
;
3758 super
->anchor
->num_disks
++;
3760 /* Check the device has not already been added */
3761 slot
= get_imsm_disk_slot(map
, dl
->index
);
3763 (get_imsm_ord_tbl_ent(dev
, slot
, -1) & IMSM_ORD_REBUILD
) == 0) {
3764 fprintf(stderr
, Name
": %s has been included in this array twice\n",
3768 set_imsm_ord_tbl_ent(map
, dk
->number
, dl
->index
);
3769 dl
->disk
.status
= CONFIGURED_DISK
;
3771 /* if we are creating the first raid device update the family number */
3772 if (super
->current_vol
== 0) {
3774 struct imsm_dev
*_dev
= __get_imsm_dev(mpb
, 0);
3775 struct imsm_disk
*_disk
= __get_imsm_disk(mpb
, dl
->index
);
3777 if (!_dev
|| !_disk
) {
3778 fprintf(stderr
, Name
": BUG mpb setup error\n");
3784 sum
+= __gen_imsm_checksum(mpb
);
3785 mpb
->family_num
= __cpu_to_le32(sum
);
3786 mpb
->orig_family_num
= mpb
->family_num
;
/* Add the device open on @fd to the container in st->sb.
 *
 * The controller check is done with find_intel_hba_capability().
 * While a volume is being created (current_vol >= 0) the request is
 * forwarded to add_to_super_imsm_volume().  Otherwise a zeroed struct
 * dl is built: major/minor from stb.st_rdev, a copy of @devname,
 * action DISK_ADD, the serial from imsm_read_serial(), total_blocks
 * from the size obtained with get_dev_size(), status SPARE_DISK, and
 * scsi_id from sysfs_disk_to_scsi_id() (0 when that lookup fails).
 * With st->update_tail set the entry is queued on
 * super->disk_mgmt_list; the other branch links it in front of
 * super->disks.  super->updates_pending is incremented.
 */
3793 static int add_to_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
,
3794 int fd
, char *devname
)
3796 struct intel_super
*super
= st
->sb
;
3798 unsigned long long size
;
3803 /* If we are on an RAID enabled platform check that the disk is
3804 * attached to the raid controller.
3805 * We do not need to test disks attachment for container based additions,
3806 * they shall be already tested when container was created/assembled.
3808 rv
= find_intel_hba_capability(fd
, super
, devname
);
3809 /* no orom/efi or non-intel hba of the disk */
3811 dprintf("capability: %p fd: %d ret: %d\n",
3812 super
->orom
, fd
, rv
);
3816 if (super
->current_vol
>= 0)
3817 return add_to_super_imsm_volume(st
, dk
, fd
, devname
);
3820 dd
= malloc(sizeof(*dd
));
3823 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
3826 memset(dd
, 0, sizeof(*dd
));
3827 dd
->major
= major(stb
.st_rdev
);
3828 dd
->minor
= minor(stb
.st_rdev
);
3830 dd
->devname
= devname
? strdup(devname
) : NULL
;
3833 dd
->action
= DISK_ADD
;
3834 rv
= imsm_read_serial(fd
, devname
, dd
->serial
);
3837 Name
": failed to retrieve scsi serial, aborting\n");
3842 get_dev_size(fd
, NULL
, &size
);
3844 serialcpy(dd
->disk
.serial
, dd
->serial
);
3845 dd
->disk
.total_blocks
= __cpu_to_le32(size
);
3846 dd
->disk
.status
= SPARE_DISK
;
3847 if (sysfs_disk_to_scsi_id(fd
, &id
) == 0)
3848 dd
->disk
.scsi_id
= __cpu_to_le32(id
);
3850 dd
->disk
.scsi_id
= __cpu_to_le32(0);
3852 if (st
->update_tail
) {
3853 dd
->next
= super
->disk_mgmt_list
;
3854 super
->disk_mgmt_list
= dd
;
3856 dd
->next
= super
->disks
;
3858 super
->updates_pending
++;
/* Queue a request to remove the disk identified by dk->major and
 * dk->minor.
 *
 * Only meaningful when st->update_tail is set; without it an error
 * naming the mdmon-only restriction is printed.  A zeroed struct dl
 * is filled with the major/minor pair, status SPARE_DISK and action
 * DISK_REMOVE, then pushed on the head of super->disk_mgmt_list.
 */
3865 static int remove_from_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
)
3867 struct intel_super
*super
= st
->sb
;
3870 /* remove from super works only in mdmon - for communication
3871 * manager - monitor. Check if communication memory buffer
3874 if (!st
->update_tail
) {
3876 Name
": %s shall be used in mdmon context only"
3877 "(line %d).\n", __func__
, __LINE__
);
3880 dd
= malloc(sizeof(*dd
));
3883 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
3886 memset(dd
, 0, sizeof(*dd
));
3887 dd
->major
= dk
->major
;
3888 dd
->minor
= dk
->minor
;
3891 dd
->disk
.status
= SPARE_DISK
;
3892 dd
->action
= DISK_REMOVE
;
3894 dd
->next
= super
->disk_mgmt_list
;
3895 super
->disk_mgmt_list
= dd
;
3901 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
);
3905 struct imsm_super anchor
;
3906 } spare_record
__attribute__ ((aligned(512)));
3908 /* spare records have their own family number and do not have any defined raid
/* Write a single-disk anchor for disks on super->disks, using the
 * file-scope spare_record as the buffer.
 *
 * Fixed fields: mpb_size = sizeof(struct imsm_super), generation 1,
 * attributes MPB_ATTRIB_CHECKSUM_VERIFY, one disk, no raid devices,
 * cache_size copied from the container mpb, pwr_cycle_count 1 and a
 * signature of MPB_SIGNATURE followed by MPB_VERSION_RAID0.  Per disk
 * the record gets the disk entry, a family_num taken from
 * __gen_imsm_checksum(), orig_family_num 0 and a recomputed
 * check_sum, and is written with store_imsm_mpb(); a failure is
 * reported with the device numbers and strerror(errno).
 *
 * NOTE(review): several field assignments end in ',' instead of ';'.
 * They still run in order as one comma expression, but ';' is
 * presumably what was meant.
 */
3911 static int write_super_imsm_spares(struct intel_super
*super
, int doclose
)
3913 struct imsm_super
*mpb
= super
->anchor
;
3914 struct imsm_super
*spare
= &spare_record
.anchor
;
3918 spare
->mpb_size
= __cpu_to_le32(sizeof(struct imsm_super
)),
3919 spare
->generation_num
= __cpu_to_le32(1UL),
3920 spare
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
3921 spare
->num_disks
= 1,
3922 spare
->num_raid_devs
= 0,
3923 spare
->cache_size
= mpb
->cache_size
,
3924 spare
->pwr_cycle_count
= __cpu_to_le32(1),
3926 snprintf((char *) spare
->sig
, MAX_SIGNATURE_LENGTH
,
3927 MPB_SIGNATURE MPB_VERSION_RAID0
);
3929 for (d
= super
->disks
; d
; d
= d
->next
) {
3933 spare
->disk
[0] = d
->disk
;
3934 sum
= __gen_imsm_checksum(spare
);
3935 spare
->family_num
= __cpu_to_le32(sum
);
3936 spare
->orig_family_num
= 0;
3937 sum
= __gen_imsm_checksum(spare
);
3938 spare
->check_sum
= __cpu_to_le32(sum
);
3940 if (store_imsm_mpb(d
->fd
, spare
)) {
3941 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
3942 __func__
, d
->major
, d
->minor
, strerror(errno
));
/* Serialise the in-memory state of st->sb into its mpb and write it
 * out.
 *
 * generation_num is rewritten, and orig_family_num is filled from
 * family_num when it is 0.  Disk records from super->disks and
 * super->missing are stored at mpb->disk[index] and num_disks is
 * updated.  Each raid device is copied from the device list into the
 * mpb with imsm_copy_dev().  mpb_size is rebuilt as header + disk
 * records + device records + bbm_log_size, and check_sum is
 * recomputed.  The mpb is then written with store_imsm_mpb() for
 * disks on super->disks, reporting failures.  A visible exit path
 * returns write_super_imsm_spares(super, doclose).
 */
3954 static int write_super_imsm(struct supertype
*st
, int doclose
)
3956 struct intel_super
*super
= st
->sb
;
3957 struct imsm_super
*mpb
= super
->anchor
;
3963 __u32 mpb_size
= sizeof(struct imsm_super
) - sizeof(struct imsm_disk
);
3966 /* 'generation' is incremented every time the metadata is written */
3967 generation
= __le32_to_cpu(mpb
->generation_num
);
3969 mpb
->generation_num
= __cpu_to_le32(generation
);
3971 /* fix up cases where previous mdadm releases failed to set
3974 if (mpb
->orig_family_num
== 0)
3975 mpb
->orig_family_num
= mpb
->family_num
;
3977 for (d
= super
->disks
; d
; d
= d
->next
) {
3981 mpb
->disk
[d
->index
] = d
->disk
;
3985 for (d
= super
->missing
; d
; d
= d
->next
) {
3986 mpb
->disk
[d
->index
] = d
->disk
;
3989 mpb
->num_disks
= num_disks
;
3990 mpb_size
+= sizeof(struct imsm_disk
) * mpb
->num_disks
;
3992 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3993 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
3994 struct imsm_dev
*dev2
= get_imsm_dev(super
, i
);
3996 imsm_copy_dev(dev
, dev2
);
3997 mpb_size
+= sizeof_imsm_dev(dev
, 0);
4000 mpb_size
+= __le32_to_cpu(mpb
->bbm_log_size
);
4001 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4003 /* recalculate checksum */
4004 sum
= __gen_imsm_checksum(mpb
);
4005 mpb
->check_sum
= __cpu_to_le32(sum
);
4007 /* write the mpb for disks that compose raid devices */
4008 for (d
= super
->disks
; d
; d
= d
->next
) {
4011 if (store_imsm_mpb(d
->fd
, mpb
))
4012 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4013 __func__
, d
->major
, d
->minor
, strerror(errno
));
4021 return write_super_imsm_spares(super
, doclose
);
4027 static int create_array(struct supertype
*st
, int dev_idx
)
4030 struct imsm_update_create_array
*u
;
4031 struct intel_super
*super
= st
->sb
;
4032 struct imsm_dev
*dev
= get_imsm_dev(super
, dev_idx
);
4033 struct imsm_map
*map
= get_imsm_map(dev
, 0);
4034 struct disk_info
*inf
;
4035 struct imsm_disk
*disk
;
4038 len
= sizeof(*u
) - sizeof(*dev
) + sizeof_imsm_dev(dev
, 0) +
4039 sizeof(*inf
) * map
->num_members
;
4042 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4047 u
->type
= update_create_array
;
4048 u
->dev_idx
= dev_idx
;
4049 imsm_copy_dev(&u
->dev
, dev
);
4050 inf
= get_disk_info(u
);
4051 for (i
= 0; i
< map
->num_members
; i
++) {
4052 int idx
= get_imsm_disk_idx(dev
, i
, -1);
4054 disk
= get_imsm_disk(super
, idx
);
4055 serialcpy(inf
[i
].serial
, disk
->serial
);
4057 append_metadata_update(st
, u
, len
);
4062 static int mgmt_disk(struct supertype
*st
)
4064 struct intel_super
*super
= st
->sb
;
4066 struct imsm_update_add_remove_disk
*u
;
4068 if (!super
->disk_mgmt_list
)
4074 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4079 u
->type
= update_add_remove_disk
;
4080 append_metadata_update(st
, u
, len
);
4085 static int write_init_super_imsm(struct supertype
*st
)
4087 struct intel_super
*super
= st
->sb
;
4088 int current_vol
= super
->current_vol
;
4090 /* we are done with current_vol reset it to point st at the container */
4091 super
->current_vol
= -1;
4093 if (st
->update_tail
) {
4094 /* queue the recently created array / added disk
4095 * as a metadata update */
4098 /* determine if we are creating a volume or adding a disk */
4099 if (current_vol
< 0) {
4100 /* in the mgmt (add/remove) disk case we are running
4101 * in mdmon context, so don't close fd's
4103 return mgmt_disk(st
);
4105 rv
= create_array(st
, current_vol
);
4110 for (d
= super
->disks
; d
; d
= d
->next
)
4111 Kill(d
->devname
, NULL
, 0, 1, 1);
4112 return write_super_imsm(st
, 1);
4117 static int store_super_imsm(struct supertype
*st
, int fd
)
4119 struct intel_super
*super
= st
->sb
;
4120 struct imsm_super
*mpb
= super
? super
->anchor
: NULL
;
4126 return store_imsm_mpb(fd
, mpb
);
4132 static int imsm_bbm_log_size(struct imsm_super
*mpb
)
4134 return __le32_to_cpu(mpb
->bbm_log_size
);
4138 static int validate_geometry_imsm_container(struct supertype
*st
, int level
,
4139 int layout
, int raiddisks
, int chunk
,
4140 unsigned long long size
, char *dev
,
4141 unsigned long long *freesize
,
4145 unsigned long long ldsize
;
4146 struct intel_super
*super
=NULL
;
4149 if (level
!= LEVEL_CONTAINER
)
4154 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4157 fprintf(stderr
, Name
": imsm: Cannot open %s: %s\n",
4158 dev
, strerror(errno
));
4161 if (!get_dev_size(fd
, dev
, &ldsize
)) {
4166 /* capabilities retrieve could be possible
4167 * note that there is no fd for the disks in array.
4169 super
= alloc_super();
4172 Name
": malloc of %zu failed.\n",
4178 rv
= find_intel_hba_capability(fd
, super
, verbose
? dev
: NULL
);
4182 fd2devname(fd
, str
);
4183 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4184 fd
, str
, super
->orom
, rv
, raiddisks
);
4186 /* no orom/efi or non-intel hba of the disk */
4192 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4194 fprintf(stderr
, Name
": %d exceeds maximum number of"
4195 " platform supported disks: %d\n",
4196 raiddisks
, super
->orom
->tds
);
4202 *freesize
= avail_size_imsm(st
, ldsize
>> 9);
/* find_size - length of the run of overlapping extents starting at e[*idx]
 * @e: extent array
 * @idx: in: index of the first extent of the run
 * @num_extents: number of entries in @e
 *
 * 'end' starts at the end of e[*idx]; the loop then pushes it out for every
 * later extent whose start lies within [base_start, end] and whose own end
 * reaches further.  The value returned at the bottom is end - base_start.
 *
 * NOTE(review): the statement guarded by the zero-length test
 * (base_start == end), the body of the 'start > end' branch and any update
 * of *idx are not visible in this excerpt - confirm against the full file.
 */
4208 static unsigned long long find_size(struct extent
*e
, int *idx
, int num_extents
)
4210 const unsigned long long base_start
= e
[*idx
].start
;
4211 unsigned long long end
= base_start
+ e
[*idx
].size
;
4214 if (base_start
== end
)
4218 for (i
= *idx
; i
< num_extents
; i
++) {
4219 /* extend overlapping extents */
4220 if (e
[i
].start
>= base_start
&&
4221 e
[i
].start
<= end
) {
4224 if (e
[i
].start
+ e
[i
].size
> end
)
4225 end
= e
[i
].start
+ e
[i
].size
;
4226 } else if (e
[i
].start
> end
) {
4232 return end
- base_start
;
4235 static unsigned long long merge_extents(struct intel_super
*super
, int sum_extents
)
4237 /* build a composite disk with all known extents and generate a new
4238 * 'maxsize' given the "all disks in an array must share a common start
4239 * offset" constraint
4241 struct extent
*e
= calloc(sum_extents
, sizeof(*e
));
4245 unsigned long long pos
;
4246 unsigned long long start
= 0;
4247 unsigned long long maxsize
;
4248 unsigned long reserve
;
4253 /* coalesce and sort all extents. also, check to see if we need to
4254 * reserve space between member arrays
4257 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4260 for (i
= 0; i
< dl
->extent_cnt
; i
++)
4263 qsort(e
, sum_extents
, sizeof(*e
), cmp_extent
);
4268 while (i
< sum_extents
) {
4269 e
[j
].start
= e
[i
].start
;
4270 e
[j
].size
= find_size(e
, &i
, sum_extents
);
4272 if (e
[j
-1].size
== 0)
4281 unsigned long long esize
;
4283 esize
= e
[i
].start
- pos
;
4284 if (esize
>= maxsize
) {
4289 pos
= e
[i
].start
+ e
[i
].size
;
4291 } while (e
[i
-1].size
);
4297 /* FIXME assumes volume at offset 0 is the first volume in a
4300 if (start_extent
> 0)
4301 reserve
= IMSM_RESERVED_SECTORS
; /* gap between raid regions */
4305 if (maxsize
< reserve
)
4308 super
->create_offset
= ~((__u32
) 0);
4309 if (start
+ reserve
> super
->create_offset
)
4310 return 0; /* start overflows create_offset */
4311 super
->create_offset
= start
+ reserve
;
4313 return maxsize
- reserve
;
4316 static int is_raid_level_supported(const struct imsm_orom
*orom
, int level
, int raiddisks
)
4318 if (level
< 0 || level
== 6 || level
== 4)
4321 /* if we have an orom prevent invalid raid levels */
4324 case 0: return imsm_orom_has_raid0(orom
);
4327 return imsm_orom_has_raid1e(orom
);
4328 return imsm_orom_has_raid1(orom
) && raiddisks
== 2;
4329 case 10: return imsm_orom_has_raid10(orom
) && raiddisks
== 4;
4330 case 5: return imsm_orom_has_raid5(orom
) && raiddisks
> 2;
4333 return 1; /* not on an Intel RAID platform so anything goes */
4339 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4341 * validate volume parameters with OROM/EFI capabilities
4344 validate_geometry_imsm_orom(struct intel_super
*super
, int level
, int layout
,
4345 int raiddisks
, int *chunk
, int verbose
)
4350 /* validate container capabilities */
4351 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4353 fprintf(stderr
, Name
": %d exceeds maximum number of"
4354 " platform supported disks: %d\n",
4355 raiddisks
, super
->orom
->tds
);
4359 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4360 if (super
->orom
&& (!is_raid_level_supported(super
->orom
, level
,
4362 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4363 level
, raiddisks
, raiddisks
> 1 ? "s" : "");
4366 if (super
->orom
&& level
!= 1) {
4367 if (chunk
&& (*chunk
== 0 || *chunk
== UnSet
))
4368 *chunk
= imsm_orom_default_chunk(super
->orom
);
4369 else if (chunk
&& !imsm_orom_has_chunk(super
->orom
, *chunk
)) {
4370 pr_vrb(": platform does not support a chunk size of: "
4375 if (layout
!= imsm_level_to_layout(level
)) {
4377 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4378 else if (level
== 10)
4379 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4381 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4388 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4389 * FIX ME add ahci details
4391 static int validate_geometry_imsm_volume(struct supertype
*st
, int level
,
4392 int layout
, int raiddisks
, int *chunk
,
4393 unsigned long long size
, char *dev
,
4394 unsigned long long *freesize
,
4398 struct intel_super
*super
= st
->sb
;
4399 struct imsm_super
*mpb
= super
->anchor
;
4401 unsigned long long pos
= 0;
4402 unsigned long long maxsize
;
4406 /* We must have the container info already read in. */
4410 if (!validate_geometry_imsm_orom(super
, level
, layout
, raiddisks
, chunk
, verbose
)) {
4411 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4412 "Cannot proceed with the action(s).\n");
4416 /* General test: make sure there is space for
4417 * 'raiddisks' device extents of size 'size' at a given
4420 unsigned long long minsize
= size
;
4421 unsigned long long start_offset
= MaxSector
;
4424 minsize
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
4425 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4430 e
= get_extents(super
, dl
);
4433 unsigned long long esize
;
4434 esize
= e
[i
].start
- pos
;
4435 if (esize
>= minsize
)
4437 if (found
&& start_offset
== MaxSector
) {
4440 } else if (found
&& pos
!= start_offset
) {
4444 pos
= e
[i
].start
+ e
[i
].size
;
4446 } while (e
[i
-1].size
);
4451 if (dcnt
< raiddisks
) {
4453 fprintf(stderr
, Name
": imsm: Not enough "
4454 "devices with space for this array "
4462 /* This device must be a member of the set */
4463 if (stat(dev
, &stb
) < 0)
4465 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
4467 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4468 if (dl
->major
== (int)major(stb
.st_rdev
) &&
4469 dl
->minor
== (int)minor(stb
.st_rdev
))
4474 fprintf(stderr
, Name
": %s is not in the "
4475 "same imsm set\n", dev
);
4477 } else if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
) {
4478 /* If a volume is present then the current creation attempt
4479 * cannot incorporate new spares because the orom may not
4480 * understand this configuration (all member disks must be
4481 * members of each array in the container).
4483 fprintf(stderr
, Name
": %s is a spare and a volume"
4484 " is already defined for this container\n", dev
);
4485 fprintf(stderr
, Name
": The option-rom requires all member"
4486 " disks to be a member of all volumes\n");
4490 /* retrieve the largest free space block */
4491 e
= get_extents(super
, dl
);
4496 unsigned long long esize
;
4498 esize
= e
[i
].start
- pos
;
4499 if (esize
>= maxsize
)
4501 pos
= e
[i
].start
+ e
[i
].size
;
4503 } while (e
[i
-1].size
);
4508 fprintf(stderr
, Name
": unable to determine free space for: %s\n",
4512 if (maxsize
< size
) {
4514 fprintf(stderr
, Name
": %s not enough space (%llu < %llu)\n",
4515 dev
, maxsize
, size
);
4519 /* count total number of extents for merge */
4521 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4523 i
+= dl
->extent_cnt
;
4525 maxsize
= merge_extents(super
, i
);
4526 if (maxsize
< size
|| maxsize
== 0) {
4528 fprintf(stderr
, Name
": not enough space after merge (%llu < %llu)\n",
4533 *freesize
= maxsize
;
4538 static int reserve_space(struct supertype
*st
, int raiddisks
,
4539 unsigned long long size
, int chunk
,
4540 unsigned long long *freesize
)
4542 struct intel_super
*super
= st
->sb
;
4543 struct imsm_super
*mpb
= super
->anchor
;
4548 unsigned long long maxsize
;
4549 unsigned long long minsize
;
4553 /* find the largest common start free region of the possible disks */
4557 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4563 /* don't activate new spares if we are orom constrained
4564 * and there is already a volume active in the container
4566 if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
)
4569 e
= get_extents(super
, dl
);
4572 for (i
= 1; e
[i
-1].size
; i
++)
4580 maxsize
= merge_extents(super
, extent_cnt
);
4584 minsize
= chunk
* 2;
4586 if (cnt
< raiddisks
||
4587 (super
->orom
&& used
&& used
!= raiddisks
) ||
4588 maxsize
< minsize
||
4590 fprintf(stderr
, Name
": not enough devices with space to create array.\n");
4591 return 0; /* No enough free spaces large enough */
4603 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4605 dl
->raiddisk
= cnt
++;
4612 static int validate_geometry_imsm(struct supertype
*st
, int level
, int layout
,
4613 int raiddisks
, int *chunk
, unsigned long long size
,
4614 char *dev
, unsigned long long *freesize
,
4622 * if given unused devices create a container
4623 * if given devices in a container create a member volume
4625 if (level
== LEVEL_CONTAINER
) {
4626 /* Must be a fresh device to add to a container */
4627 return validate_geometry_imsm_container(st
, level
, layout
,
4629 chunk
?*chunk
:0, size
,
4635 if (st
->sb
&& freesize
) {
4636 /* we are being asked to automatically layout a
4637 * new volume based on the current contents of
4638 * the container. If the parameters can be
4639 * satisfied reserve_space will record the disks,
4640 * start offset, and size of the volume to be
4641 * created. add_to_super and getinfo_super
4642 * detect when autolayout is in progress.
4644 if (!validate_geometry_imsm_orom(st
->sb
, level
, layout
,
4648 return reserve_space(st
, raiddisks
, size
,
4649 chunk
?*chunk
:0, freesize
);
4654 /* creating in a given container */
4655 return validate_geometry_imsm_volume(st
, level
, layout
,
4656 raiddisks
, chunk
, size
,
4657 dev
, freesize
, verbose
);
4660 /* This device needs to be a device in an 'imsm' container */
4661 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4665 Name
": Cannot create this array on device %s\n",
4670 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
4672 fprintf(stderr
, Name
": Cannot open %s: %s\n",
4673 dev
, strerror(errno
));
4676 /* Well, it is in use by someone, maybe an 'imsm' container. */
4677 cfd
= open_container(fd
);
4681 fprintf(stderr
, Name
": Cannot use %s: It is busy\n",
4685 sra
= sysfs_read(cfd
, 0, GET_VERSION
);
4686 if (sra
&& sra
->array
.major_version
== -1 &&
4687 strcmp(sra
->text_version
, "imsm") == 0)
4691 /* This is a member of a imsm container. Load the container
4692 * and try to create a volume
4694 struct intel_super
*super
;
4696 if (load_super_imsm_all(st
, cfd
, (void **) &super
, NULL
) == 0) {
4698 st
->container_dev
= fd2devnum(cfd
);
4700 return validate_geometry_imsm_volume(st
, level
, layout
,
4708 fprintf(stderr
, Name
": failed container membership check\n");
4714 static void default_geometry_imsm(struct supertype
*st
, int *level
, int *layout
, int *chunk
)
4716 struct intel_super
*super
= st
->sb
;
4718 if (level
&& *level
== UnSet
)
4719 *level
= LEVEL_CONTAINER
;
4721 if (level
&& layout
&& *layout
== UnSet
)
4722 *layout
= imsm_level_to_layout(*level
);
4724 if (chunk
&& (*chunk
== UnSet
|| *chunk
== 0) &&
4725 super
&& super
->orom
)
4726 *chunk
= imsm_orom_default_chunk(super
->orom
);
4729 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
);
4731 static int kill_subarray_imsm(struct supertype
*st
)
4733 /* remove the subarray currently referenced by ->current_vol */
4735 struct intel_dev
**dp
;
4736 struct intel_super
*super
= st
->sb
;
4737 __u8 current_vol
= super
->current_vol
;
4738 struct imsm_super
*mpb
= super
->anchor
;
4740 if (super
->current_vol
< 0)
4742 super
->current_vol
= -1; /* invalidate subarray cursor */
4744 /* block deletions that would change the uuid of active subarrays
4746 * FIXME when immutable ids are available, but note that we'll
4747 * also need to fixup the invalidated/active subarray indexes in
4750 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4753 if (i
< current_vol
)
4755 sprintf(subarray
, "%u", i
);
4756 if (is_subarray_active(subarray
, st
->devname
)) {
4758 Name
": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
4765 if (st
->update_tail
) {
4766 struct imsm_update_kill_array
*u
= malloc(sizeof(*u
));
4770 u
->type
= update_kill_array
;
4771 u
->dev_idx
= current_vol
;
4772 append_metadata_update(st
, u
, sizeof(*u
));
4777 for (dp
= &super
->devlist
; *dp
;)
4778 if ((*dp
)->index
== current_vol
) {
4781 handle_missing(super
, (*dp
)->dev
);
4782 if ((*dp
)->index
> current_vol
)
4787 /* no more raid devices, all active components are now spares,
4788 * but of course failed are still failed
4790 if (--mpb
->num_raid_devs
== 0) {
4793 for (d
= super
->disks
; d
; d
= d
->next
)
4794 if (d
->index
> -2) {
4796 d
->disk
.status
= SPARE_DISK
;
4800 super
->updates_pending
++;
4805 static int update_subarray_imsm(struct supertype
*st
, char *subarray
,
4806 char *update
, struct mddev_ident
*ident
)
4808 /* update the subarray currently referenced by ->current_vol */
4809 struct intel_super
*super
= st
->sb
;
4810 struct imsm_super
*mpb
= super
->anchor
;
4812 if (strcmp(update
, "name") == 0) {
4813 char *name
= ident
->name
;
4817 if (is_subarray_active(subarray
, st
->devname
)) {
4819 Name
": Unable to update name of active subarray\n");
4823 if (!check_name(super
, name
, 0))
4826 vol
= strtoul(subarray
, &ep
, 10);
4827 if (*ep
!= '\0' || vol
>= super
->anchor
->num_raid_devs
)
4830 if (st
->update_tail
) {
4831 struct imsm_update_rename_array
*u
= malloc(sizeof(*u
));
4835 u
->type
= update_rename_array
;
4837 snprintf((char *) u
->name
, MAX_RAID_SERIAL_LEN
, "%s", name
);
4838 append_metadata_update(st
, u
, sizeof(*u
));
4840 struct imsm_dev
*dev
;
4843 dev
= get_imsm_dev(super
, vol
);
4844 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
4845 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4846 dev
= get_imsm_dev(super
, i
);
4847 handle_missing(super
, dev
);
4849 super
->updates_pending
++;
/* is_gen_migration - test whether @dev is undergoing a general migration
 *
 * Looks at dev->vol.migr_state first and then compares migr_type(dev)
 * with MIGR_GEN_MIGR.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero only when both tests hold - confirm.
 */
4857 static int is_gen_migration(struct imsm_dev
*dev
)
4859 if (!dev
->vol
.migr_state
)
4862 if (migr_type(dev
) == MIGR_GEN_MIGR
)
4867 #endif /* MDASSEMBLE */
/* is_rebuilding - test whether @dev is in the middle of a rebuild
 *
 * Tests, in order: dev->vol.migr_state is set, migr_type(dev) is
 * MIGR_REBUILD, and map 1 (get_imsm_map(dev, 1)) is in
 * IMSM_T_STATE_DEGRADED.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero only when all three tests hold - confirm.
 */
4869 static int is_rebuilding(struct imsm_dev
*dev
)
4871 struct imsm_map
*migr_map
;
4873 if (!dev
->vol
.migr_state
)
4876 if (migr_type(dev
) != MIGR_REBUILD
)
4879 migr_map
= get_imsm_map(dev
, 1);
4881 if (migr_map
->map_state
== IMSM_T_STATE_DEGRADED
)
4887 static void update_recovery_start(struct imsm_dev
*dev
, struct mdinfo
*array
)
4889 struct mdinfo
*rebuild
= NULL
;
4893 if (!is_rebuilding(dev
))
4896 /* Find the rebuild target, but punt on the dual rebuild case */
4897 for (d
= array
->devs
; d
; d
= d
->next
)
4898 if (d
->recovery_start
== 0) {
4905 /* (?) none of the disks are marked with
4906 * IMSM_ORD_REBUILD, so assume they are missing and the
4907 * disk_ord_tbl was not correctly updated
4909 dprintf("%s: failed to locate out-of-sync disk\n", __func__
);
4913 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
4914 rebuild
->recovery_start
= units
* blocks_per_migr_unit(dev
);
4918 static struct mdinfo
*container_content_imsm(struct supertype
*st
, char *subarray
)
4920 /* Given a container loaded by load_super_imsm_all,
4921 * extract information about all the arrays into
4923 * If 'subarray' is given, just extract info about that array.
4925 * For each imsm_dev create an mdinfo, fill it in,
4926 * then look for matching devices in super->disks
4927 * and create appropriate device mdinfo.
4929 struct intel_super
*super
= st
->sb
;
4930 struct imsm_super
*mpb
= super
->anchor
;
4931 struct mdinfo
*rest
= NULL
;
4935 int spare_disks
= 0;
4937 /* check for bad blocks */
4938 if (imsm_bbm_log_size(super
->anchor
))
4941 /* count spare devices, not used in maps
4943 for (d
= super
->disks
; d
; d
= d
->next
)
4947 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4948 struct imsm_dev
*dev
;
4949 struct imsm_map
*map
;
4950 struct imsm_map
*map2
;
4951 struct mdinfo
*this;
4956 (i
!= strtoul(subarray
, &ep
, 10) || *ep
!= '\0'))
4959 dev
= get_imsm_dev(super
, i
);
4960 map
= get_imsm_map(dev
, 0);
4961 map2
= get_imsm_map(dev
, 1);
4963 /* do not publish arrays that are in the middle of an
4964 * unsupported migration
4966 if (dev
->vol
.migr_state
&&
4967 (migr_type(dev
) == MIGR_STATE_CHANGE
)) {
4968 fprintf(stderr
, Name
": cannot assemble volume '%.16s':"
4969 " unsupported migration in progress\n",
4973 /* do not publish arrays that are not supported by controller's
4977 chunk
= __le16_to_cpu(map
->blocks_per_strip
) >> 1;
4979 if (!validate_geometry_imsm_orom(super
,
4980 get_imsm_raid_level(map
), /* RAID level */
4981 imsm_level_to_layout(get_imsm_raid_level(map
)),
4982 map
->num_members
, /* raid disks */
4985 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4986 "Cannot proceed with the action(s).\n");
4989 #endif /* MDASSEMBLE */
4990 this = malloc(sizeof(*this));
4992 fprintf(stderr
, Name
": failed to allocate %zu bytes\n",
4996 memset(this, 0, sizeof(*this));
4999 super
->current_vol
= i
;
5000 getinfo_super_imsm_volume(st
, this, NULL
);
5001 for (slot
= 0 ; slot
< map
->num_members
; slot
++) {
5002 unsigned long long recovery_start
;
5003 struct mdinfo
*info_d
;
5010 idx
= get_imsm_disk_idx(dev
, slot
, 0);
5011 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
5012 for (d
= super
->disks
; d
; d
= d
->next
)
5013 if (d
->index
== idx
)
5016 recovery_start
= MaxSector
;
5019 if (d
&& is_failed(&d
->disk
))
5021 if (ord
& IMSM_ORD_REBUILD
)
5025 * if we skip some disks the array will be assembled degraded;
5026 * reset resync start to avoid a dirty-degraded
5027 * situation when performing the initial sync
5029 * FIXME handle dirty degraded
5031 if ((skip
|| recovery_start
== 0) && !dev
->vol
.dirty
)
5032 this->resync_start
= MaxSector
;
5036 info_d
= calloc(1, sizeof(*info_d
));
5038 fprintf(stderr
, Name
": failed to allocate disk"
5039 " for volume %.16s\n", dev
->volume
);
5040 info_d
= this->devs
;
5042 struct mdinfo
*d
= info_d
->next
;
5051 info_d
->next
= this->devs
;
5052 this->devs
= info_d
;
5054 info_d
->disk
.number
= d
->index
;
5055 info_d
->disk
.major
= d
->major
;
5056 info_d
->disk
.minor
= d
->minor
;
5057 info_d
->disk
.raid_disk
= slot
;
5058 info_d
->recovery_start
= recovery_start
;
5060 if (slot
< map2
->num_members
)
5061 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5063 this->array
.spare_disks
++;
5065 if (slot
< map
->num_members
)
5066 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5068 this->array
.spare_disks
++;
5070 if (info_d
->recovery_start
== MaxSector
)
5071 this->array
.working_disks
++;
5073 info_d
->events
= __le32_to_cpu(mpb
->generation_num
);
5074 info_d
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5075 info_d
->component_size
= __le32_to_cpu(map
->blocks_per_member
);
5077 /* now that the disk list is up-to-date fixup recovery_start */
5078 update_recovery_start(dev
, this);
5079 this->array
.spare_disks
+= spare_disks
;
5083 /* if array has bad blocks, set suitable bit in array status */
5085 rest
->array
.state
|= (1<<MD_SB_BBM_ERRORS
);
5091 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
)
5093 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5096 return map
->map_state
== IMSM_T_STATE_UNINITIALIZED
?
5097 IMSM_T_STATE_UNINITIALIZED
: IMSM_T_STATE_NORMAL
;
5099 switch (get_imsm_raid_level(map
)) {
5101 return IMSM_T_STATE_FAILED
;
5104 if (failed
< map
->num_members
)
5105 return IMSM_T_STATE_DEGRADED
;
5107 return IMSM_T_STATE_FAILED
;
5112 * check to see if any mirrors have failed, otherwise we
5113 * are degraded. Even numbered slots are mirrored on
5117 /* gcc -Os complains that this is unused */
5118 int insync
= insync
;
5120 for (i
= 0; i
< map
->num_members
; i
++) {
5121 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
5122 int idx
= ord_to_idx(ord
);
5123 struct imsm_disk
*disk
;
5125 /* reset the potential in-sync count on even-numbered
5126 * slots. num_copies is always 2 for imsm raid10
5131 disk
= get_imsm_disk(super
, idx
);
5132 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5135 /* no in-sync disks left in this mirror the
5139 return IMSM_T_STATE_FAILED
;
5142 return IMSM_T_STATE_DEGRADED
;
5146 return IMSM_T_STATE_DEGRADED
;
5148 return IMSM_T_STATE_FAILED
;
5154 return map
->map_state
;
5157 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
)
5161 struct imsm_disk
*disk
;
5162 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5163 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
5167 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5168 * disks that are being rebuilt. New failures are recorded to
5169 * map[0]. So we look through all the disks we started with and
5170 * see if any failures are still present, or if any new ones
5173 * FIXME add support for online capacity expansion and
5174 * raid-level-migration
5176 for (i
= 0; i
< prev
->num_members
; i
++) {
5177 ord
= __le32_to_cpu(prev
->disk_ord_tbl
[i
]);
5178 ord
|= __le32_to_cpu(map
->disk_ord_tbl
[i
]);
5179 idx
= ord_to_idx(ord
);
5181 disk
= get_imsm_disk(super
, idx
);
5182 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5190 static int imsm_open_new(struct supertype
*c
, struct active_array
*a
,
5193 struct intel_super
*super
= c
->sb
;
5194 struct imsm_super
*mpb
= super
->anchor
;
5196 if (atoi(inst
) >= mpb
->num_raid_devs
) {
5197 fprintf(stderr
, "%s: subarry index %d, out of range\n",
5198 __func__
, atoi(inst
));
5202 dprintf("imsm: open_new %s\n", inst
);
5203 a
->info
.container_member
= atoi(inst
);
/* is_resyncing - test whether @dev is being initialised or resynced
 *
 * Tests, in order: dev->vol.migr_state is set; migr_type(dev) is MIGR_INIT
 * or MIGR_REPAIR; migr_type(dev) is MIGR_GEN_MIGR; and finally whether
 * map 1 (get_imsm_map(dev, 1)) is in IMSM_T_STATE_NORMAL while
 * dev->vol.migr_type is not MIGR_GEN_MIGR.  Note that the last test reads
 * the migr_type field directly whereas the earlier ones go through
 * migr_type().
 *
 * NOTE(review): the return statement attached to each test is not visible
 * in this excerpt - confirm the result of every branch against the full
 * file.
 */
5207 static int is_resyncing(struct imsm_dev
*dev
)
5209 struct imsm_map
*migr_map
;
5211 if (!dev
->vol
.migr_state
)
5214 if (migr_type(dev
) == MIGR_INIT
||
5215 migr_type(dev
) == MIGR_REPAIR
)
5218 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5221 migr_map
= get_imsm_map(dev
, 1);
5223 if ((migr_map
->map_state
== IMSM_T_STATE_NORMAL
) &&
5224 (dev
->vol
.migr_type
!= MIGR_GEN_MIGR
))
5230 /* return true if we recorded new information */
5231 static int mark_failure(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5235 struct imsm_map
*map
;
5237 /* new failures are always set in map[0] */
5238 map
= get_imsm_map(dev
, 0);
5240 slot
= get_imsm_disk_slot(map
, idx
);
5244 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
5245 if (is_failed(disk
) && (ord
& IMSM_ORD_REBUILD
))
5248 disk
->status
|= FAILED_DISK
;
5249 set_imsm_ord_tbl_ent(map
, slot
, idx
| IMSM_ORD_REBUILD
);
5250 if (map
->failed_disk_num
== 0xff)
5251 map
->failed_disk_num
= slot
;
/* mark_missing - record @disk (index @idx) as failed and flag it missing
 *
 * Calls mark_failure() for the disk, then sets disk->scsi_id to all ones
 * (stored little-endian) and moves serial[1..MAX_RAID_SERIAL_LEN - 1] down
 * by one byte with memmove(); the last byte of the serial is not rewritten
 * here.
 *
 * NOTE(review): the statement guarded by the "scsi_id is already all ones"
 * test is not visible in this excerpt; presumably an early return so the
 * serial is shifted only once per disk - confirm.
 */
5255 static void mark_missing(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5257 mark_failure(dev
, disk
, idx
);
5259 if (disk
->scsi_id
== __cpu_to_le32(~(__u32
)0))
5262 disk
->scsi_id
= __cpu_to_le32(~(__u32
)0);
5263 memmove(&disk
->serial
[0], &disk
->serial
[1], MAX_RAID_SERIAL_LEN
- 1);
/* handle_missing - account for the disks on super->missing in volume @dev
 *
 * Counts the failed members with imsm_count_failed(), derives the map
 * state with imsm_check_degraded(), ends any migration in that state via
 * end_migration(), calls mark_missing() for every entry on the
 * super->missing list and bumps super->updates_pending.
 *
 * NOTE(review): the local declarations and the statement guarded by the
 * "!super->missing" test are not visible in this excerpt; presumably an
 * early return when nothing is missing - confirm.
 */
5266 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
)
5272 if (!super
->missing
)
5274 failed
= imsm_count_failed(super
, dev
);
5275 map_state
= imsm_check_degraded(super
, dev
, failed
);
5277 dprintf("imsm: mark missing\n");
5278 end_migration(dev
, map_state
);
5279 for (dl
= super
->missing
; dl
; dl
= dl
->next
)
5280 mark_missing(dev
, &dl
->disk
, dl
->index
);
5281 super
->updates_pending
++;
5284 static unsigned long long imsm_set_array_size(struct imsm_dev
*dev
)
5286 int used_disks
= imsm_num_data_members(dev
, 0);
5287 unsigned long long array_blocks
;
5288 struct imsm_map
*map
;
5290 if (used_disks
== 0) {
5291 /* when problems occures
5292 * return current array_blocks value
5294 array_blocks
= __le32_to_cpu(dev
->size_high
);
5295 array_blocks
= array_blocks
<< 32;
5296 array_blocks
+= __le32_to_cpu(dev
->size_low
);
5298 return array_blocks
;
5301 /* set array size in metadata
5303 map
= get_imsm_map(dev
, 0);
5304 array_blocks
= map
->blocks_per_member
* used_disks
;
5306 /* round array size down to closest MB
5308 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
5309 dev
->size_low
= __cpu_to_le32((__u32
)array_blocks
);
5310 dev
->size_high
= __cpu_to_le32((__u32
)(array_blocks
>> 32));
5312 return array_blocks
;
5315 static void imsm_set_disk(struct active_array
*a
, int n
, int state
);
5317 static void imsm_progress_container_reshape(struct intel_super
*super
)
5319 /* if no device has a migr_state, but some device has a
5320 * different number of members than the previous device, start
5321 * changing the number of devices in this device to match
5324 struct imsm_super
*mpb
= super
->anchor
;
5325 int prev_disks
= -1;
5329 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5330 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
5331 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5332 struct imsm_map
*map2
;
5333 int prev_num_members
;
5335 if (dev
->vol
.migr_state
)
5338 if (prev_disks
== -1)
5339 prev_disks
= map
->num_members
;
5340 if (prev_disks
== map
->num_members
)
5343 /* OK, this array needs to enter reshape mode.
5344 * i.e it needs a migr_state
5347 copy_map_size
= sizeof_imsm_map(map
);
5348 prev_num_members
= map
->num_members
;
5349 map
->num_members
= prev_disks
;
5350 dev
->vol
.migr_state
= 1;
5351 dev
->vol
.curr_migr_unit
= 0;
5352 dev
->vol
.migr_type
= MIGR_GEN_MIGR
;
5353 for (i
= prev_num_members
;
5354 i
< map
->num_members
; i
++)
5355 set_imsm_ord_tbl_ent(map
, i
, i
);
5356 map2
= get_imsm_map(dev
, 1);
5357 /* Copy the current map */
5358 memcpy(map2
, map
, copy_map_size
);
5359 map2
->num_members
= prev_num_members
;
5361 imsm_set_array_size(dev
);
5362 super
->updates_pending
++;
5366 /* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
5367 * states are handled in imsm_set_disk() with one exception, when a
5368 * resync is stopped due to a new failure this routine will set the
5369 * 'degraded' state for the array.
5371 static int imsm_set_array_state(struct active_array
*a
, int consistent
)
5373 int inst
= a
->info
.container_member
;
5374 struct intel_super
*super
= a
->container
->sb
;
5375 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5376 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5377 int failed
= imsm_count_failed(super
, dev
);
5378 __u8 map_state
= imsm_check_degraded(super
, dev
, failed
);
5379 __u32 blocks_per_unit
;
5381 if (dev
->vol
.migr_state
&&
5382 dev
->vol
.migr_type
== MIGR_GEN_MIGR
) {
5383 /* array state change is blocked due to reshape action
5385 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5386 * - finish the reshape (if last_checkpoint is big and action != reshape)
5387 * - update curr_migr_unit
5389 if (a
->curr_action
== reshape
) {
5390 /* still reshaping, maybe update curr_migr_unit */
5391 goto mark_checkpoint
;
5393 if (a
->last_checkpoint
== 0 && a
->prev_action
== reshape
) {
5394 /* for some reason we aborted the reshape.
5397 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
5398 dev
->vol
.migr_state
= 0;
5399 dev
->vol
.migr_type
= 0;
5400 dev
->vol
.curr_migr_unit
= 0;
5401 memcpy(map
, map2
, sizeof_imsm_map(map2
));
5402 super
->updates_pending
++;
5404 if (a
->last_checkpoint
>= a
->info
.component_size
) {
5405 unsigned long long array_blocks
;
5409 used_disks
= imsm_num_data_members(dev
, 0);
5410 if (used_disks
> 0) {
5412 map
->blocks_per_member
*
5414 /* round array size down to closest MB
5416 array_blocks
= (array_blocks
5417 >> SECT_PER_MB_SHIFT
)
5418 << SECT_PER_MB_SHIFT
;
5419 a
->info
.custom_array_size
= array_blocks
;
5420 /* encourage manager to update array
5424 a
->check_reshape
= 1;
5426 /* finalize online capacity expansion/reshape */
5427 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
5429 mdi
->disk
.raid_disk
,
5432 imsm_progress_container_reshape(super
);
5437 /* before we activate this array handle any missing disks */
5438 if (consistent
== 2)
5439 handle_missing(super
, dev
);
5441 if (consistent
== 2 &&
5442 (!is_resync_complete(&a
->info
) ||
5443 map_state
!= IMSM_T_STATE_NORMAL
||
5444 dev
->vol
.migr_state
))
5447 if (is_resync_complete(&a
->info
)) {
5448 /* complete initialization / resync,
5449 * recovery and interrupted recovery is completed in
5452 if (is_resyncing(dev
)) {
5453 dprintf("imsm: mark resync done\n");
5454 end_migration(dev
, map_state
);
5455 super
->updates_pending
++;
5456 a
->last_checkpoint
= 0;
5458 } else if (!is_resyncing(dev
) && !failed
) {
5459 /* mark the start of the init process if nothing is failed */
5460 dprintf("imsm: mark resync start\n");
5461 if (map
->map_state
== IMSM_T_STATE_UNINITIALIZED
)
5462 migrate(dev
, IMSM_T_STATE_NORMAL
, MIGR_INIT
);
5464 migrate(dev
, IMSM_T_STATE_NORMAL
, MIGR_REPAIR
);
5465 super
->updates_pending
++;
5469 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5470 blocks_per_unit
= blocks_per_migr_unit(dev
);
5471 if (blocks_per_unit
) {
5475 units
= a
->last_checkpoint
/ blocks_per_unit
;
5478 /* check that we did not overflow 32-bits, and that
5479 * curr_migr_unit needs updating
5481 if (units32
== units
&&
5482 __le32_to_cpu(dev
->vol
.curr_migr_unit
) != units32
) {
5483 dprintf("imsm: mark checkpoint (%u)\n", units32
);
5484 dev
->vol
.curr_migr_unit
= __cpu_to_le32(units32
);
5485 super
->updates_pending
++;
5489 /* mark dirty / clean */
5490 if (dev
->vol
.dirty
!= !consistent
) {
5491 dprintf("imsm: mark '%s'\n", consistent
? "clean" : "dirty");
5496 super
->updates_pending
++;
5502 static void imsm_set_disk(struct active_array
*a
, int n
, int state
)
5504 int inst
= a
->info
.container_member
;
5505 struct intel_super
*super
= a
->container
->sb
;
5506 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5507 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5508 struct imsm_disk
*disk
;
5513 if (n
> map
->num_members
)
5514 fprintf(stderr
, "imsm: set_disk %d out of range 0..%d\n",
5515 n
, map
->num_members
- 1);
5520 dprintf("imsm: set_disk %d:%x\n", n
, state
);
5522 ord
= get_imsm_ord_tbl_ent(dev
, n
, -1);
5523 disk
= get_imsm_disk(super
, ord_to_idx(ord
));
5525 /* check for new failures */
5526 if (state
& DS_FAULTY
) {
5527 if (mark_failure(dev
, disk
, ord_to_idx(ord
)))
5528 super
->updates_pending
++;
5531 /* check if in_sync */
5532 if (state
& DS_INSYNC
&& ord
& IMSM_ORD_REBUILD
&& is_rebuilding(dev
)) {
5533 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
5535 set_imsm_ord_tbl_ent(migr_map
, n
, ord_to_idx(ord
));
5536 super
->updates_pending
++;
5539 failed
= imsm_count_failed(super
, dev
);
5540 map_state
= imsm_check_degraded(super
, dev
, failed
);
5542 /* check if recovery complete, newly degraded, or failed */
5543 if (map_state
== IMSM_T_STATE_NORMAL
&& is_rebuilding(dev
)) {
5544 end_migration(dev
, map_state
);
5545 map
= get_imsm_map(dev
, 0);
5546 map
->failed_disk_num
= ~0;
5547 super
->updates_pending
++;
5548 a
->last_checkpoint
= 0;
5549 } else if (map_state
== IMSM_T_STATE_DEGRADED
&&
5550 map
->map_state
!= map_state
&&
5551 !dev
->vol
.migr_state
) {
5552 dprintf("imsm: mark degraded\n");
5553 map
->map_state
= map_state
;
5554 super
->updates_pending
++;
5555 a
->last_checkpoint
= 0;
5556 } else if (map_state
== IMSM_T_STATE_FAILED
&&
5557 map
->map_state
!= map_state
) {
5558 dprintf("imsm: mark failed\n");
5559 end_migration(dev
, map_state
);
5560 super
->updates_pending
++;
5561 a
->last_checkpoint
= 0;
5562 } else if (is_gen_migration(dev
)) {
5563 dprintf("imsm: Detected General Migration in state: ");
5564 if (map_state
== IMSM_T_STATE_NORMAL
) {
5565 end_migration(dev
, map_state
);
5566 map
= get_imsm_map(dev
, 0);
5567 map
->failed_disk_num
= ~0;
5568 dprintf("normal\n");
5570 if (map_state
== IMSM_T_STATE_DEGRADED
) {
5571 printf("degraded\n");
5572 end_migration(dev
, map_state
);
5574 dprintf("failed\n");
5576 map
->map_state
= map_state
;
5578 super
->updates_pending
++;
5582 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
)
5585 __u32 mpb_size
= __le32_to_cpu(mpb
->mpb_size
);
5586 unsigned long long dsize
;
5587 unsigned long long sectors
;
5589 get_dev_size(fd
, NULL
, &dsize
);
5591 if (mpb_size
> 512) {
5592 /* -1 to account for anchor */
5593 sectors
= mpb_sectors(mpb
) - 1;
5595 /* write the extended mpb to the sectors preceeding the anchor */
5596 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0)
5599 if ((unsigned long long)write(fd
, buf
+ 512, 512 * sectors
)
5604 /* first block is stored on second to last sector of the disk */
5605 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0)
5608 if (write(fd
, buf
, 512) != 512)
5614 static void imsm_sync_metadata(struct supertype
*container
)
5616 struct intel_super
*super
= container
->sb
;
5618 dprintf("sync metadata: %d\n", super
->updates_pending
);
5619 if (!super
->updates_pending
)
5622 write_super_imsm(container
, 0);
5624 super
->updates_pending
= 0;
5627 static struct dl
*imsm_readd(struct intel_super
*super
, int idx
, struct active_array
*a
)
5629 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5630 int i
= get_imsm_disk_idx(dev
, idx
, -1);
5633 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
5637 if (dl
&& is_failed(&dl
->disk
))
5641 dprintf("%s: found %x:%x\n", __func__
, dl
->major
, dl
->minor
);
5646 static struct dl
*imsm_add_spare(struct intel_super
*super
, int slot
,
5647 struct active_array
*a
, int activate_new
,
5648 struct mdinfo
*additional_test_list
)
5650 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5651 int idx
= get_imsm_disk_idx(dev
, slot
, -1);
5652 struct imsm_super
*mpb
= super
->anchor
;
5653 struct imsm_map
*map
;
5654 unsigned long long pos
;
5659 __u32 array_start
= 0;
5660 __u32 array_end
= 0;
5662 struct mdinfo
*test_list
;
5664 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
5665 /* If in this array, skip */
5666 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5667 if (d
->state_fd
>= 0 &&
5668 d
->disk
.major
== dl
->major
&&
5669 d
->disk
.minor
== dl
->minor
) {
5670 dprintf("%x:%x already in array\n",
5671 dl
->major
, dl
->minor
);
5676 test_list
= additional_test_list
;
5678 if (test_list
->disk
.major
== dl
->major
&&
5679 test_list
->disk
.minor
== dl
->minor
) {
5680 dprintf("%x:%x already in additional test list\n",
5681 dl
->major
, dl
->minor
);
5684 test_list
= test_list
->next
;
5689 /* skip in use or failed drives */
5690 if (is_failed(&dl
->disk
) || idx
== dl
->index
||
5692 dprintf("%x:%x status (failed: %d index: %d)\n",
5693 dl
->major
, dl
->minor
, is_failed(&dl
->disk
), idx
);
5697 /* skip pure spares when we are looking for partially
5698 * assimilated drives
5700 if (dl
->index
== -1 && !activate_new
)
5703 /* Does this unused device have the requisite free space?
5704 * It needs to be able to cover all member volumes
5706 ex
= get_extents(super
, dl
);
5708 dprintf("cannot get extents\n");
5711 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5712 dev
= get_imsm_dev(super
, i
);
5713 map
= get_imsm_map(dev
, 0);
5715 /* check if this disk is already a member of
5718 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
5724 array_start
= __le32_to_cpu(map
->pba_of_lba0
);
5725 array_end
= array_start
+
5726 __le32_to_cpu(map
->blocks_per_member
) - 1;
5729 /* check that we can start at pba_of_lba0 with
5730 * blocks_per_member of space
5732 if (array_start
>= pos
&& array_end
< ex
[j
].start
) {
5736 pos
= ex
[j
].start
+ ex
[j
].size
;
5738 } while (ex
[j
-1].size
);
5745 if (i
< mpb
->num_raid_devs
) {
5746 dprintf("%x:%x does not have %u to %u available\n",
5747 dl
->major
, dl
->minor
, array_start
, array_end
);
5758 static int imsm_rebuild_allowed(struct supertype
*cont
, int dev_idx
, int failed
)
5760 struct imsm_dev
*dev2
;
5761 struct imsm_map
*map
;
5767 dev2
= get_imsm_dev(cont
->sb
, dev_idx
);
5769 state
= imsm_check_degraded(cont
->sb
, dev2
, failed
);
5770 if (state
== IMSM_T_STATE_FAILED
) {
5771 map
= get_imsm_map(dev2
, 0);
5774 for (slot
= 0; slot
< map
->num_members
; slot
++) {
5776 * Check if failed disks are deleted from intel
5777 * disk list or are marked to be deleted
5779 idx
= get_imsm_disk_idx(dev2
, slot
, -1);
5780 idisk
= get_imsm_dl_disk(cont
->sb
, idx
);
5782 * Do not rebuild the array if failed disks
5783 * from failed sub-array are not removed from
5787 is_failed(&idisk
->disk
) &&
5788 (idisk
->action
!= DISK_REMOVE
))
5796 static struct mdinfo
*imsm_activate_spare(struct active_array
*a
,
5797 struct metadata_update
**updates
)
5800 * Find a device with unused free space and use it to replace a
5801 * failed/vacant region in an array. We replace failed regions one a
5802 * array at a time. The result is that a new spare disk will be added
5803 * to the first failed array and after the monitor has finished
5804 * propagating failures the remainder will be consumed.
5806 * FIXME add a capability for mdmon to request spares from another
5810 struct intel_super
*super
= a
->container
->sb
;
5811 int inst
= a
->info
.container_member
;
5812 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5813 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5814 int failed
= a
->info
.array
.raid_disks
;
5815 struct mdinfo
*rv
= NULL
;
5818 struct metadata_update
*mu
;
5820 struct imsm_update_activate_spare
*u
;
5825 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
5826 if ((d
->curr_state
& DS_FAULTY
) &&
5828 /* wait for Removal to happen */
5830 if (d
->state_fd
>= 0)
5834 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
5835 inst
, failed
, a
->info
.array
.raid_disks
, a
->info
.array
.level
);
5837 if (dev
->vol
.migr_state
&&
5838 dev
->vol
.migr_type
== MIGR_GEN_MIGR
)
5839 /* No repair during migration */
5842 if (a
->info
.array
.level
== 4)
5843 /* No repair for takeovered array
5844 * imsm doesn't support raid4
5848 if (imsm_check_degraded(super
, dev
, failed
) != IMSM_T_STATE_DEGRADED
)
5852 * If there are any failed disks check state of the other volume.
5853 * Block rebuild if the another one is failed until failed disks
5854 * are removed from container.
5857 dprintf("found failed disks in %s, check if there another"
5858 "failed sub-array.\n",
5860 /* check if states of the other volumes allow for rebuild */
5861 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
5863 allowed
= imsm_rebuild_allowed(a
->container
,
5871 /* For each slot, if it is not working, find a spare */
5872 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
5873 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5874 if (d
->disk
.raid_disk
== i
)
5876 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
5877 if (d
&& (d
->state_fd
>= 0))
5881 * OK, this device needs recovery. Try to re-add the
5882 * previous occupant of this slot, if this fails see if
5883 * we can continue the assimilation of a spare that was
5884 * partially assimilated, finally try to activate a new
5887 dl
= imsm_readd(super
, i
, a
);
5889 dl
= imsm_add_spare(super
, i
, a
, 0, NULL
);
5891 dl
= imsm_add_spare(super
, i
, a
, 1, NULL
);
5895 /* found a usable disk with enough space */
5896 di
= malloc(sizeof(*di
));
5899 memset(di
, 0, sizeof(*di
));
5901 /* dl->index will be -1 in the case we are activating a
5902 * pristine spare. imsm_process_update() will create a
5903 * new index in this case. Once a disk is found to be
5904 * failed in all member arrays it is kicked from the
5907 di
->disk
.number
= dl
->index
;
5909 /* (ab)use di->devs to store a pointer to the device
5912 di
->devs
= (struct mdinfo
*) dl
;
5914 di
->disk
.raid_disk
= i
;
5915 di
->disk
.major
= dl
->major
;
5916 di
->disk
.minor
= dl
->minor
;
5918 di
->recovery_start
= 0;
5919 di
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5920 di
->component_size
= a
->info
.component_size
;
5921 di
->container_member
= inst
;
5922 super
->random
= random32();
5926 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
5927 i
, di
->data_offset
);
5933 /* No spares found */
5935 /* Now 'rv' has a list of devices to return.
5936 * Create a metadata_update record to update the
5937 * disk_ord_tbl for the array
5939 mu
= malloc(sizeof(*mu
));
5941 mu
->buf
= malloc(sizeof(struct imsm_update_activate_spare
) * num_spares
);
5942 if (mu
->buf
== NULL
) {
5949 struct mdinfo
*n
= rv
->next
;
5958 mu
->space_list
= NULL
;
5959 mu
->len
= sizeof(struct imsm_update_activate_spare
) * num_spares
;
5960 mu
->next
= *updates
;
5961 u
= (struct imsm_update_activate_spare
*) mu
->buf
;
5963 for (di
= rv
; di
; di
= di
->next
) {
5964 u
->type
= update_activate_spare
;
5965 u
->dl
= (struct dl
*) di
->devs
;
5967 u
->slot
= di
->disk
.raid_disk
;
5978 static int disks_overlap(struct intel_super
*super
, int idx
, struct imsm_update_create_array
*u
)
5980 struct imsm_dev
*dev
= get_imsm_dev(super
, idx
);
5981 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5982 struct imsm_map
*new_map
= get_imsm_map(&u
->dev
, 0);
5983 struct disk_info
*inf
= get_disk_info(u
);
5984 struct imsm_disk
*disk
;
5988 for (i
= 0; i
< map
->num_members
; i
++) {
5989 disk
= get_imsm_disk(super
, get_imsm_disk_idx(dev
, i
, -1));
5990 for (j
= 0; j
< new_map
->num_members
; j
++)
5991 if (serialcmp(disk
->serial
, inf
[j
].serial
) == 0)
5999 static struct dl
*get_disk_super(struct intel_super
*super
, int major
, int minor
)
6001 struct dl
*dl
= NULL
;
6002 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6003 if ((dl
->major
== major
) && (dl
->minor
== minor
))
6008 static int remove_disk_super(struct intel_super
*super
, int major
, int minor
)
6010 struct dl
*prev
= NULL
;
6014 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6015 if ((dl
->major
== major
) && (dl
->minor
== minor
)) {
6018 prev
->next
= dl
->next
;
6020 super
->disks
= dl
->next
;
6022 __free_imsm_disk(dl
);
6023 dprintf("%s: removed %x:%x\n",
6024 __func__
, major
, minor
);
6032 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
);
6034 static int add_remove_disk_update(struct intel_super
*super
)
6036 int check_degraded
= 0;
6037 struct dl
*disk
= NULL
;
6038 /* add/remove some spares to/from the metadata/contrainer */
6039 while (super
->disk_mgmt_list
) {
6040 struct dl
*disk_cfg
;
6042 disk_cfg
= super
->disk_mgmt_list
;
6043 super
->disk_mgmt_list
= disk_cfg
->next
;
6044 disk_cfg
->next
= NULL
;
6046 if (disk_cfg
->action
== DISK_ADD
) {
6047 disk_cfg
->next
= super
->disks
;
6048 super
->disks
= disk_cfg
;
6050 dprintf("%s: added %x:%x\n",
6051 __func__
, disk_cfg
->major
,
6053 } else if (disk_cfg
->action
== DISK_REMOVE
) {
6054 dprintf("Disk remove action processed: %x.%x\n",
6055 disk_cfg
->major
, disk_cfg
->minor
);
6056 disk
= get_disk_super(super
,
6060 /* store action status */
6061 disk
->action
= DISK_REMOVE
;
6062 /* remove spare disks only */
6063 if (disk
->index
== -1) {
6064 remove_disk_super(super
,
6069 /* release allocate disk structure */
6070 __free_imsm_disk(disk_cfg
);
6073 return check_degraded
;
6076 static int apply_reshape_container_disks_update(struct imsm_update_reshape
*u
,
6077 struct intel_super
*super
,
6080 struct dl
*new_disk
;
6081 struct intel_dev
*id
;
6083 int delta_disks
= u
->new_raid_disks
- u
->old_raid_disks
;
6084 int disk_count
= u
->old_raid_disks
;
6085 void **tofree
= NULL
;
6086 int devices_to_reshape
= 1;
6087 struct imsm_super
*mpb
= super
->anchor
;
6089 unsigned int dev_id
;
6091 dprintf("imsm: apply_reshape_container_disks_update()\n");
6093 /* enable spares to use in array */
6094 for (i
= 0; i
< delta_disks
; i
++) {
6095 new_disk
= get_disk_super(super
,
6096 major(u
->new_disks
[i
]),
6097 minor(u
->new_disks
[i
]));
6098 dprintf("imsm: new disk for reshape is: %i:%i "
6099 "(%p, index = %i)\n",
6100 major(u
->new_disks
[i
]), minor(u
->new_disks
[i
]),
6101 new_disk
, new_disk
->index
);
6102 if ((new_disk
== NULL
) ||
6103 ((new_disk
->index
>= 0) &&
6104 (new_disk
->index
< u
->old_raid_disks
)))
6105 goto update_reshape_exit
;
6106 new_disk
->index
= disk_count
++;
6107 /* slot to fill in autolayout
6109 new_disk
->raiddisk
= new_disk
->index
;
6110 new_disk
->disk
.status
|=
6112 new_disk
->disk
.status
&= ~SPARE_DISK
;
6115 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6116 mpb
->num_raid_devs
);
6117 /* manage changes in volume
6119 for (dev_id
= 0; dev_id
< mpb
->num_raid_devs
; dev_id
++) {
6120 void **sp
= *space_list
;
6121 struct imsm_dev
*newdev
;
6122 struct imsm_map
*newmap
, *oldmap
;
6124 for (id
= super
->devlist
; id
; id
= id
->next
) {
6125 if (id
->index
== dev_id
)
6134 /* Copy the dev, but not (all of) the map */
6135 memcpy(newdev
, id
->dev
, sizeof(*newdev
));
6136 oldmap
= get_imsm_map(id
->dev
, 0);
6137 newmap
= get_imsm_map(newdev
, 0);
6138 /* Copy the current map */
6139 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6140 /* update one device only
6142 if (devices_to_reshape
) {
6143 dprintf("imsm: modifying subdev: %i\n",
6145 devices_to_reshape
--;
6146 newdev
->vol
.migr_state
= 1;
6147 newdev
->vol
.curr_migr_unit
= 0;
6148 newdev
->vol
.migr_type
= MIGR_GEN_MIGR
;
6149 newmap
->num_members
= u
->new_raid_disks
;
6150 for (i
= 0; i
< delta_disks
; i
++) {
6151 set_imsm_ord_tbl_ent(newmap
,
6152 u
->old_raid_disks
+ i
,
6153 u
->old_raid_disks
+ i
);
6155 /* New map is correct, now need to save old map
6157 newmap
= get_imsm_map(newdev
, 1);
6158 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6160 imsm_set_array_size(newdev
);
6163 sp
= (void **)id
->dev
;
6169 *space_list
= tofree
;
6172 update_reshape_exit
:
6177 static int apply_takeover_update(struct imsm_update_takeover
*u
,
6178 struct intel_super
*super
,
6181 struct imsm_dev
*dev
= NULL
;
6182 struct intel_dev
*dv
;
6183 struct imsm_dev
*dev_new
;
6184 struct imsm_map
*map
;
6188 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
6189 if (dv
->index
== (unsigned int)u
->subarray
) {
6197 map
= get_imsm_map(dev
, 0);
6199 if (u
->direction
== R10_TO_R0
) {
6200 /* Number of failed disks must be half of initial disk number */
6201 if (imsm_count_failed(super
, dev
) != (map
->num_members
/ 2))
6204 /* iterate through devices to mark removed disks as spare */
6205 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6206 if (dm
->disk
.status
& FAILED_DISK
) {
6207 int idx
= dm
->index
;
6208 /* update indexes on the disk list */
6209 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6210 the index values will end up being correct.... NB */
6211 for (du
= super
->disks
; du
; du
= du
->next
)
6212 if (du
->index
> idx
)
6214 /* mark as spare disk */
6215 dm
->disk
.status
= SPARE_DISK
;
6220 map
->num_members
= map
->num_members
/ 2;
6221 map
->map_state
= IMSM_T_STATE_NORMAL
;
6222 map
->num_domains
= 1;
6223 map
->raid_level
= 0;
6224 map
->failed_disk_num
= -1;
6227 if (u
->direction
== R0_TO_R10
) {
6229 /* update slots in current disk list */
6230 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6234 /* create new *missing* disks */
6235 for (i
= 0; i
< map
->num_members
; i
++) {
6236 space
= *space_list
;
6239 *space_list
= *space
;
6241 memcpy(du
, super
->disks
, sizeof(*du
));
6245 du
->index
= (i
* 2) + 1;
6246 sprintf((char *)du
->disk
.serial
,
6247 " MISSING_%d", du
->index
);
6248 sprintf((char *)du
->serial
,
6249 "MISSING_%d", du
->index
);
6250 du
->next
= super
->missing
;
6251 super
->missing
= du
;
6253 /* create new dev and map */
6254 space
= *space_list
;
6257 *space_list
= *space
;
6258 dev_new
= (void *)space
;
6259 memcpy(dev_new
, dev
, sizeof(*dev
));
6260 /* update new map */
6261 map
= get_imsm_map(dev_new
, 0);
6262 map
->num_members
= map
->num_members
* 2;
6263 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6264 map
->num_domains
= 2;
6265 map
->raid_level
= 1;
6266 /* replace dev<->dev_new */
6269 /* update disk order table */
6270 for (du
= super
->disks
; du
; du
= du
->next
)
6272 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6273 for (du
= super
->missing
; du
; du
= du
->next
)
6274 if (du
->index
>= 0) {
6275 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6276 mark_missing(dev_new
, &du
->disk
, du
->index
);
6282 static void imsm_process_update(struct supertype
*st
,
6283 struct metadata_update
*update
)
6286 * crack open the metadata_update envelope to find the update record
6287 * update can be one of:
6288 * update_reshape_container_disks - all the arrays in the container
6289 * are being reshaped to have more devices. We need to mark
6290 * the arrays for general migration and convert selected spares
6291 * into active devices.
6292 * update_activate_spare - a spare device has replaced a failed
6293 * device in an array, update the disk_ord_tbl. If this disk is
6294 * present in all member arrays then also clear the SPARE_DISK
6296 * update_create_array
6298 * update_rename_array
6299 * update_add_remove_disk
6301 struct intel_super
*super
= st
->sb
;
6302 struct imsm_super
*mpb
;
6303 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6305 /* update requires a larger buf but the allocation failed */
6306 if (super
->next_len
&& !super
->next_buf
) {
6307 super
->next_len
= 0;
6311 if (super
->next_buf
) {
6312 memcpy(super
->next_buf
, super
->buf
, super
->len
);
6314 super
->len
= super
->next_len
;
6315 super
->buf
= super
->next_buf
;
6317 super
->next_len
= 0;
6318 super
->next_buf
= NULL
;
6321 mpb
= super
->anchor
;
6324 case update_takeover
: {
6325 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6326 if (apply_takeover_update(u
, super
, &update
->space_list
)) {
6327 imsm_update_version_info(super
);
6328 super
->updates_pending
++;
6333 case update_reshape_container_disks
: {
6334 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6335 if (apply_reshape_container_disks_update(
6336 u
, super
, &update
->space_list
))
6337 super
->updates_pending
++;
6340 case update_activate_spare
: {
6341 struct imsm_update_activate_spare
*u
= (void *) update
->buf
;
6342 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->array
);
6343 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6344 struct imsm_map
*migr_map
;
6345 struct active_array
*a
;
6346 struct imsm_disk
*disk
;
6351 int victim
= get_imsm_disk_idx(dev
, u
->slot
, -1);
6354 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6359 fprintf(stderr
, "error: imsm_activate_spare passed "
6360 "an unknown disk (index: %d)\n",
6365 super
->updates_pending
++;
6367 /* count failures (excluding rebuilds and the victim)
6368 * to determine map[0] state
6371 for (i
= 0; i
< map
->num_members
; i
++) {
6374 disk
= get_imsm_disk(super
,
6375 get_imsm_disk_idx(dev
, i
, -1));
6376 if (!disk
|| is_failed(disk
))
6380 /* adding a pristine spare, assign a new index */
6381 if (dl
->index
< 0) {
6382 dl
->index
= super
->anchor
->num_disks
;
6383 super
->anchor
->num_disks
++;
6386 disk
->status
|= CONFIGURED_DISK
;
6387 disk
->status
&= ~SPARE_DISK
;
6390 to_state
= imsm_check_degraded(super
, dev
, failed
);
6391 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6392 migrate(dev
, to_state
, MIGR_REBUILD
);
6393 migr_map
= get_imsm_map(dev
, 1);
6394 set_imsm_ord_tbl_ent(map
, u
->slot
, dl
->index
);
6395 set_imsm_ord_tbl_ent(migr_map
, u
->slot
, dl
->index
| IMSM_ORD_REBUILD
);
6397 /* update the family_num to mark a new container
6398 * generation, being careful to record the existing
6399 * family_num in orig_family_num to clean up after
6400 * earlier mdadm versions that neglected to set it.
6402 if (mpb
->orig_family_num
== 0)
6403 mpb
->orig_family_num
= mpb
->family_num
;
6404 mpb
->family_num
+= super
->random
;
6406 /* count arrays using the victim in the metadata */
6408 for (a
= st
->arrays
; a
; a
= a
->next
) {
6409 dev
= get_imsm_dev(super
, a
->info
.container_member
);
6410 map
= get_imsm_map(dev
, 0);
6412 if (get_imsm_disk_slot(map
, victim
) >= 0)
6416 /* delete the victim if it is no longer being
6422 /* We know that 'manager' isn't touching anything,
6423 * so it is safe to delete
6425 for (dlp
= &super
->disks
; *dlp
; dlp
= &(*dlp
)->next
)
6426 if ((*dlp
)->index
== victim
)
6429 /* victim may be on the missing list */
6431 for (dlp
= &super
->missing
; *dlp
; dlp
= &(*dlp
)->next
)
6432 if ((*dlp
)->index
== victim
)
6434 imsm_delete(super
, dlp
, victim
);
6438 case update_create_array
: {
6439 /* someone wants to create a new array, we need to be aware of
6440 * a few races/collisions:
6441 * 1/ 'Create' called by two separate instances of mdadm
6442 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6443 * devices that have since been assimilated via
6445 * In the event this update can not be carried out mdadm will
6446 * (FIX ME) notice that its update did not take hold.
6448 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6449 struct intel_dev
*dv
;
6450 struct imsm_dev
*dev
;
6451 struct imsm_map
*map
, *new_map
;
6452 unsigned long long start
, end
;
6453 unsigned long long new_start
, new_end
;
6455 struct disk_info
*inf
;
6458 /* handle racing creates: first come first serve */
6459 if (u
->dev_idx
< mpb
->num_raid_devs
) {
6460 dprintf("%s: subarray %d already defined\n",
6461 __func__
, u
->dev_idx
);
6465 /* check update is next in sequence */
6466 if (u
->dev_idx
!= mpb
->num_raid_devs
) {
6467 dprintf("%s: can not create array %d expected index %d\n",
6468 __func__
, u
->dev_idx
, mpb
->num_raid_devs
);
6472 new_map
= get_imsm_map(&u
->dev
, 0);
6473 new_start
= __le32_to_cpu(new_map
->pba_of_lba0
);
6474 new_end
= new_start
+ __le32_to_cpu(new_map
->blocks_per_member
);
6475 inf
= get_disk_info(u
);
6477 /* handle activate_spare versus create race:
6478 * check to make sure that overlapping arrays do not include
6481 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6482 dev
= get_imsm_dev(super
, i
);
6483 map
= get_imsm_map(dev
, 0);
6484 start
= __le32_to_cpu(map
->pba_of_lba0
);
6485 end
= start
+ __le32_to_cpu(map
->blocks_per_member
);
6486 if ((new_start
>= start
&& new_start
<= end
) ||
6487 (start
>= new_start
&& start
<= new_end
))
6492 if (disks_overlap(super
, i
, u
)) {
6493 dprintf("%s: arrays overlap\n", __func__
);
6498 /* check that prepare update was successful */
6499 if (!update
->space
) {
6500 dprintf("%s: prepare update failed\n", __func__
);
6504 /* check that all disks are still active before committing
6505 * changes. FIXME: could we instead handle this by creating a
6506 * degraded array? That's probably not what the user expects,
6507 * so better to drop this update on the floor.
6509 for (i
= 0; i
< new_map
->num_members
; i
++) {
6510 dl
= serial_to_dl(inf
[i
].serial
, super
);
6512 dprintf("%s: disk disappeared\n", __func__
);
6517 super
->updates_pending
++;
6519 /* convert spares to members and fixup ord_tbl */
6520 for (i
= 0; i
< new_map
->num_members
; i
++) {
6521 dl
= serial_to_dl(inf
[i
].serial
, super
);
6522 if (dl
->index
== -1) {
6523 dl
->index
= mpb
->num_disks
;
6525 dl
->disk
.status
|= CONFIGURED_DISK
;
6526 dl
->disk
.status
&= ~SPARE_DISK
;
6528 set_imsm_ord_tbl_ent(new_map
, i
, dl
->index
);
6533 update
->space
= NULL
;
6534 imsm_copy_dev(dev
, &u
->dev
);
6535 dv
->index
= u
->dev_idx
;
6536 dv
->next
= super
->devlist
;
6537 super
->devlist
= dv
;
6538 mpb
->num_raid_devs
++;
6540 imsm_update_version_info(super
);
6543 /* mdmon knows how to release update->space, but not
6544 * ((struct intel_dev *) update->space)->dev
6546 if (update
->space
) {
6552 case update_kill_array
: {
6553 struct imsm_update_kill_array
*u
= (void *) update
->buf
;
6554 int victim
= u
->dev_idx
;
6555 struct active_array
*a
;
6556 struct intel_dev
**dp
;
6557 struct imsm_dev
*dev
;
6559 /* sanity check that we are not affecting the uuid of
6560 * active arrays, or deleting an active array
6562 * FIXME when immutable ids are available, but note that
6563 * we'll also need to fixup the invalidated/active
6564 * subarray indexes in mdstat
6566 for (a
= st
->arrays
; a
; a
= a
->next
)
6567 if (a
->info
.container_member
>= victim
)
6569 /* by definition if mdmon is running at least one array
6570 * is active in the container, so checking
6571 * mpb->num_raid_devs is just extra paranoia
6573 dev
= get_imsm_dev(super
, victim
);
6574 if (a
|| !dev
|| mpb
->num_raid_devs
== 1) {
6575 dprintf("failed to delete subarray-%d\n", victim
);
6579 for (dp
= &super
->devlist
; *dp
;)
6580 if ((*dp
)->index
== (unsigned)super
->current_vol
) {
6583 if ((*dp
)->index
> (unsigned)victim
)
6587 mpb
->num_raid_devs
--;
6588 super
->updates_pending
++;
6591 case update_rename_array
: {
6592 struct imsm_update_rename_array
*u
= (void *) update
->buf
;
6593 char name
[MAX_RAID_SERIAL_LEN
+1];
6594 int target
= u
->dev_idx
;
6595 struct active_array
*a
;
6596 struct imsm_dev
*dev
;
6598 /* sanity check that we are not affecting the uuid of
6601 snprintf(name
, MAX_RAID_SERIAL_LEN
, "%s", (char *) u
->name
);
6602 name
[MAX_RAID_SERIAL_LEN
] = '\0';
6603 for (a
= st
->arrays
; a
; a
= a
->next
)
6604 if (a
->info
.container_member
== target
)
6606 dev
= get_imsm_dev(super
, u
->dev_idx
);
6607 if (a
|| !dev
|| !check_name(super
, name
, 1)) {
6608 dprintf("failed to rename subarray-%d\n", target
);
6612 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
6613 super
->updates_pending
++;
6616 case update_add_remove_disk
: {
6617 /* we may be able to repair some arrays if disks are
6618 * being added, check teh status of add_remove_disk
6619 * if discs has been added.
6621 if (add_remove_disk_update(super
)) {
6622 struct active_array
*a
;
6624 super
->updates_pending
++;
6625 for (a
= st
->arrays
; a
; a
= a
->next
)
6626 a
->check_degraded
= 1;
6631 fprintf(stderr
, "error: unsuported process update type:"
6632 "(type: %d)\n", type
);
6636 static void imsm_prepare_update(struct supertype
*st
,
6637 struct metadata_update
*update
)
6640 * Allocate space to hold new disk entries, raid-device entries or a new
6641 * mpb if necessary. The manager synchronously waits for updates to
6642 * complete in the monitor, so new mpb buffers allocated here can be
6643 * integrated by the monitor thread without worrying about live pointers
6644 * in the manager thread.
6646 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6647 struct intel_super
*super
= st
->sb
;
6648 struct imsm_super
*mpb
= super
->anchor
;
6653 case update_takeover
: {
6654 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6655 if (u
->direction
== R0_TO_R10
) {
6656 void **tail
= (void **)&update
->space_list
;
6657 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subarray
);
6658 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6659 int num_members
= map
->num_members
;
6663 /* allocate memory for added disks */
6664 for (i
= 0; i
< num_members
; i
++) {
6665 size
= sizeof(struct dl
);
6666 space
= malloc(size
);
6675 /* allocate memory for new device */
6676 size
= sizeof_imsm_dev(super
->devlist
->dev
, 0) +
6677 (num_members
* sizeof(__u32
));
6678 space
= malloc(size
);
6687 len
= disks_to_mpb_size(num_members
* 2);
6689 /* if allocation didn't success, free buffer */
6690 while (update
->space_list
) {
6691 void **sp
= update
->space_list
;
6692 update
->space_list
= *sp
;
6700 case update_reshape_container_disks
: {
6701 /* Every raid device in the container is about to
6702 * gain some more devices, and we will enter a
6704 * So each 'imsm_map' will be bigger, and the imsm_vol
6705 * will now hold 2 of them.
6706 * Thus we need new 'struct imsm_dev' allocations sized
6707 * as sizeof_imsm_dev but with more devices in both maps.
6709 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6710 struct intel_dev
*dl
;
6711 void **space_tail
= (void**)&update
->space_list
;
6713 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6715 for (dl
= super
->devlist
; dl
; dl
= dl
->next
) {
6716 int size
= sizeof_imsm_dev(dl
->dev
, 1);
6718 if (u
->new_raid_disks
> u
->old_raid_disks
)
6719 size
+= sizeof(__u32
)*2*
6720 (u
->new_raid_disks
- u
->old_raid_disks
);
6729 len
= disks_to_mpb_size(u
->new_raid_disks
);
6730 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
6733 case update_create_array
: {
6734 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6735 struct intel_dev
*dv
;
6736 struct imsm_dev
*dev
= &u
->dev
;
6737 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6739 struct disk_info
*inf
;
6743 inf
= get_disk_info(u
);
6744 len
= sizeof_imsm_dev(dev
, 1);
6745 /* allocate a new super->devlist entry */
6746 dv
= malloc(sizeof(*dv
));
6748 dv
->dev
= malloc(len
);
6753 update
->space
= NULL
;
6757 /* count how many spares will be converted to members */
6758 for (i
= 0; i
< map
->num_members
; i
++) {
6759 dl
= serial_to_dl(inf
[i
].serial
, super
);
6761 /* hmm maybe it failed?, nothing we can do about
6766 if (count_memberships(dl
, super
) == 0)
6769 len
+= activate
* sizeof(struct imsm_disk
);
6776 /* check if we need a larger metadata buffer */
6777 if (super
->next_buf
)
6778 buf_len
= super
->next_len
;
6780 buf_len
= super
->len
;
6782 if (__le32_to_cpu(mpb
->mpb_size
) + len
> buf_len
) {
6783 /* ok we need a larger buf than what is currently allocated
6784 * if this allocation fails process_update will notice that
6785 * ->next_len is set and ->next_buf is NULL
6787 buf_len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + len
, 512);
6788 if (super
->next_buf
)
6789 free(super
->next_buf
);
6791 super
->next_len
= buf_len
;
6792 if (posix_memalign(&super
->next_buf
, 512, buf_len
) == 0)
6793 memset(super
->next_buf
, 0, buf_len
);
6795 super
->next_buf
= NULL
;
6799 /* must be called while manager is quiesced */
6800 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
)
6802 struct imsm_super
*mpb
= super
->anchor
;
6804 struct imsm_dev
*dev
;
6805 struct imsm_map
*map
;
6806 int i
, j
, num_members
;
6809 dprintf("%s: deleting device[%d] from imsm_super\n",
6812 /* shift all indexes down one */
6813 for (iter
= super
->disks
; iter
; iter
= iter
->next
)
6814 if (iter
->index
> (int)index
)
6816 for (iter
= super
->missing
; iter
; iter
= iter
->next
)
6817 if (iter
->index
> (int)index
)
6820 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6821 dev
= get_imsm_dev(super
, i
);
6822 map
= get_imsm_map(dev
, 0);
6823 num_members
= map
->num_members
;
6824 for (j
= 0; j
< num_members
; j
++) {
6825 /* update ord entries being careful not to propagate
6826 * ord-flags to the first map
6828 ord
= get_imsm_ord_tbl_ent(dev
, j
, -1);
6830 if (ord_to_idx(ord
) <= index
)
6833 map
= get_imsm_map(dev
, 0);
6834 set_imsm_ord_tbl_ent(map
, j
, ord_to_idx(ord
- 1));
6835 map
= get_imsm_map(dev
, 1);
6837 set_imsm_ord_tbl_ent(map
, j
, ord
- 1);
6842 super
->updates_pending
++;
6844 struct dl
*dl
= *dlp
;
6846 *dlp
= (*dlp
)->next
;
6847 __free_imsm_disk(dl
);
6851 static char disk_by_path
[] = "/dev/disk/by-path/";
6853 static const char *imsm_get_disk_controller_domain(const char *path
)
6855 char disk_path
[PATH_MAX
];
6859 strncpy(disk_path
, disk_by_path
, PATH_MAX
- 1);
6860 strncat(disk_path
, path
, PATH_MAX
- strlen(disk_path
) - 1);
6861 if (stat(disk_path
, &st
) == 0) {
6862 struct sys_dev
* hba
;
6865 path
= devt_to_devpath(st
.st_rdev
);
6868 hba
= find_disk_attached_hba(-1, path
);
6869 if (hba
&& hba
->type
== SYS_DEV_SAS
)
6871 else if (hba
&& hba
->type
== SYS_DEV_SATA
)
6875 dprintf("path: %s hba: %s attached: %s\n",
6876 path
, (hba
) ? hba
->path
: "NULL", drv
);
6884 static int imsm_find_array_minor_by_subdev(int subdev
, int container
, int *minor
)
6886 char subdev_name
[20];
6887 struct mdstat_ent
*mdstat
;
6889 sprintf(subdev_name
, "%d", subdev
);
6890 mdstat
= mdstat_by_subdev(subdev_name
, container
);
6894 *minor
= mdstat
->devnum
;
6895 free_mdstat(mdstat
);
6899 static int imsm_reshape_is_allowed_on_container(struct supertype
*st
,
6900 struct geo_params
*geo
,
6901 int *old_raid_disks
)
6903 /* currently we only support increasing the number of devices
6904 * for a container. This increases the number of device for each
6905 * member array. They must all be RAID0 or RAID5.
6908 struct mdinfo
*info
, *member
;
6909 int devices_that_can_grow
= 0;
6911 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
6912 "st->devnum = (%i)\n",
6915 if (geo
->size
!= -1 ||
6916 geo
->level
!= UnSet
||
6917 geo
->layout
!= UnSet
||
6918 geo
->chunksize
!= 0 ||
6919 geo
->raid_disks
== UnSet
) {
6920 dprintf("imsm: Container operation is allowed for "
6921 "raid disks number change only.\n");
6925 info
= container_content_imsm(st
, NULL
);
6926 for (member
= info
; member
; member
= member
->next
) {
6930 dprintf("imsm: checking device_num: %i\n",
6931 member
->container_member
);
6933 if (geo
->raid_disks
<= member
->array
.raid_disks
) {
6934 /* we work on container for Online Capacity Expansion
6935 * only so raid_disks has to grow
6937 dprintf("imsm: for container operation raid disks "
6938 "increase is required\n");
6942 if ((info
->array
.level
!= 0) &&
6943 (info
->array
.level
!= 5)) {
6944 /* we cannot use this container with other raid level
6946 dprintf("imsm: for container operation wrong"
6947 " raid level (%i) detected\n",
6951 /* check for platform support
6952 * for this raid level configuration
6954 struct intel_super
*super
= st
->sb
;
6955 if (!is_raid_level_supported(super
->orom
,
6956 member
->array
.level
,
6958 dprintf("platform does not support raid%d with"
6962 geo
->raid_disks
> 1 ? "s" : "");
6965 /* check if component size is aligned to chunk size
6967 if (info
->component_size
%
6968 (info
->array
.chunk_size
/512)) {
6969 dprintf("Component size is not aligned to "
6975 if (*old_raid_disks
&&
6976 info
->array
.raid_disks
!= *old_raid_disks
)
6978 *old_raid_disks
= info
->array
.raid_disks
;
6980 /* All raid5 and raid0 volumes in container
6981 * have to be ready for Online Capacity Expansion
6982 * so they need to be assembled. We have already
6983 * checked that no recovery etc is happening.
6985 result
= imsm_find_array_minor_by_subdev(member
->container_member
,
6989 dprintf("imsm: cannot find array\n");
6992 devices_that_can_grow
++;
6995 if (!member
&& devices_that_can_grow
)
6999 dprintf("\tContainer operation allowed\n");
7001 dprintf("\tError: %i\n", ret_val
);
/* Function: get_spares_for_grow
 * Description: Builds the list of spare devices available in the container,
 *	considering only spares at least as large as the value reported by
 *	min_acceptable_spare_size_imsm().
 * Parameters: Pointer to the supertype structure
 * Returns: whatever container_choose_spares() returns for that size limit
 *	(a list of spare devices as a struct mdinfo; presumably NULL when
 *	nothing suitable is found - confirm against container_choose_spares)
 */
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
	return container_choose_spares(st,
				       min_acceptable_spare_size_imsm(st),
				       NULL, NULL, NULL, 0);
}
7019 /******************************************************************************
7020 * function: imsm_create_metadata_update_for_reshape
7021 * Function creates update for whole IMSM container.
7023 ******************************************************************************/
7024 static int imsm_create_metadata_update_for_reshape(
7025 struct supertype
*st
,
7026 struct geo_params
*geo
,
7028 struct imsm_update_reshape
**updatep
)
7030 struct intel_super
*super
= st
->sb
;
7031 struct imsm_super
*mpb
= super
->anchor
;
7032 int update_memory_size
= 0;
7033 struct imsm_update_reshape
*u
= NULL
;
7034 struct mdinfo
*spares
= NULL
;
7036 int delta_disks
= 0;
7039 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7042 delta_disks
= geo
->raid_disks
- old_raid_disks
;
7044 /* size of all update data without anchor */
7045 update_memory_size
= sizeof(struct imsm_update_reshape
);
7047 /* now add space for spare disks that we need to add. */
7048 update_memory_size
+= sizeof(u
->new_disks
[0]) * (delta_disks
- 1);
7050 u
= calloc(1, update_memory_size
);
7053 "cannot get memory for imsm_update_reshape update\n");
7056 u
->type
= update_reshape_container_disks
;
7057 u
->old_raid_disks
= old_raid_disks
;
7058 u
->new_raid_disks
= geo
->raid_disks
;
7060 /* now get spare disks list
7062 spares
= get_spares_for_grow(st
);
7065 || delta_disks
> spares
->array
.spare_disks
) {
7066 fprintf(stderr
, Name
": imsm: ERROR: Cannot get spare devices "
7067 "for %s.\n", geo
->dev_name
);
7071 /* we have got spares
7072 * update disk list in imsm_disk list table in anchor
7074 dprintf("imsm: %i spares are available.\n\n",
7075 spares
->array
.spare_disks
);
7078 for (i
= 0; i
< delta_disks
; i
++) {
7083 u
->new_disks
[i
] = makedev(dev
->disk
.major
,
7085 dl
= get_disk_super(super
, dev
->disk
.major
, dev
->disk
.minor
);
7086 dl
->index
= mpb
->num_disks
;
7096 dprintf("imsm: reshape update preparation :");
7097 if (i
== delta_disks
) {
7100 return update_memory_size
;
7103 dprintf(" Error\n");
7108 static void imsm_update_metadata_locally(struct supertype
*st
,
7111 struct metadata_update mu
;
7116 mu
.space_list
= NULL
;
7118 imsm_prepare_update(st
, &mu
);
7119 imsm_process_update(st
, &mu
);
7121 while (mu
.space_list
) {
7122 void **space
= mu
.space_list
;
7123 mu
.space_list
= *space
;
7128 /***************************************************************************
7129 * Function: imsm_analyze_change
7130 * Description: Function analyze change for single volume
7131 * and validate if transition is supported
7132 * Parameters: Geometry parameters, supertype structure
7133 * Returns: Operation type code on success, -1 if fail
7134 ****************************************************************************/
7135 enum imsm_reshape_type
imsm_analyze_change(struct supertype
*st
,
7136 struct geo_params
*geo
)
7143 getinfo_super_imsm_volume(st
, &info
, NULL
);
7145 if ((geo
->level
!= info
.array
.level
) &&
7146 (geo
->level
>= 0) &&
7147 (geo
->level
!= UnSet
)) {
7148 switch (info
.array
.level
) {
7150 if (geo
->level
== 5) {
7151 change
= CH_MIGRATION
;
7154 if (geo
->level
== 10) {
7155 change
= CH_TAKEOVER
;
7160 if (geo
->level
== 0) {
7161 change
= CH_TAKEOVER
;
7166 if (geo
->level
== 0)
7167 change
= CH_MIGRATION
;
7170 if (geo
->level
== 0) {
7171 change
= CH_TAKEOVER
;
7178 Name
" Error. Level Migration from %d to %d "
7180 info
.array
.level
, geo
->level
);
7181 goto analyse_change_exit
;
7184 geo
->level
= info
.array
.level
;
7186 if ((geo
->layout
!= info
.array
.layout
)
7187 && ((geo
->layout
!= UnSet
) && (geo
->layout
!= -1))) {
7188 change
= CH_MIGRATION
;
7189 if ((info
.array
.layout
== 0)
7190 && (info
.array
.level
== 5)
7191 && (geo
->layout
== 5)) {
7192 /* reshape 5 -> 4 */
7193 } else if ((info
.array
.layout
== 5)
7194 && (info
.array
.level
== 5)
7195 && (geo
->layout
== 0)) {
7196 /* reshape 4 -> 5 */
7201 Name
" Error. Layout Migration from %d to %d "
7203 info
.array
.layout
, geo
->layout
);
7205 goto analyse_change_exit
;
7208 geo
->layout
= info
.array
.layout
;
7210 if ((geo
->chunksize
> 0) && (geo
->chunksize
!= UnSet
)
7211 && (geo
->chunksize
!= info
.array
.chunk_size
))
7212 change
= CH_MIGRATION
;
7214 geo
->chunksize
= info
.array
.chunk_size
;
7216 chunk
= geo
->chunksize
/ 1024;
7217 if (!validate_geometry_imsm(st
,
7227 struct intel_super
*super
= st
->sb
;
7228 struct imsm_super
*mpb
= super
->anchor
;
7230 if (mpb
->num_raid_devs
> 1) {
7232 Name
" Error. Cannot perform operation on %s"
7233 "- for this operation it MUST be single "
7234 "array in container\n",
7240 analyse_change_exit
:
7245 int imsm_takeover(struct supertype
*st
, struct geo_params
*geo
)
7247 struct intel_super
*super
= st
->sb
;
7248 struct imsm_update_takeover
*u
;
7250 u
= malloc(sizeof(struct imsm_update_takeover
));
7254 u
->type
= update_takeover
;
7255 u
->subarray
= super
->current_vol
;
7257 /* 10->0 transition */
7258 if (geo
->level
== 0)
7259 u
->direction
= R10_TO_R0
;
7261 /* 0->10 transition */
7262 if (geo
->level
== 10)
7263 u
->direction
= R0_TO_R10
;
7265 /* update metadata locally */
7266 imsm_update_metadata_locally(st
, u
,
7267 sizeof(struct imsm_update_takeover
));
7268 /* and possibly remotely */
7269 if (st
->update_tail
)
7270 append_metadata_update(st
, u
,
7271 sizeof(struct imsm_update_takeover
));
/* Print the "experimental feature" warning for container reshape on stderr
 * and ask the user for confirmation.
 *
 * Returns whatever ask() returns for the confirmation prompt; the caller in
 * this file treats 0 as "do not continue".
 */
static int warn_user_about_risk(void)
{
	static const char warning[] =
		"\nThis is an experimental feature. Data on the RAID volume(s) "
		"can be lost!!!\n\n"
		"To continue command execution please make sure that\n"
		"the grow process will not be interrupted. Use safe power\n"
		"supply to avoid unexpected system reboot. Make sure that\n"
		"reshaped container is not assembled automatically during\n"
		"system boot.\n"
		"If reshape is interrupted, assemble array manually\n"
		"using e.g. '-Ac' option and up to date mdadm.conf file.\n"
		"Assembly in scan mode is not possible in such case.\n"
		"Growing container with boot array is not possible.\n"
		"If boot array reshape is interrupted, whole file system\n"
		"can be lost.\n\n";
	int answer;

	fprintf(stderr, "%s", warning);
	answer = ask("Do you want to continue? ");
	fprintf(stderr, "\n");

	return answer;
}
7302 static int imsm_reshape_super(struct supertype
*st
, long long size
, int level
,
7303 int layout
, int chunksize
, int raid_disks
,
7304 int delta_disks
, char *backup
, char *dev
,
7308 struct geo_params geo
;
7310 dprintf("imsm: reshape_super called.\n");
7312 memset(&geo
, 0, sizeof(struct geo_params
));
7315 geo
.dev_id
= st
->devnum
;
7318 geo
.layout
= layout
;
7319 geo
.chunksize
= chunksize
;
7320 geo
.raid_disks
= raid_disks
;
7321 if (delta_disks
!= UnSet
)
7322 geo
.raid_disks
+= delta_disks
;
7324 dprintf("\tfor level : %i\n", geo
.level
);
7325 dprintf("\tfor raid_disks : %i\n", geo
.raid_disks
);
7327 if (experimental() == 0)
7330 if (st
->container_dev
== st
->devnum
) {
7331 /* On container level we can only increase number of devices. */
7332 dprintf("imsm: info: Container operation\n");
7333 int old_raid_disks
= 0;
7335 /* this warning will be removed when imsm checkpointing
7336 * will be implemented, and restoring from check-point
7337 * operation will be transparent for reboot process
7339 if (warn_user_about_risk() == 0)
7342 if (imsm_reshape_is_allowed_on_container(
7343 st
, &geo
, &old_raid_disks
)) {
7344 struct imsm_update_reshape
*u
= NULL
;
7347 len
= imsm_create_metadata_update_for_reshape(
7348 st
, &geo
, old_raid_disks
, &u
);
7351 dprintf("imsm: Cannot prepare update\n");
7352 goto exit_imsm_reshape_super
;
7356 /* update metadata locally */
7357 imsm_update_metadata_locally(st
, u
, len
);
7358 /* and possibly remotely */
7359 if (st
->update_tail
)
7360 append_metadata_update(st
, u
, len
);
7365 fprintf(stderr
, Name
": (imsm) Operation "
7366 "is not allowed on this container\n");
7369 /* On volume level we support following operations
7370 * - takeover: raid10 -> raid0; raid0 -> raid10
7371 * - chunk size migration
7372 * - migration: raid5 -> raid0; raid0 -> raid5
7374 struct intel_super
*super
= st
->sb
;
7375 struct intel_dev
*dev
= super
->devlist
;
7377 dprintf("imsm: info: Volume operation\n");
7378 /* find requested device */
7380 imsm_find_array_minor_by_subdev(dev
->index
, st
->container_dev
, &devnum
);
7381 if (devnum
== geo
.dev_id
)
7386 fprintf(stderr
, Name
" Cannot find %s (%i) subarray\n",
7387 geo
.dev_name
, geo
.dev_id
);
7388 goto exit_imsm_reshape_super
;
7390 super
->current_vol
= dev
->index
;
7391 change
= imsm_analyze_change(st
, &geo
);
7394 ret_val
= imsm_takeover(st
, &geo
);
7404 exit_imsm_reshape_super
:
7405 dprintf("imsm: reshape_super Exit code = %i\n", ret_val
);
/* manage_reshape handler for IMSM.
 *
 * All arguments are forwarded unchanged to child_monitor() and its result
 * is returned as is.
 */
static int imsm_manage_reshape(
	int afd, struct mdinfo *sra, struct reshape *reshape,
	struct supertype *st, unsigned long stripes,
	int *fds, unsigned long long *offsets,
	int dests, int *destfd, unsigned long long *destoffsets)
{
	int rv;

	/* Just use child_monitor for now */
	rv = child_monitor(afd, sra, reshape, st, stripes,
			   fds, offsets, dests, destfd, destoffsets);

	return rv;
}
7420 #endif /* MDASSEMBLE */
7422 struct superswitch super_imsm
= {
7424 .examine_super
= examine_super_imsm
,
7425 .brief_examine_super
= brief_examine_super_imsm
,
7426 .brief_examine_subarrays
= brief_examine_subarrays_imsm
,
7427 .export_examine_super
= export_examine_super_imsm
,
7428 .detail_super
= detail_super_imsm
,
7429 .brief_detail_super
= brief_detail_super_imsm
,
7430 .write_init_super
= write_init_super_imsm
,
7431 .validate_geometry
= validate_geometry_imsm
,
7432 .add_to_super
= add_to_super_imsm
,
7433 .remove_from_super
= remove_from_super_imsm
,
7434 .detail_platform
= detail_platform_imsm
,
7435 .kill_subarray
= kill_subarray_imsm
,
7436 .update_subarray
= update_subarray_imsm
,
7437 .load_container
= load_container_imsm
,
7438 .default_geometry
= default_geometry_imsm
,
7439 .get_disk_controller_domain
= imsm_get_disk_controller_domain
,
7440 .reshape_super
= imsm_reshape_super
,
7441 .manage_reshape
= imsm_manage_reshape
,
7443 .match_home
= match_home_imsm
,
7444 .uuid_from_super
= uuid_from_super_imsm
,
7445 .getinfo_super
= getinfo_super_imsm
,
7446 .getinfo_super_disks
= getinfo_super_disks_imsm
,
7447 .update_super
= update_super_imsm
,
7449 .avail_size
= avail_size_imsm
,
7450 .min_acceptable_spare_size
= min_acceptable_spare_size_imsm
,
7452 .compare_super
= compare_super_imsm
,
7454 .load_super
= load_super_imsm
,
7455 .init_super
= init_super_imsm
,
7456 .store_super
= store_super_imsm
,
7457 .free_super
= free_super_imsm
,
7458 .match_metadata_desc
= match_metadata_desc_imsm
,
7459 .container_content
= container_content_imsm
,
7466 .open_new
= imsm_open_new
,
7467 .set_array_state
= imsm_set_array_state
,
7468 .set_disk
= imsm_set_disk
,
7469 .sync_metadata
= imsm_sync_metadata
,
7470 .activate_spare
= imsm_activate_spare
,
7471 .process_update
= imsm_process_update
,
7472 .prepare_update
= imsm_prepare_update
,
7473 #endif /* MDASSEMBLE */