2 * mdadm - Intel(R) Matrix Storage Manager Support
4 * Copyright (C) 2002-2008 Intel Corporation
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 #define HAVE_STDINT_H 1
24 #include "platform-intel.h"
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
54 #define MPB_SECTOR_CNT 418
55 #define IMSM_RESERVED_SECTORS 4096
56 #define SECT_PER_MB_SHIFT 11
58 /* Disk configuration info. */
59 #define IMSM_MAX_DEVICES 255
61 __u8 serial
[MAX_RAID_SERIAL_LEN
];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks
; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id
; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status
; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num
; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler
[IMSM_DISK_FILLERS
]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
73 /* RAID map configuration infos. */
75 __u32 pba_of_lba0
; /* start address of partition */
76 __u32 blocks_per_member
;/* blocks per member */
77 __u32 num_data_stripes
; /* number of data stripes */
78 __u16 blocks_per_strip
;
79 __u8 map_state
; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members
; /* number of member disks */
89 __u8 num_domains
; /* number of parity domains */
90 __u8 failed_disk_num
; /* valid only when state is degraded */
92 __u32 filler
[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl
[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
97 } __attribute__ ((packed
));
100 __u32 curr_migr_unit
;
101 __u32 checkpoint_id
; /* id to access curr_migr_unit */
102 __u8 migr_state
; /* Normal or Migrating */
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type
; /* Initializing, Rebuilding, ... */
111 __u8 fs_state
; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors
; /* number of mismatches */
113 __u16 bad_blocks
; /* number of bad blocks during verify */
115 struct imsm_map map
[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed
));
120 __u8 volume
[MAX_RAID_SERIAL_LEN
];
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status
; /* Persistent RaidDev status */
137 __u32 reserved_blocks
; /* Reserved blocks at beginning of volume */
141 __u8 cng_master_disk
;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler
[IMSM_DEV_FILLERS
];
148 } __attribute__ ((packed
));
151 __u8 sig
[MAX_SIGNATURE_LENGTH
]; /* 0x00 - 0x1F */
152 __u32 check_sum
; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size
; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num
; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num
; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size
; /* 0x30 - 0x33 in bytes */
157 __u32 attributes
; /* 0x34 - 0x37 */
158 __u8 num_disks
; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs
; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos
; /* 0x3A */
161 __u8 fill
[1]; /* 0x3B */
162 __u32 cache_size
; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num
; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count
; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size
; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler
[IMSM_FILLERS
]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk
[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed
));
173 #define BBM_LOG_MAX_ENTRIES 254
175 struct bbm_log_entry
{
176 __u64 defective_block_start
;
177 #define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset
;
179 __u16 remapped_marked_count
;
181 } __attribute__ ((__packed__
));
184 __u32 signature
; /* 0xABADB10C */
186 __u32 reserved_spare_block_count
; /* 0 */
187 __u32 reserved
; /* 0xFFFF */
188 __u64 first_spare_lba
;
189 struct bbm_log_entry mapped_block_entries
[BBM_LOG_MAX_ENTRIES
];
190 } __attribute__ ((__packed__
));
194 static char *map_state_str
[] = { "normal", "uninitialized", "degraded", "failed" };
197 static __u8
migr_type(struct imsm_dev
*dev
)
199 if (dev
->vol
.migr_type
== MIGR_VERIFY
&&
200 dev
->status
& DEV_VERIFY_AND_FIX
)
203 return dev
->vol
.migr_type
;
206 static void set_migr_type(struct imsm_dev
*dev
, __u8 migr_type
)
208 /* for compatibility with older oroms convert MIGR_REPAIR, into
209 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
211 if (migr_type
== MIGR_REPAIR
) {
212 dev
->vol
.migr_type
= MIGR_VERIFY
;
213 dev
->status
|= DEV_VERIFY_AND_FIX
;
215 dev
->vol
.migr_type
= migr_type
;
216 dev
->status
&= ~DEV_VERIFY_AND_FIX
;
/* Return the number of 512-byte sectors needed to hold @bytes bytes,
 * rounding a partial trailing sector up to a whole one.
 *
 * The quotient and remainder are computed separately instead of adding
 * 511 first: the addition is done in 32 bits and wraps for values above
 * 0xFFFFFE00, which made the old expression return 0 sectors.
 */
static unsigned int sector_count(__u32 bytes)
{
	return bytes / 512 + (bytes % 512 != 0);
}
225 static unsigned int mpb_sectors(struct imsm_super
*mpb
)
227 return sector_count(__le32_to_cpu(mpb
->mpb_size
));
231 struct imsm_dev
*dev
;
232 struct intel_dev
*next
;
237 enum sys_dev_type type
;
240 struct intel_hba
*next
;
247 /* internal representation of IMSM metadata */
250 void *buf
; /* O_DIRECT buffer for reading/writing metadata */
251 struct imsm_super
*anchor
; /* immovable parameters */
253 size_t len
; /* size of the 'buf' allocation */
254 void *next_buf
; /* for realloc'ing buf from the manager */
256 int updates_pending
; /* count of pending updates for mdmon */
257 int current_vol
; /* index of raid device undergoing creation */
258 __u32 create_offset
; /* common start for 'current_vol' */
259 __u32 random
; /* random data for seeding new family numbers */
260 struct intel_dev
*devlist
;
264 __u8 serial
[MAX_RAID_SERIAL_LEN
];
267 struct imsm_disk disk
;
270 struct extent
*e
; /* for determining freespace @ create */
271 int raiddisk
; /* slot to fill in autolayout */
274 struct dl
*disk_mgmt_list
; /* list of disks to add/remove while mdmon
276 struct dl
*missing
; /* disks removed while we weren't looking */
277 struct bbm_log
*bbm_log
;
278 struct intel_hba
*hba
; /* device path of the raid controller for this metadata */
279 const struct imsm_orom
*orom
; /* platform firmware support */
280 struct intel_super
*next
; /* (temp) list for disambiguating family_num */
284 struct imsm_disk disk
;
285 #define IMSM_UNKNOWN_OWNER (-1)
287 struct intel_disk
*next
;
291 unsigned long long start
, size
;
294 /* definitions of reshape process types */
295 enum imsm_reshape_type
{
300 /* definition of messages passed to imsm_process_update */
301 enum imsm_update_type
{
302 update_activate_spare
,
306 update_add_remove_disk
,
307 update_reshape_container_disks
,
311 struct imsm_update_activate_spare
{
312 enum imsm_update_type type
;
316 struct imsm_update_activate_spare
*next
;
329 enum takeover_direction
{
333 struct imsm_update_takeover
{
334 enum imsm_update_type type
;
336 enum takeover_direction direction
;
339 struct imsm_update_reshape
{
340 enum imsm_update_type type
;
343 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
347 __u8 serial
[MAX_RAID_SERIAL_LEN
];
350 struct imsm_update_create_array
{
351 enum imsm_update_type type
;
356 struct imsm_update_kill_array
{
357 enum imsm_update_type type
;
361 struct imsm_update_rename_array
{
362 enum imsm_update_type type
;
363 __u8 name
[MAX_RAID_SERIAL_LEN
];
367 struct imsm_update_add_remove_disk
{
368 enum imsm_update_type type
;
372 static const char *_sys_dev_type
[] = {
373 [SYS_DEV_UNKNOWN
] = "Unknown",
374 [SYS_DEV_SAS
] = "SAS",
375 [SYS_DEV_SATA
] = "SATA"
378 const char *get_sys_dev_type(enum sys_dev_type type
)
380 if (type
>= SYS_DEV_MAX
)
381 type
= SYS_DEV_UNKNOWN
;
383 return _sys_dev_type
[type
];
386 static struct intel_hba
* alloc_intel_hba(struct sys_dev
*device
)
388 struct intel_hba
*result
= malloc(sizeof(*result
));
390 result
->type
= device
->type
;
391 result
->path
= strdup(device
->path
);
393 if (result
->path
&& (result
->pci_id
= strrchr(result
->path
, '/')) != NULL
)
399 static struct intel_hba
* find_intel_hba(struct intel_hba
*hba
, struct sys_dev
*device
)
401 struct intel_hba
*result
=NULL
;
402 for (result
= hba
; result
; result
= result
->next
) {
403 if (result
->type
== device
->type
&& strcmp(result
->path
, device
->path
) == 0)
409 static int attach_hba_to_super(struct intel_super
*super
, struct sys_dev
*device
)
411 struct intel_hba
*hba
;
413 /* check if disk attached to Intel HBA */
414 hba
= find_intel_hba(super
->hba
, device
);
417 /* Check if HBA is already attached to super */
418 if (super
->hba
== NULL
) {
419 super
->hba
= alloc_intel_hba(device
);
424 /* Intel metadata requires all disks to be attached to the same type of HBA.
425 * Mixing of HBA types is not supported.
427 if (device
->type
!= hba
->type
)
433 hba
->next
= alloc_intel_hba(device
);
437 static struct sys_dev
* find_disk_attached_hba(int fd
, const char *devname
)
439 struct sys_dev
*list
, *elem
, *prev
;
442 if ((list
= find_intel_devices()) == NULL
)
446 disk_path
= (char *) devname
;
448 disk_path
= diskfd_to_devpath(fd
);
455 for (prev
= NULL
, elem
= list
; elem
; prev
= elem
, elem
= elem
->next
) {
456 if (path_attached_to_hba(disk_path
, elem
->path
)) {
460 prev
->next
= elem
->next
;
462 if (disk_path
!= devname
)
468 if (disk_path
!= devname
)
476 static int find_intel_hba_capability(int fd
, struct intel_super
*super
,
479 static struct supertype
*match_metadata_desc_imsm(char *arg
)
481 struct supertype
*st
;
483 if (strcmp(arg
, "imsm") != 0 &&
484 strcmp(arg
, "default") != 0
488 st
= malloc(sizeof(*st
));
491 memset(st
, 0, sizeof(*st
));
492 st
->container_dev
= NoMdDev
;
493 st
->ss
= &super_imsm
;
494 st
->max_devs
= IMSM_MAX_DEVICES
;
495 st
->minor_version
= 0;
501 static __u8
*get_imsm_version(struct imsm_super
*mpb
)
503 return &mpb
->sig
[MPB_SIG_LEN
];
507 /* retrieve a disk directly from the anchor when the anchor is known to be
508 * up-to-date, currently only at load time
510 static struct imsm_disk
*__get_imsm_disk(struct imsm_super
*mpb
, __u8 index
)
512 if (index
>= mpb
->num_disks
)
514 return &mpb
->disk
[index
];
517 /* retrieve the disk description based on a index of the disk
520 static struct dl
*get_imsm_dl_disk(struct intel_super
*super
, __u8 index
)
524 for (d
= super
->disks
; d
; d
= d
->next
)
525 if (d
->index
== index
)
530 /* retrieve a disk from the parsed metadata */
531 static struct imsm_disk
*get_imsm_disk(struct intel_super
*super
, __u8 index
)
535 dl
= get_imsm_dl_disk(super
, index
);
542 /* generate a checksum directly from the anchor when the anchor is known to be
543 * up-to-date, currently only at load or write_super after coalescing
545 static __u32
__gen_imsm_checksum(struct imsm_super
*mpb
)
547 __u32 end
= mpb
->mpb_size
/ sizeof(end
);
548 __u32
*p
= (__u32
*) mpb
;
552 sum
+= __le32_to_cpu(*p
);
556 return sum
- __le32_to_cpu(mpb
->check_sum
);
559 static size_t sizeof_imsm_map(struct imsm_map
*map
)
561 return sizeof(struct imsm_map
) + sizeof(__u32
) * (map
->num_members
- 1);
564 struct imsm_map
*get_imsm_map(struct imsm_dev
*dev
, int second_map
)
566 /* A device can have 2 maps if it is in the middle of a migration.
568 * 0 - we return the first map
569 * 1 - we return the second map if it exists, else NULL
570 * -1 - we return the second map if it exists, else the first
572 struct imsm_map
*map
= &dev
->vol
.map
[0];
574 if (second_map
== 1 && !dev
->vol
.migr_state
)
576 else if (second_map
== 1 ||
577 (second_map
< 0 && dev
->vol
.migr_state
)) {
580 return ptr
+ sizeof_imsm_map(map
);
586 /* return the size of the device.
587 * migr_state increases the returned size if map[0] were to be duplicated
589 static size_t sizeof_imsm_dev(struct imsm_dev
*dev
, int migr_state
)
591 size_t size
= sizeof(*dev
) - sizeof(struct imsm_map
) +
592 sizeof_imsm_map(get_imsm_map(dev
, 0));
594 /* migrating means an additional map */
595 if (dev
->vol
.migr_state
)
596 size
+= sizeof_imsm_map(get_imsm_map(dev
, 1));
598 size
+= sizeof_imsm_map(get_imsm_map(dev
, 0));
604 /* retrieve disk serial number list from a metadata update */
605 static struct disk_info
*get_disk_info(struct imsm_update_create_array
*update
)
608 struct disk_info
*inf
;
610 inf
= u
+ sizeof(*update
) - sizeof(struct imsm_dev
) +
611 sizeof_imsm_dev(&update
->dev
, 0);
617 static struct imsm_dev
*__get_imsm_dev(struct imsm_super
*mpb
, __u8 index
)
623 if (index
>= mpb
->num_raid_devs
)
626 /* devices start after all disks */
627 offset
= ((void *) &mpb
->disk
[mpb
->num_disks
]) - _mpb
;
629 for (i
= 0; i
<= index
; i
++)
631 return _mpb
+ offset
;
633 offset
+= sizeof_imsm_dev(_mpb
+ offset
, 0);
638 static struct imsm_dev
*get_imsm_dev(struct intel_super
*super
, __u8 index
)
640 struct intel_dev
*dv
;
642 if (index
>= super
->anchor
->num_raid_devs
)
644 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
645 if (dv
->index
== index
)
653 * == 1 get second map
654 * == -1 than get map according to the current migr_state
656 static __u32
get_imsm_ord_tbl_ent(struct imsm_dev
*dev
,
660 struct imsm_map
*map
;
662 map
= get_imsm_map(dev
, second_map
);
664 /* top byte identifies disk under rebuild */
665 return __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
668 #define ord_to_idx(ord) (((ord) << 8) >> 8)
669 static __u32
get_imsm_disk_idx(struct imsm_dev
*dev
, int slot
, int second_map
)
671 __u32 ord
= get_imsm_ord_tbl_ent(dev
, slot
, second_map
);
673 return ord_to_idx(ord
);
676 static void set_imsm_ord_tbl_ent(struct imsm_map
*map
, int slot
, __u32 ord
)
678 map
->disk_ord_tbl
[slot
] = __cpu_to_le32(ord
);
681 static int get_imsm_disk_slot(struct imsm_map
*map
, unsigned idx
)
686 for (slot
= 0; slot
< map
->num_members
; slot
++) {
687 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
688 if (ord_to_idx(ord
) == idx
)
695 static int get_imsm_raid_level(struct imsm_map
*map
)
697 if (map
->raid_level
== 1) {
698 if (map
->num_members
== 2)
704 return map
->raid_level
;
707 static int cmp_extent(const void *av
, const void *bv
)
709 const struct extent
*a
= av
;
710 const struct extent
*b
= bv
;
711 if (a
->start
< b
->start
)
713 if (a
->start
> b
->start
)
718 static int count_memberships(struct dl
*dl
, struct intel_super
*super
)
723 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
724 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
725 struct imsm_map
*map
= get_imsm_map(dev
, 0);
727 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
734 static struct extent
*get_extents(struct intel_super
*super
, struct dl
*dl
)
736 /* find a list of used extents on the given physical device */
737 struct extent
*rv
, *e
;
739 int memberships
= count_memberships(dl
, super
);
740 __u32 reservation
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
742 rv
= malloc(sizeof(struct extent
) * (memberships
+ 1));
747 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
748 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
749 struct imsm_map
*map
= get_imsm_map(dev
, 0);
751 if (get_imsm_disk_slot(map
, dl
->index
) >= 0) {
752 e
->start
= __le32_to_cpu(map
->pba_of_lba0
);
753 e
->size
= __le32_to_cpu(map
->blocks_per_member
);
757 qsort(rv
, memberships
, sizeof(*rv
), cmp_extent
);
759 /* determine the start of the metadata
760 * when no raid devices are defined use the default
761 * ...otherwise allow the metadata to truncate the value
762 * as is the case with older versions of imsm
765 struct extent
*last
= &rv
[memberships
- 1];
768 remainder
= __le32_to_cpu(dl
->disk
.total_blocks
) -
769 (last
->start
+ last
->size
);
770 /* round down to 1k block to satisfy precision of the kernel
774 /* make sure remainder is still sane */
775 if (remainder
< (unsigned)ROUND_UP(super
->len
, 512) >> 9)
776 remainder
= ROUND_UP(super
->len
, 512) >> 9;
777 if (reservation
> remainder
)
778 reservation
= remainder
;
780 e
->start
= __le32_to_cpu(dl
->disk
.total_blocks
) - reservation
;
785 /* try to determine how much space is reserved for metadata from
786 * the last get_extents() entry, otherwise fallback to the
789 static __u32
imsm_reserved_sectors(struct intel_super
*super
, struct dl
*dl
)
795 /* for spares just return a minimal reservation which will grow
796 * once the spare is picked up by an array
799 return MPB_SECTOR_CNT
;
801 e
= get_extents(super
, dl
);
803 return MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
805 /* scroll to last entry */
806 for (i
= 0; e
[i
].size
; i
++)
809 rv
= __le32_to_cpu(dl
->disk
.total_blocks
) - e
[i
].start
;
816 static int is_spare(struct imsm_disk
*disk
)
818 return (disk
->status
& SPARE_DISK
) == SPARE_DISK
;
821 static int is_configured(struct imsm_disk
*disk
)
823 return (disk
->status
& CONFIGURED_DISK
) == CONFIGURED_DISK
;
826 static int is_failed(struct imsm_disk
*disk
)
828 return (disk
->status
& FAILED_DISK
) == FAILED_DISK
;
831 /* Return minimum size of a spare that can be used in this array*/
832 static unsigned long long min_acceptable_spare_size_imsm(struct supertype
*st
)
834 struct intel_super
*super
= st
->sb
;
838 unsigned long long rv
= 0;
842 /* find first active disk in array */
844 while (dl
&& (is_failed(&dl
->disk
) || dl
->index
== -1))
848 /* find last lba used by subarrays */
849 e
= get_extents(super
, dl
);
852 for (i
= 0; e
[i
].size
; i
++)
855 rv
= e
[i
-1].start
+ e
[i
-1].size
;
857 /* add the amount of space needed for metadata */
858 rv
= rv
+ MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
863 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
);
865 static void print_imsm_dev(struct imsm_dev
*dev
, char *uuid
, int disk_idx
)
869 struct imsm_map
*map
= get_imsm_map(dev
, 0);
870 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
874 printf("[%.16s]:\n", dev
->volume
);
875 printf(" UUID : %s\n", uuid
);
876 printf(" RAID Level : %d", get_imsm_raid_level(map
));
878 printf(" <-- %d", get_imsm_raid_level(map2
));
880 printf(" Members : %d", map
->num_members
);
882 printf(" <-- %d", map2
->num_members
);
884 printf(" Slots : [");
885 for (i
= 0; i
< map
->num_members
; i
++) {
886 ord
= get_imsm_ord_tbl_ent(dev
, i
, 0);
887 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
892 for (i
= 0; i
< map2
->num_members
; i
++) {
893 ord
= get_imsm_ord_tbl_ent(dev
, i
, 1);
894 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
899 printf(" Failed disk : ");
900 if (map
->failed_disk_num
== 0xff)
903 printf("%i", map
->failed_disk_num
);
905 slot
= get_imsm_disk_slot(map
, disk_idx
);
907 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
908 printf(" This Slot : %d%s\n", slot
,
909 ord
& IMSM_ORD_REBUILD
? " (out-of-sync)" : "");
911 printf(" This Slot : ?\n");
912 sz
= __le32_to_cpu(dev
->size_high
);
914 sz
+= __le32_to_cpu(dev
->size_low
);
915 printf(" Array Size : %llu%s\n", (unsigned long long)sz
,
916 human_size(sz
* 512));
917 sz
= __le32_to_cpu(map
->blocks_per_member
);
918 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz
,
919 human_size(sz
* 512));
920 printf(" Sector Offset : %u\n",
921 __le32_to_cpu(map
->pba_of_lba0
));
922 printf(" Num Stripes : %u\n",
923 __le32_to_cpu(map
->num_data_stripes
));
924 printf(" Chunk Size : %u KiB",
925 __le16_to_cpu(map
->blocks_per_strip
) / 2);
927 printf(" <-- %u KiB",
928 __le16_to_cpu(map2
->blocks_per_strip
) / 2);
930 printf(" Reserved : %d\n", __le32_to_cpu(dev
->reserved_blocks
));
931 printf(" Migrate State : ");
932 if (dev
->vol
.migr_state
) {
933 if (migr_type(dev
) == MIGR_INIT
)
934 printf("initialize\n");
935 else if (migr_type(dev
) == MIGR_REBUILD
)
937 else if (migr_type(dev
) == MIGR_VERIFY
)
939 else if (migr_type(dev
) == MIGR_GEN_MIGR
)
940 printf("general migration\n");
941 else if (migr_type(dev
) == MIGR_STATE_CHANGE
)
942 printf("state change\n");
943 else if (migr_type(dev
) == MIGR_REPAIR
)
946 printf("<unknown:%d>\n", migr_type(dev
));
949 printf(" Map State : %s", map_state_str
[map
->map_state
]);
950 if (dev
->vol
.migr_state
) {
951 struct imsm_map
*map
= get_imsm_map(dev
, 1);
953 printf(" <-- %s", map_state_str
[map
->map_state
]);
954 printf("\n Checkpoint : %u (%llu)",
955 __le32_to_cpu(dev
->vol
.curr_migr_unit
),
956 (unsigned long long)blocks_per_migr_unit(dev
));
959 printf(" Dirty State : %s\n", dev
->vol
.dirty
? "dirty" : "clean");
962 static void print_imsm_disk(struct imsm_super
*mpb
, int index
, __u32 reserved
)
964 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, index
);
965 char str
[MAX_RAID_SERIAL_LEN
+ 1];
968 if (index
< 0 || !disk
)
972 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
973 printf(" Disk%02d Serial : %s\n", index
, str
);
974 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
975 is_configured(disk
) ? " active" : "",
976 is_failed(disk
) ? " failed" : "");
977 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
978 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
979 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
980 human_size(sz
* 512));
983 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
);
985 static void examine_super_imsm(struct supertype
*st
, char *homehost
)
987 struct intel_super
*super
= st
->sb
;
988 struct imsm_super
*mpb
= super
->anchor
;
989 char str
[MAX_SIGNATURE_LENGTH
];
994 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
997 snprintf(str
, MPB_SIG_LEN
, "%s", mpb
->sig
);
998 printf(" Magic : %s\n", str
);
999 snprintf(str
, strlen(MPB_VERSION_RAID0
), "%s", get_imsm_version(mpb
));
1000 printf(" Version : %s\n", get_imsm_version(mpb
));
1001 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb
->orig_family_num
));
1002 printf(" Family : %08x\n", __le32_to_cpu(mpb
->family_num
));
1003 printf(" Generation : %08x\n", __le32_to_cpu(mpb
->generation_num
));
1004 getinfo_super_imsm(st
, &info
, NULL
);
1005 fname_from_uuid(st
, &info
, nbuf
, ':');
1006 printf(" UUID : %s\n", nbuf
+ 5);
1007 sum
= __le32_to_cpu(mpb
->check_sum
);
1008 printf(" Checksum : %08x %s\n", sum
,
1009 __gen_imsm_checksum(mpb
) == sum
? "correct" : "incorrect");
1010 printf(" MPB Sectors : %d\n", mpb_sectors(mpb
));
1011 printf(" Disks : %d\n", mpb
->num_disks
);
1012 printf(" RAID Devices : %d\n", mpb
->num_raid_devs
);
1013 print_imsm_disk(mpb
, super
->disks
->index
, reserved
);
1014 if (super
->bbm_log
) {
1015 struct bbm_log
*log
= super
->bbm_log
;
1018 printf("Bad Block Management Log:\n");
1019 printf(" Log Size : %d\n", __le32_to_cpu(mpb
->bbm_log_size
));
1020 printf(" Signature : %x\n", __le32_to_cpu(log
->signature
));
1021 printf(" Entry Count : %d\n", __le32_to_cpu(log
->entry_count
));
1022 printf(" Spare Blocks : %d\n", __le32_to_cpu(log
->reserved_spare_block_count
));
1023 printf(" First Spare : %llx\n",
1024 (unsigned long long) __le64_to_cpu(log
->first_spare_lba
));
1026 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1028 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1030 super
->current_vol
= i
;
1031 getinfo_super_imsm(st
, &info
, NULL
);
1032 fname_from_uuid(st
, &info
, nbuf
, ':');
1033 print_imsm_dev(dev
, nbuf
+ 5, super
->disks
->index
);
1035 for (i
= 0; i
< mpb
->num_disks
; i
++) {
1036 if (i
== super
->disks
->index
)
1038 print_imsm_disk(mpb
, i
, reserved
);
1040 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1041 struct imsm_disk
*disk
;
1042 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1050 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1051 printf(" Disk Serial : %s\n", str
);
1052 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1053 is_configured(disk
) ? " active" : "",
1054 is_failed(disk
) ? " failed" : "");
1055 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1056 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1057 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1058 human_size(sz
* 512));
1062 static void brief_examine_super_imsm(struct supertype
*st
, int verbose
)
1064 /* We just write a generic IMSM ARRAY entry */
1067 struct intel_super
*super
= st
->sb
;
1069 if (!super
->anchor
->num_raid_devs
) {
1070 printf("ARRAY metadata=imsm\n");
1074 getinfo_super_imsm(st
, &info
, NULL
);
1075 fname_from_uuid(st
, &info
, nbuf
, ':');
1076 printf("ARRAY metadata=imsm UUID=%s\n", nbuf
+ 5);
1079 static void brief_examine_subarrays_imsm(struct supertype
*st
, int verbose
)
1081 /* We just write a generic IMSM ARRAY entry */
1085 struct intel_super
*super
= st
->sb
;
1088 if (!super
->anchor
->num_raid_devs
)
1091 getinfo_super_imsm(st
, &info
, NULL
);
1092 fname_from_uuid(st
, &info
, nbuf
, ':');
1093 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
1094 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1096 super
->current_vol
= i
;
1097 getinfo_super_imsm(st
, &info
, NULL
);
1098 fname_from_uuid(st
, &info
, nbuf1
, ':');
1099 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1100 dev
->volume
, nbuf
+ 5, i
, nbuf1
+ 5);
1104 static void export_examine_super_imsm(struct supertype
*st
)
1106 struct intel_super
*super
= st
->sb
;
1107 struct imsm_super
*mpb
= super
->anchor
;
1111 getinfo_super_imsm(st
, &info
, NULL
);
1112 fname_from_uuid(st
, &info
, nbuf
, ':');
1113 printf("MD_METADATA=imsm\n");
1114 printf("MD_LEVEL=container\n");
1115 printf("MD_UUID=%s\n", nbuf
+5);
1116 printf("MD_DEVICES=%u\n", mpb
->num_disks
);
1119 static void detail_super_imsm(struct supertype
*st
, char *homehost
)
1124 getinfo_super_imsm(st
, &info
, NULL
);
1125 fname_from_uuid(st
, &info
, nbuf
, ':');
1126 printf("\n UUID : %s\n", nbuf
+ 5);
1129 static void brief_detail_super_imsm(struct supertype
*st
)
1133 getinfo_super_imsm(st
, &info
, NULL
);
1134 fname_from_uuid(st
, &info
, nbuf
, ':');
1135 printf(" UUID=%s", nbuf
+ 5);
1138 static int imsm_read_serial(int fd
, char *devname
, __u8
*serial
);
1139 static void fd2devname(int fd
, char *name
);
1141 static int ahci_enumerate_ports(const char *hba_path
, int port_count
, int host_base
, int verbose
)
1143 /* dump an unsorted list of devices attached to AHCI Intel storage
1144 * controller, as well as non-connected ports
1146 int hba_len
= strlen(hba_path
) + 1;
1151 unsigned long port_mask
= (1 << port_count
) - 1;
1153 if (port_count
> (int)sizeof(port_mask
) * 8) {
1155 fprintf(stderr
, Name
": port_count %d out of range\n", port_count
);
1159 /* scroll through /sys/dev/block looking for devices attached to
1162 dir
= opendir("/sys/dev/block");
1163 for (ent
= dir
? readdir(dir
) : NULL
; ent
; ent
= readdir(dir
)) {
1174 if (sscanf(ent
->d_name
, "%d:%d", &major
, &minor
) != 2)
1176 path
= devt_to_devpath(makedev(major
, minor
));
1179 if (!path_attached_to_hba(path
, hba_path
)) {
1185 /* retrieve the scsi device type */
1186 if (asprintf(&device
, "/sys/dev/block/%d:%d/device/xxxxxxx", major
, minor
) < 0) {
1188 fprintf(stderr
, Name
": failed to allocate 'device'\n");
1192 sprintf(device
, "/sys/dev/block/%d:%d/device/type", major
, minor
);
1193 if (load_sys(device
, buf
) != 0) {
1195 fprintf(stderr
, Name
": failed to read device type for %s\n",
1201 type
= strtoul(buf
, NULL
, 10);
1203 /* if it's not a disk print the vendor and model */
1204 if (!(type
== 0 || type
== 7 || type
== 14)) {
1207 sprintf(device
, "/sys/dev/block/%d:%d/device/vendor", major
, minor
);
1208 if (load_sys(device
, buf
) == 0) {
1209 strncpy(vendor
, buf
, sizeof(vendor
));
1210 vendor
[sizeof(vendor
) - 1] = '\0';
1211 c
= (char *) &vendor
[sizeof(vendor
) - 1];
1212 while (isspace(*c
) || *c
== '\0')
1216 sprintf(device
, "/sys/dev/block/%d:%d/device/model", major
, minor
);
1217 if (load_sys(device
, buf
) == 0) {
1218 strncpy(model
, buf
, sizeof(model
));
1219 model
[sizeof(model
) - 1] = '\0';
1220 c
= (char *) &model
[sizeof(model
) - 1];
1221 while (isspace(*c
) || *c
== '\0')
1225 if (vendor
[0] && model
[0])
1226 sprintf(buf
, "%.64s %.64s", vendor
, model
);
1228 switch (type
) { /* numbers from hald/linux/device.c */
1229 case 1: sprintf(buf
, "tape"); break;
1230 case 2: sprintf(buf
, "printer"); break;
1231 case 3: sprintf(buf
, "processor"); break;
1233 case 5: sprintf(buf
, "cdrom"); break;
1234 case 6: sprintf(buf
, "scanner"); break;
1235 case 8: sprintf(buf
, "media_changer"); break;
1236 case 9: sprintf(buf
, "comm"); break;
1237 case 12: sprintf(buf
, "raid"); break;
1238 default: sprintf(buf
, "unknown");
1244 /* chop device path to 'host%d' and calculate the port number */
1245 c
= strchr(&path
[hba_len
], '/');
1248 fprintf(stderr
, Name
": %s - invalid path name\n", path
+ hba_len
);
1253 if (sscanf(&path
[hba_len
], "host%d", &port
) == 1)
1257 *c
= '/'; /* repair the full string */
1258 fprintf(stderr
, Name
": failed to determine port number for %s\n",
1265 /* mark this port as used */
1266 port_mask
&= ~(1 << port
);
1268 /* print out the device information */
1270 printf(" Port%d : - non-disk device (%s) -\n", port
, buf
);
1274 fd
= dev_open(ent
->d_name
, O_RDONLY
);
1276 printf(" Port%d : - disk info unavailable -\n", port
);
1278 fd2devname(fd
, buf
);
1279 printf(" Port%d : %s", port
, buf
);
1280 if (imsm_read_serial(fd
, NULL
, (__u8
*) buf
) == 0)
1281 printf(" (%s)\n", buf
);
1296 for (i
= 0; i
< port_count
; i
++)
1297 if (port_mask
& (1 << i
))
1298 printf(" Port%d : - no device attached -\n", i
);
1306 static void print_found_intel_controllers(struct sys_dev
*elem
)
1308 for (; elem
; elem
= elem
->next
) {
1309 fprintf(stderr
, Name
": found Intel(R) ");
1310 if (elem
->type
== SYS_DEV_SATA
)
1311 fprintf(stderr
, "SATA ");
1312 else if (elem
->type
== SYS_DEV_SAS
)
1313 fprintf(stderr
, "SAS ");
1314 fprintf(stderr
, "RAID controller");
1316 fprintf(stderr
, " at %s", elem
->pci_id
);
1317 fprintf(stderr
, ".\n");
1322 static int ahci_get_port_count(const char *hba_path
, int *port_count
)
1329 if ((dir
= opendir(hba_path
)) == NULL
)
1332 for (ent
= readdir(dir
); ent
; ent
= readdir(dir
)) {
1335 if (sscanf(ent
->d_name
, "host%d", &host
) != 1)
1337 if (*port_count
== 0)
1339 else if (host
< host_base
)
1342 if (host
+ 1 > *port_count
+ host_base
)
1343 *port_count
= host
+ 1 - host_base
;
1349 static void print_imsm_capability(const struct imsm_orom
*orom
)
1351 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1352 printf(" Version : %d.%d.%d.%d\n", orom
->major_ver
, orom
->minor_ver
,
1353 orom
->hotfix_ver
, orom
->build
);
1354 printf(" RAID Levels :%s%s%s%s%s\n",
1355 imsm_orom_has_raid0(orom
) ? " raid0" : "",
1356 imsm_orom_has_raid1(orom
) ? " raid1" : "",
1357 imsm_orom_has_raid1e(orom
) ? " raid1e" : "",
1358 imsm_orom_has_raid10(orom
) ? " raid10" : "",
1359 imsm_orom_has_raid5(orom
) ? " raid5" : "");
1360 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1361 imsm_orom_has_chunk(orom
, 2) ? " 2k" : "",
1362 imsm_orom_has_chunk(orom
, 4) ? " 4k" : "",
1363 imsm_orom_has_chunk(orom
, 8) ? " 8k" : "",
1364 imsm_orom_has_chunk(orom
, 16) ? " 16k" : "",
1365 imsm_orom_has_chunk(orom
, 32) ? " 32k" : "",
1366 imsm_orom_has_chunk(orom
, 64) ? " 64k" : "",
1367 imsm_orom_has_chunk(orom
, 128) ? " 128k" : "",
1368 imsm_orom_has_chunk(orom
, 256) ? " 256k" : "",
1369 imsm_orom_has_chunk(orom
, 512) ? " 512k" : "",
1370 imsm_orom_has_chunk(orom
, 1024*1) ? " 1M" : "",
1371 imsm_orom_has_chunk(orom
, 1024*2) ? " 2M" : "",
1372 imsm_orom_has_chunk(orom
, 1024*4) ? " 4M" : "",
1373 imsm_orom_has_chunk(orom
, 1024*8) ? " 8M" : "",
1374 imsm_orom_has_chunk(orom
, 1024*16) ? " 16M" : "",
1375 imsm_orom_has_chunk(orom
, 1024*32) ? " 32M" : "",
1376 imsm_orom_has_chunk(orom
, 1024*64) ? " 64M" : "");
1377 printf(" Max Disks : %d\n", orom
->tds
);
1378 printf(" Max Volumes : %d\n", orom
->vpa
);
1382 static int detail_platform_imsm(int verbose
, int enumerate_only
)
1384 /* There are two components to imsm platform support, the ahci SATA
1385 * controller and the option-rom. To find the SATA controller we
1386 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1387 * controller with the Intel vendor id is present. This approach
1388 * allows mdadm to leverage the kernel's ahci detection logic, with the
1389 * caveat that if ahci.ko is not loaded mdadm will not be able to
1390 * detect platform raid capabilities. The option-rom resides in a
1391 * platform "Adapter ROM". We scan for its signature to retrieve the
1392 * platform capabilities. If raid support is disabled in the BIOS the
1393 * option-rom capability structure will not be available.
1395 const struct imsm_orom
*orom
;
1396 struct sys_dev
*list
, *hba
;
1401 if (enumerate_only
) {
1402 if (check_env("IMSM_NO_PLATFORM"))
1404 list
= find_intel_devices();
1407 for (hba
= list
; hba
; hba
= hba
->next
) {
1408 orom
= find_imsm_capability(hba
->type
);
1414 free_sys_dev(&list
);
1418 list
= find_intel_devices();
1421 fprintf(stderr
, Name
": no active Intel(R) RAID "
1422 "controller found.\n");
1423 free_sys_dev(&list
);
1426 print_found_intel_controllers(list
);
1428 for (hba
= list
; hba
; hba
= hba
->next
) {
1429 orom
= find_imsm_capability(hba
->type
);
1431 fprintf(stderr
, Name
": imsm capabilities not found for controller: %s (type %s)\n",
1432 hba
->path
, get_sys_dev_type(hba
->type
));
1434 print_imsm_capability(orom
);
1437 for (hba
= list
; hba
; hba
= hba
->next
) {
1438 printf(" I/O Controller : %s (%s)\n",
1439 hba
->path
, get_sys_dev_type(hba
->type
));
1441 if (hba
->type
== SYS_DEV_SATA
) {
1442 host_base
= ahci_get_port_count(hba
->path
, &port_count
);
1443 if (ahci_enumerate_ports(hba
->path
, port_count
, host_base
, verbose
)) {
1445 fprintf(stderr
, Name
": failed to enumerate "
1446 "ports on SATA controller at %s.", hba
->pci_id
);
1452 free_sys_dev(&list
);
/* Decide whether the array described by @st belongs to @homehost.
 *
 * Always returns -1: imsm metadata records nothing that identifies a host,
 * so ownership can be neither confirmed nor denied.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* Member disks that do not belong are excluded by compare_super and
	 * the 'family_num' fields, and mdadm.conf names the arrays that
	 * should be assembled.  Auto-assembly may still pick up "foreign"
	 * arrays.
	 */
	(void) st;
	(void) homehost;

	return -1;
}
1471 static void uuid_from_super_imsm(struct supertype
*st
, int uuid
[4])
1473 /* The uuid returned here is used for:
1474 * uuid to put into bitmap file (Create, Grow)
1475 * uuid for backup header when saving critical section (Grow)
1476 * comparing uuids when re-adding a device into an array
1477 * In these cases the uuid required is that of the data-array,
1478 * not the device-set.
1479 * uuid to recognise same set when adding a missing device back
1480 * to an array. This is a uuid for the device-set.
1482 * For each of these we can make do with a truncated
1483 * or hashed uuid rather than the original, as long as
1485 * In each case the uuid required is that of the data-array,
1486 * not the device-set.
1488 /* imsm does not track uuid's so we synthesis one using sha1 on
1489 * - The signature (Which is constant for all imsm array, but no matter)
1490 * - the orig_family_num of the container
1491 * - the index number of the volume
1492 * - the 'serial' number of the volume.
1493 * Hopefully these are all constant.
1495 struct intel_super
*super
= st
->sb
;
1498 struct sha1_ctx ctx
;
1499 struct imsm_dev
*dev
= NULL
;
1502 /* some mdadm versions failed to set ->orig_family_num, in which
1503 * case fall back to ->family_num. orig_family_num will be
1504 * fixed up with the first metadata update.
1506 family_num
= super
->anchor
->orig_family_num
;
1507 if (family_num
== 0)
1508 family_num
= super
->anchor
->family_num
;
1509 sha1_init_ctx(&ctx
);
1510 sha1_process_bytes(super
->anchor
->sig
, MPB_SIG_LEN
, &ctx
);
1511 sha1_process_bytes(&family_num
, sizeof(__u32
), &ctx
);
1512 if (super
->current_vol
>= 0)
1513 dev
= get_imsm_dev(super
, super
->current_vol
);
1515 __u32 vol
= super
->current_vol
;
1516 sha1_process_bytes(&vol
, sizeof(vol
), &ctx
);
1517 sha1_process_bytes(dev
->volume
, MAX_RAID_SERIAL_LEN
, &ctx
);
1519 sha1_finish_ctx(&ctx
, buf
);
1520 memcpy(uuid
, buf
, 4*4);
1525 get_imsm_numerical_version(struct imsm_super
*mpb
, int *m
, int *p
)
1527 __u8
*v
= get_imsm_version(mpb
);
1528 __u8
*end
= mpb
->sig
+ MAX_SIGNATURE_LENGTH
;
1529 char major
[] = { 0, 0, 0 };
1530 char minor
[] = { 0 ,0, 0 };
1531 char patch
[] = { 0, 0, 0 };
1532 char *ver_parse
[] = { major
, minor
, patch
};
1536 while (*v
!= '\0' && v
< end
) {
1537 if (*v
!= '.' && j
< 2)
1538 ver_parse
[i
][j
++] = *v
;
1546 *m
= strtol(minor
, NULL
, 0);
1547 *p
= strtol(patch
, NULL
, 0);
1551 static __u32
migr_strip_blocks_resync(struct imsm_dev
*dev
)
1553 /* migr_strip_size when repairing or initializing parity */
1554 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1555 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1557 switch (get_imsm_raid_level(map
)) {
1562 return 128*1024 >> 9;
1566 static __u32
migr_strip_blocks_rebuild(struct imsm_dev
*dev
)
1568 /* migr_strip_size when rebuilding a degraded disk, no idea why
1569 * this is different than migr_strip_size_resync(), but it's good
1572 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1573 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1575 switch (get_imsm_raid_level(map
)) {
1578 if (map
->num_members
% map
->num_domains
== 0)
1579 return 128*1024 >> 9;
1583 return max((__u32
) 64*1024 >> 9, chunk
);
1585 return 128*1024 >> 9;
1589 static __u32
num_stripes_per_unit_resync(struct imsm_dev
*dev
)
1591 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1592 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1593 __u32 lo_chunk
= __le32_to_cpu(lo
->blocks_per_strip
);
1594 __u32 hi_chunk
= __le32_to_cpu(hi
->blocks_per_strip
);
1596 return max((__u32
) 1, hi_chunk
/ lo_chunk
);
1599 static __u32
num_stripes_per_unit_rebuild(struct imsm_dev
*dev
)
1601 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1602 int level
= get_imsm_raid_level(lo
);
1604 if (level
== 1 || level
== 10) {
1605 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1607 return hi
->num_domains
;
1609 return num_stripes_per_unit_resync(dev
);
1612 static __u8
imsm_num_data_members(struct imsm_dev
*dev
, int second_map
)
1614 /* named 'imsm_' because raid0, raid1 and raid10
1615 * counter-intuitively have the same number of data disks
1617 struct imsm_map
*map
= get_imsm_map(dev
, second_map
);
1619 switch (get_imsm_raid_level(map
)) {
1623 return map
->num_members
;
1625 return map
->num_members
- 1;
1627 dprintf("%s: unsupported raid level\n", __func__
);
1632 static __u32
parity_segment_depth(struct imsm_dev
*dev
)
1634 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1635 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1637 switch(get_imsm_raid_level(map
)) {
1640 return chunk
* map
->num_domains
;
1642 return chunk
* map
->num_members
;
1648 static __u32
map_migr_block(struct imsm_dev
*dev
, __u32 block
)
1650 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1651 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1652 __u32 strip
= block
/ chunk
;
1654 switch (get_imsm_raid_level(map
)) {
1657 __u32 vol_strip
= (strip
* map
->num_domains
) + 1;
1658 __u32 vol_stripe
= vol_strip
/ map
->num_members
;
1660 return vol_stripe
* chunk
+ block
% chunk
;
1662 __u32 stripe
= strip
/ (map
->num_members
- 1);
1664 return stripe
* chunk
+ block
% chunk
;
1671 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
)
1673 /* calculate the conversion factor between per member 'blocks'
1674 * (md/{resync,rebuild}_start) and imsm migration units, return
1675 * 0 for the 'not migrating' and 'unsupported migration' cases
1677 if (!dev
->vol
.migr_state
)
1680 switch (migr_type(dev
)) {
1685 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1686 __u32 stripes_per_unit
;
1687 __u32 blocks_per_unit
;
1696 /* yes, this is really the translation of migr_units to
1697 * per-member blocks in the 'resync' case
1699 stripes_per_unit
= num_stripes_per_unit_resync(dev
);
1700 migr_chunk
= migr_strip_blocks_resync(dev
);
1701 disks
= imsm_num_data_members(dev
, 0);
1702 blocks_per_unit
= stripes_per_unit
* migr_chunk
* disks
;
1703 stripe
= __le32_to_cpu(map
->blocks_per_strip
) * disks
;
1704 segment
= blocks_per_unit
/ stripe
;
1705 block_rel
= blocks_per_unit
- segment
* stripe
;
1706 parity_depth
= parity_segment_depth(dev
);
1707 block_map
= map_migr_block(dev
, block_rel
);
1708 return block_map
+ parity_depth
* segment
;
1710 case MIGR_REBUILD
: {
1711 __u32 stripes_per_unit
;
1714 stripes_per_unit
= num_stripes_per_unit_rebuild(dev
);
1715 migr_chunk
= migr_strip_blocks_rebuild(dev
);
1716 return migr_chunk
* stripes_per_unit
;
1718 case MIGR_STATE_CHANGE
:
1724 static int imsm_level_to_layout(int level
)
1732 return ALGORITHM_LEFT_ASYMMETRIC
;
1739 static void getinfo_super_imsm_volume(struct supertype
*st
, struct mdinfo
*info
, char *dmap
)
1741 struct intel_super
*super
= st
->sb
;
1742 struct imsm_dev
*dev
= get_imsm_dev(super
, super
->current_vol
);
1743 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1744 struct imsm_map
*prev_map
= get_imsm_map(dev
, 1);
1745 struct imsm_map
*map_to_analyse
= map
;
1748 int map_disks
= info
->array
.raid_disks
;
1751 map_to_analyse
= prev_map
;
1753 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
1754 if (dl
->raiddisk
== info
->disk
.raid_disk
)
1756 info
->container_member
= super
->current_vol
;
1757 info
->array
.raid_disks
= map_to_analyse
->num_members
;
1758 info
->array
.level
= get_imsm_raid_level(map_to_analyse
);
1759 info
->array
.layout
= imsm_level_to_layout(info
->array
.level
);
1760 info
->array
.md_minor
= -1;
1761 info
->array
.ctime
= 0;
1762 info
->array
.utime
= 0;
1763 info
->array
.chunk_size
=
1764 __le16_to_cpu(map_to_analyse
->blocks_per_strip
) << 9;
1765 info
->array
.state
= !dev
->vol
.dirty
;
1766 info
->custom_array_size
= __le32_to_cpu(dev
->size_high
);
1767 info
->custom_array_size
<<= 32;
1768 info
->custom_array_size
|= __le32_to_cpu(dev
->size_low
);
1769 if (prev_map
&& map
->map_state
== prev_map
->map_state
) {
1770 info
->reshape_active
= 1;
1771 info
->new_level
= get_imsm_raid_level(map
);
1772 info
->new_layout
= imsm_level_to_layout(info
->new_level
);
1773 info
->new_chunk
= __le16_to_cpu(map
->blocks_per_strip
) << 9;
1774 info
->delta_disks
= map
->num_members
- prev_map
->num_members
;
1775 if (info
->delta_disks
) {
1776 /* this needs to be applied to every array
1779 info
->reshape_active
= 2;
1781 /* We shape information that we give to md might have to be
1782 * modify to cope with md's requirement for reshaping arrays.
1783 * For example, when reshaping a RAID0, md requires it to be
1784 * presented as a degraded RAID4.
1785 * Also if a RAID0 is migrating to a RAID5 we need to specify
1786 * the array as already being RAID5, but the 'before' layout
1787 * is a RAID4-like layout.
1789 switch (info
->array
.level
) {
1791 switch(info
->new_level
) {
1793 /* conversion is happening as RAID4 */
1794 info
->array
.level
= 4;
1795 info
->array
.raid_disks
+= 1;
1798 /* conversion is happening as RAID5 */
1799 info
->array
.level
= 5;
1800 info
->array
.layout
= ALGORITHM_PARITY_N
;
1801 info
->array
.raid_disks
+= 1;
1802 info
->delta_disks
-= 1;
1805 /* FIXME error message */
1806 info
->array
.level
= UnSet
;
1812 info
->new_level
= UnSet
;
1813 info
->new_layout
= UnSet
;
1814 info
->new_chunk
= info
->array
.chunk_size
;
1815 info
->delta_disks
= 0;
1817 info
->disk
.major
= 0;
1818 info
->disk
.minor
= 0;
1820 info
->disk
.major
= dl
->major
;
1821 info
->disk
.minor
= dl
->minor
;
1824 info
->data_offset
= __le32_to_cpu(map_to_analyse
->pba_of_lba0
);
1825 info
->component_size
=
1826 __le32_to_cpu(map_to_analyse
->blocks_per_member
);
1827 memset(info
->uuid
, 0, sizeof(info
->uuid
));
1828 info
->recovery_start
= MaxSector
;
1830 info
->reshape_progress
= 0;
1831 info
->resync_start
= MaxSector
;
1832 if (map_to_analyse
->map_state
== IMSM_T_STATE_UNINITIALIZED
||
1834 info
->resync_start
= 0;
1836 if (dev
->vol
.migr_state
) {
1837 switch (migr_type(dev
)) {
1840 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
1841 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
1843 info
->resync_start
= blocks_per_unit
* units
;
1846 case MIGR_GEN_MIGR
: {
1847 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
1848 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
1849 unsigned long long array_blocks
;
1852 info
->reshape_progress
= blocks_per_unit
* units
;
1854 /* checkpoint is written per disks unit
1855 * recalculate it to reshape position
1857 used_disks
= imsm_num_data_members(dev
, 0);
1858 info
->reshape_progress
*= used_disks
;
1859 dprintf("IMSM: General Migration checkpoint : %llu "
1860 "(%llu) -> read reshape progress : %llu\n",
1861 units
, blocks_per_unit
, info
->reshape_progress
);
1863 used_disks
= imsm_num_data_members(dev
, 1);
1864 if (used_disks
> 0) {
1865 array_blocks
= map
->blocks_per_member
*
1867 /* round array size down to closest MB
1869 info
->custom_array_size
= (array_blocks
1870 >> SECT_PER_MB_SHIFT
)
1871 << SECT_PER_MB_SHIFT
;
1875 /* we could emulate the checkpointing of
1876 * 'sync_action=check' migrations, but for now
1877 * we just immediately complete them
1880 /* this is handled by container_content_imsm() */
1881 case MIGR_STATE_CHANGE
:
1882 /* FIXME handle other migrations */
1884 /* we are not dirty, so... */
1885 info
->resync_start
= MaxSector
;
1889 strncpy(info
->name
, (char *) dev
->volume
, MAX_RAID_SERIAL_LEN
);
1890 info
->name
[MAX_RAID_SERIAL_LEN
] = 0;
1892 info
->array
.major_version
= -1;
1893 info
->array
.minor_version
= -2;
1894 devname
= devnum2devname(st
->container_dev
);
1895 *info
->text_version
= '\0';
1897 sprintf(info
->text_version
, "/%s/%d", devname
, info
->container_member
);
1899 info
->safe_mode_delay
= 4000; /* 4 secs like the Matrix driver */
1900 uuid_from_super_imsm(st
, info
->uuid
);
1904 for (i
=0; i
<map_disks
; i
++) {
1906 if (i
< info
->array
.raid_disks
) {
1907 struct imsm_disk
*dsk
;
1908 j
= get_imsm_disk_idx(dev
, i
, -1);
1909 dsk
= get_imsm_disk(super
, j
);
1910 if (dsk
&& (dsk
->status
& CONFIGURED_DISK
))
1917 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
);
1918 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
);
1920 static struct imsm_disk
*get_imsm_missing(struct intel_super
*super
, __u8 index
)
1924 for (d
= super
->missing
; d
; d
= d
->next
)
1925 if (d
->index
== index
)
1930 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
)
1932 struct intel_super
*super
= st
->sb
;
1933 struct imsm_disk
*disk
;
1934 int map_disks
= info
->array
.raid_disks
;
1935 int max_enough
= -1;
1937 struct imsm_super
*mpb
;
1939 if (super
->current_vol
>= 0) {
1940 getinfo_super_imsm_volume(st
, info
, map
);
1944 /* Set raid_disks to zero so that Assemble will always pull in valid
1947 info
->array
.raid_disks
= 0;
1948 info
->array
.level
= LEVEL_CONTAINER
;
1949 info
->array
.layout
= 0;
1950 info
->array
.md_minor
= -1;
1951 info
->array
.ctime
= 0; /* N/A for imsm */
1952 info
->array
.utime
= 0;
1953 info
->array
.chunk_size
= 0;
1955 info
->disk
.major
= 0;
1956 info
->disk
.minor
= 0;
1957 info
->disk
.raid_disk
= -1;
1958 info
->reshape_active
= 0;
1959 info
->array
.major_version
= -1;
1960 info
->array
.minor_version
= -2;
1961 strcpy(info
->text_version
, "imsm");
1962 info
->safe_mode_delay
= 0;
1963 info
->disk
.number
= -1;
1964 info
->disk
.state
= 0;
1966 info
->recovery_start
= MaxSector
;
1968 /* do we have the all the insync disks that we expect? */
1969 mpb
= super
->anchor
;
1971 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1972 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1973 int failed
, enough
, j
, missing
= 0;
1974 struct imsm_map
*map
;
1977 failed
= imsm_count_failed(super
, dev
);
1978 state
= imsm_check_degraded(super
, dev
, failed
);
1979 map
= get_imsm_map(dev
, dev
->vol
.migr_state
);
1981 /* any newly missing disks?
1982 * (catches single-degraded vs double-degraded)
1984 for (j
= 0; j
< map
->num_members
; j
++) {
1985 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
1986 __u32 idx
= ord_to_idx(ord
);
1988 if (!(ord
& IMSM_ORD_REBUILD
) &&
1989 get_imsm_missing(super
, idx
)) {
1995 if (state
== IMSM_T_STATE_FAILED
)
1997 else if (state
== IMSM_T_STATE_DEGRADED
&&
1998 (state
!= map
->map_state
|| missing
))
2000 else /* we're normal, or already degraded */
2003 /* in the missing/failed disk case check to see
2004 * if at least one array is runnable
2006 max_enough
= max(max_enough
, enough
);
2008 dprintf("%s: enough: %d\n", __func__
, max_enough
);
2009 info
->container_enough
= max_enough
;
2012 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
2014 disk
= &super
->disks
->disk
;
2015 info
->data_offset
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
2016 info
->component_size
= reserved
;
2017 info
->disk
.state
= is_configured(disk
) ? (1 << MD_DISK_ACTIVE
) : 0;
2018 /* we don't change info->disk.raid_disk here because
2019 * this state will be finalized in mdmon after we have
2020 * found the 'most fresh' version of the metadata
2022 info
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2023 info
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2026 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2027 * ->compare_super may have updated the 'num_raid_devs' field for spares
2029 if (info
->disk
.state
& (1 << MD_DISK_SYNC
) || super
->anchor
->num_raid_devs
)
2030 uuid_from_super_imsm(st
, info
->uuid
);
2032 memcpy(info
->uuid
, uuid_zero
, sizeof(uuid_zero
));
2034 /* I don't know how to compute 'map' on imsm, so use safe default */
2037 for (i
= 0; i
< map_disks
; i
++)
2043 /* allocates memory and fills disk in mdinfo structure
2044 * for each disk in array */
2045 struct mdinfo
*getinfo_super_disks_imsm(struct supertype
*st
)
2047 struct mdinfo
*mddev
= NULL
;
2048 struct intel_super
*super
= st
->sb
;
2049 struct imsm_disk
*disk
;
2052 if (!super
|| !super
->disks
)
2055 mddev
= malloc(sizeof(*mddev
));
2057 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2060 memset(mddev
, 0, sizeof(*mddev
));
2064 tmp
= malloc(sizeof(*tmp
));
2066 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2071 memset(tmp
, 0, sizeof(*tmp
));
2073 tmp
->next
= mddev
->devs
;
2075 tmp
->disk
.number
= count
++;
2076 tmp
->disk
.major
= dl
->major
;
2077 tmp
->disk
.minor
= dl
->minor
;
2078 tmp
->disk
.state
= is_configured(disk
) ?
2079 (1 << MD_DISK_ACTIVE
) : 0;
2080 tmp
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2081 tmp
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2082 tmp
->disk
.raid_disk
= -1;
2088 static int update_super_imsm(struct supertype
*st
, struct mdinfo
*info
,
2089 char *update
, char *devname
, int verbose
,
2090 int uuid_set
, char *homehost
)
2092 /* For 'assemble' and 'force' we need to return non-zero if any
2093 * change was made. For others, the return value is ignored.
2094 * Update options are:
2095 * force-one : This device looks a bit old but needs to be included,
2096 * update age info appropriately.
2097 * assemble: clear any 'faulty' flag to allow this device to
2099 * force-array: Array is degraded but being forced, mark it clean
2100 * if that will be needed to assemble it.
2102 * newdev: not used ????
2103 * grow: Array has gained a new device - this is currently for
2105 * resync: mark as dirty so a resync will happen.
2106 * name: update the name - preserving the homehost
2107 * uuid: Change the uuid of the array to match watch is given
2109 * Following are not relevant for this imsm:
2110 * sparc2.2 : update from old dodgey metadata
2111 * super-minor: change the preferred_minor number
2112 * summaries: update redundant counters.
2113 * homehost: update the recorded homehost
2114 * _reshape_progress: record new reshape_progress position.
2117 struct intel_super
*super
= st
->sb
;
2118 struct imsm_super
*mpb
;
2120 /* we can only update container info */
2121 if (!super
|| super
->current_vol
>= 0 || !super
->anchor
)
2124 mpb
= super
->anchor
;
2126 if (strcmp(update
, "uuid") == 0 && uuid_set
&& !info
->update_private
)
2128 else if (strcmp(update
, "uuid") == 0 && uuid_set
&& info
->update_private
) {
2129 mpb
->orig_family_num
= *((__u32
*) info
->update_private
);
2131 } else if (strcmp(update
, "uuid") == 0) {
2132 __u32
*new_family
= malloc(sizeof(*new_family
));
2134 /* update orig_family_number with the incoming random
2135 * data, report the new effective uuid, and store the
2136 * new orig_family_num for future updates.
2139 memcpy(&mpb
->orig_family_num
, info
->uuid
, sizeof(__u32
));
2140 uuid_from_super_imsm(st
, info
->uuid
);
2141 *new_family
= mpb
->orig_family_num
;
2142 info
->update_private
= new_family
;
2145 } else if (strcmp(update
, "assemble") == 0)
2150 /* successful update? recompute checksum */
2152 mpb
->check_sum
= __le32_to_cpu(__gen_imsm_checksum(mpb
));
2157 static size_t disks_to_mpb_size(int disks
)
2161 size
= sizeof(struct imsm_super
);
2162 size
+= (disks
- 1) * sizeof(struct imsm_disk
);
2163 size
+= 2 * sizeof(struct imsm_dev
);
2164 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2165 size
+= (4 - 2) * sizeof(struct imsm_map
);
2166 /* 4 possible disk_ord_tbl's */
2167 size
+= 4 * (disks
- 1) * sizeof(__u32
);
2172 static __u64
avail_size_imsm(struct supertype
*st
, __u64 devsize
)
2174 if (devsize
< (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
))
2177 return devsize
- (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
);
2180 static void free_devlist(struct intel_super
*super
)
2182 struct intel_dev
*dv
;
2184 while (super
->devlist
) {
2185 dv
= super
->devlist
->next
;
2186 free(super
->devlist
->dev
);
2187 free(super
->devlist
);
2188 super
->devlist
= dv
;
/* Copy raid device @src over @dest.
 *
 * The number of bytes copied is sizeof_imsm_dev(src, 0); @dest must be at
 * least that large and must not overlap @src.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	const size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
2197 static int compare_super_imsm(struct supertype
*st
, struct supertype
*tst
)
2201 * 0 same, or first was empty, and second was copied
2202 * 1 second had wrong number
2204 * 3 wrong other info
2206 struct intel_super
*first
= st
->sb
;
2207 struct intel_super
*sec
= tst
->sb
;
2214 /* in platform dependent environment test if the disks
2215 * use the same Intel hba
2217 if (!check_env("IMSM_NO_PLATFORM")) {
2218 if (!first
->hba
|| !sec
->hba
||
2219 (first
->hba
->type
!= sec
->hba
->type
)) {
2221 "HBAs of devices does not match %s != %s\n",
2222 first
->hba
? get_sys_dev_type(first
->hba
->type
) : NULL
,
2223 sec
->hba
? get_sys_dev_type(sec
->hba
->type
) : NULL
);
2228 /* if an anchor does not have num_raid_devs set then it is a free
2231 if (first
->anchor
->num_raid_devs
> 0 &&
2232 sec
->anchor
->num_raid_devs
> 0) {
2233 /* Determine if these disks might ever have been
2234 * related. Further disambiguation can only take place
2235 * in load_super_imsm_all
2237 __u32 first_family
= first
->anchor
->orig_family_num
;
2238 __u32 sec_family
= sec
->anchor
->orig_family_num
;
2240 if (memcmp(first
->anchor
->sig
, sec
->anchor
->sig
,
2241 MAX_SIGNATURE_LENGTH
) != 0)
2244 if (first_family
== 0)
2245 first_family
= first
->anchor
->family_num
;
2246 if (sec_family
== 0)
2247 sec_family
= sec
->anchor
->family_num
;
2249 if (first_family
!= sec_family
)
2255 /* if 'first' is a spare promote it to a populated mpb with sec's
2258 if (first
->anchor
->num_raid_devs
== 0 &&
2259 sec
->anchor
->num_raid_devs
> 0) {
2261 struct intel_dev
*dv
;
2262 struct imsm_dev
*dev
;
2264 /* we need to copy raid device info from sec if an allocation
2265 * fails here we don't associate the spare
2267 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++) {
2268 dv
= malloc(sizeof(*dv
));
2271 dev
= malloc(sizeof_imsm_dev(get_imsm_dev(sec
, i
), 1));
2278 dv
->next
= first
->devlist
;
2279 first
->devlist
= dv
;
2281 if (i
< sec
->anchor
->num_raid_devs
) {
2282 /* allocation failure */
2283 free_devlist(first
);
2284 fprintf(stderr
, "imsm: failed to associate spare\n");
2287 first
->anchor
->num_raid_devs
= sec
->anchor
->num_raid_devs
;
2288 first
->anchor
->orig_family_num
= sec
->anchor
->orig_family_num
;
2289 first
->anchor
->family_num
= sec
->anchor
->family_num
;
2290 memcpy(first
->anchor
->sig
, sec
->anchor
->sig
, MAX_SIGNATURE_LENGTH
);
2291 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++)
2292 imsm_copy_dev(get_imsm_dev(first
, i
), get_imsm_dev(sec
, i
));
2298 static void fd2devname(int fd
, char *name
)
2302 char dname
[PATH_MAX
];
2307 if (fstat(fd
, &st
) != 0)
2309 sprintf(path
, "/sys/dev/block/%d:%d",
2310 major(st
.st_rdev
), minor(st
.st_rdev
));
2312 rv
= readlink(path
, dname
, sizeof(dname
));
2317 nm
= strrchr(dname
, '/');
2319 snprintf(name
, MAX_RAID_SERIAL_LEN
, "/dev/%s", nm
);
2322 extern int scsi_get_serial(int fd
, void *buf
, size_t buf_len
);
2324 static int imsm_read_serial(int fd
, char *devname
,
2325 __u8 serial
[MAX_RAID_SERIAL_LEN
])
2327 unsigned char scsi_serial
[255];
2336 memset(scsi_serial
, 0, sizeof(scsi_serial
));
2338 rv
= scsi_get_serial(fd
, scsi_serial
, sizeof(scsi_serial
));
2340 if (rv
&& check_env("IMSM_DEVNAME_AS_SERIAL")) {
2341 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2342 fd2devname(fd
, (char *) serial
);
2349 Name
": Failed to retrieve serial for %s\n",
2354 rsp_len
= scsi_serial
[3];
2358 Name
": Failed to retrieve serial for %s\n",
2362 rsp_buf
= (char *) &scsi_serial
[4];
2364 /* trim all whitespace and non-printable characters and convert
2367 for (i
= 0, dest
= rsp_buf
; i
< rsp_len
; i
++) {
2370 /* ':' is reserved for use in placeholder serial
2371 * numbers for missing disks
2379 len
= dest
- rsp_buf
;
2382 /* truncate leading characters */
2383 if (len
> MAX_RAID_SERIAL_LEN
) {
2384 dest
+= len
- MAX_RAID_SERIAL_LEN
;
2385 len
= MAX_RAID_SERIAL_LEN
;
2388 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2389 memcpy(serial
, dest
, len
);
2394 static int serialcmp(__u8
*s1
, __u8
*s2
)
2396 return strncmp((char *) s1
, (char *) s2
, MAX_RAID_SERIAL_LEN
);
2399 static void serialcpy(__u8
*dest
, __u8
*src
)
2401 strncpy((char *) dest
, (char *) src
, MAX_RAID_SERIAL_LEN
);
2405 static struct dl
*serial_to_dl(__u8
*serial
, struct intel_super
*super
)
2409 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2410 if (serialcmp(dl
->serial
, serial
) == 0)
2417 static struct imsm_disk
*
2418 __serial_to_disk(__u8
*serial
, struct imsm_super
*mpb
, int *idx
)
2422 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2423 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
2425 if (serialcmp(disk
->serial
, serial
) == 0) {
2436 load_imsm_disk(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2438 struct imsm_disk
*disk
;
2443 __u8 serial
[MAX_RAID_SERIAL_LEN
];
2445 rv
= imsm_read_serial(fd
, devname
, serial
);
2450 dl
= calloc(1, sizeof(*dl
));
2454 Name
": failed to allocate disk buffer for %s\n",
2460 dl
->major
= major(stb
.st_rdev
);
2461 dl
->minor
= minor(stb
.st_rdev
);
2462 dl
->next
= super
->disks
;
2463 dl
->fd
= keep_fd
? fd
: -1;
2464 assert(super
->disks
== NULL
);
2466 serialcpy(dl
->serial
, serial
);
2469 fd2devname(fd
, name
);
2471 dl
->devname
= strdup(devname
);
2473 dl
->devname
= strdup(name
);
2475 /* look up this disk's index in the current anchor */
2476 disk
= __serial_to_disk(dl
->serial
, super
->anchor
, &dl
->index
);
2479 /* only set index on disks that are a member of a
2480 * populated contianer, i.e. one with raid_devs
2482 if (is_failed(&dl
->disk
))
2484 else if (is_spare(&dl
->disk
))
2492 /* When migrating map0 contains the 'destination' state while map1
2493 * contains the current state. When not migrating map0 contains the
2494 * current state. This routine assumes that map[0].map_state is set to
2495 * the current array state before being called.
2497 * Migration is indicated by one of the following states
2498 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2499 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2500 * map1state=unitialized)
2501 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2503 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2504 * map1state=degraded)
2506 static void migrate(struct imsm_dev
*dev
, __u8 to_state
, int migr_type
)
2508 struct imsm_map
*dest
;
2509 struct imsm_map
*src
= get_imsm_map(dev
, 0);
2511 dev
->vol
.migr_state
= 1;
2512 set_migr_type(dev
, migr_type
);
2513 dev
->vol
.curr_migr_unit
= 0;
2514 dest
= get_imsm_map(dev
, 1);
2516 /* duplicate and then set the target end state in map[0] */
2517 memcpy(dest
, src
, sizeof_imsm_map(src
));
2518 if ((migr_type
== MIGR_REBUILD
) ||
2519 (migr_type
== MIGR_GEN_MIGR
)) {
2523 for (i
= 0; i
< src
->num_members
; i
++) {
2524 ord
= __le32_to_cpu(src
->disk_ord_tbl
[i
]);
2525 set_imsm_ord_tbl_ent(src
, i
, ord_to_idx(ord
));
2529 src
->map_state
= to_state
;
2532 static void end_migration(struct imsm_dev
*dev
, __u8 map_state
)
2534 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2535 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2538 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2539 * completed in the last migration.
2541 * FIXME add support for raid-level-migration
2543 for (i
= 0; i
< prev
->num_members
; i
++)
2544 for (j
= 0; j
< map
->num_members
; j
++)
2545 /* during online capacity expansion
2546 * disks position can be changed if takeover is used
2548 if (ord_to_idx(map
->disk_ord_tbl
[j
]) ==
2549 ord_to_idx(prev
->disk_ord_tbl
[i
])) {
2550 map
->disk_ord_tbl
[j
] |= prev
->disk_ord_tbl
[i
];
2554 dev
->vol
.migr_state
= 0;
2555 dev
->vol
.migr_type
= 0;
2556 dev
->vol
.curr_migr_unit
= 0;
2557 map
->map_state
= map_state
;
2561 static int parse_raid_devices(struct intel_super
*super
)
2564 struct imsm_dev
*dev_new
;
2565 size_t len
, len_migr
;
2567 size_t space_needed
= 0;
2568 struct imsm_super
*mpb
= super
->anchor
;
2570 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2571 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2572 struct intel_dev
*dv
;
2574 len
= sizeof_imsm_dev(dev_iter
, 0);
2575 len_migr
= sizeof_imsm_dev(dev_iter
, 1);
2577 space_needed
+= len_migr
- len
;
2579 dv
= malloc(sizeof(*dv
));
2582 if (max_len
< len_migr
)
2584 if (max_len
> len_migr
)
2585 space_needed
+= max_len
- len_migr
;
2586 dev_new
= malloc(max_len
);
2591 imsm_copy_dev(dev_new
, dev_iter
);
2594 dv
->next
= super
->devlist
;
2595 super
->devlist
= dv
;
2598 /* ensure that super->buf is large enough when all raid devices
2601 if (__le32_to_cpu(mpb
->mpb_size
) + space_needed
> super
->len
) {
2604 len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + space_needed
, 512);
2605 if (posix_memalign(&buf
, 512, len
) != 0)
2608 memcpy(buf
, super
->buf
, super
->len
);
2609 memset(buf
+ super
->len
, 0, len
- super
->len
);
2618 /* retrieve a pointer to the bbm log which starts after all raid devices */
2619 struct bbm_log
*__get_imsm_bbm_log(struct imsm_super
*mpb
)
2623 if (__le32_to_cpu(mpb
->bbm_log_size
)) {
2625 ptr
+= mpb
->mpb_size
- __le32_to_cpu(mpb
->bbm_log_size
);
2631 static void __free_imsm(struct intel_super
*super
, int free_disks
);
2633 /* load_imsm_mpb - read matrix metadata
2634 * allocates super->mpb to be freed by free_imsm
2636 static int load_imsm_mpb(int fd
, struct intel_super
*super
, char *devname
)
2638 unsigned long long dsize
;
2639 unsigned long long sectors
;
2641 struct imsm_super
*anchor
;
2644 get_dev_size(fd
, NULL
, &dsize
);
2648 Name
": %s: device to small for imsm\n",
2653 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0) {
2656 Name
": Cannot seek to anchor block on %s: %s\n",
2657 devname
, strerror(errno
));
2661 if (posix_memalign((void**)&anchor
, 512, 512) != 0) {
2664 Name
": Failed to allocate imsm anchor buffer"
2665 " on %s\n", devname
);
2668 if (read(fd
, anchor
, 512) != 512) {
2671 Name
": Cannot read anchor block on %s: %s\n",
2672 devname
, strerror(errno
));
2677 if (strncmp((char *) anchor
->sig
, MPB_SIGNATURE
, MPB_SIG_LEN
) != 0) {
2680 Name
": no IMSM anchor on %s\n", devname
);
2685 __free_imsm(super
, 0);
2686 /* reload capability and hba */
2688 /* capability and hba must be updated with new super allocation */
2689 find_intel_hba_capability(fd
, super
, devname
);
2690 super
->len
= ROUND_UP(anchor
->mpb_size
, 512);
2691 if (posix_memalign(&super
->buf
, 512, super
->len
) != 0) {
2694 Name
": unable to allocate %zu byte mpb buffer\n",
2699 memcpy(super
->buf
, anchor
, 512);
2701 sectors
= mpb_sectors(anchor
) - 1;
2704 check_sum
= __gen_imsm_checksum(super
->anchor
);
2705 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2708 Name
": IMSM checksum %x != %x on %s\n",
2710 __le32_to_cpu(super
->anchor
->check_sum
),
2718 /* read the extended mpb */
2719 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0) {
2722 Name
": Cannot seek to extended mpb on %s: %s\n",
2723 devname
, strerror(errno
));
2727 if ((unsigned)read(fd
, super
->buf
+ 512, super
->len
- 512) != super
->len
- 512) {
2730 Name
": Cannot read extended mpb on %s: %s\n",
2731 devname
, strerror(errno
));
2735 check_sum
= __gen_imsm_checksum(super
->anchor
);
2736 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2739 Name
": IMSM checksum %x != %x on %s\n",
2740 check_sum
, __le32_to_cpu(super
->anchor
->check_sum
),
2745 /* FIXME the BBM log is disk specific so we cannot use this global
2746 * buffer for all disks. Ok for now since we only look at the global
2747 * bbm_log_size parameter to gate assembly
2749 super
->bbm_log
= __get_imsm_bbm_log(super
->anchor
);
2755 load_and_parse_mpb(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2759 err
= load_imsm_mpb(fd
, super
, devname
);
2762 err
= load_imsm_disk(fd
, super
, devname
, keep_fd
);
2765 err
= parse_raid_devices(super
);
2770 static void __free_imsm_disk(struct dl
*d
)
2782 static void free_imsm_disks(struct intel_super
*super
)
2786 while (super
->disks
) {
2788 super
->disks
= d
->next
;
2789 __free_imsm_disk(d
);
2791 while (super
->disk_mgmt_list
) {
2792 d
= super
->disk_mgmt_list
;
2793 super
->disk_mgmt_list
= d
->next
;
2794 __free_imsm_disk(d
);
2796 while (super
->missing
) {
2798 super
->missing
= d
->next
;
2799 __free_imsm_disk(d
);
2804 /* free all the pieces hanging off of a super pointer */
2805 static void __free_imsm(struct intel_super
*super
, int free_disks
)
2807 struct intel_hba
*elem
, *next
;
2813 /* unlink capability description */
2816 free_imsm_disks(super
);
2817 free_devlist(super
);
2821 free((void *)elem
->path
);
2829 static void free_imsm(struct intel_super
*super
)
2831 __free_imsm(super
, 1);
2835 static void free_super_imsm(struct supertype
*st
)
2837 struct intel_super
*super
= st
->sb
;
2846 static struct intel_super
*alloc_super(void)
2848 struct intel_super
*super
= malloc(sizeof(*super
));
2851 memset(super
, 0, sizeof(*super
));
2852 super
->current_vol
= -1;
2853 super
->create_offset
= ~((__u32
) 0);
2859 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
2861 static int find_intel_hba_capability(int fd
, struct intel_super
*super
, char *devname
)
2863 struct sys_dev
*hba_name
;
2866 if ((fd
< 0) || check_env("IMSM_NO_PLATFORM")) {
2871 hba_name
= find_disk_attached_hba(fd
, NULL
);
2875 Name
": %s is not attached to Intel(R) RAID controller.\n",
2879 rv
= attach_hba_to_super(super
, hba_name
);
2882 struct intel_hba
*hba
= super
->hba
;
2884 fprintf(stderr
, Name
": %s is attached to Intel(R) %s RAID "
2885 "controller (%s),\n"
2886 " but the container is assigned to Intel(R) "
2887 "%s RAID controller (",
2890 hba_name
->pci_id
? : "Err!",
2891 get_sys_dev_type(hba_name
->type
));
2894 fprintf(stderr
, "%s", hba
->pci_id
? : "Err!");
2896 fprintf(stderr
, ", ");
2900 fprintf(stderr
, ").\n"
2901 " Mixing devices attached to different controllers "
2902 "is not allowed.\n");
2904 free_sys_dev(&hba_name
);
2907 super
->orom
= find_imsm_capability(hba_name
->type
);
2908 free_sys_dev(&hba_name
);
2915 /* find_missing - helper routine for load_super_imsm_all that identifies
2916 * disks that have disappeared from the system. This routine relies on
2917 * the mpb being up to date, which it is at load time.
2919 static int find_missing(struct intel_super
*super
)
2922 struct imsm_super
*mpb
= super
->anchor
;
2924 struct imsm_disk
*disk
;
2926 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2927 disk
= __get_imsm_disk(mpb
, i
);
2928 dl
= serial_to_dl(disk
->serial
, super
);
2932 dl
= malloc(sizeof(*dl
));
2938 dl
->devname
= strdup("missing");
2940 serialcpy(dl
->serial
, disk
->serial
);
2943 dl
->next
= super
->missing
;
2944 super
->missing
= dl
;
2950 static struct intel_disk
*disk_list_get(__u8
*serial
, struct intel_disk
*disk_list
)
2952 struct intel_disk
*idisk
= disk_list
;
2955 if (serialcmp(idisk
->disk
.serial
, serial
) == 0)
2957 idisk
= idisk
->next
;
2963 static int __prep_thunderdome(struct intel_super
**table
, int tbl_size
,
2964 struct intel_super
*super
,
2965 struct intel_disk
**disk_list
)
2967 struct imsm_disk
*d
= &super
->disks
->disk
;
2968 struct imsm_super
*mpb
= super
->anchor
;
2971 for (i
= 0; i
< tbl_size
; i
++) {
2972 struct imsm_super
*tbl_mpb
= table
[i
]->anchor
;
2973 struct imsm_disk
*tbl_d
= &table
[i
]->disks
->disk
;
2975 if (tbl_mpb
->family_num
== mpb
->family_num
) {
2976 if (tbl_mpb
->check_sum
== mpb
->check_sum
) {
2977 dprintf("%s: mpb from %d:%d matches %d:%d\n",
2978 __func__
, super
->disks
->major
,
2979 super
->disks
->minor
,
2980 table
[i
]->disks
->major
,
2981 table
[i
]->disks
->minor
);
2985 if (((is_configured(d
) && !is_configured(tbl_d
)) ||
2986 is_configured(d
) == is_configured(tbl_d
)) &&
2987 tbl_mpb
->generation_num
< mpb
->generation_num
) {
2988 /* current version of the mpb is a
2989 * better candidate than the one in
2990 * super_table, but copy over "cross
2991 * generational" status
2993 struct intel_disk
*idisk
;
2995 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
2996 __func__
, super
->disks
->major
,
2997 super
->disks
->minor
,
2998 table
[i
]->disks
->major
,
2999 table
[i
]->disks
->minor
);
3001 idisk
= disk_list_get(tbl_d
->serial
, *disk_list
);
3002 if (idisk
&& is_failed(&idisk
->disk
))
3003 tbl_d
->status
|= FAILED_DISK
;
3006 struct intel_disk
*idisk
;
3007 struct imsm_disk
*disk
;
3009 /* tbl_mpb is more up to date, but copy
3010 * over cross generational status before
3013 disk
= __serial_to_disk(d
->serial
, mpb
, NULL
);
3014 if (disk
&& is_failed(disk
))
3015 d
->status
|= FAILED_DISK
;
3017 idisk
= disk_list_get(d
->serial
, *disk_list
);
3020 if (disk
&& is_configured(disk
))
3021 idisk
->disk
.status
|= CONFIGURED_DISK
;
3024 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3025 __func__
, super
->disks
->major
,
3026 super
->disks
->minor
,
3027 table
[i
]->disks
->major
,
3028 table
[i
]->disks
->minor
);
3036 table
[tbl_size
++] = super
;
3040 /* update/extend the merged list of imsm_disk records */
3041 for (j
= 0; j
< mpb
->num_disks
; j
++) {
3042 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, j
);
3043 struct intel_disk
*idisk
;
3045 idisk
= disk_list_get(disk
->serial
, *disk_list
);
3047 idisk
->disk
.status
|= disk
->status
;
3048 if (is_configured(&idisk
->disk
) ||
3049 is_failed(&idisk
->disk
))
3050 idisk
->disk
.status
&= ~(SPARE_DISK
);
3052 idisk
= calloc(1, sizeof(*idisk
));
3055 idisk
->owner
= IMSM_UNKNOWN_OWNER
;
3056 idisk
->disk
= *disk
;
3057 idisk
->next
= *disk_list
;
3061 if (serialcmp(idisk
->disk
.serial
, d
->serial
) == 0)
3068 static struct intel_super
*
3069 validate_members(struct intel_super
*super
, struct intel_disk
*disk_list
,
3072 struct imsm_super
*mpb
= super
->anchor
;
3076 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3077 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
3078 struct intel_disk
*idisk
;
3080 idisk
= disk_list_get(disk
->serial
, disk_list
);
3082 if (idisk
->owner
== owner
||
3083 idisk
->owner
== IMSM_UNKNOWN_OWNER
)
3086 dprintf("%s: '%.16s' owner %d != %d\n",
3087 __func__
, disk
->serial
, idisk
->owner
,
3090 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3091 __func__
, __le32_to_cpu(mpb
->family_num
), i
,
3097 if (ok_count
== mpb
->num_disks
)
3102 static void show_conflicts(__u32 family_num
, struct intel_super
*super_list
)
3104 struct intel_super
*s
;
3106 for (s
= super_list
; s
; s
= s
->next
) {
3107 if (family_num
!= s
->anchor
->family_num
)
3109 fprintf(stderr
, "Conflict, offlining family %#x on '%s'\n",
3110 __le32_to_cpu(family_num
), s
->disks
->devname
);
3114 static struct intel_super
*
3115 imsm_thunderdome(struct intel_super
**super_list
, int len
)
3117 struct intel_super
*super_table
[len
];
3118 struct intel_disk
*disk_list
= NULL
;
3119 struct intel_super
*champion
, *spare
;
3120 struct intel_super
*s
, **del
;
3125 memset(super_table
, 0, sizeof(super_table
));
3126 for (s
= *super_list
; s
; s
= s
->next
)
3127 tbl_size
= __prep_thunderdome(super_table
, tbl_size
, s
, &disk_list
);
3129 for (i
= 0; i
< tbl_size
; i
++) {
3130 struct imsm_disk
*d
;
3131 struct intel_disk
*idisk
;
3132 struct imsm_super
*mpb
= super_table
[i
]->anchor
;
3135 d
= &s
->disks
->disk
;
3137 /* 'd' must appear in merged disk list for its
3138 * configuration to be valid
3140 idisk
= disk_list_get(d
->serial
, disk_list
);
3141 if (idisk
&& idisk
->owner
== i
)
3142 s
= validate_members(s
, disk_list
, i
);
3147 dprintf("%s: marking family: %#x from %d:%d offline\n",
3148 __func__
, mpb
->family_num
,
3149 super_table
[i
]->disks
->major
,
3150 super_table
[i
]->disks
->minor
);
3154 /* This is where the mdadm implementation differs from the Windows
3155 * driver which has no strict concept of a container. We can only
3156 * assemble one family from a container, so when returning a prodigal
3157 * array member to this system the code will not be able to disambiguate
3158 * the container contents that should be assembled ("foreign" versus
3159 * "local"). It requires user intervention to set the orig_family_num
3160 * to a new value to establish a new container. The Windows driver in
3161 * this situation fixes up the volume name in place and manages the
3162 * foreign array as an independent entity.
3167 for (i
= 0; i
< tbl_size
; i
++) {
3168 struct intel_super
*tbl_ent
= super_table
[i
];
3174 if (tbl_ent
->anchor
->num_raid_devs
== 0) {
3179 if (s
&& !is_spare
) {
3180 show_conflicts(tbl_ent
->anchor
->family_num
, *super_list
);
3182 } else if (!s
&& !is_spare
)
3195 fprintf(stderr
, "Chose family %#x on '%s', "
3196 "assemble conflicts to new container with '--update=uuid'\n",
3197 __le32_to_cpu(s
->anchor
->family_num
), s
->disks
->devname
);
3199 /* collect all dl's onto 'champion', and update them to
3200 * champion's version of the status
3202 for (s
= *super_list
; s
; s
= s
->next
) {
3203 struct imsm_super
*mpb
= champion
->anchor
;
3204 struct dl
*dl
= s
->disks
;
3209 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3210 struct imsm_disk
*disk
;
3212 disk
= __serial_to_disk(dl
->serial
, mpb
, &dl
->index
);
3215 /* only set index on disks that are a member of
3216 * a populated container, i.e. one with
3219 if (is_failed(&dl
->disk
))
3221 else if (is_spare(&dl
->disk
))
3227 if (i
>= mpb
->num_disks
) {
3228 struct intel_disk
*idisk
;
3230 idisk
= disk_list_get(dl
->serial
, disk_list
);
3231 if (idisk
&& is_spare(&idisk
->disk
) &&
3232 !is_failed(&idisk
->disk
) && !is_configured(&idisk
->disk
))
3240 dl
->next
= champion
->disks
;
3241 champion
->disks
= dl
;
3245 /* delete 'champion' from super_list */
3246 for (del
= super_list
; *del
; ) {
3247 if (*del
== champion
) {
3248 *del
= (*del
)->next
;
3251 del
= &(*del
)->next
;
3253 champion
->next
= NULL
;
3257 struct intel_disk
*idisk
= disk_list
;
3259 disk_list
= disk_list
->next
;
3266 static int load_super_imsm_all(struct supertype
*st
, int fd
, void **sbp
,
3270 struct intel_super
*super_list
= NULL
;
3271 struct intel_super
*super
= NULL
;
3272 int devnum
= fd2devnum(fd
);
3278 /* check if 'fd' an opened container */
3279 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3283 if (sra
->array
.major_version
!= -1 ||
3284 sra
->array
.minor_version
!= -2 ||
3285 strcmp(sra
->text_version
, "imsm") != 0) {
3290 for (sd
= sra
->devs
, i
= 0; sd
; sd
= sd
->next
, i
++) {
3291 struct intel_super
*s
= alloc_super();
3299 s
->next
= super_list
;
3303 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3304 dfd
= dev_open(nm
, O_RDWR
);
3308 rv
= find_intel_hba_capability(dfd
, s
, devname
);
3309 /* no orom/efi or non-intel hba of the disk */
3313 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3315 /* retry the load if we might have raced against mdmon */
3316 if (err
== 3 && mdmon_running(devnum
))
3317 for (retry
= 0; retry
< 3; retry
++) {
3319 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3327 /* all mpbs enter, maybe one leaves */
3328 super
= imsm_thunderdome(&super_list
, i
);
3334 if (find_missing(super
) != 0) {
3342 while (super_list
) {
3343 struct intel_super
*s
= super_list
;
3345 super_list
= super_list
->next
;
3354 st
->container_dev
= devnum
;
3355 if (err
== 0 && st
->ss
== NULL
) {
3356 st
->ss
= &super_imsm
;
3357 st
->minor_version
= 0;
3358 st
->max_devs
= IMSM_MAX_DEVICES
;
3363 static int load_container_imsm(struct supertype
*st
, int fd
, char *devname
)
3365 return load_super_imsm_all(st
, fd
, &st
->sb
, devname
);
3369 static int load_super_imsm(struct supertype
*st
, int fd
, char *devname
)
3371 struct intel_super
*super
;
3374 if (test_partition(fd
))
3375 /* IMSM not allowed on partitions */
3378 free_super_imsm(st
);
3380 super
= alloc_super();
3383 Name
": malloc of %zu failed.\n",
3387 /* Load hba and capabilities if they exist.
3388 * But do not preclude loading metadata in case capabilities or hba are
3389 * non-compliant and ignore_hw_compat is set.
3391 rv
= find_intel_hba_capability(fd
, super
, devname
);
3392 /* no orom/efi or non-intel hba of the disk */
3393 if ((rv
!= 0) && (st
->ignore_hw_compat
== 0)) {
3396 Name
": No OROM/EFI properties for %s\n", devname
);
3400 rv
= load_and_parse_mpb(fd
, super
, devname
, 0);
3405 Name
": Failed to load all information "
3406 "sections on %s\n", devname
);
3412 if (st
->ss
== NULL
) {
3413 st
->ss
= &super_imsm
;
3414 st
->minor_version
= 0;
3415 st
->max_devs
= IMSM_MAX_DEVICES
;
3420 static __u16
info_to_blocks_per_strip(mdu_array_info_t
*info
)
3422 if (info
->level
== 1)
3424 return info
->chunk_size
>> 9;
3427 static __u32
info_to_num_data_stripes(mdu_array_info_t
*info
, int num_domains
)
3431 num_stripes
= (info
->size
* 2) / info_to_blocks_per_strip(info
);
3432 num_stripes
/= num_domains
;
3437 static __u32
info_to_blocks_per_member(mdu_array_info_t
*info
)
3439 if (info
->level
== 1)
3440 return info
->size
* 2;
3442 return (info
->size
* 2) & ~(info_to_blocks_per_strip(info
) - 1);
3445 static void imsm_update_version_info(struct intel_super
*super
)
3447 /* update the version and attributes */
3448 struct imsm_super
*mpb
= super
->anchor
;
3450 struct imsm_dev
*dev
;
3451 struct imsm_map
*map
;
3454 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3455 dev
= get_imsm_dev(super
, i
);
3456 map
= get_imsm_map(dev
, 0);
3457 if (__le32_to_cpu(dev
->size_high
) > 0)
3458 mpb
->attributes
|= MPB_ATTRIB_2TB
;
3460 /* FIXME detect when an array spans a port multiplier */
3462 mpb
->attributes
|= MPB_ATTRIB_PM
;
3465 if (mpb
->num_raid_devs
> 1 ||
3466 mpb
->attributes
!= MPB_ATTRIB_CHECKSUM_VERIFY
) {
3467 version
= MPB_VERSION_ATTRIBS
;
3468 switch (get_imsm_raid_level(map
)) {
3469 case 0: mpb
->attributes
|= MPB_ATTRIB_RAID0
; break;
3470 case 1: mpb
->attributes
|= MPB_ATTRIB_RAID1
; break;
3471 case 10: mpb
->attributes
|= MPB_ATTRIB_RAID10
; break;
3472 case 5: mpb
->attributes
|= MPB_ATTRIB_RAID5
; break;
3475 if (map
->num_members
>= 5)
3476 version
= MPB_VERSION_5OR6_DISK_ARRAY
;
3477 else if (dev
->status
== DEV_CLONE_N_GO
)
3478 version
= MPB_VERSION_CNG
;
3479 else if (get_imsm_raid_level(map
) == 5)
3480 version
= MPB_VERSION_RAID5
;
3481 else if (map
->num_members
>= 3)
3482 version
= MPB_VERSION_3OR4_DISK_ARRAY
;
3483 else if (get_imsm_raid_level(map
) == 1)
3484 version
= MPB_VERSION_RAID1
;
3486 version
= MPB_VERSION_RAID0
;
3488 strcpy(((char *) mpb
->sig
) + strlen(MPB_SIGNATURE
), version
);
3492 static int check_name(struct intel_super
*super
, char *name
, int quiet
)
3494 struct imsm_super
*mpb
= super
->anchor
;
3495 char *reason
= NULL
;
3498 if (strlen(name
) > MAX_RAID_SERIAL_LEN
)
3499 reason
= "must be 16 characters or less";
3501 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3502 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
3504 if (strncmp((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
) == 0) {
3505 reason
= "already exists";
3510 if (reason
&& !quiet
)
3511 fprintf(stderr
, Name
": imsm volume name %s\n", reason
);
3516 static int init_super_imsm_volume(struct supertype
*st
, mdu_array_info_t
*info
,
3517 unsigned long long size
, char *name
,
3518 char *homehost
, int *uuid
)
3520 /* We are creating a volume inside a pre-existing container.
3521 * so st->sb is already set.
3523 struct intel_super
*super
= st
->sb
;
3524 struct imsm_super
*mpb
= super
->anchor
;
3525 struct intel_dev
*dv
;
3526 struct imsm_dev
*dev
;
3527 struct imsm_vol
*vol
;
3528 struct imsm_map
*map
;
3529 int idx
= mpb
->num_raid_devs
;
3531 unsigned long long array_blocks
;
3532 size_t size_old
, size_new
;
3533 __u32 num_data_stripes
;
3535 if (super
->orom
&& mpb
->num_raid_devs
>= super
->orom
->vpa
) {
3536 fprintf(stderr
, Name
": This imsm-container already has the "
3537 "maximum of %d volumes\n", super
->orom
->vpa
);
3541 /* ensure the mpb is large enough for the new data */
3542 size_old
= __le32_to_cpu(mpb
->mpb_size
);
3543 size_new
= disks_to_mpb_size(info
->nr_disks
);
3544 if (size_new
> size_old
) {
3546 size_t size_round
= ROUND_UP(size_new
, 512);
3548 if (posix_memalign(&mpb_new
, 512, size_round
) != 0) {
3549 fprintf(stderr
, Name
": could not allocate new mpb\n");
3552 memcpy(mpb_new
, mpb
, size_old
);
3555 super
->anchor
= mpb_new
;
3556 mpb
->mpb_size
= __cpu_to_le32(size_new
);
3557 memset(mpb_new
+ size_old
, 0, size_round
- size_old
);
3559 super
->current_vol
= idx
;
3560 /* when creating the first raid device in this container set num_disks
3561 * to zero, i.e. delete this spare and add raid member devices in
3562 * add_to_super_imsm_volume()
3564 if (super
->current_vol
== 0)
3567 if (!check_name(super
, name
, 0))
3569 dv
= malloc(sizeof(*dv
));
3571 fprintf(stderr
, Name
": failed to allocate device list entry\n");
3574 dev
= calloc(1, sizeof(*dev
) + sizeof(__u32
) * (info
->raid_disks
- 1));
3577 fprintf(stderr
, Name
": could not allocate raid device\n");
3581 strncpy((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
);
3582 if (info
->level
== 1)
3583 array_blocks
= info_to_blocks_per_member(info
);
3585 array_blocks
= calc_array_size(info
->level
, info
->raid_disks
,
3586 info
->layout
, info
->chunk_size
,
3588 /* round array size down to closest MB */
3589 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
3591 dev
->size_low
= __cpu_to_le32((__u32
) array_blocks
);
3592 dev
->size_high
= __cpu_to_le32((__u32
) (array_blocks
>> 32));
3593 dev
->status
= (DEV_READ_COALESCING
| DEV_WRITE_COALESCING
);
3595 vol
->migr_state
= 0;
3596 set_migr_type(dev
, MIGR_INIT
);
3598 vol
->curr_migr_unit
= 0;
3599 map
= get_imsm_map(dev
, 0);
3600 map
->pba_of_lba0
= __cpu_to_le32(super
->create_offset
);
3601 map
->blocks_per_member
= __cpu_to_le32(info_to_blocks_per_member(info
));
3602 map
->blocks_per_strip
= __cpu_to_le16(info_to_blocks_per_strip(info
));
3603 map
->failed_disk_num
= ~0;
3604 map
->map_state
= info
->level
? IMSM_T_STATE_UNINITIALIZED
:
3605 IMSM_T_STATE_NORMAL
;
3608 if (info
->level
== 1 && info
->raid_disks
> 2) {
3611 fprintf(stderr
, Name
": imsm does not support more than 2 disks"
3612 "in a raid1 volume\n");
3616 map
->raid_level
= info
->level
;
3617 if (info
->level
== 10) {
3618 map
->raid_level
= 1;
3619 map
->num_domains
= info
->raid_disks
/ 2;
3620 } else if (info
->level
== 1)
3621 map
->num_domains
= info
->raid_disks
;
3623 map
->num_domains
= 1;
3625 num_data_stripes
= info_to_num_data_stripes(info
, map
->num_domains
);
3626 map
->num_data_stripes
= __cpu_to_le32(num_data_stripes
);
3628 map
->num_members
= info
->raid_disks
;
3629 for (i
= 0; i
< map
->num_members
; i
++) {
3630 /* initialized in add_to_super */
3631 set_imsm_ord_tbl_ent(map
, i
, IMSM_ORD_REBUILD
);
3633 mpb
->num_raid_devs
++;
3636 dv
->index
= super
->current_vol
;
3637 dv
->next
= super
->devlist
;
3638 super
->devlist
= dv
;
3640 imsm_update_version_info(super
);
3645 static int init_super_imsm(struct supertype
*st
, mdu_array_info_t
*info
,
3646 unsigned long long size
, char *name
,
3647 char *homehost
, int *uuid
)
3649 /* This is primarily called by Create when creating a new array.
3650 * We will then get add_to_super called for each component, and then
3651 * write_init_super called to write it out to each device.
3652 * For IMSM, Create can create on fresh devices or on a pre-existing
3654 * To create on a pre-existing array a different method will be called.
3655 * This one is just for fresh drives.
3657 struct intel_super
*super
;
3658 struct imsm_super
*mpb
;
3663 return init_super_imsm_volume(st
, info
, size
, name
, homehost
, uuid
);
3666 mpb_size
= disks_to_mpb_size(info
->nr_disks
);
3670 super
= alloc_super();
3671 if (super
&& posix_memalign(&super
->buf
, 512, mpb_size
) != 0) {
3676 fprintf(stderr
, Name
3677 ": %s could not allocate superblock\n", __func__
);
3680 memset(super
->buf
, 0, mpb_size
);
3682 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
3686 /* zeroing superblock */
3690 mpb
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
3692 version
= (char *) mpb
->sig
;
3693 strcpy(version
, MPB_SIGNATURE
);
3694 version
+= strlen(MPB_SIGNATURE
);
3695 strcpy(version
, MPB_VERSION_RAID0
);
3701 static int add_to_super_imsm_volume(struct supertype
*st
, mdu_disk_info_t
*dk
,
3702 int fd
, char *devname
)
3704 struct intel_super
*super
= st
->sb
;
3705 struct imsm_super
*mpb
= super
->anchor
;
3707 struct imsm_dev
*dev
;
3708 struct imsm_map
*map
;
3711 dev
= get_imsm_dev(super
, super
->current_vol
);
3712 map
= get_imsm_map(dev
, 0);
3714 if (! (dk
->state
& (1<<MD_DISK_SYNC
))) {
3715 fprintf(stderr
, Name
": %s: Cannot add spare devices to IMSM volume\n",
3721 /* we're doing autolayout so grab the pre-marked (in
3722 * validate_geometry) raid_disk
3724 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
3725 if (dl
->raiddisk
== dk
->raid_disk
)
3728 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
3729 if (dl
->major
== dk
->major
&&
3730 dl
->minor
== dk
->minor
)
3735 fprintf(stderr
, Name
": %s is not a member of the same container\n", devname
);
3739 /* add a pristine spare to the metadata */
3740 if (dl
->index
< 0) {
3741 dl
->index
= super
->anchor
->num_disks
;
3742 super
->anchor
->num_disks
++;
3744 /* Check the device has not already been added */
3745 slot
= get_imsm_disk_slot(map
, dl
->index
);
3747 (get_imsm_ord_tbl_ent(dev
, slot
, -1) & IMSM_ORD_REBUILD
) == 0) {
3748 fprintf(stderr
, Name
": %s has been included in this array twice\n",
3752 set_imsm_ord_tbl_ent(map
, dk
->number
, dl
->index
);
3753 dl
->disk
.status
= CONFIGURED_DISK
;
3755 /* if we are creating the first raid device update the family number */
3756 if (super
->current_vol
== 0) {
3758 struct imsm_dev
*_dev
= __get_imsm_dev(mpb
, 0);
3759 struct imsm_disk
*_disk
= __get_imsm_disk(mpb
, dl
->index
);
3761 if (!_dev
|| !_disk
) {
3762 fprintf(stderr
, Name
": BUG mpb setup error\n");
3768 sum
+= __gen_imsm_checksum(mpb
);
3769 mpb
->family_num
= __cpu_to_le32(sum
);
3770 mpb
->orig_family_num
= mpb
->family_num
;
3777 static int add_to_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
,
3778 int fd
, char *devname
)
3780 struct intel_super
*super
= st
->sb
;
3782 unsigned long long size
;
3787 /* If we are on an RAID enabled platform check that the disk is
3788 * attached to the raid controller.
3789 * We do not need to test disks attachment for container based additions,
3790 * they shall be already tested when container was created/assembled.
3792 rv
= find_intel_hba_capability(fd
, super
, devname
);
3793 /* no orom/efi or non-intel hba of the disk */
3795 dprintf("capability: %p fd: %d ret: %d\n",
3796 super
->orom
, fd
, rv
);
3800 if (super
->current_vol
>= 0)
3801 return add_to_super_imsm_volume(st
, dk
, fd
, devname
);
3804 dd
= malloc(sizeof(*dd
));
3807 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
3810 memset(dd
, 0, sizeof(*dd
));
3811 dd
->major
= major(stb
.st_rdev
);
3812 dd
->minor
= minor(stb
.st_rdev
);
3814 dd
->devname
= devname
? strdup(devname
) : NULL
;
3817 dd
->action
= DISK_ADD
;
3818 rv
= imsm_read_serial(fd
, devname
, dd
->serial
);
3821 Name
": failed to retrieve scsi serial, aborting\n");
3826 get_dev_size(fd
, NULL
, &size
);
3828 serialcpy(dd
->disk
.serial
, dd
->serial
);
3829 dd
->disk
.total_blocks
= __cpu_to_le32(size
);
3830 dd
->disk
.status
= SPARE_DISK
;
3831 if (sysfs_disk_to_scsi_id(fd
, &id
) == 0)
3832 dd
->disk
.scsi_id
= __cpu_to_le32(id
);
3834 dd
->disk
.scsi_id
= __cpu_to_le32(0);
3836 if (st
->update_tail
) {
3837 dd
->next
= super
->disk_mgmt_list
;
3838 super
->disk_mgmt_list
= dd
;
3840 dd
->next
= super
->disks
;
3842 super
->updates_pending
++;
3849 static int remove_from_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
)
3851 struct intel_super
*super
= st
->sb
;
3854 /* remove from super works only in mdmon - for communication
3855 * manager - monitor. Check if communication memory buffer
3858 if (!st
->update_tail
) {
3860 Name
": %s shall be used in mdmon context only"
3861 "(line %d).\n", __func__
, __LINE__
);
3864 dd
= malloc(sizeof(*dd
));
3867 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
3870 memset(dd
, 0, sizeof(*dd
));
3871 dd
->major
= dk
->major
;
3872 dd
->minor
= dk
->minor
;
3875 dd
->disk
.status
= SPARE_DISK
;
3876 dd
->action
= DISK_REMOVE
;
3878 dd
->next
= super
->disk_mgmt_list
;
3879 super
->disk_mgmt_list
= dd
;
3885 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
);
3889 struct imsm_super anchor
;
3890 } spare_record
__attribute__ ((aligned(512)));
3892 /* spare records have their own family number and do not have any defined raid
3895 static int write_super_imsm_spares(struct intel_super
*super
, int doclose
)
3897 struct imsm_super
*mpb
= super
->anchor
;
3898 struct imsm_super
*spare
= &spare_record
.anchor
;
3902 spare
->mpb_size
= __cpu_to_le32(sizeof(struct imsm_super
)),
3903 spare
->generation_num
= __cpu_to_le32(1UL),
3904 spare
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
3905 spare
->num_disks
= 1,
3906 spare
->num_raid_devs
= 0,
3907 spare
->cache_size
= mpb
->cache_size
,
3908 spare
->pwr_cycle_count
= __cpu_to_le32(1),
3910 snprintf((char *) spare
->sig
, MAX_SIGNATURE_LENGTH
,
3911 MPB_SIGNATURE MPB_VERSION_RAID0
);
3913 for (d
= super
->disks
; d
; d
= d
->next
) {
3917 spare
->disk
[0] = d
->disk
;
3918 sum
= __gen_imsm_checksum(spare
);
3919 spare
->family_num
= __cpu_to_le32(sum
);
3920 spare
->orig_family_num
= 0;
3921 sum
= __gen_imsm_checksum(spare
);
3922 spare
->check_sum
= __cpu_to_le32(sum
);
3924 if (store_imsm_mpb(d
->fd
, spare
)) {
3925 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
3926 __func__
, d
->major
, d
->minor
, strerror(errno
));
3938 static int write_super_imsm(struct supertype
*st
, int doclose
)
3940 struct intel_super
*super
= st
->sb
;
3941 struct imsm_super
*mpb
= super
->anchor
;
3947 __u32 mpb_size
= sizeof(struct imsm_super
) - sizeof(struct imsm_disk
);
3950 /* 'generation' is incremented every time the metadata is written */
3951 generation
= __le32_to_cpu(mpb
->generation_num
);
3953 mpb
->generation_num
= __cpu_to_le32(generation
);
3955 /* fix up cases where previous mdadm releases failed to set
3958 if (mpb
->orig_family_num
== 0)
3959 mpb
->orig_family_num
= mpb
->family_num
;
3961 for (d
= super
->disks
; d
; d
= d
->next
) {
3965 mpb
->disk
[d
->index
] = d
->disk
;
3969 for (d
= super
->missing
; d
; d
= d
->next
) {
3970 mpb
->disk
[d
->index
] = d
->disk
;
3973 mpb
->num_disks
= num_disks
;
3974 mpb_size
+= sizeof(struct imsm_disk
) * mpb
->num_disks
;
3976 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3977 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
3978 struct imsm_dev
*dev2
= get_imsm_dev(super
, i
);
3980 imsm_copy_dev(dev
, dev2
);
3981 mpb_size
+= sizeof_imsm_dev(dev
, 0);
3984 mpb_size
+= __le32_to_cpu(mpb
->bbm_log_size
);
3985 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
3987 /* recalculate checksum */
3988 sum
= __gen_imsm_checksum(mpb
);
3989 mpb
->check_sum
= __cpu_to_le32(sum
);
3991 /* write the mpb for disks that compose raid devices */
3992 for (d
= super
->disks
; d
; d
= d
->next
) {
3995 if (store_imsm_mpb(d
->fd
, mpb
))
3996 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
3997 __func__
, d
->major
, d
->minor
, strerror(errno
));
4005 return write_super_imsm_spares(super
, doclose
);
4011 static int create_array(struct supertype
*st
, int dev_idx
)
4014 struct imsm_update_create_array
*u
;
4015 struct intel_super
*super
= st
->sb
;
4016 struct imsm_dev
*dev
= get_imsm_dev(super
, dev_idx
);
4017 struct imsm_map
*map
= get_imsm_map(dev
, 0);
4018 struct disk_info
*inf
;
4019 struct imsm_disk
*disk
;
4022 len
= sizeof(*u
) - sizeof(*dev
) + sizeof_imsm_dev(dev
, 0) +
4023 sizeof(*inf
) * map
->num_members
;
4026 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4031 u
->type
= update_create_array
;
4032 u
->dev_idx
= dev_idx
;
4033 imsm_copy_dev(&u
->dev
, dev
);
4034 inf
= get_disk_info(u
);
4035 for (i
= 0; i
< map
->num_members
; i
++) {
4036 int idx
= get_imsm_disk_idx(dev
, i
, -1);
4038 disk
= get_imsm_disk(super
, idx
);
4039 serialcpy(inf
[i
].serial
, disk
->serial
);
4041 append_metadata_update(st
, u
, len
);
4046 static int mgmt_disk(struct supertype
*st
)
4048 struct intel_super
*super
= st
->sb
;
4050 struct imsm_update_add_remove_disk
*u
;
4052 if (!super
->disk_mgmt_list
)
4058 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4063 u
->type
= update_add_remove_disk
;
4064 append_metadata_update(st
, u
, len
);
4069 static int write_init_super_imsm(struct supertype
*st
)
4071 struct intel_super
*super
= st
->sb
;
4072 int current_vol
= super
->current_vol
;
4074 /* we are done with current_vol reset it to point st at the container */
4075 super
->current_vol
= -1;
4077 if (st
->update_tail
) {
4078 /* queue the recently created array / added disk
4079 * as a metadata update */
4082 /* determine if we are creating a volume or adding a disk */
4083 if (current_vol
< 0) {
4084 /* in the mgmt (add/remove) disk case we are running
4085 * in mdmon context, so don't close fd's
4087 return mgmt_disk(st
);
4089 rv
= create_array(st
, current_vol
);
4094 for (d
= super
->disks
; d
; d
= d
->next
)
4095 Kill(d
->devname
, NULL
, 0, 1, 1);
4096 return write_super_imsm(st
, 1);
4101 static int store_super_imsm(struct supertype
*st
, int fd
)
4103 struct intel_super
*super
= st
->sb
;
4104 struct imsm_super
*mpb
= super
? super
->anchor
: NULL
;
4110 return store_imsm_mpb(fd
, mpb
);
4116 static int imsm_bbm_log_size(struct imsm_super
*mpb
)
4118 return __le32_to_cpu(mpb
->bbm_log_size
);
4122 static int validate_geometry_imsm_container(struct supertype
*st
, int level
,
4123 int layout
, int raiddisks
, int chunk
,
4124 unsigned long long size
, char *dev
,
4125 unsigned long long *freesize
,
4129 unsigned long long ldsize
;
4130 struct intel_super
*super
=NULL
;
4133 if (level
!= LEVEL_CONTAINER
)
4138 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4141 fprintf(stderr
, Name
": imsm: Cannot open %s: %s\n",
4142 dev
, strerror(errno
));
4145 if (!get_dev_size(fd
, dev
, &ldsize
)) {
4150 /* capabilities retrieve could be possible
4151 * note that there is no fd for the disks in array.
4153 super
= alloc_super();
4156 Name
": malloc of %zu failed.\n",
4162 rv
= find_intel_hba_capability(fd
, super
, verbose
? dev
: NULL
);
4166 fd2devname(fd
, str
);
4167 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4168 fd
, str
, super
->orom
, rv
, raiddisks
);
4170 /* no orom/efi or non-intel hba of the disk */
4176 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4178 fprintf(stderr
, Name
": %d exceeds maximum number of"
4179 " platform supported disks: %d\n",
4180 raiddisks
, super
->orom
->tds
);
4186 *freesize
= avail_size_imsm(st
, ldsize
>> 9);
4192 static unsigned long long find_size(struct extent
*e
, int *idx
, int num_extents
)
4194 const unsigned long long base_start
= e
[*idx
].start
;
4195 unsigned long long end
= base_start
+ e
[*idx
].size
;
4198 if (base_start
== end
)
4202 for (i
= *idx
; i
< num_extents
; i
++) {
4203 /* extend overlapping extents */
4204 if (e
[i
].start
>= base_start
&&
4205 e
[i
].start
<= end
) {
4208 if (e
[i
].start
+ e
[i
].size
> end
)
4209 end
= e
[i
].start
+ e
[i
].size
;
4210 } else if (e
[i
].start
> end
) {
4216 return end
- base_start
;
4219 static unsigned long long merge_extents(struct intel_super
*super
, int sum_extents
)
4221 /* build a composite disk with all known extents and generate a new
4222 * 'maxsize' given the "all disks in an array must share a common start
4223 * offset" constraint
4225 struct extent
*e
= calloc(sum_extents
, sizeof(*e
));
4229 unsigned long long pos
;
4230 unsigned long long start
= 0;
4231 unsigned long long maxsize
;
4232 unsigned long reserve
;
4237 /* coalesce and sort all extents. also, check to see if we need to
4238 * reserve space between member arrays
4241 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4244 for (i
= 0; i
< dl
->extent_cnt
; i
++)
4247 qsort(e
, sum_extents
, sizeof(*e
), cmp_extent
);
4252 while (i
< sum_extents
) {
4253 e
[j
].start
= e
[i
].start
;
4254 e
[j
].size
= find_size(e
, &i
, sum_extents
);
4256 if (e
[j
-1].size
== 0)
4265 unsigned long long esize
;
4267 esize
= e
[i
].start
- pos
;
4268 if (esize
>= maxsize
) {
4273 pos
= e
[i
].start
+ e
[i
].size
;
4275 } while (e
[i
-1].size
);
4281 /* FIXME assumes volume at offset 0 is the first volume in a
4284 if (start_extent
> 0)
4285 reserve
= IMSM_RESERVED_SECTORS
; /* gap between raid regions */
4289 if (maxsize
< reserve
)
4292 super
->create_offset
= ~((__u32
) 0);
4293 if (start
+ reserve
> super
->create_offset
)
4294 return 0; /* start overflows create_offset */
4295 super
->create_offset
= start
+ reserve
;
4297 return maxsize
- reserve
;
4300 static int is_raid_level_supported(const struct imsm_orom
*orom
, int level
, int raiddisks
)
4302 if (level
< 0 || level
== 6 || level
== 4)
4305 /* if we have an orom prevent invalid raid levels */
4308 case 0: return imsm_orom_has_raid0(orom
);
4311 return imsm_orom_has_raid1e(orom
);
4312 return imsm_orom_has_raid1(orom
) && raiddisks
== 2;
4313 case 10: return imsm_orom_has_raid10(orom
) && raiddisks
== 4;
4314 case 5: return imsm_orom_has_raid5(orom
) && raiddisks
> 2;
4317 return 1; /* not on an Intel RAID platform so anything goes */
4323 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4325 * validate volume parameters with OROM/EFI capabilities
4328 validate_geometry_imsm_orom(struct intel_super
*super
, int level
, int layout
,
4329 int raiddisks
, int *chunk
, int verbose
)
4334 /* validate container capabilities */
4335 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4337 fprintf(stderr
, Name
": %d exceeds maximum number of"
4338 " platform supported disks: %d\n",
4339 raiddisks
, super
->orom
->tds
);
4343 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4344 if (super
->orom
&& (!is_raid_level_supported(super
->orom
, level
,
4346 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4347 level
, raiddisks
, raiddisks
> 1 ? "s" : "");
4350 if (super
->orom
&& level
!= 1) {
4351 if (chunk
&& (*chunk
== 0 || *chunk
== UnSet
))
4352 *chunk
= imsm_orom_default_chunk(super
->orom
);
4353 else if (chunk
&& !imsm_orom_has_chunk(super
->orom
, *chunk
)) {
4354 pr_vrb(": platform does not support a chunk size of: "
4359 if (layout
!= imsm_level_to_layout(level
)) {
4361 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4362 else if (level
== 10)
4363 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4365 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4372 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4373 * FIX ME add ahci details
4375 static int validate_geometry_imsm_volume(struct supertype
*st
, int level
,
4376 int layout
, int raiddisks
, int *chunk
,
4377 unsigned long long size
, char *dev
,
4378 unsigned long long *freesize
,
4382 struct intel_super
*super
= st
->sb
;
4383 struct imsm_super
*mpb
= super
->anchor
;
4385 unsigned long long pos
= 0;
4386 unsigned long long maxsize
;
4390 /* We must have the container info already read in. */
4394 if (!validate_geometry_imsm_orom(super
, level
, layout
, raiddisks
, chunk
, verbose
)) {
4395 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4396 "Cannot proceed with the action(s).\n");
4400 /* General test: make sure there is space for
4401 * 'raiddisks' device extents of size 'size' at a given
4404 unsigned long long minsize
= size
;
4405 unsigned long long start_offset
= MaxSector
;
4408 minsize
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
4409 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4414 e
= get_extents(super
, dl
);
4417 unsigned long long esize
;
4418 esize
= e
[i
].start
- pos
;
4419 if (esize
>= minsize
)
4421 if (found
&& start_offset
== MaxSector
) {
4424 } else if (found
&& pos
!= start_offset
) {
4428 pos
= e
[i
].start
+ e
[i
].size
;
4430 } while (e
[i
-1].size
);
4435 if (dcnt
< raiddisks
) {
4437 fprintf(stderr
, Name
": imsm: Not enough "
4438 "devices with space for this array "
4446 /* This device must be a member of the set */
4447 if (stat(dev
, &stb
) < 0)
4449 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
4451 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4452 if (dl
->major
== (int)major(stb
.st_rdev
) &&
4453 dl
->minor
== (int)minor(stb
.st_rdev
))
4458 fprintf(stderr
, Name
": %s is not in the "
4459 "same imsm set\n", dev
);
4461 } else if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
) {
4462 /* If a volume is present then the current creation attempt
4463 * cannot incorporate new spares because the orom may not
4464 * understand this configuration (all member disks must be
4465 * members of each array in the container).
4467 fprintf(stderr
, Name
": %s is a spare and a volume"
4468 " is already defined for this container\n", dev
);
4469 fprintf(stderr
, Name
": The option-rom requires all member"
4470 " disks to be a member of all volumes\n");
4474 /* retrieve the largest free space block */
4475 e
= get_extents(super
, dl
);
4480 unsigned long long esize
;
4482 esize
= e
[i
].start
- pos
;
4483 if (esize
>= maxsize
)
4485 pos
= e
[i
].start
+ e
[i
].size
;
4487 } while (e
[i
-1].size
);
4492 fprintf(stderr
, Name
": unable to determine free space for: %s\n",
4496 if (maxsize
< size
) {
4498 fprintf(stderr
, Name
": %s not enough space (%llu < %llu)\n",
4499 dev
, maxsize
, size
);
4503 /* count total number of extents for merge */
4505 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4507 i
+= dl
->extent_cnt
;
4509 maxsize
= merge_extents(super
, i
);
4510 if (maxsize
< size
|| maxsize
== 0) {
4512 fprintf(stderr
, Name
": not enough space after merge (%llu < %llu)\n",
4517 *freesize
= maxsize
;
4522 static int reserve_space(struct supertype
*st
, int raiddisks
,
4523 unsigned long long size
, int chunk
,
4524 unsigned long long *freesize
)
4526 struct intel_super
*super
= st
->sb
;
4527 struct imsm_super
*mpb
= super
->anchor
;
4532 unsigned long long maxsize
;
4533 unsigned long long minsize
;
4537 /* find the largest common start free region of the possible disks */
4541 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4547 /* don't activate new spares if we are orom constrained
4548 * and there is already a volume active in the container
4550 if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
)
4553 e
= get_extents(super
, dl
);
4556 for (i
= 1; e
[i
-1].size
; i
++)
4564 maxsize
= merge_extents(super
, extent_cnt
);
4568 minsize
= chunk
* 2;
4570 if (cnt
< raiddisks
||
4571 (super
->orom
&& used
&& used
!= raiddisks
) ||
4572 maxsize
< minsize
||
4574 fprintf(stderr
, Name
": not enough devices with space to create array.\n");
4575 return 0; /* No enough free spaces large enough */
4587 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4589 dl
->raiddisk
= cnt
++;
4596 static int validate_geometry_imsm(struct supertype
*st
, int level
, int layout
,
4597 int raiddisks
, int *chunk
, unsigned long long size
,
4598 char *dev
, unsigned long long *freesize
,
4606 * if given unused devices create a container
4607 * if given devices in a container create a member volume
4609 if (level
== LEVEL_CONTAINER
) {
4610 /* Must be a fresh device to add to a container */
4611 return validate_geometry_imsm_container(st
, level
, layout
,
4613 chunk
?*chunk
:0, size
,
4619 if (st
->sb
&& freesize
) {
4620 /* we are being asked to automatically layout a
4621 * new volume based on the current contents of
4622 * the container. If the parameters can be
4623 * satisfied reserve_space will record the disks,
4624 * start offset, and size of the volume to be
4625 * created. add_to_super and getinfo_super
4626 * detect when autolayout is in progress.
4628 if (!validate_geometry_imsm_orom(st
->sb
, level
, layout
,
4632 return reserve_space(st
, raiddisks
, size
,
4633 chunk
?*chunk
:0, freesize
);
4638 /* creating in a given container */
4639 return validate_geometry_imsm_volume(st
, level
, layout
,
4640 raiddisks
, chunk
, size
,
4641 dev
, freesize
, verbose
);
4644 /* This device needs to be a device in an 'imsm' container */
4645 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4649 Name
": Cannot create this array on device %s\n",
4654 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
4656 fprintf(stderr
, Name
": Cannot open %s: %s\n",
4657 dev
, strerror(errno
));
4660 /* Well, it is in use by someone, maybe an 'imsm' container. */
4661 cfd
= open_container(fd
);
4665 fprintf(stderr
, Name
": Cannot use %s: It is busy\n",
4669 sra
= sysfs_read(cfd
, 0, GET_VERSION
);
4670 if (sra
&& sra
->array
.major_version
== -1 &&
4671 strcmp(sra
->text_version
, "imsm") == 0)
4675 /* This is a member of a imsm container. Load the container
4676 * and try to create a volume
4678 struct intel_super
*super
;
4680 if (load_super_imsm_all(st
, cfd
, (void **) &super
, NULL
) == 0) {
4682 st
->container_dev
= fd2devnum(cfd
);
4684 return validate_geometry_imsm_volume(st
, level
, layout
,
4692 fprintf(stderr
, Name
": failed container membership check\n");
4698 static void default_geometry_imsm(struct supertype
*st
, int *level
, int *layout
, int *chunk
)
4700 struct intel_super
*super
= st
->sb
;
4702 if (level
&& *level
== UnSet
)
4703 *level
= LEVEL_CONTAINER
;
4705 if (level
&& layout
&& *layout
== UnSet
)
4706 *layout
= imsm_level_to_layout(*level
);
4708 if (chunk
&& (*chunk
== UnSet
|| *chunk
== 0) &&
4709 super
&& super
->orom
)
4710 *chunk
= imsm_orom_default_chunk(super
->orom
);
4713 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
);
4715 static int kill_subarray_imsm(struct supertype
*st
)
4717 /* remove the subarray currently referenced by ->current_vol */
4719 struct intel_dev
**dp
;
4720 struct intel_super
*super
= st
->sb
;
4721 __u8 current_vol
= super
->current_vol
;
4722 struct imsm_super
*mpb
= super
->anchor
;
4724 if (super
->current_vol
< 0)
4726 super
->current_vol
= -1; /* invalidate subarray cursor */
4728 /* block deletions that would change the uuid of active subarrays
4730 * FIXME when immutable ids are available, but note that we'll
4731 * also need to fixup the invalidated/active subarray indexes in
4734 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4737 if (i
< current_vol
)
4739 sprintf(subarray
, "%u", i
);
4740 if (is_subarray_active(subarray
, st
->devname
)) {
4742 Name
": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
4749 if (st
->update_tail
) {
4750 struct imsm_update_kill_array
*u
= malloc(sizeof(*u
));
4754 u
->type
= update_kill_array
;
4755 u
->dev_idx
= current_vol
;
4756 append_metadata_update(st
, u
, sizeof(*u
));
4761 for (dp
= &super
->devlist
; *dp
;)
4762 if ((*dp
)->index
== current_vol
) {
4765 handle_missing(super
, (*dp
)->dev
);
4766 if ((*dp
)->index
> current_vol
)
4771 /* no more raid devices, all active components are now spares,
4772 * but of course failed are still failed
4774 if (--mpb
->num_raid_devs
== 0) {
4777 for (d
= super
->disks
; d
; d
= d
->next
)
4778 if (d
->index
> -2) {
4780 d
->disk
.status
= SPARE_DISK
;
4784 super
->updates_pending
++;
4789 static int update_subarray_imsm(struct supertype
*st
, char *subarray
,
4790 char *update
, struct mddev_ident
*ident
)
4792 /* update the subarray currently referenced by ->current_vol */
4793 struct intel_super
*super
= st
->sb
;
4794 struct imsm_super
*mpb
= super
->anchor
;
4796 if (strcmp(update
, "name") == 0) {
4797 char *name
= ident
->name
;
4801 if (is_subarray_active(subarray
, st
->devname
)) {
4803 Name
": Unable to update name of active subarray\n");
4807 if (!check_name(super
, name
, 0))
4810 vol
= strtoul(subarray
, &ep
, 10);
4811 if (*ep
!= '\0' || vol
>= super
->anchor
->num_raid_devs
)
4814 if (st
->update_tail
) {
4815 struct imsm_update_rename_array
*u
= malloc(sizeof(*u
));
4819 u
->type
= update_rename_array
;
4821 snprintf((char *) u
->name
, MAX_RAID_SERIAL_LEN
, "%s", name
);
4822 append_metadata_update(st
, u
, sizeof(*u
));
4824 struct imsm_dev
*dev
;
4827 dev
= get_imsm_dev(super
, vol
);
4828 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
4829 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4830 dev
= get_imsm_dev(super
, i
);
4831 handle_missing(super
, dev
);
4833 super
->updates_pending
++;
4841 static int is_gen_migration(struct imsm_dev
*dev
)
4843 if (!dev
->vol
.migr_state
)
4846 if (migr_type(dev
) == MIGR_GEN_MIGR
)
4851 #endif /* MDASSEMBLE */
4853 static int is_rebuilding(struct imsm_dev
*dev
)
4855 struct imsm_map
*migr_map
;
4857 if (!dev
->vol
.migr_state
)
4860 if (migr_type(dev
) != MIGR_REBUILD
)
4863 migr_map
= get_imsm_map(dev
, 1);
4865 if (migr_map
->map_state
== IMSM_T_STATE_DEGRADED
)
4871 static void update_recovery_start(struct imsm_dev
*dev
, struct mdinfo
*array
)
4873 struct mdinfo
*rebuild
= NULL
;
4877 if (!is_rebuilding(dev
))
4880 /* Find the rebuild target, but punt on the dual rebuild case */
4881 for (d
= array
->devs
; d
; d
= d
->next
)
4882 if (d
->recovery_start
== 0) {
4889 /* (?) none of the disks are marked with
4890 * IMSM_ORD_REBUILD, so assume they are missing and the
4891 * disk_ord_tbl was not correctly updated
4893 dprintf("%s: failed to locate out-of-sync disk\n", __func__
);
4897 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
4898 rebuild
->recovery_start
= units
* blocks_per_migr_unit(dev
);
4902 static struct mdinfo
*container_content_imsm(struct supertype
*st
, char *subarray
)
4904 /* Given a container loaded by load_super_imsm_all,
4905 * extract information about all the arrays into
4907 * If 'subarray' is given, just extract info about that array.
4909 * For each imsm_dev create an mdinfo, fill it in,
4910 * then look for matching devices in super->disks
4911 * and create appropriate device mdinfo.
4913 struct intel_super
*super
= st
->sb
;
4914 struct imsm_super
*mpb
= super
->anchor
;
4915 struct mdinfo
*rest
= NULL
;
4919 int spare_disks
= 0;
4921 /* check for bad blocks */
4922 if (imsm_bbm_log_size(super
->anchor
))
4925 /* count spare devices, not used in maps
4927 for (d
= super
->disks
; d
; d
= d
->next
)
4931 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4932 struct imsm_dev
*dev
;
4933 struct imsm_map
*map
;
4934 struct imsm_map
*map2
;
4935 struct mdinfo
*this;
4940 (i
!= strtoul(subarray
, &ep
, 10) || *ep
!= '\0'))
4943 dev
= get_imsm_dev(super
, i
);
4944 map
= get_imsm_map(dev
, 0);
4945 map2
= get_imsm_map(dev
, 1);
4947 /* do not publish arrays that are in the middle of an
4948 * unsupported migration
4950 if (dev
->vol
.migr_state
&&
4951 (migr_type(dev
) == MIGR_STATE_CHANGE
)) {
4952 fprintf(stderr
, Name
": cannot assemble volume '%.16s':"
4953 " unsupported migration in progress\n",
4957 /* do not publish arrays that are not supported by controller's
4961 chunk
= __le16_to_cpu(map
->blocks_per_strip
) >> 1;
4963 if (!validate_geometry_imsm_orom(super
,
4964 get_imsm_raid_level(map
), /* RAID level */
4965 imsm_level_to_layout(get_imsm_raid_level(map
)),
4966 map
->num_members
, /* raid disks */
4969 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4970 "Cannot proceed with the action(s).\n");
4973 #endif /* MDASSEMBLE */
4974 this = malloc(sizeof(*this));
4976 fprintf(stderr
, Name
": failed to allocate %zu bytes\n",
4980 memset(this, 0, sizeof(*this));
4983 super
->current_vol
= i
;
4984 getinfo_super_imsm_volume(st
, this, NULL
);
4985 for (slot
= 0 ; slot
< map
->num_members
; slot
++) {
4986 unsigned long long recovery_start
;
4987 struct mdinfo
*info_d
;
4994 idx
= get_imsm_disk_idx(dev
, slot
, 0);
4995 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
4996 for (d
= super
->disks
; d
; d
= d
->next
)
4997 if (d
->index
== idx
)
5000 recovery_start
= MaxSector
;
5003 if (d
&& is_failed(&d
->disk
))
5005 if (ord
& IMSM_ORD_REBUILD
)
5009 * if we skip some disks the array will be assembled degraded;
5010 * reset resync start to avoid a dirty-degraded
5011 * situation when performing the initial sync
5013 * FIXME handle dirty degraded
5015 if ((skip
|| recovery_start
== 0) && !dev
->vol
.dirty
)
5016 this->resync_start
= MaxSector
;
5020 info_d
= calloc(1, sizeof(*info_d
));
5022 fprintf(stderr
, Name
": failed to allocate disk"
5023 " for volume %.16s\n", dev
->volume
);
5024 info_d
= this->devs
;
5026 struct mdinfo
*d
= info_d
->next
;
5035 info_d
->next
= this->devs
;
5036 this->devs
= info_d
;
5038 info_d
->disk
.number
= d
->index
;
5039 info_d
->disk
.major
= d
->major
;
5040 info_d
->disk
.minor
= d
->minor
;
5041 info_d
->disk
.raid_disk
= slot
;
5042 info_d
->recovery_start
= recovery_start
;
5044 if (slot
< map2
->num_members
)
5045 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5047 this->array
.spare_disks
++;
5049 if (slot
< map
->num_members
)
5050 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5052 this->array
.spare_disks
++;
5054 if (info_d
->recovery_start
== MaxSector
)
5055 this->array
.working_disks
++;
5057 info_d
->events
= __le32_to_cpu(mpb
->generation_num
);
5058 info_d
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5059 info_d
->component_size
= __le32_to_cpu(map
->blocks_per_member
);
5061 /* now that the disk list is up-to-date fixup recovery_start */
5062 update_recovery_start(dev
, this);
5063 this->array
.spare_disks
+= spare_disks
;
5067 /* if array has bad blocks, set suitable bit in array status */
5069 rest
->array
.state
|= (1<<MD_SB_BBM_ERRORS
);
5075 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
)
5077 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5080 return map
->map_state
== IMSM_T_STATE_UNINITIALIZED
?
5081 IMSM_T_STATE_UNINITIALIZED
: IMSM_T_STATE_NORMAL
;
5083 switch (get_imsm_raid_level(map
)) {
5085 return IMSM_T_STATE_FAILED
;
5088 if (failed
< map
->num_members
)
5089 return IMSM_T_STATE_DEGRADED
;
5091 return IMSM_T_STATE_FAILED
;
5096 * check to see if any mirrors have failed, otherwise we
5097 * are degraded. Even numbered slots are mirrored on
5101 /* gcc -Os complains that this is unused */
5102 int insync
= insync
;
5104 for (i
= 0; i
< map
->num_members
; i
++) {
5105 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
5106 int idx
= ord_to_idx(ord
);
5107 struct imsm_disk
*disk
;
5109 /* reset the potential in-sync count on even-numbered
5110 * slots. num_copies is always 2 for imsm raid10
5115 disk
= get_imsm_disk(super
, idx
);
5116 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5119 /* no in-sync disks left in this mirror the
5123 return IMSM_T_STATE_FAILED
;
5126 return IMSM_T_STATE_DEGRADED
;
5130 return IMSM_T_STATE_DEGRADED
;
5132 return IMSM_T_STATE_FAILED
;
5138 return map
->map_state
;
5141 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
)
5145 struct imsm_disk
*disk
;
5146 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5147 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
5151 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5152 * disks that are being rebuilt. New failures are recorded to
5153 * map[0]. So we look through all the disks we started with and
5154 * see if any failures are still present, or if any new ones
5157 * FIXME add support for online capacity expansion and
5158 * raid-level-migration
5160 for (i
= 0; i
< prev
->num_members
; i
++) {
5161 ord
= __le32_to_cpu(prev
->disk_ord_tbl
[i
]);
5162 ord
|= __le32_to_cpu(map
->disk_ord_tbl
[i
]);
5163 idx
= ord_to_idx(ord
);
5165 disk
= get_imsm_disk(super
, idx
);
5166 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5174 static int imsm_open_new(struct supertype
*c
, struct active_array
*a
,
5177 struct intel_super
*super
= c
->sb
;
5178 struct imsm_super
*mpb
= super
->anchor
;
5180 if (atoi(inst
) >= mpb
->num_raid_devs
) {
5181 fprintf(stderr
, "%s: subarry index %d, out of range\n",
5182 __func__
, atoi(inst
));
5186 dprintf("imsm: open_new %s\n", inst
);
5187 a
->info
.container_member
= atoi(inst
);
5191 static int is_resyncing(struct imsm_dev
*dev
)
5193 struct imsm_map
*migr_map
;
5195 if (!dev
->vol
.migr_state
)
5198 if (migr_type(dev
) == MIGR_INIT
||
5199 migr_type(dev
) == MIGR_REPAIR
)
5202 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5205 migr_map
= get_imsm_map(dev
, 1);
5207 if ((migr_map
->map_state
== IMSM_T_STATE_NORMAL
) &&
5208 (dev
->vol
.migr_type
!= MIGR_GEN_MIGR
))
5214 /* return true if we recorded new information */
5215 static int mark_failure(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5219 struct imsm_map
*map
;
5221 /* new failures are always set in map[0] */
5222 map
= get_imsm_map(dev
, 0);
5224 slot
= get_imsm_disk_slot(map
, idx
);
5228 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
5229 if (is_failed(disk
) && (ord
& IMSM_ORD_REBUILD
))
5232 disk
->status
|= FAILED_DISK
;
5233 set_imsm_ord_tbl_ent(map
, slot
, idx
| IMSM_ORD_REBUILD
);
5234 if (map
->failed_disk_num
== 0xff)
5235 map
->failed_disk_num
= slot
;
5239 static void mark_missing(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5241 mark_failure(dev
, disk
, idx
);
5243 if (disk
->scsi_id
== __cpu_to_le32(~(__u32
)0))
5246 disk
->scsi_id
= __cpu_to_le32(~(__u32
)0);
5247 memmove(&disk
->serial
[0], &disk
->serial
[1], MAX_RAID_SERIAL_LEN
- 1);
5250 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
)
5256 if (!super
->missing
)
5258 failed
= imsm_count_failed(super
, dev
);
5259 map_state
= imsm_check_degraded(super
, dev
, failed
);
5261 dprintf("imsm: mark missing\n");
5262 end_migration(dev
, map_state
);
5263 for (dl
= super
->missing
; dl
; dl
= dl
->next
)
5264 mark_missing(dev
, &dl
->disk
, dl
->index
);
5265 super
->updates_pending
++;
5268 static unsigned long long imsm_set_array_size(struct imsm_dev
*dev
)
5270 int used_disks
= imsm_num_data_members(dev
, 0);
5271 unsigned long long array_blocks
;
5272 struct imsm_map
*map
;
5274 if (used_disks
== 0) {
5275 /* when problems occures
5276 * return current array_blocks value
5278 array_blocks
= __le32_to_cpu(dev
->size_high
);
5279 array_blocks
= array_blocks
<< 32;
5280 array_blocks
+= __le32_to_cpu(dev
->size_low
);
5282 return array_blocks
;
5285 /* set array size in metadata
5287 map
= get_imsm_map(dev
, 0);
5288 array_blocks
= map
->blocks_per_member
* used_disks
;
5290 /* round array size down to closest MB
5292 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
5293 dev
->size_low
= __cpu_to_le32((__u32
)array_blocks
);
5294 dev
->size_high
= __cpu_to_le32((__u32
)(array_blocks
>> 32));
5296 return array_blocks
;
5299 static void imsm_set_disk(struct active_array
*a
, int n
, int state
);
5301 static void imsm_progress_container_reshape(struct intel_super
*super
)
5303 /* if no device has a migr_state, but some device has a
5304 * different number of members than the previous device, start
5305 * changing the number of devices in this device to match
5308 struct imsm_super
*mpb
= super
->anchor
;
5309 int prev_disks
= -1;
5313 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5314 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
5315 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5316 struct imsm_map
*map2
;
5317 int prev_num_members
;
5319 if (dev
->vol
.migr_state
)
5322 if (prev_disks
== -1)
5323 prev_disks
= map
->num_members
;
5324 if (prev_disks
== map
->num_members
)
5327 /* OK, this array needs to enter reshape mode.
5328 * i.e it needs a migr_state
5331 copy_map_size
= sizeof_imsm_map(map
);
5332 prev_num_members
= map
->num_members
;
5333 map
->num_members
= prev_disks
;
5334 dev
->vol
.migr_state
= 1;
5335 dev
->vol
.curr_migr_unit
= 0;
5336 dev
->vol
.migr_type
= MIGR_GEN_MIGR
;
5337 for (i
= prev_num_members
;
5338 i
< map
->num_members
; i
++)
5339 set_imsm_ord_tbl_ent(map
, i
, i
);
5340 map2
= get_imsm_map(dev
, 1);
5341 /* Copy the current map */
5342 memcpy(map2
, map
, copy_map_size
);
5343 map2
->num_members
= prev_num_members
;
5345 imsm_set_array_size(dev
);
5346 super
->updates_pending
++;
5350 /* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
5351 * states are handled in imsm_set_disk() with one exception, when a
5352 * resync is stopped due to a new failure this routine will set the
5353 * 'degraded' state for the array.
5355 static int imsm_set_array_state(struct active_array
*a
, int consistent
)
5357 int inst
= a
->info
.container_member
;
5358 struct intel_super
*super
= a
->container
->sb
;
5359 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5360 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5361 int failed
= imsm_count_failed(super
, dev
);
5362 __u8 map_state
= imsm_check_degraded(super
, dev
, failed
);
5363 __u32 blocks_per_unit
;
5365 if (dev
->vol
.migr_state
&&
5366 dev
->vol
.migr_type
== MIGR_GEN_MIGR
) {
5367 /* array state change is blocked due to reshape action
5369 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5370 * - finish the reshape (if last_checkpoint is big and action != reshape)
5371 * - update curr_migr_unit
5373 if (a
->curr_action
== reshape
) {
5374 /* still reshaping, maybe update curr_migr_unit */
5375 goto mark_checkpoint
;
5377 if (a
->last_checkpoint
== 0 && a
->prev_action
== reshape
) {
5378 /* for some reason we aborted the reshape.
5381 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
5382 dev
->vol
.migr_state
= 0;
5383 dev
->vol
.migr_type
= 0;
5384 dev
->vol
.curr_migr_unit
= 0;
5385 memcpy(map
, map2
, sizeof_imsm_map(map2
));
5386 super
->updates_pending
++;
5388 if (a
->last_checkpoint
>= a
->info
.component_size
) {
5389 unsigned long long array_blocks
;
5393 used_disks
= imsm_num_data_members(dev
, 0);
5394 if (used_disks
> 0) {
5396 map
->blocks_per_member
*
5398 /* round array size down to closest MB
5400 array_blocks
= (array_blocks
5401 >> SECT_PER_MB_SHIFT
)
5402 << SECT_PER_MB_SHIFT
;
5403 a
->info
.custom_array_size
= array_blocks
;
5404 /* encourage manager to update array
5408 a
->check_reshape
= 1;
5410 /* finalize online capacity expansion/reshape */
5411 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
5413 mdi
->disk
.raid_disk
,
5416 imsm_progress_container_reshape(super
);
5421 /* before we activate this array handle any missing disks */
5422 if (consistent
== 2)
5423 handle_missing(super
, dev
);
5425 if (consistent
== 2 &&
5426 (!is_resync_complete(&a
->info
) ||
5427 map_state
!= IMSM_T_STATE_NORMAL
||
5428 dev
->vol
.migr_state
))
5431 if (is_resync_complete(&a
->info
)) {
5432 /* complete initialization / resync,
5433 * recovery and interrupted recovery is completed in
5436 if (is_resyncing(dev
)) {
5437 dprintf("imsm: mark resync done\n");
5438 end_migration(dev
, map_state
);
5439 super
->updates_pending
++;
5440 a
->last_checkpoint
= 0;
5442 } else if (!is_resyncing(dev
) && !failed
) {
5443 /* mark the start of the init process if nothing is failed */
5444 dprintf("imsm: mark resync start\n");
5445 if (map
->map_state
== IMSM_T_STATE_UNINITIALIZED
)
5446 migrate(dev
, IMSM_T_STATE_NORMAL
, MIGR_INIT
);
5448 migrate(dev
, IMSM_T_STATE_NORMAL
, MIGR_REPAIR
);
5449 super
->updates_pending
++;
5453 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5454 blocks_per_unit
= blocks_per_migr_unit(dev
);
5455 if (blocks_per_unit
) {
5459 units
= a
->last_checkpoint
/ blocks_per_unit
;
5462 /* check that we did not overflow 32-bits, and that
5463 * curr_migr_unit needs updating
5465 if (units32
== units
&&
5466 __le32_to_cpu(dev
->vol
.curr_migr_unit
) != units32
) {
5467 dprintf("imsm: mark checkpoint (%u)\n", units32
);
5468 dev
->vol
.curr_migr_unit
= __cpu_to_le32(units32
);
5469 super
->updates_pending
++;
5473 /* mark dirty / clean */
5474 if (dev
->vol
.dirty
!= !consistent
) {
5475 dprintf("imsm: mark '%s'\n", consistent
? "clean" : "dirty");
5480 super
->updates_pending
++;
5486 static void imsm_set_disk(struct active_array
*a
, int n
, int state
)
5488 int inst
= a
->info
.container_member
;
5489 struct intel_super
*super
= a
->container
->sb
;
5490 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5491 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5492 struct imsm_disk
*disk
;
5497 if (n
> map
->num_members
)
5498 fprintf(stderr
, "imsm: set_disk %d out of range 0..%d\n",
5499 n
, map
->num_members
- 1);
5504 dprintf("imsm: set_disk %d:%x\n", n
, state
);
5506 ord
= get_imsm_ord_tbl_ent(dev
, n
, -1);
5507 disk
= get_imsm_disk(super
, ord_to_idx(ord
));
5509 /* check for new failures */
5510 if (state
& DS_FAULTY
) {
5511 if (mark_failure(dev
, disk
, ord_to_idx(ord
)))
5512 super
->updates_pending
++;
5515 /* check if in_sync */
5516 if (state
& DS_INSYNC
&& ord
& IMSM_ORD_REBUILD
&& is_rebuilding(dev
)) {
5517 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
5519 set_imsm_ord_tbl_ent(migr_map
, n
, ord_to_idx(ord
));
5520 super
->updates_pending
++;
5523 failed
= imsm_count_failed(super
, dev
);
5524 map_state
= imsm_check_degraded(super
, dev
, failed
);
5526 /* check if recovery complete, newly degraded, or failed */
5527 if (map_state
== IMSM_T_STATE_NORMAL
&& is_rebuilding(dev
)) {
5528 end_migration(dev
, map_state
);
5529 map
= get_imsm_map(dev
, 0);
5530 map
->failed_disk_num
= ~0;
5531 super
->updates_pending
++;
5532 a
->last_checkpoint
= 0;
5533 } else if (map_state
== IMSM_T_STATE_DEGRADED
&&
5534 map
->map_state
!= map_state
&&
5535 !dev
->vol
.migr_state
) {
5536 dprintf("imsm: mark degraded\n");
5537 map
->map_state
= map_state
;
5538 super
->updates_pending
++;
5539 a
->last_checkpoint
= 0;
5540 } else if (map_state
== IMSM_T_STATE_FAILED
&&
5541 map
->map_state
!= map_state
) {
5542 dprintf("imsm: mark failed\n");
5543 end_migration(dev
, map_state
);
5544 super
->updates_pending
++;
5545 a
->last_checkpoint
= 0;
5546 } else if (is_gen_migration(dev
)) {
5547 dprintf("imsm: Detected General Migration in state: ");
5548 if (map_state
== IMSM_T_STATE_NORMAL
) {
5549 end_migration(dev
, map_state
);
5550 map
= get_imsm_map(dev
, 0);
5551 map
->failed_disk_num
= ~0;
5552 dprintf("normal\n");
5554 if (map_state
== IMSM_T_STATE_DEGRADED
) {
5555 printf("degraded\n");
5556 end_migration(dev
, map_state
);
5558 dprintf("failed\n");
5560 map
->map_state
= map_state
;
5562 super
->updates_pending
++;
5566 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
)
5569 __u32 mpb_size
= __le32_to_cpu(mpb
->mpb_size
);
5570 unsigned long long dsize
;
5571 unsigned long long sectors
;
5573 get_dev_size(fd
, NULL
, &dsize
);
5575 if (mpb_size
> 512) {
5576 /* -1 to account for anchor */
5577 sectors
= mpb_sectors(mpb
) - 1;
5579 /* write the extended mpb to the sectors preceeding the anchor */
5580 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0)
5583 if ((unsigned long long)write(fd
, buf
+ 512, 512 * sectors
)
5588 /* first block is stored on second to last sector of the disk */
5589 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0)
5592 if (write(fd
, buf
, 512) != 512)
5598 static void imsm_sync_metadata(struct supertype
*container
)
5600 struct intel_super
*super
= container
->sb
;
5602 dprintf("sync metadata: %d\n", super
->updates_pending
);
5603 if (!super
->updates_pending
)
5606 write_super_imsm(container
, 0);
5608 super
->updates_pending
= 0;
5611 static struct dl
*imsm_readd(struct intel_super
*super
, int idx
, struct active_array
*a
)
5613 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5614 int i
= get_imsm_disk_idx(dev
, idx
, -1);
5617 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
5621 if (dl
&& is_failed(&dl
->disk
))
5625 dprintf("%s: found %x:%x\n", __func__
, dl
->major
, dl
->minor
);
5630 static struct dl
*imsm_add_spare(struct intel_super
*super
, int slot
,
5631 struct active_array
*a
, int activate_new
,
5632 struct mdinfo
*additional_test_list
)
5634 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5635 int idx
= get_imsm_disk_idx(dev
, slot
, -1);
5636 struct imsm_super
*mpb
= super
->anchor
;
5637 struct imsm_map
*map
;
5638 unsigned long long pos
;
5643 __u32 array_start
= 0;
5644 __u32 array_end
= 0;
5646 struct mdinfo
*test_list
;
5648 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
5649 /* If in this array, skip */
5650 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5651 if (d
->state_fd
>= 0 &&
5652 d
->disk
.major
== dl
->major
&&
5653 d
->disk
.minor
== dl
->minor
) {
5654 dprintf("%x:%x already in array\n",
5655 dl
->major
, dl
->minor
);
5660 test_list
= additional_test_list
;
5662 if (test_list
->disk
.major
== dl
->major
&&
5663 test_list
->disk
.minor
== dl
->minor
) {
5664 dprintf("%x:%x already in additional test list\n",
5665 dl
->major
, dl
->minor
);
5668 test_list
= test_list
->next
;
5673 /* skip in use or failed drives */
5674 if (is_failed(&dl
->disk
) || idx
== dl
->index
||
5676 dprintf("%x:%x status (failed: %d index: %d)\n",
5677 dl
->major
, dl
->minor
, is_failed(&dl
->disk
), idx
);
5681 /* skip pure spares when we are looking for partially
5682 * assimilated drives
5684 if (dl
->index
== -1 && !activate_new
)
5687 /* Does this unused device have the requisite free space?
5688 * It needs to be able to cover all member volumes
5690 ex
= get_extents(super
, dl
);
5692 dprintf("cannot get extents\n");
5695 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5696 dev
= get_imsm_dev(super
, i
);
5697 map
= get_imsm_map(dev
, 0);
5699 /* check if this disk is already a member of
5702 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
5708 array_start
= __le32_to_cpu(map
->pba_of_lba0
);
5709 array_end
= array_start
+
5710 __le32_to_cpu(map
->blocks_per_member
) - 1;
5713 /* check that we can start at pba_of_lba0 with
5714 * blocks_per_member of space
5716 if (array_start
>= pos
&& array_end
< ex
[j
].start
) {
5720 pos
= ex
[j
].start
+ ex
[j
].size
;
5722 } while (ex
[j
-1].size
);
5729 if (i
< mpb
->num_raid_devs
) {
5730 dprintf("%x:%x does not have %u to %u available\n",
5731 dl
->major
, dl
->minor
, array_start
, array_end
);
5742 static int imsm_rebuild_allowed(struct supertype
*cont
, int dev_idx
, int failed
)
5744 struct imsm_dev
*dev2
;
5745 struct imsm_map
*map
;
5751 dev2
= get_imsm_dev(cont
->sb
, dev_idx
);
5753 state
= imsm_check_degraded(cont
->sb
, dev2
, failed
);
5754 if (state
== IMSM_T_STATE_FAILED
) {
5755 map
= get_imsm_map(dev2
, 0);
5758 for (slot
= 0; slot
< map
->num_members
; slot
++) {
5760 * Check if failed disks are deleted from intel
5761 * disk list or are marked to be deleted
5763 idx
= get_imsm_disk_idx(dev2
, slot
, -1);
5764 idisk
= get_imsm_dl_disk(cont
->sb
, idx
);
5766 * Do not rebuild the array if failed disks
5767 * from failed sub-array are not removed from
5771 is_failed(&idisk
->disk
) &&
5772 (idisk
->action
!= DISK_REMOVE
))
5780 static struct mdinfo
*imsm_activate_spare(struct active_array
*a
,
5781 struct metadata_update
**updates
)
5784 * Find a device with unused free space and use it to replace a
5785 * failed/vacant region in an array. We replace failed regions one a
5786 * array at a time. The result is that a new spare disk will be added
5787 * to the first failed array and after the monitor has finished
5788 * propagating failures the remainder will be consumed.
5790 * FIXME add a capability for mdmon to request spares from another
5794 struct intel_super
*super
= a
->container
->sb
;
5795 int inst
= a
->info
.container_member
;
5796 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5797 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5798 int failed
= a
->info
.array
.raid_disks
;
5799 struct mdinfo
*rv
= NULL
;
5802 struct metadata_update
*mu
;
5804 struct imsm_update_activate_spare
*u
;
5809 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
5810 if ((d
->curr_state
& DS_FAULTY
) &&
5812 /* wait for Removal to happen */
5814 if (d
->state_fd
>= 0)
5818 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
5819 inst
, failed
, a
->info
.array
.raid_disks
, a
->info
.array
.level
);
5821 if (dev
->vol
.migr_state
&&
5822 dev
->vol
.migr_type
== MIGR_GEN_MIGR
)
5823 /* No repair during migration */
5826 if (a
->info
.array
.level
== 4)
5827 /* No repair for takeovered array
5828 * imsm doesn't support raid4
5832 if (imsm_check_degraded(super
, dev
, failed
) != IMSM_T_STATE_DEGRADED
)
5836 * If there are any failed disks check state of the other volume.
5837 * Block rebuild if the another one is failed until failed disks
5838 * are removed from container.
5841 dprintf("found failed disks in %s, check if there another"
5842 "failed sub-array.\n",
5844 /* check if states of the other volumes allow for rebuild */
5845 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
5847 allowed
= imsm_rebuild_allowed(a
->container
,
5855 /* For each slot, if it is not working, find a spare */
5856 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
5857 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5858 if (d
->disk
.raid_disk
== i
)
5860 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
5861 if (d
&& (d
->state_fd
>= 0))
5865 * OK, this device needs recovery. Try to re-add the
5866 * previous occupant of this slot, if this fails see if
5867 * we can continue the assimilation of a spare that was
5868 * partially assimilated, finally try to activate a new
5871 dl
= imsm_readd(super
, i
, a
);
5873 dl
= imsm_add_spare(super
, i
, a
, 0, NULL
);
5875 dl
= imsm_add_spare(super
, i
, a
, 1, NULL
);
5879 /* found a usable disk with enough space */
5880 di
= malloc(sizeof(*di
));
5883 memset(di
, 0, sizeof(*di
));
5885 /* dl->index will be -1 in the case we are activating a
5886 * pristine spare. imsm_process_update() will create a
5887 * new index in this case. Once a disk is found to be
5888 * failed in all member arrays it is kicked from the
5891 di
->disk
.number
= dl
->index
;
5893 /* (ab)use di->devs to store a pointer to the device
5896 di
->devs
= (struct mdinfo
*) dl
;
5898 di
->disk
.raid_disk
= i
;
5899 di
->disk
.major
= dl
->major
;
5900 di
->disk
.minor
= dl
->minor
;
5902 di
->recovery_start
= 0;
5903 di
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5904 di
->component_size
= a
->info
.component_size
;
5905 di
->container_member
= inst
;
5906 super
->random
= random32();
5910 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
5911 i
, di
->data_offset
);
5917 /* No spares found */
5919 /* Now 'rv' has a list of devices to return.
5920 * Create a metadata_update record to update the
5921 * disk_ord_tbl for the array
5923 mu
= malloc(sizeof(*mu
));
5925 mu
->buf
= malloc(sizeof(struct imsm_update_activate_spare
) * num_spares
);
5926 if (mu
->buf
== NULL
) {
5933 struct mdinfo
*n
= rv
->next
;
5942 mu
->space_list
= NULL
;
5943 mu
->len
= sizeof(struct imsm_update_activate_spare
) * num_spares
;
5944 mu
->next
= *updates
;
5945 u
= (struct imsm_update_activate_spare
*) mu
->buf
;
5947 for (di
= rv
; di
; di
= di
->next
) {
5948 u
->type
= update_activate_spare
;
5949 u
->dl
= (struct dl
*) di
->devs
;
5951 u
->slot
= di
->disk
.raid_disk
;
5962 static int disks_overlap(struct intel_super
*super
, int idx
, struct imsm_update_create_array
*u
)
5964 struct imsm_dev
*dev
= get_imsm_dev(super
, idx
);
5965 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5966 struct imsm_map
*new_map
= get_imsm_map(&u
->dev
, 0);
5967 struct disk_info
*inf
= get_disk_info(u
);
5968 struct imsm_disk
*disk
;
5972 for (i
= 0; i
< map
->num_members
; i
++) {
5973 disk
= get_imsm_disk(super
, get_imsm_disk_idx(dev
, i
, -1));
5974 for (j
= 0; j
< new_map
->num_members
; j
++)
5975 if (serialcmp(disk
->serial
, inf
[j
].serial
) == 0)
5983 static struct dl
*get_disk_super(struct intel_super
*super
, int major
, int minor
)
5985 struct dl
*dl
= NULL
;
5986 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
5987 if ((dl
->major
== major
) && (dl
->minor
== minor
))
5992 static int remove_disk_super(struct intel_super
*super
, int major
, int minor
)
5994 struct dl
*prev
= NULL
;
5998 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
5999 if ((dl
->major
== major
) && (dl
->minor
== minor
)) {
6002 prev
->next
= dl
->next
;
6004 super
->disks
= dl
->next
;
6006 __free_imsm_disk(dl
);
6007 dprintf("%s: removed %x:%x\n",
6008 __func__
, major
, minor
);
6016 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
);
6018 static int add_remove_disk_update(struct intel_super
*super
)
6020 int check_degraded
= 0;
6021 struct dl
*disk
= NULL
;
6022 /* add/remove some spares to/from the metadata/contrainer */
6023 while (super
->disk_mgmt_list
) {
6024 struct dl
*disk_cfg
;
6026 disk_cfg
= super
->disk_mgmt_list
;
6027 super
->disk_mgmt_list
= disk_cfg
->next
;
6028 disk_cfg
->next
= NULL
;
6030 if (disk_cfg
->action
== DISK_ADD
) {
6031 disk_cfg
->next
= super
->disks
;
6032 super
->disks
= disk_cfg
;
6034 dprintf("%s: added %x:%x\n",
6035 __func__
, disk_cfg
->major
,
6037 } else if (disk_cfg
->action
== DISK_REMOVE
) {
6038 dprintf("Disk remove action processed: %x.%x\n",
6039 disk_cfg
->major
, disk_cfg
->minor
);
6040 disk
= get_disk_super(super
,
6044 /* store action status */
6045 disk
->action
= DISK_REMOVE
;
6046 /* remove spare disks only */
6047 if (disk
->index
== -1) {
6048 remove_disk_super(super
,
6053 /* release allocate disk structure */
6054 __free_imsm_disk(disk_cfg
);
6057 return check_degraded
;
6060 static int apply_reshape_container_disks_update(struct imsm_update_reshape
*u
,
6061 struct intel_super
*super
,
6064 struct dl
*new_disk
;
6065 struct intel_dev
*id
;
6067 int delta_disks
= u
->new_raid_disks
- u
->old_raid_disks
;
6068 int disk_count
= u
->old_raid_disks
;
6069 void **tofree
= NULL
;
6070 int devices_to_reshape
= 1;
6071 struct imsm_super
*mpb
= super
->anchor
;
6073 unsigned int dev_id
;
6075 dprintf("imsm: apply_reshape_container_disks_update()\n");
6077 /* enable spares to use in array */
6078 for (i
= 0; i
< delta_disks
; i
++) {
6079 new_disk
= get_disk_super(super
,
6080 major(u
->new_disks
[i
]),
6081 minor(u
->new_disks
[i
]));
6082 dprintf("imsm: new disk for reshape is: %i:%i "
6083 "(%p, index = %i)\n",
6084 major(u
->new_disks
[i
]), minor(u
->new_disks
[i
]),
6085 new_disk
, new_disk
->index
);
6086 if ((new_disk
== NULL
) ||
6087 ((new_disk
->index
>= 0) &&
6088 (new_disk
->index
< u
->old_raid_disks
)))
6089 goto update_reshape_exit
;
6090 new_disk
->index
= disk_count
++;
6091 /* slot to fill in autolayout
6093 new_disk
->raiddisk
= new_disk
->index
;
6094 new_disk
->disk
.status
|=
6096 new_disk
->disk
.status
&= ~SPARE_DISK
;
6099 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6100 mpb
->num_raid_devs
);
6101 /* manage changes in volume
6103 for (dev_id
= 0; dev_id
< mpb
->num_raid_devs
; dev_id
++) {
6104 void **sp
= *space_list
;
6105 struct imsm_dev
*newdev
;
6106 struct imsm_map
*newmap
, *oldmap
;
6108 for (id
= super
->devlist
; id
; id
= id
->next
) {
6109 if (id
->index
== dev_id
)
6118 /* Copy the dev, but not (all of) the map */
6119 memcpy(newdev
, id
->dev
, sizeof(*newdev
));
6120 oldmap
= get_imsm_map(id
->dev
, 0);
6121 newmap
= get_imsm_map(newdev
, 0);
6122 /* Copy the current map */
6123 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6124 /* update one device only
6126 if (devices_to_reshape
) {
6127 dprintf("imsm: modifying subdev: %i\n",
6129 devices_to_reshape
--;
6130 newdev
->vol
.migr_state
= 1;
6131 newdev
->vol
.curr_migr_unit
= 0;
6132 newdev
->vol
.migr_type
= MIGR_GEN_MIGR
;
6133 newmap
->num_members
= u
->new_raid_disks
;
6134 for (i
= 0; i
< delta_disks
; i
++) {
6135 set_imsm_ord_tbl_ent(newmap
,
6136 u
->old_raid_disks
+ i
,
6137 u
->old_raid_disks
+ i
);
6139 /* New map is correct, now need to save old map
6141 newmap
= get_imsm_map(newdev
, 1);
6142 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6144 imsm_set_array_size(newdev
);
6147 sp
= (void **)id
->dev
;
6153 *space_list
= tofree
;
6156 update_reshape_exit
:
6161 static int apply_takeover_update(struct imsm_update_takeover
*u
,
6162 struct intel_super
*super
,
6165 struct imsm_dev
*dev
= NULL
;
6166 struct intel_dev
*dv
;
6167 struct imsm_dev
*dev_new
;
6168 struct imsm_map
*map
;
6172 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
6173 if (dv
->index
== (unsigned int)u
->subarray
) {
6181 map
= get_imsm_map(dev
, 0);
6183 if (u
->direction
== R10_TO_R0
) {
6184 /* Number of failed disks must be half of initial disk number */
6185 if (imsm_count_failed(super
, dev
) != (map
->num_members
/ 2))
6188 /* iterate through devices to mark removed disks as spare */
6189 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6190 if (dm
->disk
.status
& FAILED_DISK
) {
6191 int idx
= dm
->index
;
6192 /* update indexes on the disk list */
6193 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6194 the index values will end up being correct.... NB */
6195 for (du
= super
->disks
; du
; du
= du
->next
)
6196 if (du
->index
> idx
)
6198 /* mark as spare disk */
6199 dm
->disk
.status
= SPARE_DISK
;
6204 map
->num_members
= map
->num_members
/ 2;
6205 map
->map_state
= IMSM_T_STATE_NORMAL
;
6206 map
->num_domains
= 1;
6207 map
->raid_level
= 0;
6208 map
->failed_disk_num
= -1;
6211 if (u
->direction
== R0_TO_R10
) {
6213 /* update slots in current disk list */
6214 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6218 /* create new *missing* disks */
6219 for (i
= 0; i
< map
->num_members
; i
++) {
6220 space
= *space_list
;
6223 *space_list
= *space
;
6225 memcpy(du
, super
->disks
, sizeof(*du
));
6229 du
->index
= (i
* 2) + 1;
6230 sprintf((char *)du
->disk
.serial
,
6231 " MISSING_%d", du
->index
);
6232 sprintf((char *)du
->serial
,
6233 "MISSING_%d", du
->index
);
6234 du
->next
= super
->missing
;
6235 super
->missing
= du
;
6237 /* create new dev and map */
6238 space
= *space_list
;
6241 *space_list
= *space
;
6242 dev_new
= (void *)space
;
6243 memcpy(dev_new
, dev
, sizeof(*dev
));
6244 /* update new map */
6245 map
= get_imsm_map(dev_new
, 0);
6246 map
->num_members
= map
->num_members
* 2;
6247 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6248 map
->num_domains
= 2;
6249 map
->raid_level
= 1;
6250 /* replace dev<->dev_new */
6253 /* update disk order table */
6254 for (du
= super
->disks
; du
; du
= du
->next
)
6256 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6257 for (du
= super
->missing
; du
; du
= du
->next
)
6258 if (du
->index
>= 0) {
6259 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6260 mark_missing(dev_new
, &du
->disk
, du
->index
);
6266 static void imsm_process_update(struct supertype
*st
,
6267 struct metadata_update
*update
)
6270 * crack open the metadata_update envelope to find the update record
6271 * update can be one of:
6272 * update_reshape_container_disks - all the arrays in the container
6273 * are being reshaped to have more devices. We need to mark
6274 * the arrays for general migration and convert selected spares
6275 * into active devices.
6276 * update_activate_spare - a spare device has replaced a failed
6277 * device in an array, update the disk_ord_tbl. If this disk is
6278 * present in all member arrays then also clear the SPARE_DISK
6280 * update_create_array
6282 * update_rename_array
6283 * update_add_remove_disk
6285 struct intel_super
*super
= st
->sb
;
6286 struct imsm_super
*mpb
;
6287 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6289 /* update requires a larger buf but the allocation failed */
6290 if (super
->next_len
&& !super
->next_buf
) {
6291 super
->next_len
= 0;
6295 if (super
->next_buf
) {
6296 memcpy(super
->next_buf
, super
->buf
, super
->len
);
6298 super
->len
= super
->next_len
;
6299 super
->buf
= super
->next_buf
;
6301 super
->next_len
= 0;
6302 super
->next_buf
= NULL
;
6305 mpb
= super
->anchor
;
6308 case update_takeover
: {
6309 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6310 if (apply_takeover_update(u
, super
, &update
->space_list
)) {
6311 imsm_update_version_info(super
);
6312 super
->updates_pending
++;
6317 case update_reshape_container_disks
: {
6318 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6319 if (apply_reshape_container_disks_update(
6320 u
, super
, &update
->space_list
))
6321 super
->updates_pending
++;
6324 case update_activate_spare
: {
6325 struct imsm_update_activate_spare
*u
= (void *) update
->buf
;
6326 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->array
);
6327 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6328 struct imsm_map
*migr_map
;
6329 struct active_array
*a
;
6330 struct imsm_disk
*disk
;
6335 int victim
= get_imsm_disk_idx(dev
, u
->slot
, -1);
6338 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6343 fprintf(stderr
, "error: imsm_activate_spare passed "
6344 "an unknown disk (index: %d)\n",
6349 super
->updates_pending
++;
6351 /* count failures (excluding rebuilds and the victim)
6352 * to determine map[0] state
6355 for (i
= 0; i
< map
->num_members
; i
++) {
6358 disk
= get_imsm_disk(super
,
6359 get_imsm_disk_idx(dev
, i
, -1));
6360 if (!disk
|| is_failed(disk
))
6364 /* adding a pristine spare, assign a new index */
6365 if (dl
->index
< 0) {
6366 dl
->index
= super
->anchor
->num_disks
;
6367 super
->anchor
->num_disks
++;
6370 disk
->status
|= CONFIGURED_DISK
;
6371 disk
->status
&= ~SPARE_DISK
;
6374 to_state
= imsm_check_degraded(super
, dev
, failed
);
6375 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6376 migrate(dev
, to_state
, MIGR_REBUILD
);
6377 migr_map
= get_imsm_map(dev
, 1);
6378 set_imsm_ord_tbl_ent(map
, u
->slot
, dl
->index
);
6379 set_imsm_ord_tbl_ent(migr_map
, u
->slot
, dl
->index
| IMSM_ORD_REBUILD
);
6381 /* update the family_num to mark a new container
6382 * generation, being careful to record the existing
6383 * family_num in orig_family_num to clean up after
6384 * earlier mdadm versions that neglected to set it.
6386 if (mpb
->orig_family_num
== 0)
6387 mpb
->orig_family_num
= mpb
->family_num
;
6388 mpb
->family_num
+= super
->random
;
6390 /* count arrays using the victim in the metadata */
6392 for (a
= st
->arrays
; a
; a
= a
->next
) {
6393 dev
= get_imsm_dev(super
, a
->info
.container_member
);
6394 map
= get_imsm_map(dev
, 0);
6396 if (get_imsm_disk_slot(map
, victim
) >= 0)
6400 /* delete the victim if it is no longer being
6406 /* We know that 'manager' isn't touching anything,
6407 * so it is safe to delete
6409 for (dlp
= &super
->disks
; *dlp
; dlp
= &(*dlp
)->next
)
6410 if ((*dlp
)->index
== victim
)
6413 /* victim may be on the missing list */
6415 for (dlp
= &super
->missing
; *dlp
; dlp
= &(*dlp
)->next
)
6416 if ((*dlp
)->index
== victim
)
6418 imsm_delete(super
, dlp
, victim
);
6422 case update_create_array
: {
6423 /* someone wants to create a new array, we need to be aware of
6424 * a few races/collisions:
6425 * 1/ 'Create' called by two separate instances of mdadm
6426 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6427 * devices that have since been assimilated via
6429 * In the event this update can not be carried out mdadm will
6430 * (FIX ME) notice that its update did not take hold.
6432 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6433 struct intel_dev
*dv
;
6434 struct imsm_dev
*dev
;
6435 struct imsm_map
*map
, *new_map
;
6436 unsigned long long start
, end
;
6437 unsigned long long new_start
, new_end
;
6439 struct disk_info
*inf
;
6442 /* handle racing creates: first come first serve */
6443 if (u
->dev_idx
< mpb
->num_raid_devs
) {
6444 dprintf("%s: subarray %d already defined\n",
6445 __func__
, u
->dev_idx
);
6449 /* check update is next in sequence */
6450 if (u
->dev_idx
!= mpb
->num_raid_devs
) {
6451 dprintf("%s: can not create array %d expected index %d\n",
6452 __func__
, u
->dev_idx
, mpb
->num_raid_devs
);
6456 new_map
= get_imsm_map(&u
->dev
, 0);
6457 new_start
= __le32_to_cpu(new_map
->pba_of_lba0
);
6458 new_end
= new_start
+ __le32_to_cpu(new_map
->blocks_per_member
);
6459 inf
= get_disk_info(u
);
6461 /* handle activate_spare versus create race:
6462 * check to make sure that overlapping arrays do not include
6465 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6466 dev
= get_imsm_dev(super
, i
);
6467 map
= get_imsm_map(dev
, 0);
6468 start
= __le32_to_cpu(map
->pba_of_lba0
);
6469 end
= start
+ __le32_to_cpu(map
->blocks_per_member
);
6470 if ((new_start
>= start
&& new_start
<= end
) ||
6471 (start
>= new_start
&& start
<= new_end
))
6476 if (disks_overlap(super
, i
, u
)) {
6477 dprintf("%s: arrays overlap\n", __func__
);
6482 /* check that prepare update was successful */
6483 if (!update
->space
) {
6484 dprintf("%s: prepare update failed\n", __func__
);
6488 /* check that all disks are still active before committing
6489 * changes. FIXME: could we instead handle this by creating a
6490 * degraded array? That's probably not what the user expects,
6491 * so better to drop this update on the floor.
6493 for (i
= 0; i
< new_map
->num_members
; i
++) {
6494 dl
= serial_to_dl(inf
[i
].serial
, super
);
6496 dprintf("%s: disk disappeared\n", __func__
);
6501 super
->updates_pending
++;
6503 /* convert spares to members and fixup ord_tbl */
6504 for (i
= 0; i
< new_map
->num_members
; i
++) {
6505 dl
= serial_to_dl(inf
[i
].serial
, super
);
6506 if (dl
->index
== -1) {
6507 dl
->index
= mpb
->num_disks
;
6509 dl
->disk
.status
|= CONFIGURED_DISK
;
6510 dl
->disk
.status
&= ~SPARE_DISK
;
6512 set_imsm_ord_tbl_ent(new_map
, i
, dl
->index
);
6517 update
->space
= NULL
;
6518 imsm_copy_dev(dev
, &u
->dev
);
6519 dv
->index
= u
->dev_idx
;
6520 dv
->next
= super
->devlist
;
6521 super
->devlist
= dv
;
6522 mpb
->num_raid_devs
++;
6524 imsm_update_version_info(super
);
6527 /* mdmon knows how to release update->space, but not
6528 * ((struct intel_dev *) update->space)->dev
6530 if (update
->space
) {
6536 case update_kill_array
: {
6537 struct imsm_update_kill_array
*u
= (void *) update
->buf
;
6538 int victim
= u
->dev_idx
;
6539 struct active_array
*a
;
6540 struct intel_dev
**dp
;
6541 struct imsm_dev
*dev
;
6543 /* sanity check that we are not affecting the uuid of
6544 * active arrays, or deleting an active array
6546 * FIXME when immutable ids are available, but note that
6547 * we'll also need to fixup the invalidated/active
6548 * subarray indexes in mdstat
6550 for (a
= st
->arrays
; a
; a
= a
->next
)
6551 if (a
->info
.container_member
>= victim
)
6553 /* by definition if mdmon is running at least one array
6554 * is active in the container, so checking
6555 * mpb->num_raid_devs is just extra paranoia
6557 dev
= get_imsm_dev(super
, victim
);
6558 if (a
|| !dev
|| mpb
->num_raid_devs
== 1) {
6559 dprintf("failed to delete subarray-%d\n", victim
);
6563 for (dp
= &super
->devlist
; *dp
;)
6564 if ((*dp
)->index
== (unsigned)super
->current_vol
) {
6567 if ((*dp
)->index
> (unsigned)victim
)
6571 mpb
->num_raid_devs
--;
6572 super
->updates_pending
++;
6575 case update_rename_array
: {
6576 struct imsm_update_rename_array
*u
= (void *) update
->buf
;
6577 char name
[MAX_RAID_SERIAL_LEN
+1];
6578 int target
= u
->dev_idx
;
6579 struct active_array
*a
;
6580 struct imsm_dev
*dev
;
6582 /* sanity check that we are not affecting the uuid of
6585 snprintf(name
, MAX_RAID_SERIAL_LEN
, "%s", (char *) u
->name
);
6586 name
[MAX_RAID_SERIAL_LEN
] = '\0';
6587 for (a
= st
->arrays
; a
; a
= a
->next
)
6588 if (a
->info
.container_member
== target
)
6590 dev
= get_imsm_dev(super
, u
->dev_idx
);
6591 if (a
|| !dev
|| !check_name(super
, name
, 1)) {
6592 dprintf("failed to rename subarray-%d\n", target
);
6596 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
6597 super
->updates_pending
++;
6600 case update_add_remove_disk
: {
6601 /* we may be able to repair some arrays if disks are
6602 * being added, check teh status of add_remove_disk
6603 * if discs has been added.
6605 if (add_remove_disk_update(super
)) {
6606 struct active_array
*a
;
6608 super
->updates_pending
++;
6609 for (a
= st
->arrays
; a
; a
= a
->next
)
6610 a
->check_degraded
= 1;
6615 fprintf(stderr
, "error: unsuported process update type:"
6616 "(type: %d)\n", type
);
6620 static void imsm_prepare_update(struct supertype
*st
,
6621 struct metadata_update
*update
)
6624 * Allocate space to hold new disk entries, raid-device entries or a new
6625 * mpb if necessary. The manager synchronously waits for updates to
6626 * complete in the monitor, so new mpb buffers allocated here can be
6627 * integrated by the monitor thread without worrying about live pointers
6628 * in the manager thread.
6630 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6631 struct intel_super
*super
= st
->sb
;
6632 struct imsm_super
*mpb
= super
->anchor
;
6637 case update_takeover
: {
6638 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6639 if (u
->direction
== R0_TO_R10
) {
6640 void **tail
= (void **)&update
->space_list
;
6641 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subarray
);
6642 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6643 int num_members
= map
->num_members
;
6647 /* allocate memory for added disks */
6648 for (i
= 0; i
< num_members
; i
++) {
6649 size
= sizeof(struct dl
);
6650 space
= malloc(size
);
6659 /* allocate memory for new device */
6660 size
= sizeof_imsm_dev(super
->devlist
->dev
, 0) +
6661 (num_members
* sizeof(__u32
));
6662 space
= malloc(size
);
6671 len
= disks_to_mpb_size(num_members
* 2);
6673 /* if allocation didn't success, free buffer */
6674 while (update
->space_list
) {
6675 void **sp
= update
->space_list
;
6676 update
->space_list
= *sp
;
6684 case update_reshape_container_disks
: {
6685 /* Every raid device in the container is about to
6686 * gain some more devices, and we will enter a
6688 * So each 'imsm_map' will be bigger, and the imsm_vol
6689 * will now hold 2 of them.
6690 * Thus we need new 'struct imsm_dev' allocations sized
6691 * as sizeof_imsm_dev but with more devices in both maps.
6693 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6694 struct intel_dev
*dl
;
6695 void **space_tail
= (void**)&update
->space_list
;
6697 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6699 for (dl
= super
->devlist
; dl
; dl
= dl
->next
) {
6700 int size
= sizeof_imsm_dev(dl
->dev
, 1);
6702 if (u
->new_raid_disks
> u
->old_raid_disks
)
6703 size
+= sizeof(__u32
)*2*
6704 (u
->new_raid_disks
- u
->old_raid_disks
);
6713 len
= disks_to_mpb_size(u
->new_raid_disks
);
6714 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
6717 case update_create_array
: {
6718 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6719 struct intel_dev
*dv
;
6720 struct imsm_dev
*dev
= &u
->dev
;
6721 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6723 struct disk_info
*inf
;
6727 inf
= get_disk_info(u
);
6728 len
= sizeof_imsm_dev(dev
, 1);
6729 /* allocate a new super->devlist entry */
6730 dv
= malloc(sizeof(*dv
));
6732 dv
->dev
= malloc(len
);
6737 update
->space
= NULL
;
6741 /* count how many spares will be converted to members */
6742 for (i
= 0; i
< map
->num_members
; i
++) {
6743 dl
= serial_to_dl(inf
[i
].serial
, super
);
6745 /* hmm maybe it failed?, nothing we can do about
6750 if (count_memberships(dl
, super
) == 0)
6753 len
+= activate
* sizeof(struct imsm_disk
);
6760 /* check if we need a larger metadata buffer */
6761 if (super
->next_buf
)
6762 buf_len
= super
->next_len
;
6764 buf_len
= super
->len
;
6766 if (__le32_to_cpu(mpb
->mpb_size
) + len
> buf_len
) {
6767 /* ok we need a larger buf than what is currently allocated
6768 * if this allocation fails process_update will notice that
6769 * ->next_len is set and ->next_buf is NULL
6771 buf_len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + len
, 512);
6772 if (super
->next_buf
)
6773 free(super
->next_buf
);
6775 super
->next_len
= buf_len
;
6776 if (posix_memalign(&super
->next_buf
, 512, buf_len
) == 0)
6777 memset(super
->next_buf
, 0, buf_len
);
6779 super
->next_buf
= NULL
;
/*
 * imsm_delete - remove the disk at position `index` from the imsm metadata.
 *
 * Several lines of this function are not present in this listing, so the
 * notes below cover only what is visible:
 *  - super->disks and super->missing are both walked, testing each entry's
 *    ->index against (int)index (the "shift all indexes down one" pass; the
 *    statement executed for matching entries is not visible);
 *  - for every raid device counted by mpb->num_raid_devs, each ord table
 *    entry is read with get_imsm_ord_tbl_ent(dev, j, -1); entries are then
 *    rewritten in map 0 with ord_to_idx(ord - 1) and in map 1 with ord - 1,
 *    so that ord flags are not copied into the first map;
 *  - super->updates_pending is incremented;
 *  - the node at `*dlp` is unlinked (`*dlp` advances to its ->next) and
 *    handed to __free_imsm_disk().
 *
 * NOTE(review): what happens to entries with ord_to_idx(ord) <= index, and
 * any guard around the map 1 update or the final unlink, is in lines that
 * are missing here - confirm against the full file.
 */
6783 /* must be called while manager is quiesced */
6784 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
)
6786 struct imsm_super
*mpb
= super
->anchor
;
6788 struct imsm_dev
*dev
;
6789 struct imsm_map
*map
;
6790 int i
, j
, num_members
;
6793 dprintf("%s: deleting device[%d] from imsm_super\n",
6796 /* shift all indexes down one */
6797 for (iter
= super
->disks
; iter
; iter
= iter
->next
)
6798 if (iter
->index
> (int)index
)
6800 for (iter
= super
->missing
; iter
; iter
= iter
->next
)
6801 if (iter
->index
> (int)index
)
6804 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6805 dev
= get_imsm_dev(super
, i
);
6806 map
= get_imsm_map(dev
, 0);
6807 num_members
= map
->num_members
;
6808 for (j
= 0; j
< num_members
; j
++) {
6809 /* update ord entries being careful not to propagate
6810 * ord-flags to the first map
6812 ord
= get_imsm_ord_tbl_ent(dev
, j
, -1);
6814 if (ord_to_idx(ord
) <= index
)
6817 map
= get_imsm_map(dev
, 0);
6818 set_imsm_ord_tbl_ent(map
, j
, ord_to_idx(ord
- 1));
6819 map
= get_imsm_map(dev
, 1);
6821 set_imsm_ord_tbl_ent(map
, j
, ord
- 1);
6826 super
->updates_pending
++;
6828 struct dl
*dl
= *dlp
;
6830 *dlp
= (*dlp
)->next
;
6831 __free_imsm_disk(dl
);
/* Directory prefix prepended to a by-path name to form a device node path.
 * The string is only ever read, so it is kept const.
 */
static const char disk_by_path[] = "/dev/disk/by-path/";
/*
 * imsm_get_disk_controller_domain - classify the controller a disk hangs off.
 *
 * `path` is a name relative to /dev/disk/by-path/.  Visible steps:
 *  - disk_path (PATH_MAX bytes) is built from disk_by_path followed by
 *    `path`, using strncpy()/strncat() bounded by PATH_MAX;
 *  - if stat() on that path succeeds, st.st_rdev is turned into a sysfs
 *    device path with devt_to_devpath() (this overwrites the local `path`),
 *    and find_disk_attached_hba(-1, path) looks up the owning HBA;
 *  - the HBA type is compared against SYS_DEV_SAS and SYS_DEV_SATA;
 *  - path, hba->path (or "NULL") and `drv` are logged with dprintf().
 *
 * NOTE(review): the declarations of `st` and `drv`, the values assigned in
 * the SAS/SATA branches and the return statement are in lines missing from
 * this listing - the returned string presumably is `drv`; confirm.
 */
6837 static const char *imsm_get_disk_controller_domain(const char *path
)
6839 char disk_path
[PATH_MAX
];
6843 strncpy(disk_path
, disk_by_path
, PATH_MAX
- 1);
6844 strncat(disk_path
, path
, PATH_MAX
- strlen(disk_path
) - 1);
6845 if (stat(disk_path
, &st
) == 0) {
6846 struct sys_dev
* hba
;
6849 path
= devt_to_devpath(st
.st_rdev
);
6852 hba
= find_disk_attached_hba(-1, path
);
6853 if (hba
&& hba
->type
== SYS_DEV_SAS
)
6855 else if (hba
&& hba
->type
== SYS_DEV_SATA
)
6859 dprintf("path: %s hba: %s attached: %s\n",
6860 path
, (hba
) ? hba
->path
: "NULL", drv
);
/*
 * imsm_find_array_minor_by_subdev - look up the md minor of a member array.
 *
 * `subdev` is printed as a decimal string into a 20-byte buffer and passed,
 * together with `container`, to mdstat_by_subdev().  The devnum of the
 * returned mdstat entry is stored through `minor`, and the entry is released
 * with free_mdstat().
 *
 * NOTE(review): the return statements, and any check that mdstat_by_subdev()
 * actually found an entry before mdstat->devnum is read, are in lines missing
 * from this listing - confirm a NULL guard exists.
 */
6868 static int imsm_find_array_minor_by_subdev(int subdev
, int container
, int *minor
)
6870 char subdev_name
[20];
6871 struct mdstat_ent
*mdstat
;
6873 sprintf(subdev_name
, "%d", subdev
);
6874 mdstat
= mdstat_by_subdev(subdev_name
, container
);
6878 *minor
= mdstat
->devnum
;
6879 free_mdstat(mdstat
);
/*
 * imsm_reshape_is_allowed_on_container - validate a container-wide grow.
 *
 * Visible checks (parts of the function are missing from this listing):
 *  - the request must change the number of raid disks only: geo->size must
 *    be -1, geo->level and geo->layout UnSet, geo->chunksize 0 and
 *    geo->raid_disks set;
 *  - every member returned by container_content_imsm(st, NULL) is examined:
 *    geo->raid_disks must be larger than the member's raid_disks, the raid
 *    level must be 0 or 5, the platform (super->orom) must support the
 *    level via is_raid_level_supported(), the component size must be a
 *    multiple of chunk_size/512, all members must agree on raid_disks (the
 *    value is handed back through old_raid_disks), and the member must be
 *    locatable with imsm_find_array_minor_by_subdev();
 *  - devices_that_can_grow counts members that passed; the operation is
 *    allowed only when the loop ran to the end (member is NULL) and that
 *    count is non-zero.
 *
 * NOTE(review): inside the member loop the level, component-size and
 * raid_disks checks read `info->...` (the head of the list) while the
 * platform check reads `member->...`.  If the intent is to validate each
 * member, those should presumably use `member` - confirm.
 */
6883 static int imsm_reshape_is_allowed_on_container(struct supertype
*st
,
6884 struct geo_params
*geo
,
6885 int *old_raid_disks
)
6887 /* currently we only support increasing the number of devices
6888 * for a container. This increases the number of device for each
6889 * member array. They must all be RAID0 or RAID5.
6892 struct mdinfo
*info
, *member
;
6893 int devices_that_can_grow
= 0;
6895 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
6896 "st->devnum = (%i)\n",
6899 if (geo
->size
!= -1 ||
6900 geo
->level
!= UnSet
||
6901 geo
->layout
!= UnSet
||
6902 geo
->chunksize
!= 0 ||
6903 geo
->raid_disks
== UnSet
) {
6904 dprintf("imsm: Container operation is allowed for "
6905 "raid disks number change only.\n");
6909 info
= container_content_imsm(st
, NULL
);
6910 for (member
= info
; member
; member
= member
->next
) {
6914 dprintf("imsm: checking device_num: %i\n",
6915 member
->container_member
);
6917 if (geo
->raid_disks
<= member
->array
.raid_disks
) {
6918 /* we work on container for Online Capacity Expansion
6919 * only so raid_disks has to grow
6921 dprintf("imsm: for container operation raid disks "
6922 "increase is required\n");
6926 if ((info
->array
.level
!= 0) &&
6927 (info
->array
.level
!= 5)) {
6928 /* we cannot use this container with other raid level
6930 dprintf("imsm: for container operation wrong"
6931 " raid level (%i) detected\n",
6935 /* check for platform support
6936 * for this raid level configuration
6938 struct intel_super
*super
= st
->sb
;
6939 if (!is_raid_level_supported(super
->orom
,
6940 member
->array
.level
,
6942 dprintf("platform does not support raid%d with"
6946 geo
->raid_disks
> 1 ? "s" : "");
6949 /* check if component size is aligned to chunk size
6951 if (info
->component_size
%
6952 (info
->array
.chunk_size
/512)) {
6953 dprintf("Component size is not aligned to "
6959 if (*old_raid_disks
&&
6960 info
->array
.raid_disks
!= *old_raid_disks
)
6962 *old_raid_disks
= info
->array
.raid_disks
;
6964 /* All raid5 and raid0 volumes in container
6965 * have to be ready for Online Capacity Expansion
6966 * so they need to be assembled. We have already
6967 * checked that no recovery etc is happening.
6969 result
= imsm_find_array_minor_by_subdev(member
->container_member
,
6973 dprintf("imsm: cannot find array\n");
6976 devices_that_can_grow
++;
6979 if (!member
&& devices_that_can_grow
)
6983 dprintf("\tContainer operation allowed\n");
6985 dprintf("\tError: %i\n", ret_val
);
6990 /* Function: get_spares_for_grow
6991 * Description: Allocates memory and creates list of spare devices
6992 * avaliable in container. Checks if spare drive size is acceptable.
6993 * Parameters: Pointer to the supertype structure
6994 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
6997 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
)
6999 unsigned long long min_size
= min_acceptable_spare_size_imsm(st
);
7000 return container_choose_spares(st
, min_size
, NULL
, NULL
, NULL
, 0);
7003 /******************************************************************************
7004 * function: imsm_create_metadata_update_for_reshape
7005 * Function creates update for whole IMSM container.
7007 ******************************************************************************/
/*
 * Visible behaviour (parts of the function, including one parameter line,
 * are missing from this listing):
 *  - delta_disks is geo->raid_disks minus old_raid_disks;
 *  - the update buffer is sized as sizeof(struct imsm_update_reshape) plus
 *    (delta_disks - 1) extra new_disks[] slots, and zero-allocated with
 *    calloc();
 *  - the update is tagged update_reshape_container_disks and records the
 *    old and new raid disk counts;
 *  - spares come from get_spares_for_grow(); the request is rejected with an
 *    error message when delta_disks exceeds spares->array.spare_disks;
 *  - for each of the delta_disks new members, u->new_disks[i] receives the
 *    spare's device number and the matching entry found by get_disk_super()
 *    gets ->index set to mpb->num_disks;
 *  - update_memory_size is returned when all delta_disks entries were filled
 *    (i == delta_disks).
 *
 * NOTE(review): the size computation assumes delta_disks >= 1 and that
 * struct imsm_update_reshape already contains one new_disks[] element -
 * TODO confirm.  The result of get_disk_super() is dereferenced in the
 * visible code without a visible NULL check - confirm.
 */
7008 static int imsm_create_metadata_update_for_reshape(
7009 struct supertype
*st
,
7010 struct geo_params
*geo
,
7012 struct imsm_update_reshape
**updatep
)
7014 struct intel_super
*super
= st
->sb
;
7015 struct imsm_super
*mpb
= super
->anchor
;
7016 int update_memory_size
= 0;
7017 struct imsm_update_reshape
*u
= NULL
;
7018 struct mdinfo
*spares
= NULL
;
7020 int delta_disks
= 0;
7023 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7026 delta_disks
= geo
->raid_disks
- old_raid_disks
;
7028 /* size of all update data without anchor */
7029 update_memory_size
= sizeof(struct imsm_update_reshape
);
7031 /* now add space for spare disks that we need to add. */
7032 update_memory_size
+= sizeof(u
->new_disks
[0]) * (delta_disks
- 1);
7034 u
= calloc(1, update_memory_size
);
7037 "cannot get memory for imsm_update_reshape update\n");
7040 u
->type
= update_reshape_container_disks
;
7041 u
->old_raid_disks
= old_raid_disks
;
7042 u
->new_raid_disks
= geo
->raid_disks
;
7044 /* now get spare disks list
7046 spares
= get_spares_for_grow(st
);
7049 || delta_disks
> spares
->array
.spare_disks
) {
7050 fprintf(stderr
, Name
": imsm: ERROR: Cannot get spare devices "
7051 "for %s.\n", geo
->dev_name
);
7055 /* we have got spares
7056 * update disk list in imsm_disk list table in anchor
7058 dprintf("imsm: %i spares are available.\n\n",
7059 spares
->array
.spare_disks
);
7062 for (i
= 0; i
< delta_disks
; i
++) {
7067 u
->new_disks
[i
] = makedev(dev
->disk
.major
,
7069 dl
= get_disk_super(super
, dev
->disk
.major
, dev
->disk
.minor
);
7070 dl
->index
= mpb
->num_disks
;
7080 dprintf("imsm: reshape update preparation :");
7081 if (i
== delta_disks
) {
7084 return update_memory_size
;
7087 dprintf(" Error\n");
/*
 * imsm_update_metadata_locally - apply a metadata update in this process.
 *
 * A struct metadata_update is set up on the stack (only the space_list
 * initialisation is visible here), then run through imsm_prepare_update()
 * followed by imsm_process_update().  Afterwards whatever is left on
 * mu.space_list is popped one node at a time; each node's first word is
 * treated as the link to the next node.
 *
 * NOTE(review): the rest of the parameter list, the remaining field
 * initialisations and the statement that disposes of each popped node are in
 * lines missing from this listing.
 */
7092 static void imsm_update_metadata_locally(struct supertype
*st
,
7095 struct metadata_update mu
;
7100 mu
.space_list
= NULL
;
7102 imsm_prepare_update(st
, &mu
);
7103 imsm_process_update(st
, &mu
);
7105 while (mu
.space_list
) {
7106 void **space
= mu
.space_list
;
7107 mu
.space_list
= *space
;
7112 /***************************************************************************
7113 * Function: imsm_analyze_change
7114 * Description: Function analyze change for single volume
7115 * and validate if transition is supported
7116 * Parameters: Geometry parameters, supertype structure
7117 * Returns: Operation type code on success, -1 if fail
7118 ****************************************************************************/
/*
 * Visible behaviour (case labels, else branches and the return are missing
 * from this listing):
 *  - the current volume geometry is read with getinfo_super_imsm_volume();
 *  - a requested level that differs from the current one is classified per
 *    current level as CH_MIGRATION (target level 5 or 0) or CH_TAKEOVER
 *    (target level 10 or 0); anything else prints a "Level Migration" error
 *    and jumps to analyse_change_exit;
 *  - a requested layout that differs from the current one is CH_MIGRATION,
 *    but only the raid5 layout 0 <-> 5 transitions are accepted; others
 *    print a "Layout Migration" error and jump to analyse_change_exit;
 *  - a requested chunk size that differs from the current one is
 *    CH_MIGRATION;
 *  - geo->level, geo->layout and geo->chunksize are also assigned from the
 *    current array values (presumably when no change was requested -
 *    confirm), and the result is checked with validate_geometry_imsm();
 *  - more than one raid device in the container (mpb->num_raid_devs > 1)
 *    is reported as an error.
 */
7119 enum imsm_reshape_type
imsm_analyze_change(struct supertype
*st
,
7120 struct geo_params
*geo
)
7127 getinfo_super_imsm_volume(st
, &info
, NULL
);
7129 if ((geo
->level
!= info
.array
.level
) &&
7130 (geo
->level
>= 0) &&
7131 (geo
->level
!= UnSet
)) {
7132 switch (info
.array
.level
) {
7134 if (geo
->level
== 5) {
7135 change
= CH_MIGRATION
;
7138 if (geo
->level
== 10) {
7139 change
= CH_TAKEOVER
;
7144 if (geo
->level
== 0) {
7145 change
= CH_TAKEOVER
;
7150 if (geo
->level
== 0)
7151 change
= CH_MIGRATION
;
7154 if (geo
->level
== 0) {
7155 change
= CH_TAKEOVER
;
7162 Name
" Error. Level Migration from %d to %d "
7164 info
.array
.level
, geo
->level
);
7165 goto analyse_change_exit
;
7168 geo
->level
= info
.array
.level
;
7170 if ((geo
->layout
!= info
.array
.layout
)
7171 && ((geo
->layout
!= UnSet
) && (geo
->layout
!= -1))) {
7172 change
= CH_MIGRATION
;
7173 if ((info
.array
.layout
== 0)
7174 && (info
.array
.level
== 5)
7175 && (geo
->layout
== 5)) {
7176 /* reshape 5 -> 4 */
7177 } else if ((info
.array
.layout
== 5)
7178 && (info
.array
.level
== 5)
7179 && (geo
->layout
== 0)) {
7180 /* reshape 4 -> 5 */
7185 Name
" Error. Layout Migration from %d to %d "
7187 info
.array
.layout
, geo
->layout
);
7189 goto analyse_change_exit
;
7192 geo
->layout
= info
.array
.layout
;
7194 if ((geo
->chunksize
> 0) && (geo
->chunksize
!= UnSet
)
7195 && (geo
->chunksize
!= info
.array
.chunk_size
))
7196 change
= CH_MIGRATION
;
7198 geo
->chunksize
= info
.array
.chunk_size
;
7200 chunk
= geo
->chunksize
/ 1024;
7201 if (!validate_geometry_imsm(st
,
7211 struct intel_super
*super
= st
->sb
;
7212 struct imsm_super
*mpb
= super
->anchor
;
7214 if (mpb
->num_raid_devs
> 1) {
7216 Name
" Error. Cannot perform operation on %s"
7217 "- for this operation it MUST be single "
7218 "array in container\n",
7224 analyse_change_exit
:
/*
 * imsm_takeover - build and submit a raid0 <-> raid10 takeover update.
 *
 * A struct imsm_update_takeover is allocated with malloc(), tagged
 * update_takeover and bound to super->current_vol.  The direction is
 * R10_TO_R0 when geo->level is 0 and R0_TO_R10 when geo->level is 10.
 * The update is applied through imsm_update_metadata_locally() and, when
 * st->update_tail is set, also queued with append_metadata_update().
 *
 * NOTE(review): in the visible code u->direction is assigned only for
 * levels 0 and 10 and the buffer comes from malloc(), so any other level
 * would leave it uninitialised - confirm callers only pass those two.
 * The malloc() failure check, the handling of `u` when st->update_tail is
 * not set, and the return value are in lines missing from this listing.
 */
7229 int imsm_takeover(struct supertype
*st
, struct geo_params
*geo
)
7231 struct intel_super
*super
= st
->sb
;
7232 struct imsm_update_takeover
*u
;
7234 u
= malloc(sizeof(struct imsm_update_takeover
));
7238 u
->type
= update_takeover
;
7239 u
->subarray
= super
->current_vol
;
7241 /* 10->0 transition */
7242 if (geo
->level
== 0)
7243 u
->direction
= R10_TO_R0
;
7245 /* 0->10 transition */
7246 if (geo
->level
== 10)
7247 u
->direction
= R0_TO_R10
;
7249 /* update metadata locally */
7250 imsm_update_metadata_locally(st
, u
,
7251 sizeof(struct imsm_update_takeover
));
7252 /* and possibly remotely */
7253 if (st
->update_tail
)
7254 append_metadata_update(st
, u
,
7255 sizeof(struct imsm_update_takeover
));
/*
 * warn_user_about_risk - print the experimental-feature warning and ask the
 * user whether to go on.
 *
 * The warning text is a single concatenated string literal; the answer to
 * "Do you want to continue? " is obtained with ask() and stored in rv, and a
 * newline is then written to stderr.
 *
 * NOTE(review): the call that prints the warning, one line of the message
 * text and the return statement are missing from this listing; the function
 * presumably returns rv - confirm.
 */
7262 static int warn_user_about_risk(void)
7267 "\nThis is an experimental feature. Data on the RAID volume(s) "
7268 "can be lost!!!\n\n"
7269 "To continue command execution please make sure that\n"
7270 "the grow process will not be interrupted. Use safe power\n"
7271 "supply to avoid unexpected system reboot. Make sure that\n"
7272 "reshaped container is not assembled automatically during\n"
7274 "If reshape is interrupted, assemble array manually\n"
7275 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
7276 "Assembly in scan mode is not possible in such case.\n"
7277 "Growing container with boot array is not possible.\n"
7278 "If boot array reshape is interrupted, whole file system\n"
7279 "can be lost.\n\n");
7280 rv
= ask("Do you want to continue? ");
7281 fprintf(stderr
, "\n");
/*
 * imsm_reshape_super - reshape_super handler for imsm metadata.
 *
 * Only part of this function is present in this listing; what is visible:
 *  - the request is collected in a zeroed struct geo_params: dev_id comes
 *    from st->devnum, layout, chunksize and raid_disks from the arguments,
 *    and delta_disks is added to raid_disks when it is not UnSet;
 *  - experimental() is checked before any work is done;
 *  - when st->container_dev equals st->devnum this is a container
 *    operation: warn_user_about_risk() is consulted, the request is
 *    validated by imsm_reshape_is_allowed_on_container(), the update is
 *    built by imsm_create_metadata_update_for_reshape(), applied with
 *    imsm_update_metadata_locally() and, if st->update_tail is set, queued
 *    with append_metadata_update(); a refused request prints
 *    "Operation is not allowed on this container";
 *  - otherwise this is a volume operation: the minor number of a
 *    super->devlist entry is obtained with imsm_find_array_minor_by_subdev()
 *    and compared with geo.dev_id, super->current_vol is set to the entry's
 *    index, imsm_analyze_change() classifies the request and
 *    imsm_takeover() is called for it (the dispatch on the change type is
 *    not visible);
 *  - failure paths visible here jump to exit_imsm_reshape_super, where
 *    ret_val is logged.
 */
7286 static int imsm_reshape_super(struct supertype
*st
, long long size
, int level
,
7287 int layout
, int chunksize
, int raid_disks
,
7288 int delta_disks
, char *backup
, char *dev
,
7292 struct geo_params geo
;
7294 dprintf("imsm: reshape_super called.\n");
7296 memset(&geo
, 0, sizeof(struct geo_params
));
7299 geo
.dev_id
= st
->devnum
;
7302 geo
.layout
= layout
;
7303 geo
.chunksize
= chunksize
;
7304 geo
.raid_disks
= raid_disks
;
7305 if (delta_disks
!= UnSet
)
7306 geo
.raid_disks
+= delta_disks
;
7308 dprintf("\tfor level : %i\n", geo
.level
);
7309 dprintf("\tfor raid_disks : %i\n", geo
.raid_disks
);
7311 if (experimental() == 0)
7314 if (st
->container_dev
== st
->devnum
) {
7315 /* On container level we can only increase number of devices. */
7316 dprintf("imsm: info: Container operation\n");
7317 int old_raid_disks
= 0;
7319 /* this warning will be removed when imsm checkpointing
7320 * will be implemented, and restoring from check-point
7321 * operation will be transparent for reboot process
7323 if (warn_user_about_risk() == 0)
7326 if (imsm_reshape_is_allowed_on_container(
7327 st
, &geo
, &old_raid_disks
)) {
7328 struct imsm_update_reshape
*u
= NULL
;
7331 len
= imsm_create_metadata_update_for_reshape(
7332 st
, &geo
, old_raid_disks
, &u
);
7335 dprintf("imsm: Cannot prepare update\n");
7336 goto exit_imsm_reshape_super
;
7340 /* update metadata locally */
7341 imsm_update_metadata_locally(st
, u
, len
);
7342 /* and possibly remotely */
7343 if (st
->update_tail
)
7344 append_metadata_update(st
, u
, len
);
7349 fprintf(stderr
, Name
": (imsm) Operation "
7350 "is not allowed on this container\n");
7353 /* On volume level we support following operations
7354 * - takeover: raid10 -> raid0; raid0 -> raid10
7355 * - chunk size migration
7356 * - migration: raid5 -> raid0; raid0 -> raid5
7358 struct intel_super
*super
= st
->sb
;
7359 struct intel_dev
*dev
= super
->devlist
;
7361 dprintf("imsm: info: Volume operation\n");
7362 /* find requested device */
7364 imsm_find_array_minor_by_subdev(dev
->index
, st
->container_dev
, &devnum
);
7365 if (devnum
== geo
.dev_id
)
7370 fprintf(stderr
, Name
" Cannot find %s (%i) subarray\n",
7371 geo
.dev_name
, geo
.dev_id
);
7372 goto exit_imsm_reshape_super
;
7374 super
->current_vol
= dev
->index
;
7375 change
= imsm_analyze_change(st
, &geo
);
7378 ret_val
= imsm_takeover(st
, &geo
);
7388 exit_imsm_reshape_super
:
7389 dprintf("imsm: reshape_super Exit code = %i\n", ret_val
);
/*
 * imsm_manage_reshape - manage_reshape handler for imsm metadata.
 *
 * imsm has no reshape manager of its own yet: every argument is passed
 * through, unchanged and in the same order, to child_monitor(), and its
 * result is returned to the caller.
 */
static int imsm_manage_reshape(
	int afd, struct mdinfo *sra, struct reshape *reshape,
	struct supertype *st, unsigned long stripes,
	int *fds, unsigned long long *offsets,
	int dests, int *destfd, unsigned long long *destoffsets)
{
	int monitor_status;

	/* Just use child_monitor for now */
	monitor_status = child_monitor(afd, sra, reshape, st, stripes,
				       fds, offsets,
				       dests, destfd, destoffsets);
	return monitor_status;
}
7404 #endif /* MDASSEMBLE */
7406 struct superswitch super_imsm
= {
7408 .examine_super
= examine_super_imsm
,
7409 .brief_examine_super
= brief_examine_super_imsm
,
7410 .brief_examine_subarrays
= brief_examine_subarrays_imsm
,
7411 .export_examine_super
= export_examine_super_imsm
,
7412 .detail_super
= detail_super_imsm
,
7413 .brief_detail_super
= brief_detail_super_imsm
,
7414 .write_init_super
= write_init_super_imsm
,
7415 .validate_geometry
= validate_geometry_imsm
,
7416 .add_to_super
= add_to_super_imsm
,
7417 .remove_from_super
= remove_from_super_imsm
,
7418 .detail_platform
= detail_platform_imsm
,
7419 .kill_subarray
= kill_subarray_imsm
,
7420 .update_subarray
= update_subarray_imsm
,
7421 .load_container
= load_container_imsm
,
7422 .default_geometry
= default_geometry_imsm
,
7423 .get_disk_controller_domain
= imsm_get_disk_controller_domain
,
7424 .reshape_super
= imsm_reshape_super
,
7425 .manage_reshape
= imsm_manage_reshape
,
7427 .match_home
= match_home_imsm
,
7428 .uuid_from_super
= uuid_from_super_imsm
,
7429 .getinfo_super
= getinfo_super_imsm
,
7430 .getinfo_super_disks
= getinfo_super_disks_imsm
,
7431 .update_super
= update_super_imsm
,
7433 .avail_size
= avail_size_imsm
,
7434 .min_acceptable_spare_size
= min_acceptable_spare_size_imsm
,
7436 .compare_super
= compare_super_imsm
,
7438 .load_super
= load_super_imsm
,
7439 .init_super
= init_super_imsm
,
7440 .store_super
= store_super_imsm
,
7441 .free_super
= free_super_imsm
,
7442 .match_metadata_desc
= match_metadata_desc_imsm
,
7443 .container_content
= container_content_imsm
,
7450 .open_new
= imsm_open_new
,
7451 .set_array_state
= imsm_set_array_state
,
7452 .set_disk
= imsm_set_disk
,
7453 .sync_metadata
= imsm_sync_metadata
,
7454 .activate_spare
= imsm_activate_spare
,
7455 .process_update
= imsm_process_update
,
7456 .prepare_update
= imsm_prepare_update
,
7457 #endif /* MDASSEMBLE */