2 * mdadm - Intel(R) Matrix Storage Manager Support
4 * Copyright (C) 2002-2008 Intel Corporation
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 #define HAVE_STDINT_H 1
24 #include "platform-intel.h"
/* MPB == Metadata Parameter Block (the on-disk anchor) */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
/* version strings advertised in the anchor; each adds a capability */
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH  32
#define MAX_RAID_SERIAL_LEN   16

/* anchor attribute flags (stored little-endian on disk) */
#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM		   __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB		   __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0	   __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1	   __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10	   __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E	   __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5	   __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG	   __cpu_to_le32(0x00000020)

/* sectors reserved at the end of a member disk for the anchor... */
#define MPB_SECTOR_CNT 418
/* ...plus this much scratch/migration area */
#define IMSM_RESERVED_SECTORS 4096
#define SECT_PER_MB_SHIFT 11

/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
61 __u8 serial
[MAX_RAID_SERIAL_LEN
];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks
; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id
; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status
; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num
; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler
[IMSM_DISK_FILLERS
]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
73 /* RAID map configuration infos. */
75 __u32 pba_of_lba0
; /* start address of partition */
76 __u32 blocks_per_member
;/* blocks per member */
77 __u32 num_data_stripes
; /* number of data stripes */
78 __u16 blocks_per_strip
;
79 __u8 map_state
; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members
; /* number of member disks */
89 __u8 num_domains
; /* number of parity domains */
90 __u8 failed_disk_num
; /* valid only when state is degraded */
92 __u32 filler
[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl
[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
97 } __attribute__ ((packed
));
100 __u32 curr_migr_unit
;
101 __u32 checkpoint_id
; /* id to access curr_migr_unit */
102 __u8 migr_state
; /* Normal or Migrating */
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type
; /* Initializing, Rebuilding, ... */
111 __u8 fs_state
; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors
; /* number of mismatches */
113 __u16 bad_blocks
; /* number of bad blocks during verify */
115 struct imsm_map map
[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed
));
120 __u8 volume
[MAX_RAID_SERIAL_LEN
];
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status
; /* Persistent RaidDev status */
137 __u32 reserved_blocks
; /* Reserved blocks at beginning of volume */
141 __u8 cng_master_disk
;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler
[IMSM_DEV_FILLERS
];
148 } __attribute__ ((packed
));
151 __u8 sig
[MAX_SIGNATURE_LENGTH
]; /* 0x00 - 0x1F */
152 __u32 check_sum
; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size
; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num
; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num
; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size
; /* 0x30 - 0x33 in bytes */
157 __u32 attributes
; /* 0x34 - 0x37 */
158 __u8 num_disks
; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs
; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos
; /* 0x3A */
161 __u8 fill
[1]; /* 0x3B */
162 __u32 cache_size
; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num
; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count
; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size
; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler
[IMSM_FILLERS
]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk
[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed
));
173 #define BBM_LOG_MAX_ENTRIES 254
/* One entry of the Bad Block Management log (on-disk, packed layout —
 * field order and widths must not change).
 */
struct bbm_log_entry {
	__u64 defective_block_start;	/* start of the defective range */
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;	/* offset into the spare area */
	__u16 remapped_marked_count;	/* NOTE(review): presumably count of remapped sectors — confirm */
} __attribute__ ((__packed__));
184 __u32 signature
; /* 0xABADB10C */
186 __u32 reserved_spare_block_count
; /* 0 */
187 __u32 reserved
; /* 0xFFFF */
188 __u64 first_spare_lba
;
189 struct bbm_log_entry mapped_block_entries
[BBM_LOG_MAX_ENTRIES
];
190 } __attribute__ ((__packed__
));
/* Printable names for the IMSM_T_STATE_* map states, indexed by state.
 * const-qualified (string literals must not be written through), matching
 * the style of _sys_dev_type below.
 */
static const char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
197 static __u8
migr_type(struct imsm_dev
*dev
)
199 if (dev
->vol
.migr_type
== MIGR_VERIFY
&&
200 dev
->status
& DEV_VERIFY_AND_FIX
)
203 return dev
->vol
.migr_type
;
206 static void set_migr_type(struct imsm_dev
*dev
, __u8 migr_type
)
208 /* for compatibility with older oroms convert MIGR_REPAIR, into
209 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
211 if (migr_type
== MIGR_REPAIR
) {
212 dev
->vol
.migr_type
= MIGR_VERIFY
;
213 dev
->status
|= DEV_VERIFY_AND_FIX
;
215 dev
->vol
.migr_type
= migr_type
;
216 dev
->status
&= ~DEV_VERIFY_AND_FIX
;
/* Number of 512-byte sectors needed to hold @bytes, rounded up. */
static unsigned int sector_count(__u32 bytes)
{
	return (bytes + 511) / 512;
}
225 static unsigned int mpb_sectors(struct imsm_super
*mpb
)
227 return sector_count(__le32_to_cpu(mpb
->mpb_size
));
231 struct imsm_dev
*dev
;
232 struct intel_dev
*next
;
237 enum sys_dev_type type
;
240 struct intel_hba
*next
;
247 /* internal representation of IMSM metadata */
250 void *buf
; /* O_DIRECT buffer for reading/writing metadata */
251 struct imsm_super
*anchor
; /* immovable parameters */
253 size_t len
; /* size of the 'buf' allocation */
254 void *next_buf
; /* for realloc'ing buf from the manager */
256 int updates_pending
; /* count of pending updates for mdmon */
257 int current_vol
; /* index of raid device undergoing creation */
258 __u32 create_offset
; /* common start for 'current_vol' */
259 __u32 random
; /* random data for seeding new family numbers */
260 struct intel_dev
*devlist
;
264 __u8 serial
[MAX_RAID_SERIAL_LEN
];
267 struct imsm_disk disk
;
270 struct extent
*e
; /* for determining freespace @ create */
271 int raiddisk
; /* slot to fill in autolayout */
274 struct dl
*disk_mgmt_list
; /* list of disks to add/remove while mdmon
276 struct dl
*missing
; /* disks removed while we weren't looking */
277 struct bbm_log
*bbm_log
;
278 struct intel_hba
*hba
; /* device path of the raid controller for this metadata */
279 const struct imsm_orom
*orom
; /* platform firmware support */
280 struct intel_super
*next
; /* (temp) list for disambiguating family_num */
284 struct imsm_disk disk
;
285 #define IMSM_UNKNOWN_OWNER (-1)
287 struct intel_disk
*next
;
291 unsigned long long start
, size
;
294 /* definitions of reshape process types */
295 enum imsm_reshape_type
{
300 /* definition of messages passed to imsm_process_update */
301 enum imsm_update_type
{
302 update_activate_spare
,
306 update_add_remove_disk
,
307 update_reshape_container_disks
,
308 update_reshape_migration
,
312 struct imsm_update_activate_spare
{
313 enum imsm_update_type type
;
317 struct imsm_update_activate_spare
*next
;
330 enum takeover_direction
{
334 struct imsm_update_takeover
{
335 enum imsm_update_type type
;
337 enum takeover_direction direction
;
340 struct imsm_update_reshape
{
341 enum imsm_update_type type
;
345 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
348 struct imsm_update_reshape_migration
{
349 enum imsm_update_type type
;
352 /* fields for array migration changes
359 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
363 __u8 serial
[MAX_RAID_SERIAL_LEN
];
366 struct imsm_update_create_array
{
367 enum imsm_update_type type
;
372 struct imsm_update_kill_array
{
373 enum imsm_update_type type
;
377 struct imsm_update_rename_array
{
378 enum imsm_update_type type
;
379 __u8 name
[MAX_RAID_SERIAL_LEN
];
383 struct imsm_update_add_remove_disk
{
384 enum imsm_update_type type
;
388 static const char *_sys_dev_type
[] = {
389 [SYS_DEV_UNKNOWN
] = "Unknown",
390 [SYS_DEV_SAS
] = "SAS",
391 [SYS_DEV_SATA
] = "SATA"
394 const char *get_sys_dev_type(enum sys_dev_type type
)
396 if (type
>= SYS_DEV_MAX
)
397 type
= SYS_DEV_UNKNOWN
;
399 return _sys_dev_type
[type
];
402 static struct intel_hba
* alloc_intel_hba(struct sys_dev
*device
)
404 struct intel_hba
*result
= malloc(sizeof(*result
));
406 result
->type
= device
->type
;
407 result
->path
= strdup(device
->path
);
409 if (result
->path
&& (result
->pci_id
= strrchr(result
->path
, '/')) != NULL
)
415 static struct intel_hba
* find_intel_hba(struct intel_hba
*hba
, struct sys_dev
*device
)
417 struct intel_hba
*result
=NULL
;
418 for (result
= hba
; result
; result
= result
->next
) {
419 if (result
->type
== device
->type
&& strcmp(result
->path
, device
->path
) == 0)
425 static int attach_hba_to_super(struct intel_super
*super
, struct sys_dev
*device
)
427 struct intel_hba
*hba
;
429 /* check if disk attached to Intel HBA */
430 hba
= find_intel_hba(super
->hba
, device
);
433 /* Check if HBA is already attached to super */
434 if (super
->hba
== NULL
) {
435 super
->hba
= alloc_intel_hba(device
);
440 /* Intel metadata allows for all disks attached to the same type HBA.
441 * Do not support mixing of HBA types
443 if (device
->type
!= hba
->type
)
449 hba
->next
= alloc_intel_hba(device
);
453 static struct sys_dev
* find_disk_attached_hba(int fd
, const char *devname
)
455 struct sys_dev
*list
, *elem
, *prev
;
458 if ((list
= find_intel_devices()) == NULL
)
462 disk_path
= (char *) devname
;
464 disk_path
= diskfd_to_devpath(fd
);
471 for (prev
= NULL
, elem
= list
; elem
; prev
= elem
, elem
= elem
->next
) {
472 if (path_attached_to_hba(disk_path
, elem
->path
)) {
476 prev
->next
= elem
->next
;
478 if (disk_path
!= devname
)
484 if (disk_path
!= devname
)
492 static int find_intel_hba_capability(int fd
, struct intel_super
*super
,
495 static struct supertype
*match_metadata_desc_imsm(char *arg
)
497 struct supertype
*st
;
499 if (strcmp(arg
, "imsm") != 0 &&
500 strcmp(arg
, "default") != 0
504 st
= malloc(sizeof(*st
));
507 memset(st
, 0, sizeof(*st
));
508 st
->container_dev
= NoMdDev
;
509 st
->ss
= &super_imsm
;
510 st
->max_devs
= IMSM_MAX_DEVICES
;
511 st
->minor_version
= 0;
517 static __u8
*get_imsm_version(struct imsm_super
*mpb
)
519 return &mpb
->sig
[MPB_SIG_LEN
];
523 /* retrieve a disk directly from the anchor when the anchor is known to be
524 * up-to-date, currently only at load time
526 static struct imsm_disk
*__get_imsm_disk(struct imsm_super
*mpb
, __u8 index
)
528 if (index
>= mpb
->num_disks
)
530 return &mpb
->disk
[index
];
533 /* retrieve the disk description based on a index of the disk
536 static struct dl
*get_imsm_dl_disk(struct intel_super
*super
, __u8 index
)
540 for (d
= super
->disks
; d
; d
= d
->next
)
541 if (d
->index
== index
)
546 /* retrieve a disk from the parsed metadata */
547 static struct imsm_disk
*get_imsm_disk(struct intel_super
*super
, __u8 index
)
551 dl
= get_imsm_dl_disk(super
, index
);
558 /* generate a checksum directly from the anchor when the anchor is known to be
559 * up-to-date, currently only at load or write_super after coalescing
561 static __u32
__gen_imsm_checksum(struct imsm_super
*mpb
)
563 __u32 end
= mpb
->mpb_size
/ sizeof(end
);
564 __u32
*p
= (__u32
*) mpb
;
568 sum
+= __le32_to_cpu(*p
);
572 return sum
- __le32_to_cpu(mpb
->check_sum
);
575 static size_t sizeof_imsm_map(struct imsm_map
*map
)
577 return sizeof(struct imsm_map
) + sizeof(__u32
) * (map
->num_members
- 1);
580 struct imsm_map
*get_imsm_map(struct imsm_dev
*dev
, int second_map
)
582 /* A device can have 2 maps if it is in the middle of a migration.
584 * 0 - we return the first map
585 * 1 - we return the second map if it exists, else NULL
586 * -1 - we return the second map if it exists, else the first
588 struct imsm_map
*map
= &dev
->vol
.map
[0];
590 if (second_map
== 1 && !dev
->vol
.migr_state
)
592 else if (second_map
== 1 ||
593 (second_map
< 0 && dev
->vol
.migr_state
)) {
596 return ptr
+ sizeof_imsm_map(map
);
602 /* return the size of the device.
603 * migr_state increases the returned size if map[0] were to be duplicated
605 static size_t sizeof_imsm_dev(struct imsm_dev
*dev
, int migr_state
)
607 size_t size
= sizeof(*dev
) - sizeof(struct imsm_map
) +
608 sizeof_imsm_map(get_imsm_map(dev
, 0));
610 /* migrating means an additional map */
611 if (dev
->vol
.migr_state
)
612 size
+= sizeof_imsm_map(get_imsm_map(dev
, 1));
614 size
+= sizeof_imsm_map(get_imsm_map(dev
, 0));
620 /* retrieve disk serial number list from a metadata update */
621 static struct disk_info
*get_disk_info(struct imsm_update_create_array
*update
)
624 struct disk_info
*inf
;
626 inf
= u
+ sizeof(*update
) - sizeof(struct imsm_dev
) +
627 sizeof_imsm_dev(&update
->dev
, 0);
633 static struct imsm_dev
*__get_imsm_dev(struct imsm_super
*mpb
, __u8 index
)
639 if (index
>= mpb
->num_raid_devs
)
642 /* devices start after all disks */
643 offset
= ((void *) &mpb
->disk
[mpb
->num_disks
]) - _mpb
;
645 for (i
= 0; i
<= index
; i
++)
647 return _mpb
+ offset
;
649 offset
+= sizeof_imsm_dev(_mpb
+ offset
, 0);
654 static struct imsm_dev
*get_imsm_dev(struct intel_super
*super
, __u8 index
)
656 struct intel_dev
*dv
;
658 if (index
>= super
->anchor
->num_raid_devs
)
660 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
661 if (dv
->index
== index
)
669 * == 1 get second map
670 * == -1 than get map according to the current migr_state
672 static __u32
get_imsm_ord_tbl_ent(struct imsm_dev
*dev
,
676 struct imsm_map
*map
;
678 map
= get_imsm_map(dev
, second_map
);
680 /* top byte identifies disk under rebuild */
681 return __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
684 #define ord_to_idx(ord) (((ord) << 8) >> 8)
685 static __u32
get_imsm_disk_idx(struct imsm_dev
*dev
, int slot
, int second_map
)
687 __u32 ord
= get_imsm_ord_tbl_ent(dev
, slot
, second_map
);
689 return ord_to_idx(ord
);
692 static void set_imsm_ord_tbl_ent(struct imsm_map
*map
, int slot
, __u32 ord
)
694 map
->disk_ord_tbl
[slot
] = __cpu_to_le32(ord
);
697 static int get_imsm_disk_slot(struct imsm_map
*map
, unsigned idx
)
702 for (slot
= 0; slot
< map
->num_members
; slot
++) {
703 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
704 if (ord_to_idx(ord
) == idx
)
711 static int get_imsm_raid_level(struct imsm_map
*map
)
713 if (map
->raid_level
== 1) {
714 if (map
->num_members
== 2)
720 return map
->raid_level
;
723 static int cmp_extent(const void *av
, const void *bv
)
725 const struct extent
*a
= av
;
726 const struct extent
*b
= bv
;
727 if (a
->start
< b
->start
)
729 if (a
->start
> b
->start
)
734 static int count_memberships(struct dl
*dl
, struct intel_super
*super
)
739 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
740 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
741 struct imsm_map
*map
= get_imsm_map(dev
, 0);
743 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
750 static struct extent
*get_extents(struct intel_super
*super
, struct dl
*dl
)
752 /* find a list of used extents on the given physical device */
753 struct extent
*rv
, *e
;
755 int memberships
= count_memberships(dl
, super
);
756 __u32 reservation
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
758 rv
= malloc(sizeof(struct extent
) * (memberships
+ 1));
763 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
764 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
765 struct imsm_map
*map
= get_imsm_map(dev
, 0);
767 if (get_imsm_disk_slot(map
, dl
->index
) >= 0) {
768 e
->start
= __le32_to_cpu(map
->pba_of_lba0
);
769 e
->size
= __le32_to_cpu(map
->blocks_per_member
);
773 qsort(rv
, memberships
, sizeof(*rv
), cmp_extent
);
775 /* determine the start of the metadata
776 * when no raid devices are defined use the default
777 * ...otherwise allow the metadata to truncate the value
778 * as is the case with older versions of imsm
781 struct extent
*last
= &rv
[memberships
- 1];
784 remainder
= __le32_to_cpu(dl
->disk
.total_blocks
) -
785 (last
->start
+ last
->size
);
786 /* round down to 1k block to satisfy precision of the kernel
790 /* make sure remainder is still sane */
791 if (remainder
< (unsigned)ROUND_UP(super
->len
, 512) >> 9)
792 remainder
= ROUND_UP(super
->len
, 512) >> 9;
793 if (reservation
> remainder
)
794 reservation
= remainder
;
796 e
->start
= __le32_to_cpu(dl
->disk
.total_blocks
) - reservation
;
801 /* try to determine how much space is reserved for metadata from
802 * the last get_extents() entry, otherwise fallback to the
805 static __u32
imsm_reserved_sectors(struct intel_super
*super
, struct dl
*dl
)
811 /* for spares just return a minimal reservation which will grow
812 * once the spare is picked up by an array
815 return MPB_SECTOR_CNT
;
817 e
= get_extents(super
, dl
);
819 return MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
821 /* scroll to last entry */
822 for (i
= 0; e
[i
].size
; i
++)
825 rv
= __le32_to_cpu(dl
->disk
.total_blocks
) - e
[i
].start
;
832 static int is_spare(struct imsm_disk
*disk
)
834 return (disk
->status
& SPARE_DISK
) == SPARE_DISK
;
837 static int is_configured(struct imsm_disk
*disk
)
839 return (disk
->status
& CONFIGURED_DISK
) == CONFIGURED_DISK
;
842 static int is_failed(struct imsm_disk
*disk
)
844 return (disk
->status
& FAILED_DISK
) == FAILED_DISK
;
847 /* Return minimum size of a spare that can be used in this array*/
848 static unsigned long long min_acceptable_spare_size_imsm(struct supertype
*st
)
850 struct intel_super
*super
= st
->sb
;
854 unsigned long long rv
= 0;
858 /* find first active disk in array */
860 while (dl
&& (is_failed(&dl
->disk
) || dl
->index
== -1))
864 /* find last lba used by subarrays */
865 e
= get_extents(super
, dl
);
868 for (i
= 0; e
[i
].size
; i
++)
871 rv
= e
[i
-1].start
+ e
[i
-1].size
;
873 /* add the amount of space needed for metadata */
874 rv
= rv
+ MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
879 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
);
881 static void print_imsm_dev(struct imsm_dev
*dev
, char *uuid
, int disk_idx
)
885 struct imsm_map
*map
= get_imsm_map(dev
, 0);
886 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
890 printf("[%.16s]:\n", dev
->volume
);
891 printf(" UUID : %s\n", uuid
);
892 printf(" RAID Level : %d", get_imsm_raid_level(map
));
894 printf(" <-- %d", get_imsm_raid_level(map2
));
896 printf(" Members : %d", map
->num_members
);
898 printf(" <-- %d", map2
->num_members
);
900 printf(" Slots : [");
901 for (i
= 0; i
< map
->num_members
; i
++) {
902 ord
= get_imsm_ord_tbl_ent(dev
, i
, 0);
903 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
908 for (i
= 0; i
< map2
->num_members
; i
++) {
909 ord
= get_imsm_ord_tbl_ent(dev
, i
, 1);
910 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
915 printf(" Failed disk : ");
916 if (map
->failed_disk_num
== 0xff)
919 printf("%i", map
->failed_disk_num
);
921 slot
= get_imsm_disk_slot(map
, disk_idx
);
923 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
924 printf(" This Slot : %d%s\n", slot
,
925 ord
& IMSM_ORD_REBUILD
? " (out-of-sync)" : "");
927 printf(" This Slot : ?\n");
928 sz
= __le32_to_cpu(dev
->size_high
);
930 sz
+= __le32_to_cpu(dev
->size_low
);
931 printf(" Array Size : %llu%s\n", (unsigned long long)sz
,
932 human_size(sz
* 512));
933 sz
= __le32_to_cpu(map
->blocks_per_member
);
934 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz
,
935 human_size(sz
* 512));
936 printf(" Sector Offset : %u\n",
937 __le32_to_cpu(map
->pba_of_lba0
));
938 printf(" Num Stripes : %u\n",
939 __le32_to_cpu(map
->num_data_stripes
));
940 printf(" Chunk Size : %u KiB",
941 __le16_to_cpu(map
->blocks_per_strip
) / 2);
943 printf(" <-- %u KiB",
944 __le16_to_cpu(map2
->blocks_per_strip
) / 2);
946 printf(" Reserved : %d\n", __le32_to_cpu(dev
->reserved_blocks
));
947 printf(" Migrate State : ");
948 if (dev
->vol
.migr_state
) {
949 if (migr_type(dev
) == MIGR_INIT
)
950 printf("initialize\n");
951 else if (migr_type(dev
) == MIGR_REBUILD
)
953 else if (migr_type(dev
) == MIGR_VERIFY
)
955 else if (migr_type(dev
) == MIGR_GEN_MIGR
)
956 printf("general migration\n");
957 else if (migr_type(dev
) == MIGR_STATE_CHANGE
)
958 printf("state change\n");
959 else if (migr_type(dev
) == MIGR_REPAIR
)
962 printf("<unknown:%d>\n", migr_type(dev
));
965 printf(" Map State : %s", map_state_str
[map
->map_state
]);
966 if (dev
->vol
.migr_state
) {
967 struct imsm_map
*map
= get_imsm_map(dev
, 1);
969 printf(" <-- %s", map_state_str
[map
->map_state
]);
970 printf("\n Checkpoint : %u (%llu)",
971 __le32_to_cpu(dev
->vol
.curr_migr_unit
),
972 (unsigned long long)blocks_per_migr_unit(dev
));
975 printf(" Dirty State : %s\n", dev
->vol
.dirty
? "dirty" : "clean");
978 static void print_imsm_disk(struct imsm_super
*mpb
, int index
, __u32 reserved
)
980 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, index
);
981 char str
[MAX_RAID_SERIAL_LEN
+ 1];
984 if (index
< 0 || !disk
)
988 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
989 printf(" Disk%02d Serial : %s\n", index
, str
);
990 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
991 is_configured(disk
) ? " active" : "",
992 is_failed(disk
) ? " failed" : "");
993 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
994 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
995 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
996 human_size(sz
* 512));
999 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
);
1001 static void examine_super_imsm(struct supertype
*st
, char *homehost
)
1003 struct intel_super
*super
= st
->sb
;
1004 struct imsm_super
*mpb
= super
->anchor
;
1005 char str
[MAX_SIGNATURE_LENGTH
];
1010 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
1013 snprintf(str
, MPB_SIG_LEN
, "%s", mpb
->sig
);
1014 printf(" Magic : %s\n", str
);
1015 snprintf(str
, strlen(MPB_VERSION_RAID0
), "%s", get_imsm_version(mpb
));
1016 printf(" Version : %s\n", get_imsm_version(mpb
));
1017 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb
->orig_family_num
));
1018 printf(" Family : %08x\n", __le32_to_cpu(mpb
->family_num
));
1019 printf(" Generation : %08x\n", __le32_to_cpu(mpb
->generation_num
));
1020 getinfo_super_imsm(st
, &info
, NULL
);
1021 fname_from_uuid(st
, &info
, nbuf
, ':');
1022 printf(" UUID : %s\n", nbuf
+ 5);
1023 sum
= __le32_to_cpu(mpb
->check_sum
);
1024 printf(" Checksum : %08x %s\n", sum
,
1025 __gen_imsm_checksum(mpb
) == sum
? "correct" : "incorrect");
1026 printf(" MPB Sectors : %d\n", mpb_sectors(mpb
));
1027 printf(" Disks : %d\n", mpb
->num_disks
);
1028 printf(" RAID Devices : %d\n", mpb
->num_raid_devs
);
1029 print_imsm_disk(mpb
, super
->disks
->index
, reserved
);
1030 if (super
->bbm_log
) {
1031 struct bbm_log
*log
= super
->bbm_log
;
1034 printf("Bad Block Management Log:\n");
1035 printf(" Log Size : %d\n", __le32_to_cpu(mpb
->bbm_log_size
));
1036 printf(" Signature : %x\n", __le32_to_cpu(log
->signature
));
1037 printf(" Entry Count : %d\n", __le32_to_cpu(log
->entry_count
));
1038 printf(" Spare Blocks : %d\n", __le32_to_cpu(log
->reserved_spare_block_count
));
1039 printf(" First Spare : %llx\n",
1040 (unsigned long long) __le64_to_cpu(log
->first_spare_lba
));
1042 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1044 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1046 super
->current_vol
= i
;
1047 getinfo_super_imsm(st
, &info
, NULL
);
1048 fname_from_uuid(st
, &info
, nbuf
, ':');
1049 print_imsm_dev(dev
, nbuf
+ 5, super
->disks
->index
);
1051 for (i
= 0; i
< mpb
->num_disks
; i
++) {
1052 if (i
== super
->disks
->index
)
1054 print_imsm_disk(mpb
, i
, reserved
);
1056 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1057 struct imsm_disk
*disk
;
1058 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1066 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1067 printf(" Disk Serial : %s\n", str
);
1068 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1069 is_configured(disk
) ? " active" : "",
1070 is_failed(disk
) ? " failed" : "");
1071 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1072 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1073 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1074 human_size(sz
* 512));
1078 static void brief_examine_super_imsm(struct supertype
*st
, int verbose
)
1080 /* We just write a generic IMSM ARRAY entry */
1083 struct intel_super
*super
= st
->sb
;
1085 if (!super
->anchor
->num_raid_devs
) {
1086 printf("ARRAY metadata=imsm\n");
1090 getinfo_super_imsm(st
, &info
, NULL
);
1091 fname_from_uuid(st
, &info
, nbuf
, ':');
1092 printf("ARRAY metadata=imsm UUID=%s\n", nbuf
+ 5);
1095 static void brief_examine_subarrays_imsm(struct supertype
*st
, int verbose
)
1097 /* We just write a generic IMSM ARRAY entry */
1101 struct intel_super
*super
= st
->sb
;
1104 if (!super
->anchor
->num_raid_devs
)
1107 getinfo_super_imsm(st
, &info
, NULL
);
1108 fname_from_uuid(st
, &info
, nbuf
, ':');
1109 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
1110 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1112 super
->current_vol
= i
;
1113 getinfo_super_imsm(st
, &info
, NULL
);
1114 fname_from_uuid(st
, &info
, nbuf1
, ':');
1115 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1116 dev
->volume
, nbuf
+ 5, i
, nbuf1
+ 5);
1120 static void export_examine_super_imsm(struct supertype
*st
)
1122 struct intel_super
*super
= st
->sb
;
1123 struct imsm_super
*mpb
= super
->anchor
;
1127 getinfo_super_imsm(st
, &info
, NULL
);
1128 fname_from_uuid(st
, &info
, nbuf
, ':');
1129 printf("MD_METADATA=imsm\n");
1130 printf("MD_LEVEL=container\n");
1131 printf("MD_UUID=%s\n", nbuf
+5);
1132 printf("MD_DEVICES=%u\n", mpb
->num_disks
);
1135 static void detail_super_imsm(struct supertype
*st
, char *homehost
)
1140 getinfo_super_imsm(st
, &info
, NULL
);
1141 fname_from_uuid(st
, &info
, nbuf
, ':');
1142 printf("\n UUID : %s\n", nbuf
+ 5);
1145 static void brief_detail_super_imsm(struct supertype
*st
)
1149 getinfo_super_imsm(st
, &info
, NULL
);
1150 fname_from_uuid(st
, &info
, nbuf
, ':');
1151 printf(" UUID=%s", nbuf
+ 5);
1154 static int imsm_read_serial(int fd
, char *devname
, __u8
*serial
);
1155 static void fd2devname(int fd
, char *name
);
1157 static int ahci_enumerate_ports(const char *hba_path
, int port_count
, int host_base
, int verbose
)
1159 /* dump an unsorted list of devices attached to AHCI Intel storage
1160 * controller, as well as non-connected ports
1162 int hba_len
= strlen(hba_path
) + 1;
1167 unsigned long port_mask
= (1 << port_count
) - 1;
1169 if (port_count
> (int)sizeof(port_mask
) * 8) {
1171 fprintf(stderr
, Name
": port_count %d out of range\n", port_count
);
1175 /* scroll through /sys/dev/block looking for devices attached to
1178 dir
= opendir("/sys/dev/block");
1179 for (ent
= dir
? readdir(dir
) : NULL
; ent
; ent
= readdir(dir
)) {
1190 if (sscanf(ent
->d_name
, "%d:%d", &major
, &minor
) != 2)
1192 path
= devt_to_devpath(makedev(major
, minor
));
1195 if (!path_attached_to_hba(path
, hba_path
)) {
1201 /* retrieve the scsi device type */
1202 if (asprintf(&device
, "/sys/dev/block/%d:%d/device/xxxxxxx", major
, minor
) < 0) {
1204 fprintf(stderr
, Name
": failed to allocate 'device'\n");
1208 sprintf(device
, "/sys/dev/block/%d:%d/device/type", major
, minor
);
1209 if (load_sys(device
, buf
) != 0) {
1211 fprintf(stderr
, Name
": failed to read device type for %s\n",
1217 type
= strtoul(buf
, NULL
, 10);
1219 /* if it's not a disk print the vendor and model */
1220 if (!(type
== 0 || type
== 7 || type
== 14)) {
1223 sprintf(device
, "/sys/dev/block/%d:%d/device/vendor", major
, minor
);
1224 if (load_sys(device
, buf
) == 0) {
1225 strncpy(vendor
, buf
, sizeof(vendor
));
1226 vendor
[sizeof(vendor
) - 1] = '\0';
1227 c
= (char *) &vendor
[sizeof(vendor
) - 1];
1228 while (isspace(*c
) || *c
== '\0')
1232 sprintf(device
, "/sys/dev/block/%d:%d/device/model", major
, minor
);
1233 if (load_sys(device
, buf
) == 0) {
1234 strncpy(model
, buf
, sizeof(model
));
1235 model
[sizeof(model
) - 1] = '\0';
1236 c
= (char *) &model
[sizeof(model
) - 1];
1237 while (isspace(*c
) || *c
== '\0')
1241 if (vendor
[0] && model
[0])
1242 sprintf(buf
, "%.64s %.64s", vendor
, model
);
1244 switch (type
) { /* numbers from hald/linux/device.c */
1245 case 1: sprintf(buf
, "tape"); break;
1246 case 2: sprintf(buf
, "printer"); break;
1247 case 3: sprintf(buf
, "processor"); break;
1249 case 5: sprintf(buf
, "cdrom"); break;
1250 case 6: sprintf(buf
, "scanner"); break;
1251 case 8: sprintf(buf
, "media_changer"); break;
1252 case 9: sprintf(buf
, "comm"); break;
1253 case 12: sprintf(buf
, "raid"); break;
1254 default: sprintf(buf
, "unknown");
1260 /* chop device path to 'host%d' and calculate the port number */
1261 c
= strchr(&path
[hba_len
], '/');
1264 fprintf(stderr
, Name
": %s - invalid path name\n", path
+ hba_len
);
1269 if (sscanf(&path
[hba_len
], "host%d", &port
) == 1)
1273 *c
= '/'; /* repair the full string */
1274 fprintf(stderr
, Name
": failed to determine port number for %s\n",
1281 /* mark this port as used */
1282 port_mask
&= ~(1 << port
);
1284 /* print out the device information */
1286 printf(" Port%d : - non-disk device (%s) -\n", port
, buf
);
1290 fd
= dev_open(ent
->d_name
, O_RDONLY
);
1292 printf(" Port%d : - disk info unavailable -\n", port
);
1294 fd2devname(fd
, buf
);
1295 printf(" Port%d : %s", port
, buf
);
1296 if (imsm_read_serial(fd
, NULL
, (__u8
*) buf
) == 0)
1297 printf(" (%s)\n", buf
);
1312 for (i
= 0; i
< port_count
; i
++)
1313 if (port_mask
& (1 << i
))
1314 printf(" Port%d : - no device attached -\n", i
);
1322 static void print_found_intel_controllers(struct sys_dev
*elem
)
1324 for (; elem
; elem
= elem
->next
) {
1325 fprintf(stderr
, Name
": found Intel(R) ");
1326 if (elem
->type
== SYS_DEV_SATA
)
1327 fprintf(stderr
, "SATA ");
1328 else if (elem
->type
== SYS_DEV_SAS
)
1329 fprintf(stderr
, "SAS ");
1330 fprintf(stderr
, "RAID controller");
1332 fprintf(stderr
, " at %s", elem
->pci_id
);
1333 fprintf(stderr
, ".\n");
/* Scan an AHCI controller's sysfs directory for 'host%d' entries.
 * On return *port_count holds the number of ports (highest host
 * number + 1 relative to the lowest seen); the lowest host number
 * (the port-number base) is returned, or -1 if the directory cannot
 * be opened.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	struct dirent *ent;
	DIR *dir;
	int host_base = -1;

	*port_count = 0;
	if ((dir = opendir(hba_path)) == NULL)
		return -1;

	for (ent = readdir(dir); ent; ent = readdir(dir)) {
		int host;

		/* only entries named host<N> are scsi hosts */
		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		if (*port_count == 0)
			host_base = host;
		else if (host < host_base)
			host_base = host;

		if (host + 1 > *port_count + host_base)
			*port_count = host + 1 - host_base;
	}
	closedir(dir);
	return host_base;
}
1365 static void print_imsm_capability(const struct imsm_orom
*orom
)
1367 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1368 printf(" Version : %d.%d.%d.%d\n", orom
->major_ver
, orom
->minor_ver
,
1369 orom
->hotfix_ver
, orom
->build
);
1370 printf(" RAID Levels :%s%s%s%s%s\n",
1371 imsm_orom_has_raid0(orom
) ? " raid0" : "",
1372 imsm_orom_has_raid1(orom
) ? " raid1" : "",
1373 imsm_orom_has_raid1e(orom
) ? " raid1e" : "",
1374 imsm_orom_has_raid10(orom
) ? " raid10" : "",
1375 imsm_orom_has_raid5(orom
) ? " raid5" : "");
1376 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1377 imsm_orom_has_chunk(orom
, 2) ? " 2k" : "",
1378 imsm_orom_has_chunk(orom
, 4) ? " 4k" : "",
1379 imsm_orom_has_chunk(orom
, 8) ? " 8k" : "",
1380 imsm_orom_has_chunk(orom
, 16) ? " 16k" : "",
1381 imsm_orom_has_chunk(orom
, 32) ? " 32k" : "",
1382 imsm_orom_has_chunk(orom
, 64) ? " 64k" : "",
1383 imsm_orom_has_chunk(orom
, 128) ? " 128k" : "",
1384 imsm_orom_has_chunk(orom
, 256) ? " 256k" : "",
1385 imsm_orom_has_chunk(orom
, 512) ? " 512k" : "",
1386 imsm_orom_has_chunk(orom
, 1024*1) ? " 1M" : "",
1387 imsm_orom_has_chunk(orom
, 1024*2) ? " 2M" : "",
1388 imsm_orom_has_chunk(orom
, 1024*4) ? " 4M" : "",
1389 imsm_orom_has_chunk(orom
, 1024*8) ? " 8M" : "",
1390 imsm_orom_has_chunk(orom
, 1024*16) ? " 16M" : "",
1391 imsm_orom_has_chunk(orom
, 1024*32) ? " 32M" : "",
1392 imsm_orom_has_chunk(orom
, 1024*64) ? " 64M" : "");
1393 printf(" Max Disks : %d\n", orom
->tds
);
1394 printf(" Max Volumes : %d\n", orom
->vpa
);
1398 static int detail_platform_imsm(int verbose
, int enumerate_only
)
1400 /* There are two components to imsm platform support, the ahci SATA
1401 * controller and the option-rom. To find the SATA controller we
1402 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1403 * controller with the Intel vendor id is present. This approach
1404 * allows mdadm to leverage the kernel's ahci detection logic, with the
1405 * caveat that if ahci.ko is not loaded mdadm will not be able to
1406 * detect platform raid capabilities. The option-rom resides in a
1407 * platform "Adapter ROM". We scan for its signature to retrieve the
1408 * platform capabilities. If raid support is disabled in the BIOS the
1409 * option-rom capability structure will not be available.
1411 const struct imsm_orom
*orom
;
1412 struct sys_dev
*list
, *hba
;
1417 if (enumerate_only
) {
1418 if (check_env("IMSM_NO_PLATFORM"))
1420 list
= find_intel_devices();
1423 for (hba
= list
; hba
; hba
= hba
->next
) {
1424 orom
= find_imsm_capability(hba
->type
);
1430 free_sys_dev(&list
);
1434 list
= find_intel_devices();
1437 fprintf(stderr
, Name
": no active Intel(R) RAID "
1438 "controller found.\n");
1439 free_sys_dev(&list
);
1442 print_found_intel_controllers(list
);
1444 for (hba
= list
; hba
; hba
= hba
->next
) {
1445 orom
= find_imsm_capability(hba
->type
);
1447 fprintf(stderr
, Name
": imsm capabilities not found for controller: %s (type %s)\n",
1448 hba
->path
, get_sys_dev_type(hba
->type
));
1450 print_imsm_capability(orom
);
1453 for (hba
= list
; hba
; hba
= hba
->next
) {
1454 printf(" I/O Controller : %s (%s)\n",
1455 hba
->path
, get_sys_dev_type(hba
->type
));
1457 if (hba
->type
== SYS_DEV_SATA
) {
1458 host_base
= ahci_get_port_count(hba
->path
, &port_count
);
1459 if (ahci_enumerate_ports(hba
->path
, port_count
, host_base
, verbose
)) {
1461 fprintf(stderr
, Name
": failed to enumerate "
1462 "ports on SATA controller at %s.", hba
->pci_id
);
1468 free_sys_dev(&list
);
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* the imsm metadata format does not specify any host
	 * identification information.  We return -1 since we can never
	 * confirm nor deny whether a given array is "meant" for this
	 * host.  We rely on compare_super and the 'family_num' fields to
	 * exclude member disks that do not belong, and we rely on
	 * mdadm.conf to specify the arrays that should be assembled.
	 * Auto-assembly may still pick up "foreign" arrays.
	 */

	return -1;
}
1487 static void uuid_from_super_imsm(struct supertype
*st
, int uuid
[4])
1489 /* The uuid returned here is used for:
1490 * uuid to put into bitmap file (Create, Grow)
1491 * uuid for backup header when saving critical section (Grow)
1492 * comparing uuids when re-adding a device into an array
1493 * In these cases the uuid required is that of the data-array,
1494 * not the device-set.
1495 * uuid to recognise same set when adding a missing device back
1496 * to an array. This is a uuid for the device-set.
1498 * For each of these we can make do with a truncated
1499 * or hashed uuid rather than the original, as long as
1501 * In each case the uuid required is that of the data-array,
1502 * not the device-set.
1504 /* imsm does not track uuid's so we synthesis one using sha1 on
1505 * - The signature (Which is constant for all imsm array, but no matter)
1506 * - the orig_family_num of the container
1507 * - the index number of the volume
1508 * - the 'serial' number of the volume.
1509 * Hopefully these are all constant.
1511 struct intel_super
*super
= st
->sb
;
1514 struct sha1_ctx ctx
;
1515 struct imsm_dev
*dev
= NULL
;
1518 /* some mdadm versions failed to set ->orig_family_num, in which
1519 * case fall back to ->family_num. orig_family_num will be
1520 * fixed up with the first metadata update.
1522 family_num
= super
->anchor
->orig_family_num
;
1523 if (family_num
== 0)
1524 family_num
= super
->anchor
->family_num
;
1525 sha1_init_ctx(&ctx
);
1526 sha1_process_bytes(super
->anchor
->sig
, MPB_SIG_LEN
, &ctx
);
1527 sha1_process_bytes(&family_num
, sizeof(__u32
), &ctx
);
1528 if (super
->current_vol
>= 0)
1529 dev
= get_imsm_dev(super
, super
->current_vol
);
1531 __u32 vol
= super
->current_vol
;
1532 sha1_process_bytes(&vol
, sizeof(vol
), &ctx
);
1533 sha1_process_bytes(dev
->volume
, MAX_RAID_SERIAL_LEN
, &ctx
);
1535 sha1_finish_ctx(&ctx
, buf
);
1536 memcpy(uuid
, buf
, 4*4);
1541 get_imsm_numerical_version(struct imsm_super
*mpb
, int *m
, int *p
)
1543 __u8
*v
= get_imsm_version(mpb
);
1544 __u8
*end
= mpb
->sig
+ MAX_SIGNATURE_LENGTH
;
1545 char major
[] = { 0, 0, 0 };
1546 char minor
[] = { 0 ,0, 0 };
1547 char patch
[] = { 0, 0, 0 };
1548 char *ver_parse
[] = { major
, minor
, patch
};
1552 while (*v
!= '\0' && v
< end
) {
1553 if (*v
!= '.' && j
< 2)
1554 ver_parse
[i
][j
++] = *v
;
1562 *m
= strtol(minor
, NULL
, 0);
1563 *p
= strtol(patch
, NULL
, 0);
1567 static __u32
migr_strip_blocks_resync(struct imsm_dev
*dev
)
1569 /* migr_strip_size when repairing or initializing parity */
1570 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1571 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1573 switch (get_imsm_raid_level(map
)) {
1578 return 128*1024 >> 9;
1582 static __u32
migr_strip_blocks_rebuild(struct imsm_dev
*dev
)
1584 /* migr_strip_size when rebuilding a degraded disk, no idea why
1585 * this is different than migr_strip_size_resync(), but it's good
1588 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1589 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1591 switch (get_imsm_raid_level(map
)) {
1594 if (map
->num_members
% map
->num_domains
== 0)
1595 return 128*1024 >> 9;
1599 return max((__u32
) 64*1024 >> 9, chunk
);
1601 return 128*1024 >> 9;
1605 static __u32
num_stripes_per_unit_resync(struct imsm_dev
*dev
)
1607 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1608 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1609 __u32 lo_chunk
= __le32_to_cpu(lo
->blocks_per_strip
);
1610 __u32 hi_chunk
= __le32_to_cpu(hi
->blocks_per_strip
);
1612 return max((__u32
) 1, hi_chunk
/ lo_chunk
);
1615 static __u32
num_stripes_per_unit_rebuild(struct imsm_dev
*dev
)
1617 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1618 int level
= get_imsm_raid_level(lo
);
1620 if (level
== 1 || level
== 10) {
1621 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1623 return hi
->num_domains
;
1625 return num_stripes_per_unit_resync(dev
);
1628 static __u8
imsm_num_data_members(struct imsm_dev
*dev
, int second_map
)
1630 /* named 'imsm_' because raid0, raid1 and raid10
1631 * counter-intuitively have the same number of data disks
1633 struct imsm_map
*map
= get_imsm_map(dev
, second_map
);
1635 switch (get_imsm_raid_level(map
)) {
1639 return map
->num_members
;
1641 return map
->num_members
- 1;
1643 dprintf("%s: unsupported raid level\n", __func__
);
1648 static __u32
parity_segment_depth(struct imsm_dev
*dev
)
1650 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1651 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1653 switch(get_imsm_raid_level(map
)) {
1656 return chunk
* map
->num_domains
;
1658 return chunk
* map
->num_members
;
1664 static __u32
map_migr_block(struct imsm_dev
*dev
, __u32 block
)
1666 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1667 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1668 __u32 strip
= block
/ chunk
;
1670 switch (get_imsm_raid_level(map
)) {
1673 __u32 vol_strip
= (strip
* map
->num_domains
) + 1;
1674 __u32 vol_stripe
= vol_strip
/ map
->num_members
;
1676 return vol_stripe
* chunk
+ block
% chunk
;
1678 __u32 stripe
= strip
/ (map
->num_members
- 1);
1680 return stripe
* chunk
+ block
% chunk
;
1687 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
)
1689 /* calculate the conversion factor between per member 'blocks'
1690 * (md/{resync,rebuild}_start) and imsm migration units, return
1691 * 0 for the 'not migrating' and 'unsupported migration' cases
1693 if (!dev
->vol
.migr_state
)
1696 switch (migr_type(dev
)) {
1701 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1702 __u32 stripes_per_unit
;
1703 __u32 blocks_per_unit
;
1712 /* yes, this is really the translation of migr_units to
1713 * per-member blocks in the 'resync' case
1715 stripes_per_unit
= num_stripes_per_unit_resync(dev
);
1716 migr_chunk
= migr_strip_blocks_resync(dev
);
1717 disks
= imsm_num_data_members(dev
, 0);
1718 blocks_per_unit
= stripes_per_unit
* migr_chunk
* disks
;
1719 stripe
= __le32_to_cpu(map
->blocks_per_strip
) * disks
;
1720 segment
= blocks_per_unit
/ stripe
;
1721 block_rel
= blocks_per_unit
- segment
* stripe
;
1722 parity_depth
= parity_segment_depth(dev
);
1723 block_map
= map_migr_block(dev
, block_rel
);
1724 return block_map
+ parity_depth
* segment
;
1726 case MIGR_REBUILD
: {
1727 __u32 stripes_per_unit
;
1730 stripes_per_unit
= num_stripes_per_unit_rebuild(dev
);
1731 migr_chunk
= migr_strip_blocks_rebuild(dev
);
1732 return migr_chunk
* stripes_per_unit
;
1734 case MIGR_STATE_CHANGE
:
1740 static int imsm_level_to_layout(int level
)
1748 return ALGORITHM_LEFT_ASYMMETRIC
;
1755 static void getinfo_super_imsm_volume(struct supertype
*st
, struct mdinfo
*info
, char *dmap
)
1757 struct intel_super
*super
= st
->sb
;
1758 struct imsm_dev
*dev
= get_imsm_dev(super
, super
->current_vol
);
1759 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1760 struct imsm_map
*prev_map
= get_imsm_map(dev
, 1);
1761 struct imsm_map
*map_to_analyse
= map
;
1764 unsigned int component_size_alligment
;
1765 int map_disks
= info
->array
.raid_disks
;
1768 map_to_analyse
= prev_map
;
1770 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
1771 if (dl
->raiddisk
== info
->disk
.raid_disk
)
1773 info
->container_member
= super
->current_vol
;
1774 info
->array
.raid_disks
= map
->num_members
;
1775 info
->array
.level
= get_imsm_raid_level(map_to_analyse
);
1776 info
->array
.layout
= imsm_level_to_layout(info
->array
.level
);
1777 info
->array
.md_minor
= -1;
1778 info
->array
.ctime
= 0;
1779 info
->array
.utime
= 0;
1780 info
->array
.chunk_size
=
1781 __le16_to_cpu(map_to_analyse
->blocks_per_strip
) << 9;
1782 info
->array
.state
= !dev
->vol
.dirty
;
1783 info
->custom_array_size
= __le32_to_cpu(dev
->size_high
);
1784 info
->custom_array_size
<<= 32;
1785 info
->custom_array_size
|= __le32_to_cpu(dev
->size_low
);
1786 if (prev_map
&& map
->map_state
== prev_map
->map_state
) {
1787 info
->reshape_active
= 1;
1788 info
->new_level
= get_imsm_raid_level(map
);
1789 info
->new_layout
= imsm_level_to_layout(info
->new_level
);
1790 info
->new_chunk
= __le16_to_cpu(map
->blocks_per_strip
) << 9;
1791 info
->delta_disks
= map
->num_members
- prev_map
->num_members
;
1792 if (info
->delta_disks
) {
1793 /* this needs to be applied to every array
1796 info
->reshape_active
= 2;
1798 /* We shape information that we give to md might have to be
1799 * modify to cope with md's requirement for reshaping arrays.
1800 * For example, when reshaping a RAID0, md requires it to be
1801 * presented as a degraded RAID4.
1802 * Also if a RAID0 is migrating to a RAID5 we need to specify
1803 * the array as already being RAID5, but the 'before' layout
1804 * is a RAID4-like layout.
1806 switch (info
->array
.level
) {
1808 switch(info
->new_level
) {
1810 /* conversion is happening as RAID4 */
1811 info
->array
.level
= 4;
1812 info
->array
.raid_disks
+= 1;
1815 /* conversion is happening as RAID5 */
1816 info
->array
.level
= 5;
1817 info
->array
.layout
= ALGORITHM_PARITY_N
;
1818 info
->array
.raid_disks
+= 1;
1819 info
->delta_disks
-= 1;
1822 /* FIXME error message */
1823 info
->array
.level
= UnSet
;
1829 info
->new_level
= UnSet
;
1830 info
->new_layout
= UnSet
;
1831 info
->new_chunk
= info
->array
.chunk_size
;
1832 info
->delta_disks
= 0;
1834 info
->disk
.major
= 0;
1835 info
->disk
.minor
= 0;
1837 info
->disk
.major
= dl
->major
;
1838 info
->disk
.minor
= dl
->minor
;
1841 info
->data_offset
= __le32_to_cpu(map_to_analyse
->pba_of_lba0
);
1842 info
->component_size
=
1843 __le32_to_cpu(map_to_analyse
->blocks_per_member
);
1845 /* check component size aligment
1847 component_size_alligment
=
1848 info
->component_size
% (info
->array
.chunk_size
/512);
1850 if (component_size_alligment
&&
1851 (info
->array
.level
!= 1) && (info
->array
.level
!= UnSet
)) {
1852 dprintf("imsm: reported component size alligned from %llu ",
1853 info
->component_size
);
1854 info
->component_size
-= component_size_alligment
;
1855 dprintf("to %llu (%i).\n",
1856 info
->component_size
, component_size_alligment
);
1859 memset(info
->uuid
, 0, sizeof(info
->uuid
));
1860 info
->recovery_start
= MaxSector
;
1862 info
->reshape_progress
= 0;
1863 info
->resync_start
= MaxSector
;
1864 if (map_to_analyse
->map_state
== IMSM_T_STATE_UNINITIALIZED
||
1866 info
->resync_start
= 0;
1868 if (dev
->vol
.migr_state
) {
1869 switch (migr_type(dev
)) {
1872 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
1873 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
1875 info
->resync_start
= blocks_per_unit
* units
;
1878 case MIGR_GEN_MIGR
: {
1879 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
1880 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
1881 unsigned long long array_blocks
;
1884 info
->reshape_progress
= blocks_per_unit
* units
;
1886 /* checkpoint is written per disks unit
1887 * recalculate it to reshape position
1889 used_disks
= imsm_num_data_members(dev
, 0);
1890 info
->reshape_progress
*= used_disks
;
1891 dprintf("IMSM: General Migration checkpoint : %llu "
1892 "(%llu) -> read reshape progress : %llu\n",
1893 units
, blocks_per_unit
, info
->reshape_progress
);
1895 used_disks
= imsm_num_data_members(dev
, 1);
1896 if (used_disks
> 0) {
1897 array_blocks
= map
->blocks_per_member
*
1899 /* round array size down to closest MB
1901 info
->custom_array_size
= (array_blocks
1902 >> SECT_PER_MB_SHIFT
)
1903 << SECT_PER_MB_SHIFT
;
1907 /* we could emulate the checkpointing of
1908 * 'sync_action=check' migrations, but for now
1909 * we just immediately complete them
1912 /* this is handled by container_content_imsm() */
1913 case MIGR_STATE_CHANGE
:
1914 /* FIXME handle other migrations */
1916 /* we are not dirty, so... */
1917 info
->resync_start
= MaxSector
;
1921 strncpy(info
->name
, (char *) dev
->volume
, MAX_RAID_SERIAL_LEN
);
1922 info
->name
[MAX_RAID_SERIAL_LEN
] = 0;
1924 info
->array
.major_version
= -1;
1925 info
->array
.minor_version
= -2;
1926 devname
= devnum2devname(st
->container_dev
);
1927 *info
->text_version
= '\0';
1929 sprintf(info
->text_version
, "/%s/%d", devname
, info
->container_member
);
1931 info
->safe_mode_delay
= 4000; /* 4 secs like the Matrix driver */
1932 uuid_from_super_imsm(st
, info
->uuid
);
1936 for (i
=0; i
<map_disks
; i
++) {
1938 if (i
< info
->array
.raid_disks
) {
1939 struct imsm_disk
*dsk
;
1940 j
= get_imsm_disk_idx(dev
, i
, -1);
1941 dsk
= get_imsm_disk(super
, j
);
1942 if (dsk
&& (dsk
->status
& CONFIGURED_DISK
))
1949 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
);
1950 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
);
1952 static struct imsm_disk
*get_imsm_missing(struct intel_super
*super
, __u8 index
)
1956 for (d
= super
->missing
; d
; d
= d
->next
)
1957 if (d
->index
== index
)
1962 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
)
1964 struct intel_super
*super
= st
->sb
;
1965 struct imsm_disk
*disk
;
1966 int map_disks
= info
->array
.raid_disks
;
1967 int max_enough
= -1;
1969 struct imsm_super
*mpb
;
1971 if (super
->current_vol
>= 0) {
1972 getinfo_super_imsm_volume(st
, info
, map
);
1976 /* Set raid_disks to zero so that Assemble will always pull in valid
1979 info
->array
.raid_disks
= 0;
1980 info
->array
.level
= LEVEL_CONTAINER
;
1981 info
->array
.layout
= 0;
1982 info
->array
.md_minor
= -1;
1983 info
->array
.ctime
= 0; /* N/A for imsm */
1984 info
->array
.utime
= 0;
1985 info
->array
.chunk_size
= 0;
1987 info
->disk
.major
= 0;
1988 info
->disk
.minor
= 0;
1989 info
->disk
.raid_disk
= -1;
1990 info
->reshape_active
= 0;
1991 info
->array
.major_version
= -1;
1992 info
->array
.minor_version
= -2;
1993 strcpy(info
->text_version
, "imsm");
1994 info
->safe_mode_delay
= 0;
1995 info
->disk
.number
= -1;
1996 info
->disk
.state
= 0;
1998 info
->recovery_start
= MaxSector
;
2000 /* do we have the all the insync disks that we expect? */
2001 mpb
= super
->anchor
;
2003 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
2004 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
2005 int failed
, enough
, j
, missing
= 0;
2006 struct imsm_map
*map
;
2009 failed
= imsm_count_failed(super
, dev
);
2010 state
= imsm_check_degraded(super
, dev
, failed
);
2011 map
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2013 /* any newly missing disks?
2014 * (catches single-degraded vs double-degraded)
2016 for (j
= 0; j
< map
->num_members
; j
++) {
2017 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
2018 __u32 idx
= ord_to_idx(ord
);
2020 if (!(ord
& IMSM_ORD_REBUILD
) &&
2021 get_imsm_missing(super
, idx
)) {
2027 if (state
== IMSM_T_STATE_FAILED
)
2029 else if (state
== IMSM_T_STATE_DEGRADED
&&
2030 (state
!= map
->map_state
|| missing
))
2032 else /* we're normal, or already degraded */
2035 /* in the missing/failed disk case check to see
2036 * if at least one array is runnable
2038 max_enough
= max(max_enough
, enough
);
2040 dprintf("%s: enough: %d\n", __func__
, max_enough
);
2041 info
->container_enough
= max_enough
;
2044 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
2046 disk
= &super
->disks
->disk
;
2047 info
->data_offset
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
2048 info
->component_size
= reserved
;
2049 info
->disk
.state
= is_configured(disk
) ? (1 << MD_DISK_ACTIVE
) : 0;
2050 /* we don't change info->disk.raid_disk here because
2051 * this state will be finalized in mdmon after we have
2052 * found the 'most fresh' version of the metadata
2054 info
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2055 info
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2058 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2059 * ->compare_super may have updated the 'num_raid_devs' field for spares
2061 if (info
->disk
.state
& (1 << MD_DISK_SYNC
) || super
->anchor
->num_raid_devs
)
2062 uuid_from_super_imsm(st
, info
->uuid
);
2064 memcpy(info
->uuid
, uuid_zero
, sizeof(uuid_zero
));
2066 /* I don't know how to compute 'map' on imsm, so use safe default */
2069 for (i
= 0; i
< map_disks
; i
++)
2075 /* allocates memory and fills disk in mdinfo structure
2076 * for each disk in array */
2077 struct mdinfo
*getinfo_super_disks_imsm(struct supertype
*st
)
2079 struct mdinfo
*mddev
= NULL
;
2080 struct intel_super
*super
= st
->sb
;
2081 struct imsm_disk
*disk
;
2084 if (!super
|| !super
->disks
)
2087 mddev
= malloc(sizeof(*mddev
));
2089 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2092 memset(mddev
, 0, sizeof(*mddev
));
2096 tmp
= malloc(sizeof(*tmp
));
2098 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2103 memset(tmp
, 0, sizeof(*tmp
));
2105 tmp
->next
= mddev
->devs
;
2107 tmp
->disk
.number
= count
++;
2108 tmp
->disk
.major
= dl
->major
;
2109 tmp
->disk
.minor
= dl
->minor
;
2110 tmp
->disk
.state
= is_configured(disk
) ?
2111 (1 << MD_DISK_ACTIVE
) : 0;
2112 tmp
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2113 tmp
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2114 tmp
->disk
.raid_disk
= -1;
2120 static int update_super_imsm(struct supertype
*st
, struct mdinfo
*info
,
2121 char *update
, char *devname
, int verbose
,
2122 int uuid_set
, char *homehost
)
2124 /* For 'assemble' and 'force' we need to return non-zero if any
2125 * change was made. For others, the return value is ignored.
2126 * Update options are:
2127 * force-one : This device looks a bit old but needs to be included,
2128 * update age info appropriately.
2129 * assemble: clear any 'faulty' flag to allow this device to
2131 * force-array: Array is degraded but being forced, mark it clean
2132 * if that will be needed to assemble it.
2134 * newdev: not used ????
2135 * grow: Array has gained a new device - this is currently for
2137 * resync: mark as dirty so a resync will happen.
2138 * name: update the name - preserving the homehost
2139 * uuid: Change the uuid of the array to match watch is given
2141 * Following are not relevant for this imsm:
2142 * sparc2.2 : update from old dodgey metadata
2143 * super-minor: change the preferred_minor number
2144 * summaries: update redundant counters.
2145 * homehost: update the recorded homehost
2146 * _reshape_progress: record new reshape_progress position.
2149 struct intel_super
*super
= st
->sb
;
2150 struct imsm_super
*mpb
;
2152 /* we can only update container info */
2153 if (!super
|| super
->current_vol
>= 0 || !super
->anchor
)
2156 mpb
= super
->anchor
;
2158 if (strcmp(update
, "uuid") == 0 && uuid_set
&& !info
->update_private
)
2160 else if (strcmp(update
, "uuid") == 0 && uuid_set
&& info
->update_private
) {
2161 mpb
->orig_family_num
= *((__u32
*) info
->update_private
);
2163 } else if (strcmp(update
, "uuid") == 0) {
2164 __u32
*new_family
= malloc(sizeof(*new_family
));
2166 /* update orig_family_number with the incoming random
2167 * data, report the new effective uuid, and store the
2168 * new orig_family_num for future updates.
2171 memcpy(&mpb
->orig_family_num
, info
->uuid
, sizeof(__u32
));
2172 uuid_from_super_imsm(st
, info
->uuid
);
2173 *new_family
= mpb
->orig_family_num
;
2174 info
->update_private
= new_family
;
2177 } else if (strcmp(update
, "assemble") == 0)
2182 /* successful update? recompute checksum */
2184 mpb
->check_sum
= __le32_to_cpu(__gen_imsm_checksum(mpb
));
2189 static size_t disks_to_mpb_size(int disks
)
2193 size
= sizeof(struct imsm_super
);
2194 size
+= (disks
- 1) * sizeof(struct imsm_disk
);
2195 size
+= 2 * sizeof(struct imsm_dev
);
2196 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2197 size
+= (4 - 2) * sizeof(struct imsm_map
);
2198 /* 4 possible disk_ord_tbl's */
2199 size
+= 4 * (disks
- 1) * sizeof(__u32
);
2204 static __u64
avail_size_imsm(struct supertype
*st
, __u64 devsize
)
2206 if (devsize
< (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
))
2209 return devsize
- (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
);
2212 static void free_devlist(struct intel_super
*super
)
2214 struct intel_dev
*dv
;
2216 while (super
->devlist
) {
2217 dv
= super
->devlist
->next
;
2218 free(super
->devlist
->dev
);
2219 free(super
->devlist
);
2220 super
->devlist
= dv
;
/* Copy a raid device definition; the size excludes any secondary
 * (migration) map, per sizeof_imsm_dev(src, 0).
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2229 static int compare_super_imsm(struct supertype
*st
, struct supertype
*tst
)
2233 * 0 same, or first was empty, and second was copied
2234 * 1 second had wrong number
2236 * 3 wrong other info
2238 struct intel_super
*first
= st
->sb
;
2239 struct intel_super
*sec
= tst
->sb
;
2246 /* in platform dependent environment test if the disks
2247 * use the same Intel hba
2249 if (!check_env("IMSM_NO_PLATFORM")) {
2250 if (!first
->hba
|| !sec
->hba
||
2251 (first
->hba
->type
!= sec
->hba
->type
)) {
2253 "HBAs of devices does not match %s != %s\n",
2254 first
->hba
? get_sys_dev_type(first
->hba
->type
) : NULL
,
2255 sec
->hba
? get_sys_dev_type(sec
->hba
->type
) : NULL
);
2260 /* if an anchor does not have num_raid_devs set then it is a free
2263 if (first
->anchor
->num_raid_devs
> 0 &&
2264 sec
->anchor
->num_raid_devs
> 0) {
2265 /* Determine if these disks might ever have been
2266 * related. Further disambiguation can only take place
2267 * in load_super_imsm_all
2269 __u32 first_family
= first
->anchor
->orig_family_num
;
2270 __u32 sec_family
= sec
->anchor
->orig_family_num
;
2272 if (memcmp(first
->anchor
->sig
, sec
->anchor
->sig
,
2273 MAX_SIGNATURE_LENGTH
) != 0)
2276 if (first_family
== 0)
2277 first_family
= first
->anchor
->family_num
;
2278 if (sec_family
== 0)
2279 sec_family
= sec
->anchor
->family_num
;
2281 if (first_family
!= sec_family
)
2287 /* if 'first' is a spare promote it to a populated mpb with sec's
2290 if (first
->anchor
->num_raid_devs
== 0 &&
2291 sec
->anchor
->num_raid_devs
> 0) {
2293 struct intel_dev
*dv
;
2294 struct imsm_dev
*dev
;
2296 /* we need to copy raid device info from sec if an allocation
2297 * fails here we don't associate the spare
2299 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++) {
2300 dv
= malloc(sizeof(*dv
));
2303 dev
= malloc(sizeof_imsm_dev(get_imsm_dev(sec
, i
), 1));
2310 dv
->next
= first
->devlist
;
2311 first
->devlist
= dv
;
2313 if (i
< sec
->anchor
->num_raid_devs
) {
2314 /* allocation failure */
2315 free_devlist(first
);
2316 fprintf(stderr
, "imsm: failed to associate spare\n");
2319 first
->anchor
->num_raid_devs
= sec
->anchor
->num_raid_devs
;
2320 first
->anchor
->orig_family_num
= sec
->anchor
->orig_family_num
;
2321 first
->anchor
->family_num
= sec
->anchor
->family_num
;
2322 memcpy(first
->anchor
->sig
, sec
->anchor
->sig
, MAX_SIGNATURE_LENGTH
);
2323 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++)
2324 imsm_copy_dev(get_imsm_dev(first
, i
), get_imsm_dev(sec
, i
));
2330 static void fd2devname(int fd
, char *name
)
2334 char dname
[PATH_MAX
];
2339 if (fstat(fd
, &st
) != 0)
2341 sprintf(path
, "/sys/dev/block/%d:%d",
2342 major(st
.st_rdev
), minor(st
.st_rdev
));
2344 rv
= readlink(path
, dname
, sizeof(dname
));
2349 nm
= strrchr(dname
, '/');
2351 snprintf(name
, MAX_RAID_SERIAL_LEN
, "/dev/%s", nm
);
2354 extern int scsi_get_serial(int fd
, void *buf
, size_t buf_len
);
2356 static int imsm_read_serial(int fd
, char *devname
,
2357 __u8 serial
[MAX_RAID_SERIAL_LEN
])
2359 unsigned char scsi_serial
[255];
2368 memset(scsi_serial
, 0, sizeof(scsi_serial
));
2370 rv
= scsi_get_serial(fd
, scsi_serial
, sizeof(scsi_serial
));
2372 if (rv
&& check_env("IMSM_DEVNAME_AS_SERIAL")) {
2373 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2374 fd2devname(fd
, (char *) serial
);
2381 Name
": Failed to retrieve serial for %s\n",
2386 rsp_len
= scsi_serial
[3];
2390 Name
": Failed to retrieve serial for %s\n",
2394 rsp_buf
= (char *) &scsi_serial
[4];
2396 /* trim all whitespace and non-printable characters and convert
2399 for (i
= 0, dest
= rsp_buf
; i
< rsp_len
; i
++) {
2402 /* ':' is reserved for use in placeholder serial
2403 * numbers for missing disks
2411 len
= dest
- rsp_buf
;
2414 /* truncate leading characters */
2415 if (len
> MAX_RAID_SERIAL_LEN
) {
2416 dest
+= len
- MAX_RAID_SERIAL_LEN
;
2417 len
= MAX_RAID_SERIAL_LEN
;
2420 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2421 memcpy(serial
, dest
, len
);
2426 static int serialcmp(__u8
*s1
, __u8
*s2
)
2428 return strncmp((char *) s1
, (char *) s2
, MAX_RAID_SERIAL_LEN
);
2431 static void serialcpy(__u8
*dest
, __u8
*src
)
2433 strncpy((char *) dest
, (char *) src
, MAX_RAID_SERIAL_LEN
);
2437 static struct dl
*serial_to_dl(__u8
*serial
, struct intel_super
*super
)
2441 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2442 if (serialcmp(dl
->serial
, serial
) == 0)
2449 static struct imsm_disk
*
2450 __serial_to_disk(__u8
*serial
, struct imsm_super
*mpb
, int *idx
)
2454 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2455 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
2457 if (serialcmp(disk
->serial
, serial
) == 0) {
2468 load_imsm_disk(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2470 struct imsm_disk
*disk
;
2475 __u8 serial
[MAX_RAID_SERIAL_LEN
];
2477 rv
= imsm_read_serial(fd
, devname
, serial
);
2482 dl
= calloc(1, sizeof(*dl
));
2486 Name
": failed to allocate disk buffer for %s\n",
2492 dl
->major
= major(stb
.st_rdev
);
2493 dl
->minor
= minor(stb
.st_rdev
);
2494 dl
->next
= super
->disks
;
2495 dl
->fd
= keep_fd
? fd
: -1;
2496 assert(super
->disks
== NULL
);
2498 serialcpy(dl
->serial
, serial
);
2501 fd2devname(fd
, name
);
2503 dl
->devname
= strdup(devname
);
2505 dl
->devname
= strdup(name
);
2507 /* look up this disk's index in the current anchor */
2508 disk
= __serial_to_disk(dl
->serial
, super
->anchor
, &dl
->index
);
2511 /* only set index on disks that are a member of a
2512 * populated contianer, i.e. one with raid_devs
2514 if (is_failed(&dl
->disk
))
2516 else if (is_spare(&dl
->disk
))
2524 /* When migrating map0 contains the 'destination' state while map1
2525 * contains the current state. When not migrating map0 contains the
2526 * current state. This routine assumes that map[0].map_state is set to
2527 * the current array state before being called.
2529 * Migration is indicated by one of the following states
2530 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2531 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2532 * map1state=unitialized)
2533 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2535 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2536 * map1state=degraded)
2538 static void migrate(struct imsm_dev
*dev
, __u8 to_state
, int migr_type
)
2540 struct imsm_map
*dest
;
2541 struct imsm_map
*src
= get_imsm_map(dev
, 0);
2543 dev
->vol
.migr_state
= 1;
2544 set_migr_type(dev
, migr_type
);
2545 dev
->vol
.curr_migr_unit
= 0;
2546 dest
= get_imsm_map(dev
, 1);
2548 /* duplicate and then set the target end state in map[0] */
2549 memcpy(dest
, src
, sizeof_imsm_map(src
));
2550 if ((migr_type
== MIGR_REBUILD
) ||
2551 (migr_type
== MIGR_GEN_MIGR
)) {
2555 for (i
= 0; i
< src
->num_members
; i
++) {
2556 ord
= __le32_to_cpu(src
->disk_ord_tbl
[i
]);
2557 set_imsm_ord_tbl_ent(src
, i
, ord_to_idx(ord
));
2561 src
->map_state
= to_state
;
2564 static void end_migration(struct imsm_dev
*dev
, __u8 map_state
)
2566 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2567 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2570 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2571 * completed in the last migration.
2573 * FIXME add support for raid-level-migration
2575 for (i
= 0; i
< prev
->num_members
; i
++)
2576 for (j
= 0; j
< map
->num_members
; j
++)
2577 /* during online capacity expansion
2578 * disks position can be changed if takeover is used
2580 if (ord_to_idx(map
->disk_ord_tbl
[j
]) ==
2581 ord_to_idx(prev
->disk_ord_tbl
[i
])) {
2582 map
->disk_ord_tbl
[j
] |= prev
->disk_ord_tbl
[i
];
2586 dev
->vol
.migr_state
= 0;
2587 dev
->vol
.migr_type
= 0;
2588 dev
->vol
.curr_migr_unit
= 0;
2589 map
->map_state
= map_state
;
2593 static int parse_raid_devices(struct intel_super
*super
)
2596 struct imsm_dev
*dev_new
;
2597 size_t len
, len_migr
;
2599 size_t space_needed
= 0;
2600 struct imsm_super
*mpb
= super
->anchor
;
2602 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2603 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2604 struct intel_dev
*dv
;
2606 len
= sizeof_imsm_dev(dev_iter
, 0);
2607 len_migr
= sizeof_imsm_dev(dev_iter
, 1);
2609 space_needed
+= len_migr
- len
;
2611 dv
= malloc(sizeof(*dv
));
2614 if (max_len
< len_migr
)
2616 if (max_len
> len_migr
)
2617 space_needed
+= max_len
- len_migr
;
2618 dev_new
= malloc(max_len
);
2623 imsm_copy_dev(dev_new
, dev_iter
);
2626 dv
->next
= super
->devlist
;
2627 super
->devlist
= dv
;
2630 /* ensure that super->buf is large enough when all raid devices
2633 if (__le32_to_cpu(mpb
->mpb_size
) + space_needed
> super
->len
) {
2636 len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + space_needed
, 512);
2637 if (posix_memalign(&buf
, 512, len
) != 0)
2640 memcpy(buf
, super
->buf
, super
->len
);
2641 memset(buf
+ super
->len
, 0, len
- super
->len
);
2650 /* retrieve a pointer to the bbm log which starts after all raid devices */
2651 struct bbm_log
*__get_imsm_bbm_log(struct imsm_super
*mpb
)
2655 if (__le32_to_cpu(mpb
->bbm_log_size
)) {
2657 ptr
+= mpb
->mpb_size
- __le32_to_cpu(mpb
->bbm_log_size
);
2663 static void __free_imsm(struct intel_super
*super
, int free_disks
);
2665 /* load_imsm_mpb - read matrix metadata
2666 * allocates super->mpb to be freed by free_imsm
2668 static int load_imsm_mpb(int fd
, struct intel_super
*super
, char *devname
)
2670 unsigned long long dsize
;
2671 unsigned long long sectors
;
2673 struct imsm_super
*anchor
;
2676 get_dev_size(fd
, NULL
, &dsize
);
2680 Name
": %s: device to small for imsm\n",
2685 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0) {
2688 Name
": Cannot seek to anchor block on %s: %s\n",
2689 devname
, strerror(errno
));
2693 if (posix_memalign((void**)&anchor
, 512, 512) != 0) {
2696 Name
": Failed to allocate imsm anchor buffer"
2697 " on %s\n", devname
);
2700 if (read(fd
, anchor
, 512) != 512) {
2703 Name
": Cannot read anchor block on %s: %s\n",
2704 devname
, strerror(errno
));
2709 if (strncmp((char *) anchor
->sig
, MPB_SIGNATURE
, MPB_SIG_LEN
) != 0) {
2712 Name
": no IMSM anchor on %s\n", devname
);
2717 __free_imsm(super
, 0);
2718 /* reload capability and hba */
2720 /* capability and hba must be updated with new super allocation */
2721 find_intel_hba_capability(fd
, super
, devname
);
2722 super
->len
= ROUND_UP(anchor
->mpb_size
, 512);
2723 if (posix_memalign(&super
->buf
, 512, super
->len
) != 0) {
2726 Name
": unable to allocate %zu byte mpb buffer\n",
2731 memcpy(super
->buf
, anchor
, 512);
2733 sectors
= mpb_sectors(anchor
) - 1;
2736 check_sum
= __gen_imsm_checksum(super
->anchor
);
2737 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2740 Name
": IMSM checksum %x != %x on %s\n",
2742 __le32_to_cpu(super
->anchor
->check_sum
),
2750 /* read the extended mpb */
2751 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0) {
2754 Name
": Cannot seek to extended mpb on %s: %s\n",
2755 devname
, strerror(errno
));
2759 if ((unsigned)read(fd
, super
->buf
+ 512, super
->len
- 512) != super
->len
- 512) {
2762 Name
": Cannot read extended mpb on %s: %s\n",
2763 devname
, strerror(errno
));
2767 check_sum
= __gen_imsm_checksum(super
->anchor
);
2768 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2771 Name
": IMSM checksum %x != %x on %s\n",
2772 check_sum
, __le32_to_cpu(super
->anchor
->check_sum
),
2777 /* FIXME the BBM log is disk specific so we cannot use this global
2778 * buffer for all disks. Ok for now since we only look at the global
2779 * bbm_log_size parameter to gate assembly
2781 super
->bbm_log
= __get_imsm_bbm_log(super
->anchor
);
2787 load_and_parse_mpb(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2791 err
= load_imsm_mpb(fd
, super
, devname
);
2794 err
= load_imsm_disk(fd
, super
, devname
, keep_fd
);
2797 err
= parse_raid_devices(super
);
2802 static void __free_imsm_disk(struct dl
*d
)
2814 static void free_imsm_disks(struct intel_super
*super
)
2818 while (super
->disks
) {
2820 super
->disks
= d
->next
;
2821 __free_imsm_disk(d
);
2823 while (super
->disk_mgmt_list
) {
2824 d
= super
->disk_mgmt_list
;
2825 super
->disk_mgmt_list
= d
->next
;
2826 __free_imsm_disk(d
);
2828 while (super
->missing
) {
2830 super
->missing
= d
->next
;
2831 __free_imsm_disk(d
);
2836 /* free all the pieces hanging off of a super pointer */
2837 static void __free_imsm(struct intel_super
*super
, int free_disks
)
2839 struct intel_hba
*elem
, *next
;
2845 /* unlink capability description */
2848 free_imsm_disks(super
);
2849 free_devlist(super
);
2853 free((void *)elem
->path
);
2861 static void free_imsm(struct intel_super
*super
)
2863 __free_imsm(super
, 1);
2867 static void free_super_imsm(struct supertype
*st
)
2869 struct intel_super
*super
= st
->sb
;
2878 static struct intel_super
*alloc_super(void)
2880 struct intel_super
*super
= malloc(sizeof(*super
));
2883 memset(super
, 0, sizeof(*super
));
2884 super
->current_vol
= -1;
2885 super
->create_offset
= ~((__u32
) 0);
2891 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
2893 static int find_intel_hba_capability(int fd
, struct intel_super
*super
, char *devname
)
2895 struct sys_dev
*hba_name
;
2898 if ((fd
< 0) || check_env("IMSM_NO_PLATFORM")) {
2903 hba_name
= find_disk_attached_hba(fd
, NULL
);
2907 Name
": %s is not attached to Intel(R) RAID controller.\n",
2911 rv
= attach_hba_to_super(super
, hba_name
);
2914 struct intel_hba
*hba
= super
->hba
;
2916 fprintf(stderr
, Name
": %s is attached to Intel(R) %s RAID "
2917 "controller (%s),\n"
2918 " but the container is assigned to Intel(R) "
2919 "%s RAID controller (",
2922 hba_name
->pci_id
? : "Err!",
2923 get_sys_dev_type(hba_name
->type
));
2926 fprintf(stderr
, "%s", hba
->pci_id
? : "Err!");
2928 fprintf(stderr
, ", ");
2932 fprintf(stderr
, ").\n"
2933 " Mixing devices attached to different controllers "
2934 "is not allowed.\n");
2936 free_sys_dev(&hba_name
);
2939 super
->orom
= find_imsm_capability(hba_name
->type
);
2940 free_sys_dev(&hba_name
);
2947 /* find_missing - helper routine for load_super_imsm_all that identifies
2948 * disks that have disappeared from the system. This routine relies on
2949 * the mpb being uptodate, which it is at load time.
2951 static int find_missing(struct intel_super
*super
)
2954 struct imsm_super
*mpb
= super
->anchor
;
2956 struct imsm_disk
*disk
;
2958 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2959 disk
= __get_imsm_disk(mpb
, i
);
2960 dl
= serial_to_dl(disk
->serial
, super
);
2964 dl
= malloc(sizeof(*dl
));
2970 dl
->devname
= strdup("missing");
2972 serialcpy(dl
->serial
, disk
->serial
);
2975 dl
->next
= super
->missing
;
2976 super
->missing
= dl
;
2982 static struct intel_disk
*disk_list_get(__u8
*serial
, struct intel_disk
*disk_list
)
2984 struct intel_disk
*idisk
= disk_list
;
2987 if (serialcmp(idisk
->disk
.serial
, serial
) == 0)
2989 idisk
= idisk
->next
;
2995 static int __prep_thunderdome(struct intel_super
**table
, int tbl_size
,
2996 struct intel_super
*super
,
2997 struct intel_disk
**disk_list
)
2999 struct imsm_disk
*d
= &super
->disks
->disk
;
3000 struct imsm_super
*mpb
= super
->anchor
;
3003 for (i
= 0; i
< tbl_size
; i
++) {
3004 struct imsm_super
*tbl_mpb
= table
[i
]->anchor
;
3005 struct imsm_disk
*tbl_d
= &table
[i
]->disks
->disk
;
3007 if (tbl_mpb
->family_num
== mpb
->family_num
) {
3008 if (tbl_mpb
->check_sum
== mpb
->check_sum
) {
3009 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3010 __func__
, super
->disks
->major
,
3011 super
->disks
->minor
,
3012 table
[i
]->disks
->major
,
3013 table
[i
]->disks
->minor
);
3017 if (((is_configured(d
) && !is_configured(tbl_d
)) ||
3018 is_configured(d
) == is_configured(tbl_d
)) &&
3019 tbl_mpb
->generation_num
< mpb
->generation_num
) {
3020 /* current version of the mpb is a
3021 * better candidate than the one in
3022 * super_table, but copy over "cross
3023 * generational" status
3025 struct intel_disk
*idisk
;
3027 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3028 __func__
, super
->disks
->major
,
3029 super
->disks
->minor
,
3030 table
[i
]->disks
->major
,
3031 table
[i
]->disks
->minor
);
3033 idisk
= disk_list_get(tbl_d
->serial
, *disk_list
);
3034 if (idisk
&& is_failed(&idisk
->disk
))
3035 tbl_d
->status
|= FAILED_DISK
;
3038 struct intel_disk
*idisk
;
3039 struct imsm_disk
*disk
;
3041 /* tbl_mpb is more up to date, but copy
3042 * over cross generational status before
3045 disk
= __serial_to_disk(d
->serial
, mpb
, NULL
);
3046 if (disk
&& is_failed(disk
))
3047 d
->status
|= FAILED_DISK
;
3049 idisk
= disk_list_get(d
->serial
, *disk_list
);
3052 if (disk
&& is_configured(disk
))
3053 idisk
->disk
.status
|= CONFIGURED_DISK
;
3056 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3057 __func__
, super
->disks
->major
,
3058 super
->disks
->minor
,
3059 table
[i
]->disks
->major
,
3060 table
[i
]->disks
->minor
);
3068 table
[tbl_size
++] = super
;
3072 /* update/extend the merged list of imsm_disk records */
3073 for (j
= 0; j
< mpb
->num_disks
; j
++) {
3074 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, j
);
3075 struct intel_disk
*idisk
;
3077 idisk
= disk_list_get(disk
->serial
, *disk_list
);
3079 idisk
->disk
.status
|= disk
->status
;
3080 if (is_configured(&idisk
->disk
) ||
3081 is_failed(&idisk
->disk
))
3082 idisk
->disk
.status
&= ~(SPARE_DISK
);
3084 idisk
= calloc(1, sizeof(*idisk
));
3087 idisk
->owner
= IMSM_UNKNOWN_OWNER
;
3088 idisk
->disk
= *disk
;
3089 idisk
->next
= *disk_list
;
3093 if (serialcmp(idisk
->disk
.serial
, d
->serial
) == 0)
3100 static struct intel_super
*
3101 validate_members(struct intel_super
*super
, struct intel_disk
*disk_list
,
3104 struct imsm_super
*mpb
= super
->anchor
;
3108 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3109 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
3110 struct intel_disk
*idisk
;
3112 idisk
= disk_list_get(disk
->serial
, disk_list
);
3114 if (idisk
->owner
== owner
||
3115 idisk
->owner
== IMSM_UNKNOWN_OWNER
)
3118 dprintf("%s: '%.16s' owner %d != %d\n",
3119 __func__
, disk
->serial
, idisk
->owner
,
3122 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3123 __func__
, __le32_to_cpu(mpb
->family_num
), i
,
3129 if (ok_count
== mpb
->num_disks
)
3134 static void show_conflicts(__u32 family_num
, struct intel_super
*super_list
)
3136 struct intel_super
*s
;
3138 for (s
= super_list
; s
; s
= s
->next
) {
3139 if (family_num
!= s
->anchor
->family_num
)
3141 fprintf(stderr
, "Conflict, offlining family %#x on '%s'\n",
3142 __le32_to_cpu(family_num
), s
->disks
->devname
);
3146 static struct intel_super
*
3147 imsm_thunderdome(struct intel_super
**super_list
, int len
)
3149 struct intel_super
*super_table
[len
];
3150 struct intel_disk
*disk_list
= NULL
;
3151 struct intel_super
*champion
, *spare
;
3152 struct intel_super
*s
, **del
;
3157 memset(super_table
, 0, sizeof(super_table
));
3158 for (s
= *super_list
; s
; s
= s
->next
)
3159 tbl_size
= __prep_thunderdome(super_table
, tbl_size
, s
, &disk_list
);
3161 for (i
= 0; i
< tbl_size
; i
++) {
3162 struct imsm_disk
*d
;
3163 struct intel_disk
*idisk
;
3164 struct imsm_super
*mpb
= super_table
[i
]->anchor
;
3167 d
= &s
->disks
->disk
;
3169 /* 'd' must appear in merged disk list for its
3170 * configuration to be valid
3172 idisk
= disk_list_get(d
->serial
, disk_list
);
3173 if (idisk
&& idisk
->owner
== i
)
3174 s
= validate_members(s
, disk_list
, i
);
3179 dprintf("%s: marking family: %#x from %d:%d offline\n",
3180 __func__
, mpb
->family_num
,
3181 super_table
[i
]->disks
->major
,
3182 super_table
[i
]->disks
->minor
);
3186 /* This is where the mdadm implementation differs from the Windows
3187 * driver which has no strict concept of a container. We can only
3188 * assemble one family from a container, so when returning a prodigal
3189 * array member to this system the code will not be able to disambiguate
3190 * the container contents that should be assembled ("foreign" versus
3191 * "local"). It requires user intervention to set the orig_family_num
3192 * to a new value to establish a new container. The Windows driver in
3193 * this situation fixes up the volume name in place and manages the
3194 * foreign array as an independent entity.
3199 for (i
= 0; i
< tbl_size
; i
++) {
3200 struct intel_super
*tbl_ent
= super_table
[i
];
3206 if (tbl_ent
->anchor
->num_raid_devs
== 0) {
3211 if (s
&& !is_spare
) {
3212 show_conflicts(tbl_ent
->anchor
->family_num
, *super_list
);
3214 } else if (!s
&& !is_spare
)
3227 fprintf(stderr
, "Chose family %#x on '%s', "
3228 "assemble conflicts to new container with '--update=uuid'\n",
3229 __le32_to_cpu(s
->anchor
->family_num
), s
->disks
->devname
);
3231 /* collect all dl's onto 'champion', and update them to
3232 * champion's version of the status
3234 for (s
= *super_list
; s
; s
= s
->next
) {
3235 struct imsm_super
*mpb
= champion
->anchor
;
3236 struct dl
*dl
= s
->disks
;
3241 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3242 struct imsm_disk
*disk
;
3244 disk
= __serial_to_disk(dl
->serial
, mpb
, &dl
->index
);
3247 /* only set index on disks that are a member of
3248 * a populated container, i.e. one with
3251 if (is_failed(&dl
->disk
))
3253 else if (is_spare(&dl
->disk
))
3259 if (i
>= mpb
->num_disks
) {
3260 struct intel_disk
*idisk
;
3262 idisk
= disk_list_get(dl
->serial
, disk_list
);
3263 if (idisk
&& is_spare(&idisk
->disk
) &&
3264 !is_failed(&idisk
->disk
) && !is_configured(&idisk
->disk
))
3272 dl
->next
= champion
->disks
;
3273 champion
->disks
= dl
;
3277 /* delete 'champion' from super_list */
3278 for (del
= super_list
; *del
; ) {
3279 if (*del
== champion
) {
3280 *del
= (*del
)->next
;
3283 del
= &(*del
)->next
;
3285 champion
->next
= NULL
;
3289 struct intel_disk
*idisk
= disk_list
;
3291 disk_list
= disk_list
->next
;
3298 static int load_super_imsm_all(struct supertype
*st
, int fd
, void **sbp
,
3302 struct intel_super
*super_list
= NULL
;
3303 struct intel_super
*super
= NULL
;
3304 int devnum
= fd2devnum(fd
);
3310 /* check if 'fd' an opened container */
3311 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3315 if (sra
->array
.major_version
!= -1 ||
3316 sra
->array
.minor_version
!= -2 ||
3317 strcmp(sra
->text_version
, "imsm") != 0) {
3322 for (sd
= sra
->devs
, i
= 0; sd
; sd
= sd
->next
, i
++) {
3323 struct intel_super
*s
= alloc_super();
3331 s
->next
= super_list
;
3335 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3336 dfd
= dev_open(nm
, O_RDWR
);
3340 rv
= find_intel_hba_capability(dfd
, s
, devname
);
3341 /* no orom/efi or non-intel hba of the disk */
3345 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3347 /* retry the load if we might have raced against mdmon */
3348 if (err
== 3 && mdmon_running(devnum
))
3349 for (retry
= 0; retry
< 3; retry
++) {
3351 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3359 /* all mpbs enter, maybe one leaves */
3360 super
= imsm_thunderdome(&super_list
, i
);
3366 if (find_missing(super
) != 0) {
3374 while (super_list
) {
3375 struct intel_super
*s
= super_list
;
3377 super_list
= super_list
->next
;
3386 st
->container_dev
= devnum
;
3387 if (err
== 0 && st
->ss
== NULL
) {
3388 st
->ss
= &super_imsm
;
3389 st
->minor_version
= 0;
3390 st
->max_devs
= IMSM_MAX_DEVICES
;
3395 static int load_container_imsm(struct supertype
*st
, int fd
, char *devname
)
3397 return load_super_imsm_all(st
, fd
, &st
->sb
, devname
);
3401 static int load_super_imsm(struct supertype
*st
, int fd
, char *devname
)
3403 struct intel_super
*super
;
3406 if (test_partition(fd
))
3407 /* IMSM not allowed on partitions */
3410 free_super_imsm(st
);
3412 super
= alloc_super();
3415 Name
": malloc of %zu failed.\n",
3419 /* Load hba and capabilities if they exist.
3420 * But do not preclude loading metadata in case capabilities or hba are
3421 * non-compliant and ignore_hw_compat is set.
3423 rv
= find_intel_hba_capability(fd
, super
, devname
);
3424 /* no orom/efi or non-intel hba of the disk */
3425 if ((rv
!= 0) && (st
->ignore_hw_compat
== 0)) {
3428 Name
": No OROM/EFI properties for %s\n", devname
);
3432 rv
= load_and_parse_mpb(fd
, super
, devname
, 0);
3437 Name
": Failed to load all information "
3438 "sections on %s\n", devname
);
3444 if (st
->ss
== NULL
) {
3445 st
->ss
= &super_imsm
;
3446 st
->minor_version
= 0;
3447 st
->max_devs
= IMSM_MAX_DEVICES
;
3452 static __u16
info_to_blocks_per_strip(mdu_array_info_t
*info
)
3454 if (info
->level
== 1)
3456 return info
->chunk_size
>> 9;
3459 static __u32
info_to_num_data_stripes(mdu_array_info_t
*info
, int num_domains
)
3463 num_stripes
= (info
->size
* 2) / info_to_blocks_per_strip(info
);
3464 num_stripes
/= num_domains
;
3469 static __u32
info_to_blocks_per_member(mdu_array_info_t
*info
)
3471 if (info
->level
== 1)
3472 return info
->size
* 2;
3474 return (info
->size
* 2) & ~(info_to_blocks_per_strip(info
) - 1);
3477 static void imsm_update_version_info(struct intel_super
*super
)
3479 /* update the version and attributes */
3480 struct imsm_super
*mpb
= super
->anchor
;
3482 struct imsm_dev
*dev
;
3483 struct imsm_map
*map
;
3486 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3487 dev
= get_imsm_dev(super
, i
);
3488 map
= get_imsm_map(dev
, 0);
3489 if (__le32_to_cpu(dev
->size_high
) > 0)
3490 mpb
->attributes
|= MPB_ATTRIB_2TB
;
3492 /* FIXME detect when an array spans a port multiplier */
3494 mpb
->attributes
|= MPB_ATTRIB_PM
;
3497 if (mpb
->num_raid_devs
> 1 ||
3498 mpb
->attributes
!= MPB_ATTRIB_CHECKSUM_VERIFY
) {
3499 version
= MPB_VERSION_ATTRIBS
;
3500 switch (get_imsm_raid_level(map
)) {
3501 case 0: mpb
->attributes
|= MPB_ATTRIB_RAID0
; break;
3502 case 1: mpb
->attributes
|= MPB_ATTRIB_RAID1
; break;
3503 case 10: mpb
->attributes
|= MPB_ATTRIB_RAID10
; break;
3504 case 5: mpb
->attributes
|= MPB_ATTRIB_RAID5
; break;
3507 if (map
->num_members
>= 5)
3508 version
= MPB_VERSION_5OR6_DISK_ARRAY
;
3509 else if (dev
->status
== DEV_CLONE_N_GO
)
3510 version
= MPB_VERSION_CNG
;
3511 else if (get_imsm_raid_level(map
) == 5)
3512 version
= MPB_VERSION_RAID5
;
3513 else if (map
->num_members
>= 3)
3514 version
= MPB_VERSION_3OR4_DISK_ARRAY
;
3515 else if (get_imsm_raid_level(map
) == 1)
3516 version
= MPB_VERSION_RAID1
;
3518 version
= MPB_VERSION_RAID0
;
3520 strcpy(((char *) mpb
->sig
) + strlen(MPB_SIGNATURE
), version
);
3524 static int check_name(struct intel_super
*super
, char *name
, int quiet
)
3526 struct imsm_super
*mpb
= super
->anchor
;
3527 char *reason
= NULL
;
3530 if (strlen(name
) > MAX_RAID_SERIAL_LEN
)
3531 reason
= "must be 16 characters or less";
3533 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3534 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
3536 if (strncmp((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
) == 0) {
3537 reason
= "already exists";
3542 if (reason
&& !quiet
)
3543 fprintf(stderr
, Name
": imsm volume name %s\n", reason
);
3548 static int init_super_imsm_volume(struct supertype
*st
, mdu_array_info_t
*info
,
3549 unsigned long long size
, char *name
,
3550 char *homehost
, int *uuid
)
3552 /* We are creating a volume inside a pre-existing container.
3553 * so st->sb is already set.
3555 struct intel_super
*super
= st
->sb
;
3556 struct imsm_super
*mpb
= super
->anchor
;
3557 struct intel_dev
*dv
;
3558 struct imsm_dev
*dev
;
3559 struct imsm_vol
*vol
;
3560 struct imsm_map
*map
;
3561 int idx
= mpb
->num_raid_devs
;
3563 unsigned long long array_blocks
;
3564 size_t size_old
, size_new
;
3565 __u32 num_data_stripes
;
3567 if (super
->orom
&& mpb
->num_raid_devs
>= super
->orom
->vpa
) {
3568 fprintf(stderr
, Name
": This imsm-container already has the "
3569 "maximum of %d volumes\n", super
->orom
->vpa
);
3573 /* ensure the mpb is large enough for the new data */
3574 size_old
= __le32_to_cpu(mpb
->mpb_size
);
3575 size_new
= disks_to_mpb_size(info
->nr_disks
);
3576 if (size_new
> size_old
) {
3578 size_t size_round
= ROUND_UP(size_new
, 512);
3580 if (posix_memalign(&mpb_new
, 512, size_round
) != 0) {
3581 fprintf(stderr
, Name
": could not allocate new mpb\n");
3584 memcpy(mpb_new
, mpb
, size_old
);
3587 super
->anchor
= mpb_new
;
3588 mpb
->mpb_size
= __cpu_to_le32(size_new
);
3589 memset(mpb_new
+ size_old
, 0, size_round
- size_old
);
3591 super
->current_vol
= idx
;
3592 /* when creating the first raid device in this container set num_disks
3593 * to zero, i.e. delete this spare and add raid member devices in
3594 * add_to_super_imsm_volume()
3596 if (super
->current_vol
== 0)
3599 if (!check_name(super
, name
, 0))
3601 dv
= malloc(sizeof(*dv
));
3603 fprintf(stderr
, Name
": failed to allocate device list entry\n");
3606 dev
= calloc(1, sizeof(*dev
) + sizeof(__u32
) * (info
->raid_disks
- 1));
3609 fprintf(stderr
, Name
": could not allocate raid device\n");
3613 strncpy((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
);
3614 if (info
->level
== 1)
3615 array_blocks
= info_to_blocks_per_member(info
);
3617 array_blocks
= calc_array_size(info
->level
, info
->raid_disks
,
3618 info
->layout
, info
->chunk_size
,
3620 /* round array size down to closest MB */
3621 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
3623 dev
->size_low
= __cpu_to_le32((__u32
) array_blocks
);
3624 dev
->size_high
= __cpu_to_le32((__u32
) (array_blocks
>> 32));
3625 dev
->status
= (DEV_READ_COALESCING
| DEV_WRITE_COALESCING
);
3627 vol
->migr_state
= 0;
3628 set_migr_type(dev
, MIGR_INIT
);
3630 vol
->curr_migr_unit
= 0;
3631 map
= get_imsm_map(dev
, 0);
3632 map
->pba_of_lba0
= __cpu_to_le32(super
->create_offset
);
3633 map
->blocks_per_member
= __cpu_to_le32(info_to_blocks_per_member(info
));
3634 map
->blocks_per_strip
= __cpu_to_le16(info_to_blocks_per_strip(info
));
3635 map
->failed_disk_num
= ~0;
3636 map
->map_state
= info
->level
? IMSM_T_STATE_UNINITIALIZED
:
3637 IMSM_T_STATE_NORMAL
;
3640 if (info
->level
== 1 && info
->raid_disks
> 2) {
3643 fprintf(stderr
, Name
": imsm does not support more than 2 disks"
3644 "in a raid1 volume\n");
3648 map
->raid_level
= info
->level
;
3649 if (info
->level
== 10) {
3650 map
->raid_level
= 1;
3651 map
->num_domains
= info
->raid_disks
/ 2;
3652 } else if (info
->level
== 1)
3653 map
->num_domains
= info
->raid_disks
;
3655 map
->num_domains
= 1;
3657 num_data_stripes
= info_to_num_data_stripes(info
, map
->num_domains
);
3658 map
->num_data_stripes
= __cpu_to_le32(num_data_stripes
);
3660 map
->num_members
= info
->raid_disks
;
3661 for (i
= 0; i
< map
->num_members
; i
++) {
3662 /* initialized in add_to_super */
3663 set_imsm_ord_tbl_ent(map
, i
, IMSM_ORD_REBUILD
);
3665 mpb
->num_raid_devs
++;
3668 dv
->index
= super
->current_vol
;
3669 dv
->next
= super
->devlist
;
3670 super
->devlist
= dv
;
3672 imsm_update_version_info(super
);
3677 static int init_super_imsm(struct supertype
*st
, mdu_array_info_t
*info
,
3678 unsigned long long size
, char *name
,
3679 char *homehost
, int *uuid
)
3681 /* This is primarily called by Create when creating a new array.
3682 * We will then get add_to_super called for each component, and then
3683 * write_init_super called to write it out to each device.
3684 * For IMSM, Create can create on fresh devices or on a pre-existing
3686 * To create on a pre-existing array a different method will be called.
3687 * This one is just for fresh drives.
3689 struct intel_super
*super
;
3690 struct imsm_super
*mpb
;
3695 return init_super_imsm_volume(st
, info
, size
, name
, homehost
, uuid
);
3698 mpb_size
= disks_to_mpb_size(info
->nr_disks
);
3702 super
= alloc_super();
3703 if (super
&& posix_memalign(&super
->buf
, 512, mpb_size
) != 0) {
3708 fprintf(stderr
, Name
3709 ": %s could not allocate superblock\n", __func__
);
3712 memset(super
->buf
, 0, mpb_size
);
3714 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
3718 /* zeroing superblock */
3722 mpb
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
3724 version
= (char *) mpb
->sig
;
3725 strcpy(version
, MPB_SIGNATURE
);
3726 version
+= strlen(MPB_SIGNATURE
);
3727 strcpy(version
, MPB_VERSION_RAID0
);
3733 static int add_to_super_imsm_volume(struct supertype
*st
, mdu_disk_info_t
*dk
,
3734 int fd
, char *devname
)
3736 struct intel_super
*super
= st
->sb
;
3737 struct imsm_super
*mpb
= super
->anchor
;
3739 struct imsm_dev
*dev
;
3740 struct imsm_map
*map
;
3743 dev
= get_imsm_dev(super
, super
->current_vol
);
3744 map
= get_imsm_map(dev
, 0);
3746 if (! (dk
->state
& (1<<MD_DISK_SYNC
))) {
3747 fprintf(stderr
, Name
": %s: Cannot add spare devices to IMSM volume\n",
3753 /* we're doing autolayout so grab the pre-marked (in
3754 * validate_geometry) raid_disk
3756 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
3757 if (dl
->raiddisk
== dk
->raid_disk
)
3760 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
3761 if (dl
->major
== dk
->major
&&
3762 dl
->minor
== dk
->minor
)
3767 fprintf(stderr
, Name
": %s is not a member of the same container\n", devname
);
3771 /* add a pristine spare to the metadata */
3772 if (dl
->index
< 0) {
3773 dl
->index
= super
->anchor
->num_disks
;
3774 super
->anchor
->num_disks
++;
3776 /* Check the device has not already been added */
3777 slot
= get_imsm_disk_slot(map
, dl
->index
);
3779 (get_imsm_ord_tbl_ent(dev
, slot
, -1) & IMSM_ORD_REBUILD
) == 0) {
3780 fprintf(stderr
, Name
": %s has been included in this array twice\n",
3784 set_imsm_ord_tbl_ent(map
, dk
->number
, dl
->index
);
3785 dl
->disk
.status
= CONFIGURED_DISK
;
3787 /* if we are creating the first raid device update the family number */
3788 if (super
->current_vol
== 0) {
3790 struct imsm_dev
*_dev
= __get_imsm_dev(mpb
, 0);
3791 struct imsm_disk
*_disk
= __get_imsm_disk(mpb
, dl
->index
);
3793 if (!_dev
|| !_disk
) {
3794 fprintf(stderr
, Name
": BUG mpb setup error\n");
3800 sum
+= __gen_imsm_checksum(mpb
);
3801 mpb
->family_num
= __cpu_to_le32(sum
);
3802 mpb
->orig_family_num
= mpb
->family_num
;
3809 static int add_to_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
,
3810 int fd
, char *devname
)
3812 struct intel_super
*super
= st
->sb
;
3814 unsigned long long size
;
3819 /* If we are on an RAID enabled platform check that the disk is
3820 * attached to the raid controller.
3821 * We do not need to test disks attachment for container based additions,
3822 * they shall be already tested when container was created/assembled.
3824 rv
= find_intel_hba_capability(fd
, super
, devname
);
3825 /* no orom/efi or non-intel hba of the disk */
3827 dprintf("capability: %p fd: %d ret: %d\n",
3828 super
->orom
, fd
, rv
);
3832 if (super
->current_vol
>= 0)
3833 return add_to_super_imsm_volume(st
, dk
, fd
, devname
);
3836 dd
= malloc(sizeof(*dd
));
3839 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
3842 memset(dd
, 0, sizeof(*dd
));
3843 dd
->major
= major(stb
.st_rdev
);
3844 dd
->minor
= minor(stb
.st_rdev
);
3846 dd
->devname
= devname
? strdup(devname
) : NULL
;
3849 dd
->action
= DISK_ADD
;
3850 rv
= imsm_read_serial(fd
, devname
, dd
->serial
);
3853 Name
": failed to retrieve scsi serial, aborting\n");
3858 get_dev_size(fd
, NULL
, &size
);
3860 serialcpy(dd
->disk
.serial
, dd
->serial
);
3861 dd
->disk
.total_blocks
= __cpu_to_le32(size
);
3862 dd
->disk
.status
= SPARE_DISK
;
3863 if (sysfs_disk_to_scsi_id(fd
, &id
) == 0)
3864 dd
->disk
.scsi_id
= __cpu_to_le32(id
);
3866 dd
->disk
.scsi_id
= __cpu_to_le32(0);
3868 if (st
->update_tail
) {
3869 dd
->next
= super
->disk_mgmt_list
;
3870 super
->disk_mgmt_list
= dd
;
3872 dd
->next
= super
->disks
;
3874 super
->updates_pending
++;
3881 static int remove_from_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
)
3883 struct intel_super
*super
= st
->sb
;
3886 /* remove from super works only in mdmon - for communication
3887 * manager - monitor. Check if communication memory buffer
3890 if (!st
->update_tail
) {
3892 Name
": %s shall be used in mdmon context only"
3893 "(line %d).\n", __func__
, __LINE__
);
3896 dd
= malloc(sizeof(*dd
));
3899 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
3902 memset(dd
, 0, sizeof(*dd
));
3903 dd
->major
= dk
->major
;
3904 dd
->minor
= dk
->minor
;
3907 dd
->disk
.status
= SPARE_DISK
;
3908 dd
->action
= DISK_REMOVE
;
3910 dd
->next
= super
->disk_mgmt_list
;
3911 super
->disk_mgmt_list
= dd
;
3917 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
);
3921 struct imsm_super anchor
;
3922 } spare_record
__attribute__ ((aligned(512)));
3924 /* spare records have their own family number and do not have any defined raid
3927 static int write_super_imsm_spares(struct intel_super
*super
, int doclose
)
3929 struct imsm_super
*mpb
= super
->anchor
;
3930 struct imsm_super
*spare
= &spare_record
.anchor
;
3934 spare
->mpb_size
= __cpu_to_le32(sizeof(struct imsm_super
)),
3935 spare
->generation_num
= __cpu_to_le32(1UL),
3936 spare
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
3937 spare
->num_disks
= 1,
3938 spare
->num_raid_devs
= 0,
3939 spare
->cache_size
= mpb
->cache_size
,
3940 spare
->pwr_cycle_count
= __cpu_to_le32(1),
3942 snprintf((char *) spare
->sig
, MAX_SIGNATURE_LENGTH
,
3943 MPB_SIGNATURE MPB_VERSION_RAID0
);
3945 for (d
= super
->disks
; d
; d
= d
->next
) {
3949 spare
->disk
[0] = d
->disk
;
3950 sum
= __gen_imsm_checksum(spare
);
3951 spare
->family_num
= __cpu_to_le32(sum
);
3952 spare
->orig_family_num
= 0;
3953 sum
= __gen_imsm_checksum(spare
);
3954 spare
->check_sum
= __cpu_to_le32(sum
);
3956 if (store_imsm_mpb(d
->fd
, spare
)) {
3957 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
3958 __func__
, d
->major
, d
->minor
, strerror(errno
));
3970 static int write_super_imsm(struct supertype
*st
, int doclose
)
3972 struct intel_super
*super
= st
->sb
;
3973 struct imsm_super
*mpb
= super
->anchor
;
3979 __u32 mpb_size
= sizeof(struct imsm_super
) - sizeof(struct imsm_disk
);
3982 /* 'generation' is incremented every time the metadata is written */
3983 generation
= __le32_to_cpu(mpb
->generation_num
);
3985 mpb
->generation_num
= __cpu_to_le32(generation
);
3987 /* fix up cases where previous mdadm releases failed to set
3990 if (mpb
->orig_family_num
== 0)
3991 mpb
->orig_family_num
= mpb
->family_num
;
3993 for (d
= super
->disks
; d
; d
= d
->next
) {
3997 mpb
->disk
[d
->index
] = d
->disk
;
4001 for (d
= super
->missing
; d
; d
= d
->next
) {
4002 mpb
->disk
[d
->index
] = d
->disk
;
4005 mpb
->num_disks
= num_disks
;
4006 mpb_size
+= sizeof(struct imsm_disk
) * mpb
->num_disks
;
4008 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4009 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
4010 struct imsm_dev
*dev2
= get_imsm_dev(super
, i
);
4012 imsm_copy_dev(dev
, dev2
);
4013 mpb_size
+= sizeof_imsm_dev(dev
, 0);
4016 mpb_size
+= __le32_to_cpu(mpb
->bbm_log_size
);
4017 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4019 /* recalculate checksum */
4020 sum
= __gen_imsm_checksum(mpb
);
4021 mpb
->check_sum
= __cpu_to_le32(sum
);
4023 /* write the mpb for disks that compose raid devices */
4024 for (d
= super
->disks
; d
; d
= d
->next
) {
4027 if (store_imsm_mpb(d
->fd
, mpb
))
4028 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4029 __func__
, d
->major
, d
->minor
, strerror(errno
));
4037 return write_super_imsm_spares(super
, doclose
);
4043 static int create_array(struct supertype
*st
, int dev_idx
)
4046 struct imsm_update_create_array
*u
;
4047 struct intel_super
*super
= st
->sb
;
4048 struct imsm_dev
*dev
= get_imsm_dev(super
, dev_idx
);
4049 struct imsm_map
*map
= get_imsm_map(dev
, 0);
4050 struct disk_info
*inf
;
4051 struct imsm_disk
*disk
;
4054 len
= sizeof(*u
) - sizeof(*dev
) + sizeof_imsm_dev(dev
, 0) +
4055 sizeof(*inf
) * map
->num_members
;
4058 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4063 u
->type
= update_create_array
;
4064 u
->dev_idx
= dev_idx
;
4065 imsm_copy_dev(&u
->dev
, dev
);
4066 inf
= get_disk_info(u
);
4067 for (i
= 0; i
< map
->num_members
; i
++) {
4068 int idx
= get_imsm_disk_idx(dev
, i
, -1);
4070 disk
= get_imsm_disk(super
, idx
);
4071 serialcpy(inf
[i
].serial
, disk
->serial
);
4073 append_metadata_update(st
, u
, len
);
4078 static int mgmt_disk(struct supertype
*st
)
4080 struct intel_super
*super
= st
->sb
;
4082 struct imsm_update_add_remove_disk
*u
;
4084 if (!super
->disk_mgmt_list
)
4090 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4095 u
->type
= update_add_remove_disk
;
4096 append_metadata_update(st
, u
, len
);
4101 static int write_init_super_imsm(struct supertype
*st
)
4103 struct intel_super
*super
= st
->sb
;
4104 int current_vol
= super
->current_vol
;
4106 /* we are done with current_vol reset it to point st at the container */
4107 super
->current_vol
= -1;
4109 if (st
->update_tail
) {
4110 /* queue the recently created array / added disk
4111 * as a metadata update */
4114 /* determine if we are creating a volume or adding a disk */
4115 if (current_vol
< 0) {
4116 /* in the mgmt (add/remove) disk case we are running
4117 * in mdmon context, so don't close fd's
4119 return mgmt_disk(st
);
4121 rv
= create_array(st
, current_vol
);
4126 for (d
= super
->disks
; d
; d
= d
->next
)
4127 Kill(d
->devname
, NULL
, 0, 1, 1);
4128 return write_super_imsm(st
, 1);
4133 static int store_super_imsm(struct supertype
*st
, int fd
)
4135 struct intel_super
*super
= st
->sb
;
4136 struct imsm_super
*mpb
= super
? super
->anchor
: NULL
;
4142 return store_imsm_mpb(fd
, mpb
);
4148 static int imsm_bbm_log_size(struct imsm_super
*mpb
)
4150 return __le32_to_cpu(mpb
->bbm_log_size
);
4154 static int validate_geometry_imsm_container(struct supertype
*st
, int level
,
4155 int layout
, int raiddisks
, int chunk
,
4156 unsigned long long size
, char *dev
,
4157 unsigned long long *freesize
,
4161 unsigned long long ldsize
;
4162 struct intel_super
*super
=NULL
;
4165 if (level
!= LEVEL_CONTAINER
)
4170 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4173 fprintf(stderr
, Name
": imsm: Cannot open %s: %s\n",
4174 dev
, strerror(errno
));
4177 if (!get_dev_size(fd
, dev
, &ldsize
)) {
4182 /* capabilities retrieve could be possible
4183 * note that there is no fd for the disks in array.
4185 super
= alloc_super();
4188 Name
": malloc of %zu failed.\n",
4194 rv
= find_intel_hba_capability(fd
, super
, verbose
? dev
: NULL
);
4198 fd2devname(fd
, str
);
4199 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4200 fd
, str
, super
->orom
, rv
, raiddisks
);
4202 /* no orom/efi or non-intel hba of the disk */
4208 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4210 fprintf(stderr
, Name
": %d exceeds maximum number of"
4211 " platform supported disks: %d\n",
4212 raiddisks
, super
->orom
->tds
);
4218 *freesize
= avail_size_imsm(st
, ldsize
>> 9);
4224 static unsigned long long find_size(struct extent
*e
, int *idx
, int num_extents
)
4226 const unsigned long long base_start
= e
[*idx
].start
;
4227 unsigned long long end
= base_start
+ e
[*idx
].size
;
4230 if (base_start
== end
)
4234 for (i
= *idx
; i
< num_extents
; i
++) {
4235 /* extend overlapping extents */
4236 if (e
[i
].start
>= base_start
&&
4237 e
[i
].start
<= end
) {
4240 if (e
[i
].start
+ e
[i
].size
> end
)
4241 end
= e
[i
].start
+ e
[i
].size
;
4242 } else if (e
[i
].start
> end
) {
4248 return end
- base_start
;
4251 static unsigned long long merge_extents(struct intel_super
*super
, int sum_extents
)
4253 /* build a composite disk with all known extents and generate a new
4254 * 'maxsize' given the "all disks in an array must share a common start
4255 * offset" constraint
4257 struct extent
*e
= calloc(sum_extents
, sizeof(*e
));
4261 unsigned long long pos
;
4262 unsigned long long start
= 0;
4263 unsigned long long maxsize
;
4264 unsigned long reserve
;
4269 /* coalesce and sort all extents. also, check to see if we need to
4270 * reserve space between member arrays
4273 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4276 for (i
= 0; i
< dl
->extent_cnt
; i
++)
4279 qsort(e
, sum_extents
, sizeof(*e
), cmp_extent
);
4284 while (i
< sum_extents
) {
4285 e
[j
].start
= e
[i
].start
;
4286 e
[j
].size
= find_size(e
, &i
, sum_extents
);
4288 if (e
[j
-1].size
== 0)
4297 unsigned long long esize
;
4299 esize
= e
[i
].start
- pos
;
4300 if (esize
>= maxsize
) {
4305 pos
= e
[i
].start
+ e
[i
].size
;
4307 } while (e
[i
-1].size
);
4313 /* FIXME assumes volume at offset 0 is the first volume in a
4316 if (start_extent
> 0)
4317 reserve
= IMSM_RESERVED_SECTORS
; /* gap between raid regions */
4321 if (maxsize
< reserve
)
4324 super
->create_offset
= ~((__u32
) 0);
4325 if (start
+ reserve
> super
->create_offset
)
4326 return 0; /* start overflows create_offset */
4327 super
->create_offset
= start
+ reserve
;
4329 return maxsize
- reserve
;
/* Check whether the platform option-rom (if any) supports the given
 * raid level with the given member count.  Without an orom anything
 * the imsm format itself allows is accepted.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	/* raid4 and raid6 are never valid for imsm metadata */
	if (level < 0 || level == 6 || level == 4)
		return 0;

	/* if we have an orom prevent invalid raid levels */
	if (orom)
		switch (level) {
		case 0: return imsm_orom_has_raid0(orom);
		case 1:
			if (raiddisks > 2)
				return imsm_orom_has_raid1e(orom);
			return imsm_orom_has_raid1(orom) && raiddisks == 2;
		case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
		case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
		}
	else
		return 1; /* not on an Intel RAID platform so anything goes */

	return 0;
}
4355 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4357 * validate volume parameters with OROM/EFI capabilities
4360 validate_geometry_imsm_orom(struct intel_super
*super
, int level
, int layout
,
4361 int raiddisks
, int *chunk
, int verbose
)
4366 /* validate container capabilities */
4367 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4369 fprintf(stderr
, Name
": %d exceeds maximum number of"
4370 " platform supported disks: %d\n",
4371 raiddisks
, super
->orom
->tds
);
4375 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4376 if (super
->orom
&& (!is_raid_level_supported(super
->orom
, level
,
4378 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4379 level
, raiddisks
, raiddisks
> 1 ? "s" : "");
4382 if (super
->orom
&& level
!= 1) {
4383 if (chunk
&& (*chunk
== 0 || *chunk
== UnSet
))
4384 *chunk
= imsm_orom_default_chunk(super
->orom
);
4385 else if (chunk
&& !imsm_orom_has_chunk(super
->orom
, *chunk
)) {
4386 pr_vrb(": platform does not support a chunk size of: "
4391 if (layout
!= imsm_level_to_layout(level
)) {
4393 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4394 else if (level
== 10)
4395 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4397 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4404 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4405 * FIX ME add ahci details
4407 static int validate_geometry_imsm_volume(struct supertype
*st
, int level
,
4408 int layout
, int raiddisks
, int *chunk
,
4409 unsigned long long size
, char *dev
,
4410 unsigned long long *freesize
,
4414 struct intel_super
*super
= st
->sb
;
4415 struct imsm_super
*mpb
= super
->anchor
;
4417 unsigned long long pos
= 0;
4418 unsigned long long maxsize
;
4422 /* We must have the container info already read in. */
4426 if (!validate_geometry_imsm_orom(super
, level
, layout
, raiddisks
, chunk
, verbose
)) {
4427 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4428 "Cannot proceed with the action(s).\n");
4432 /* General test: make sure there is space for
4433 * 'raiddisks' device extents of size 'size' at a given
4436 unsigned long long minsize
= size
;
4437 unsigned long long start_offset
= MaxSector
;
4440 minsize
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
4441 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4446 e
= get_extents(super
, dl
);
4449 unsigned long long esize
;
4450 esize
= e
[i
].start
- pos
;
4451 if (esize
>= minsize
)
4453 if (found
&& start_offset
== MaxSector
) {
4456 } else if (found
&& pos
!= start_offset
) {
4460 pos
= e
[i
].start
+ e
[i
].size
;
4462 } while (e
[i
-1].size
);
4467 if (dcnt
< raiddisks
) {
4469 fprintf(stderr
, Name
": imsm: Not enough "
4470 "devices with space for this array "
4478 /* This device must be a member of the set */
4479 if (stat(dev
, &stb
) < 0)
4481 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
4483 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4484 if (dl
->major
== (int)major(stb
.st_rdev
) &&
4485 dl
->minor
== (int)minor(stb
.st_rdev
))
4490 fprintf(stderr
, Name
": %s is not in the "
4491 "same imsm set\n", dev
);
4493 } else if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
) {
4494 /* If a volume is present then the current creation attempt
4495 * cannot incorporate new spares because the orom may not
4496 * understand this configuration (all member disks must be
4497 * members of each array in the container).
4499 fprintf(stderr
, Name
": %s is a spare and a volume"
4500 " is already defined for this container\n", dev
);
4501 fprintf(stderr
, Name
": The option-rom requires all member"
4502 " disks to be a member of all volumes\n");
4506 /* retrieve the largest free space block */
4507 e
= get_extents(super
, dl
);
4512 unsigned long long esize
;
4514 esize
= e
[i
].start
- pos
;
4515 if (esize
>= maxsize
)
4517 pos
= e
[i
].start
+ e
[i
].size
;
4519 } while (e
[i
-1].size
);
4524 fprintf(stderr
, Name
": unable to determine free space for: %s\n",
4528 if (maxsize
< size
) {
4530 fprintf(stderr
, Name
": %s not enough space (%llu < %llu)\n",
4531 dev
, maxsize
, size
);
4535 /* count total number of extents for merge */
4537 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4539 i
+= dl
->extent_cnt
;
4541 maxsize
= merge_extents(super
, i
);
4542 if (maxsize
< size
|| maxsize
== 0) {
4544 fprintf(stderr
, Name
": not enough space after merge (%llu < %llu)\n",
4549 *freesize
= maxsize
;
4554 static int reserve_space(struct supertype
*st
, int raiddisks
,
4555 unsigned long long size
, int chunk
,
4556 unsigned long long *freesize
)
4558 struct intel_super
*super
= st
->sb
;
4559 struct imsm_super
*mpb
= super
->anchor
;
4564 unsigned long long maxsize
;
4565 unsigned long long minsize
;
4569 /* find the largest common start free region of the possible disks */
4573 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4579 /* don't activate new spares if we are orom constrained
4580 * and there is already a volume active in the container
4582 if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
)
4585 e
= get_extents(super
, dl
);
4588 for (i
= 1; e
[i
-1].size
; i
++)
4596 maxsize
= merge_extents(super
, extent_cnt
);
4600 minsize
= chunk
* 2;
4602 if (cnt
< raiddisks
||
4603 (super
->orom
&& used
&& used
!= raiddisks
) ||
4604 maxsize
< minsize
||
4606 fprintf(stderr
, Name
": not enough devices with space to create array.\n");
4607 return 0; /* No enough free spaces large enough */
4619 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4621 dl
->raiddisk
= cnt
++;
4628 static int validate_geometry_imsm(struct supertype
*st
, int level
, int layout
,
4629 int raiddisks
, int *chunk
, unsigned long long size
,
4630 char *dev
, unsigned long long *freesize
,
4638 * if given unused devices create a container
4639 * if given given devices in a container create a member volume
4641 if (level
== LEVEL_CONTAINER
) {
4642 /* Must be a fresh device to add to a container */
4643 return validate_geometry_imsm_container(st
, level
, layout
,
4645 chunk
?*chunk
:0, size
,
4651 if (st
->sb
&& freesize
) {
4652 /* we are being asked to automatically layout a
4653 * new volume based on the current contents of
4654 * the container. If the the parameters can be
4655 * satisfied reserve_space will record the disks,
4656 * start offset, and size of the volume to be
4657 * created. add_to_super and getinfo_super
4658 * detect when autolayout is in progress.
4660 if (!validate_geometry_imsm_orom(st
->sb
, level
, layout
,
4664 return reserve_space(st
, raiddisks
, size
,
4665 chunk
?*chunk
:0, freesize
);
4670 /* creating in a given container */
4671 return validate_geometry_imsm_volume(st
, level
, layout
,
4672 raiddisks
, chunk
, size
,
4673 dev
, freesize
, verbose
);
4676 /* This device needs to be a device in an 'imsm' container */
4677 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4681 Name
": Cannot create this array on device %s\n",
4686 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
4688 fprintf(stderr
, Name
": Cannot open %s: %s\n",
4689 dev
, strerror(errno
));
4692 /* Well, it is in use by someone, maybe an 'imsm' container. */
4693 cfd
= open_container(fd
);
4697 fprintf(stderr
, Name
": Cannot use %s: It is busy\n",
4701 sra
= sysfs_read(cfd
, 0, GET_VERSION
);
4702 if (sra
&& sra
->array
.major_version
== -1 &&
4703 strcmp(sra
->text_version
, "imsm") == 0)
4707 /* This is a member of a imsm container. Load the container
4708 * and try to create a volume
4710 struct intel_super
*super
;
4712 if (load_super_imsm_all(st
, cfd
, (void **) &super
, NULL
) == 0) {
4714 st
->container_dev
= fd2devnum(cfd
);
4716 return validate_geometry_imsm_volume(st
, level
, layout
,
4724 fprintf(stderr
, Name
": failed container membership check\n");
4730 static void default_geometry_imsm(struct supertype
*st
, int *level
, int *layout
, int *chunk
)
4732 struct intel_super
*super
= st
->sb
;
4734 if (level
&& *level
== UnSet
)
4735 *level
= LEVEL_CONTAINER
;
4737 if (level
&& layout
&& *layout
== UnSet
)
4738 *layout
= imsm_level_to_layout(*level
);
4740 if (chunk
&& (*chunk
== UnSet
|| *chunk
== 0) &&
4741 super
&& super
->orom
)
4742 *chunk
= imsm_orom_default_chunk(super
->orom
);
4745 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
);
4747 static int kill_subarray_imsm(struct supertype
*st
)
4749 /* remove the subarray currently referenced by ->current_vol */
4751 struct intel_dev
**dp
;
4752 struct intel_super
*super
= st
->sb
;
4753 __u8 current_vol
= super
->current_vol
;
4754 struct imsm_super
*mpb
= super
->anchor
;
4756 if (super
->current_vol
< 0)
4758 super
->current_vol
= -1; /* invalidate subarray cursor */
4760 /* block deletions that would change the uuid of active subarrays
4762 * FIXME when immutable ids are available, but note that we'll
4763 * also need to fixup the invalidated/active subarray indexes in
4766 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4769 if (i
< current_vol
)
4771 sprintf(subarray
, "%u", i
);
4772 if (is_subarray_active(subarray
, st
->devname
)) {
4774 Name
": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
4781 if (st
->update_tail
) {
4782 struct imsm_update_kill_array
*u
= malloc(sizeof(*u
));
4786 u
->type
= update_kill_array
;
4787 u
->dev_idx
= current_vol
;
4788 append_metadata_update(st
, u
, sizeof(*u
));
4793 for (dp
= &super
->devlist
; *dp
;)
4794 if ((*dp
)->index
== current_vol
) {
4797 handle_missing(super
, (*dp
)->dev
);
4798 if ((*dp
)->index
> current_vol
)
4803 /* no more raid devices, all active components are now spares,
4804 * but of course failed are still failed
4806 if (--mpb
->num_raid_devs
== 0) {
4809 for (d
= super
->disks
; d
; d
= d
->next
)
4810 if (d
->index
> -2) {
4812 d
->disk
.status
= SPARE_DISK
;
4816 super
->updates_pending
++;
4821 static int update_subarray_imsm(struct supertype
*st
, char *subarray
,
4822 char *update
, struct mddev_ident
*ident
)
4824 /* update the subarray currently referenced by ->current_vol */
4825 struct intel_super
*super
= st
->sb
;
4826 struct imsm_super
*mpb
= super
->anchor
;
4828 if (strcmp(update
, "name") == 0) {
4829 char *name
= ident
->name
;
4833 if (is_subarray_active(subarray
, st
->devname
)) {
4835 Name
": Unable to update name of active subarray\n");
4839 if (!check_name(super
, name
, 0))
4842 vol
= strtoul(subarray
, &ep
, 10);
4843 if (*ep
!= '\0' || vol
>= super
->anchor
->num_raid_devs
)
4846 if (st
->update_tail
) {
4847 struct imsm_update_rename_array
*u
= malloc(sizeof(*u
));
4851 u
->type
= update_rename_array
;
4853 snprintf((char *) u
->name
, MAX_RAID_SERIAL_LEN
, "%s", name
);
4854 append_metadata_update(st
, u
, sizeof(*u
));
4856 struct imsm_dev
*dev
;
4859 dev
= get_imsm_dev(super
, vol
);
4860 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
4861 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4862 dev
= get_imsm_dev(super
, i
);
4863 handle_missing(super
, dev
);
4865 super
->updates_pending
++;
4873 static int is_gen_migration(struct imsm_dev
*dev
)
4875 if (!dev
->vol
.migr_state
)
4878 if (migr_type(dev
) == MIGR_GEN_MIGR
)
4883 #endif /* MDASSEMBLE */
4885 static int is_rebuilding(struct imsm_dev
*dev
)
4887 struct imsm_map
*migr_map
;
4889 if (!dev
->vol
.migr_state
)
4892 if (migr_type(dev
) != MIGR_REBUILD
)
4895 migr_map
= get_imsm_map(dev
, 1);
4897 if (migr_map
->map_state
== IMSM_T_STATE_DEGRADED
)
4903 static void update_recovery_start(struct imsm_dev
*dev
, struct mdinfo
*array
)
4905 struct mdinfo
*rebuild
= NULL
;
4909 if (!is_rebuilding(dev
))
4912 /* Find the rebuild target, but punt on the dual rebuild case */
4913 for (d
= array
->devs
; d
; d
= d
->next
)
4914 if (d
->recovery_start
== 0) {
4921 /* (?) none of the disks are marked with
4922 * IMSM_ORD_REBUILD, so assume they are missing and the
4923 * disk_ord_tbl was not correctly updated
4925 dprintf("%s: failed to locate out-of-sync disk\n", __func__
);
4929 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
4930 rebuild
->recovery_start
= units
* blocks_per_migr_unit(dev
);
4934 static struct mdinfo
*container_content_imsm(struct supertype
*st
, char *subarray
)
4936 /* Given a container loaded by load_super_imsm_all,
4937 * extract information about all the arrays into
4939 * If 'subarray' is given, just extract info about that array.
4941 * For each imsm_dev create an mdinfo, fill it in,
4942 * then look for matching devices in super->disks
4943 * and create appropriate device mdinfo.
4945 struct intel_super
*super
= st
->sb
;
4946 struct imsm_super
*mpb
= super
->anchor
;
4947 struct mdinfo
*rest
= NULL
;
4951 int spare_disks
= 0;
4953 /* check for bad blocks */
4954 if (imsm_bbm_log_size(super
->anchor
))
4957 /* count spare devices, not used in maps
4959 for (d
= super
->disks
; d
; d
= d
->next
)
4963 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4964 struct imsm_dev
*dev
;
4965 struct imsm_map
*map
;
4966 struct imsm_map
*map2
;
4967 struct mdinfo
*this;
4972 (i
!= strtoul(subarray
, &ep
, 10) || *ep
!= '\0'))
4975 dev
= get_imsm_dev(super
, i
);
4976 map
= get_imsm_map(dev
, 0);
4977 map2
= get_imsm_map(dev
, 1);
4979 /* do not publish arrays that are in the middle of an
4980 * unsupported migration
4982 if (dev
->vol
.migr_state
&&
4983 (migr_type(dev
) == MIGR_STATE_CHANGE
)) {
4984 fprintf(stderr
, Name
": cannot assemble volume '%.16s':"
4985 " unsupported migration in progress\n",
4989 /* do not publish arrays that are not support by controller's
4993 chunk
= __le16_to_cpu(map
->blocks_per_strip
) >> 1;
4995 if (!validate_geometry_imsm_orom(super
,
4996 get_imsm_raid_level(map
), /* RAID level */
4997 imsm_level_to_layout(get_imsm_raid_level(map
)),
4998 map
->num_members
, /* raid disks */
5001 fprintf(stderr
, Name
": RAID gemetry validation failed. "
5002 "Cannot proceed with the action(s).\n");
5005 #endif /* MDASSEMBLE */
5006 this = malloc(sizeof(*this));
5008 fprintf(stderr
, Name
": failed to allocate %zu bytes\n",
5012 memset(this, 0, sizeof(*this));
5015 super
->current_vol
= i
;
5016 getinfo_super_imsm_volume(st
, this, NULL
);
5017 for (slot
= 0 ; slot
< map
->num_members
; slot
++) {
5018 unsigned long long recovery_start
;
5019 struct mdinfo
*info_d
;
5026 idx
= get_imsm_disk_idx(dev
, slot
, 0);
5027 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
5028 for (d
= super
->disks
; d
; d
= d
->next
)
5029 if (d
->index
== idx
)
5032 recovery_start
= MaxSector
;
5035 if (d
&& is_failed(&d
->disk
))
5037 if (ord
& IMSM_ORD_REBUILD
)
5041 * if we skip some disks the array will be assmebled degraded;
5042 * reset resync start to avoid a dirty-degraded
5043 * situation when performing the intial sync
5045 * FIXME handle dirty degraded
5047 if ((skip
|| recovery_start
== 0) && !dev
->vol
.dirty
)
5048 this->resync_start
= MaxSector
;
5052 info_d
= calloc(1, sizeof(*info_d
));
5054 fprintf(stderr
, Name
": failed to allocate disk"
5055 " for volume %.16s\n", dev
->volume
);
5056 info_d
= this->devs
;
5058 struct mdinfo
*d
= info_d
->next
;
5067 info_d
->next
= this->devs
;
5068 this->devs
= info_d
;
5070 info_d
->disk
.number
= d
->index
;
5071 info_d
->disk
.major
= d
->major
;
5072 info_d
->disk
.minor
= d
->minor
;
5073 info_d
->disk
.raid_disk
= slot
;
5074 info_d
->recovery_start
= recovery_start
;
5076 if (slot
< map2
->num_members
)
5077 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5079 this->array
.spare_disks
++;
5081 if (slot
< map
->num_members
)
5082 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5084 this->array
.spare_disks
++;
5086 if (info_d
->recovery_start
== MaxSector
)
5087 this->array
.working_disks
++;
5089 info_d
->events
= __le32_to_cpu(mpb
->generation_num
);
5090 info_d
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5091 info_d
->component_size
= __le32_to_cpu(map
->blocks_per_member
);
5093 /* now that the disk list is up-to-date fixup recovery_start */
5094 update_recovery_start(dev
, this);
5095 this->array
.spare_disks
+= spare_disks
;
5099 /* if array has bad blocks, set suitable bit in array status */
5101 rest
->array
.state
|= (1<<MD_SB_BBM_ERRORS
);
5107 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
)
5109 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5112 return map
->map_state
== IMSM_T_STATE_UNINITIALIZED
?
5113 IMSM_T_STATE_UNINITIALIZED
: IMSM_T_STATE_NORMAL
;
5115 switch (get_imsm_raid_level(map
)) {
5117 return IMSM_T_STATE_FAILED
;
5120 if (failed
< map
->num_members
)
5121 return IMSM_T_STATE_DEGRADED
;
5123 return IMSM_T_STATE_FAILED
;
5128 * check to see if any mirrors have failed, otherwise we
5129 * are degraded. Even numbered slots are mirrored on
5133 /* gcc -Os complains that this is unused */
5134 int insync
= insync
;
5136 for (i
= 0; i
< map
->num_members
; i
++) {
5137 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
5138 int idx
= ord_to_idx(ord
);
5139 struct imsm_disk
*disk
;
5141 /* reset the potential in-sync count on even-numbered
5142 * slots. num_copies is always 2 for imsm raid10
5147 disk
= get_imsm_disk(super
, idx
);
5148 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5151 /* no in-sync disks left in this mirror the
5155 return IMSM_T_STATE_FAILED
;
5158 return IMSM_T_STATE_DEGRADED
;
5162 return IMSM_T_STATE_DEGRADED
;
5164 return IMSM_T_STATE_FAILED
;
5170 return map
->map_state
;
5173 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
)
5177 struct imsm_disk
*disk
;
5178 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5179 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
5183 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5184 * disks that are being rebuilt. New failures are recorded to
5185 * map[0]. So we look through all the disks we started with and
5186 * see if any failures are still present, or if any new ones
5189 * FIXME add support for online capacity expansion and
5190 * raid-level-migration
5192 for (i
= 0; i
< prev
->num_members
; i
++) {
5193 ord
= __le32_to_cpu(prev
->disk_ord_tbl
[i
]);
5194 ord
|= __le32_to_cpu(map
->disk_ord_tbl
[i
]);
5195 idx
= ord_to_idx(ord
);
5197 disk
= get_imsm_disk(super
, idx
);
5198 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5206 static int imsm_open_new(struct supertype
*c
, struct active_array
*a
,
5209 struct intel_super
*super
= c
->sb
;
5210 struct imsm_super
*mpb
= super
->anchor
;
5212 if (atoi(inst
) >= mpb
->num_raid_devs
) {
5213 fprintf(stderr
, "%s: subarry index %d, out of range\n",
5214 __func__
, atoi(inst
));
5218 dprintf("imsm: open_new %s\n", inst
);
5219 a
->info
.container_member
= atoi(inst
);
5223 static int is_resyncing(struct imsm_dev
*dev
)
5225 struct imsm_map
*migr_map
;
5227 if (!dev
->vol
.migr_state
)
5230 if (migr_type(dev
) == MIGR_INIT
||
5231 migr_type(dev
) == MIGR_REPAIR
)
5234 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5237 migr_map
= get_imsm_map(dev
, 1);
5239 if ((migr_map
->map_state
== IMSM_T_STATE_NORMAL
) &&
5240 (dev
->vol
.migr_type
!= MIGR_GEN_MIGR
))
5246 /* return true if we recorded new information */
5247 static int mark_failure(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5251 struct imsm_map
*map
;
5253 /* new failures are always set in map[0] */
5254 map
= get_imsm_map(dev
, 0);
5256 slot
= get_imsm_disk_slot(map
, idx
);
5260 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
5261 if (is_failed(disk
) && (ord
& IMSM_ORD_REBUILD
))
5264 disk
->status
|= FAILED_DISK
;
5265 set_imsm_ord_tbl_ent(map
, slot
, idx
| IMSM_ORD_REBUILD
);
5266 if (map
->failed_disk_num
== 0xff)
5267 map
->failed_disk_num
= slot
;
5271 static void mark_missing(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5273 mark_failure(dev
, disk
, idx
);
5275 if (disk
->scsi_id
== __cpu_to_le32(~(__u32
)0))
5278 disk
->scsi_id
= __cpu_to_le32(~(__u32
)0);
5279 memmove(&disk
->serial
[0], &disk
->serial
[1], MAX_RAID_SERIAL_LEN
- 1);
5282 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
)
5288 if (!super
->missing
)
5290 failed
= imsm_count_failed(super
, dev
);
5291 map_state
= imsm_check_degraded(super
, dev
, failed
);
5293 dprintf("imsm: mark missing\n");
5294 end_migration(dev
, map_state
);
5295 for (dl
= super
->missing
; dl
; dl
= dl
->next
)
5296 mark_missing(dev
, &dl
->disk
, dl
->index
);
5297 super
->updates_pending
++;
5300 static unsigned long long imsm_set_array_size(struct imsm_dev
*dev
)
5302 int used_disks
= imsm_num_data_members(dev
, 0);
5303 unsigned long long array_blocks
;
5304 struct imsm_map
*map
;
5306 if (used_disks
== 0) {
5307 /* when problems occures
5308 * return current array_blocks value
5310 array_blocks
= __le32_to_cpu(dev
->size_high
);
5311 array_blocks
= array_blocks
<< 32;
5312 array_blocks
+= __le32_to_cpu(dev
->size_low
);
5314 return array_blocks
;
5317 /* set array size in metadata
5319 map
= get_imsm_map(dev
, 0);
5320 array_blocks
= map
->blocks_per_member
* used_disks
;
5322 /* round array size down to closest MB
5324 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
5325 dev
->size_low
= __cpu_to_le32((__u32
)array_blocks
);
5326 dev
->size_high
= __cpu_to_le32((__u32
)(array_blocks
>> 32));
5328 return array_blocks
;
5331 static void imsm_set_disk(struct active_array
*a
, int n
, int state
);
5333 static void imsm_progress_container_reshape(struct intel_super
*super
)
5335 /* if no device has a migr_state, but some device has a
5336 * different number of members than the previous device, start
5337 * changing the number of devices in this device to match
5340 struct imsm_super
*mpb
= super
->anchor
;
5341 int prev_disks
= -1;
5345 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5346 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
5347 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5348 struct imsm_map
*map2
;
5349 int prev_num_members
;
5351 if (dev
->vol
.migr_state
)
5354 if (prev_disks
== -1)
5355 prev_disks
= map
->num_members
;
5356 if (prev_disks
== map
->num_members
)
5359 /* OK, this array needs to enter reshape mode.
5360 * i.e it needs a migr_state
5363 copy_map_size
= sizeof_imsm_map(map
);
5364 prev_num_members
= map
->num_members
;
5365 map
->num_members
= prev_disks
;
5366 dev
->vol
.migr_state
= 1;
5367 dev
->vol
.curr_migr_unit
= 0;
5368 dev
->vol
.migr_type
= MIGR_GEN_MIGR
;
5369 for (i
= prev_num_members
;
5370 i
< map
->num_members
; i
++)
5371 set_imsm_ord_tbl_ent(map
, i
, i
);
5372 map2
= get_imsm_map(dev
, 1);
5373 /* Copy the current map */
5374 memcpy(map2
, map
, copy_map_size
);
5375 map2
->num_members
= prev_num_members
;
5377 imsm_set_array_size(dev
);
5378 super
->updates_pending
++;
5382 /* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild
5383 * states are handled in imsm_set_disk() with one exception, when a
5384 * resync is stopped due to a new failure this routine will set the
5385 * 'degraded' state for the array.
5387 static int imsm_set_array_state(struct active_array
*a
, int consistent
)
5389 int inst
= a
->info
.container_member
;
5390 struct intel_super
*super
= a
->container
->sb
;
5391 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5392 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5393 int failed
= imsm_count_failed(super
, dev
);
5394 __u8 map_state
= imsm_check_degraded(super
, dev
, failed
);
5395 __u32 blocks_per_unit
;
5397 if (dev
->vol
.migr_state
&&
5398 dev
->vol
.migr_type
== MIGR_GEN_MIGR
) {
5399 /* array state change is blocked due to reshape action
5401 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5402 * - finish the reshape (if last_checkpoint is big and action != reshape)
5403 * - update curr_migr_unit
5405 if (a
->curr_action
== reshape
) {
5406 /* still reshaping, maybe update curr_migr_unit */
5407 goto mark_checkpoint
;
5409 if (a
->last_checkpoint
== 0 && a
->prev_action
== reshape
) {
5410 /* for some reason we aborted the reshape.
5413 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
5414 dev
->vol
.migr_state
= 0;
5415 dev
->vol
.migr_type
= 0;
5416 dev
->vol
.curr_migr_unit
= 0;
5417 memcpy(map
, map2
, sizeof_imsm_map(map2
));
5418 super
->updates_pending
++;
5420 if (a
->last_checkpoint
>= a
->info
.component_size
) {
5421 unsigned long long array_blocks
;
5425 used_disks
= imsm_num_data_members(dev
, 0);
5426 if (used_disks
> 0) {
5428 map
->blocks_per_member
*
5430 /* round array size down to closest MB
5432 array_blocks
= (array_blocks
5433 >> SECT_PER_MB_SHIFT
)
5434 << SECT_PER_MB_SHIFT
;
5435 a
->info
.custom_array_size
= array_blocks
;
5436 /* encourage manager to update array
5440 a
->check_reshape
= 1;
5442 /* finalize online capacity expansion/reshape */
5443 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
5445 mdi
->disk
.raid_disk
,
5448 imsm_progress_container_reshape(super
);
5453 /* before we activate this array handle any missing disks */
5454 if (consistent
== 2)
5455 handle_missing(super
, dev
);
5457 if (consistent
== 2 &&
5458 (!is_resync_complete(&a
->info
) ||
5459 map_state
!= IMSM_T_STATE_NORMAL
||
5460 dev
->vol
.migr_state
))
5463 if (is_resync_complete(&a
->info
)) {
5464 /* complete intialization / resync,
5465 * recovery and interrupted recovery is completed in
5468 if (is_resyncing(dev
)) {
5469 dprintf("imsm: mark resync done\n");
5470 end_migration(dev
, map_state
);
5471 super
->updates_pending
++;
5472 a
->last_checkpoint
= 0;
5474 } else if (!is_resyncing(dev
) && !failed
) {
5475 /* mark the start of the init process if nothing is failed */
5476 dprintf("imsm: mark resync start\n");
5477 if (map
->map_state
== IMSM_T_STATE_UNINITIALIZED
)
5478 migrate(dev
, IMSM_T_STATE_NORMAL
, MIGR_INIT
);
5480 migrate(dev
, IMSM_T_STATE_NORMAL
, MIGR_REPAIR
);
5481 super
->updates_pending
++;
5485 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5486 blocks_per_unit
= blocks_per_migr_unit(dev
);
5487 if (blocks_per_unit
) {
5491 units
= a
->last_checkpoint
/ blocks_per_unit
;
5494 /* check that we did not overflow 32-bits, and that
5495 * curr_migr_unit needs updating
5497 if (units32
== units
&&
5499 __le32_to_cpu(dev
->vol
.curr_migr_unit
) != units32
) {
5500 dprintf("imsm: mark checkpoint (%u)\n", units32
);
5501 dev
->vol
.curr_migr_unit
= __cpu_to_le32(units32
);
5502 super
->updates_pending
++;
5506 /* mark dirty / clean */
5507 if (dev
->vol
.dirty
!= !consistent
) {
5508 dprintf("imsm: mark '%s'\n", consistent
? "clean" : "dirty");
5513 super
->updates_pending
++;
5519 static void imsm_set_disk(struct active_array
*a
, int n
, int state
)
5521 int inst
= a
->info
.container_member
;
5522 struct intel_super
*super
= a
->container
->sb
;
5523 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5524 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5525 struct imsm_disk
*disk
;
5530 if (n
> map
->num_members
)
5531 fprintf(stderr
, "imsm: set_disk %d out of range 0..%d\n",
5532 n
, map
->num_members
- 1);
5537 dprintf("imsm: set_disk %d:%x\n", n
, state
);
5539 ord
= get_imsm_ord_tbl_ent(dev
, n
, -1);
5540 disk
= get_imsm_disk(super
, ord_to_idx(ord
));
5542 /* check for new failures */
5543 if (state
& DS_FAULTY
) {
5544 if (mark_failure(dev
, disk
, ord_to_idx(ord
)))
5545 super
->updates_pending
++;
5548 /* check if in_sync */
5549 if (state
& DS_INSYNC
&& ord
& IMSM_ORD_REBUILD
&& is_rebuilding(dev
)) {
5550 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
5552 set_imsm_ord_tbl_ent(migr_map
, n
, ord_to_idx(ord
));
5553 super
->updates_pending
++;
5556 failed
= imsm_count_failed(super
, dev
);
5557 map_state
= imsm_check_degraded(super
, dev
, failed
);
5559 /* check if recovery complete, newly degraded, or failed */
5560 if (map_state
== IMSM_T_STATE_NORMAL
&& is_rebuilding(dev
)) {
5561 end_migration(dev
, map_state
);
5562 map
= get_imsm_map(dev
, 0);
5563 map
->failed_disk_num
= ~0;
5564 super
->updates_pending
++;
5565 a
->last_checkpoint
= 0;
5566 } else if (map_state
== IMSM_T_STATE_DEGRADED
&&
5567 map
->map_state
!= map_state
&&
5568 !dev
->vol
.migr_state
) {
5569 dprintf("imsm: mark degraded\n");
5570 map
->map_state
= map_state
;
5571 super
->updates_pending
++;
5572 a
->last_checkpoint
= 0;
5573 } else if (map_state
== IMSM_T_STATE_FAILED
&&
5574 map
->map_state
!= map_state
) {
5575 dprintf("imsm: mark failed\n");
5576 end_migration(dev
, map_state
);
5577 super
->updates_pending
++;
5578 a
->last_checkpoint
= 0;
5579 } else if (is_gen_migration(dev
)) {
5580 dprintf("imsm: Detected General Migration in state: ");
5581 if (map_state
== IMSM_T_STATE_NORMAL
) {
5582 end_migration(dev
, map_state
);
5583 map
= get_imsm_map(dev
, 0);
5584 map
->failed_disk_num
= ~0;
5585 dprintf("normal\n");
5587 if (map_state
== IMSM_T_STATE_DEGRADED
) {
5588 printf("degraded\n");
5589 end_migration(dev
, map_state
);
5591 dprintf("failed\n");
5593 map
->map_state
= map_state
;
5595 super
->updates_pending
++;
5599 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
)
5602 __u32 mpb_size
= __le32_to_cpu(mpb
->mpb_size
);
5603 unsigned long long dsize
;
5604 unsigned long long sectors
;
5606 get_dev_size(fd
, NULL
, &dsize
);
5608 if (mpb_size
> 512) {
5609 /* -1 to account for anchor */
5610 sectors
= mpb_sectors(mpb
) - 1;
5612 /* write the extended mpb to the sectors preceeding the anchor */
5613 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0)
5616 if ((unsigned long long)write(fd
, buf
+ 512, 512 * sectors
)
5621 /* first block is stored on second to last sector of the disk */
5622 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0)
5625 if (write(fd
, buf
, 512) != 512)
5631 static void imsm_sync_metadata(struct supertype
*container
)
5633 struct intel_super
*super
= container
->sb
;
5635 dprintf("sync metadata: %d\n", super
->updates_pending
);
5636 if (!super
->updates_pending
)
5639 write_super_imsm(container
, 0);
5641 super
->updates_pending
= 0;
5644 static struct dl
*imsm_readd(struct intel_super
*super
, int idx
, struct active_array
*a
)
5646 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5647 int i
= get_imsm_disk_idx(dev
, idx
, -1);
5650 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
5654 if (dl
&& is_failed(&dl
->disk
))
5658 dprintf("%s: found %x:%x\n", __func__
, dl
->major
, dl
->minor
);
5663 static struct dl
*imsm_add_spare(struct intel_super
*super
, int slot
,
5664 struct active_array
*a
, int activate_new
,
5665 struct mdinfo
*additional_test_list
)
5667 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5668 int idx
= get_imsm_disk_idx(dev
, slot
, -1);
5669 struct imsm_super
*mpb
= super
->anchor
;
5670 struct imsm_map
*map
;
5671 unsigned long long pos
;
5676 __u32 array_start
= 0;
5677 __u32 array_end
= 0;
5679 struct mdinfo
*test_list
;
5681 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
5682 /* If in this array, skip */
5683 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5684 if (d
->state_fd
>= 0 &&
5685 d
->disk
.major
== dl
->major
&&
5686 d
->disk
.minor
== dl
->minor
) {
5687 dprintf("%x:%x already in array\n",
5688 dl
->major
, dl
->minor
);
5693 test_list
= additional_test_list
;
5695 if (test_list
->disk
.major
== dl
->major
&&
5696 test_list
->disk
.minor
== dl
->minor
) {
5697 dprintf("%x:%x already in additional test list\n",
5698 dl
->major
, dl
->minor
);
5701 test_list
= test_list
->next
;
5706 /* skip in use or failed drives */
5707 if (is_failed(&dl
->disk
) || idx
== dl
->index
||
5709 dprintf("%x:%x status (failed: %d index: %d)\n",
5710 dl
->major
, dl
->minor
, is_failed(&dl
->disk
), idx
);
5714 /* skip pure spares when we are looking for partially
5715 * assimilated drives
5717 if (dl
->index
== -1 && !activate_new
)
5720 /* Does this unused device have the requisite free space?
5721 * It needs to be able to cover all member volumes
5723 ex
= get_extents(super
, dl
);
5725 dprintf("cannot get extents\n");
5728 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5729 dev
= get_imsm_dev(super
, i
);
5730 map
= get_imsm_map(dev
, 0);
5732 /* check if this disk is already a member of
5735 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
5741 array_start
= __le32_to_cpu(map
->pba_of_lba0
);
5742 array_end
= array_start
+
5743 __le32_to_cpu(map
->blocks_per_member
) - 1;
5746 /* check that we can start at pba_of_lba0 with
5747 * blocks_per_member of space
5749 if (array_start
>= pos
&& array_end
< ex
[j
].start
) {
5753 pos
= ex
[j
].start
+ ex
[j
].size
;
5755 } while (ex
[j
-1].size
);
5762 if (i
< mpb
->num_raid_devs
) {
5763 dprintf("%x:%x does not have %u to %u available\n",
5764 dl
->major
, dl
->minor
, array_start
, array_end
);
5775 static int imsm_rebuild_allowed(struct supertype
*cont
, int dev_idx
, int failed
)
5777 struct imsm_dev
*dev2
;
5778 struct imsm_map
*map
;
5784 dev2
= get_imsm_dev(cont
->sb
, dev_idx
);
5786 state
= imsm_check_degraded(cont
->sb
, dev2
, failed
);
5787 if (state
== IMSM_T_STATE_FAILED
) {
5788 map
= get_imsm_map(dev2
, 0);
5791 for (slot
= 0; slot
< map
->num_members
; slot
++) {
5793 * Check if failed disks are deleted from intel
5794 * disk list or are marked to be deleted
5796 idx
= get_imsm_disk_idx(dev2
, slot
, -1);
5797 idisk
= get_imsm_dl_disk(cont
->sb
, idx
);
5799 * Do not rebuild the array if failed disks
5800 * from failed sub-array are not removed from
5804 is_failed(&idisk
->disk
) &&
5805 (idisk
->action
!= DISK_REMOVE
))
5813 static struct mdinfo
*imsm_activate_spare(struct active_array
*a
,
5814 struct metadata_update
**updates
)
5817 * Find a device with unused free space and use it to replace a
5818 * failed/vacant region in an array. We replace failed regions one a
5819 * array at a time. The result is that a new spare disk will be added
5820 * to the first failed array and after the monitor has finished
5821 * propagating failures the remainder will be consumed.
5823 * FIXME add a capability for mdmon to request spares from another
5827 struct intel_super
*super
= a
->container
->sb
;
5828 int inst
= a
->info
.container_member
;
5829 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5830 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5831 int failed
= a
->info
.array
.raid_disks
;
5832 struct mdinfo
*rv
= NULL
;
5835 struct metadata_update
*mu
;
5837 struct imsm_update_activate_spare
*u
;
5842 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
5843 if ((d
->curr_state
& DS_FAULTY
) &&
5845 /* wait for Removal to happen */
5847 if (d
->state_fd
>= 0)
5851 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
5852 inst
, failed
, a
->info
.array
.raid_disks
, a
->info
.array
.level
);
5854 if (dev
->vol
.migr_state
&&
5855 dev
->vol
.migr_type
== MIGR_GEN_MIGR
)
5856 /* No repair during migration */
5859 if (a
->info
.array
.level
== 4)
5860 /* No repair for takeovered array
5861 * imsm doesn't support raid4
5865 if (imsm_check_degraded(super
, dev
, failed
) != IMSM_T_STATE_DEGRADED
)
5869 * If there are any failed disks check state of the other volume.
5870 * Block rebuild if the another one is failed until failed disks
5871 * are removed from container.
5874 dprintf("found failed disks in %s, check if there another"
5875 "failed sub-array.\n",
5877 /* check if states of the other volumes allow for rebuild */
5878 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
5880 allowed
= imsm_rebuild_allowed(a
->container
,
5888 /* For each slot, if it is not working, find a spare */
5889 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
5890 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5891 if (d
->disk
.raid_disk
== i
)
5893 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
5894 if (d
&& (d
->state_fd
>= 0))
5898 * OK, this device needs recovery. Try to re-add the
5899 * previous occupant of this slot, if this fails see if
5900 * we can continue the assimilation of a spare that was
5901 * partially assimilated, finally try to activate a new
5904 dl
= imsm_readd(super
, i
, a
);
5906 dl
= imsm_add_spare(super
, i
, a
, 0, NULL
);
5908 dl
= imsm_add_spare(super
, i
, a
, 1, NULL
);
5912 /* found a usable disk with enough space */
5913 di
= malloc(sizeof(*di
));
5916 memset(di
, 0, sizeof(*di
));
5918 /* dl->index will be -1 in the case we are activating a
5919 * pristine spare. imsm_process_update() will create a
5920 * new index in this case. Once a disk is found to be
5921 * failed in all member arrays it is kicked from the
5924 di
->disk
.number
= dl
->index
;
5926 /* (ab)use di->devs to store a pointer to the device
5929 di
->devs
= (struct mdinfo
*) dl
;
5931 di
->disk
.raid_disk
= i
;
5932 di
->disk
.major
= dl
->major
;
5933 di
->disk
.minor
= dl
->minor
;
5935 di
->recovery_start
= 0;
5936 di
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5937 di
->component_size
= a
->info
.component_size
;
5938 di
->container_member
= inst
;
5939 super
->random
= random32();
5943 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
5944 i
, di
->data_offset
);
5950 /* No spares found */
5952 /* Now 'rv' has a list of devices to return.
5953 * Create a metadata_update record to update the
5954 * disk_ord_tbl for the array
5956 mu
= malloc(sizeof(*mu
));
5958 mu
->buf
= malloc(sizeof(struct imsm_update_activate_spare
) * num_spares
);
5959 if (mu
->buf
== NULL
) {
5966 struct mdinfo
*n
= rv
->next
;
5975 mu
->space_list
= NULL
;
5976 mu
->len
= sizeof(struct imsm_update_activate_spare
) * num_spares
;
5977 mu
->next
= *updates
;
5978 u
= (struct imsm_update_activate_spare
*) mu
->buf
;
5980 for (di
= rv
; di
; di
= di
->next
) {
5981 u
->type
= update_activate_spare
;
5982 u
->dl
= (struct dl
*) di
->devs
;
5984 u
->slot
= di
->disk
.raid_disk
;
5995 static int disks_overlap(struct intel_super
*super
, int idx
, struct imsm_update_create_array
*u
)
5997 struct imsm_dev
*dev
= get_imsm_dev(super
, idx
);
5998 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5999 struct imsm_map
*new_map
= get_imsm_map(&u
->dev
, 0);
6000 struct disk_info
*inf
= get_disk_info(u
);
6001 struct imsm_disk
*disk
;
6005 for (i
= 0; i
< map
->num_members
; i
++) {
6006 disk
= get_imsm_disk(super
, get_imsm_disk_idx(dev
, i
, -1));
6007 for (j
= 0; j
< new_map
->num_members
; j
++)
6008 if (serialcmp(disk
->serial
, inf
[j
].serial
) == 0)
6016 static struct dl
*get_disk_super(struct intel_super
*super
, int major
, int minor
)
6018 struct dl
*dl
= NULL
;
6019 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6020 if ((dl
->major
== major
) && (dl
->minor
== minor
))
6025 static int remove_disk_super(struct intel_super
*super
, int major
, int minor
)
6027 struct dl
*prev
= NULL
;
6031 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6032 if ((dl
->major
== major
) && (dl
->minor
== minor
)) {
6035 prev
->next
= dl
->next
;
6037 super
->disks
= dl
->next
;
6039 __free_imsm_disk(dl
);
6040 dprintf("%s: removed %x:%x\n",
6041 __func__
, major
, minor
);
6049 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
);
6051 static int add_remove_disk_update(struct intel_super
*super
)
6053 int check_degraded
= 0;
6054 struct dl
*disk
= NULL
;
6055 /* add/remove some spares to/from the metadata/contrainer */
6056 while (super
->disk_mgmt_list
) {
6057 struct dl
*disk_cfg
;
6059 disk_cfg
= super
->disk_mgmt_list
;
6060 super
->disk_mgmt_list
= disk_cfg
->next
;
6061 disk_cfg
->next
= NULL
;
6063 if (disk_cfg
->action
== DISK_ADD
) {
6064 disk_cfg
->next
= super
->disks
;
6065 super
->disks
= disk_cfg
;
6067 dprintf("%s: added %x:%x\n",
6068 __func__
, disk_cfg
->major
,
6070 } else if (disk_cfg
->action
== DISK_REMOVE
) {
6071 dprintf("Disk remove action processed: %x.%x\n",
6072 disk_cfg
->major
, disk_cfg
->minor
);
6073 disk
= get_disk_super(super
,
6077 /* store action status */
6078 disk
->action
= DISK_REMOVE
;
6079 /* remove spare disks only */
6080 if (disk
->index
== -1) {
6081 remove_disk_super(super
,
6086 /* release allocate disk structure */
6087 __free_imsm_disk(disk_cfg
);
6090 return check_degraded
;
6094 static int apply_reshape_migration_update(struct imsm_update_reshape_migration
*u
,
6095 struct intel_super
*super
,
6098 struct intel_dev
*id
;
6099 void **tofree
= NULL
;
6102 dprintf("apply_reshape_migration_update()\n");
6103 if ((u
->subdev
< 0) ||
6105 dprintf("imsm: Error: Wrong subdev: %i\n", u
->subdev
);
6108 if ((space_list
== NULL
) || (*space_list
== NULL
)) {
6109 dprintf("imsm: Error: Memory is not allocated\n");
6113 for (id
= super
->devlist
; id
; id
= id
->next
) {
6114 if (id
->index
== (unsigned)u
->subdev
) {
6115 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subdev
);
6116 struct imsm_map
*map
;
6117 struct imsm_dev
*new_dev
=
6118 (struct imsm_dev
*)*space_list
;
6119 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
6121 struct dl
*new_disk
;
6123 if (new_dev
== NULL
)
6125 *space_list
= **space_list
;
6126 memcpy(new_dev
, dev
, sizeof_imsm_dev(dev
, 0));
6127 map
= get_imsm_map(new_dev
, 0);
6129 dprintf("imsm: Error: migration in progress");
6133 to_state
= map
->map_state
;
6134 if ((u
->new_level
== 5) && (map
->raid_level
== 0)) {
6136 /* this should not happen */
6137 if (u
->new_disks
[0] < 0) {
6138 map
->failed_disk_num
=
6139 map
->num_members
- 1;
6140 to_state
= IMSM_T_STATE_DEGRADED
;
6142 to_state
= IMSM_T_STATE_NORMAL
;
6144 migrate(new_dev
, to_state
, MIGR_GEN_MIGR
);
6145 if (u
->new_level
> -1)
6146 map
->raid_level
= u
->new_level
;
6147 migr_map
= get_imsm_map(new_dev
, 1);
6148 if ((u
->new_level
== 5) &&
6149 (migr_map
->raid_level
== 0)) {
6150 int ord
= map
->num_members
- 1;
6151 migr_map
->num_members
--;
6152 if (u
->new_disks
[0] < 0)
6153 ord
|= IMSM_ORD_REBUILD
;
6154 set_imsm_ord_tbl_ent(map
,
6155 map
->num_members
- 1,
6159 tofree
= (void **)dev
;
6161 /* update chunk size
6163 if (u
->new_chunksize
> 0)
6164 map
->blocks_per_strip
=
6165 __cpu_to_le16(u
->new_chunksize
* 2);
6169 if ((u
->new_level
!= 5) ||
6170 (migr_map
->raid_level
!= 0) ||
6171 (migr_map
->raid_level
== map
->raid_level
))
6174 if (u
->new_disks
[0] >= 0) {
6177 new_disk
= get_disk_super(super
,
6178 major(u
->new_disks
[0]),
6179 minor(u
->new_disks
[0]));
6180 dprintf("imsm: new disk for reshape is: %i:%i "
6181 "(%p, index = %i)\n",
6182 major(u
->new_disks
[0]),
6183 minor(u
->new_disks
[0]),
6184 new_disk
, new_disk
->index
);
6185 if (new_disk
== NULL
)
6186 goto error_disk_add
;
6188 new_disk
->index
= map
->num_members
- 1;
6189 /* slot to fill in autolayout
6191 new_disk
->raiddisk
= new_disk
->index
;
6192 new_disk
->disk
.status
|= CONFIGURED_DISK
;
6193 new_disk
->disk
.status
&= ~SPARE_DISK
;
6195 goto error_disk_add
;
6198 *tofree
= *space_list
;
6199 /* calculate new size
6201 imsm_set_array_size(new_dev
);
6208 *space_list
= tofree
;
6212 dprintf("Error: imsm: Cannot find disk.\n");
6217 static int apply_reshape_container_disks_update(struct imsm_update_reshape
*u
,
6218 struct intel_super
*super
,
6221 struct dl
*new_disk
;
6222 struct intel_dev
*id
;
6224 int delta_disks
= u
->new_raid_disks
- u
->old_raid_disks
;
6225 int disk_count
= u
->old_raid_disks
;
6226 void **tofree
= NULL
;
6227 int devices_to_reshape
= 1;
6228 struct imsm_super
*mpb
= super
->anchor
;
6230 unsigned int dev_id
;
6232 dprintf("imsm: apply_reshape_container_disks_update()\n");
6234 /* enable spares to use in array */
6235 for (i
= 0; i
< delta_disks
; i
++) {
6236 new_disk
= get_disk_super(super
,
6237 major(u
->new_disks
[i
]),
6238 minor(u
->new_disks
[i
]));
6239 dprintf("imsm: new disk for reshape is: %i:%i "
6240 "(%p, index = %i)\n",
6241 major(u
->new_disks
[i
]), minor(u
->new_disks
[i
]),
6242 new_disk
, new_disk
->index
);
6243 if ((new_disk
== NULL
) ||
6244 ((new_disk
->index
>= 0) &&
6245 (new_disk
->index
< u
->old_raid_disks
)))
6246 goto update_reshape_exit
;
6247 new_disk
->index
= disk_count
++;
6248 /* slot to fill in autolayout
6250 new_disk
->raiddisk
= new_disk
->index
;
6251 new_disk
->disk
.status
|=
6253 new_disk
->disk
.status
&= ~SPARE_DISK
;
6256 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6257 mpb
->num_raid_devs
);
6258 /* manage changes in volume
6260 for (dev_id
= 0; dev_id
< mpb
->num_raid_devs
; dev_id
++) {
6261 void **sp
= *space_list
;
6262 struct imsm_dev
*newdev
;
6263 struct imsm_map
*newmap
, *oldmap
;
6265 for (id
= super
->devlist
; id
; id
= id
->next
) {
6266 if (id
->index
== dev_id
)
6275 /* Copy the dev, but not (all of) the map */
6276 memcpy(newdev
, id
->dev
, sizeof(*newdev
));
6277 oldmap
= get_imsm_map(id
->dev
, 0);
6278 newmap
= get_imsm_map(newdev
, 0);
6279 /* Copy the current map */
6280 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6281 /* update one device only
6283 if (devices_to_reshape
) {
6284 dprintf("imsm: modifying subdev: %i\n",
6286 devices_to_reshape
--;
6287 newdev
->vol
.migr_state
= 1;
6288 newdev
->vol
.curr_migr_unit
= 0;
6289 newdev
->vol
.migr_type
= MIGR_GEN_MIGR
;
6290 newmap
->num_members
= u
->new_raid_disks
;
6291 for (i
= 0; i
< delta_disks
; i
++) {
6292 set_imsm_ord_tbl_ent(newmap
,
6293 u
->old_raid_disks
+ i
,
6294 u
->old_raid_disks
+ i
);
6296 /* New map is correct, now need to save old map
6298 newmap
= get_imsm_map(newdev
, 1);
6299 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6301 imsm_set_array_size(newdev
);
6304 sp
= (void **)id
->dev
;
6310 *space_list
= tofree
;
6313 update_reshape_exit
:
6318 static int apply_takeover_update(struct imsm_update_takeover
*u
,
6319 struct intel_super
*super
,
6322 struct imsm_dev
*dev
= NULL
;
6323 struct intel_dev
*dv
;
6324 struct imsm_dev
*dev_new
;
6325 struct imsm_map
*map
;
6329 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
6330 if (dv
->index
== (unsigned int)u
->subarray
) {
6338 map
= get_imsm_map(dev
, 0);
6340 if (u
->direction
== R10_TO_R0
) {
6341 /* Number of failed disks must be half of initial disk number */
6342 if (imsm_count_failed(super
, dev
) != (map
->num_members
/ 2))
6345 /* iterate through devices to mark removed disks as spare */
6346 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6347 if (dm
->disk
.status
& FAILED_DISK
) {
6348 int idx
= dm
->index
;
6349 /* update indexes on the disk list */
6350 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6351 the index values will end up being correct.... NB */
6352 for (du
= super
->disks
; du
; du
= du
->next
)
6353 if (du
->index
> idx
)
6355 /* mark as spare disk */
6356 dm
->disk
.status
= SPARE_DISK
;
6361 map
->num_members
= map
->num_members
/ 2;
6362 map
->map_state
= IMSM_T_STATE_NORMAL
;
6363 map
->num_domains
= 1;
6364 map
->raid_level
= 0;
6365 map
->failed_disk_num
= -1;
6368 if (u
->direction
== R0_TO_R10
) {
6370 /* update slots in current disk list */
6371 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6375 /* create new *missing* disks */
6376 for (i
= 0; i
< map
->num_members
; i
++) {
6377 space
= *space_list
;
6380 *space_list
= *space
;
6382 memcpy(du
, super
->disks
, sizeof(*du
));
6386 du
->index
= (i
* 2) + 1;
6387 sprintf((char *)du
->disk
.serial
,
6388 " MISSING_%d", du
->index
);
6389 sprintf((char *)du
->serial
,
6390 "MISSING_%d", du
->index
);
6391 du
->next
= super
->missing
;
6392 super
->missing
= du
;
6394 /* create new dev and map */
6395 space
= *space_list
;
6398 *space_list
= *space
;
6399 dev_new
= (void *)space
;
6400 memcpy(dev_new
, dev
, sizeof(*dev
));
6401 /* update new map */
6402 map
= get_imsm_map(dev_new
, 0);
6403 map
->num_members
= map
->num_members
* 2;
6404 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6405 map
->num_domains
= 2;
6406 map
->raid_level
= 1;
6407 /* replace dev<->dev_new */
6410 /* update disk order table */
6411 for (du
= super
->disks
; du
; du
= du
->next
)
6413 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6414 for (du
= super
->missing
; du
; du
= du
->next
)
6415 if (du
->index
>= 0) {
6416 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6417 mark_missing(dev_new
, &du
->disk
, du
->index
);
6423 static void imsm_process_update(struct supertype
*st
,
6424 struct metadata_update
*update
)
6427 * crack open the metadata_update envelope to find the update record
6428 * update can be one of:
6429 * update_reshape_container_disks - all the arrays in the container
6430 * are being reshaped to have more devices. We need to mark
6431 * the arrays for general migration and convert selected spares
6432 * into active devices.
6433 * update_activate_spare - a spare device has replaced a failed
6434 * device in an array, update the disk_ord_tbl. If this disk is
6435 * present in all member arrays then also clear the SPARE_DISK
6437 * update_create_array
6439 * update_rename_array
6440 * update_add_remove_disk
6442 struct intel_super
*super
= st
->sb
;
6443 struct imsm_super
*mpb
;
6444 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6446 /* update requires a larger buf but the allocation failed */
6447 if (super
->next_len
&& !super
->next_buf
) {
6448 super
->next_len
= 0;
6452 if (super
->next_buf
) {
6453 memcpy(super
->next_buf
, super
->buf
, super
->len
);
6455 super
->len
= super
->next_len
;
6456 super
->buf
= super
->next_buf
;
6458 super
->next_len
= 0;
6459 super
->next_buf
= NULL
;
6462 mpb
= super
->anchor
;
6465 case update_takeover
: {
6466 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6467 if (apply_takeover_update(u
, super
, &update
->space_list
)) {
6468 imsm_update_version_info(super
);
6469 super
->updates_pending
++;
6474 case update_reshape_container_disks
: {
6475 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6476 if (apply_reshape_container_disks_update(
6477 u
, super
, &update
->space_list
))
6478 super
->updates_pending
++;
6481 case update_reshape_migration
: {
6482 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
6483 if (apply_reshape_migration_update(
6484 u
, super
, &update
->space_list
))
6485 super
->updates_pending
++;
6488 case update_activate_spare
: {
6489 struct imsm_update_activate_spare
*u
= (void *) update
->buf
;
6490 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->array
);
6491 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6492 struct imsm_map
*migr_map
;
6493 struct active_array
*a
;
6494 struct imsm_disk
*disk
;
6499 int victim
= get_imsm_disk_idx(dev
, u
->slot
, -1);
6502 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6507 fprintf(stderr
, "error: imsm_activate_spare passed "
6508 "an unknown disk (index: %d)\n",
6513 super
->updates_pending
++;
6514 /* count failures (excluding rebuilds and the victim)
6515 * to determine map[0] state
6518 for (i
= 0; i
< map
->num_members
; i
++) {
6521 disk
= get_imsm_disk(super
,
6522 get_imsm_disk_idx(dev
, i
, -1));
6523 if (!disk
|| is_failed(disk
))
6527 /* adding a pristine spare, assign a new index */
6528 if (dl
->index
< 0) {
6529 dl
->index
= super
->anchor
->num_disks
;
6530 super
->anchor
->num_disks
++;
6533 disk
->status
|= CONFIGURED_DISK
;
6534 disk
->status
&= ~SPARE_DISK
;
6537 to_state
= imsm_check_degraded(super
, dev
, failed
);
6538 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6539 migrate(dev
, to_state
, MIGR_REBUILD
);
6540 migr_map
= get_imsm_map(dev
, 1);
6541 set_imsm_ord_tbl_ent(map
, u
->slot
, dl
->index
);
6542 set_imsm_ord_tbl_ent(migr_map
, u
->slot
, dl
->index
| IMSM_ORD_REBUILD
);
6544 /* update the family_num to mark a new container
6545 * generation, being careful to record the existing
6546 * family_num in orig_family_num to clean up after
6547 * earlier mdadm versions that neglected to set it.
6549 if (mpb
->orig_family_num
== 0)
6550 mpb
->orig_family_num
= mpb
->family_num
;
6551 mpb
->family_num
+= super
->random
;
6553 /* count arrays using the victim in the metadata */
6555 for (a
= st
->arrays
; a
; a
= a
->next
) {
6556 dev
= get_imsm_dev(super
, a
->info
.container_member
);
6557 map
= get_imsm_map(dev
, 0);
6559 if (get_imsm_disk_slot(map
, victim
) >= 0)
6563 /* delete the victim if it is no longer being
6569 /* We know that 'manager' isn't touching anything,
6570 * so it is safe to delete
6572 for (dlp
= &super
->disks
; *dlp
; dlp
= &(*dlp
)->next
)
6573 if ((*dlp
)->index
== victim
)
6576 /* victim may be on the missing list */
6578 for (dlp
= &super
->missing
; *dlp
; dlp
= &(*dlp
)->next
)
6579 if ((*dlp
)->index
== victim
)
6581 imsm_delete(super
, dlp
, victim
);
6585 case update_create_array
: {
6586 /* someone wants to create a new array, we need to be aware of
6587 * a few races/collisions:
6588 * 1/ 'Create' called by two separate instances of mdadm
6589 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6590 * devices that have since been assimilated via
6592 * In the event this update can not be carried out mdadm will
6593 * (FIX ME) notice that its update did not take hold.
6595 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6596 struct intel_dev
*dv
;
6597 struct imsm_dev
*dev
;
6598 struct imsm_map
*map
, *new_map
;
6599 unsigned long long start
, end
;
6600 unsigned long long new_start
, new_end
;
6602 struct disk_info
*inf
;
6605 /* handle racing creates: first come first serve */
6606 if (u
->dev_idx
< mpb
->num_raid_devs
) {
6607 dprintf("%s: subarray %d already defined\n",
6608 __func__
, u
->dev_idx
);
6612 /* check update is next in sequence */
6613 if (u
->dev_idx
!= mpb
->num_raid_devs
) {
6614 dprintf("%s: can not create array %d expected index %d\n",
6615 __func__
, u
->dev_idx
, mpb
->num_raid_devs
);
6619 new_map
= get_imsm_map(&u
->dev
, 0);
6620 new_start
= __le32_to_cpu(new_map
->pba_of_lba0
);
6621 new_end
= new_start
+ __le32_to_cpu(new_map
->blocks_per_member
);
6622 inf
= get_disk_info(u
);
6624 /* handle activate_spare versus create race:
6625 * check to make sure that overlapping arrays do not include
6628 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6629 dev
= get_imsm_dev(super
, i
);
6630 map
= get_imsm_map(dev
, 0);
6631 start
= __le32_to_cpu(map
->pba_of_lba0
);
6632 end
= start
+ __le32_to_cpu(map
->blocks_per_member
);
6633 if ((new_start
>= start
&& new_start
<= end
) ||
6634 (start
>= new_start
&& start
<= new_end
))
6639 if (disks_overlap(super
, i
, u
)) {
6640 dprintf("%s: arrays overlap\n", __func__
);
6645 /* check that prepare update was successful */
6646 if (!update
->space
) {
6647 dprintf("%s: prepare update failed\n", __func__
);
6651 /* check that all disks are still active before committing
6652 * changes. FIXME: could we instead handle this by creating a
6653 * degraded array? That's probably not what the user expects,
6654 * so better to drop this update on the floor.
6656 for (i
= 0; i
< new_map
->num_members
; i
++) {
6657 dl
= serial_to_dl(inf
[i
].serial
, super
);
6659 dprintf("%s: disk disappeared\n", __func__
);
6664 super
->updates_pending
++;
6666 /* convert spares to members and fixup ord_tbl */
6667 for (i
= 0; i
< new_map
->num_members
; i
++) {
6668 dl
= serial_to_dl(inf
[i
].serial
, super
);
6669 if (dl
->index
== -1) {
6670 dl
->index
= mpb
->num_disks
;
6672 dl
->disk
.status
|= CONFIGURED_DISK
;
6673 dl
->disk
.status
&= ~SPARE_DISK
;
6675 set_imsm_ord_tbl_ent(new_map
, i
, dl
->index
);
6680 update
->space
= NULL
;
6681 imsm_copy_dev(dev
, &u
->dev
);
6682 dv
->index
= u
->dev_idx
;
6683 dv
->next
= super
->devlist
;
6684 super
->devlist
= dv
;
6685 mpb
->num_raid_devs
++;
6687 imsm_update_version_info(super
);
6690 /* mdmon knows how to release update->space, but not
6691 * ((struct intel_dev *) update->space)->dev
6693 if (update
->space
) {
6699 case update_kill_array
: {
6700 struct imsm_update_kill_array
*u
= (void *) update
->buf
;
6701 int victim
= u
->dev_idx
;
6702 struct active_array
*a
;
6703 struct intel_dev
**dp
;
6704 struct imsm_dev
*dev
;
6706 /* sanity check that we are not affecting the uuid of
6707 * active arrays, or deleting an active array
6709 * FIXME when immutable ids are available, but note that
6710 * we'll also need to fixup the invalidated/active
6711 * subarray indexes in mdstat
6713 for (a
= st
->arrays
; a
; a
= a
->next
)
6714 if (a
->info
.container_member
>= victim
)
6716 /* by definition if mdmon is running at least one array
6717 * is active in the container, so checking
6718 * mpb->num_raid_devs is just extra paranoia
6720 dev
= get_imsm_dev(super
, victim
);
6721 if (a
|| !dev
|| mpb
->num_raid_devs
== 1) {
6722 dprintf("failed to delete subarray-%d\n", victim
);
6726 for (dp
= &super
->devlist
; *dp
;)
6727 if ((*dp
)->index
== (unsigned)super
->current_vol
) {
6730 if ((*dp
)->index
> (unsigned)victim
)
6734 mpb
->num_raid_devs
--;
6735 super
->updates_pending
++;
6738 case update_rename_array
: {
6739 struct imsm_update_rename_array
*u
= (void *) update
->buf
;
6740 char name
[MAX_RAID_SERIAL_LEN
+1];
6741 int target
= u
->dev_idx
;
6742 struct active_array
*a
;
6743 struct imsm_dev
*dev
;
6745 /* sanity check that we are not affecting the uuid of
6748 snprintf(name
, MAX_RAID_SERIAL_LEN
, "%s", (char *) u
->name
);
6749 name
[MAX_RAID_SERIAL_LEN
] = '\0';
6750 for (a
= st
->arrays
; a
; a
= a
->next
)
6751 if (a
->info
.container_member
== target
)
6753 dev
= get_imsm_dev(super
, u
->dev_idx
);
6754 if (a
|| !dev
|| !check_name(super
, name
, 1)) {
6755 dprintf("failed to rename subarray-%d\n", target
);
6759 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
6760 super
->updates_pending
++;
6763 case update_add_remove_disk
: {
6764 /* we may be able to repair some arrays if disks are
6765 * being added, check teh status of add_remove_disk
6766 * if discs has been added.
6768 if (add_remove_disk_update(super
)) {
6769 struct active_array
*a
;
6771 super
->updates_pending
++;
6772 for (a
= st
->arrays
; a
; a
= a
->next
)
6773 a
->check_degraded
= 1;
6778 fprintf(stderr
, "error: unsuported process update type:"
6779 "(type: %d)\n", type
);
6783 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
);
6785 static void imsm_prepare_update(struct supertype
*st
,
6786 struct metadata_update
*update
)
6789 * Allocate space to hold new disk entries, raid-device entries or a new
6790 * mpb if necessary. The manager synchronously waits for updates to
6791 * complete in the monitor, so new mpb buffers allocated here can be
6792 * integrated by the monitor thread without worrying about live pointers
6793 * in the manager thread.
6795 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6796 struct intel_super
*super
= st
->sb
;
6797 struct imsm_super
*mpb
= super
->anchor
;
6802 case update_takeover
: {
6803 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6804 if (u
->direction
== R0_TO_R10
) {
6805 void **tail
= (void **)&update
->space_list
;
6806 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subarray
);
6807 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6808 int num_members
= map
->num_members
;
6812 /* allocate memory for added disks */
6813 for (i
= 0; i
< num_members
; i
++) {
6814 size
= sizeof(struct dl
);
6815 space
= malloc(size
);
6824 /* allocate memory for new device */
6825 size
= sizeof_imsm_dev(super
->devlist
->dev
, 0) +
6826 (num_members
* sizeof(__u32
));
6827 space
= malloc(size
);
6836 len
= disks_to_mpb_size(num_members
* 2);
6838 /* if allocation didn't success, free buffer */
6839 while (update
->space_list
) {
6840 void **sp
= update
->space_list
;
6841 update
->space_list
= *sp
;
6849 case update_reshape_container_disks
: {
6850 /* Every raid device in the container is about to
6851 * gain some more devices, and we will enter a
6853 * So each 'imsm_map' will be bigger, and the imsm_vol
6854 * will now hold 2 of them.
6855 * Thus we need new 'struct imsm_dev' allocations sized
6856 * as sizeof_imsm_dev but with more devices in both maps.
6858 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6859 struct intel_dev
*dl
;
6860 void **space_tail
= (void**)&update
->space_list
;
6862 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6864 for (dl
= super
->devlist
; dl
; dl
= dl
->next
) {
6865 int size
= sizeof_imsm_dev(dl
->dev
, 1);
6867 if (u
->new_raid_disks
> u
->old_raid_disks
)
6868 size
+= sizeof(__u32
)*2*
6869 (u
->new_raid_disks
- u
->old_raid_disks
);
6878 len
= disks_to_mpb_size(u
->new_raid_disks
);
6879 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
6882 case update_reshape_migration
: {
6883 /* for migration level 0->5 we need to add disks
6884 * so the same as for container operation we will copy
6885 * device to the bigger location.
6886 * in memory prepared device and new disk area are prepared
6887 * for usage in process update
6889 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
6890 struct intel_dev
*id
;
6891 void **space_tail
= (void **)&update
->space_list
;
6894 int current_level
= -1;
6896 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6898 /* add space for bigger array in update
6900 for (id
= super
->devlist
; id
; id
= id
->next
) {
6901 if (id
->index
== (unsigned)u
->subdev
) {
6902 size
= sizeof_imsm_dev(id
->dev
, 1);
6903 if (u
->new_raid_disks
> u
->old_raid_disks
)
6904 size
+= sizeof(__u32
)*2*
6905 (u
->new_raid_disks
- u
->old_raid_disks
);
6915 if (update
->space_list
== NULL
)
6918 /* add space for disk in update
6920 size
= sizeof(struct dl
);
6923 free(update
->space_list
);
6924 update
->space_list
= NULL
;
6931 /* add spare device to update
6933 for (id
= super
->devlist
; id
; id
= id
->next
)
6934 if (id
->index
== (unsigned)u
->subdev
) {
6935 struct imsm_dev
*dev
;
6936 struct imsm_map
*map
;
6938 dev
= get_imsm_dev(super
, u
->subdev
);
6939 map
= get_imsm_map(dev
, 0);
6940 current_level
= map
->raid_level
;
6943 if ((u
->new_level
== 5) && (u
->new_level
!= current_level
)) {
6944 struct mdinfo
*spares
;
6946 spares
= get_spares_for_grow(st
);
6954 makedev(dev
->disk
.major
,
6956 dl
= get_disk_super(super
,
6959 dl
->index
= u
->old_raid_disks
;
6965 len
= disks_to_mpb_size(u
->new_raid_disks
);
6966 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
6969 case update_create_array
: {
6970 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6971 struct intel_dev
*dv
;
6972 struct imsm_dev
*dev
= &u
->dev
;
6973 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6975 struct disk_info
*inf
;
6979 inf
= get_disk_info(u
);
6980 len
= sizeof_imsm_dev(dev
, 1);
6981 /* allocate a new super->devlist entry */
6982 dv
= malloc(sizeof(*dv
));
6984 dv
->dev
= malloc(len
);
6989 update
->space
= NULL
;
6993 /* count how many spares will be converted to members */
6994 for (i
= 0; i
< map
->num_members
; i
++) {
6995 dl
= serial_to_dl(inf
[i
].serial
, super
);
6997 /* hmm maybe it failed?, nothing we can do about
7002 if (count_memberships(dl
, super
) == 0)
7005 len
+= activate
* sizeof(struct imsm_disk
);
7012 /* check if we need a larger metadata buffer */
7013 if (super
->next_buf
)
7014 buf_len
= super
->next_len
;
7016 buf_len
= super
->len
;
7018 if (__le32_to_cpu(mpb
->mpb_size
) + len
> buf_len
) {
7019 /* ok we need a larger buf than what is currently allocated
7020 * if this allocation fails process_update will notice that
7021 * ->next_len is set and ->next_buf is NULL
7023 buf_len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + len
, 512);
7024 if (super
->next_buf
)
7025 free(super
->next_buf
);
7027 super
->next_len
= buf_len
;
7028 if (posix_memalign(&super
->next_buf
, 512, buf_len
) == 0)
7029 memset(super
->next_buf
, 0, buf_len
);
7031 super
->next_buf
= NULL
;
7035 /* must be called while manager is quiesced */
7036 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
)
7038 struct imsm_super
*mpb
= super
->anchor
;
7040 struct imsm_dev
*dev
;
7041 struct imsm_map
*map
;
7042 int i
, j
, num_members
;
7045 dprintf("%s: deleting device[%d] from imsm_super\n",
7048 /* shift all indexes down one */
7049 for (iter
= super
->disks
; iter
; iter
= iter
->next
)
7050 if (iter
->index
> (int)index
)
7052 for (iter
= super
->missing
; iter
; iter
= iter
->next
)
7053 if (iter
->index
> (int)index
)
7056 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
7057 dev
= get_imsm_dev(super
, i
);
7058 map
= get_imsm_map(dev
, 0);
7059 num_members
= map
->num_members
;
7060 for (j
= 0; j
< num_members
; j
++) {
7061 /* update ord entries being careful not to propagate
7062 * ord-flags to the first map
7064 ord
= get_imsm_ord_tbl_ent(dev
, j
, -1);
7066 if (ord_to_idx(ord
) <= index
)
7069 map
= get_imsm_map(dev
, 0);
7070 set_imsm_ord_tbl_ent(map
, j
, ord_to_idx(ord
- 1));
7071 map
= get_imsm_map(dev
, 1);
7073 set_imsm_ord_tbl_ent(map
, j
, ord
- 1);
7078 super
->updates_pending
++;
7080 struct dl
*dl
= *dlp
;
7082 *dlp
= (*dlp
)->next
;
7083 __free_imsm_disk(dl
);
7087 static char disk_by_path
[] = "/dev/disk/by-path/";
7089 static const char *imsm_get_disk_controller_domain(const char *path
)
7091 char disk_path
[PATH_MAX
];
7095 strncpy(disk_path
, disk_by_path
, PATH_MAX
- 1);
7096 strncat(disk_path
, path
, PATH_MAX
- strlen(disk_path
) - 1);
7097 if (stat(disk_path
, &st
) == 0) {
7098 struct sys_dev
* hba
;
7101 path
= devt_to_devpath(st
.st_rdev
);
7104 hba
= find_disk_attached_hba(-1, path
);
7105 if (hba
&& hba
->type
== SYS_DEV_SAS
)
7107 else if (hba
&& hba
->type
== SYS_DEV_SATA
)
7111 dprintf("path: %s hba: %s attached: %s\n",
7112 path
, (hba
) ? hba
->path
: "NULL", drv
);
7120 static int imsm_find_array_minor_by_subdev(int subdev
, int container
, int *minor
)
7122 char subdev_name
[20];
7123 struct mdstat_ent
*mdstat
;
7125 sprintf(subdev_name
, "%d", subdev
);
7126 mdstat
= mdstat_by_subdev(subdev_name
, container
);
7130 *minor
= mdstat
->devnum
;
7131 free_mdstat(mdstat
);
7135 static int imsm_reshape_is_allowed_on_container(struct supertype
*st
,
7136 struct geo_params
*geo
,
7137 int *old_raid_disks
)
7139 /* currently we only support increasing the number of devices
7140 * for a container. This increases the number of device for each
7141 * member array. They must all be RAID0 or RAID5.
7144 struct mdinfo
*info
, *member
;
7145 int devices_that_can_grow
= 0;
7147 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7148 "st->devnum = (%i)\n",
7151 if (geo
->size
!= -1 ||
7152 geo
->level
!= UnSet
||
7153 geo
->layout
!= UnSet
||
7154 geo
->chunksize
!= 0 ||
7155 geo
->raid_disks
== UnSet
) {
7156 dprintf("imsm: Container operation is allowed for "
7157 "raid disks number change only.\n");
7161 info
= container_content_imsm(st
, NULL
);
7162 for (member
= info
; member
; member
= member
->next
) {
7166 dprintf("imsm: checking device_num: %i\n",
7167 member
->container_member
);
7169 if (geo
->raid_disks
<= member
->array
.raid_disks
) {
7170 /* we work on container for Online Capacity Expansion
7171 * only so raid_disks has to grow
7173 dprintf("imsm: for container operation raid disks "
7174 "increase is required\n");
7178 if ((info
->array
.level
!= 0) &&
7179 (info
->array
.level
!= 5)) {
7180 /* we cannot use this container with other raid level
7182 dprintf("imsm: for container operation wrong"
7183 " raid level (%i) detected\n",
7187 /* check for platform support
7188 * for this raid level configuration
7190 struct intel_super
*super
= st
->sb
;
7191 if (!is_raid_level_supported(super
->orom
,
7192 member
->array
.level
,
7194 dprintf("platform does not support raid%d with"
7198 geo
->raid_disks
> 1 ? "s" : "");
7201 /* check if component size is aligned to chunk size
7203 if (info
->component_size
%
7204 (info
->array
.chunk_size
/512)) {
7205 dprintf("Component size is not aligned to "
7211 if (*old_raid_disks
&&
7212 info
->array
.raid_disks
!= *old_raid_disks
)
7214 *old_raid_disks
= info
->array
.raid_disks
;
7216 /* All raid5 and raid0 volumes in container
7217 * have to be ready for Online Capacity Expansion
7218 * so they need to be assembled. We have already
7219 * checked that no recovery etc is happening.
7221 result
= imsm_find_array_minor_by_subdev(member
->container_member
,
7225 dprintf("imsm: cannot find array\n");
7228 devices_that_can_grow
++;
7231 if (!member
&& devices_that_can_grow
)
7235 dprintf("\tContainer operation allowed\n");
7237 dprintf("\tError: %i\n", ret_val
);
7242 /* Function: get_spares_for_grow
7243 * Description: Allocates memory and creates list of spare devices
7244 * available in container. Checks if spare drive size is acceptable.
7245 * Parameters: Pointer to the supertype structure
7246 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7249 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
)
7251 unsigned long long min_size
= min_acceptable_spare_size_imsm(st
);
7252 return container_choose_spares(st
, min_size
, NULL
, NULL
, NULL
, 0);
7255 /******************************************************************************
7256 * function: imsm_create_metadata_update_for_reshape
7257 * Function creates update for whole IMSM container.
7259 ******************************************************************************/
7260 static int imsm_create_metadata_update_for_reshape(
7261 struct supertype
*st
,
7262 struct geo_params
*geo
,
7264 struct imsm_update_reshape
**updatep
)
7266 struct intel_super
*super
= st
->sb
;
7267 struct imsm_super
*mpb
= super
->anchor
;
7268 int update_memory_size
= 0;
7269 struct imsm_update_reshape
*u
= NULL
;
7270 struct mdinfo
*spares
= NULL
;
7272 int delta_disks
= 0;
7275 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7278 delta_disks
= geo
->raid_disks
- old_raid_disks
;
7280 /* size of all update data without anchor */
7281 update_memory_size
= sizeof(struct imsm_update_reshape
);
7283 /* now add space for spare disks that we need to add. */
7284 update_memory_size
+= sizeof(u
->new_disks
[0]) * (delta_disks
- 1);
7286 u
= calloc(1, update_memory_size
);
7289 "cannot get memory for imsm_update_reshape update\n");
7292 u
->type
= update_reshape_container_disks
;
7293 u
->old_raid_disks
= old_raid_disks
;
7294 u
->new_raid_disks
= geo
->raid_disks
;
7296 /* now get spare disks list
7298 spares
= get_spares_for_grow(st
);
7301 || delta_disks
> spares
->array
.spare_disks
) {
7302 fprintf(stderr
, Name
": imsm: ERROR: Cannot get spare devices "
7303 "for %s.\n", geo
->dev_name
);
7307 /* we have got spares
7308 * update disk list in imsm_disk list table in anchor
7310 dprintf("imsm: %i spares are available.\n\n",
7311 spares
->array
.spare_disks
);
7314 for (i
= 0; i
< delta_disks
; i
++) {
7319 u
->new_disks
[i
] = makedev(dev
->disk
.major
,
7321 dl
= get_disk_super(super
, dev
->disk
.major
, dev
->disk
.minor
);
7322 dl
->index
= mpb
->num_disks
;
7332 dprintf("imsm: reshape update preparation :");
7333 if (i
== delta_disks
) {
7336 return update_memory_size
;
7339 dprintf(" Error\n");
7344 /******************************************************************************
7345 * function: imsm_create_metadata_update_for_migration()
7346 * Creates update for IMSM array.
7348 ******************************************************************************/
7349 static int imsm_create_metadata_update_for_migration(
7350 struct supertype
*st
,
7351 struct geo_params
*geo
,
7352 struct imsm_update_reshape_migration
**updatep
)
7354 struct intel_super
*super
= st
->sb
;
7355 int update_memory_size
= 0;
7356 struct imsm_update_reshape_migration
*u
= NULL
;
7357 struct imsm_dev
*dev
;
7358 int previous_level
= -1;
7360 dprintf("imsm_create_metadata_update_for_migration(enter)"
7361 " New Level = %i\n", geo
->level
);
7363 /* size of all update data without anchor */
7364 update_memory_size
= sizeof(struct imsm_update_reshape_migration
);
7366 u
= calloc(1, update_memory_size
);
7368 dprintf("error: cannot get memory for "
7369 "imsm_create_metadata_update_for_migration\n");
7372 u
->type
= update_reshape_migration
;
7373 u
->subdev
= super
->current_vol
;
7374 u
->new_level
= geo
->level
;
7375 u
->new_layout
= geo
->layout
;
7376 u
->new_raid_disks
= u
->old_raid_disks
= geo
->raid_disks
;
7377 u
->new_disks
[0] = -1;
7378 u
->new_chunksize
= -1;
7380 dev
= get_imsm_dev(super
, u
->subdev
);
7382 struct imsm_map
*map
;
7384 map
= get_imsm_map(dev
, 0);
7386 int current_chunk_size
=
7387 __le16_to_cpu(map
->blocks_per_strip
) / 2;
7389 if (geo
->chunksize
!= current_chunk_size
) {
7390 u
->new_chunksize
= geo
->chunksize
/ 1024;
7392 "chunk size change from %i to %i\n",
7393 current_chunk_size
, u
->new_chunksize
);
7395 previous_level
= map
->raid_level
;
7398 if ((geo
->level
== 5) && (previous_level
== 0)) {
7399 struct mdinfo
*spares
= NULL
;
7401 u
->new_raid_disks
++;
7402 spares
= get_spares_for_grow(st
);
7403 if ((spares
== NULL
) || (spares
->array
.spare_disks
< 1)) {
7406 update_memory_size
= 0;
7407 dprintf("error: cannot get spare device "
7408 "for requested migration");
7413 dprintf("imsm: reshape update preparation : OK\n");
7416 return update_memory_size
;
7419 static void imsm_update_metadata_locally(struct supertype
*st
,
7422 struct metadata_update mu
;
7427 mu
.space_list
= NULL
;
7429 imsm_prepare_update(st
, &mu
);
7430 imsm_process_update(st
, &mu
);
7432 while (mu
.space_list
) {
7433 void **space
= mu
.space_list
;
7434 mu
.space_list
= *space
;
7439 /***************************************************************************
7440 * Function: imsm_analyze_change
7441 * Description: Function analyze change for single volume
7442 * and validate if transition is supported
7443 * Parameters: Geometry parameters, supertype structure
7444 * Returns: Operation type code on success, -1 if fail
7445 ****************************************************************************/
7446 enum imsm_reshape_type
imsm_analyze_change(struct supertype
*st
,
7447 struct geo_params
*geo
)
7454 getinfo_super_imsm_volume(st
, &info
, NULL
);
7456 if ((geo
->level
!= info
.array
.level
) &&
7457 (geo
->level
>= 0) &&
7458 (geo
->level
!= UnSet
)) {
7459 switch (info
.array
.level
) {
7461 if (geo
->level
== 5) {
7462 change
= CH_MIGRATION
;
7465 if (geo
->level
== 10) {
7466 change
= CH_TAKEOVER
;
7471 if (geo
->level
== 0) {
7472 change
= CH_TAKEOVER
;
7477 if (geo
->level
== 0) {
7478 change
= CH_TAKEOVER
;
7485 Name
" Error. Level Migration from %d to %d "
7487 info
.array
.level
, geo
->level
);
7488 goto analyse_change_exit
;
7491 geo
->level
= info
.array
.level
;
7493 if ((geo
->layout
!= info
.array
.layout
)
7494 && ((geo
->layout
!= UnSet
) && (geo
->layout
!= -1))) {
7495 change
= CH_MIGRATION
;
7496 if ((info
.array
.layout
== 0)
7497 && (info
.array
.level
== 5)
7498 && (geo
->layout
== 5)) {
7499 /* reshape 5 -> 4 */
7500 } else if ((info
.array
.layout
== 5)
7501 && (info
.array
.level
== 5)
7502 && (geo
->layout
== 0)) {
7503 /* reshape 4 -> 5 */
7508 Name
" Error. Layout Migration from %d to %d "
7510 info
.array
.layout
, geo
->layout
);
7512 goto analyse_change_exit
;
7515 geo
->layout
= info
.array
.layout
;
7517 if ((geo
->chunksize
> 0) && (geo
->chunksize
!= UnSet
)
7518 && (geo
->chunksize
!= info
.array
.chunk_size
))
7519 change
= CH_MIGRATION
;
7521 geo
->chunksize
= info
.array
.chunk_size
;
7523 chunk
= geo
->chunksize
/ 1024;
7524 if (!validate_geometry_imsm(st
,
7534 struct intel_super
*super
= st
->sb
;
7535 struct imsm_super
*mpb
= super
->anchor
;
7537 if (mpb
->num_raid_devs
> 1) {
7539 Name
" Error. Cannot perform operation on %s"
7540 "- for this operation it MUST be single "
7541 "array in container\n",
7547 analyse_change_exit
:
7552 int imsm_takeover(struct supertype
*st
, struct geo_params
*geo
)
7554 struct intel_super
*super
= st
->sb
;
7555 struct imsm_update_takeover
*u
;
7557 u
= malloc(sizeof(struct imsm_update_takeover
));
7561 u
->type
= update_takeover
;
7562 u
->subarray
= super
->current_vol
;
7564 /* 10->0 transition */
7565 if (geo
->level
== 0)
7566 u
->direction
= R10_TO_R0
;
7568 /* 0->10 transition */
7569 if (geo
->level
== 10)
7570 u
->direction
= R0_TO_R10
;
7572 /* update metadata locally */
7573 imsm_update_metadata_locally(st
, u
,
7574 sizeof(struct imsm_update_takeover
));
7575 /* and possibly remotely */
7576 if (st
->update_tail
)
7577 append_metadata_update(st
, u
,
7578 sizeof(struct imsm_update_takeover
));
7585 static int warn_user_about_risk(void)
7590 "\nThis is an experimental feature. Data on the RAID volume(s) "
7591 "can be lost!!!\n\n"
7592 "To continue command execution please make sure that\n"
7593 "the grow process will not be interrupted. Use safe power\n"
7594 "supply to avoid unexpected system reboot. Make sure that\n"
7595 "reshaped container is not assembled automatically during\n"
7597 "If reshape is interrupted, assemble array manually\n"
7598 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
7599 "Assembly in scan mode is not possible in such case.\n"
7600 "Growing container with boot array is not possible.\n"
7601 "If boot array reshape is interrupted, whole file system\n"
7602 "can be lost.\n\n");
7603 rv
= ask("Do you want to continue? ");
7604 fprintf(stderr
, "\n");
7609 static int imsm_reshape_super(struct supertype
*st
, long long size
, int level
,
7610 int layout
, int chunksize
, int raid_disks
,
7611 int delta_disks
, char *backup
, char *dev
,
7615 struct geo_params geo
;
7617 dprintf("imsm: reshape_super called.\n");
7619 memset(&geo
, 0, sizeof(struct geo_params
));
7622 geo
.dev_id
= st
->devnum
;
7625 geo
.layout
= layout
;
7626 geo
.chunksize
= chunksize
;
7627 geo
.raid_disks
= raid_disks
;
7628 if (delta_disks
!= UnSet
)
7629 geo
.raid_disks
+= delta_disks
;
7631 dprintf("\tfor level : %i\n", geo
.level
);
7632 dprintf("\tfor raid_disks : %i\n", geo
.raid_disks
);
7634 if (experimental() == 0)
7637 if (st
->container_dev
== st
->devnum
) {
7638 /* On container level we can only increase number of devices. */
7639 dprintf("imsm: info: Container operation\n");
7640 int old_raid_disks
= 0;
7642 /* this warning will be removed when imsm checkpointing
7643 * will be implemented, and restoring from check-point
7644 * operation will be transparent for reboot process
7646 if (warn_user_about_risk() == 0)
7649 if (imsm_reshape_is_allowed_on_container(
7650 st
, &geo
, &old_raid_disks
)) {
7651 struct imsm_update_reshape
*u
= NULL
;
7654 len
= imsm_create_metadata_update_for_reshape(
7655 st
, &geo
, old_raid_disks
, &u
);
7658 dprintf("imsm: Cannot prepare update\n");
7659 goto exit_imsm_reshape_super
;
7663 /* update metadata locally */
7664 imsm_update_metadata_locally(st
, u
, len
);
7665 /* and possibly remotely */
7666 if (st
->update_tail
)
7667 append_metadata_update(st
, u
, len
);
7672 fprintf(stderr
, Name
": (imsm) Operation "
7673 "is not allowed on this container\n");
7676 /* On volume level we support following operations
7677 * - takeover: raid10 -> raid0; raid0 -> raid10
7678 * - chunk size migration
7679 * - migration: raid5 -> raid0; raid0 -> raid5
7681 struct intel_super
*super
= st
->sb
;
7682 struct intel_dev
*dev
= super
->devlist
;
7684 dprintf("imsm: info: Volume operation\n");
7685 /* find requested device */
7687 imsm_find_array_minor_by_subdev(dev
->index
, st
->container_dev
, &devnum
);
7688 if (devnum
== geo
.dev_id
)
7693 fprintf(stderr
, Name
" Cannot find %s (%i) subarray\n",
7694 geo
.dev_name
, geo
.dev_id
);
7695 goto exit_imsm_reshape_super
;
7697 super
->current_vol
= dev
->index
;
7698 change
= imsm_analyze_change(st
, &geo
);
7701 ret_val
= imsm_takeover(st
, &geo
);
7703 case CH_MIGRATION
: {
7704 struct imsm_update_reshape_migration
*u
= NULL
;
7706 imsm_create_metadata_update_for_migration(
7710 "Cannot prepare update\n");
7714 /* update metadata locally */
7715 imsm_update_metadata_locally(st
, u
, len
);
7716 /* and possibly remotely */
7717 if (st
->update_tail
)
7718 append_metadata_update(st
, u
, len
);
7728 exit_imsm_reshape_super
:
7729 dprintf("imsm: reshape_super Exit code = %i\n", ret_val
);
static int imsm_manage_reshape(
	int afd, struct mdinfo *sra, struct reshape *reshape,
	struct supertype *st, unsigned long stripes,
	int *fds, unsigned long long *offsets,
	int dests, int *destfd, unsigned long long *destoffsets)
{
	/* No imsm-specific reshape engine yet: hand the entire data-move
	 * job to the generic child_monitor() implementation unchanged.
	 */
	return child_monitor(afd, sra, reshape, st, stripes,
			     fds, offsets, dests, destfd, destoffsets);
}
7744 #endif /* MDASSEMBLE */
7746 struct superswitch super_imsm
= {
7748 .examine_super
= examine_super_imsm
,
7749 .brief_examine_super
= brief_examine_super_imsm
,
7750 .brief_examine_subarrays
= brief_examine_subarrays_imsm
,
7751 .export_examine_super
= export_examine_super_imsm
,
7752 .detail_super
= detail_super_imsm
,
7753 .brief_detail_super
= brief_detail_super_imsm
,
7754 .write_init_super
= write_init_super_imsm
,
7755 .validate_geometry
= validate_geometry_imsm
,
7756 .add_to_super
= add_to_super_imsm
,
7757 .remove_from_super
= remove_from_super_imsm
,
7758 .detail_platform
= detail_platform_imsm
,
7759 .kill_subarray
= kill_subarray_imsm
,
7760 .update_subarray
= update_subarray_imsm
,
7761 .load_container
= load_container_imsm
,
7762 .default_geometry
= default_geometry_imsm
,
7763 .get_disk_controller_domain
= imsm_get_disk_controller_domain
,
7764 .reshape_super
= imsm_reshape_super
,
7765 .manage_reshape
= imsm_manage_reshape
,
7767 .match_home
= match_home_imsm
,
7768 .uuid_from_super
= uuid_from_super_imsm
,
7769 .getinfo_super
= getinfo_super_imsm
,
7770 .getinfo_super_disks
= getinfo_super_disks_imsm
,
7771 .update_super
= update_super_imsm
,
7773 .avail_size
= avail_size_imsm
,
7774 .min_acceptable_spare_size
= min_acceptable_spare_size_imsm
,
7776 .compare_super
= compare_super_imsm
,
7778 .load_super
= load_super_imsm
,
7779 .init_super
= init_super_imsm
,
7780 .store_super
= store_super_imsm
,
7781 .free_super
= free_super_imsm
,
7782 .match_metadata_desc
= match_metadata_desc_imsm
,
7783 .container_content
= container_content_imsm
,
7790 .open_new
= imsm_open_new
,
7791 .set_array_state
= imsm_set_array_state
,
7792 .set_disk
= imsm_set_disk
,
7793 .sync_metadata
= imsm_sync_metadata
,
7794 .activate_spare
= imsm_activate_spare
,
7795 .process_update
= imsm_process_update
,
7796 .prepare_update
= imsm_prepare_update
,
7797 #endif /* MDASSEMBLE */