2 * mdadm - Intel(R) Matrix Storage Manager Support
4 * Copyright (C) 2002-2008 Intel Corporation
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 #define HAVE_STDINT_H 1
24 #include "platform-intel.h"
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
54 #define MPB_SECTOR_CNT 2210
55 #define IMSM_RESERVED_SECTORS 4096
56 #define SECT_PER_MB_SHIFT 11
58 /* Disk configuration info. */
59 #define IMSM_MAX_DEVICES 255
61 __u8 serial
[MAX_RAID_SERIAL_LEN
];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks
; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id
; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status
; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num
; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler
[IMSM_DISK_FILLERS
]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
73 /* RAID map configuration infos. */
75 __u32 pba_of_lba0
; /* start address of partition */
76 __u32 blocks_per_member
;/* blocks per member */
77 __u32 num_data_stripes
; /* number of data stripes */
78 __u16 blocks_per_strip
;
79 __u8 map_state
; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members
; /* number of member disks */
89 __u8 num_domains
; /* number of parity domains */
90 __u8 failed_disk_num
; /* valid only when state is degraded */
92 __u32 filler
[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl
[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
97 } __attribute__ ((packed
));
100 __u32 curr_migr_unit
;
101 __u32 checkpoint_id
; /* id to access curr_migr_unit */
102 __u8 migr_state
; /* Normal or Migrating */
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type
; /* Initializing, Rebuilding, ... */
111 __u8 fs_state
; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors
; /* number of mismatches */
113 __u16 bad_blocks
; /* number of bad blocks during verify */
115 struct imsm_map map
[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed
));
120 __u8 volume
[MAX_RAID_SERIAL_LEN
];
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status
; /* Persistent RaidDev status */
137 __u32 reserved_blocks
; /* Reserved blocks at beginning of volume */
141 __u8 cng_master_disk
;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler
[IMSM_DEV_FILLERS
];
148 } __attribute__ ((packed
));
151 __u8 sig
[MAX_SIGNATURE_LENGTH
]; /* 0x00 - 0x1F */
152 __u32 check_sum
; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size
; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num
; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num
; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size
; /* 0x30 - 0x33 in bytes */
157 __u32 attributes
; /* 0x34 - 0x37 */
158 __u8 num_disks
; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs
; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos
; /* 0x3A */
161 __u8 fill
[1]; /* 0x3B */
162 __u32 cache_size
; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num
; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count
; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size
; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler
[IMSM_FILLERS
]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk
[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed
));
173 #define BBM_LOG_MAX_ENTRIES 254
175 struct bbm_log_entry
{
176 __u64 defective_block_start
;
177 #define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset
;
179 __u16 remapped_marked_count
;
181 } __attribute__ ((__packed__
));
184 __u32 signature
; /* 0xABADB10C */
186 __u32 reserved_spare_block_count
; /* 0 */
187 __u32 reserved
; /* 0xFFFF */
188 __u64 first_spare_lba
;
189 struct bbm_log_entry mapped_block_entries
[BBM_LOG_MAX_ENTRIES
];
190 } __attribute__ ((__packed__
));
194 static char *map_state_str
[] = { "normal", "uninitialized", "degraded", "failed" };
197 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
199 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
201 #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
202 * be recovered using srcMap */
203 #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
204 * already been migrated and must
205 * be recovered from checkpoint area */
207 __u32 rec_status
; /* Status used to determine how to restart
208 * migration in case it aborts
210 __u32 curr_migr_unit
; /* 0..numMigrUnits-1 */
211 __u32 family_num
; /* Family number of MPB
212 * containing the RaidDev
213 * that is migrating */
214 __u32 ascending_migr
; /* True if migrating in increasing
216 __u32 blocks_per_unit
; /* Num disk blocks per unit of operation */
217 __u32 dest_depth_per_unit
; /* Num member blocks each destMap
219 * advances per unit-of-operation */
220 __u32 ckpt_area_pba
; /* Pba of first block of ckpt copy area */
221 __u32 dest_1st_member_lba
; /* First member lba on first
222 * stripe of destination */
223 __u32 num_migr_units
; /* Total num migration units-of-op */
224 __u32 post_migr_vol_cap
; /* Size of volume after
225 * migration completes */
226 __u32 post_migr_vol_cap_hi
; /* Expansion space for LBA64 */
227 __u32 ckpt_read_disk_num
; /* Which member disk in destSubMap[0] the
228 * migration ckpt record was read from
229 * (for recovered migrations) */
230 } __attribute__ ((__packed__
));
232 static __u8
migr_type(struct imsm_dev
*dev
)
234 if (dev
->vol
.migr_type
== MIGR_VERIFY
&&
235 dev
->status
& DEV_VERIFY_AND_FIX
)
238 return dev
->vol
.migr_type
;
241 static void set_migr_type(struct imsm_dev
*dev
, __u8 migr_type
)
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
246 if (migr_type
== MIGR_REPAIR
) {
247 dev
->vol
.migr_type
= MIGR_VERIFY
;
248 dev
->status
|= DEV_VERIFY_AND_FIX
;
250 dev
->vol
.migr_type
= migr_type
;
251 dev
->status
&= ~DEV_VERIFY_AND_FIX
;
/* Number of 512-byte sectors needed to hold @bytes bytes
 * (i.e. @bytes rounded up to the next sector boundary, in sectors).
 */
static unsigned int sector_count(__u32 bytes)
{
	return ((bytes + (512-1)) & (~(512-1))) / 512;
}
260 static unsigned int mpb_sectors(struct imsm_super
*mpb
)
262 return sector_count(__le32_to_cpu(mpb
->mpb_size
));
266 struct imsm_dev
*dev
;
267 struct intel_dev
*next
;
272 enum sys_dev_type type
;
275 struct intel_hba
*next
;
282 /* internal representation of IMSM metadata */
285 void *buf
; /* O_DIRECT buffer for reading/writing metadata */
286 struct imsm_super
*anchor
; /* immovable parameters */
289 void *migr_rec_buf
; /* buffer for I/O operations */
290 struct migr_record
*migr_rec
; /* migration record */
292 size_t len
; /* size of the 'buf' allocation */
293 void *next_buf
; /* for realloc'ing buf from the manager */
295 int updates_pending
; /* count of pending updates for mdmon */
296 int current_vol
; /* index of raid device undergoing creation */
297 __u32 create_offset
; /* common start for 'current_vol' */
298 __u32 random
; /* random data for seeding new family numbers */
299 struct intel_dev
*devlist
;
303 __u8 serial
[MAX_RAID_SERIAL_LEN
];
306 struct imsm_disk disk
;
309 struct extent
*e
; /* for determining freespace @ create */
310 int raiddisk
; /* slot to fill in autolayout */
313 struct dl
*disk_mgmt_list
; /* list of disks to add/remove while mdmon
315 struct dl
*missing
; /* disks removed while we weren't looking */
316 struct bbm_log
*bbm_log
;
317 struct intel_hba
*hba
; /* device path of the raid controller for this metadata */
318 const struct imsm_orom
*orom
; /* platform firmware support */
319 struct intel_super
*next
; /* (temp) list for disambiguating family_num */
323 struct imsm_disk disk
;
324 #define IMSM_UNKNOWN_OWNER (-1)
326 struct intel_disk
*next
;
330 unsigned long long start
, size
;
333 /* definitions of reshape process types */
334 enum imsm_reshape_type
{
339 /* definition of messages passed to imsm_process_update */
340 enum imsm_update_type
{
341 update_activate_spare
,
345 update_add_remove_disk
,
346 update_reshape_container_disks
,
347 update_reshape_migration
,
351 struct imsm_update_activate_spare
{
352 enum imsm_update_type type
;
356 struct imsm_update_activate_spare
*next
;
369 enum takeover_direction
{
373 struct imsm_update_takeover
{
374 enum imsm_update_type type
;
376 enum takeover_direction direction
;
379 struct imsm_update_reshape
{
380 enum imsm_update_type type
;
384 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
387 struct imsm_update_reshape_migration
{
388 enum imsm_update_type type
;
391 /* fields for array migration changes
398 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
402 __u8 serial
[MAX_RAID_SERIAL_LEN
];
405 struct imsm_update_create_array
{
406 enum imsm_update_type type
;
411 struct imsm_update_kill_array
{
412 enum imsm_update_type type
;
416 struct imsm_update_rename_array
{
417 enum imsm_update_type type
;
418 __u8 name
[MAX_RAID_SERIAL_LEN
];
422 struct imsm_update_add_remove_disk
{
423 enum imsm_update_type type
;
427 static const char *_sys_dev_type
[] = {
428 [SYS_DEV_UNKNOWN
] = "Unknown",
429 [SYS_DEV_SAS
] = "SAS",
430 [SYS_DEV_SATA
] = "SATA"
433 const char *get_sys_dev_type(enum sys_dev_type type
)
435 if (type
>= SYS_DEV_MAX
)
436 type
= SYS_DEV_UNKNOWN
;
438 return _sys_dev_type
[type
];
441 static struct intel_hba
* alloc_intel_hba(struct sys_dev
*device
)
443 struct intel_hba
*result
= malloc(sizeof(*result
));
445 result
->type
= device
->type
;
446 result
->path
= strdup(device
->path
);
448 if (result
->path
&& (result
->pci_id
= strrchr(result
->path
, '/')) != NULL
)
454 static struct intel_hba
* find_intel_hba(struct intel_hba
*hba
, struct sys_dev
*device
)
456 struct intel_hba
*result
=NULL
;
457 for (result
= hba
; result
; result
= result
->next
) {
458 if (result
->type
== device
->type
&& strcmp(result
->path
, device
->path
) == 0)
464 static int attach_hba_to_super(struct intel_super
*super
, struct sys_dev
*device
)
466 struct intel_hba
*hba
;
468 /* check if disk attached to Intel HBA */
469 hba
= find_intel_hba(super
->hba
, device
);
472 /* Check if HBA is already attached to super */
473 if (super
->hba
== NULL
) {
474 super
->hba
= alloc_intel_hba(device
);
479 /* Intel metadata allows for all disks attached to the same type HBA.
480 * Do not sypport odf HBA types mixing
482 if (device
->type
!= hba
->type
)
488 hba
->next
= alloc_intel_hba(device
);
492 static struct sys_dev
* find_disk_attached_hba(int fd
, const char *devname
)
494 struct sys_dev
*list
, *elem
, *prev
;
497 if ((list
= find_intel_devices()) == NULL
)
501 disk_path
= (char *) devname
;
503 disk_path
= diskfd_to_devpath(fd
);
510 for (prev
= NULL
, elem
= list
; elem
; prev
= elem
, elem
= elem
->next
) {
511 if (path_attached_to_hba(disk_path
, elem
->path
)) {
515 prev
->next
= elem
->next
;
517 if (disk_path
!= devname
)
523 if (disk_path
!= devname
)
531 static int find_intel_hba_capability(int fd
, struct intel_super
*super
,
534 static struct supertype
*match_metadata_desc_imsm(char *arg
)
536 struct supertype
*st
;
538 if (strcmp(arg
, "imsm") != 0 &&
539 strcmp(arg
, "default") != 0
543 st
= malloc(sizeof(*st
));
546 memset(st
, 0, sizeof(*st
));
547 st
->container_dev
= NoMdDev
;
548 st
->ss
= &super_imsm
;
549 st
->max_devs
= IMSM_MAX_DEVICES
;
550 st
->minor_version
= 0;
556 static __u8
*get_imsm_version(struct imsm_super
*mpb
)
558 return &mpb
->sig
[MPB_SIG_LEN
];
562 /* retrieve a disk directly from the anchor when the anchor is known to be
563 * up-to-date, currently only at load time
565 static struct imsm_disk
*__get_imsm_disk(struct imsm_super
*mpb
, __u8 index
)
567 if (index
>= mpb
->num_disks
)
569 return &mpb
->disk
[index
];
572 /* retrieve the disk description based on a index of the disk
575 static struct dl
*get_imsm_dl_disk(struct intel_super
*super
, __u8 index
)
579 for (d
= super
->disks
; d
; d
= d
->next
)
580 if (d
->index
== index
)
585 /* retrieve a disk from the parsed metadata */
586 static struct imsm_disk
*get_imsm_disk(struct intel_super
*super
, __u8 index
)
590 dl
= get_imsm_dl_disk(super
, index
);
597 /* generate a checksum directly from the anchor when the anchor is known to be
598 * up-to-date, currently only at load or write_super after coalescing
600 static __u32
__gen_imsm_checksum(struct imsm_super
*mpb
)
602 __u32 end
= mpb
->mpb_size
/ sizeof(end
);
603 __u32
*p
= (__u32
*) mpb
;
607 sum
+= __le32_to_cpu(*p
);
611 return sum
- __le32_to_cpu(mpb
->check_sum
);
614 static size_t sizeof_imsm_map(struct imsm_map
*map
)
616 return sizeof(struct imsm_map
) + sizeof(__u32
) * (map
->num_members
- 1);
619 struct imsm_map
*get_imsm_map(struct imsm_dev
*dev
, int second_map
)
621 /* A device can have 2 maps if it is in the middle of a migration.
623 * 0 - we return the first map
624 * 1 - we return the second map if it exists, else NULL
625 * -1 - we return the second map if it exists, else the first
627 struct imsm_map
*map
= &dev
->vol
.map
[0];
629 if (second_map
== 1 && !dev
->vol
.migr_state
)
631 else if (second_map
== 1 ||
632 (second_map
< 0 && dev
->vol
.migr_state
)) {
635 return ptr
+ sizeof_imsm_map(map
);
641 /* return the size of the device.
642 * migr_state increases the returned size if map[0] were to be duplicated
644 static size_t sizeof_imsm_dev(struct imsm_dev
*dev
, int migr_state
)
646 size_t size
= sizeof(*dev
) - sizeof(struct imsm_map
) +
647 sizeof_imsm_map(get_imsm_map(dev
, 0));
649 /* migrating means an additional map */
650 if (dev
->vol
.migr_state
)
651 size
+= sizeof_imsm_map(get_imsm_map(dev
, 1));
653 size
+= sizeof_imsm_map(get_imsm_map(dev
, 0));
659 /* retrieve disk serial number list from a metadata update */
660 static struct disk_info
*get_disk_info(struct imsm_update_create_array
*update
)
663 struct disk_info
*inf
;
665 inf
= u
+ sizeof(*update
) - sizeof(struct imsm_dev
) +
666 sizeof_imsm_dev(&update
->dev
, 0);
672 static struct imsm_dev
*__get_imsm_dev(struct imsm_super
*mpb
, __u8 index
)
678 if (index
>= mpb
->num_raid_devs
)
681 /* devices start after all disks */
682 offset
= ((void *) &mpb
->disk
[mpb
->num_disks
]) - _mpb
;
684 for (i
= 0; i
<= index
; i
++)
686 return _mpb
+ offset
;
688 offset
+= sizeof_imsm_dev(_mpb
+ offset
, 0);
693 static struct imsm_dev
*get_imsm_dev(struct intel_super
*super
, __u8 index
)
695 struct intel_dev
*dv
;
697 if (index
>= super
->anchor
->num_raid_devs
)
699 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
700 if (dv
->index
== index
)
708 * == 1 get second map
709 * == -1 than get map according to the current migr_state
711 static __u32
get_imsm_ord_tbl_ent(struct imsm_dev
*dev
,
715 struct imsm_map
*map
;
717 map
= get_imsm_map(dev
, second_map
);
719 /* top byte identifies disk under rebuild */
720 return __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
723 #define ord_to_idx(ord) (((ord) << 8) >> 8)
/* Disk index stored at @slot of @dev's ord table with the flag bits
 * in the top byte (e.g. IMSM_ORD_REBUILD) masked off; @second_map
 * selects which map is consulted, as in get_imsm_map().
 */
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);

	return ord_to_idx(ord);
}
731 static void set_imsm_ord_tbl_ent(struct imsm_map
*map
, int slot
, __u32 ord
)
733 map
->disk_ord_tbl
[slot
] = __cpu_to_le32(ord
);
736 static int get_imsm_disk_slot(struct imsm_map
*map
, unsigned idx
)
741 for (slot
= 0; slot
< map
->num_members
; slot
++) {
742 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
743 if (ord_to_idx(ord
) == idx
)
750 static int get_imsm_raid_level(struct imsm_map
*map
)
752 if (map
->raid_level
== 1) {
753 if (map
->num_members
== 2)
759 return map
->raid_level
;
762 static int cmp_extent(const void *av
, const void *bv
)
764 const struct extent
*a
= av
;
765 const struct extent
*b
= bv
;
766 if (a
->start
< b
->start
)
768 if (a
->start
> b
->start
)
773 static int count_memberships(struct dl
*dl
, struct intel_super
*super
)
778 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
779 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
780 struct imsm_map
*map
= get_imsm_map(dev
, 0);
782 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
789 static struct extent
*get_extents(struct intel_super
*super
, struct dl
*dl
)
791 /* find a list of used extents on the given physical device */
792 struct extent
*rv
, *e
;
794 int memberships
= count_memberships(dl
, super
);
795 __u32 reservation
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
797 rv
= malloc(sizeof(struct extent
) * (memberships
+ 1));
802 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
803 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
804 struct imsm_map
*map
= get_imsm_map(dev
, 0);
806 if (get_imsm_disk_slot(map
, dl
->index
) >= 0) {
807 e
->start
= __le32_to_cpu(map
->pba_of_lba0
);
808 e
->size
= __le32_to_cpu(map
->blocks_per_member
);
812 qsort(rv
, memberships
, sizeof(*rv
), cmp_extent
);
814 /* determine the start of the metadata
815 * when no raid devices are defined use the default
816 * ...otherwise allow the metadata to truncate the value
817 * as is the case with older versions of imsm
820 struct extent
*last
= &rv
[memberships
- 1];
823 remainder
= __le32_to_cpu(dl
->disk
.total_blocks
) -
824 (last
->start
+ last
->size
);
825 /* round down to 1k block to satisfy precision of the kernel
829 /* make sure remainder is still sane */
830 if (remainder
< (unsigned)ROUND_UP(super
->len
, 512) >> 9)
831 remainder
= ROUND_UP(super
->len
, 512) >> 9;
832 if (reservation
> remainder
)
833 reservation
= remainder
;
835 e
->start
= __le32_to_cpu(dl
->disk
.total_blocks
) - reservation
;
840 /* try to determine how much space is reserved for metadata from
841 * the last get_extents() entry, otherwise fallback to the
844 static __u32
imsm_reserved_sectors(struct intel_super
*super
, struct dl
*dl
)
850 /* for spares just return a minimal reservation which will grow
851 * once the spare is picked up by an array
854 return MPB_SECTOR_CNT
;
856 e
= get_extents(super
, dl
);
858 return MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
860 /* scroll to last entry */
861 for (i
= 0; e
[i
].size
; i
++)
864 rv
= __le32_to_cpu(dl
->disk
.total_blocks
) - e
[i
].start
;
871 static int is_spare(struct imsm_disk
*disk
)
873 return (disk
->status
& SPARE_DISK
) == SPARE_DISK
;
876 static int is_configured(struct imsm_disk
*disk
)
878 return (disk
->status
& CONFIGURED_DISK
) == CONFIGURED_DISK
;
881 static int is_failed(struct imsm_disk
*disk
)
883 return (disk
->status
& FAILED_DISK
) == FAILED_DISK
;
886 /* Return minimum size of a spare that can be used in this array*/
887 static unsigned long long min_acceptable_spare_size_imsm(struct supertype
*st
)
889 struct intel_super
*super
= st
->sb
;
893 unsigned long long rv
= 0;
897 /* find first active disk in array */
899 while (dl
&& (is_failed(&dl
->disk
) || dl
->index
== -1))
903 /* find last lba used by subarrays */
904 e
= get_extents(super
, dl
);
907 for (i
= 0; e
[i
].size
; i
++)
910 rv
= e
[i
-1].start
+ e
[i
-1].size
;
912 /* add the amount of space needed for metadata */
913 rv
= rv
+ MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
918 static __u64
blocks_per_migr_unit(struct intel_super
*super
,
919 struct imsm_dev
*dev
);
921 static void print_imsm_dev(struct intel_super
*super
,
922 struct imsm_dev
*dev
,
928 struct imsm_map
*map
= get_imsm_map(dev
, 0);
929 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
933 printf("[%.16s]:\n", dev
->volume
);
934 printf(" UUID : %s\n", uuid
);
935 printf(" RAID Level : %d", get_imsm_raid_level(map
));
937 printf(" <-- %d", get_imsm_raid_level(map2
));
939 printf(" Members : %d", map
->num_members
);
941 printf(" <-- %d", map2
->num_members
);
943 printf(" Slots : [");
944 for (i
= 0; i
< map
->num_members
; i
++) {
945 ord
= get_imsm_ord_tbl_ent(dev
, i
, 0);
946 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
951 for (i
= 0; i
< map2
->num_members
; i
++) {
952 ord
= get_imsm_ord_tbl_ent(dev
, i
, 1);
953 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
958 printf(" Failed disk : ");
959 if (map
->failed_disk_num
== 0xff)
962 printf("%i", map
->failed_disk_num
);
964 slot
= get_imsm_disk_slot(map
, disk_idx
);
966 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
967 printf(" This Slot : %d%s\n", slot
,
968 ord
& IMSM_ORD_REBUILD
? " (out-of-sync)" : "");
970 printf(" This Slot : ?\n");
971 sz
= __le32_to_cpu(dev
->size_high
);
973 sz
+= __le32_to_cpu(dev
->size_low
);
974 printf(" Array Size : %llu%s\n", (unsigned long long)sz
,
975 human_size(sz
* 512));
976 sz
= __le32_to_cpu(map
->blocks_per_member
);
977 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz
,
978 human_size(sz
* 512));
979 printf(" Sector Offset : %u\n",
980 __le32_to_cpu(map
->pba_of_lba0
));
981 printf(" Num Stripes : %u\n",
982 __le32_to_cpu(map
->num_data_stripes
));
983 printf(" Chunk Size : %u KiB",
984 __le16_to_cpu(map
->blocks_per_strip
) / 2);
986 printf(" <-- %u KiB",
987 __le16_to_cpu(map2
->blocks_per_strip
) / 2);
989 printf(" Reserved : %d\n", __le32_to_cpu(dev
->reserved_blocks
));
990 printf(" Migrate State : ");
991 if (dev
->vol
.migr_state
) {
992 if (migr_type(dev
) == MIGR_INIT
)
993 printf("initialize\n");
994 else if (migr_type(dev
) == MIGR_REBUILD
)
996 else if (migr_type(dev
) == MIGR_VERIFY
)
998 else if (migr_type(dev
) == MIGR_GEN_MIGR
)
999 printf("general migration\n");
1000 else if (migr_type(dev
) == MIGR_STATE_CHANGE
)
1001 printf("state change\n");
1002 else if (migr_type(dev
) == MIGR_REPAIR
)
1005 printf("<unknown:%d>\n", migr_type(dev
));
1008 printf(" Map State : %s", map_state_str
[map
->map_state
]);
1009 if (dev
->vol
.migr_state
) {
1010 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1012 printf(" <-- %s", map_state_str
[map
->map_state
]);
1013 printf("\n Checkpoint : %u (%llu)",
1014 __le32_to_cpu(dev
->vol
.curr_migr_unit
),
1015 (unsigned long long)blocks_per_migr_unit(super
, dev
));
1018 printf(" Dirty State : %s\n", dev
->vol
.dirty
? "dirty" : "clean");
1021 static void print_imsm_disk(struct imsm_super
*mpb
, int index
, __u32 reserved
)
1023 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, index
);
1024 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1027 if (index
< 0 || !disk
)
1031 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1032 printf(" Disk%02d Serial : %s\n", index
, str
);
1033 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1034 is_configured(disk
) ? " active" : "",
1035 is_failed(disk
) ? " failed" : "");
1036 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1037 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1038 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1039 human_size(sz
* 512));
1042 static int is_gen_migration(struct imsm_dev
*dev
);
1044 void examine_migr_rec_imsm(struct intel_super
*super
)
1046 struct migr_record
*migr_rec
= super
->migr_rec
;
1047 struct imsm_super
*mpb
= super
->anchor
;
1050 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1051 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1052 if (is_gen_migration(dev
) == 0)
1055 printf("\nMigration Record Information:");
1056 if (super
->disks
->index
> 1) {
1057 printf(" Empty\n ");
1058 printf("Examine one of first two disks in array\n");
1061 printf("\n Status : ");
1062 if (__le32_to_cpu(migr_rec
->rec_status
) == UNIT_SRC_NORMAL
)
1065 printf("Contains Data\n");
1066 printf(" Current Unit : %u\n",
1067 __le32_to_cpu(migr_rec
->curr_migr_unit
));
1068 printf(" Family : %u\n",
1069 __le32_to_cpu(migr_rec
->family_num
));
1070 printf(" Ascending : %u\n",
1071 __le32_to_cpu(migr_rec
->ascending_migr
));
1072 printf(" Blocks Per Unit : %u\n",
1073 __le32_to_cpu(migr_rec
->blocks_per_unit
));
1074 printf(" Dest. Depth Per Unit : %u\n",
1075 __le32_to_cpu(migr_rec
->dest_depth_per_unit
));
1076 printf(" Checkpoint Area pba : %u\n",
1077 __le32_to_cpu(migr_rec
->ckpt_area_pba
));
1078 printf(" First member lba : %u\n",
1079 __le32_to_cpu(migr_rec
->dest_1st_member_lba
));
1080 printf(" Total Number of Units : %u\n",
1081 __le32_to_cpu(migr_rec
->num_migr_units
));
1082 printf(" Size of volume : %u\n",
1083 __le32_to_cpu(migr_rec
->post_migr_vol_cap
));
1084 printf(" Expansion space for LBA64 : %u\n",
1085 __le32_to_cpu(migr_rec
->post_migr_vol_cap_hi
));
1086 printf(" Record was read from : %u\n",
1087 __le32_to_cpu(migr_rec
->ckpt_read_disk_num
));
1093 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
);
1095 static void examine_super_imsm(struct supertype
*st
, char *homehost
)
1097 struct intel_super
*super
= st
->sb
;
1098 struct imsm_super
*mpb
= super
->anchor
;
1099 char str
[MAX_SIGNATURE_LENGTH
];
1104 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
1107 snprintf(str
, MPB_SIG_LEN
, "%s", mpb
->sig
);
1108 printf(" Magic : %s\n", str
);
1109 snprintf(str
, strlen(MPB_VERSION_RAID0
), "%s", get_imsm_version(mpb
));
1110 printf(" Version : %s\n", get_imsm_version(mpb
));
1111 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb
->orig_family_num
));
1112 printf(" Family : %08x\n", __le32_to_cpu(mpb
->family_num
));
1113 printf(" Generation : %08x\n", __le32_to_cpu(mpb
->generation_num
));
1114 getinfo_super_imsm(st
, &info
, NULL
);
1115 fname_from_uuid(st
, &info
, nbuf
, ':');
1116 printf(" UUID : %s\n", nbuf
+ 5);
1117 sum
= __le32_to_cpu(mpb
->check_sum
);
1118 printf(" Checksum : %08x %s\n", sum
,
1119 __gen_imsm_checksum(mpb
) == sum
? "correct" : "incorrect");
1120 printf(" MPB Sectors : %d\n", mpb_sectors(mpb
));
1121 printf(" Disks : %d\n", mpb
->num_disks
);
1122 printf(" RAID Devices : %d\n", mpb
->num_raid_devs
);
1123 print_imsm_disk(mpb
, super
->disks
->index
, reserved
);
1124 if (super
->bbm_log
) {
1125 struct bbm_log
*log
= super
->bbm_log
;
1128 printf("Bad Block Management Log:\n");
1129 printf(" Log Size : %d\n", __le32_to_cpu(mpb
->bbm_log_size
));
1130 printf(" Signature : %x\n", __le32_to_cpu(log
->signature
));
1131 printf(" Entry Count : %d\n", __le32_to_cpu(log
->entry_count
));
1132 printf(" Spare Blocks : %d\n", __le32_to_cpu(log
->reserved_spare_block_count
));
1133 printf(" First Spare : %llx\n",
1134 (unsigned long long) __le64_to_cpu(log
->first_spare_lba
));
1136 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1138 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1140 super
->current_vol
= i
;
1141 getinfo_super_imsm(st
, &info
, NULL
);
1142 fname_from_uuid(st
, &info
, nbuf
, ':');
1143 print_imsm_dev(super
, dev
, nbuf
+ 5, super
->disks
->index
);
1145 for (i
= 0; i
< mpb
->num_disks
; i
++) {
1146 if (i
== super
->disks
->index
)
1148 print_imsm_disk(mpb
, i
, reserved
);
1150 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1151 struct imsm_disk
*disk
;
1152 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1160 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1161 printf(" Disk Serial : %s\n", str
);
1162 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1163 is_configured(disk
) ? " active" : "",
1164 is_failed(disk
) ? " failed" : "");
1165 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1166 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1167 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1168 human_size(sz
* 512));
1171 examine_migr_rec_imsm(super
);
1174 static void brief_examine_super_imsm(struct supertype
*st
, int verbose
)
1176 /* We just write a generic IMSM ARRAY entry */
1179 struct intel_super
*super
= st
->sb
;
1181 if (!super
->anchor
->num_raid_devs
) {
1182 printf("ARRAY metadata=imsm\n");
1186 getinfo_super_imsm(st
, &info
, NULL
);
1187 fname_from_uuid(st
, &info
, nbuf
, ':');
1188 printf("ARRAY metadata=imsm UUID=%s\n", nbuf
+ 5);
1191 static void brief_examine_subarrays_imsm(struct supertype
*st
, int verbose
)
1193 /* We just write a generic IMSM ARRAY entry */
1197 struct intel_super
*super
= st
->sb
;
1200 if (!super
->anchor
->num_raid_devs
)
1203 getinfo_super_imsm(st
, &info
, NULL
);
1204 fname_from_uuid(st
, &info
, nbuf
, ':');
1205 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
1206 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1208 super
->current_vol
= i
;
1209 getinfo_super_imsm(st
, &info
, NULL
);
1210 fname_from_uuid(st
, &info
, nbuf1
, ':');
1211 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1212 dev
->volume
, nbuf
+ 5, i
, nbuf1
+ 5);
1216 static void export_examine_super_imsm(struct supertype
*st
)
1218 struct intel_super
*super
= st
->sb
;
1219 struct imsm_super
*mpb
= super
->anchor
;
1223 getinfo_super_imsm(st
, &info
, NULL
);
1224 fname_from_uuid(st
, &info
, nbuf
, ':');
1225 printf("MD_METADATA=imsm\n");
1226 printf("MD_LEVEL=container\n");
1227 printf("MD_UUID=%s\n", nbuf
+5);
1228 printf("MD_DEVICES=%u\n", mpb
->num_disks
);
1231 static void detail_super_imsm(struct supertype
*st
, char *homehost
)
1236 getinfo_super_imsm(st
, &info
, NULL
);
1237 fname_from_uuid(st
, &info
, nbuf
, ':');
1238 printf("\n UUID : %s\n", nbuf
+ 5);
1241 static void brief_detail_super_imsm(struct supertype
*st
)
1245 getinfo_super_imsm(st
, &info
, NULL
);
1246 fname_from_uuid(st
, &info
, nbuf
, ':');
1247 printf(" UUID=%s", nbuf
+ 5);
1250 static int imsm_read_serial(int fd
, char *devname
, __u8
*serial
);
1251 static void fd2devname(int fd
, char *name
);
1253 static int ahci_enumerate_ports(const char *hba_path
, int port_count
, int host_base
, int verbose
)
1255 /* dump an unsorted list of devices attached to AHCI Intel storage
1256 * controller, as well as non-connected ports
1258 int hba_len
= strlen(hba_path
) + 1;
1263 unsigned long port_mask
= (1 << port_count
) - 1;
1265 if (port_count
> (int)sizeof(port_mask
) * 8) {
1267 fprintf(stderr
, Name
": port_count %d out of range\n", port_count
);
1271 /* scroll through /sys/dev/block looking for devices attached to
1274 dir
= opendir("/sys/dev/block");
1275 for (ent
= dir
? readdir(dir
) : NULL
; ent
; ent
= readdir(dir
)) {
1286 if (sscanf(ent
->d_name
, "%d:%d", &major
, &minor
) != 2)
1288 path
= devt_to_devpath(makedev(major
, minor
));
1291 if (!path_attached_to_hba(path
, hba_path
)) {
1297 /* retrieve the scsi device type */
1298 if (asprintf(&device
, "/sys/dev/block/%d:%d/device/xxxxxxx", major
, minor
) < 0) {
1300 fprintf(stderr
, Name
": failed to allocate 'device'\n");
1304 sprintf(device
, "/sys/dev/block/%d:%d/device/type", major
, minor
);
1305 if (load_sys(device
, buf
) != 0) {
1307 fprintf(stderr
, Name
": failed to read device type for %s\n",
1313 type
= strtoul(buf
, NULL
, 10);
1315 /* if it's not a disk print the vendor and model */
1316 if (!(type
== 0 || type
== 7 || type
== 14)) {
1319 sprintf(device
, "/sys/dev/block/%d:%d/device/vendor", major
, minor
);
1320 if (load_sys(device
, buf
) == 0) {
1321 strncpy(vendor
, buf
, sizeof(vendor
));
1322 vendor
[sizeof(vendor
) - 1] = '\0';
1323 c
= (char *) &vendor
[sizeof(vendor
) - 1];
1324 while (isspace(*c
) || *c
== '\0')
1328 sprintf(device
, "/sys/dev/block/%d:%d/device/model", major
, minor
);
1329 if (load_sys(device
, buf
) == 0) {
1330 strncpy(model
, buf
, sizeof(model
));
1331 model
[sizeof(model
) - 1] = '\0';
1332 c
= (char *) &model
[sizeof(model
) - 1];
1333 while (isspace(*c
) || *c
== '\0')
1337 if (vendor
[0] && model
[0])
1338 sprintf(buf
, "%.64s %.64s", vendor
, model
);
1340 switch (type
) { /* numbers from hald/linux/device.c */
1341 case 1: sprintf(buf
, "tape"); break;
1342 case 2: sprintf(buf
, "printer"); break;
1343 case 3: sprintf(buf
, "processor"); break;
1345 case 5: sprintf(buf
, "cdrom"); break;
1346 case 6: sprintf(buf
, "scanner"); break;
1347 case 8: sprintf(buf
, "media_changer"); break;
1348 case 9: sprintf(buf
, "comm"); break;
1349 case 12: sprintf(buf
, "raid"); break;
1350 default: sprintf(buf
, "unknown");
1356 /* chop device path to 'host%d' and calculate the port number */
1357 c
= strchr(&path
[hba_len
], '/');
1360 fprintf(stderr
, Name
": %s - invalid path name\n", path
+ hba_len
);
1365 if (sscanf(&path
[hba_len
], "host%d", &port
) == 1)
1369 *c
= '/'; /* repair the full string */
1370 fprintf(stderr
, Name
": failed to determine port number for %s\n",
1377 /* mark this port as used */
1378 port_mask
&= ~(1 << port
);
1380 /* print out the device information */
1382 printf(" Port%d : - non-disk device (%s) -\n", port
, buf
);
1386 fd
= dev_open(ent
->d_name
, O_RDONLY
);
1388 printf(" Port%d : - disk info unavailable -\n", port
);
1390 fd2devname(fd
, buf
);
1391 printf(" Port%d : %s", port
, buf
);
1392 if (imsm_read_serial(fd
, NULL
, (__u8
*) buf
) == 0)
1393 printf(" (%s)\n", buf
);
1408 for (i
= 0; i
< port_count
; i
++)
1409 if (port_mask
& (1 << i
))
1410 printf(" Port%d : - no device attached -\n", i
);
1418 static void print_found_intel_controllers(struct sys_dev
*elem
)
1420 for (; elem
; elem
= elem
->next
) {
1421 fprintf(stderr
, Name
": found Intel(R) ");
1422 if (elem
->type
== SYS_DEV_SATA
)
1423 fprintf(stderr
, "SATA ");
1424 else if (elem
->type
== SYS_DEV_SAS
)
1425 fprintf(stderr
, "SAS ");
1426 fprintf(stderr
, "RAID controller");
1428 fprintf(stderr
, " at %s", elem
->pci_id
);
1429 fprintf(stderr
, ".\n");
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	/* Count the scsi hosts (ports) under an AHCI controller's sysfs
	 * directory.  Returns the lowest host number seen (the host base),
	 * or -1 if the directory cannot be opened; *port_count is set to
	 * the number of ports relative to that base.
	 */
	struct dirent *ent;
	DIR *dir;
	int host_base = -1;

	*port_count = 0;
	if ((dir = opendir(hba_path)) == NULL)
		return -1;

	for (ent = readdir(dir); ent; ent = readdir(dir)) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		if (*port_count == 0)
			host_base = host;
		else if (host < host_base)
			host_base = host;

		if (host + 1 > *port_count + host_base)
			*port_count = host + 1 - host_base;
	}
	closedir(dir);
	return host_base;
}
1461 static void print_imsm_capability(const struct imsm_orom
*orom
)
1463 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1464 printf(" Version : %d.%d.%d.%d\n", orom
->major_ver
, orom
->minor_ver
,
1465 orom
->hotfix_ver
, orom
->build
);
1466 printf(" RAID Levels :%s%s%s%s%s\n",
1467 imsm_orom_has_raid0(orom
) ? " raid0" : "",
1468 imsm_orom_has_raid1(orom
) ? " raid1" : "",
1469 imsm_orom_has_raid1e(orom
) ? " raid1e" : "",
1470 imsm_orom_has_raid10(orom
) ? " raid10" : "",
1471 imsm_orom_has_raid5(orom
) ? " raid5" : "");
1472 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1473 imsm_orom_has_chunk(orom
, 2) ? " 2k" : "",
1474 imsm_orom_has_chunk(orom
, 4) ? " 4k" : "",
1475 imsm_orom_has_chunk(orom
, 8) ? " 8k" : "",
1476 imsm_orom_has_chunk(orom
, 16) ? " 16k" : "",
1477 imsm_orom_has_chunk(orom
, 32) ? " 32k" : "",
1478 imsm_orom_has_chunk(orom
, 64) ? " 64k" : "",
1479 imsm_orom_has_chunk(orom
, 128) ? " 128k" : "",
1480 imsm_orom_has_chunk(orom
, 256) ? " 256k" : "",
1481 imsm_orom_has_chunk(orom
, 512) ? " 512k" : "",
1482 imsm_orom_has_chunk(orom
, 1024*1) ? " 1M" : "",
1483 imsm_orom_has_chunk(orom
, 1024*2) ? " 2M" : "",
1484 imsm_orom_has_chunk(orom
, 1024*4) ? " 4M" : "",
1485 imsm_orom_has_chunk(orom
, 1024*8) ? " 8M" : "",
1486 imsm_orom_has_chunk(orom
, 1024*16) ? " 16M" : "",
1487 imsm_orom_has_chunk(orom
, 1024*32) ? " 32M" : "",
1488 imsm_orom_has_chunk(orom
, 1024*64) ? " 64M" : "");
1489 printf(" Max Disks : %d\n", orom
->tds
);
1490 printf(" Max Volumes : %d\n", orom
->vpa
);
1494 static int detail_platform_imsm(int verbose
, int enumerate_only
)
1496 /* There are two components to imsm platform support, the ahci SATA
1497 * controller and the option-rom. To find the SATA controller we
1498 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1499 * controller with the Intel vendor id is present. This approach
1500 * allows mdadm to leverage the kernel's ahci detection logic, with the
1501 * caveat that if ahci.ko is not loaded mdadm will not be able to
1502 * detect platform raid capabilities. The option-rom resides in a
1503 * platform "Adapter ROM". We scan for its signature to retrieve the
1504 * platform capabilities. If raid support is disabled in the BIOS the
1505 * option-rom capability structure will not be available.
1507 const struct imsm_orom
*orom
;
1508 struct sys_dev
*list
, *hba
;
1513 if (enumerate_only
) {
1514 if (check_env("IMSM_NO_PLATFORM"))
1516 list
= find_intel_devices();
1519 for (hba
= list
; hba
; hba
= hba
->next
) {
1520 orom
= find_imsm_capability(hba
->type
);
1526 free_sys_dev(&list
);
1530 list
= find_intel_devices();
1533 fprintf(stderr
, Name
": no active Intel(R) RAID "
1534 "controller found.\n");
1535 free_sys_dev(&list
);
1538 print_found_intel_controllers(list
);
1540 for (hba
= list
; hba
; hba
= hba
->next
) {
1541 orom
= find_imsm_capability(hba
->type
);
1543 fprintf(stderr
, Name
": imsm capabilities not found for controller: %s (type %s)\n",
1544 hba
->path
, get_sys_dev_type(hba
->type
));
1546 print_imsm_capability(orom
);
1549 for (hba
= list
; hba
; hba
= hba
->next
) {
1550 printf(" I/O Controller : %s (%s)\n",
1551 hba
->path
, get_sys_dev_type(hba
->type
));
1553 if (hba
->type
== SYS_DEV_SATA
) {
1554 host_base
= ahci_get_port_count(hba
->path
, &port_count
);
1555 if (ahci_enumerate_ports(hba
->path
, port_count
, host_base
, verbose
)) {
1557 fprintf(stderr
, Name
": failed to enumerate "
1558 "ports on SATA controller at %s.", hba
->pci_id
);
1564 free_sys_dev(&list
);
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* the imsm metadata format does not specify any host
	 * identification information.  We return -1 since we can never
	 * confirm nor deny whether a given array is "meant" for this
	 * host.  We rely on compare_super and the 'family_num' fields to
	 * exclude member disks that do not belong, and we rely on
	 * mdadm.conf to specify the arrays that should be assembled.
	 * Auto-assembly may still pick up "foreign" arrays.
	 */
	return -1;
}
1583 static void uuid_from_super_imsm(struct supertype
*st
, int uuid
[4])
1585 /* The uuid returned here is used for:
1586 * uuid to put into bitmap file (Create, Grow)
1587 * uuid for backup header when saving critical section (Grow)
1588 * comparing uuids when re-adding a device into an array
1589 * In these cases the uuid required is that of the data-array,
1590 * not the device-set.
1591 * uuid to recognise same set when adding a missing device back
1592 * to an array. This is a uuid for the device-set.
1594 * For each of these we can make do with a truncated
1595 * or hashed uuid rather than the original, as long as
1597 * In each case the uuid required is that of the data-array,
1598 * not the device-set.
1600 /* imsm does not track uuid's so we synthesis one using sha1 on
1601 * - The signature (Which is constant for all imsm array, but no matter)
1602 * - the orig_family_num of the container
1603 * - the index number of the volume
1604 * - the 'serial' number of the volume.
1605 * Hopefully these are all constant.
1607 struct intel_super
*super
= st
->sb
;
1610 struct sha1_ctx ctx
;
1611 struct imsm_dev
*dev
= NULL
;
1614 /* some mdadm versions failed to set ->orig_family_num, in which
1615 * case fall back to ->family_num. orig_family_num will be
1616 * fixed up with the first metadata update.
1618 family_num
= super
->anchor
->orig_family_num
;
1619 if (family_num
== 0)
1620 family_num
= super
->anchor
->family_num
;
1621 sha1_init_ctx(&ctx
);
1622 sha1_process_bytes(super
->anchor
->sig
, MPB_SIG_LEN
, &ctx
);
1623 sha1_process_bytes(&family_num
, sizeof(__u32
), &ctx
);
1624 if (super
->current_vol
>= 0)
1625 dev
= get_imsm_dev(super
, super
->current_vol
);
1627 __u32 vol
= super
->current_vol
;
1628 sha1_process_bytes(&vol
, sizeof(vol
), &ctx
);
1629 sha1_process_bytes(dev
->volume
, MAX_RAID_SERIAL_LEN
, &ctx
);
1631 sha1_finish_ctx(&ctx
, buf
);
1632 memcpy(uuid
, buf
, 4*4);
1637 get_imsm_numerical_version(struct imsm_super
*mpb
, int *m
, int *p
)
1639 __u8
*v
= get_imsm_version(mpb
);
1640 __u8
*end
= mpb
->sig
+ MAX_SIGNATURE_LENGTH
;
1641 char major
[] = { 0, 0, 0 };
1642 char minor
[] = { 0 ,0, 0 };
1643 char patch
[] = { 0, 0, 0 };
1644 char *ver_parse
[] = { major
, minor
, patch
};
1648 while (*v
!= '\0' && v
< end
) {
1649 if (*v
!= '.' && j
< 2)
1650 ver_parse
[i
][j
++] = *v
;
1658 *m
= strtol(minor
, NULL
, 0);
1659 *p
= strtol(patch
, NULL
, 0);
1663 static __u32
migr_strip_blocks_resync(struct imsm_dev
*dev
)
1665 /* migr_strip_size when repairing or initializing parity */
1666 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1667 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1669 switch (get_imsm_raid_level(map
)) {
1674 return 128*1024 >> 9;
1678 static __u32
migr_strip_blocks_rebuild(struct imsm_dev
*dev
)
1680 /* migr_strip_size when rebuilding a degraded disk, no idea why
1681 * this is different than migr_strip_size_resync(), but it's good
1684 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1685 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1687 switch (get_imsm_raid_level(map
)) {
1690 if (map
->num_members
% map
->num_domains
== 0)
1691 return 128*1024 >> 9;
1695 return max((__u32
) 64*1024 >> 9, chunk
);
1697 return 128*1024 >> 9;
1701 static __u32
num_stripes_per_unit_resync(struct imsm_dev
*dev
)
1703 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1704 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1705 __u32 lo_chunk
= __le32_to_cpu(lo
->blocks_per_strip
);
1706 __u32 hi_chunk
= __le32_to_cpu(hi
->blocks_per_strip
);
1708 return max((__u32
) 1, hi_chunk
/ lo_chunk
);
1711 static __u32
num_stripes_per_unit_rebuild(struct imsm_dev
*dev
)
1713 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1714 int level
= get_imsm_raid_level(lo
);
1716 if (level
== 1 || level
== 10) {
1717 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1719 return hi
->num_domains
;
1721 return num_stripes_per_unit_resync(dev
);
1724 static __u8
imsm_num_data_members(struct imsm_dev
*dev
, int second_map
)
1726 /* named 'imsm_' because raid0, raid1 and raid10
1727 * counter-intuitively have the same number of data disks
1729 struct imsm_map
*map
= get_imsm_map(dev
, second_map
);
1731 switch (get_imsm_raid_level(map
)) {
1735 return map
->num_members
;
1737 return map
->num_members
- 1;
1739 dprintf("%s: unsupported raid level\n", __func__
);
1744 static __u32
parity_segment_depth(struct imsm_dev
*dev
)
1746 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1747 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1749 switch(get_imsm_raid_level(map
)) {
1752 return chunk
* map
->num_domains
;
1754 return chunk
* map
->num_members
;
1760 static __u32
map_migr_block(struct imsm_dev
*dev
, __u32 block
)
1762 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1763 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1764 __u32 strip
= block
/ chunk
;
1766 switch (get_imsm_raid_level(map
)) {
1769 __u32 vol_strip
= (strip
* map
->num_domains
) + 1;
1770 __u32 vol_stripe
= vol_strip
/ map
->num_members
;
1772 return vol_stripe
* chunk
+ block
% chunk
;
1774 __u32 stripe
= strip
/ (map
->num_members
- 1);
1776 return stripe
* chunk
+ block
% chunk
;
1783 static __u64
blocks_per_migr_unit(struct intel_super
*super
,
1784 struct imsm_dev
*dev
)
1786 /* calculate the conversion factor between per member 'blocks'
1787 * (md/{resync,rebuild}_start) and imsm migration units, return
1788 * 0 for the 'not migrating' and 'unsupported migration' cases
1790 if (!dev
->vol
.migr_state
)
1793 switch (migr_type(dev
)) {
1794 case MIGR_GEN_MIGR
: {
1795 struct migr_record
*migr_rec
= super
->migr_rec
;
1796 return __le32_to_cpu(migr_rec
->blocks_per_unit
);
1801 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1802 __u32 stripes_per_unit
;
1803 __u32 blocks_per_unit
;
1812 /* yes, this is really the translation of migr_units to
1813 * per-member blocks in the 'resync' case
1815 stripes_per_unit
= num_stripes_per_unit_resync(dev
);
1816 migr_chunk
= migr_strip_blocks_resync(dev
);
1817 disks
= imsm_num_data_members(dev
, 0);
1818 blocks_per_unit
= stripes_per_unit
* migr_chunk
* disks
;
1819 stripe
= __le32_to_cpu(map
->blocks_per_strip
) * disks
;
1820 segment
= blocks_per_unit
/ stripe
;
1821 block_rel
= blocks_per_unit
- segment
* stripe
;
1822 parity_depth
= parity_segment_depth(dev
);
1823 block_map
= map_migr_block(dev
, block_rel
);
1824 return block_map
+ parity_depth
* segment
;
1826 case MIGR_REBUILD
: {
1827 __u32 stripes_per_unit
;
1830 stripes_per_unit
= num_stripes_per_unit_rebuild(dev
);
1831 migr_chunk
= migr_strip_blocks_rebuild(dev
);
1832 return migr_chunk
* stripes_per_unit
;
1834 case MIGR_STATE_CHANGE
:
1840 static int imsm_level_to_layout(int level
)
1848 return ALGORITHM_LEFT_ASYMMETRIC
;
1855 /*******************************************************************************
1856 * Function: read_imsm_migr_rec
1857 * Description: Function reads imsm migration record from last sector of disk
1859 * fd : disk descriptor
1860 * super : metadata info
1864 ******************************************************************************/
1865 static int read_imsm_migr_rec(int fd
, struct intel_super
*super
)
1868 unsigned long long dsize
;
1870 get_dev_size(fd
, NULL
, &dsize
);
1871 if (lseek64(fd
, dsize
- 512, SEEK_SET
) < 0) {
1873 Name
": Cannot seek to anchor block: %s\n",
1877 if (read(fd
, super
->migr_rec_buf
, 512) != 512) {
1879 Name
": Cannot read migr record block: %s\n",
1889 /*******************************************************************************
1890 * Function: load_imsm_migr_rec
1891 * Description: Function reads imsm migration record (it is stored at the last
1894 * super : imsm internal array info
1895 * info : general array info
1899 ******************************************************************************/
1900 static int load_imsm_migr_rec(struct intel_super
*super
, struct mdinfo
*info
)
1903 struct dl
*dl
= NULL
;
1909 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
1910 /* read only from one of the first two slots */
1911 if ((sd
->disk
.raid_disk
> 1) ||
1912 (sd
->disk
.raid_disk
< 0))
1914 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
1915 fd
= dev_open(nm
, O_RDONLY
);
1921 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1922 /* read only from one of the first two slots */
1925 sprintf(nm
, "%d:%d", dl
->major
, dl
->minor
);
1926 fd
= dev_open(nm
, O_RDONLY
);
1933 retval
= read_imsm_migr_rec(fd
, super
);
1941 /*******************************************************************************
1942 * Function: write_imsm_migr_rec
1943 * Description: Function writes imsm migration record
1944 * (at the last sector of disk)
1946 * super : imsm internal array info
1950 ******************************************************************************/
1951 static int write_imsm_migr_rec(struct supertype
*st
)
1953 struct intel_super
*super
= st
->sb
;
1954 unsigned long long dsize
;
1960 for (sd
= super
->disks
; sd
; sd
= sd
->next
) {
1961 /* write to 2 first slots only */
1962 if ((sd
->index
< 0) || (sd
->index
> 1))
1964 sprintf(nm
, "%d:%d", sd
->major
, sd
->minor
);
1965 fd
= dev_open(nm
, O_RDWR
);
1968 get_dev_size(fd
, NULL
, &dsize
);
1969 if (lseek64(fd
, dsize
- 512, SEEK_SET
) < 0) {
1971 Name
": Cannot seek to anchor block: %s\n",
1975 if (write(fd
, super
->migr_rec_buf
, 512) != 512) {
1977 Name
": Cannot write migr record block: %s\n",
1992 static void getinfo_super_imsm_volume(struct supertype
*st
, struct mdinfo
*info
, char *dmap
)
1994 struct intel_super
*super
= st
->sb
;
1995 struct migr_record
*migr_rec
= super
->migr_rec
;
1996 struct imsm_dev
*dev
= get_imsm_dev(super
, super
->current_vol
);
1997 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1998 struct imsm_map
*prev_map
= get_imsm_map(dev
, 1);
1999 struct imsm_map
*map_to_analyse
= map
;
2002 unsigned int component_size_alligment
;
2003 int map_disks
= info
->array
.raid_disks
;
2005 memset(info
, 0, sizeof(*info
));
2007 map_to_analyse
= prev_map
;
2009 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2010 if (dl
->raiddisk
== info
->disk
.raid_disk
)
2012 info
->container_member
= super
->current_vol
;
2013 info
->array
.raid_disks
= map
->num_members
;
2014 info
->array
.level
= get_imsm_raid_level(map_to_analyse
);
2015 info
->array
.layout
= imsm_level_to_layout(info
->array
.level
);
2016 info
->array
.md_minor
= -1;
2017 info
->array
.ctime
= 0;
2018 info
->array
.utime
= 0;
2019 info
->array
.chunk_size
=
2020 __le16_to_cpu(map_to_analyse
->blocks_per_strip
) << 9;
2021 info
->array
.state
= !dev
->vol
.dirty
;
2022 info
->custom_array_size
= __le32_to_cpu(dev
->size_high
);
2023 info
->custom_array_size
<<= 32;
2024 info
->custom_array_size
|= __le32_to_cpu(dev
->size_low
);
2025 if (prev_map
&& map
->map_state
== prev_map
->map_state
) {
2026 info
->reshape_active
= 1;
2027 info
->new_level
= get_imsm_raid_level(map
);
2028 info
->new_layout
= imsm_level_to_layout(info
->new_level
);
2029 info
->new_chunk
= __le16_to_cpu(map
->blocks_per_strip
) << 9;
2030 info
->delta_disks
= map
->num_members
- prev_map
->num_members
;
2031 if (info
->delta_disks
) {
2032 /* this needs to be applied to every array
2035 info
->reshape_active
= 2;
2037 /* We shape information that we give to md might have to be
2038 * modify to cope with md's requirement for reshaping arrays.
2039 * For example, when reshaping a RAID0, md requires it to be
2040 * presented as a degraded RAID4.
2041 * Also if a RAID0 is migrating to a RAID5 we need to specify
2042 * the array as already being RAID5, but the 'before' layout
2043 * is a RAID4-like layout.
2045 switch (info
->array
.level
) {
2047 switch(info
->new_level
) {
2049 /* conversion is happening as RAID4 */
2050 info
->array
.level
= 4;
2051 info
->array
.raid_disks
+= 1;
2054 /* conversion is happening as RAID5 */
2055 info
->array
.level
= 5;
2056 info
->array
.layout
= ALGORITHM_PARITY_N
;
2057 info
->array
.raid_disks
+= 1;
2058 info
->delta_disks
-= 1;
2061 /* FIXME error message */
2062 info
->array
.level
= UnSet
;
2068 info
->new_level
= UnSet
;
2069 info
->new_layout
= UnSet
;
2070 info
->new_chunk
= info
->array
.chunk_size
;
2071 info
->delta_disks
= 0;
2073 info
->disk
.major
= 0;
2074 info
->disk
.minor
= 0;
2076 info
->disk
.major
= dl
->major
;
2077 info
->disk
.minor
= dl
->minor
;
2080 info
->data_offset
= __le32_to_cpu(map_to_analyse
->pba_of_lba0
);
2081 info
->component_size
=
2082 __le32_to_cpu(map_to_analyse
->blocks_per_member
);
2084 /* check component size aligment
2086 component_size_alligment
=
2087 info
->component_size
% (info
->array
.chunk_size
/512);
2089 if (component_size_alligment
&&
2090 (info
->array
.level
!= 1) && (info
->array
.level
!= UnSet
)) {
2091 dprintf("imsm: reported component size alligned from %llu ",
2092 info
->component_size
);
2093 info
->component_size
-= component_size_alligment
;
2094 dprintf("to %llu (%i).\n",
2095 info
->component_size
, component_size_alligment
);
2098 memset(info
->uuid
, 0, sizeof(info
->uuid
));
2099 info
->recovery_start
= MaxSector
;
2101 info
->reshape_progress
= 0;
2102 info
->resync_start
= MaxSector
;
2103 if (map_to_analyse
->map_state
== IMSM_T_STATE_UNINITIALIZED
||
2105 info
->resync_start
= 0;
2107 if (dev
->vol
.migr_state
) {
2108 switch (migr_type(dev
)) {
2111 __u64 blocks_per_unit
= blocks_per_migr_unit(super
,
2113 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
2115 info
->resync_start
= blocks_per_unit
* units
;
2118 case MIGR_GEN_MIGR
: {
2119 __u64 blocks_per_unit
= blocks_per_migr_unit(super
,
2121 __u64 units
= __le32_to_cpu(migr_rec
->curr_migr_unit
);
2122 unsigned long long array_blocks
;
2125 info
->reshape_progress
= blocks_per_unit
* units
;
2127 dprintf("IMSM: General Migration checkpoint : %llu "
2128 "(%llu) -> read reshape progress : %llu\n",
2129 units
, blocks_per_unit
, info
->reshape_progress
);
2131 used_disks
= imsm_num_data_members(dev
, 1);
2132 if (used_disks
> 0) {
2133 array_blocks
= map
->blocks_per_member
*
2135 /* round array size down to closest MB
2137 info
->custom_array_size
= (array_blocks
2138 >> SECT_PER_MB_SHIFT
)
2139 << SECT_PER_MB_SHIFT
;
2143 /* we could emulate the checkpointing of
2144 * 'sync_action=check' migrations, but for now
2145 * we just immediately complete them
2148 /* this is handled by container_content_imsm() */
2149 case MIGR_STATE_CHANGE
:
2150 /* FIXME handle other migrations */
2152 /* we are not dirty, so... */
2153 info
->resync_start
= MaxSector
;
2157 strncpy(info
->name
, (char *) dev
->volume
, MAX_RAID_SERIAL_LEN
);
2158 info
->name
[MAX_RAID_SERIAL_LEN
] = 0;
2160 info
->array
.major_version
= -1;
2161 info
->array
.minor_version
= -2;
2162 devname
= devnum2devname(st
->container_dev
);
2163 *info
->text_version
= '\0';
2165 sprintf(info
->text_version
, "/%s/%d", devname
, info
->container_member
);
2167 info
->safe_mode_delay
= 4000; /* 4 secs like the Matrix driver */
2168 uuid_from_super_imsm(st
, info
->uuid
);
2172 for (i
=0; i
<map_disks
; i
++) {
2174 if (i
< info
->array
.raid_disks
) {
2175 struct imsm_disk
*dsk
;
2176 j
= get_imsm_disk_idx(dev
, i
, -1);
2177 dsk
= get_imsm_disk(super
, j
);
2178 if (dsk
&& (dsk
->status
& CONFIGURED_DISK
))
2185 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
);
2186 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
);
2188 static struct imsm_disk
*get_imsm_missing(struct intel_super
*super
, __u8 index
)
2192 for (d
= super
->missing
; d
; d
= d
->next
)
2193 if (d
->index
== index
)
2198 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
)
2200 struct intel_super
*super
= st
->sb
;
2201 struct imsm_disk
*disk
;
2202 int map_disks
= info
->array
.raid_disks
;
2203 int max_enough
= -1;
2205 struct imsm_super
*mpb
;
2207 if (super
->current_vol
>= 0) {
2208 getinfo_super_imsm_volume(st
, info
, map
);
2211 memset(info
, 0, sizeof(*info
));
2213 /* Set raid_disks to zero so that Assemble will always pull in valid
2216 info
->array
.raid_disks
= 0;
2217 info
->array
.level
= LEVEL_CONTAINER
;
2218 info
->array
.layout
= 0;
2219 info
->array
.md_minor
= -1;
2220 info
->array
.ctime
= 0; /* N/A for imsm */
2221 info
->array
.utime
= 0;
2222 info
->array
.chunk_size
= 0;
2224 info
->disk
.major
= 0;
2225 info
->disk
.minor
= 0;
2226 info
->disk
.raid_disk
= -1;
2227 info
->reshape_active
= 0;
2228 info
->array
.major_version
= -1;
2229 info
->array
.minor_version
= -2;
2230 strcpy(info
->text_version
, "imsm");
2231 info
->safe_mode_delay
= 0;
2232 info
->disk
.number
= -1;
2233 info
->disk
.state
= 0;
2235 info
->recovery_start
= MaxSector
;
2237 /* do we have the all the insync disks that we expect? */
2238 mpb
= super
->anchor
;
2240 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
2241 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
2242 int failed
, enough
, j
, missing
= 0;
2243 struct imsm_map
*map
;
2246 failed
= imsm_count_failed(super
, dev
);
2247 state
= imsm_check_degraded(super
, dev
, failed
);
2248 map
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2250 /* any newly missing disks?
2251 * (catches single-degraded vs double-degraded)
2253 for (j
= 0; j
< map
->num_members
; j
++) {
2254 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
2255 __u32 idx
= ord_to_idx(ord
);
2257 if (!(ord
& IMSM_ORD_REBUILD
) &&
2258 get_imsm_missing(super
, idx
)) {
2264 if (state
== IMSM_T_STATE_FAILED
)
2266 else if (state
== IMSM_T_STATE_DEGRADED
&&
2267 (state
!= map
->map_state
|| missing
))
2269 else /* we're normal, or already degraded */
2272 /* in the missing/failed disk case check to see
2273 * if at least one array is runnable
2275 max_enough
= max(max_enough
, enough
);
2277 dprintf("%s: enough: %d\n", __func__
, max_enough
);
2278 info
->container_enough
= max_enough
;
2281 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
2283 disk
= &super
->disks
->disk
;
2284 info
->data_offset
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
2285 info
->component_size
= reserved
;
2286 info
->disk
.state
= is_configured(disk
) ? (1 << MD_DISK_ACTIVE
) : 0;
2287 /* we don't change info->disk.raid_disk here because
2288 * this state will be finalized in mdmon after we have
2289 * found the 'most fresh' version of the metadata
2291 info
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2292 info
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2295 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2296 * ->compare_super may have updated the 'num_raid_devs' field for spares
2298 if (info
->disk
.state
& (1 << MD_DISK_SYNC
) || super
->anchor
->num_raid_devs
)
2299 uuid_from_super_imsm(st
, info
->uuid
);
2301 memcpy(info
->uuid
, uuid_zero
, sizeof(uuid_zero
));
2303 /* I don't know how to compute 'map' on imsm, so use safe default */
2306 for (i
= 0; i
< map_disks
; i
++)
2312 /* allocates memory and fills disk in mdinfo structure
2313 * for each disk in array */
2314 struct mdinfo
*getinfo_super_disks_imsm(struct supertype
*st
)
2316 struct mdinfo
*mddev
= NULL
;
2317 struct intel_super
*super
= st
->sb
;
2318 struct imsm_disk
*disk
;
2321 if (!super
|| !super
->disks
)
2324 mddev
= malloc(sizeof(*mddev
));
2326 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2329 memset(mddev
, 0, sizeof(*mddev
));
2333 tmp
= malloc(sizeof(*tmp
));
2335 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2340 memset(tmp
, 0, sizeof(*tmp
));
2342 tmp
->next
= mddev
->devs
;
2344 tmp
->disk
.number
= count
++;
2345 tmp
->disk
.major
= dl
->major
;
2346 tmp
->disk
.minor
= dl
->minor
;
2347 tmp
->disk
.state
= is_configured(disk
) ?
2348 (1 << MD_DISK_ACTIVE
) : 0;
2349 tmp
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2350 tmp
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2351 tmp
->disk
.raid_disk
= -1;
2357 static int update_super_imsm(struct supertype
*st
, struct mdinfo
*info
,
2358 char *update
, char *devname
, int verbose
,
2359 int uuid_set
, char *homehost
)
2361 /* For 'assemble' and 'force' we need to return non-zero if any
2362 * change was made. For others, the return value is ignored.
2363 * Update options are:
2364 * force-one : This device looks a bit old but needs to be included,
2365 * update age info appropriately.
2366 * assemble: clear any 'faulty' flag to allow this device to
2368 * force-array: Array is degraded but being forced, mark it clean
2369 * if that will be needed to assemble it.
2371 * newdev: not used ????
2372 * grow: Array has gained a new device - this is currently for
2374 * resync: mark as dirty so a resync will happen.
2375 * name: update the name - preserving the homehost
2376 * uuid: Change the uuid of the array to match watch is given
2378 * Following are not relevant for this imsm:
2379 * sparc2.2 : update from old dodgey metadata
2380 * super-minor: change the preferred_minor number
2381 * summaries: update redundant counters.
2382 * homehost: update the recorded homehost
2383 * _reshape_progress: record new reshape_progress position.
2386 struct intel_super
*super
= st
->sb
;
2387 struct imsm_super
*mpb
;
2389 /* we can only update container info */
2390 if (!super
|| super
->current_vol
>= 0 || !super
->anchor
)
2393 mpb
= super
->anchor
;
2395 if (strcmp(update
, "uuid") == 0 && uuid_set
&& !info
->update_private
)
2397 else if (strcmp(update
, "uuid") == 0 && uuid_set
&& info
->update_private
) {
2398 mpb
->orig_family_num
= *((__u32
*) info
->update_private
);
2400 } else if (strcmp(update
, "uuid") == 0) {
2401 __u32
*new_family
= malloc(sizeof(*new_family
));
2403 /* update orig_family_number with the incoming random
2404 * data, report the new effective uuid, and store the
2405 * new orig_family_num for future updates.
2408 memcpy(&mpb
->orig_family_num
, info
->uuid
, sizeof(__u32
));
2409 uuid_from_super_imsm(st
, info
->uuid
);
2410 *new_family
= mpb
->orig_family_num
;
2411 info
->update_private
= new_family
;
2414 } else if (strcmp(update
, "assemble") == 0)
2419 /* successful update? recompute checksum */
2421 mpb
->check_sum
= __le32_to_cpu(__gen_imsm_checksum(mpb
));
2426 static size_t disks_to_mpb_size(int disks
)
2430 size
= sizeof(struct imsm_super
);
2431 size
+= (disks
- 1) * sizeof(struct imsm_disk
);
2432 size
+= 2 * sizeof(struct imsm_dev
);
2433 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2434 size
+= (4 - 2) * sizeof(struct imsm_map
);
2435 /* 4 possible disk_ord_tbl's */
2436 size
+= 4 * (disks
- 1) * sizeof(__u32
);
2441 static __u64
avail_size_imsm(struct supertype
*st
, __u64 devsize
)
2443 if (devsize
< (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
))
2446 return devsize
- (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
);
2449 static void free_devlist(struct intel_super
*super
)
2451 struct intel_dev
*dv
;
2453 while (super
->devlist
) {
2454 dv
= super
->devlist
->next
;
2455 free(super
->devlist
->dev
);
2456 free(super
->devlist
);
2457 super
->devlist
= dv
;
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	/* Copy a raid device description including its first map
	 * (sizeof_imsm_dev with second_map == 0 gives that length).
	 */
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2466 static int compare_super_imsm(struct supertype
*st
, struct supertype
*tst
)
2470 * 0 same, or first was empty, and second was copied
2471 * 1 second had wrong number
2473 * 3 wrong other info
2475 struct intel_super
*first
= st
->sb
;
2476 struct intel_super
*sec
= tst
->sb
;
2483 /* in platform dependent environment test if the disks
2484 * use the same Intel hba
2486 if (!check_env("IMSM_NO_PLATFORM")) {
2487 if (!first
->hba
|| !sec
->hba
||
2488 (first
->hba
->type
!= sec
->hba
->type
)) {
2490 "HBAs of devices does not match %s != %s\n",
2491 first
->hba
? get_sys_dev_type(first
->hba
->type
) : NULL
,
2492 sec
->hba
? get_sys_dev_type(sec
->hba
->type
) : NULL
);
2497 /* if an anchor does not have num_raid_devs set then it is a free
2500 if (first
->anchor
->num_raid_devs
> 0 &&
2501 sec
->anchor
->num_raid_devs
> 0) {
2502 /* Determine if these disks might ever have been
2503 * related. Further disambiguation can only take place
2504 * in load_super_imsm_all
2506 __u32 first_family
= first
->anchor
->orig_family_num
;
2507 __u32 sec_family
= sec
->anchor
->orig_family_num
;
2509 if (memcmp(first
->anchor
->sig
, sec
->anchor
->sig
,
2510 MAX_SIGNATURE_LENGTH
) != 0)
2513 if (first_family
== 0)
2514 first_family
= first
->anchor
->family_num
;
2515 if (sec_family
== 0)
2516 sec_family
= sec
->anchor
->family_num
;
2518 if (first_family
!= sec_family
)
2524 /* if 'first' is a spare promote it to a populated mpb with sec's
2527 if (first
->anchor
->num_raid_devs
== 0 &&
2528 sec
->anchor
->num_raid_devs
> 0) {
2530 struct intel_dev
*dv
;
2531 struct imsm_dev
*dev
;
2533 /* we need to copy raid device info from sec if an allocation
2534 * fails here we don't associate the spare
2536 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++) {
2537 dv
= malloc(sizeof(*dv
));
2540 dev
= malloc(sizeof_imsm_dev(get_imsm_dev(sec
, i
), 1));
2547 dv
->next
= first
->devlist
;
2548 first
->devlist
= dv
;
2550 if (i
< sec
->anchor
->num_raid_devs
) {
2551 /* allocation failure */
2552 free_devlist(first
);
2553 fprintf(stderr
, "imsm: failed to associate spare\n");
2556 first
->anchor
->num_raid_devs
= sec
->anchor
->num_raid_devs
;
2557 first
->anchor
->orig_family_num
= sec
->anchor
->orig_family_num
;
2558 first
->anchor
->family_num
= sec
->anchor
->family_num
;
2559 memcpy(first
->anchor
->sig
, sec
->anchor
->sig
, MAX_SIGNATURE_LENGTH
);
2560 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++)
2561 imsm_copy_dev(get_imsm_dev(first
, i
), get_imsm_dev(sec
, i
));
2567 static void fd2devname(int fd
, char *name
)
2571 char dname
[PATH_MAX
];
2576 if (fstat(fd
, &st
) != 0)
2578 sprintf(path
, "/sys/dev/block/%d:%d",
2579 major(st
.st_rdev
), minor(st
.st_rdev
));
2581 rv
= readlink(path
, dname
, sizeof(dname
));
2586 nm
= strrchr(dname
, '/');
2588 snprintf(name
, MAX_RAID_SERIAL_LEN
, "/dev/%s", nm
);
2591 extern int scsi_get_serial(int fd
, void *buf
, size_t buf_len
);
2593 static int imsm_read_serial(int fd
, char *devname
,
2594 __u8 serial
[MAX_RAID_SERIAL_LEN
])
2596 unsigned char scsi_serial
[255];
2605 memset(scsi_serial
, 0, sizeof(scsi_serial
));
2607 rv
= scsi_get_serial(fd
, scsi_serial
, sizeof(scsi_serial
));
2609 if (rv
&& check_env("IMSM_DEVNAME_AS_SERIAL")) {
2610 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2611 fd2devname(fd
, (char *) serial
);
2618 Name
": Failed to retrieve serial for %s\n",
2623 rsp_len
= scsi_serial
[3];
2627 Name
": Failed to retrieve serial for %s\n",
2631 rsp_buf
= (char *) &scsi_serial
[4];
2633 /* trim all whitespace and non-printable characters and convert
2636 for (i
= 0, dest
= rsp_buf
; i
< rsp_len
; i
++) {
2639 /* ':' is reserved for use in placeholder serial
2640 * numbers for missing disks
2648 len
= dest
- rsp_buf
;
2651 /* truncate leading characters */
2652 if (len
> MAX_RAID_SERIAL_LEN
) {
2653 dest
+= len
- MAX_RAID_SERIAL_LEN
;
2654 len
= MAX_RAID_SERIAL_LEN
;
2657 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2658 memcpy(serial
, dest
, len
);
2663 static int serialcmp(__u8
*s1
, __u8
*s2
)
2665 return strncmp((char *) s1
, (char *) s2
, MAX_RAID_SERIAL_LEN
);
2668 static void serialcpy(__u8
*dest
, __u8
*src
)
2670 strncpy((char *) dest
, (char *) src
, MAX_RAID_SERIAL_LEN
);
2674 static struct dl
*serial_to_dl(__u8
*serial
, struct intel_super
*super
)
2678 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2679 if (serialcmp(dl
->serial
, serial
) == 0)
2686 static struct imsm_disk
*
2687 __serial_to_disk(__u8
*serial
, struct imsm_super
*mpb
, int *idx
)
2691 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2692 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
2694 if (serialcmp(disk
->serial
, serial
) == 0) {
2705 load_imsm_disk(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2707 struct imsm_disk
*disk
;
2712 __u8 serial
[MAX_RAID_SERIAL_LEN
];
2714 rv
= imsm_read_serial(fd
, devname
, serial
);
2719 dl
= calloc(1, sizeof(*dl
));
2723 Name
": failed to allocate disk buffer for %s\n",
2729 dl
->major
= major(stb
.st_rdev
);
2730 dl
->minor
= minor(stb
.st_rdev
);
2731 dl
->next
= super
->disks
;
2732 dl
->fd
= keep_fd
? fd
: -1;
2733 assert(super
->disks
== NULL
);
2735 serialcpy(dl
->serial
, serial
);
2738 fd2devname(fd
, name
);
2740 dl
->devname
= strdup(devname
);
2742 dl
->devname
= strdup(name
);
2744 /* look up this disk's index in the current anchor */
2745 disk
= __serial_to_disk(dl
->serial
, super
->anchor
, &dl
->index
);
2748 /* only set index on disks that are a member of a
2749 * populated contianer, i.e. one with raid_devs
2751 if (is_failed(&dl
->disk
))
2753 else if (is_spare(&dl
->disk
))
2761 /* When migrating map0 contains the 'destination' state while map1
2762 * contains the current state. When not migrating map0 contains the
2763 * current state. This routine assumes that map[0].map_state is set to
2764 * the current array state before being called.
2766 * Migration is indicated by one of the following states
2767 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2768 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2769 * map1state=unitialized)
2770 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2772 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2773 * map1state=degraded)
2774 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2777 static void migrate(struct imsm_dev
*dev
, struct intel_super
*super
,
2778 __u8 to_state
, int migr_type
)
2780 struct imsm_map
*dest
;
2781 struct imsm_map
*src
= get_imsm_map(dev
, 0);
2783 dev
->vol
.migr_state
= 1;
2784 set_migr_type(dev
, migr_type
);
2785 dev
->vol
.curr_migr_unit
= 0;
2786 dest
= get_imsm_map(dev
, 1);
2788 /* duplicate and then set the target end state in map[0] */
2789 memcpy(dest
, src
, sizeof_imsm_map(src
));
2790 if ((migr_type
== MIGR_REBUILD
) ||
2791 (migr_type
== MIGR_GEN_MIGR
)) {
2795 for (i
= 0; i
< src
->num_members
; i
++) {
2796 ord
= __le32_to_cpu(src
->disk_ord_tbl
[i
]);
2797 set_imsm_ord_tbl_ent(src
, i
, ord_to_idx(ord
));
2801 if (migr_type
== MIGR_GEN_MIGR
)
2802 /* Clear migration record */
2803 memset(super
->migr_rec
, 0, sizeof(struct migr_record
));
2805 src
->map_state
= to_state
;
2808 static void end_migration(struct imsm_dev
*dev
, __u8 map_state
)
2810 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2811 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2814 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2815 * completed in the last migration.
2817 * FIXME add support for raid-level-migration
2819 for (i
= 0; i
< prev
->num_members
; i
++)
2820 for (j
= 0; j
< map
->num_members
; j
++)
2821 /* during online capacity expansion
2822 * disks position can be changed if takeover is used
2824 if (ord_to_idx(map
->disk_ord_tbl
[j
]) ==
2825 ord_to_idx(prev
->disk_ord_tbl
[i
])) {
2826 map
->disk_ord_tbl
[j
] |= prev
->disk_ord_tbl
[i
];
2830 dev
->vol
.migr_state
= 0;
2831 dev
->vol
.migr_type
= 0;
2832 dev
->vol
.curr_migr_unit
= 0;
2833 map
->map_state
= map_state
;
2837 static int parse_raid_devices(struct intel_super
*super
)
2840 struct imsm_dev
*dev_new
;
2841 size_t len
, len_migr
;
2843 size_t space_needed
= 0;
2844 struct imsm_super
*mpb
= super
->anchor
;
2846 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2847 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2848 struct intel_dev
*dv
;
2850 len
= sizeof_imsm_dev(dev_iter
, 0);
2851 len_migr
= sizeof_imsm_dev(dev_iter
, 1);
2853 space_needed
+= len_migr
- len
;
2855 dv
= malloc(sizeof(*dv
));
2858 if (max_len
< len_migr
)
2860 if (max_len
> len_migr
)
2861 space_needed
+= max_len
- len_migr
;
2862 dev_new
= malloc(max_len
);
2867 imsm_copy_dev(dev_new
, dev_iter
);
2870 dv
->next
= super
->devlist
;
2871 super
->devlist
= dv
;
2874 /* ensure that super->buf is large enough when all raid devices
2877 if (__le32_to_cpu(mpb
->mpb_size
) + space_needed
> super
->len
) {
2880 len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + space_needed
, 512);
2881 if (posix_memalign(&buf
, 512, len
) != 0)
2884 memcpy(buf
, super
->buf
, super
->len
);
2885 memset(buf
+ super
->len
, 0, len
- super
->len
);
2894 /* retrieve a pointer to the bbm log which starts after all raid devices */
2895 struct bbm_log
*__get_imsm_bbm_log(struct imsm_super
*mpb
)
2899 if (__le32_to_cpu(mpb
->bbm_log_size
)) {
2901 ptr
+= mpb
->mpb_size
- __le32_to_cpu(mpb
->bbm_log_size
);
2907 /*******************************************************************************
2908 * Function: check_mpb_migr_compatibility
2909 * Description: Function checks for unsupported migration features:
2910 * - migration optimization area (pba_of_lba0)
2911 * - descending reshape (ascending_migr)
2913 * super : imsm metadata information
2915 * 0 : migration is compatible
2916 * -1 : migration is not compatible
2917 ******************************************************************************/
2918 int check_mpb_migr_compatibility(struct intel_super
*super
)
2920 struct imsm_map
*map0
, *map1
;
2921 struct migr_record
*migr_rec
= super
->migr_rec
;
2924 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2925 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2928 dev_iter
->vol
.migr_state
== 1 &&
2929 dev_iter
->vol
.migr_type
== MIGR_GEN_MIGR
) {
2930 /* This device is migrating */
2931 map0
= get_imsm_map(dev_iter
, 0);
2932 map1
= get_imsm_map(dev_iter
, 1);
2933 if (map0
->pba_of_lba0
!= map1
->pba_of_lba0
)
2934 /* migration optimization area was used */
2936 if (migr_rec
->ascending_migr
== 0
2937 && migr_rec
->dest_depth_per_unit
> 0)
2938 /* descending reshape not supported yet */
2945 static void __free_imsm(struct intel_super
*super
, int free_disks
);
2947 /* load_imsm_mpb - read matrix metadata
2948 * allocates super->mpb to be freed by free_imsm
2950 static int load_imsm_mpb(int fd
, struct intel_super
*super
, char *devname
)
2952 unsigned long long dsize
;
2953 unsigned long long sectors
;
2955 struct imsm_super
*anchor
;
2958 get_dev_size(fd
, NULL
, &dsize
);
2962 Name
": %s: device to small for imsm\n",
2967 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0) {
2970 Name
": Cannot seek to anchor block on %s: %s\n",
2971 devname
, strerror(errno
));
2975 if (posix_memalign((void**)&anchor
, 512, 512) != 0) {
2978 Name
": Failed to allocate imsm anchor buffer"
2979 " on %s\n", devname
);
2982 if (read(fd
, anchor
, 512) != 512) {
2985 Name
": Cannot read anchor block on %s: %s\n",
2986 devname
, strerror(errno
));
2991 if (strncmp((char *) anchor
->sig
, MPB_SIGNATURE
, MPB_SIG_LEN
) != 0) {
2994 Name
": no IMSM anchor on %s\n", devname
);
2999 __free_imsm(super
, 0);
3000 /* reload capability and hba */
3002 /* capability and hba must be updated with new super allocation */
3003 find_intel_hba_capability(fd
, super
, devname
);
3004 super
->len
= ROUND_UP(anchor
->mpb_size
, 512);
3005 if (posix_memalign(&super
->buf
, 512, super
->len
) != 0) {
3008 Name
": unable to allocate %zu byte mpb buffer\n",
3013 memcpy(super
->buf
, anchor
, 512);
3015 sectors
= mpb_sectors(anchor
) - 1;
3018 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
3019 fprintf(stderr
, Name
3020 ": %s could not allocate migr_rec buffer\n", __func__
);
3026 check_sum
= __gen_imsm_checksum(super
->anchor
);
3027 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
3030 Name
": IMSM checksum %x != %x on %s\n",
3032 __le32_to_cpu(super
->anchor
->check_sum
),
3040 /* read the extended mpb */
3041 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0) {
3044 Name
": Cannot seek to extended mpb on %s: %s\n",
3045 devname
, strerror(errno
));
3049 if ((unsigned)read(fd
, super
->buf
+ 512, super
->len
- 512) != super
->len
- 512) {
3052 Name
": Cannot read extended mpb on %s: %s\n",
3053 devname
, strerror(errno
));
3057 check_sum
= __gen_imsm_checksum(super
->anchor
);
3058 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
3061 Name
": IMSM checksum %x != %x on %s\n",
3062 check_sum
, __le32_to_cpu(super
->anchor
->check_sum
),
3067 /* FIXME the BBM log is disk specific so we cannot use this global
3068 * buffer for all disks. Ok for now since we only look at the global
3069 * bbm_log_size parameter to gate assembly
3071 super
->bbm_log
= __get_imsm_bbm_log(super
->anchor
);
3076 static int read_imsm_migr_rec(int fd
, struct intel_super
*super
);
3079 load_and_parse_mpb(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
3083 err
= load_imsm_mpb(fd
, super
, devname
);
3086 err
= load_imsm_disk(fd
, super
, devname
, keep_fd
);
3089 err
= parse_raid_devices(super
);
3094 static void __free_imsm_disk(struct dl
*d
)
3106 static void free_imsm_disks(struct intel_super
*super
)
3110 while (super
->disks
) {
3112 super
->disks
= d
->next
;
3113 __free_imsm_disk(d
);
3115 while (super
->disk_mgmt_list
) {
3116 d
= super
->disk_mgmt_list
;
3117 super
->disk_mgmt_list
= d
->next
;
3118 __free_imsm_disk(d
);
3120 while (super
->missing
) {
3122 super
->missing
= d
->next
;
3123 __free_imsm_disk(d
);
3128 /* free all the pieces hanging off of a super pointer */
3129 static void __free_imsm(struct intel_super
*super
, int free_disks
)
3131 struct intel_hba
*elem
, *next
;
3137 /* unlink capability description */
3139 if (super
->migr_rec_buf
) {
3140 free(super
->migr_rec_buf
);
3141 super
->migr_rec_buf
= NULL
;
3144 free_imsm_disks(super
);
3145 free_devlist(super
);
3149 free((void *)elem
->path
);
3157 static void free_imsm(struct intel_super
*super
)
3159 __free_imsm(super
, 1);
3163 static void free_super_imsm(struct supertype
*st
)
3165 struct intel_super
*super
= st
->sb
;
3174 static struct intel_super
*alloc_super(void)
3176 struct intel_super
*super
= malloc(sizeof(*super
));
3179 memset(super
, 0, sizeof(*super
));
3180 super
->current_vol
= -1;
3181 super
->create_offset
= ~((__u32
) 0);
3187 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3189 static int find_intel_hba_capability(int fd
, struct intel_super
*super
, char *devname
)
3191 struct sys_dev
*hba_name
;
3194 if ((fd
< 0) || check_env("IMSM_NO_PLATFORM")) {
3199 hba_name
= find_disk_attached_hba(fd
, NULL
);
3203 Name
": %s is not attached to Intel(R) RAID controller.\n",
3207 rv
= attach_hba_to_super(super
, hba_name
);
3210 struct intel_hba
*hba
= super
->hba
;
3212 fprintf(stderr
, Name
": %s is attached to Intel(R) %s RAID "
3213 "controller (%s),\n"
3214 " but the container is assigned to Intel(R) "
3215 "%s RAID controller (",
3218 hba_name
->pci_id
? : "Err!",
3219 get_sys_dev_type(hba_name
->type
));
3222 fprintf(stderr
, "%s", hba
->pci_id
? : "Err!");
3224 fprintf(stderr
, ", ");
3228 fprintf(stderr
, ").\n"
3229 " Mixing devices attached to different controllers "
3230 "is not allowed.\n");
3232 free_sys_dev(&hba_name
);
3235 super
->orom
= find_imsm_capability(hba_name
->type
);
3236 free_sys_dev(&hba_name
);
3243 /* find_missing - helper routine for load_super_imsm_all that identifies
3244 * disks that have disappeared from the system. This routine relies on
3245 * the mpb being uptodate, which it is at load time.
3247 static int find_missing(struct intel_super
*super
)
3250 struct imsm_super
*mpb
= super
->anchor
;
3252 struct imsm_disk
*disk
;
3254 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3255 disk
= __get_imsm_disk(mpb
, i
);
3256 dl
= serial_to_dl(disk
->serial
, super
);
3260 dl
= malloc(sizeof(*dl
));
3266 dl
->devname
= strdup("missing");
3268 serialcpy(dl
->serial
, disk
->serial
);
3271 dl
->next
= super
->missing
;
3272 super
->missing
= dl
;
3278 static struct intel_disk
*disk_list_get(__u8
*serial
, struct intel_disk
*disk_list
)
3280 struct intel_disk
*idisk
= disk_list
;
3283 if (serialcmp(idisk
->disk
.serial
, serial
) == 0)
3285 idisk
= idisk
->next
;
3291 static int __prep_thunderdome(struct intel_super
**table
, int tbl_size
,
3292 struct intel_super
*super
,
3293 struct intel_disk
**disk_list
)
3295 struct imsm_disk
*d
= &super
->disks
->disk
;
3296 struct imsm_super
*mpb
= super
->anchor
;
3299 for (i
= 0; i
< tbl_size
; i
++) {
3300 struct imsm_super
*tbl_mpb
= table
[i
]->anchor
;
3301 struct imsm_disk
*tbl_d
= &table
[i
]->disks
->disk
;
3303 if (tbl_mpb
->family_num
== mpb
->family_num
) {
3304 if (tbl_mpb
->check_sum
== mpb
->check_sum
) {
3305 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3306 __func__
, super
->disks
->major
,
3307 super
->disks
->minor
,
3308 table
[i
]->disks
->major
,
3309 table
[i
]->disks
->minor
);
3313 if (((is_configured(d
) && !is_configured(tbl_d
)) ||
3314 is_configured(d
) == is_configured(tbl_d
)) &&
3315 tbl_mpb
->generation_num
< mpb
->generation_num
) {
3316 /* current version of the mpb is a
3317 * better candidate than the one in
3318 * super_table, but copy over "cross
3319 * generational" status
3321 struct intel_disk
*idisk
;
3323 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3324 __func__
, super
->disks
->major
,
3325 super
->disks
->minor
,
3326 table
[i
]->disks
->major
,
3327 table
[i
]->disks
->minor
);
3329 idisk
= disk_list_get(tbl_d
->serial
, *disk_list
);
3330 if (idisk
&& is_failed(&idisk
->disk
))
3331 tbl_d
->status
|= FAILED_DISK
;
3334 struct intel_disk
*idisk
;
3335 struct imsm_disk
*disk
;
3337 /* tbl_mpb is more up to date, but copy
3338 * over cross generational status before
3341 disk
= __serial_to_disk(d
->serial
, mpb
, NULL
);
3342 if (disk
&& is_failed(disk
))
3343 d
->status
|= FAILED_DISK
;
3345 idisk
= disk_list_get(d
->serial
, *disk_list
);
3348 if (disk
&& is_configured(disk
))
3349 idisk
->disk
.status
|= CONFIGURED_DISK
;
3352 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3353 __func__
, super
->disks
->major
,
3354 super
->disks
->minor
,
3355 table
[i
]->disks
->major
,
3356 table
[i
]->disks
->minor
);
3364 table
[tbl_size
++] = super
;
3368 /* update/extend the merged list of imsm_disk records */
3369 for (j
= 0; j
< mpb
->num_disks
; j
++) {
3370 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, j
);
3371 struct intel_disk
*idisk
;
3373 idisk
= disk_list_get(disk
->serial
, *disk_list
);
3375 idisk
->disk
.status
|= disk
->status
;
3376 if (is_configured(&idisk
->disk
) ||
3377 is_failed(&idisk
->disk
))
3378 idisk
->disk
.status
&= ~(SPARE_DISK
);
3380 idisk
= calloc(1, sizeof(*idisk
));
3383 idisk
->owner
= IMSM_UNKNOWN_OWNER
;
3384 idisk
->disk
= *disk
;
3385 idisk
->next
= *disk_list
;
3389 if (serialcmp(idisk
->disk
.serial
, d
->serial
) == 0)
3396 static struct intel_super
*
3397 validate_members(struct intel_super
*super
, struct intel_disk
*disk_list
,
3400 struct imsm_super
*mpb
= super
->anchor
;
3404 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3405 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
3406 struct intel_disk
*idisk
;
3408 idisk
= disk_list_get(disk
->serial
, disk_list
);
3410 if (idisk
->owner
== owner
||
3411 idisk
->owner
== IMSM_UNKNOWN_OWNER
)
3414 dprintf("%s: '%.16s' owner %d != %d\n",
3415 __func__
, disk
->serial
, idisk
->owner
,
3418 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3419 __func__
, __le32_to_cpu(mpb
->family_num
), i
,
3425 if (ok_count
== mpb
->num_disks
)
3430 static void show_conflicts(__u32 family_num
, struct intel_super
*super_list
)
3432 struct intel_super
*s
;
3434 for (s
= super_list
; s
; s
= s
->next
) {
3435 if (family_num
!= s
->anchor
->family_num
)
3437 fprintf(stderr
, "Conflict, offlining family %#x on '%s'\n",
3438 __le32_to_cpu(family_num
), s
->disks
->devname
);
3442 static struct intel_super
*
3443 imsm_thunderdome(struct intel_super
**super_list
, int len
)
3445 struct intel_super
*super_table
[len
];
3446 struct intel_disk
*disk_list
= NULL
;
3447 struct intel_super
*champion
, *spare
;
3448 struct intel_super
*s
, **del
;
3453 memset(super_table
, 0, sizeof(super_table
));
3454 for (s
= *super_list
; s
; s
= s
->next
)
3455 tbl_size
= __prep_thunderdome(super_table
, tbl_size
, s
, &disk_list
);
3457 for (i
= 0; i
< tbl_size
; i
++) {
3458 struct imsm_disk
*d
;
3459 struct intel_disk
*idisk
;
3460 struct imsm_super
*mpb
= super_table
[i
]->anchor
;
3463 d
= &s
->disks
->disk
;
3465 /* 'd' must appear in merged disk list for its
3466 * configuration to be valid
3468 idisk
= disk_list_get(d
->serial
, disk_list
);
3469 if (idisk
&& idisk
->owner
== i
)
3470 s
= validate_members(s
, disk_list
, i
);
3475 dprintf("%s: marking family: %#x from %d:%d offline\n",
3476 __func__
, mpb
->family_num
,
3477 super_table
[i
]->disks
->major
,
3478 super_table
[i
]->disks
->minor
);
3482 /* This is where the mdadm implementation differs from the Windows
3483 * driver which has no strict concept of a container. We can only
3484 * assemble one family from a container, so when returning a prodigal
3485 * array member to this system the code will not be able to disambiguate
3486 * the container contents that should be assembled ("foreign" versus
3487 * "local"). It requires user intervention to set the orig_family_num
3488 * to a new value to establish a new container. The Windows driver in
3489 * this situation fixes up the volume name in place and manages the
3490 * foreign array as an independent entity.
3495 for (i
= 0; i
< tbl_size
; i
++) {
3496 struct intel_super
*tbl_ent
= super_table
[i
];
3502 if (tbl_ent
->anchor
->num_raid_devs
== 0) {
3507 if (s
&& !is_spare
) {
3508 show_conflicts(tbl_ent
->anchor
->family_num
, *super_list
);
3510 } else if (!s
&& !is_spare
)
3523 fprintf(stderr
, "Chose family %#x on '%s', "
3524 "assemble conflicts to new container with '--update=uuid'\n",
3525 __le32_to_cpu(s
->anchor
->family_num
), s
->disks
->devname
);
3527 /* collect all dl's onto 'champion', and update them to
3528 * champion's version of the status
3530 for (s
= *super_list
; s
; s
= s
->next
) {
3531 struct imsm_super
*mpb
= champion
->anchor
;
3532 struct dl
*dl
= s
->disks
;
3537 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3538 struct imsm_disk
*disk
;
3540 disk
= __serial_to_disk(dl
->serial
, mpb
, &dl
->index
);
3543 /* only set index on disks that are a member of
3544 * a populated contianer, i.e. one with
3547 if (is_failed(&dl
->disk
))
3549 else if (is_spare(&dl
->disk
))
3555 if (i
>= mpb
->num_disks
) {
3556 struct intel_disk
*idisk
;
3558 idisk
= disk_list_get(dl
->serial
, disk_list
);
3559 if (idisk
&& is_spare(&idisk
->disk
) &&
3560 !is_failed(&idisk
->disk
) && !is_configured(&idisk
->disk
))
3568 dl
->next
= champion
->disks
;
3569 champion
->disks
= dl
;
3573 /* delete 'champion' from super_list */
3574 for (del
= super_list
; *del
; ) {
3575 if (*del
== champion
) {
3576 *del
= (*del
)->next
;
3579 del
= &(*del
)->next
;
3581 champion
->next
= NULL
;
3585 struct intel_disk
*idisk
= disk_list
;
3587 disk_list
= disk_list
->next
;
3594 static int load_super_imsm_all(struct supertype
*st
, int fd
, void **sbp
,
3598 struct intel_super
*super_list
= NULL
;
3599 struct intel_super
*super
= NULL
;
3600 int devnum
= fd2devnum(fd
);
3606 /* check if 'fd' an opened container */
3607 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3611 if (sra
->array
.major_version
!= -1 ||
3612 sra
->array
.minor_version
!= -2 ||
3613 strcmp(sra
->text_version
, "imsm") != 0) {
3618 for (sd
= sra
->devs
, i
= 0; sd
; sd
= sd
->next
, i
++) {
3619 struct intel_super
*s
= alloc_super();
3627 s
->next
= super_list
;
3631 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3632 dfd
= dev_open(nm
, O_RDWR
);
3636 rv
= find_intel_hba_capability(dfd
, s
, devname
);
3637 /* no orom/efi or non-intel hba of the disk */
3641 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3643 /* retry the load if we might have raced against mdmon */
3644 if (err
== 3 && mdmon_running(devnum
))
3645 for (retry
= 0; retry
< 3; retry
++) {
3647 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3655 /* all mpbs enter, maybe one leaves */
3656 super
= imsm_thunderdome(&super_list
, i
);
3662 if (find_missing(super
) != 0) {
3668 /* load migration record */
3669 err
= load_imsm_migr_rec(super
, NULL
);
3675 /* Check migration compatibility */
3676 if (check_mpb_migr_compatibility(super
) != 0) {
3677 fprintf(stderr
, Name
": Unsupported migration detected");
3679 fprintf(stderr
, " on %s\n", devname
);
3681 fprintf(stderr
, " (IMSM).\n");
3690 while (super_list
) {
3691 struct intel_super
*s
= super_list
;
3693 super_list
= super_list
->next
;
3702 st
->container_dev
= devnum
;
3703 if (err
== 0 && st
->ss
== NULL
) {
3704 st
->ss
= &super_imsm
;
3705 st
->minor_version
= 0;
3706 st
->max_devs
= IMSM_MAX_DEVICES
;
3711 static int load_container_imsm(struct supertype
*st
, int fd
, char *devname
)
3713 return load_super_imsm_all(st
, fd
, &st
->sb
, devname
);
3717 static int load_super_imsm(struct supertype
*st
, int fd
, char *devname
)
3719 struct intel_super
*super
;
3722 if (test_partition(fd
))
3723 /* IMSM not allowed on partitions */
3726 free_super_imsm(st
);
3728 super
= alloc_super();
3731 Name
": malloc of %zu failed.\n",
3735 /* Load hba and capabilities if they exist.
3736 * But do not preclude loading metadata in case capabilities or hba are
3737 * non-compliant and ignore_hw_compat is set.
3739 rv
= find_intel_hba_capability(fd
, super
, devname
);
3740 /* no orom/efi or non-intel hba of the disk */
3741 if ((rv
!= 0) && (st
->ignore_hw_compat
== 0)) {
3744 Name
": No OROM/EFI properties for %s\n", devname
);
3748 rv
= load_and_parse_mpb(fd
, super
, devname
, 0);
3753 Name
": Failed to load all information "
3754 "sections on %s\n", devname
);
3760 if (st
->ss
== NULL
) {
3761 st
->ss
= &super_imsm
;
3762 st
->minor_version
= 0;
3763 st
->max_devs
= IMSM_MAX_DEVICES
;
3766 /* load migration record */
3767 load_imsm_migr_rec(super
, NULL
);
3769 /* Check for unsupported migration features */
3770 if (check_mpb_migr_compatibility(super
) != 0) {
3771 fprintf(stderr
, Name
": Unsupported migration detected");
3773 fprintf(stderr
, " on %s\n", devname
);
3775 fprintf(stderr
, " (IMSM).\n");
3782 static __u16
info_to_blocks_per_strip(mdu_array_info_t
*info
)
3784 if (info
->level
== 1)
3786 return info
->chunk_size
>> 9;
3789 static __u32
info_to_num_data_stripes(mdu_array_info_t
*info
, int num_domains
)
3793 num_stripes
= (info
->size
* 2) / info_to_blocks_per_strip(info
);
3794 num_stripes
/= num_domains
;
3799 static __u32
info_to_blocks_per_member(mdu_array_info_t
*info
)
3801 if (info
->level
== 1)
3802 return info
->size
* 2;
3804 return (info
->size
* 2) & ~(info_to_blocks_per_strip(info
) - 1);
3807 static void imsm_update_version_info(struct intel_super
*super
)
3809 /* update the version and attributes */
3810 struct imsm_super
*mpb
= super
->anchor
;
3812 struct imsm_dev
*dev
;
3813 struct imsm_map
*map
;
3816 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3817 dev
= get_imsm_dev(super
, i
);
3818 map
= get_imsm_map(dev
, 0);
3819 if (__le32_to_cpu(dev
->size_high
) > 0)
3820 mpb
->attributes
|= MPB_ATTRIB_2TB
;
3822 /* FIXME detect when an array spans a port multiplier */
3824 mpb
->attributes
|= MPB_ATTRIB_PM
;
3827 if (mpb
->num_raid_devs
> 1 ||
3828 mpb
->attributes
!= MPB_ATTRIB_CHECKSUM_VERIFY
) {
3829 version
= MPB_VERSION_ATTRIBS
;
3830 switch (get_imsm_raid_level(map
)) {
3831 case 0: mpb
->attributes
|= MPB_ATTRIB_RAID0
; break;
3832 case 1: mpb
->attributes
|= MPB_ATTRIB_RAID1
; break;
3833 case 10: mpb
->attributes
|= MPB_ATTRIB_RAID10
; break;
3834 case 5: mpb
->attributes
|= MPB_ATTRIB_RAID5
; break;
3837 if (map
->num_members
>= 5)
3838 version
= MPB_VERSION_5OR6_DISK_ARRAY
;
3839 else if (dev
->status
== DEV_CLONE_N_GO
)
3840 version
= MPB_VERSION_CNG
;
3841 else if (get_imsm_raid_level(map
) == 5)
3842 version
= MPB_VERSION_RAID5
;
3843 else if (map
->num_members
>= 3)
3844 version
= MPB_VERSION_3OR4_DISK_ARRAY
;
3845 else if (get_imsm_raid_level(map
) == 1)
3846 version
= MPB_VERSION_RAID1
;
3848 version
= MPB_VERSION_RAID0
;
3850 strcpy(((char *) mpb
->sig
) + strlen(MPB_SIGNATURE
), version
);
3854 static int check_name(struct intel_super
*super
, char *name
, int quiet
)
3856 struct imsm_super
*mpb
= super
->anchor
;
3857 char *reason
= NULL
;
3860 if (strlen(name
) > MAX_RAID_SERIAL_LEN
)
3861 reason
= "must be 16 characters or less";
3863 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3864 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
3866 if (strncmp((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
) == 0) {
3867 reason
= "already exists";
3872 if (reason
&& !quiet
)
3873 fprintf(stderr
, Name
": imsm volume name %s\n", reason
);
3878 static int init_super_imsm_volume(struct supertype
*st
, mdu_array_info_t
*info
,
3879 unsigned long long size
, char *name
,
3880 char *homehost
, int *uuid
)
3882 /* We are creating a volume inside a pre-existing container.
3883 * so st->sb is already set.
3885 struct intel_super
*super
= st
->sb
;
3886 struct imsm_super
*mpb
= super
->anchor
;
3887 struct intel_dev
*dv
;
3888 struct imsm_dev
*dev
;
3889 struct imsm_vol
*vol
;
3890 struct imsm_map
*map
;
3891 int idx
= mpb
->num_raid_devs
;
3893 unsigned long long array_blocks
;
3894 size_t size_old
, size_new
;
3895 __u32 num_data_stripes
;
3897 if (super
->orom
&& mpb
->num_raid_devs
>= super
->orom
->vpa
) {
3898 fprintf(stderr
, Name
": This imsm-container already has the "
3899 "maximum of %d volumes\n", super
->orom
->vpa
);
3903 /* ensure the mpb is large enough for the new data */
3904 size_old
= __le32_to_cpu(mpb
->mpb_size
);
3905 size_new
= disks_to_mpb_size(info
->nr_disks
);
3906 if (size_new
> size_old
) {
3908 size_t size_round
= ROUND_UP(size_new
, 512);
3910 if (posix_memalign(&mpb_new
, 512, size_round
) != 0) {
3911 fprintf(stderr
, Name
": could not allocate new mpb\n");
3914 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
3915 fprintf(stderr
, Name
3916 ": %s could not allocate migr_rec buffer\n",
3922 memcpy(mpb_new
, mpb
, size_old
);
3925 super
->anchor
= mpb_new
;
3926 mpb
->mpb_size
= __cpu_to_le32(size_new
);
3927 memset(mpb_new
+ size_old
, 0, size_round
- size_old
);
3929 super
->current_vol
= idx
;
3930 /* when creating the first raid device in this container set num_disks
3931 * to zero, i.e. delete this spare and add raid member devices in
3932 * add_to_super_imsm_volume()
3934 if (super
->current_vol
== 0)
3937 if (!check_name(super
, name
, 0))
3939 dv
= malloc(sizeof(*dv
));
3941 fprintf(stderr
, Name
": failed to allocate device list entry\n");
3944 dev
= calloc(1, sizeof(*dev
) + sizeof(__u32
) * (info
->raid_disks
- 1));
3947 fprintf(stderr
, Name
": could not allocate raid device\n");
3951 strncpy((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
);
3952 if (info
->level
== 1)
3953 array_blocks
= info_to_blocks_per_member(info
);
3955 array_blocks
= calc_array_size(info
->level
, info
->raid_disks
,
3956 info
->layout
, info
->chunk_size
,
3958 /* round array size down to closest MB */
3959 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
3961 dev
->size_low
= __cpu_to_le32((__u32
) array_blocks
);
3962 dev
->size_high
= __cpu_to_le32((__u32
) (array_blocks
>> 32));
3963 dev
->status
= (DEV_READ_COALESCING
| DEV_WRITE_COALESCING
);
3965 vol
->migr_state
= 0;
3966 set_migr_type(dev
, MIGR_INIT
);
3968 vol
->curr_migr_unit
= 0;
3969 map
= get_imsm_map(dev
, 0);
3970 map
->pba_of_lba0
= __cpu_to_le32(super
->create_offset
);
3971 map
->blocks_per_member
= __cpu_to_le32(info_to_blocks_per_member(info
));
3972 map
->blocks_per_strip
= __cpu_to_le16(info_to_blocks_per_strip(info
));
3973 map
->failed_disk_num
= ~0;
3974 map
->map_state
= info
->level
? IMSM_T_STATE_UNINITIALIZED
:
3975 IMSM_T_STATE_NORMAL
;
3978 if (info
->level
== 1 && info
->raid_disks
> 2) {
3981 fprintf(stderr
, Name
": imsm does not support more than 2 disks"
3982 "in a raid1 volume\n");
3986 map
->raid_level
= info
->level
;
3987 if (info
->level
== 10) {
3988 map
->raid_level
= 1;
3989 map
->num_domains
= info
->raid_disks
/ 2;
3990 } else if (info
->level
== 1)
3991 map
->num_domains
= info
->raid_disks
;
3993 map
->num_domains
= 1;
3995 num_data_stripes
= info_to_num_data_stripes(info
, map
->num_domains
);
3996 map
->num_data_stripes
= __cpu_to_le32(num_data_stripes
);
3998 map
->num_members
= info
->raid_disks
;
3999 for (i
= 0; i
< map
->num_members
; i
++) {
4000 /* initialized in add_to_super */
4001 set_imsm_ord_tbl_ent(map
, i
, IMSM_ORD_REBUILD
);
4003 mpb
->num_raid_devs
++;
4006 dv
->index
= super
->current_vol
;
4007 dv
->next
= super
->devlist
;
4008 super
->devlist
= dv
;
4010 imsm_update_version_info(super
);
4015 static int init_super_imsm(struct supertype
*st
, mdu_array_info_t
*info
,
4016 unsigned long long size
, char *name
,
4017 char *homehost
, int *uuid
)
4019 /* This is primarily called by Create when creating a new array.
4020 * We will then get add_to_super called for each component, and then
4021 * write_init_super called to write it out to each device.
4022 * For IMSM, Create can create on fresh devices or on a pre-existing
4024 * To create on a pre-existing array a different method will be called.
4025 * This one is just for fresh drives.
4027 struct intel_super
*super
;
4028 struct imsm_super
*mpb
;
4033 return init_super_imsm_volume(st
, info
, size
, name
, homehost
, uuid
);
4036 mpb_size
= disks_to_mpb_size(info
->nr_disks
);
4040 super
= alloc_super();
4041 if (super
&& posix_memalign(&super
->buf
, 512, mpb_size
) != 0) {
4046 fprintf(stderr
, Name
4047 ": %s could not allocate superblock\n", __func__
);
4050 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
4051 fprintf(stderr
, Name
4052 ": %s could not allocate migr_rec buffer\n", __func__
);
4057 memset(super
->buf
, 0, mpb_size
);
4059 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4063 /* zeroing superblock */
4067 mpb
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
4069 version
= (char *) mpb
->sig
;
4070 strcpy(version
, MPB_SIGNATURE
);
4071 version
+= strlen(MPB_SIGNATURE
);
4072 strcpy(version
, MPB_VERSION_RAID0
);
4078 static int add_to_super_imsm_volume(struct supertype
*st
, mdu_disk_info_t
*dk
,
4079 int fd
, char *devname
)
4081 struct intel_super
*super
= st
->sb
;
4082 struct imsm_super
*mpb
= super
->anchor
;
4084 struct imsm_dev
*dev
;
4085 struct imsm_map
*map
;
4088 dev
= get_imsm_dev(super
, super
->current_vol
);
4089 map
= get_imsm_map(dev
, 0);
4091 if (! (dk
->state
& (1<<MD_DISK_SYNC
))) {
4092 fprintf(stderr
, Name
": %s: Cannot add spare devices to IMSM volume\n",
4098 /* we're doing autolayout so grab the pre-marked (in
4099 * validate_geometry) raid_disk
4101 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4102 if (dl
->raiddisk
== dk
->raid_disk
)
4105 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4106 if (dl
->major
== dk
->major
&&
4107 dl
->minor
== dk
->minor
)
4112 fprintf(stderr
, Name
": %s is not a member of the same container\n", devname
);
4116 /* add a pristine spare to the metadata */
4117 if (dl
->index
< 0) {
4118 dl
->index
= super
->anchor
->num_disks
;
4119 super
->anchor
->num_disks
++;
4121 /* Check the device has not already been added */
4122 slot
= get_imsm_disk_slot(map
, dl
->index
);
4124 (get_imsm_ord_tbl_ent(dev
, slot
, -1) & IMSM_ORD_REBUILD
) == 0) {
4125 fprintf(stderr
, Name
": %s has been included in this array twice\n",
4129 set_imsm_ord_tbl_ent(map
, dk
->number
, dl
->index
);
4130 dl
->disk
.status
= CONFIGURED_DISK
;
4132 /* if we are creating the first raid device update the family number */
4133 if (super
->current_vol
== 0) {
4135 struct imsm_dev
*_dev
= __get_imsm_dev(mpb
, 0);
4136 struct imsm_disk
*_disk
= __get_imsm_disk(mpb
, dl
->index
);
4138 if (!_dev
|| !_disk
) {
4139 fprintf(stderr
, Name
": BUG mpb setup error\n");
4145 sum
+= __gen_imsm_checksum(mpb
);
4146 mpb
->family_num
= __cpu_to_le32(sum
);
4147 mpb
->orig_family_num
= mpb
->family_num
;
4154 static int add_to_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
,
4155 int fd
, char *devname
)
4157 struct intel_super
*super
= st
->sb
;
4159 unsigned long long size
;
4164 /* If we are on an RAID enabled platform check that the disk is
4165 * attached to the raid controller.
4166 * We do not need to test disks attachment for container based additions,
4167 * they shall be already tested when container was created/assembled.
4169 rv
= find_intel_hba_capability(fd
, super
, devname
);
4170 /* no orom/efi or non-intel hba of the disk */
4172 dprintf("capability: %p fd: %d ret: %d\n",
4173 super
->orom
, fd
, rv
);
4177 if (super
->current_vol
>= 0)
4178 return add_to_super_imsm_volume(st
, dk
, fd
, devname
);
4181 dd
= malloc(sizeof(*dd
));
4184 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
4187 memset(dd
, 0, sizeof(*dd
));
4188 dd
->major
= major(stb
.st_rdev
);
4189 dd
->minor
= minor(stb
.st_rdev
);
4191 dd
->devname
= devname
? strdup(devname
) : NULL
;
4194 dd
->action
= DISK_ADD
;
4195 rv
= imsm_read_serial(fd
, devname
, dd
->serial
);
4198 Name
": failed to retrieve scsi serial, aborting\n");
4203 get_dev_size(fd
, NULL
, &size
);
4205 serialcpy(dd
->disk
.serial
, dd
->serial
);
4206 dd
->disk
.total_blocks
= __cpu_to_le32(size
);
4207 dd
->disk
.status
= SPARE_DISK
;
4208 if (sysfs_disk_to_scsi_id(fd
, &id
) == 0)
4209 dd
->disk
.scsi_id
= __cpu_to_le32(id
);
4211 dd
->disk
.scsi_id
= __cpu_to_le32(0);
4213 if (st
->update_tail
) {
4214 dd
->next
= super
->disk_mgmt_list
;
4215 super
->disk_mgmt_list
= dd
;
4217 dd
->next
= super
->disks
;
4219 super
->updates_pending
++;
4226 static int remove_from_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
)
4228 struct intel_super
*super
= st
->sb
;
4231 /* remove from super works only in mdmon - for communication
4232 * manager - monitor. Check if communication memory buffer
4235 if (!st
->update_tail
) {
4237 Name
": %s shall be used in mdmon context only"
4238 "(line %d).\n", __func__
, __LINE__
);
4241 dd
= malloc(sizeof(*dd
));
4244 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
4247 memset(dd
, 0, sizeof(*dd
));
4248 dd
->major
= dk
->major
;
4249 dd
->minor
= dk
->minor
;
4252 dd
->disk
.status
= SPARE_DISK
;
4253 dd
->action
= DISK_REMOVE
;
4255 dd
->next
= super
->disk_mgmt_list
;
4256 super
->disk_mgmt_list
= dd
;
4262 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
);
4266 struct imsm_super anchor
;
4267 } spare_record
__attribute__ ((aligned(512)));
4269 /* spare records have their own family number and do not have any defined raid
4272 static int write_super_imsm_spares(struct intel_super
*super
, int doclose
)
4274 struct imsm_super
*mpb
= super
->anchor
;
4275 struct imsm_super
*spare
= &spare_record
.anchor
;
4279 spare
->mpb_size
= __cpu_to_le32(sizeof(struct imsm_super
)),
4280 spare
->generation_num
= __cpu_to_le32(1UL),
4281 spare
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
4282 spare
->num_disks
= 1,
4283 spare
->num_raid_devs
= 0,
4284 spare
->cache_size
= mpb
->cache_size
,
4285 spare
->pwr_cycle_count
= __cpu_to_le32(1),
4287 snprintf((char *) spare
->sig
, MAX_SIGNATURE_LENGTH
,
4288 MPB_SIGNATURE MPB_VERSION_RAID0
);
4290 for (d
= super
->disks
; d
; d
= d
->next
) {
4294 spare
->disk
[0] = d
->disk
;
4295 sum
= __gen_imsm_checksum(spare
);
4296 spare
->family_num
= __cpu_to_le32(sum
);
4297 spare
->orig_family_num
= 0;
4298 sum
= __gen_imsm_checksum(spare
);
4299 spare
->check_sum
= __cpu_to_le32(sum
);
4301 if (store_imsm_mpb(d
->fd
, spare
)) {
4302 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4303 __func__
, d
->major
, d
->minor
, strerror(errno
));
4315 static int is_gen_migration(struct imsm_dev
*dev
);
4317 static int write_super_imsm(struct supertype
*st
, int doclose
)
4319 struct intel_super
*super
= st
->sb
;
4320 struct imsm_super
*mpb
= super
->anchor
;
4326 __u32 mpb_size
= sizeof(struct imsm_super
) - sizeof(struct imsm_disk
);
4328 int clear_migration_record
= 1;
4330 /* 'generation' is incremented everytime the metadata is written */
4331 generation
= __le32_to_cpu(mpb
->generation_num
);
4333 mpb
->generation_num
= __cpu_to_le32(generation
);
4335 /* fix up cases where previous mdadm releases failed to set
4338 if (mpb
->orig_family_num
== 0)
4339 mpb
->orig_family_num
= mpb
->family_num
;
4341 for (d
= super
->disks
; d
; d
= d
->next
) {
4345 mpb
->disk
[d
->index
] = d
->disk
;
4349 for (d
= super
->missing
; d
; d
= d
->next
) {
4350 mpb
->disk
[d
->index
] = d
->disk
;
4353 mpb
->num_disks
= num_disks
;
4354 mpb_size
+= sizeof(struct imsm_disk
) * mpb
->num_disks
;
4356 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4357 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
4358 struct imsm_dev
*dev2
= get_imsm_dev(super
, i
);
4360 imsm_copy_dev(dev
, dev2
);
4361 mpb_size
+= sizeof_imsm_dev(dev
, 0);
4363 if (is_gen_migration(dev2
))
4364 clear_migration_record
= 0;
4366 mpb_size
+= __le32_to_cpu(mpb
->bbm_log_size
);
4367 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4369 /* recalculate checksum */
4370 sum
= __gen_imsm_checksum(mpb
);
4371 mpb
->check_sum
= __cpu_to_le32(sum
);
4373 if (clear_migration_record
)
4374 memset(super
->migr_rec_buf
, 0, 512);
4376 /* write the mpb for disks that compose raid devices */
4377 for (d
= super
->disks
; d
; d
= d
->next
) {
4380 if (store_imsm_mpb(d
->fd
, mpb
))
4381 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4382 __func__
, d
->major
, d
->minor
, strerror(errno
));
4383 if (clear_migration_record
) {
4384 unsigned long long dsize
;
4386 get_dev_size(d
->fd
, NULL
, &dsize
);
4387 if (lseek64(d
->fd
, dsize
- 512, SEEK_SET
) >= 0) {
4388 write(d
->fd
, super
->migr_rec_buf
, 512);
4398 return write_super_imsm_spares(super
, doclose
);
4404 static int create_array(struct supertype
*st
, int dev_idx
)
4407 struct imsm_update_create_array
*u
;
4408 struct intel_super
*super
= st
->sb
;
4409 struct imsm_dev
*dev
= get_imsm_dev(super
, dev_idx
);
4410 struct imsm_map
*map
= get_imsm_map(dev
, 0);
4411 struct disk_info
*inf
;
4412 struct imsm_disk
*disk
;
4415 len
= sizeof(*u
) - sizeof(*dev
) + sizeof_imsm_dev(dev
, 0) +
4416 sizeof(*inf
) * map
->num_members
;
4419 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4424 u
->type
= update_create_array
;
4425 u
->dev_idx
= dev_idx
;
4426 imsm_copy_dev(&u
->dev
, dev
);
4427 inf
= get_disk_info(u
);
4428 for (i
= 0; i
< map
->num_members
; i
++) {
4429 int idx
= get_imsm_disk_idx(dev
, i
, -1);
4431 disk
= get_imsm_disk(super
, idx
);
4432 serialcpy(inf
[i
].serial
, disk
->serial
);
4434 append_metadata_update(st
, u
, len
);
4439 static int mgmt_disk(struct supertype
*st
)
4441 struct intel_super
*super
= st
->sb
;
4443 struct imsm_update_add_remove_disk
*u
;
4445 if (!super
->disk_mgmt_list
)
4451 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4456 u
->type
= update_add_remove_disk
;
4457 append_metadata_update(st
, u
, len
);
4462 static int write_init_super_imsm(struct supertype
*st
)
4464 struct intel_super
*super
= st
->sb
;
4465 int current_vol
= super
->current_vol
;
4467 /* we are done with current_vol reset it to point st at the container */
4468 super
->current_vol
= -1;
4470 if (st
->update_tail
) {
4471 /* queue the recently created array / added disk
4472 * as a metadata update */
4475 /* determine if we are creating a volume or adding a disk */
4476 if (current_vol
< 0) {
4477 /* in the mgmt (add/remove) disk case we are running
4478 * in mdmon context, so don't close fd's
4480 return mgmt_disk(st
);
4482 rv
= create_array(st
, current_vol
);
4487 for (d
= super
->disks
; d
; d
= d
->next
)
4488 Kill(d
->devname
, NULL
, 0, 1, 1);
4489 return write_super_imsm(st
, 1);
4494 static int store_super_imsm(struct supertype
*st
, int fd
)
4496 struct intel_super
*super
= st
->sb
;
4497 struct imsm_super
*mpb
= super
? super
->anchor
: NULL
;
4503 return store_imsm_mpb(fd
, mpb
);
4509 static int imsm_bbm_log_size(struct imsm_super
*mpb
)
4511 return __le32_to_cpu(mpb
->bbm_log_size
);
4515 static int validate_geometry_imsm_container(struct supertype
*st
, int level
,
4516 int layout
, int raiddisks
, int chunk
,
4517 unsigned long long size
, char *dev
,
4518 unsigned long long *freesize
,
4522 unsigned long long ldsize
;
4523 struct intel_super
*super
=NULL
;
4526 if (level
!= LEVEL_CONTAINER
)
4531 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4534 fprintf(stderr
, Name
": imsm: Cannot open %s: %s\n",
4535 dev
, strerror(errno
));
4538 if (!get_dev_size(fd
, dev
, &ldsize
)) {
4543 /* capabilities retrieve could be possible
4544 * note that there is no fd for the disks in array.
4546 super
= alloc_super();
4549 Name
": malloc of %zu failed.\n",
4555 rv
= find_intel_hba_capability(fd
, super
, verbose
? dev
: NULL
);
4559 fd2devname(fd
, str
);
4560 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4561 fd
, str
, super
->orom
, rv
, raiddisks
);
4563 /* no orom/efi or non-intel hba of the disk */
4569 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4571 fprintf(stderr
, Name
": %d exceeds maximum number of"
4572 " platform supported disks: %d\n",
4573 raiddisks
, super
->orom
->tds
);
4579 *freesize
= avail_size_imsm(st
, ldsize
>> 9);
4585 static unsigned long long find_size(struct extent
*e
, int *idx
, int num_extents
)
4587 const unsigned long long base_start
= e
[*idx
].start
;
4588 unsigned long long end
= base_start
+ e
[*idx
].size
;
4591 if (base_start
== end
)
4595 for (i
= *idx
; i
< num_extents
; i
++) {
4596 /* extend overlapping extents */
4597 if (e
[i
].start
>= base_start
&&
4598 e
[i
].start
<= end
) {
4601 if (e
[i
].start
+ e
[i
].size
> end
)
4602 end
= e
[i
].start
+ e
[i
].size
;
4603 } else if (e
[i
].start
> end
) {
4609 return end
- base_start
;
4612 static unsigned long long merge_extents(struct intel_super
*super
, int sum_extents
)
4614 /* build a composite disk with all known extents and generate a new
4615 * 'maxsize' given the "all disks in an array must share a common start
4616 * offset" constraint
4618 struct extent
*e
= calloc(sum_extents
, sizeof(*e
));
4622 unsigned long long pos
;
4623 unsigned long long start
= 0;
4624 unsigned long long maxsize
;
4625 unsigned long reserve
;
4630 /* coalesce and sort all extents. also, check to see if we need to
4631 * reserve space between member arrays
4634 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4637 for (i
= 0; i
< dl
->extent_cnt
; i
++)
4640 qsort(e
, sum_extents
, sizeof(*e
), cmp_extent
);
4645 while (i
< sum_extents
) {
4646 e
[j
].start
= e
[i
].start
;
4647 e
[j
].size
= find_size(e
, &i
, sum_extents
);
4649 if (e
[j
-1].size
== 0)
4658 unsigned long long esize
;
4660 esize
= e
[i
].start
- pos
;
4661 if (esize
>= maxsize
) {
4666 pos
= e
[i
].start
+ e
[i
].size
;
4668 } while (e
[i
-1].size
);
4674 /* FIXME assumes volume at offset 0 is the first volume in a
4677 if (start_extent
> 0)
4678 reserve
= IMSM_RESERVED_SECTORS
; /* gap between raid regions */
4682 if (maxsize
< reserve
)
4685 super
->create_offset
= ~((__u32
) 0);
4686 if (start
+ reserve
> super
->create_offset
)
4687 return 0; /* start overflows create_offset */
4688 super
->create_offset
= start
+ reserve
;
4690 return maxsize
- reserve
;
/* Check whether the platform option-rom supports the requested raid
 * level with the given member count.  Levels 4 and 6 (and negatives)
 * are never supported by imsm.  Without an orom any remaining level is
 * accepted.  Returns non-zero when the configuration is allowed.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	if (level < 0 || level == 6 || level == 4)
		return 0;

	/* if we have an orom prevent invalid raid levels */
	if (orom)
		switch (level) {
		case 0: return imsm_orom_has_raid0(orom);
		case 1:
			/* >2 disks means imsm raid1e semantics */
			if (raiddisks > 2)
				return imsm_orom_has_raid1e(orom);
			return imsm_orom_has_raid1(orom) && raiddisks == 2;
		case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
		case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
		}
	else
		return 1; /* not on an Intel RAID platform so anything goes */

	return 0;
}
4716 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4718 * validate volume parameters with OROM/EFI capabilities
4721 validate_geometry_imsm_orom(struct intel_super
*super
, int level
, int layout
,
4722 int raiddisks
, int *chunk
, int verbose
)
4727 /* validate container capabilities */
4728 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4730 fprintf(stderr
, Name
": %d exceeds maximum number of"
4731 " platform supported disks: %d\n",
4732 raiddisks
, super
->orom
->tds
);
4736 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4737 if (super
->orom
&& (!is_raid_level_supported(super
->orom
, level
,
4739 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4740 level
, raiddisks
, raiddisks
> 1 ? "s" : "");
4743 if (super
->orom
&& level
!= 1) {
4744 if (chunk
&& (*chunk
== 0 || *chunk
== UnSet
))
4745 *chunk
= imsm_orom_default_chunk(super
->orom
);
4746 else if (chunk
&& !imsm_orom_has_chunk(super
->orom
, *chunk
)) {
4747 pr_vrb(": platform does not support a chunk size of: "
4752 if (layout
!= imsm_level_to_layout(level
)) {
4754 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4755 else if (level
== 10)
4756 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4758 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4765 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4766 * FIX ME add ahci details
4768 static int validate_geometry_imsm_volume(struct supertype
*st
, int level
,
4769 int layout
, int raiddisks
, int *chunk
,
4770 unsigned long long size
, char *dev
,
4771 unsigned long long *freesize
,
4775 struct intel_super
*super
= st
->sb
;
4776 struct imsm_super
*mpb
= super
->anchor
;
4778 unsigned long long pos
= 0;
4779 unsigned long long maxsize
;
4783 /* We must have the container info already read in. */
4787 if (!validate_geometry_imsm_orom(super
, level
, layout
, raiddisks
, chunk
, verbose
)) {
4788 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4789 "Cannot proceed with the action(s).\n");
4793 /* General test: make sure there is space for
4794 * 'raiddisks' device extents of size 'size' at a given
4797 unsigned long long minsize
= size
;
4798 unsigned long long start_offset
= MaxSector
;
4801 minsize
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
4802 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4807 e
= get_extents(super
, dl
);
4810 unsigned long long esize
;
4811 esize
= e
[i
].start
- pos
;
4812 if (esize
>= minsize
)
4814 if (found
&& start_offset
== MaxSector
) {
4817 } else if (found
&& pos
!= start_offset
) {
4821 pos
= e
[i
].start
+ e
[i
].size
;
4823 } while (e
[i
-1].size
);
4828 if (dcnt
< raiddisks
) {
4830 fprintf(stderr
, Name
": imsm: Not enough "
4831 "devices with space for this array "
4839 /* This device must be a member of the set */
4840 if (stat(dev
, &stb
) < 0)
4842 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
4844 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4845 if (dl
->major
== (int)major(stb
.st_rdev
) &&
4846 dl
->minor
== (int)minor(stb
.st_rdev
))
4851 fprintf(stderr
, Name
": %s is not in the "
4852 "same imsm set\n", dev
);
4854 } else if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
) {
4855 /* If a volume is present then the current creation attempt
4856 * cannot incorporate new spares because the orom may not
4857 * understand this configuration (all member disks must be
4858 * members of each array in the container).
4860 fprintf(stderr
, Name
": %s is a spare and a volume"
4861 " is already defined for this container\n", dev
);
4862 fprintf(stderr
, Name
": The option-rom requires all member"
4863 " disks to be a member of all volumes\n");
4867 /* retrieve the largest free space block */
4868 e
= get_extents(super
, dl
);
4873 unsigned long long esize
;
4875 esize
= e
[i
].start
- pos
;
4876 if (esize
>= maxsize
)
4878 pos
= e
[i
].start
+ e
[i
].size
;
4880 } while (e
[i
-1].size
);
4885 fprintf(stderr
, Name
": unable to determine free space for: %s\n",
4889 if (maxsize
< size
) {
4891 fprintf(stderr
, Name
": %s not enough space (%llu < %llu)\n",
4892 dev
, maxsize
, size
);
4896 /* count total number of extents for merge */
4898 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4900 i
+= dl
->extent_cnt
;
4902 maxsize
= merge_extents(super
, i
);
4903 if (maxsize
< size
|| maxsize
== 0) {
4905 fprintf(stderr
, Name
": not enough space after merge (%llu < %llu)\n",
4910 *freesize
= maxsize
;
4915 static int reserve_space(struct supertype
*st
, int raiddisks
,
4916 unsigned long long size
, int chunk
,
4917 unsigned long long *freesize
)
4919 struct intel_super
*super
= st
->sb
;
4920 struct imsm_super
*mpb
= super
->anchor
;
4925 unsigned long long maxsize
;
4926 unsigned long long minsize
;
4930 /* find the largest common start free region of the possible disks */
4934 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4940 /* don't activate new spares if we are orom constrained
4941 * and there is already a volume active in the container
4943 if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
)
4946 e
= get_extents(super
, dl
);
4949 for (i
= 1; e
[i
-1].size
; i
++)
4957 maxsize
= merge_extents(super
, extent_cnt
);
4961 minsize
= chunk
* 2;
4963 if (cnt
< raiddisks
||
4964 (super
->orom
&& used
&& used
!= raiddisks
) ||
4965 maxsize
< minsize
||
4967 fprintf(stderr
, Name
": not enough devices with space to create array.\n");
4968 return 0; /* No enough free spaces large enough */
4980 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4982 dl
->raiddisk
= cnt
++;
4989 static int validate_geometry_imsm(struct supertype
*st
, int level
, int layout
,
4990 int raiddisks
, int *chunk
, unsigned long long size
,
4991 char *dev
, unsigned long long *freesize
,
4999 * if given unused devices create a container
5000 * if given given devices in a container create a member volume
5002 if (level
== LEVEL_CONTAINER
) {
5003 /* Must be a fresh device to add to a container */
5004 return validate_geometry_imsm_container(st
, level
, layout
,
5006 chunk
?*chunk
:0, size
,
5012 if (st
->sb
&& freesize
) {
5013 /* we are being asked to automatically layout a
5014 * new volume based on the current contents of
5015 * the container. If the the parameters can be
5016 * satisfied reserve_space will record the disks,
5017 * start offset, and size of the volume to be
5018 * created. add_to_super and getinfo_super
5019 * detect when autolayout is in progress.
5021 if (!validate_geometry_imsm_orom(st
->sb
, level
, layout
,
5025 return reserve_space(st
, raiddisks
, size
,
5026 chunk
?*chunk
:0, freesize
);
5031 /* creating in a given container */
5032 return validate_geometry_imsm_volume(st
, level
, layout
,
5033 raiddisks
, chunk
, size
,
5034 dev
, freesize
, verbose
);
5037 /* This device needs to be a device in an 'imsm' container */
5038 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
5042 Name
": Cannot create this array on device %s\n",
5047 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
5049 fprintf(stderr
, Name
": Cannot open %s: %s\n",
5050 dev
, strerror(errno
));
5053 /* Well, it is in use by someone, maybe an 'imsm' container. */
5054 cfd
= open_container(fd
);
5058 fprintf(stderr
, Name
": Cannot use %s: It is busy\n",
5062 sra
= sysfs_read(cfd
, 0, GET_VERSION
);
5063 if (sra
&& sra
->array
.major_version
== -1 &&
5064 strcmp(sra
->text_version
, "imsm") == 0)
5068 /* This is a member of a imsm container. Load the container
5069 * and try to create a volume
5071 struct intel_super
*super
;
5073 if (load_super_imsm_all(st
, cfd
, (void **) &super
, NULL
) == 0) {
5075 st
->container_dev
= fd2devnum(cfd
);
5077 return validate_geometry_imsm_volume(st
, level
, layout
,
5085 fprintf(stderr
, Name
": failed container membership check\n");
5091 static void default_geometry_imsm(struct supertype
*st
, int *level
, int *layout
, int *chunk
)
5093 struct intel_super
*super
= st
->sb
;
5095 if (level
&& *level
== UnSet
)
5096 *level
= LEVEL_CONTAINER
;
5098 if (level
&& layout
&& *layout
== UnSet
)
5099 *layout
= imsm_level_to_layout(*level
);
5101 if (chunk
&& (*chunk
== UnSet
|| *chunk
== 0) &&
5102 super
&& super
->orom
)
5103 *chunk
= imsm_orom_default_chunk(super
->orom
);
5106 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
);
5108 static int kill_subarray_imsm(struct supertype
*st
)
5110 /* remove the subarray currently referenced by ->current_vol */
5112 struct intel_dev
**dp
;
5113 struct intel_super
*super
= st
->sb
;
5114 __u8 current_vol
= super
->current_vol
;
5115 struct imsm_super
*mpb
= super
->anchor
;
5117 if (super
->current_vol
< 0)
5119 super
->current_vol
= -1; /* invalidate subarray cursor */
5121 /* block deletions that would change the uuid of active subarrays
5123 * FIXME when immutable ids are available, but note that we'll
5124 * also need to fixup the invalidated/active subarray indexes in
5127 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5130 if (i
< current_vol
)
5132 sprintf(subarray
, "%u", i
);
5133 if (is_subarray_active(subarray
, st
->devname
)) {
5135 Name
": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
5142 if (st
->update_tail
) {
5143 struct imsm_update_kill_array
*u
= malloc(sizeof(*u
));
5147 u
->type
= update_kill_array
;
5148 u
->dev_idx
= current_vol
;
5149 append_metadata_update(st
, u
, sizeof(*u
));
5154 for (dp
= &super
->devlist
; *dp
;)
5155 if ((*dp
)->index
== current_vol
) {
5158 handle_missing(super
, (*dp
)->dev
);
5159 if ((*dp
)->index
> current_vol
)
5164 /* no more raid devices, all active components are now spares,
5165 * but of course failed are still failed
5167 if (--mpb
->num_raid_devs
== 0) {
5170 for (d
= super
->disks
; d
; d
= d
->next
)
5171 if (d
->index
> -2) {
5173 d
->disk
.status
= SPARE_DISK
;
5177 super
->updates_pending
++;
5182 static int update_subarray_imsm(struct supertype
*st
, char *subarray
,
5183 char *update
, struct mddev_ident
*ident
)
5185 /* update the subarray currently referenced by ->current_vol */
5186 struct intel_super
*super
= st
->sb
;
5187 struct imsm_super
*mpb
= super
->anchor
;
5189 if (strcmp(update
, "name") == 0) {
5190 char *name
= ident
->name
;
5194 if (is_subarray_active(subarray
, st
->devname
)) {
5196 Name
": Unable to update name of active subarray\n");
5200 if (!check_name(super
, name
, 0))
5203 vol
= strtoul(subarray
, &ep
, 10);
5204 if (*ep
!= '\0' || vol
>= super
->anchor
->num_raid_devs
)
5207 if (st
->update_tail
) {
5208 struct imsm_update_rename_array
*u
= malloc(sizeof(*u
));
5212 u
->type
= update_rename_array
;
5214 snprintf((char *) u
->name
, MAX_RAID_SERIAL_LEN
, "%s", name
);
5215 append_metadata_update(st
, u
, sizeof(*u
));
5217 struct imsm_dev
*dev
;
5220 dev
= get_imsm_dev(super
, vol
);
5221 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
5222 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5223 dev
= get_imsm_dev(super
, i
);
5224 handle_missing(super
, dev
);
5226 super
->updates_pending
++;
5234 static int is_gen_migration(struct imsm_dev
*dev
)
5236 if (!dev
->vol
.migr_state
)
5239 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5244 #endif /* MDASSEMBLE */
5246 static int is_rebuilding(struct imsm_dev
*dev
)
5248 struct imsm_map
*migr_map
;
5250 if (!dev
->vol
.migr_state
)
5253 if (migr_type(dev
) != MIGR_REBUILD
)
5256 migr_map
= get_imsm_map(dev
, 1);
5258 if (migr_map
->map_state
== IMSM_T_STATE_DEGRADED
)
5264 static void update_recovery_start(struct intel_super
*super
,
5265 struct imsm_dev
*dev
,
5266 struct mdinfo
*array
)
5268 struct mdinfo
*rebuild
= NULL
;
5272 if (!is_rebuilding(dev
))
5275 /* Find the rebuild target, but punt on the dual rebuild case */
5276 for (d
= array
->devs
; d
; d
= d
->next
)
5277 if (d
->recovery_start
== 0) {
5284 /* (?) none of the disks are marked with
5285 * IMSM_ORD_REBUILD, so assume they are missing and the
5286 * disk_ord_tbl was not correctly updated
5288 dprintf("%s: failed to locate out-of-sync disk\n", __func__
);
5292 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
5293 rebuild
->recovery_start
= units
* blocks_per_migr_unit(super
, dev
);
5296 static int recover_backup_imsm(struct supertype
*st
, struct mdinfo
*info
);
5298 static struct mdinfo
*container_content_imsm(struct supertype
*st
, char *subarray
)
5300 /* Given a container loaded by load_super_imsm_all,
5301 * extract information about all the arrays into
5303 * If 'subarray' is given, just extract info about that array.
5305 * For each imsm_dev create an mdinfo, fill it in,
5306 * then look for matching devices in super->disks
5307 * and create appropriate device mdinfo.
5309 struct intel_super
*super
= st
->sb
;
5310 struct imsm_super
*mpb
= super
->anchor
;
5311 struct mdinfo
*rest
= NULL
;
5315 int spare_disks
= 0;
5317 /* check for bad blocks */
5318 if (imsm_bbm_log_size(super
->anchor
))
5321 /* count spare devices, not used in maps
5323 for (d
= super
->disks
; d
; d
= d
->next
)
5327 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5328 struct imsm_dev
*dev
;
5329 struct imsm_map
*map
;
5330 struct imsm_map
*map2
;
5331 struct mdinfo
*this;
5336 (i
!= strtoul(subarray
, &ep
, 10) || *ep
!= '\0'))
5339 dev
= get_imsm_dev(super
, i
);
5340 map
= get_imsm_map(dev
, 0);
5341 map2
= get_imsm_map(dev
, 1);
5343 /* do not publish arrays that are in the middle of an
5344 * unsupported migration
5346 if (dev
->vol
.migr_state
&&
5347 (migr_type(dev
) == MIGR_STATE_CHANGE
)) {
5348 fprintf(stderr
, Name
": cannot assemble volume '%.16s':"
5349 " unsupported migration in progress\n",
5353 /* do not publish arrays that are not support by controller's
5357 chunk
= __le16_to_cpu(map
->blocks_per_strip
) >> 1;
5359 if (!validate_geometry_imsm_orom(super
,
5360 get_imsm_raid_level(map
), /* RAID level */
5361 imsm_level_to_layout(get_imsm_raid_level(map
)),
5362 map
->num_members
, /* raid disks */
5365 fprintf(stderr
, Name
": RAID gemetry validation failed. "
5366 "Cannot proceed with the action(s).\n");
5369 #endif /* MDASSEMBLE */
5370 this = malloc(sizeof(*this));
5372 fprintf(stderr
, Name
": failed to allocate %zu bytes\n",
5376 memset(this, 0, sizeof(*this));
5379 super
->current_vol
= i
;
5380 getinfo_super_imsm_volume(st
, this, NULL
);
5381 for (slot
= 0 ; slot
< map
->num_members
; slot
++) {
5382 unsigned long long recovery_start
;
5383 struct mdinfo
*info_d
;
5390 idx
= get_imsm_disk_idx(dev
, slot
, 0);
5391 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
5392 for (d
= super
->disks
; d
; d
= d
->next
)
5393 if (d
->index
== idx
)
5396 recovery_start
= MaxSector
;
5399 if (d
&& is_failed(&d
->disk
))
5401 if (ord
& IMSM_ORD_REBUILD
)
5405 * if we skip some disks the array will be assmebled degraded;
5406 * reset resync start to avoid a dirty-degraded
5407 * situation when performing the intial sync
5409 * FIXME handle dirty degraded
5411 if ((skip
|| recovery_start
== 0) && !dev
->vol
.dirty
)
5412 this->resync_start
= MaxSector
;
5416 info_d
= calloc(1, sizeof(*info_d
));
5418 fprintf(stderr
, Name
": failed to allocate disk"
5419 " for volume %.16s\n", dev
->volume
);
5420 info_d
= this->devs
;
5422 struct mdinfo
*d
= info_d
->next
;
5431 info_d
->next
= this->devs
;
5432 this->devs
= info_d
;
5434 info_d
->disk
.number
= d
->index
;
5435 info_d
->disk
.major
= d
->major
;
5436 info_d
->disk
.minor
= d
->minor
;
5437 info_d
->disk
.raid_disk
= slot
;
5438 info_d
->recovery_start
= recovery_start
;
5440 if (slot
< map2
->num_members
)
5441 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5443 this->array
.spare_disks
++;
5445 if (slot
< map
->num_members
)
5446 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5448 this->array
.spare_disks
++;
5450 if (info_d
->recovery_start
== MaxSector
)
5451 this->array
.working_disks
++;
5453 info_d
->events
= __le32_to_cpu(mpb
->generation_num
);
5454 info_d
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5455 info_d
->component_size
= __le32_to_cpu(map
->blocks_per_member
);
5457 /* now that the disk list is up-to-date fixup recovery_start */
5458 update_recovery_start(super
, dev
, this);
5459 this->array
.spare_disks
+= spare_disks
;
5461 /* check for reshape */
5462 if (this->reshape_active
== 1)
5463 recover_backup_imsm(st
, this);
5468 /* if array has bad blocks, set suitable bit in array status */
5470 rest
->array
.state
|= (1<<MD_SB_BBM_ERRORS
);
5476 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
)
5478 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5481 return map
->map_state
== IMSM_T_STATE_UNINITIALIZED
?
5482 IMSM_T_STATE_UNINITIALIZED
: IMSM_T_STATE_NORMAL
;
5484 switch (get_imsm_raid_level(map
)) {
5486 return IMSM_T_STATE_FAILED
;
5489 if (failed
< map
->num_members
)
5490 return IMSM_T_STATE_DEGRADED
;
5492 return IMSM_T_STATE_FAILED
;
5497 * check to see if any mirrors have failed, otherwise we
5498 * are degraded. Even numbered slots are mirrored on
5502 /* gcc -Os complains that this is unused */
5503 int insync
= insync
;
5505 for (i
= 0; i
< map
->num_members
; i
++) {
5506 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
5507 int idx
= ord_to_idx(ord
);
5508 struct imsm_disk
*disk
;
5510 /* reset the potential in-sync count on even-numbered
5511 * slots. num_copies is always 2 for imsm raid10
5516 disk
= get_imsm_disk(super
, idx
);
5517 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5520 /* no in-sync disks left in this mirror the
5524 return IMSM_T_STATE_FAILED
;
5527 return IMSM_T_STATE_DEGRADED
;
5531 return IMSM_T_STATE_DEGRADED
;
5533 return IMSM_T_STATE_FAILED
;
5539 return map
->map_state
;
5542 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
)
5546 struct imsm_disk
*disk
;
5547 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5548 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
5552 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5553 * disks that are being rebuilt. New failures are recorded to
5554 * map[0]. So we look through all the disks we started with and
5555 * see if any failures are still present, or if any new ones
5558 * FIXME add support for online capacity expansion and
5559 * raid-level-migration
5561 for (i
= 0; i
< prev
->num_members
; i
++) {
5562 ord
= __le32_to_cpu(prev
->disk_ord_tbl
[i
]);
5563 ord
|= __le32_to_cpu(map
->disk_ord_tbl
[i
]);
5564 idx
= ord_to_idx(ord
);
5566 disk
= get_imsm_disk(super
, idx
);
5567 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5575 static int imsm_open_new(struct supertype
*c
, struct active_array
*a
,
5578 struct intel_super
*super
= c
->sb
;
5579 struct imsm_super
*mpb
= super
->anchor
;
5581 if (atoi(inst
) >= mpb
->num_raid_devs
) {
5582 fprintf(stderr
, "%s: subarry index %d, out of range\n",
5583 __func__
, atoi(inst
));
5587 dprintf("imsm: open_new %s\n", inst
);
5588 a
->info
.container_member
= atoi(inst
);
5592 static int is_resyncing(struct imsm_dev
*dev
)
5594 struct imsm_map
*migr_map
;
5596 if (!dev
->vol
.migr_state
)
5599 if (migr_type(dev
) == MIGR_INIT
||
5600 migr_type(dev
) == MIGR_REPAIR
)
5603 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5606 migr_map
= get_imsm_map(dev
, 1);
5608 if ((migr_map
->map_state
== IMSM_T_STATE_NORMAL
) &&
5609 (dev
->vol
.migr_type
!= MIGR_GEN_MIGR
))
5615 /* return true if we recorded new information */
5616 static int mark_failure(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5620 struct imsm_map
*map
;
5622 /* new failures are always set in map[0] */
5623 map
= get_imsm_map(dev
, 0);
5625 slot
= get_imsm_disk_slot(map
, idx
);
5629 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
5630 if (is_failed(disk
) && (ord
& IMSM_ORD_REBUILD
))
5633 disk
->status
|= FAILED_DISK
;
5634 set_imsm_ord_tbl_ent(map
, slot
, idx
| IMSM_ORD_REBUILD
);
5635 if (map
->failed_disk_num
== 0xff)
5636 map
->failed_disk_num
= slot
;
5640 static void mark_missing(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5642 mark_failure(dev
, disk
, idx
);
5644 if (disk
->scsi_id
== __cpu_to_le32(~(__u32
)0))
5647 disk
->scsi_id
= __cpu_to_le32(~(__u32
)0);
5648 memmove(&disk
->serial
[0], &disk
->serial
[1], MAX_RAID_SERIAL_LEN
- 1);
5651 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
)
5657 if (!super
->missing
)
5659 failed
= imsm_count_failed(super
, dev
);
5660 map_state
= imsm_check_degraded(super
, dev
, failed
);
5662 dprintf("imsm: mark missing\n");
5663 end_migration(dev
, map_state
);
5664 for (dl
= super
->missing
; dl
; dl
= dl
->next
)
5665 mark_missing(dev
, &dl
->disk
, dl
->index
);
5666 super
->updates_pending
++;
5669 static unsigned long long imsm_set_array_size(struct imsm_dev
*dev
)
5671 int used_disks
= imsm_num_data_members(dev
, 0);
5672 unsigned long long array_blocks
;
5673 struct imsm_map
*map
;
5675 if (used_disks
== 0) {
5676 /* when problems occures
5677 * return current array_blocks value
5679 array_blocks
= __le32_to_cpu(dev
->size_high
);
5680 array_blocks
= array_blocks
<< 32;
5681 array_blocks
+= __le32_to_cpu(dev
->size_low
);
5683 return array_blocks
;
5686 /* set array size in metadata
5688 map
= get_imsm_map(dev
, 0);
5689 array_blocks
= map
->blocks_per_member
* used_disks
;
5691 /* round array size down to closest MB
5693 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
5694 dev
->size_low
= __cpu_to_le32((__u32
)array_blocks
);
5695 dev
->size_high
= __cpu_to_le32((__u32
)(array_blocks
>> 32));
5697 return array_blocks
;
5700 static void imsm_set_disk(struct active_array
*a
, int n
, int state
);
5702 static void imsm_progress_container_reshape(struct intel_super
*super
)
5704 /* if no device has a migr_state, but some device has a
5705 * different number of members than the previous device, start
5706 * changing the number of devices in this device to match
5709 struct imsm_super
*mpb
= super
->anchor
;
5710 int prev_disks
= -1;
5714 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5715 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
5716 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5717 struct imsm_map
*map2
;
5718 int prev_num_members
;
5720 if (dev
->vol
.migr_state
)
5723 if (prev_disks
== -1)
5724 prev_disks
= map
->num_members
;
5725 if (prev_disks
== map
->num_members
)
5728 /* OK, this array needs to enter reshape mode.
5729 * i.e it needs a migr_state
5732 copy_map_size
= sizeof_imsm_map(map
);
5733 prev_num_members
= map
->num_members
;
5734 map
->num_members
= prev_disks
;
5735 dev
->vol
.migr_state
= 1;
5736 dev
->vol
.curr_migr_unit
= 0;
5737 dev
->vol
.migr_type
= MIGR_GEN_MIGR
;
5738 for (i
= prev_num_members
;
5739 i
< map
->num_members
; i
++)
5740 set_imsm_ord_tbl_ent(map
, i
, i
);
5741 map2
= get_imsm_map(dev
, 1);
5742 /* Copy the current map */
5743 memcpy(map2
, map
, copy_map_size
);
5744 map2
->num_members
= prev_num_members
;
5746 imsm_set_array_size(dev
);
5747 super
->updates_pending
++;
5751 /* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild
5752 * states are handled in imsm_set_disk() with one exception, when a
5753 * resync is stopped due to a new failure this routine will set the
5754 * 'degraded' state for the array.
5756 static int imsm_set_array_state(struct active_array
*a
, int consistent
)
5758 int inst
= a
->info
.container_member
;
5759 struct intel_super
*super
= a
->container
->sb
;
5760 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5761 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5762 int failed
= imsm_count_failed(super
, dev
);
5763 __u8 map_state
= imsm_check_degraded(super
, dev
, failed
);
5764 __u32 blocks_per_unit
;
5766 if (dev
->vol
.migr_state
&&
5767 dev
->vol
.migr_type
== MIGR_GEN_MIGR
) {
5768 /* array state change is blocked due to reshape action
5770 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5771 * - finish the reshape (if last_checkpoint is big and action != reshape)
5772 * - update curr_migr_unit
5774 if (a
->curr_action
== reshape
) {
5775 /* still reshaping, maybe update curr_migr_unit */
5776 goto mark_checkpoint
;
5778 if (a
->last_checkpoint
== 0 && a
->prev_action
== reshape
) {
5779 /* for some reason we aborted the reshape.
5782 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
5783 dev
->vol
.migr_state
= 0;
5784 dev
->vol
.migr_type
= 0;
5785 dev
->vol
.curr_migr_unit
= 0;
5786 memcpy(map
, map2
, sizeof_imsm_map(map2
));
5787 super
->updates_pending
++;
5789 if (a
->last_checkpoint
>= a
->info
.component_size
) {
5790 unsigned long long array_blocks
;
5794 used_disks
= imsm_num_data_members(dev
, 0);
5795 if (used_disks
> 0) {
5797 map
->blocks_per_member
*
5799 /* round array size down to closest MB
5801 array_blocks
= (array_blocks
5802 >> SECT_PER_MB_SHIFT
)
5803 << SECT_PER_MB_SHIFT
;
5804 a
->info
.custom_array_size
= array_blocks
;
5805 /* encourage manager to update array
5809 a
->check_reshape
= 1;
5811 /* finalize online capacity expansion/reshape */
5812 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
5814 mdi
->disk
.raid_disk
,
5817 imsm_progress_container_reshape(super
);
5822 /* before we activate this array handle any missing disks */
5823 if (consistent
== 2)
5824 handle_missing(super
, dev
);
5826 if (consistent
== 2 &&
5827 (!is_resync_complete(&a
->info
) ||
5828 map_state
!= IMSM_T_STATE_NORMAL
||
5829 dev
->vol
.migr_state
))
5832 if (is_resync_complete(&a
->info
)) {
5833 /* complete intialization / resync,
5834 * recovery and interrupted recovery is completed in
5837 if (is_resyncing(dev
)) {
5838 dprintf("imsm: mark resync done\n");
5839 end_migration(dev
, map_state
);
5840 super
->updates_pending
++;
5841 a
->last_checkpoint
= 0;
5843 } else if (!is_resyncing(dev
) && !failed
) {
5844 /* mark the start of the init process if nothing is failed */
5845 dprintf("imsm: mark resync start\n");
5846 if (map
->map_state
== IMSM_T_STATE_UNINITIALIZED
)
5847 migrate(dev
, super
, IMSM_T_STATE_NORMAL
, MIGR_INIT
);
5849 migrate(dev
, super
, IMSM_T_STATE_NORMAL
, MIGR_REPAIR
);
5850 super
->updates_pending
++;
5854 /* skip checkpointing for general migration,
5855 * it is controlled in mdadm
5857 if (is_gen_migration(dev
))
5858 goto skip_mark_checkpoint
;
5860 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5861 blocks_per_unit
= blocks_per_migr_unit(super
, dev
);
5862 if (blocks_per_unit
) {
5866 units
= a
->last_checkpoint
/ blocks_per_unit
;
5869 /* check that we did not overflow 32-bits, and that
5870 * curr_migr_unit needs updating
5872 if (units32
== units
&&
5874 __le32_to_cpu(dev
->vol
.curr_migr_unit
) != units32
) {
5875 dprintf("imsm: mark checkpoint (%u)\n", units32
);
5876 dev
->vol
.curr_migr_unit
= __cpu_to_le32(units32
);
5877 super
->updates_pending
++;
5881 skip_mark_checkpoint
:
5882 /* mark dirty / clean */
5883 if (dev
->vol
.dirty
!= !consistent
) {
5884 dprintf("imsm: mark '%s'\n", consistent
? "clean" : "dirty");
5889 super
->updates_pending
++;
5895 static void imsm_set_disk(struct active_array
*a
, int n
, int state
)
5897 int inst
= a
->info
.container_member
;
5898 struct intel_super
*super
= a
->container
->sb
;
5899 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5900 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5901 struct imsm_disk
*disk
;
5906 if (n
> map
->num_members
)
5907 fprintf(stderr
, "imsm: set_disk %d out of range 0..%d\n",
5908 n
, map
->num_members
- 1);
5913 dprintf("imsm: set_disk %d:%x\n", n
, state
);
5915 ord
= get_imsm_ord_tbl_ent(dev
, n
, -1);
5916 disk
= get_imsm_disk(super
, ord_to_idx(ord
));
5918 /* check for new failures */
5919 if (state
& DS_FAULTY
) {
5920 if (mark_failure(dev
, disk
, ord_to_idx(ord
)))
5921 super
->updates_pending
++;
5924 /* check if in_sync */
5925 if (state
& DS_INSYNC
&& ord
& IMSM_ORD_REBUILD
&& is_rebuilding(dev
)) {
5926 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
5928 set_imsm_ord_tbl_ent(migr_map
, n
, ord_to_idx(ord
));
5929 super
->updates_pending
++;
5932 failed
= imsm_count_failed(super
, dev
);
5933 map_state
= imsm_check_degraded(super
, dev
, failed
);
5935 /* check if recovery complete, newly degraded, or failed */
5936 if (map_state
== IMSM_T_STATE_NORMAL
&& is_rebuilding(dev
)) {
5937 end_migration(dev
, map_state
);
5938 map
= get_imsm_map(dev
, 0);
5939 map
->failed_disk_num
= ~0;
5940 super
->updates_pending
++;
5941 a
->last_checkpoint
= 0;
5942 } else if (map_state
== IMSM_T_STATE_DEGRADED
&&
5943 map
->map_state
!= map_state
&&
5944 !dev
->vol
.migr_state
) {
5945 dprintf("imsm: mark degraded\n");
5946 map
->map_state
= map_state
;
5947 super
->updates_pending
++;
5948 a
->last_checkpoint
= 0;
5949 } else if (map_state
== IMSM_T_STATE_FAILED
&&
5950 map
->map_state
!= map_state
) {
5951 dprintf("imsm: mark failed\n");
5952 end_migration(dev
, map_state
);
5953 super
->updates_pending
++;
5954 a
->last_checkpoint
= 0;
5955 } else if (is_gen_migration(dev
)) {
5956 dprintf("imsm: Detected General Migration in state: ");
5957 if (map_state
== IMSM_T_STATE_NORMAL
) {
5958 end_migration(dev
, map_state
);
5959 map
= get_imsm_map(dev
, 0);
5960 map
->failed_disk_num
= ~0;
5961 dprintf("normal\n");
5963 if (map_state
== IMSM_T_STATE_DEGRADED
) {
5964 printf("degraded\n");
5965 end_migration(dev
, map_state
);
5967 dprintf("failed\n");
5969 map
->map_state
= map_state
;
5971 super
->updates_pending
++;
5975 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
)
5978 __u32 mpb_size
= __le32_to_cpu(mpb
->mpb_size
);
5979 unsigned long long dsize
;
5980 unsigned long long sectors
;
5982 get_dev_size(fd
, NULL
, &dsize
);
5984 if (mpb_size
> 512) {
5985 /* -1 to account for anchor */
5986 sectors
= mpb_sectors(mpb
) - 1;
5988 /* write the extended mpb to the sectors preceeding the anchor */
5989 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0)
5992 if ((unsigned long long)write(fd
, buf
+ 512, 512 * sectors
)
5997 /* first block is stored on second to last sector of the disk */
5998 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0)
6001 if (write(fd
, buf
, 512) != 512)
6007 static void imsm_sync_metadata(struct supertype
*container
)
6009 struct intel_super
*super
= container
->sb
;
6011 dprintf("sync metadata: %d\n", super
->updates_pending
);
6012 if (!super
->updates_pending
)
6015 write_super_imsm(container
, 0);
6017 super
->updates_pending
= 0;
6020 static struct dl
*imsm_readd(struct intel_super
*super
, int idx
, struct active_array
*a
)
6022 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
6023 int i
= get_imsm_disk_idx(dev
, idx
, -1);
6026 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6030 if (dl
&& is_failed(&dl
->disk
))
6034 dprintf("%s: found %x:%x\n", __func__
, dl
->major
, dl
->minor
);
6039 static struct dl
*imsm_add_spare(struct intel_super
*super
, int slot
,
6040 struct active_array
*a
, int activate_new
,
6041 struct mdinfo
*additional_test_list
)
6043 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
6044 int idx
= get_imsm_disk_idx(dev
, slot
, -1);
6045 struct imsm_super
*mpb
= super
->anchor
;
6046 struct imsm_map
*map
;
6047 unsigned long long pos
;
6052 __u32 array_start
= 0;
6053 __u32 array_end
= 0;
6055 struct mdinfo
*test_list
;
6057 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6058 /* If in this array, skip */
6059 for (d
= a
->info
.devs
; d
; d
= d
->next
)
6060 if (d
->state_fd
>= 0 &&
6061 d
->disk
.major
== dl
->major
&&
6062 d
->disk
.minor
== dl
->minor
) {
6063 dprintf("%x:%x already in array\n",
6064 dl
->major
, dl
->minor
);
6069 test_list
= additional_test_list
;
6071 if (test_list
->disk
.major
== dl
->major
&&
6072 test_list
->disk
.minor
== dl
->minor
) {
6073 dprintf("%x:%x already in additional test list\n",
6074 dl
->major
, dl
->minor
);
6077 test_list
= test_list
->next
;
6082 /* skip in use or failed drives */
6083 if (is_failed(&dl
->disk
) || idx
== dl
->index
||
6085 dprintf("%x:%x status (failed: %d index: %d)\n",
6086 dl
->major
, dl
->minor
, is_failed(&dl
->disk
), idx
);
6090 /* skip pure spares when we are looking for partially
6091 * assimilated drives
6093 if (dl
->index
== -1 && !activate_new
)
6096 /* Does this unused device have the requisite free space?
6097 * It needs to be able to cover all member volumes
6099 ex
= get_extents(super
, dl
);
6101 dprintf("cannot get extents\n");
6104 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6105 dev
= get_imsm_dev(super
, i
);
6106 map
= get_imsm_map(dev
, 0);
6108 /* check if this disk is already a member of
6111 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
6117 array_start
= __le32_to_cpu(map
->pba_of_lba0
);
6118 array_end
= array_start
+
6119 __le32_to_cpu(map
->blocks_per_member
) - 1;
6122 /* check that we can start at pba_of_lba0 with
6123 * blocks_per_member of space
6125 if (array_start
>= pos
&& array_end
< ex
[j
].start
) {
6129 pos
= ex
[j
].start
+ ex
[j
].size
;
6131 } while (ex
[j
-1].size
);
6138 if (i
< mpb
->num_raid_devs
) {
6139 dprintf("%x:%x does not have %u to %u available\n",
6140 dl
->major
, dl
->minor
, array_start
, array_end
);
6151 static int imsm_rebuild_allowed(struct supertype
*cont
, int dev_idx
, int failed
)
6153 struct imsm_dev
*dev2
;
6154 struct imsm_map
*map
;
6160 dev2
= get_imsm_dev(cont
->sb
, dev_idx
);
6162 state
= imsm_check_degraded(cont
->sb
, dev2
, failed
);
6163 if (state
== IMSM_T_STATE_FAILED
) {
6164 map
= get_imsm_map(dev2
, 0);
6167 for (slot
= 0; slot
< map
->num_members
; slot
++) {
6169 * Check if failed disks are deleted from intel
6170 * disk list or are marked to be deleted
6172 idx
= get_imsm_disk_idx(dev2
, slot
, -1);
6173 idisk
= get_imsm_dl_disk(cont
->sb
, idx
);
6175 * Do not rebuild the array if failed disks
6176 * from failed sub-array are not removed from
6180 is_failed(&idisk
->disk
) &&
6181 (idisk
->action
!= DISK_REMOVE
))
6189 static struct mdinfo
*imsm_activate_spare(struct active_array
*a
,
6190 struct metadata_update
**updates
)
6193 * Find a device with unused free space and use it to replace a
6194 * failed/vacant region in an array. We replace failed regions one a
6195 * array at a time. The result is that a new spare disk will be added
6196 * to the first failed array and after the monitor has finished
6197 * propagating failures the remainder will be consumed.
6199 * FIXME add a capability for mdmon to request spares from another
6203 struct intel_super
*super
= a
->container
->sb
;
6204 int inst
= a
->info
.container_member
;
6205 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
6206 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6207 int failed
= a
->info
.array
.raid_disks
;
6208 struct mdinfo
*rv
= NULL
;
6211 struct metadata_update
*mu
;
6213 struct imsm_update_activate_spare
*u
;
6218 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
6219 if ((d
->curr_state
& DS_FAULTY
) &&
6221 /* wait for Removal to happen */
6223 if (d
->state_fd
>= 0)
6227 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
6228 inst
, failed
, a
->info
.array
.raid_disks
, a
->info
.array
.level
);
6230 if (dev
->vol
.migr_state
&&
6231 dev
->vol
.migr_type
== MIGR_GEN_MIGR
)
6232 /* No repair during migration */
6235 if (a
->info
.array
.level
== 4)
6236 /* No repair for takeovered array
6237 * imsm doesn't support raid4
6241 if (imsm_check_degraded(super
, dev
, failed
) != IMSM_T_STATE_DEGRADED
)
6245 * If there are any failed disks check state of the other volume.
6246 * Block rebuild if the another one is failed until failed disks
6247 * are removed from container.
6250 dprintf("found failed disks in %s, check if there another"
6251 "failed sub-array.\n",
6253 /* check if states of the other volumes allow for rebuild */
6254 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
6256 allowed
= imsm_rebuild_allowed(a
->container
,
6264 /* For each slot, if it is not working, find a spare */
6265 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
6266 for (d
= a
->info
.devs
; d
; d
= d
->next
)
6267 if (d
->disk
.raid_disk
== i
)
6269 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
6270 if (d
&& (d
->state_fd
>= 0))
6274 * OK, this device needs recovery. Try to re-add the
6275 * previous occupant of this slot, if this fails see if
6276 * we can continue the assimilation of a spare that was
6277 * partially assimilated, finally try to activate a new
6280 dl
= imsm_readd(super
, i
, a
);
6282 dl
= imsm_add_spare(super
, i
, a
, 0, NULL
);
6284 dl
= imsm_add_spare(super
, i
, a
, 1, NULL
);
6288 /* found a usable disk with enough space */
6289 di
= malloc(sizeof(*di
));
6292 memset(di
, 0, sizeof(*di
));
6294 /* dl->index will be -1 in the case we are activating a
6295 * pristine spare. imsm_process_update() will create a
6296 * new index in this case. Once a disk is found to be
6297 * failed in all member arrays it is kicked from the
6300 di
->disk
.number
= dl
->index
;
6302 /* (ab)use di->devs to store a pointer to the device
6305 di
->devs
= (struct mdinfo
*) dl
;
6307 di
->disk
.raid_disk
= i
;
6308 di
->disk
.major
= dl
->major
;
6309 di
->disk
.minor
= dl
->minor
;
6311 di
->recovery_start
= 0;
6312 di
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
6313 di
->component_size
= a
->info
.component_size
;
6314 di
->container_member
= inst
;
6315 super
->random
= random32();
6319 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
6320 i
, di
->data_offset
);
6326 /* No spares found */
6328 /* Now 'rv' has a list of devices to return.
6329 * Create a metadata_update record to update the
6330 * disk_ord_tbl for the array
6332 mu
= malloc(sizeof(*mu
));
6334 mu
->buf
= malloc(sizeof(struct imsm_update_activate_spare
) * num_spares
);
6335 if (mu
->buf
== NULL
) {
6342 struct mdinfo
*n
= rv
->next
;
6351 mu
->space_list
= NULL
;
6352 mu
->len
= sizeof(struct imsm_update_activate_spare
) * num_spares
;
6353 mu
->next
= *updates
;
6354 u
= (struct imsm_update_activate_spare
*) mu
->buf
;
6356 for (di
= rv
; di
; di
= di
->next
) {
6357 u
->type
= update_activate_spare
;
6358 u
->dl
= (struct dl
*) di
->devs
;
6360 u
->slot
= di
->disk
.raid_disk
;
6371 static int disks_overlap(struct intel_super
*super
, int idx
, struct imsm_update_create_array
*u
)
6373 struct imsm_dev
*dev
= get_imsm_dev(super
, idx
);
6374 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6375 struct imsm_map
*new_map
= get_imsm_map(&u
->dev
, 0);
6376 struct disk_info
*inf
= get_disk_info(u
);
6377 struct imsm_disk
*disk
;
6381 for (i
= 0; i
< map
->num_members
; i
++) {
6382 disk
= get_imsm_disk(super
, get_imsm_disk_idx(dev
, i
, -1));
6383 for (j
= 0; j
< new_map
->num_members
; j
++)
6384 if (serialcmp(disk
->serial
, inf
[j
].serial
) == 0)
6392 static struct dl
*get_disk_super(struct intel_super
*super
, int major
, int minor
)
6394 struct dl
*dl
= NULL
;
6395 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6396 if ((dl
->major
== major
) && (dl
->minor
== minor
))
6401 static int remove_disk_super(struct intel_super
*super
, int major
, int minor
)
6403 struct dl
*prev
= NULL
;
6407 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6408 if ((dl
->major
== major
) && (dl
->minor
== minor
)) {
6411 prev
->next
= dl
->next
;
6413 super
->disks
= dl
->next
;
6415 __free_imsm_disk(dl
);
6416 dprintf("%s: removed %x:%x\n",
6417 __func__
, major
, minor
);
6425 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
);
6427 static int add_remove_disk_update(struct intel_super
*super
)
6429 int check_degraded
= 0;
6430 struct dl
*disk
= NULL
;
6431 /* add/remove some spares to/from the metadata/contrainer */
6432 while (super
->disk_mgmt_list
) {
6433 struct dl
*disk_cfg
;
6435 disk_cfg
= super
->disk_mgmt_list
;
6436 super
->disk_mgmt_list
= disk_cfg
->next
;
6437 disk_cfg
->next
= NULL
;
6439 if (disk_cfg
->action
== DISK_ADD
) {
6440 disk_cfg
->next
= super
->disks
;
6441 super
->disks
= disk_cfg
;
6443 dprintf("%s: added %x:%x\n",
6444 __func__
, disk_cfg
->major
,
6446 } else if (disk_cfg
->action
== DISK_REMOVE
) {
6447 dprintf("Disk remove action processed: %x.%x\n",
6448 disk_cfg
->major
, disk_cfg
->minor
);
6449 disk
= get_disk_super(super
,
6453 /* store action status */
6454 disk
->action
= DISK_REMOVE
;
6455 /* remove spare disks only */
6456 if (disk
->index
== -1) {
6457 remove_disk_super(super
,
6462 /* release allocate disk structure */
6463 __free_imsm_disk(disk_cfg
);
6466 return check_degraded
;
6470 static int apply_reshape_migration_update(struct imsm_update_reshape_migration
*u
,
6471 struct intel_super
*super
,
6474 struct intel_dev
*id
;
6475 void **tofree
= NULL
;
6478 dprintf("apply_reshape_migration_update()\n");
6479 if ((u
->subdev
< 0) ||
6481 dprintf("imsm: Error: Wrong subdev: %i\n", u
->subdev
);
6484 if ((space_list
== NULL
) || (*space_list
== NULL
)) {
6485 dprintf("imsm: Error: Memory is not allocated\n");
6489 for (id
= super
->devlist
; id
; id
= id
->next
) {
6490 if (id
->index
== (unsigned)u
->subdev
) {
6491 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subdev
);
6492 struct imsm_map
*map
;
6493 struct imsm_dev
*new_dev
=
6494 (struct imsm_dev
*)*space_list
;
6495 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
6497 struct dl
*new_disk
;
6499 if (new_dev
== NULL
)
6501 *space_list
= **space_list
;
6502 memcpy(new_dev
, dev
, sizeof_imsm_dev(dev
, 0));
6503 map
= get_imsm_map(new_dev
, 0);
6505 dprintf("imsm: Error: migration in progress");
6509 to_state
= map
->map_state
;
6510 if ((u
->new_level
== 5) && (map
->raid_level
== 0)) {
6512 /* this should not happen */
6513 if (u
->new_disks
[0] < 0) {
6514 map
->failed_disk_num
=
6515 map
->num_members
- 1;
6516 to_state
= IMSM_T_STATE_DEGRADED
;
6518 to_state
= IMSM_T_STATE_NORMAL
;
6520 migrate(new_dev
, super
, to_state
, MIGR_GEN_MIGR
);
6521 if (u
->new_level
> -1)
6522 map
->raid_level
= u
->new_level
;
6523 migr_map
= get_imsm_map(new_dev
, 1);
6524 if ((u
->new_level
== 5) &&
6525 (migr_map
->raid_level
== 0)) {
6526 int ord
= map
->num_members
- 1;
6527 migr_map
->num_members
--;
6528 if (u
->new_disks
[0] < 0)
6529 ord
|= IMSM_ORD_REBUILD
;
6530 set_imsm_ord_tbl_ent(map
,
6531 map
->num_members
- 1,
6535 tofree
= (void **)dev
;
6537 /* update chunk size
6539 if (u
->new_chunksize
> 0)
6540 map
->blocks_per_strip
=
6541 __cpu_to_le16(u
->new_chunksize
* 2);
6545 if ((u
->new_level
!= 5) ||
6546 (migr_map
->raid_level
!= 0) ||
6547 (migr_map
->raid_level
== map
->raid_level
))
6550 if (u
->new_disks
[0] >= 0) {
6553 new_disk
= get_disk_super(super
,
6554 major(u
->new_disks
[0]),
6555 minor(u
->new_disks
[0]));
6556 dprintf("imsm: new disk for reshape is: %i:%i "
6557 "(%p, index = %i)\n",
6558 major(u
->new_disks
[0]),
6559 minor(u
->new_disks
[0]),
6560 new_disk
, new_disk
->index
);
6561 if (new_disk
== NULL
)
6562 goto error_disk_add
;
6564 new_disk
->index
= map
->num_members
- 1;
6565 /* slot to fill in autolayout
6567 new_disk
->raiddisk
= new_disk
->index
;
6568 new_disk
->disk
.status
|= CONFIGURED_DISK
;
6569 new_disk
->disk
.status
&= ~SPARE_DISK
;
6571 goto error_disk_add
;
6574 *tofree
= *space_list
;
6575 /* calculate new size
6577 imsm_set_array_size(new_dev
);
6584 *space_list
= tofree
;
6588 dprintf("Error: imsm: Cannot find disk.\n");
6593 static int apply_reshape_container_disks_update(struct imsm_update_reshape
*u
,
6594 struct intel_super
*super
,
6597 struct dl
*new_disk
;
6598 struct intel_dev
*id
;
6600 int delta_disks
= u
->new_raid_disks
- u
->old_raid_disks
;
6601 int disk_count
= u
->old_raid_disks
;
6602 void **tofree
= NULL
;
6603 int devices_to_reshape
= 1;
6604 struct imsm_super
*mpb
= super
->anchor
;
6606 unsigned int dev_id
;
6608 dprintf("imsm: apply_reshape_container_disks_update()\n");
6610 /* enable spares to use in array */
6611 for (i
= 0; i
< delta_disks
; i
++) {
6612 new_disk
= get_disk_super(super
,
6613 major(u
->new_disks
[i
]),
6614 minor(u
->new_disks
[i
]));
6615 dprintf("imsm: new disk for reshape is: %i:%i "
6616 "(%p, index = %i)\n",
6617 major(u
->new_disks
[i
]), minor(u
->new_disks
[i
]),
6618 new_disk
, new_disk
->index
);
6619 if ((new_disk
== NULL
) ||
6620 ((new_disk
->index
>= 0) &&
6621 (new_disk
->index
< u
->old_raid_disks
)))
6622 goto update_reshape_exit
;
6623 new_disk
->index
= disk_count
++;
6624 /* slot to fill in autolayout
6626 new_disk
->raiddisk
= new_disk
->index
;
6627 new_disk
->disk
.status
|=
6629 new_disk
->disk
.status
&= ~SPARE_DISK
;
6632 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6633 mpb
->num_raid_devs
);
6634 /* manage changes in volume
6636 for (dev_id
= 0; dev_id
< mpb
->num_raid_devs
; dev_id
++) {
6637 void **sp
= *space_list
;
6638 struct imsm_dev
*newdev
;
6639 struct imsm_map
*newmap
, *oldmap
;
6641 for (id
= super
->devlist
; id
; id
= id
->next
) {
6642 if (id
->index
== dev_id
)
6651 /* Copy the dev, but not (all of) the map */
6652 memcpy(newdev
, id
->dev
, sizeof(*newdev
));
6653 oldmap
= get_imsm_map(id
->dev
, 0);
6654 newmap
= get_imsm_map(newdev
, 0);
6655 /* Copy the current map */
6656 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6657 /* update one device only
6659 if (devices_to_reshape
) {
6660 dprintf("imsm: modifying subdev: %i\n",
6662 devices_to_reshape
--;
6663 newdev
->vol
.migr_state
= 1;
6664 newdev
->vol
.curr_migr_unit
= 0;
6665 newdev
->vol
.migr_type
= MIGR_GEN_MIGR
;
6666 newmap
->num_members
= u
->new_raid_disks
;
6667 for (i
= 0; i
< delta_disks
; i
++) {
6668 set_imsm_ord_tbl_ent(newmap
,
6669 u
->old_raid_disks
+ i
,
6670 u
->old_raid_disks
+ i
);
6672 /* New map is correct, now need to save old map
6674 newmap
= get_imsm_map(newdev
, 1);
6675 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6677 imsm_set_array_size(newdev
);
6680 sp
= (void **)id
->dev
;
6685 /* Clear migration record */
6686 memset(super
->migr_rec
, 0, sizeof(struct migr_record
));
6689 *space_list
= tofree
;
6692 update_reshape_exit
:
6697 static int apply_takeover_update(struct imsm_update_takeover
*u
,
6698 struct intel_super
*super
,
6701 struct imsm_dev
*dev
= NULL
;
6702 struct intel_dev
*dv
;
6703 struct imsm_dev
*dev_new
;
6704 struct imsm_map
*map
;
6708 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
6709 if (dv
->index
== (unsigned int)u
->subarray
) {
6717 map
= get_imsm_map(dev
, 0);
6719 if (u
->direction
== R10_TO_R0
) {
6720 /* Number of failed disks must be half of initial disk number */
6721 if (imsm_count_failed(super
, dev
) != (map
->num_members
/ 2))
6724 /* iterate through devices to mark removed disks as spare */
6725 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6726 if (dm
->disk
.status
& FAILED_DISK
) {
6727 int idx
= dm
->index
;
6728 /* update indexes on the disk list */
6729 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6730 the index values will end up being correct.... NB */
6731 for (du
= super
->disks
; du
; du
= du
->next
)
6732 if (du
->index
> idx
)
6734 /* mark as spare disk */
6735 dm
->disk
.status
= SPARE_DISK
;
6740 map
->num_members
= map
->num_members
/ 2;
6741 map
->map_state
= IMSM_T_STATE_NORMAL
;
6742 map
->num_domains
= 1;
6743 map
->raid_level
= 0;
6744 map
->failed_disk_num
= -1;
6747 if (u
->direction
== R0_TO_R10
) {
6749 /* update slots in current disk list */
6750 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6754 /* create new *missing* disks */
6755 for (i
= 0; i
< map
->num_members
; i
++) {
6756 space
= *space_list
;
6759 *space_list
= *space
;
6761 memcpy(du
, super
->disks
, sizeof(*du
));
6765 du
->index
= (i
* 2) + 1;
6766 sprintf((char *)du
->disk
.serial
,
6767 " MISSING_%d", du
->index
);
6768 sprintf((char *)du
->serial
,
6769 "MISSING_%d", du
->index
);
6770 du
->next
= super
->missing
;
6771 super
->missing
= du
;
6773 /* create new dev and map */
6774 space
= *space_list
;
6777 *space_list
= *space
;
6778 dev_new
= (void *)space
;
6779 memcpy(dev_new
, dev
, sizeof(*dev
));
6780 /* update new map */
6781 map
= get_imsm_map(dev_new
, 0);
6782 map
->num_members
= map
->num_members
* 2;
6783 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6784 map
->num_domains
= 2;
6785 map
->raid_level
= 1;
6786 /* replace dev<->dev_new */
6789 /* update disk order table */
6790 for (du
= super
->disks
; du
; du
= du
->next
)
6792 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6793 for (du
= super
->missing
; du
; du
= du
->next
)
6794 if (du
->index
>= 0) {
6795 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6796 mark_missing(dev_new
, &du
->disk
, du
->index
);
/*
 * imsm_process_update(): mdmon-side handler that applies a metadata_update
 * record (takeover, container/volume reshape, activate-spare, create/kill/
 * rename array, add/remove disk) to the in-memory IMSM superblock, bumping
 * super->updates_pending when the anchor changed.
 * NOTE(review): this region of the file appears damaged by extraction --
 * statements are split across lines, the original file's line numbers are
 * fused into the text, and gaps in that numbering show that lines (the
 * switch header, braces, break/return statements) are missing. The text
 * below is preserved byte-for-byte; restore it from upstream mdadm
 * super-intel.c before attempting to compile.
 */
6802 static void imsm_process_update(struct supertype
*st
,
6803 struct metadata_update
*update
)
6806 * crack open the metadata_update envelope to find the update record
6807 * update can be one of:
6808 * update_reshape_container_disks - all the arrays in the container
6809 * are being reshaped to have more devices. We need to mark
6810 * the arrays for general migration and convert selected spares
6811 * into active devices.
6812 * update_activate_spare - a spare device has replaced a failed
6813 * device in an array, update the disk_ord_tbl. If this disk is
6814 * present in all member arrays then also clear the SPARE_DISK
6816 * update_create_array
6818 * update_rename_array
6819 * update_add_remove_disk
6821 struct intel_super
*super
= st
->sb
;
6822 struct imsm_super
*mpb
;
6823 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6825 /* update requires a larger buf but the allocation failed */
6826 if (super
->next_len
&& !super
->next_buf
) {
6827 super
->next_len
= 0;
6831 if (super
->next_buf
) {
6832 memcpy(super
->next_buf
, super
->buf
, super
->len
);
6834 super
->len
= super
->next_len
;
6835 super
->buf
= super
->next_buf
;
6837 super
->next_len
= 0;
6838 super
->next_buf
= NULL
;
6841 mpb
= super
->anchor
;
6844 case update_takeover
: {
6845 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6846 if (apply_takeover_update(u
, super
, &update
->space_list
)) {
6847 imsm_update_version_info(super
);
6848 super
->updates_pending
++;
6853 case update_reshape_container_disks
: {
6854 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6855 if (apply_reshape_container_disks_update(
6856 u
, super
, &update
->space_list
))
6857 super
->updates_pending
++;
6860 case update_reshape_migration
: {
6861 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
6862 if (apply_reshape_migration_update(
6863 u
, super
, &update
->space_list
))
6864 super
->updates_pending
++;
6867 case update_activate_spare
: {
6868 struct imsm_update_activate_spare
*u
= (void *) update
->buf
;
6869 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->array
);
6870 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6871 struct imsm_map
*migr_map
;
6872 struct active_array
*a
;
6873 struct imsm_disk
*disk
;
6878 int victim
= get_imsm_disk_idx(dev
, u
->slot
, -1);
6881 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6886 fprintf(stderr
, "error: imsm_activate_spare passed "
6887 "an unknown disk (index: %d)\n",
6892 super
->updates_pending
++;
6893 /* count failures (excluding rebuilds and the victim)
6894 * to determine map[0] state
6897 for (i
= 0; i
< map
->num_members
; i
++) {
6900 disk
= get_imsm_disk(super
,
6901 get_imsm_disk_idx(dev
, i
, -1));
6902 if (!disk
|| is_failed(disk
))
6906 /* adding a pristine spare, assign a new index */
6907 if (dl
->index
< 0) {
6908 dl
->index
= super
->anchor
->num_disks
;
6909 super
->anchor
->num_disks
++;
6912 disk
->status
|= CONFIGURED_DISK
;
6913 disk
->status
&= ~SPARE_DISK
;
6916 to_state
= imsm_check_degraded(super
, dev
, failed
);
6917 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6918 migrate(dev
, super
, to_state
, MIGR_REBUILD
);
6919 migr_map
= get_imsm_map(dev
, 1);
6920 set_imsm_ord_tbl_ent(map
, u
->slot
, dl
->index
);
6921 set_imsm_ord_tbl_ent(migr_map
, u
->slot
, dl
->index
| IMSM_ORD_REBUILD
);
6923 /* update the family_num to mark a new container
6924 * generation, being careful to record the existing
6925 * family_num in orig_family_num to clean up after
6926 * earlier mdadm versions that neglected to set it.
6928 if (mpb
->orig_family_num
== 0)
6929 mpb
->orig_family_num
= mpb
->family_num
;
6930 mpb
->family_num
+= super
->random
;
6932 /* count arrays using the victim in the metadata */
6934 for (a
= st
->arrays
; a
; a
= a
->next
) {
6935 dev
= get_imsm_dev(super
, a
->info
.container_member
);
6936 map
= get_imsm_map(dev
, 0);
6938 if (get_imsm_disk_slot(map
, victim
) >= 0)
6942 /* delete the victim if it is no longer being
6948 /* We know that 'manager' isn't touching anything,
6949 * so it is safe to delete
6951 for (dlp
= &super
->disks
; *dlp
; dlp
= &(*dlp
)->next
)
6952 if ((*dlp
)->index
== victim
)
6955 /* victim may be on the missing list */
6957 for (dlp
= &super
->missing
; *dlp
; dlp
= &(*dlp
)->next
)
6958 if ((*dlp
)->index
== victim
)
6960 imsm_delete(super
, dlp
, victim
);
6964 case update_create_array
: {
6965 /* someone wants to create a new array, we need to be aware of
6966 * a few races/collisions:
6967 * 1/ 'Create' called by two separate instances of mdadm
6968 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6969 * devices that have since been assimilated via
6971 * In the event this update can not be carried out mdadm will
6972 * (FIX ME) notice that its update did not take hold.
6974 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6975 struct intel_dev
*dv
;
6976 struct imsm_dev
*dev
;
6977 struct imsm_map
*map
, *new_map
;
6978 unsigned long long start
, end
;
6979 unsigned long long new_start
, new_end
;
6981 struct disk_info
*inf
;
6984 /* handle racing creates: first come first serve */
6985 if (u
->dev_idx
< mpb
->num_raid_devs
) {
6986 dprintf("%s: subarray %d already defined\n",
6987 __func__
, u
->dev_idx
);
6991 /* check update is next in sequence */
6992 if (u
->dev_idx
!= mpb
->num_raid_devs
) {
6993 dprintf("%s: can not create array %d expected index %d\n",
6994 __func__
, u
->dev_idx
, mpb
->num_raid_devs
);
6998 new_map
= get_imsm_map(&u
->dev
, 0);
6999 new_start
= __le32_to_cpu(new_map
->pba_of_lba0
);
7000 new_end
= new_start
+ __le32_to_cpu(new_map
->blocks_per_member
);
7001 inf
= get_disk_info(u
);
7003 /* handle activate_spare versus create race:
7004 * check to make sure that overlapping arrays do not include
7007 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
7008 dev
= get_imsm_dev(super
, i
);
7009 map
= get_imsm_map(dev
, 0);
7010 start
= __le32_to_cpu(map
->pba_of_lba0
);
7011 end
= start
+ __le32_to_cpu(map
->blocks_per_member
);
7012 if ((new_start
>= start
&& new_start
<= end
) ||
7013 (start
>= new_start
&& start
<= new_end
))
7018 if (disks_overlap(super
, i
, u
)) {
7019 dprintf("%s: arrays overlap\n", __func__
);
7024 /* check that prepare update was successful */
7025 if (!update
->space
) {
7026 dprintf("%s: prepare update failed\n", __func__
);
7030 /* check that all disks are still active before committing
7031 * changes. FIXME: could we instead handle this by creating a
7032 * degraded array? That's probably not what the user expects,
7033 * so better to drop this update on the floor.
7035 for (i
= 0; i
< new_map
->num_members
; i
++) {
7036 dl
= serial_to_dl(inf
[i
].serial
, super
);
7038 dprintf("%s: disk disappeared\n", __func__
);
7043 super
->updates_pending
++;
7045 /* convert spares to members and fixup ord_tbl */
7046 for (i
= 0; i
< new_map
->num_members
; i
++) {
7047 dl
= serial_to_dl(inf
[i
].serial
, super
);
7048 if (dl
->index
== -1) {
7049 dl
->index
= mpb
->num_disks
;
7051 dl
->disk
.status
|= CONFIGURED_DISK
;
7052 dl
->disk
.status
&= ~SPARE_DISK
;
7054 set_imsm_ord_tbl_ent(new_map
, i
, dl
->index
);
7059 update
->space
= NULL
;
7060 imsm_copy_dev(dev
, &u
->dev
);
7061 dv
->index
= u
->dev_idx
;
7062 dv
->next
= super
->devlist
;
7063 super
->devlist
= dv
;
7064 mpb
->num_raid_devs
++;
7066 imsm_update_version_info(super
);
7069 /* mdmon knows how to release update->space, but not
7070 * ((struct intel_dev *) update->space)->dev
7072 if (update
->space
) {
7078 case update_kill_array
: {
7079 struct imsm_update_kill_array
*u
= (void *) update
->buf
;
7080 int victim
= u
->dev_idx
;
7081 struct active_array
*a
;
7082 struct intel_dev
**dp
;
7083 struct imsm_dev
*dev
;
7085 /* sanity check that we are not affecting the uuid of
7086 * active arrays, or deleting an active array
7088 * FIXME when immutable ids are available, but note that
7089 * we'll also need to fixup the invalidated/active
7090 * subarray indexes in mdstat
7092 for (a
= st
->arrays
; a
; a
= a
->next
)
7093 if (a
->info
.container_member
>= victim
)
7095 /* by definition if mdmon is running at least one array
7096 * is active in the container, so checking
7097 * mpb->num_raid_devs is just extra paranoia
7099 dev
= get_imsm_dev(super
, victim
);
7100 if (a
|| !dev
|| mpb
->num_raid_devs
== 1) {
7101 dprintf("failed to delete subarray-%d\n", victim
);
7105 for (dp
= &super
->devlist
; *dp
;)
7106 if ((*dp
)->index
== (unsigned)super
->current_vol
) {
7109 if ((*dp
)->index
> (unsigned)victim
)
7113 mpb
->num_raid_devs
--;
7114 super
->updates_pending
++;
7117 case update_rename_array
: {
7118 struct imsm_update_rename_array
*u
= (void *) update
->buf
;
7119 char name
[MAX_RAID_SERIAL_LEN
+1];
7120 int target
= u
->dev_idx
;
7121 struct active_array
*a
;
7122 struct imsm_dev
*dev
;
7124 /* sanity check that we are not affecting the uuid of
7127 snprintf(name
, MAX_RAID_SERIAL_LEN
, "%s", (char *) u
->name
);
7128 name
[MAX_RAID_SERIAL_LEN
] = '\0';
7129 for (a
= st
->arrays
; a
; a
= a
->next
)
7130 if (a
->info
.container_member
== target
)
7132 dev
= get_imsm_dev(super
, u
->dev_idx
);
7133 if (a
|| !dev
|| !check_name(super
, name
, 1)) {
7134 dprintf("failed to rename subarray-%d\n", target
);
7138 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
7139 super
->updates_pending
++;
7142 case update_add_remove_disk
: {
7143 /* we may be able to repair some arrays if disks are
7144 * being added, check teh status of add_remove_disk
7145 * if discs has been added.
7147 if (add_remove_disk_update(super
)) {
7148 struct active_array
*a
;
7150 super
->updates_pending
++;
7151 for (a
= st
->arrays
; a
; a
= a
->next
)
7152 a
->check_degraded
= 1;
7157 fprintf(stderr
, "error: unsuported process update type:"
7158 "(type: %d)\n", type
);
7162 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
);
/*
 * imsm_prepare_update(): manager-side allocator run before
 * imsm_process_update(). Pre-allocates the memory the monitor will need
 * (new disk/dev entries on update->space_list, and a larger anchor via
 * super->next_buf/next_len) so the monitor thread never allocates.
 * NOTE(review): extraction damage in this region -- fragments split across
 * lines, missing braces/returns (see gaps in the embedded numbering).
 * Preserved byte-for-byte; restore from upstream mdadm super-intel.c.
 */
7164 static void imsm_prepare_update(struct supertype
*st
,
7165 struct metadata_update
*update
)
7168 * Allocate space to hold new disk entries, raid-device entries or a new
7169 * mpb if necessary. The manager synchronously waits for updates to
7170 * complete in the monitor, so new mpb buffers allocated here can be
7171 * integrated by the monitor thread without worrying about live pointers
7172 * in the manager thread.
7174 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
7175 struct intel_super
*super
= st
->sb
;
7176 struct imsm_super
*mpb
= super
->anchor
;
7181 case update_takeover
: {
7182 struct imsm_update_takeover
*u
= (void *)update
->buf
;
7183 if (u
->direction
== R0_TO_R10
) {
7184 void **tail
= (void **)&update
->space_list
;
7185 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subarray
);
7186 struct imsm_map
*map
= get_imsm_map(dev
, 0);
7187 int num_members
= map
->num_members
;
7191 /* allocate memory for added disks */
7192 for (i
= 0; i
< num_members
; i
++) {
7193 size
= sizeof(struct dl
);
7194 space
= malloc(size
);
7203 /* allocate memory for new device */
7204 size
= sizeof_imsm_dev(super
->devlist
->dev
, 0) +
7205 (num_members
* sizeof(__u32
));
7206 space
= malloc(size
);
7215 len
= disks_to_mpb_size(num_members
* 2);
7217 /* if allocation didn't success, free buffer */
7218 while (update
->space_list
) {
7219 void **sp
= update
->space_list
;
7220 update
->space_list
= *sp
;
7228 case update_reshape_container_disks
: {
7229 /* Every raid device in the container is about to
7230 * gain some more devices, and we will enter a
7232 * So each 'imsm_map' will be bigger, and the imsm_vol
7233 * will now hold 2 of them.
7234 * Thus we need new 'struct imsm_dev' allocations sized
7235 * as sizeof_imsm_dev but with more devices in both maps.
7237 struct imsm_update_reshape
*u
= (void *)update
->buf
;
7238 struct intel_dev
*dl
;
7239 void **space_tail
= (void**)&update
->space_list
;
7241 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7243 for (dl
= super
->devlist
; dl
; dl
= dl
->next
) {
7244 int size
= sizeof_imsm_dev(dl
->dev
, 1);
7246 if (u
->new_raid_disks
> u
->old_raid_disks
)
7247 size
+= sizeof(__u32
)*2*
7248 (u
->new_raid_disks
- u
->old_raid_disks
);
7257 len
= disks_to_mpb_size(u
->new_raid_disks
);
7258 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
7261 case update_reshape_migration
: {
7262 /* for migration level 0->5 we need to add disks
7263 * so the same as for container operation we will copy
7264 * device to the bigger location.
7265 * in memory prepared device and new disk area are prepared
7266 * for usage in process update
7268 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
7269 struct intel_dev
*id
;
7270 void **space_tail
= (void **)&update
->space_list
;
7273 int current_level
= -1;
7275 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7277 /* add space for bigger array in update
7279 for (id
= super
->devlist
; id
; id
= id
->next
) {
7280 if (id
->index
== (unsigned)u
->subdev
) {
7281 size
= sizeof_imsm_dev(id
->dev
, 1);
7282 if (u
->new_raid_disks
> u
->old_raid_disks
)
7283 size
+= sizeof(__u32
)*2*
7284 (u
->new_raid_disks
- u
->old_raid_disks
);
7294 if (update
->space_list
== NULL
)
7297 /* add space for disk in update
7299 size
= sizeof(struct dl
);
7302 free(update
->space_list
);
7303 update
->space_list
= NULL
;
7310 /* add spare device to update
7312 for (id
= super
->devlist
; id
; id
= id
->next
)
7313 if (id
->index
== (unsigned)u
->subdev
) {
7314 struct imsm_dev
*dev
;
7315 struct imsm_map
*map
;
7317 dev
= get_imsm_dev(super
, u
->subdev
);
7318 map
= get_imsm_map(dev
, 0);
7319 current_level
= map
->raid_level
;
7322 if ((u
->new_level
== 5) && (u
->new_level
!= current_level
)) {
7323 struct mdinfo
*spares
;
7325 spares
= get_spares_for_grow(st
);
7333 makedev(dev
->disk
.major
,
7335 dl
= get_disk_super(super
,
7338 dl
->index
= u
->old_raid_disks
;
7344 len
= disks_to_mpb_size(u
->new_raid_disks
);
7345 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
7348 case update_create_array
: {
7349 struct imsm_update_create_array
*u
= (void *) update
->buf
;
7350 struct intel_dev
*dv
;
7351 struct imsm_dev
*dev
= &u
->dev
;
7352 struct imsm_map
*map
= get_imsm_map(dev
, 0);
7354 struct disk_info
*inf
;
7358 inf
= get_disk_info(u
);
7359 len
= sizeof_imsm_dev(dev
, 1);
7360 /* allocate a new super->devlist entry */
7361 dv
= malloc(sizeof(*dv
));
7363 dv
->dev
= malloc(len
);
7368 update
->space
= NULL
;
7372 /* count how many spares will be converted to members */
7373 for (i
= 0; i
< map
->num_members
; i
++) {
7374 dl
= serial_to_dl(inf
[i
].serial
, super
);
7376 /* hmm maybe it failed?, nothing we can do about
7381 if (count_memberships(dl
, super
) == 0)
7384 len
+= activate
* sizeof(struct imsm_disk
);
7391 /* check if we need a larger metadata buffer */
7392 if (super
->next_buf
)
7393 buf_len
= super
->next_len
;
7395 buf_len
= super
->len
;
7397 if (__le32_to_cpu(mpb
->mpb_size
) + len
> buf_len
) {
7398 /* ok we need a larger buf than what is currently allocated
7399 * if this allocation fails process_update will notice that
7400 * ->next_len is set and ->next_buf is NULL
7402 buf_len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + len
, 512);
7403 if (super
->next_buf
)
7404 free(super
->next_buf
);
7406 super
->next_len
= buf_len
;
7407 if (posix_memalign(&super
->next_buf
, 512, buf_len
) == 0)
7408 memset(super
->next_buf
, 0, buf_len
);
7410 super
->next_buf
= NULL
;
/*
 * imsm_delete(): unlink disk *dlp (at metadata position @index) from the
 * super, shifting higher disk indexes and every device's ord-table entries
 * down by one, then freeing the dl. Caller guarantees the manager thread
 * is quiesced.
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines missing. Preserved byte-for-byte; restore from upstream mdadm.
 */
7414 /* must be called while manager is quiesced */
7415 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
)
7417 struct imsm_super
*mpb
= super
->anchor
;
7419 struct imsm_dev
*dev
;
7420 struct imsm_map
*map
;
7421 int i
, j
, num_members
;
7424 dprintf("%s: deleting device[%d] from imsm_super\n",
7427 /* shift all indexes down one */
7428 for (iter
= super
->disks
; iter
; iter
= iter
->next
)
7429 if (iter
->index
> (int)index
)
7431 for (iter
= super
->missing
; iter
; iter
= iter
->next
)
7432 if (iter
->index
> (int)index
)
7435 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
7436 dev
= get_imsm_dev(super
, i
);
7437 map
= get_imsm_map(dev
, 0);
7438 num_members
= map
->num_members
;
7439 for (j
= 0; j
< num_members
; j
++) {
7440 /* update ord entries being careful not to propagate
7441 * ord-flags to the first map
7443 ord
= get_imsm_ord_tbl_ent(dev
, j
, -1);
7445 if (ord_to_idx(ord
) <= index
)
7448 map
= get_imsm_map(dev
, 0);
7449 set_imsm_ord_tbl_ent(map
, j
, ord_to_idx(ord
- 1));
7450 map
= get_imsm_map(dev
, 1);
7452 set_imsm_ord_tbl_ent(map
, j
, ord
- 1);
7457 super
->updates_pending
++;
7459 struct dl
*dl
= *dlp
;
7461 *dlp
= (*dlp
)->next
;
7462 __free_imsm_disk(dl
);
7466 /*******************************************************************************
7467 * Function: open_backup_targets
7468 * Description: Function opens file descriptors for all devices given in
7471 * info : general array info
7472 * raid_disks : number of disks
7473 * raid_fds : table of device's file descriptors
7477 ******************************************************************************/
7478 int open_backup_targets(struct mdinfo
*info
, int raid_disks
, int *raid_fds
)
7482 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
7485 if (sd
->disk
.state
& (1<<MD_DISK_FAULTY
)) {
7486 dprintf("disk is faulty!!\n");
7490 if ((sd
->disk
.raid_disk
>= raid_disks
) ||
7491 (sd
->disk
.raid_disk
< 0))
7494 dn
= map_dev(sd
->disk
.major
,
7496 raid_fds
[sd
->disk
.raid_disk
] = dev_open(dn
, O_RDWR
);
7497 if (raid_fds
[sd
->disk
.raid_disk
] < 0) {
7498 fprintf(stderr
, "cannot open component\n");
7505 /*******************************************************************************
7506 * Function: init_migr_record_imsm
7507 * Description: Function inits imsm migration record
7509 * super : imsm internal array info
7510 * dev : device under migration
7511 * info : general array info to find the smallest device
7514 ******************************************************************************/
/*
 * init_migr_record_imsm(): zero and populate super->migr_rec for a new
 * general migration of @dev (blocks/unit geometry, number of migration
 * units, checkpoint-area PBA derived from the smallest member device),
 * then persist it with write_imsm_migr_rec().
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines missing. Preserved byte-for-byte; restore from upstream mdadm.
 */
7515 void init_migr_record_imsm(struct supertype
*st
, struct imsm_dev
*dev
,
7516 struct mdinfo
*info
)
7518 struct intel_super
*super
= st
->sb
;
7519 struct migr_record
*migr_rec
= super
->migr_rec
;
7521 unsigned long long dsize
, dev_sectors
;
7522 long long unsigned min_dev_sectors
= -1LLU;
7526 struct imsm_map
*map_dest
= get_imsm_map(dev
, 0);
7527 struct imsm_map
*map_src
= get_imsm_map(dev
, 1);
7528 unsigned long long num_migr_units
;
7530 unsigned long long array_blocks
=
7531 (((unsigned long long)__le32_to_cpu(dev
->size_high
)) << 32) +
7532 __le32_to_cpu(dev
->size_low
);
7534 memset(migr_rec
, 0, sizeof(struct migr_record
));
7535 migr_rec
->family_num
= __cpu_to_le32(super
->anchor
->family_num
);
7537 /* only ascending reshape supported now */
7538 migr_rec
->ascending_migr
= __cpu_to_le32(1);
7540 migr_rec
->dest_depth_per_unit
= GEN_MIGR_AREA_SIZE
/
7541 max(map_dest
->blocks_per_strip
, map_src
->blocks_per_strip
);
7542 migr_rec
->dest_depth_per_unit
*= map_dest
->blocks_per_strip
;
7543 new_data_disks
= imsm_num_data_members(dev
, 0);
7544 migr_rec
->blocks_per_unit
=
7545 __cpu_to_le32(migr_rec
->dest_depth_per_unit
* new_data_disks
);
7546 migr_rec
->dest_depth_per_unit
=
7547 __cpu_to_le32(migr_rec
->dest_depth_per_unit
);
7550 array_blocks
/ __le32_to_cpu(migr_rec
->blocks_per_unit
);
7552 if (array_blocks
% __le32_to_cpu(migr_rec
->blocks_per_unit
))
7554 migr_rec
->num_migr_units
= __cpu_to_le32(num_migr_units
);
7556 migr_rec
->post_migr_vol_cap
= dev
->size_low
;
7557 migr_rec
->post_migr_vol_cap_hi
= dev
->size_high
;
7560 /* Find the smallest dev */
7561 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
7562 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
7563 fd
= dev_open(nm
, O_RDONLY
);
7566 get_dev_size(fd
, NULL
, &dsize
);
7567 dev_sectors
= dsize
/ 512;
7568 if (dev_sectors
< min_dev_sectors
)
7569 min_dev_sectors
= dev_sectors
;
7572 migr_rec
->ckpt_area_pba
= __cpu_to_le32(min_dev_sectors
-
7573 RAID_DISK_RESERVED_BLOCKS_IMSM_HI
);
7575 write_imsm_migr_rec(st
);
7580 /*******************************************************************************
7581 * Function: save_backup_imsm
7582 * Description: Function saves critical data stripes to Migration Copy Area
7583 * and updates the current migration unit status.
7584 * Use restore_stripes() to form a destination stripe,
7585 * and to write it to the Copy Area.
7587 * st : supertype information
7588 * info : general array info
7589 * buf : input buffer
7590 * write_offset : address of data to backup
7591 * length : length of data to backup (blocks_per_unit)
7595 ******************************************************************************/
/*
 * save_backup_imsm(): copy the critical stripes of the current migration
 * unit into the per-disk checkpoint (Migration Copy) area via
 * restore_stripes(), using open_backup_targets() descriptors.
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines missing. Preserved byte-for-byte; restore from upstream mdadm.
 */
7596 int save_backup_imsm(struct supertype
*st
,
7597 struct imsm_dev
*dev
,
7598 struct mdinfo
*info
,
7604 struct intel_super
*super
= st
->sb
;
7605 unsigned long long *target_offsets
= NULL
;
7606 int *targets
= NULL
;
7608 struct imsm_map
*map_dest
= get_imsm_map(dev
, 0);
7609 int new_disks
= map_dest
->num_members
;
7611 targets
= malloc(new_disks
* sizeof(int));
7615 target_offsets
= malloc(new_disks
* sizeof(unsigned long long));
7616 if (!target_offsets
)
7619 for (i
= 0; i
< new_disks
; i
++) {
7621 target_offsets
[i
] = (unsigned long long)
7622 __le32_to_cpu(super
->migr_rec
->ckpt_area_pba
) * 512;
7625 if (open_backup_targets(info
, new_disks
, targets
))
7628 if (restore_stripes(targets
, /* list of dest devices */
7629 target_offsets
, /* migration record offsets */
7634 -1, /* source backup file descriptor */
7635 0, /* input buf offset
7636 * always 0 buf is already offset */
7640 fprintf(stderr
, Name
": Error restoring stripes\n");
7648 for (i
= 0; i
< new_disks
; i
++)
7649 if (targets
[i
] >= 0)
7653 free(target_offsets
);
7658 /*******************************************************************************
7659 * Function: save_checkpoint_imsm
7660 * Description: Function called for current unit status update
7661 * in the migration record. It writes it to disk.
7663 * super : imsm internal array info
7664 * info : general array info
7668 ******************************************************************************/
/*
 * save_checkpoint_imsm(): update curr_migr_unit / rec_status /
 * dest_1st_member_lba in the migration record from info->reshape_progress
 * and write the record to disk. Guards against blocks_per_unit == 0 to
 * avoid a divide-by-zero.
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines missing. Preserved byte-for-byte; restore from upstream mdadm.
 */
7669 int save_checkpoint_imsm(struct supertype
*st
, struct mdinfo
*info
, int state
)
7671 struct intel_super
*super
= st
->sb
;
7672 load_imsm_migr_rec(super
, info
);
7673 if (__le32_to_cpu(super
->migr_rec
->blocks_per_unit
) == 0) {
7674 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7678 super
->migr_rec
->curr_migr_unit
=
7679 __cpu_to_le32(info
->reshape_progress
/
7680 __le32_to_cpu(super
->migr_rec
->blocks_per_unit
));
7681 super
->migr_rec
->rec_status
= __cpu_to_le32(state
);
7682 super
->migr_rec
->dest_1st_member_lba
=
7683 __cpu_to_le32((__le32_to_cpu(super
->migr_rec
->curr_migr_unit
))
7684 * __le32_to_cpu(super
->migr_rec
->dest_depth_per_unit
));
7685 if (write_imsm_migr_rec(st
) < 0) {
7686 dprintf("imsm: Cannot write migration record "
7687 "outside backup area\n");
7694 static __u64
blocks_per_migr_unit(struct intel_super
*super
,
7695 struct imsm_dev
*dev
);
7697 /*******************************************************************************
7698 * Function: recover_backup_imsm
7699 * Description: Function recovers critical data from the Migration Copy Area
7700 * while assembling an array.
7702 * super : imsm internal array info
7703 * info : general array info
7705 * 0 : success (or there is no data to recover)
7707 ******************************************************************************/
/*
 * recover_backup_imsm(): during assembly (array_state "inactive"), replay
 * the checkpoint-area copy of the interrupted migration unit back to its
 * destination location on every member, then mark the record
 * UNIT_SRC_NORMAL and persist it.
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines missing. Preserved byte-for-byte; restore from upstream mdadm.
 */
7708 int recover_backup_imsm(struct supertype
*st
, struct mdinfo
*info
)
7710 struct intel_super
*super
= st
->sb
;
7711 struct migr_record
*migr_rec
= super
->migr_rec
;
7712 struct imsm_map
*map_dest
= NULL
;
7713 struct intel_dev
*id
= NULL
;
7714 unsigned long long read_offset
;
7715 unsigned long long write_offset
;
7717 int *targets
= NULL
;
7718 int new_disks
, i
, err
;
7721 unsigned long curr_migr_unit
= __le32_to_cpu(migr_rec
->curr_migr_unit
);
7722 unsigned long num_migr_units
= __le32_to_cpu(migr_rec
->num_migr_units
);
7723 int ascending
= __le32_to_cpu(migr_rec
->ascending_migr
);
7726 err
= sysfs_get_str(info
, NULL
, "array_state", (char *)buffer
, 20);
7730 /* recover data only during assemblation */
7731 if (strncmp(buffer
, "inactive", 8) != 0)
7733 /* no data to recover */
7734 if (__le32_to_cpu(migr_rec
->rec_status
) == UNIT_SRC_NORMAL
)
7736 if (curr_migr_unit
>= num_migr_units
)
7739 /* find device during reshape */
7740 for (id
= super
->devlist
; id
; id
= id
->next
)
7741 if (is_gen_migration(id
->dev
))
7746 map_dest
= get_imsm_map(id
->dev
, 0);
7747 new_disks
= map_dest
->num_members
;
7749 read_offset
= (unsigned long long)
7750 __le32_to_cpu(migr_rec
->ckpt_area_pba
) * 512;
7752 write_offset
= ((unsigned long long)
7753 __le32_to_cpu(migr_rec
->dest_1st_member_lba
) +
7754 info
->data_offset
) * 512;
7756 unit_len
= __le32_to_cpu(migr_rec
->dest_depth_per_unit
) * 512;
7757 if (posix_memalign((void **)&buf
, 512, unit_len
) != 0)
7759 targets
= malloc(new_disks
* sizeof(int));
7763 open_backup_targets(info
, new_disks
, targets
);
7765 for (i
= 0; i
< new_disks
; i
++) {
7766 if (lseek64(targets
[i
], read_offset
, SEEK_SET
) < 0) {
7768 Name
": Cannot seek to block: %s\n",
7772 if (read(targets
[i
], buf
, unit_len
) != unit_len
) {
7774 Name
": Cannot read copy area block: %s\n",
7778 if (lseek64(targets
[i
], write_offset
, SEEK_SET
) < 0) {
7780 Name
": Cannot seek to block: %s\n",
7784 if (write(targets
[i
], buf
, unit_len
) != unit_len
) {
7786 Name
": Cannot restore block: %s\n",
7792 if (ascending
&& curr_migr_unit
< (num_migr_units
-1))
7795 migr_rec
->curr_migr_unit
= __le32_to_cpu(curr_migr_unit
);
7796 super
->migr_rec
->rec_status
= __cpu_to_le32(UNIT_SRC_NORMAL
);
7797 if (write_imsm_migr_rec(st
) == 0) {
7798 __u64 blocks_per_unit
= blocks_per_migr_unit(super
, id
->dev
);
7799 info
->reshape_progress
= curr_migr_unit
* blocks_per_unit
;
7805 for (i
= 0; i
< new_disks
; i
++)
7814 static char disk_by_path
[] = "/dev/disk/by-path/";
/*
 * imsm_get_disk_controller_domain(): given a /dev/disk/by-path basename,
 * stat the link, resolve the devt to a sysfs path and look up the HBA it
 * is attached to; classifies the controller driver (SAS vs SATA per
 * hba->type).
 * NOTE(review): extraction damage -- fragments split across lines, the
 * assignments of 'drv' and the return statement are among the missing
 * lines. Preserved byte-for-byte; restore from upstream mdadm.
 */
7816 static const char *imsm_get_disk_controller_domain(const char *path
)
7818 char disk_path
[PATH_MAX
];
7822 strncpy(disk_path
, disk_by_path
, PATH_MAX
- 1);
7823 strncat(disk_path
, path
, PATH_MAX
- strlen(disk_path
) - 1);
7824 if (stat(disk_path
, &st
) == 0) {
7825 struct sys_dev
* hba
;
7828 path
= devt_to_devpath(st
.st_rdev
);
7831 hba
= find_disk_attached_hba(-1, path
);
7832 if (hba
&& hba
->type
== SYS_DEV_SAS
)
7834 else if (hba
&& hba
->type
== SYS_DEV_SATA
)
7838 dprintf("path: %s hba: %s attached: %s\n",
7839 path
, (hba
) ? hba
->path
: "NULL", drv
);
7847 static int imsm_find_array_minor_by_subdev(int subdev
, int container
, int *minor
)
7849 char subdev_name
[20];
7850 struct mdstat_ent
*mdstat
;
7852 sprintf(subdev_name
, "%d", subdev
);
7853 mdstat
= mdstat_by_subdev(subdev_name
, container
);
7857 *minor
= mdstat
->devnum
;
7858 free_mdstat(mdstat
);
/*
 * imsm_reshape_is_allowed_on_container(): validate that a container-wide
 * reshape (raid-disk count increase only) is permissible: every member
 * must be RAID0/RAID5, platform-supported, chunk-aligned, assembled, and
 * all members must agree on the current raid_disks (stored through
 * *old_raid_disks).
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines (braces, assignments to ret_val, returns) missing. Preserved
 * byte-for-byte; restore from upstream mdadm.
 */
7862 static int imsm_reshape_is_allowed_on_container(struct supertype
*st
,
7863 struct geo_params
*geo
,
7864 int *old_raid_disks
)
7866 /* currently we only support increasing the number of devices
7867 * for a container. This increases the number of device for each
7868 * member array. They must all be RAID0 or RAID5.
7871 struct mdinfo
*info
, *member
;
7872 int devices_that_can_grow
= 0;
7874 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7875 "st->devnum = (%i)\n",
7878 if (geo
->size
!= -1 ||
7879 geo
->level
!= UnSet
||
7880 geo
->layout
!= UnSet
||
7881 geo
->chunksize
!= 0 ||
7882 geo
->raid_disks
== UnSet
) {
7883 dprintf("imsm: Container operation is allowed for "
7884 "raid disks number change only.\n");
7888 info
= container_content_imsm(st
, NULL
);
7889 for (member
= info
; member
; member
= member
->next
) {
7893 dprintf("imsm: checking device_num: %i\n",
7894 member
->container_member
);
7896 if (geo
->raid_disks
<= member
->array
.raid_disks
) {
7897 /* we work on container for Online Capacity Expansion
7898 * only so raid_disks has to grow
7900 dprintf("imsm: for container operation raid disks "
7901 "increase is required\n");
7905 if ((info
->array
.level
!= 0) &&
7906 (info
->array
.level
!= 5)) {
7907 /* we cannot use this container with other raid level
7909 dprintf("imsm: for container operation wrong"
7910 " raid level (%i) detected\n",
7914 /* check for platform support
7915 * for this raid level configuration
7917 struct intel_super
*super
= st
->sb
;
7918 if (!is_raid_level_supported(super
->orom
,
7919 member
->array
.level
,
7921 dprintf("platform does not support raid%d with"
7925 geo
->raid_disks
> 1 ? "s" : "");
7928 /* check if component size is aligned to chunk size
7930 if (info
->component_size
%
7931 (info
->array
.chunk_size
/512)) {
7932 dprintf("Component size is not aligned to "
7938 if (*old_raid_disks
&&
7939 info
->array
.raid_disks
!= *old_raid_disks
)
7941 *old_raid_disks
= info
->array
.raid_disks
;
7943 /* All raid5 and raid0 volumes in container
7944 * have to be ready for Online Capacity Expansion
7945 * so they need to be assembled. We have already
7946 * checked that no recovery etc is happening.
7948 result
= imsm_find_array_minor_by_subdev(member
->container_member
,
7952 dprintf("imsm: cannot find array\n");
7955 devices_that_can_grow
++;
7958 if (!member
&& devices_that_can_grow
)
7962 dprintf("\tContainer operation allowed\n");
7964 dprintf("\tError: %i\n", ret_val
);
7969 /* Function: get_spares_for_grow
7970 * Description: Allocates memory and creates list of spare devices
7971 * avaliable in container. Checks if spare drive size is acceptable.
7972 * Parameters: Pointer to the supertype structure
7973 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7976 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
)
7978 unsigned long long min_size
= min_acceptable_spare_size_imsm(st
);
7979 return container_choose_spares(st
, min_size
, NULL
, NULL
, NULL
, 0);
7982 /******************************************************************************
7983 * function: imsm_create_metadata_update_for_reshape
7984 * Function creates update for whole IMSM container.
7986 ******************************************************************************/
/*
 * imsm_create_metadata_update_for_reshape(): build an imsm_update_reshape
 * record for growing every array in the container by delta_disks, picking
 * the needed spares via get_spares_for_grow() and assigning them indexes
 * in the anchor. Returns the update size through the return value and the
 * record through *updatep.
 * NOTE(review): extraction damage -- fragments split across lines, some
 * lines (error paths, loop bodies) missing. Preserved byte-for-byte;
 * restore from upstream mdadm.
 */
7987 static int imsm_create_metadata_update_for_reshape(
7988 struct supertype
*st
,
7989 struct geo_params
*geo
,
7991 struct imsm_update_reshape
**updatep
)
7993 struct intel_super
*super
= st
->sb
;
7994 struct imsm_super
*mpb
= super
->anchor
;
7995 int update_memory_size
= 0;
7996 struct imsm_update_reshape
*u
= NULL
;
7997 struct mdinfo
*spares
= NULL
;
7999 int delta_disks
= 0;
8002 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
8005 delta_disks
= geo
->raid_disks
- old_raid_disks
;
8007 /* size of all update data without anchor */
8008 update_memory_size
= sizeof(struct imsm_update_reshape
);
8010 /* now add space for spare disks that we need to add. */
8011 update_memory_size
+= sizeof(u
->new_disks
[0]) * (delta_disks
- 1);
8013 u
= calloc(1, update_memory_size
);
8016 "cannot get memory for imsm_update_reshape update\n");
8019 u
->type
= update_reshape_container_disks
;
8020 u
->old_raid_disks
= old_raid_disks
;
8021 u
->new_raid_disks
= geo
->raid_disks
;
8023 /* now get spare disks list
8025 spares
= get_spares_for_grow(st
);
8028 || delta_disks
> spares
->array
.spare_disks
) {
8029 fprintf(stderr
, Name
": imsm: ERROR: Cannot get spare devices "
8030 "for %s.\n", geo
->dev_name
);
8034 /* we have got spares
8035 * update disk list in imsm_disk list table in anchor
8037 dprintf("imsm: %i spares are available.\n\n",
8038 spares
->array
.spare_disks
);
8041 for (i
= 0; i
< delta_disks
; i
++) {
8046 u
->new_disks
[i
] = makedev(dev
->disk
.major
,
8048 dl
= get_disk_super(super
, dev
->disk
.major
, dev
->disk
.minor
);
8049 dl
->index
= mpb
->num_disks
;
8059 dprintf("imsm: reshape update preparation :");
8060 if (i
== delta_disks
) {
8063 return update_memory_size
;
8066 dprintf(" Error\n");
8071 /******************************************************************************
8072 * function: imsm_create_metadata_update_for_migration()
8073 * Creates update for IMSM array.
8075 ******************************************************************************/
/* NOTE(review): damaged extraction -- fragments and missing lines; hedge
 * accordingly. Visible behavior: build an imsm_update_reshape_migration
 * for a single-volume level/layout/chunk-size migration and return its
 * size in bytes (0 on failure); the update is presumably returned via
 * *updatep on a missing line -- TODO confirm against full source. */
8076 static int imsm_create_metadata_update_for_migration(
8077 struct supertype
*st
,
8078 struct geo_params
*geo
,
8079 struct imsm_update_reshape_migration
**updatep
)
8081 struct intel_super
*super
= st
->sb
;
8082 int update_memory_size
= 0;
8083 struct imsm_update_reshape_migration
*u
= NULL
;
8084 struct imsm_dev
*dev
;
8085 int previous_level
= -1;
8087 dprintf("imsm_create_metadata_update_for_migration(enter)"
8088 " New Level = %i\n", geo
->level
);
8090 /* size of all update data without anchor */
8091 update_memory_size
= sizeof(struct imsm_update_reshape_migration
);
8093 u
= calloc(1, update_memory_size
);
/* allocation-failure branch; surrounding if/return lines missing here */
8095 dprintf("error: cannot get memory for "
8096 "imsm_create_metadata_update_for_migration\n");
/* fill the update; raid_disks unchanged (new == old), sentinel -1 marks
 * "no new disk"/"no chunk-size change" until proven otherwise below */
8099 u
->type
= update_reshape_migration
;
8100 u
->subdev
= super
->current_vol
;
8101 u
->new_level
= geo
->level
;
8102 u
->new_layout
= geo
->layout
;
8103 u
->new_raid_disks
= u
->old_raid_disks
= geo
->raid_disks
;
8104 u
->new_disks
[0] = -1;
8105 u
->new_chunksize
= -1;
8107 dev
= get_imsm_dev(super
, u
->subdev
);
8109 struct imsm_map
*map
;
8111 map
= get_imsm_map(dev
, 0);
/* blocks_per_strip is little-endian and in 512-byte units; /2 yields KiB */
8113 int current_chunk_size
=
8114 __le16_to_cpu(map
->blocks_per_strip
) / 2;
8116 if (geo
->chunksize
!= current_chunk_size
) {
8117 u
->new_chunksize
= geo
->chunksize
/ 1024;
8119 "chunk size change from %i to %i\n",
8120 current_chunk_size
, u
->new_chunksize
);
8122 previous_level
= map
->raid_level
;
/* raid0 -> raid5 takeover needs one extra member taken from spares */
8125 if ((geo
->level
== 5) && (previous_level
== 0)) {
8126 struct mdinfo
*spares
= NULL
;
8128 u
->new_raid_disks
++;
8129 spares
= get_spares_for_grow(st
);
8130 if ((spares
== NULL
) || (spares
->array
.spare_disks
< 1)) {
/* no spare: signal failure by returning a size of 0 */
8133 update_memory_size
= 0;
8134 dprintf("error: cannot get spare device "
8135 "for requested migration");
8140 dprintf("imsm: reshape update preparation : OK\n");
8143 return update_memory_size
;
/* Apply a metadata update to the local in-memory copy by driving the same
 * prepare/process pair used for updates received from the manager thread.
 * NOTE(review): parameter list and the loop body's free() are on lines
 * missing from this extraction -- the visible while loop walks the
 * space_list chain (each node's first word links to the next), presumably
 * freeing each node -- TODO confirm against full source. */
8146 static void imsm_update_metadata_locally(struct supertype
*st
,
8149 struct metadata_update mu
;
8154 mu
.space_list
= NULL
;
8156 imsm_prepare_update(st
, &mu
);
8157 imsm_process_update(st
, &mu
);
/* drain the scratch-space list that imsm_prepare_update allocated */
8159 while (mu
.space_list
) {
8160 void **space
= mu
.space_list
;
8161 mu
.space_list
= *space
;
8166 /***************************************************************************
8167 * Function: imsm_analyze_change
8168 * Description: Function analyze change for single volume
8169 * and validate if transition is supported
8170 * Parameters: Geometry parameters, supertype structure
8171 * Returns: Operation type code on success, -1 if fail
8172 ****************************************************************************/
/* NOTE(review): damaged extraction -- the switch's case labels and several
 * declarations/returns are missing; comments hedge where code is absent.
 * Visible behavior: classify a requested geometry change as CH_TAKEOVER
 * (raid0<->raid10) or CH_MIGRATION (raid0<->raid5, layout or chunk-size
 * change), normalize unset geo fields to current values, and validate the
 * result; errors jump to analyse_change_exit. */
8173 enum imsm_reshape_type
imsm_analyze_change(struct supertype
*st
,
8174 struct geo_params
*geo
)
8181 getinfo_super_imsm_volume(st
, &info
, NULL
);
/* level change requested (and the request is a real level, not UnSet) */
8183 if ((geo
->level
!= info
.array
.level
) &&
8184 (geo
->level
>= 0) &&
8185 (geo
->level
!= UnSet
)) {
/* dispatch on the CURRENT level; the case labels themselves are on
 * missing lines -- from the bodies: 0->5 migration, 0->10 takeover,
 * then two levels whose ->0 transitions are takeovers (presumably
 * raid10 and raid5/raid1 -- confirm against full source) */
8186 switch (info
.array
.level
) {
8188 if (geo
->level
== 5) {
8189 change
= CH_MIGRATION
;
8192 if (geo
->level
== 10) {
8193 change
= CH_TAKEOVER
;
8198 if (geo
->level
== 0) {
8199 change
= CH_TAKEOVER
;
8204 if (geo
->level
== 0) {
8205 change
= CH_TAKEOVER
;
/* any other combination is unsupported: report and bail out */
8212 Name
" Error. Level Migration from %d to %d "
8214 info
.array
.level
, geo
->level
);
8215 goto analyse_change_exit
;
/* no (valid) level change requested: keep the current level */
8218 geo
->level
= info
.array
.level
;
/* layout change: only raid5 left-asymmetric(0) <-> layout 5 ("raid4"
 * style) is accepted, per the two branches below */
8220 if ((geo
->layout
!= info
.array
.layout
)
8221 && ((geo
->layout
!= UnSet
) && (geo
->layout
!= -1))) {
8222 change
= CH_MIGRATION
;
8223 if ((info
.array
.layout
== 0)
8224 && (info
.array
.level
== 5)
8225 && (geo
->layout
== 5)) {
8226 /* reshape 5 -> 4 */
8227 } else if ((info
.array
.layout
== 5)
8228 && (info
.array
.level
== 5)
8229 && (geo
->layout
== 0)) {
8230 /* reshape 4 -> 5 */
8235 Name
" Error. Layout Migration from %d to %d "
8237 info
.array
.layout
, geo
->layout
);
8239 goto analyse_change_exit
;
8242 geo
->layout
= info
.array
.layout
;
/* a real chunk-size change is also handled as a migration */
8244 if ((geo
->chunksize
> 0) && (geo
->chunksize
!= UnSet
)
8245 && (geo
->chunksize
!= info
.array
.chunk_size
))
8246 change
= CH_MIGRATION
;
8248 geo
->chunksize
= info
.array
.chunk_size
;
8250 chunk
= geo
->chunksize
/ 1024;
/* final sanity check of the resulting geometry; the remaining
 * arguments to validate_geometry_imsm are on missing lines */
8251 if (!validate_geometry_imsm(st
,
8261 struct intel_super
*super
= st
->sb
;
8262 struct imsm_super
*mpb
= super
->anchor
;
/* this operation is only allowed when the container holds exactly
 * one array */
8264 if (mpb
->num_raid_devs
> 1) {
8266 Name
" Error. Cannot perform operation on %s"
8267 "- for this operation it MUST be single "
8268 "array in container\n",
8274 analyse_change_exit
:
/* Queue a takeover update (raid10->raid0 or raid0->raid10) for the current
 * volume: build an imsm_update_takeover, apply it to local metadata, and
 * append it for the monitor when an update_tail exists.
 * NOTE(review): the malloc NULL-check and the function's return lines are
 * missing from this extraction -- confirm against full source. */
8279 int imsm_takeover(struct supertype
*st
, struct geo_params
*geo
)
8281 struct intel_super
*super
= st
->sb
;
8282 struct imsm_update_takeover
*u
;
8284 u
= malloc(sizeof(struct imsm_update_takeover
));
8288 u
->type
= update_takeover
;
8289 u
->subarray
= super
->current_vol
;
8291 /* 10->0 transition */
8292 if (geo
->level
== 0)
8293 u
->direction
= R10_TO_R0
;
8295 /* 0->10 transition */
8296 if (geo
->level
== 10)
8297 u
->direction
= R0_TO_R10
;
8299 /* update metadata locally */
8300 imsm_update_metadata_locally(st
, u
,
8301 sizeof(struct imsm_update_takeover
));
8302 /* and possibly remotely */
8303 if (st
->update_tail
)
8304 append_metadata_update(st
, u
,
8305 sizeof(struct imsm_update_takeover
));
/* Print a prominent warning about experimental container-grow support and
 * ask the user for confirmation; the result of ask() is stored in rv and
 * presumably returned (the return line is missing from this extraction). */
8312 static int warn_user_about_risk(void)
8317 "\nThis is an experimental feature. Data on the RAID volume(s) "
8318 "can be lost!!!\n\n"
8319 "To continue command execution please make sure that\n"
8320 "the grow process will not be interrupted. Use safe power\n"
8321 "supply to avoid unexpected system reboot. Make sure that\n"
8322 "reshaped container is not assembled automatically during\n"
8324 "If reshape is interrupted, assemble array manually\n"
8325 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
8326 "Assembly in scan mode is not possible in such case.\n"
8327 "Growing container with boot array is not possible.\n"
8328 "If boot array reshape is interrupted, whole file system\n"
8329 "can be lost.\n\n");
8330 rv
= ask("Do you want to continue? ");
8331 fprintf(stderr
, "\n");
/* superswitch.reshape_super entry point for IMSM.
 * NOTE(review): damaged extraction with many missing lines (parameter tail,
 * geo.level/size assignment, loop scaffolding, returns); comments hedge
 * where code is absent. Visible behavior: build geo_params from the
 * arguments, then either (a) on a container device, warn the user and
 * create/apply/queue a disk-addition update, or (b) on a volume, locate the
 * requested subarray, classify the change with imsm_analyze_change(), and
 * dispatch to takeover or migration handling. */
8336 static int imsm_reshape_super(struct supertype
*st
, long long size
, int level
,
8337 int layout
, int chunksize
, int raid_disks
,
8338 int delta_disks
, char *backup
, char *dev
,
8342 struct geo_params geo
;
8344 dprintf("imsm: reshape_super called.\n")
;
8346 memset(&geo
, 0, sizeof(struct geo_params
));
8349 geo
.dev_id
= st
->devnum
;
8352 geo
.layout
= layout
;
8353 geo
.chunksize
= chunksize
;
8354 geo
.raid_disks
= raid_disks
;
/* delta_disks is relative: fold it into the absolute raid_disks count */
8355 if (delta_disks
!= UnSet
)
8356 geo
.raid_disks
+= delta_disks
;
8358 dprintf("\tfor level : %i\n", geo
.level
);
8359 dprintf("\tfor raid_disks : %i\n", geo
.raid_disks
);
/* feature is gated behind MDADM_EXPERIMENTAL; bail out otherwise
 * (the return line is missing here) */
8361 if (experimental() == 0)
/* container-level operation: devnum equals the container's devnum */
8364 if (st
->container_dev
== st
->devnum
) {
8365 /* On container level we can only increase number of devices. */
8366 dprintf("imsm: info: Container operation\n");
8367 int old_raid_disks
= 0;
8369 /* this warning will be removed when imsm checkpointing
8370 * will be implemented, and restoring from check-point
8371 * operation will be transparent for reboot process
8373 if (warn_user_about_risk() == 0)
8376 if (imsm_reshape_is_allowed_on_container(
8377 st
, &geo
, &old_raid_disks
)) {
8378 struct imsm_update_reshape
*u
= NULL
;
8381 len
= imsm_create_metadata_update_for_reshape(
8382 st
, &geo
, old_raid_disks
, &u
);
/* len <= 0 presumably means update creation failed -- the
 * surrounding condition is on a missing line */
8385 dprintf("imsm: Cannot prepare update\n");
8386 goto exit_imsm_reshape_super
;
8390 /* update metadata locally */
8391 imsm_update_metadata_locally(st
, u
, len
);
8392 /* and possibly remotely */
8393 if (st
->update_tail
)
8394 append_metadata_update(st
, u
, len
);
8399 fprintf(stderr
, Name
": (imsm) Operation "
8400 "is not allowed on this container\n");
8403 /* On volume level we support following operations
8404 * - takeover: raid10 -> raid0; raid0 -> raid10
8405 * - chunk size migration
8406 * - migration: raid5 -> raid0; raid0 -> raid5
8408 struct intel_super
*super
= st
->sb
;
8409 struct intel_dev
*dev
= super
->devlist
;
8411 dprintf("imsm: info: Volume operation\n");
8412 /* find requested device */
/* walk super->devlist (loop header is on a missing line) matching each
 * subarray's minor against geo.dev_id */
8414 imsm_find_array_minor_by_subdev(dev
->index
, st
->container_dev
, &devnum
);
8415 if (devnum
== geo
.dev_id
)
8420 fprintf(stderr
, Name
" Cannot find %s (%i) subarray\n",
8421 geo
.dev_name
, geo
.dev_id
);
8422 goto exit_imsm_reshape_super
;
8424 super
->current_vol
= dev
->index
;
8425 change
= imsm_analyze_change(st
, &geo
);
/* switch (change) -- the switch header and CH_TAKEOVER label are on
 * missing lines; visible arms handle takeover and migration */
8428 ret_val
= imsm_takeover(st
, &geo
);
8430 case CH_MIGRATION
: {
8431 struct imsm_update_reshape_migration
*u
= NULL
;
8433 imsm_create_metadata_update_for_migration(
8437 "Cannot prepare update\n");
8441 /* update metadata locally */
8442 imsm_update_metadata_locally(st
, u
, len
);
8443 /* and possibly remotely */
8444 if (st
->update_tail
)
8445 append_metadata_update(st
, u
, len
);
8455 exit_imsm_reshape_super
:
8456 dprintf("imsm: reshape_super Exit code = %i\n", ret_val
);
8460 /*******************************************************************************
8461 * Function: wait_for_reshape_imsm
8462 * Description: Function writes new sync_max value and waits until
8463 * reshape process reach new position
8465 * sra : general array info
8466 * to_complete : new sync_max position
8467 * ndata : number of disks in new array's layout
8470 * 1 : there is no reshape in progress,
8472 ******************************************************************************/
/* NOTE(review): damaged extraction -- the do/while header, fd checks, and
 * return statements are on missing lines. Visible behavior: set sync_max
 * (scaled by ndata, or "max" for to_complete == 0), then poll the
 * reshape_position sysfs fd via select() with a 30s timeout until the
 * kernel reports the target position or sync_action stops being
 * "reshape". */
8473 int wait_for_reshape_imsm(struct mdinfo
*sra
, unsigned long long to_complete
,
8476 int fd
= sysfs_get_fd(sra
, NULL
, "reshape_position");
8477 unsigned long long completed
;
8479 struct timeval timeout
;
8484 sysfs_fd_get_ll(fd
, &completed
);
8486 if (to_complete
== 0) {/* reshape till the end of array */
8487 sysfs_set_str(sra
, NULL
, "sync_max", "max");
8488 to_complete
= MaxSector
;
/* already past the requested position: nothing to wait for */
8490 if (completed
> to_complete
)
/* sync_max is per-member, hence the division by data-disk count */
8492 if (sysfs_set_num(sra
, NULL
, "sync_max",
8493 to_complete
/ ndata
) != 0) {
8499 /* FIXME should not need a timeout at all */
8500 timeout
.tv_sec
= 30;
8501 timeout
.tv_usec
= 0;
/* wait for the sysfs fd to signal a position change, then re-read */
8507 select(fd
+1, NULL
, NULL
, &rfds
, &timeout
);
8508 if (sysfs_fd_get_ll(fd
, &completed
) < 0) {
/* reshape no longer in progress per sync_action: stop waiting */
8512 if (sysfs_get_str(sra
, NULL
, "sync_action",
8514 strncmp(action
, "reshape", 7) != 0)
8516 } while (completed
< to_complete
);
8522 /*******************************************************************************
8523 * Function: check_degradation_change
8524 * Description: Check that array hasn't become failed.
8526 * info : for sysfs access
8527 * sources : source disks descriptors
8528 * degraded: previous degradation level
8531 ******************************************************************************/
/* NOTE(review): parameter tail, sd/sbuf declarations, and the fd-close in
 * the faulty path are on missing lines in this extraction. Visible
 * behavior: re-read the "degraded" count from sysfs; if it changed, scan
 * each member's per-device "state", mark newly dead devices faulty and
 * invalidate their entry in sources[] (presumably closing the fd first --
 * confirm). Returns the new degraded count. */
8532 int check_degradation_change(struct mdinfo
*info
,
8536 unsigned long long new_degraded
;
8537 sysfs_get_ll(info
, NULL
, "degraded", &new_degraded
);
8538 if (new_degraded
!= (unsigned long long)degraded
) {
8539 /* check each device to ensure it is still working */
8542 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
/* already known faulty: skip (the continue is on a missing line) */
8543 if (sd
->disk
.state
& (1<<MD_DISK_FAULTY
))
8545 if (sd
->disk
.state
& (1<<MD_DISK_SYNC
)) {
/* device is dead if its state is unreadable, reports "faulty",
 * or no longer reports "in_sync" */
8547 if (sysfs_get_str(info
,
8548 sd
, "state", sbuf
, 20) < 0 ||
8549 strstr(sbuf
, "faulty") ||
8550 strstr(sbuf
, "in_sync") == NULL
) {
8551 /* this device is dead */
8552 sd
->disk
.state
= (1<<MD_DISK_FAULTY
);
8553 if (sd
->disk
.raid_disk
>= 0 &&
8554 sources
[sd
->disk
.raid_disk
] >= 0) {
8556 sd
->disk
.raid_disk
]);
8557 sources
[sd
->disk
.raid_disk
] =
8566 return new_degraded
;
8569 /*******************************************************************************
8570 * Function: imsm_manage_reshape
8571 * Description: Function finds array under reshape and it manages reshape
8572 * process. It creates stripes backups (if required) and sets
8575 * afd : Backup handle (nattive) - not used
8576 * sra : general array info
8577 * reshape : reshape parameters - not used
8578 * st : supertype structure
8579 * blocks : size of critical section [blocks]
8580 * fds : table of source device descriptor
8581 * offsets : start of array (offest per devices)
8583 * destfd : table of destination device descriptor
8584 * destoffsets : table of destination offsets (per device)
8586 * 1 : success, reshape is done
8588 ******************************************************************************/
/* NOTE(review): damaged extraction -- loop/brace scaffolding, some
 * declarations (buf, degraded, i, rfds), error exits, and the final
 * return/cleanup are on missing lines; comments below hedge accordingly.
 * Visible behavior: locate the one volume in general migration, size and
 * allocate an aligned backup buffer, then advance the reshape one
 * migration unit at a time: back up critical stripes when the src/dest
 * windows overlap, checkpoint, drive the kernel via suspend_lo/suspend_hi
 * and wait_for_reshape_imsm(), and checkpoint again. */
8589 static int imsm_manage_reshape(
8590 int afd
, struct mdinfo
*sra
, struct reshape
*reshape
,
8591 struct supertype
*st
, unsigned long backup_blocks
,
8592 int *fds
, unsigned long long *offsets
,
8593 int dests
, int *destfd
, unsigned long long *destoffsets
)
8596 struct intel_super
*super
= st
->sb
;
8597 struct intel_dev
*dv
= NULL
;
8598 struct imsm_dev
*dev
= NULL
;
8599 struct imsm_map
*map_src
, *map_dest
;
8600 int migr_vol_qan
= 0;
8601 int ndata
, odata
; /* [bytes] */
8602 int chunk
; /* [bytes] */
8603 struct migr_record
*migr_rec
;
8605 unsigned int buf_size
; /* [bytes] */
8606 unsigned long long max_position
; /* array size [bytes] */
8607 unsigned long long next_step
; /* [blocks]/[bytes] */
8608 unsigned long long old_data_stripe_length
;
8609 unsigned long long new_data_stripe_length
;
8610 unsigned long long start_src
; /* [bytes] */
8611 unsigned long long start
; /* [bytes] */
8612 unsigned long long start_buf_shift
; /* [bytes] */
/* defensive argument check; the return is on a missing line */
8615 if (!fds
|| !offsets
|| !destfd
|| !destoffsets
|| !sra
)
8618 /* Find volume during the reshape */
8619 for (dv
= super
->devlist
; dv
; dv
= dv
->next
) {
8620 if (dv
->dev
->vol
.migr_type
== MIGR_GEN_MIGR
8621 && dv
->dev
->vol
.migr_state
== 1) {
8626 /* Only one volume can migrate at the same time */
8627 if (migr_vol_qan
!= 1) {
8628 fprintf(stderr
, Name
" : %s", migr_vol_qan
?
8629 "Number of migrating volumes greater than 1\n" :
8630 "There is no volume during migrationg\n");
/* map index 1 = source (previous) map, index 0 = destination map */
8634 map_src
= get_imsm_map(dev
, 1);
8635 if (map_src
== NULL
)
8637 map_dest
= get_imsm_map(dev
, 0);
8639 ndata
= imsm_num_data_members(dev
, 0);
8640 odata
= imsm_num_data_members(dev
, 1);
8642 chunk
= map_src
->blocks_per_strip
* 512;
8643 old_data_stripe_length
= odata
* chunk
;
8645 migr_rec
= super
->migr_rec
;
/* publish destination geometry so sysfs/kernel sees the new shape */
8648 sra
->new_chunk
= __le16_to_cpu(map_dest
->blocks_per_strip
) * 512;
8649 sra
->new_level
= map_dest
->raid_level
;
8650 new_data_stripe_length
= sra
->new_chunk
* ndata
;
8652 /* initialize migration record for start condition */
8653 if (sra
->reshape_progress
== 0)
8654 init_migr_record_imsm(st
, dev
, sra
);
/* backup buffer: one migration unit, plus parity depth, plus slack to
 * align reads to an old-geometry stripe boundary */
8657 buf_size
= __le32_to_cpu(migr_rec
->blocks_per_unit
) * 512;
8658 /* extend buffer size for parity disk */
8659 buf_size
+= __le32_to_cpu(migr_rec
->dest_depth_per_unit
) * 512;
8660 /* add space for stripe aligment */
8661 buf_size
+= old_data_stripe_length
;
8662 if (posix_memalign((void **)&buf
, 4096, buf_size
)) {
8663 dprintf("imsm: Cannot allocate checpoint buffer\n");
/* total capacity after migration, assembled from lo/hi 32-bit halves;
 * the assignment's left-hand side is on a missing line */
8668 __le32_to_cpu(migr_rec
->post_migr_vol_cap
) +
8669 ((unsigned long long)__le32_to_cpu(
8670 migr_rec
->post_migr_vol_cap_hi
) << 32);
/* main loop: one migration unit per iteration until all units done */
8672 while (__le32_to_cpu(migr_rec
->curr_migr_unit
) <
8673 __le32_to_cpu(migr_rec
->num_migr_units
)) {
8674 /* current reshape position [blocks] */
8675 unsigned long long current_position
=
8676 __le32_to_cpu(migr_rec
->blocks_per_unit
)
8677 * __le32_to_cpu(migr_rec
->curr_migr_unit
);
8678 unsigned long long border
;
8680 /* Check that array hasn't become failed.
8682 degraded
= check_degradation_change(sra
, fds
, degraded
);
/* the abort threshold's if-condition is on a missing line */
8684 dprintf("imsm: Abort reshape due to degradation"
8685 " level (%i)\n", degraded
);
8689 next_step
= __le32_to_cpu(migr_rec
->blocks_per_unit
);
/* clamp the final unit so we never run past the array's end */
8691 if ((current_position
+ next_step
) > max_position
)
8692 next_step
= max_position
- current_position
;
8694 start
= (map_src
->pba_of_lba0
+ dev
->reserved_blocks
+
8695 current_position
) * 512;
8697 /* allign reading start to old geometry */
8698 start_buf_shift
= start
% old_data_stripe_length
;
8699 start_src
= start
- start_buf_shift
;
/* distance between the source read window and the destination
 * write window; small border => writes would overwrite unread
 * source data, so a backup is required first */
8701 border
= (start_src
/ odata
) - (start
/ ndata
);
8703 if (border
<= __le32_to_cpu(migr_rec
->dest_depth_per_unit
)) {
8704 /* save critical stripes to buf
8705 * start - start address of current unit
8707 * start_src - start address of current unit
8708 * to backup alligned to source array
8711 unsigned long long next_step_filler
= 0;
8712 unsigned long long copy_length
= next_step
* 512;
8714 /* allign copy area length to stripe in old geometry */
8715 next_step_filler
= ((copy_length
+ start_buf_shift
)
8716 % old_data_stripe_length
);
8717 if (next_step_filler
)
8718 next_step_filler
= (old_data_stripe_length
8719 - next_step_filler
);
8720 dprintf("save_stripes() parameters: start = %llu,"
8721 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8722 "\tstart_in_buf_shift = %llu,"
8723 "\tnext_step_filler = %llu\n",
8724 start
, start_src
, copy_length
,
8725 start_buf_shift
, next_step_filler
);
/* read the endangered stripes from the source layout into buf;
 * trailing arguments/error-exit are on missing lines */
8727 if (save_stripes(fds
, offsets
, map_src
->num_members
,
8728 chunk
, sra
->array
.level
,
8729 sra
->array
.layout
, 0, NULL
, start_src
,
8731 next_step_filler
+ start_buf_shift
,
8733 dprintf("imsm: Cannot save stripes"
8737 /* Convert data to destination format and store it
8738 * in backup general migration area
8740 if (save_backup_imsm(st
, dev
, sra
,
8741 buf
+ start_buf_shift
,
8742 ndata
, copy_length
)) {
8743 dprintf("imsm: Cannot save stripes to "
8744 "target devices\n");
/* record that backed-up data now lives in the checkpoint area */
8747 if (save_checkpoint_imsm(st
, sra
,
8748 UNIT_SRC_IN_CP_AREA
)) {
8749 dprintf("imsm: Cannot write checkpoint to "
8750 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8753 /* decrease backup_blocks */
8754 if (backup_blocks
> (unsigned long)next_step
)
8755 backup_blocks
-= next_step
;
8759 /* When data backed up, checkpoint stored,
8760 * kick the kernel to reshape unit of data
8762 next_step
= next_step
+ sra
->reshape_progress
;
8763 sysfs_set_num(sra
, NULL
, "suspend_lo", sra
->reshape_progress
);
8764 sysfs_set_num(sra
, NULL
, "suspend_hi", next_step
);
8766 /* wait until reshape finish */
8767 if (wait_for_reshape_imsm(sra
, next_step
, ndata
) < 0) {
8768 dprintf("wait_for_reshape_imsm returned error!\n");
8772 sra
->reshape_progress
= next_step
;
/* unit reshaped by the kernel: checkpoint as normal progress */
8774 if (save_checkpoint_imsm(st
, sra
, UNIT_SRC_NORMAL
)) {
8775 dprintf("imsm: Cannot write checkpoint to "
8776 "migration record (UNIT_SRC_NORMAL)\n");
8782 /* return '1' if done */
8790 #endif /* MDASSEMBLE */
8792 struct superswitch super_imsm
= {
8794 .examine_super
= examine_super_imsm
,
8795 .brief_examine_super
= brief_examine_super_imsm
,
8796 .brief_examine_subarrays
= brief_examine_subarrays_imsm
,
8797 .export_examine_super
= export_examine_super_imsm
,
8798 .detail_super
= detail_super_imsm
,
8799 .brief_detail_super
= brief_detail_super_imsm
,
8800 .write_init_super
= write_init_super_imsm
,
8801 .validate_geometry
= validate_geometry_imsm
,
8802 .add_to_super
= add_to_super_imsm
,
8803 .remove_from_super
= remove_from_super_imsm
,
8804 .detail_platform
= detail_platform_imsm
,
8805 .kill_subarray
= kill_subarray_imsm
,
8806 .update_subarray
= update_subarray_imsm
,
8807 .load_container
= load_container_imsm
,
8808 .default_geometry
= default_geometry_imsm
,
8809 .get_disk_controller_domain
= imsm_get_disk_controller_domain
,
8810 .reshape_super
= imsm_reshape_super
,
8811 .manage_reshape
= imsm_manage_reshape
,
8813 .match_home
= match_home_imsm
,
8814 .uuid_from_super
= uuid_from_super_imsm
,
8815 .getinfo_super
= getinfo_super_imsm
,
8816 .getinfo_super_disks
= getinfo_super_disks_imsm
,
8817 .update_super
= update_super_imsm
,
8819 .avail_size
= avail_size_imsm
,
8820 .min_acceptable_spare_size
= min_acceptable_spare_size_imsm
,
8822 .compare_super
= compare_super_imsm
,
8824 .load_super
= load_super_imsm
,
8825 .init_super
= init_super_imsm
,
8826 .store_super
= store_super_imsm
,
8827 .free_super
= free_super_imsm
,
8828 .match_metadata_desc
= match_metadata_desc_imsm
,
8829 .container_content
= container_content_imsm
,
8831 .recover_backup
= recover_backup_imsm
,
8838 .open_new
= imsm_open_new
,
8839 .set_array_state
= imsm_set_array_state
,
8840 .set_disk
= imsm_set_disk
,
8841 .sync_metadata
= imsm_sync_metadata
,
8842 .activate_spare
= imsm_activate_spare
,
8843 .process_update
= imsm_process_update
,
8844 .prepare_update
= imsm_prepare_update
,
8845 #endif /* MDASSEMBLE */