2 * mdadm - Intel(R) Matrix Storage Manager Support
4 * Copyright (C) 2002-2008 Intel Corporation
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 #define HAVE_STDINT_H 1
24 #include "platform-intel.h"
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
54 #define MPB_SECTOR_CNT 2210
55 #define IMSM_RESERVED_SECTORS 4096
56 #define SECT_PER_MB_SHIFT 11
58 /* Disk configuration info. */
59 #define IMSM_MAX_DEVICES 255
61 __u8 serial
[MAX_RAID_SERIAL_LEN
];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks
; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id
; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status
; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num
; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler
[IMSM_DISK_FILLERS
]; /* 0xF8 - 0x107 IMSM_DISK_FILLERS for future expansion */
73 /* RAID map configuration infos. */
75 __u32 pba_of_lba0
; /* start address of partition */
76 __u32 blocks_per_member
;/* blocks per member */
77 __u32 num_data_stripes
; /* number of data stripes */
78 __u16 blocks_per_strip
;
79 __u8 map_state
; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members
; /* number of member disks */
89 __u8 num_domains
; /* number of parity domains */
90 __u8 failed_disk_num
; /* valid only when state is degraded */
92 __u32 filler
[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl
[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
97 } __attribute__ ((packed
));
100 __u32 curr_migr_unit
;
101 __u32 checkpoint_id
; /* id to access curr_migr_unit */
102 __u8 migr_state
; /* Normal or Migrating */
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type
; /* Initializing, Rebuilding, ... */
111 __u8 fs_state
; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors
; /* number of mismatches */
113 __u16 bad_blocks
; /* number of bad blocks during verify */
115 struct imsm_map map
[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed
));
120 __u8 volume
[MAX_RAID_SERIAL_LEN
];
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status
; /* Persistent RaidDev status */
137 __u32 reserved_blocks
; /* Reserved blocks at beginning of volume */
141 __u8 cng_master_disk
;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler
[IMSM_DEV_FILLERS
];
148 } __attribute__ ((packed
));
151 __u8 sig
[MAX_SIGNATURE_LENGTH
]; /* 0x00 - 0x1F */
152 __u32 check_sum
; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size
; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num
; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num
; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size
; /* 0x30 - 0x33 in bytes */
157 __u32 attributes
; /* 0x34 - 0x37 */
158 __u8 num_disks
; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs
; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos
; /* 0x3A */
161 __u8 fill
[1]; /* 0x3B */
162 __u32 cache_size
; /* 0x3C - 0x3F in mb */
163 __u32 orig_family_num
; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count
; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size
; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler
[IMSM_FILLERS
]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk
[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed
));
173 #define BBM_LOG_MAX_ENTRIES 254
175 struct bbm_log_entry
{
176 __u64 defective_block_start
;
177 #define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset
;
179 __u16 remapped_marked_count
;
181 } __attribute__ ((__packed__
));
184 __u32 signature
; /* 0xABADB10C */
186 __u32 reserved_spare_block_count
; /* 0 */
187 __u32 reserved
; /* 0xFFFF */
188 __u64 first_spare_lba
;
189 struct bbm_log_entry mapped_block_entries
[BBM_LOG_MAX_ENTRIES
];
190 } __attribute__ ((__packed__
));
194 static char *map_state_str
[] = { "normal", "uninitialized", "degraded", "failed" };
197 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
199 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
201 #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
202 * be recovered using srcMap */
203 #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
204 * already been migrated and must
205 * be recovered from checkpoint area */
207 __u32 rec_status
; /* Status used to determine how to restart
208 * migration in case it aborts
210 __u32 curr_migr_unit
; /* 0..numMigrUnits-1 */
211 __u32 family_num
; /* Family number of MPB
212 * containing the RaidDev
213 * that is migrating */
214 __u32 ascending_migr
; /* True if migrating in increasing
216 __u32 blocks_per_unit
; /* Num disk blocks per unit of operation */
217 __u32 dest_depth_per_unit
; /* Num member blocks each destMap
219 * advances per unit-of-operation */
220 __u32 ckpt_area_pba
; /* Pba of first block of ckpt copy area */
221 __u32 dest_1st_member_lba
; /* First member lba on first
222 * stripe of destination */
223 __u32 num_migr_units
; /* Total num migration units-of-op */
224 __u32 post_migr_vol_cap
; /* Size of volume after
225 * migration completes */
226 __u32 post_migr_vol_cap_hi
; /* Expansion space for LBA64 */
227 __u32 ckpt_read_disk_num
; /* Which member disk in destSubMap[0] the
228 * migration ckpt record was read from
229 * (for recovered migrations) */
230 } __attribute__ ((__packed__
));
232 static __u8
migr_type(struct imsm_dev
*dev
)
234 if (dev
->vol
.migr_type
== MIGR_VERIFY
&&
235 dev
->status
& DEV_VERIFY_AND_FIX
)
238 return dev
->vol
.migr_type
;
241 static void set_migr_type(struct imsm_dev
*dev
, __u8 migr_type
)
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
246 if (migr_type
== MIGR_REPAIR
) {
247 dev
->vol
.migr_type
= MIGR_VERIFY
;
248 dev
->status
|= DEV_VERIFY_AND_FIX
;
250 dev
->vol
.migr_type
= migr_type
;
251 dev
->status
&= ~DEV_VERIFY_AND_FIX
;
/* Number of 512-byte sectors needed to hold 'bytes' bytes, rounded up.
 *
 * The quotient and remainder are taken directly from 'bytes' instead of
 * adding 511 first, so values within 511 of the __u32 maximum no longer
 * wrap around and report zero sectors.
 */
static unsigned int sector_count(__u32 bytes)
{
	return bytes / 512 + (bytes % 512 != 0);
}
260 static unsigned int mpb_sectors(struct imsm_super
*mpb
)
262 return sector_count(__le32_to_cpu(mpb
->mpb_size
));
266 struct imsm_dev
*dev
;
267 struct intel_dev
*next
;
272 enum sys_dev_type type
;
275 struct intel_hba
*next
;
282 /* internal representation of IMSM metadata */
285 void *buf
; /* O_DIRECT buffer for reading/writing metadata */
286 struct imsm_super
*anchor
; /* immovable parameters */
289 void *migr_rec_buf
; /* buffer for I/O operations */
290 struct migr_record
*migr_rec
; /* migration record */
292 size_t len
; /* size of the 'buf' allocation */
293 void *next_buf
; /* for realloc'ing buf from the manager */
295 int updates_pending
; /* count of pending updates for mdmon */
296 int current_vol
; /* index of raid device undergoing creation */
297 __u32 create_offset
; /* common start for 'current_vol' */
298 __u32 random
; /* random data for seeding new family numbers */
299 struct intel_dev
*devlist
;
303 __u8 serial
[MAX_RAID_SERIAL_LEN
];
306 struct imsm_disk disk
;
309 struct extent
*e
; /* for determining freespace @ create */
310 int raiddisk
; /* slot to fill in autolayout */
313 struct dl
*disk_mgmt_list
; /* list of disks to add/remove while mdmon
315 struct dl
*missing
; /* disks removed while we weren't looking */
316 struct bbm_log
*bbm_log
;
317 struct intel_hba
*hba
; /* device path of the raid controller for this metadata */
318 const struct imsm_orom
*orom
; /* platform firmware support */
319 struct intel_super
*next
; /* (temp) list for disambiguating family_num */
323 struct imsm_disk disk
;
324 #define IMSM_UNKNOWN_OWNER (-1)
326 struct intel_disk
*next
;
330 unsigned long long start
, size
;
333 /* definitions of reshape process types */
334 enum imsm_reshape_type
{
339 /* definition of messages passed to imsm_process_update */
340 enum imsm_update_type
{
341 update_activate_spare
,
345 update_add_remove_disk
,
346 update_reshape_container_disks
,
347 update_reshape_migration
,
351 struct imsm_update_activate_spare
{
352 enum imsm_update_type type
;
356 struct imsm_update_activate_spare
*next
;
369 enum takeover_direction
{
373 struct imsm_update_takeover
{
374 enum imsm_update_type type
;
376 enum takeover_direction direction
;
379 struct imsm_update_reshape
{
380 enum imsm_update_type type
;
384 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
387 struct imsm_update_reshape_migration
{
388 enum imsm_update_type type
;
391 /* fields for array migration changes
398 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
402 __u8 serial
[MAX_RAID_SERIAL_LEN
];
405 struct imsm_update_create_array
{
406 enum imsm_update_type type
;
411 struct imsm_update_kill_array
{
412 enum imsm_update_type type
;
416 struct imsm_update_rename_array
{
417 enum imsm_update_type type
;
418 __u8 name
[MAX_RAID_SERIAL_LEN
];
422 struct imsm_update_add_remove_disk
{
423 enum imsm_update_type type
;
427 static const char *_sys_dev_type
[] = {
428 [SYS_DEV_UNKNOWN
] = "Unknown",
429 [SYS_DEV_SAS
] = "SAS",
430 [SYS_DEV_SATA
] = "SATA"
433 const char *get_sys_dev_type(enum sys_dev_type type
)
435 if (type
>= SYS_DEV_MAX
)
436 type
= SYS_DEV_UNKNOWN
;
438 return _sys_dev_type
[type
];
441 static struct intel_hba
* alloc_intel_hba(struct sys_dev
*device
)
443 struct intel_hba
*result
= malloc(sizeof(*result
));
445 result
->type
= device
->type
;
446 result
->path
= strdup(device
->path
);
448 if (result
->path
&& (result
->pci_id
= strrchr(result
->path
, '/')) != NULL
)
454 static struct intel_hba
* find_intel_hba(struct intel_hba
*hba
, struct sys_dev
*device
)
456 struct intel_hba
*result
=NULL
;
457 for (result
= hba
; result
; result
= result
->next
) {
458 if (result
->type
== device
->type
&& strcmp(result
->path
, device
->path
) == 0)
464 static int attach_hba_to_super(struct intel_super
*super
, struct sys_dev
*device
)
466 struct intel_hba
*hba
;
468 /* check if disk attached to Intel HBA */
469 hba
= find_intel_hba(super
->hba
, device
);
472 /* Check if HBA is already attached to super */
473 if (super
->hba
== NULL
) {
474 super
->hba
= alloc_intel_hba(device
);
479 /* Intel metadata allows for all disks attached to the same type HBA.
480 * Mixing different HBA types is not supported
482 if (device
->type
!= hba
->type
)
488 hba
->next
= alloc_intel_hba(device
);
492 static struct sys_dev
* find_disk_attached_hba(int fd
, const char *devname
)
494 struct sys_dev
*list
, *elem
, *prev
;
497 if ((list
= find_intel_devices()) == NULL
)
501 disk_path
= (char *) devname
;
503 disk_path
= diskfd_to_devpath(fd
);
510 for (prev
= NULL
, elem
= list
; elem
; prev
= elem
, elem
= elem
->next
) {
511 if (path_attached_to_hba(disk_path
, elem
->path
)) {
515 prev
->next
= elem
->next
;
517 if (disk_path
!= devname
)
523 if (disk_path
!= devname
)
531 static int find_intel_hba_capability(int fd
, struct intel_super
*super
,
534 static struct supertype
*match_metadata_desc_imsm(char *arg
)
536 struct supertype
*st
;
538 if (strcmp(arg
, "imsm") != 0 &&
539 strcmp(arg
, "default") != 0
543 st
= malloc(sizeof(*st
));
546 memset(st
, 0, sizeof(*st
));
547 st
->container_dev
= NoMdDev
;
548 st
->ss
= &super_imsm
;
549 st
->max_devs
= IMSM_MAX_DEVICES
;
550 st
->minor_version
= 0;
556 static __u8
*get_imsm_version(struct imsm_super
*mpb
)
558 return &mpb
->sig
[MPB_SIG_LEN
];
562 /* retrieve a disk directly from the anchor when the anchor is known to be
563 * up-to-date, currently only at load time
565 static struct imsm_disk
*__get_imsm_disk(struct imsm_super
*mpb
, __u8 index
)
567 if (index
>= mpb
->num_disks
)
569 return &mpb
->disk
[index
];
572 /* retrieve the disk description based on a index of the disk
575 static struct dl
*get_imsm_dl_disk(struct intel_super
*super
, __u8 index
)
579 for (d
= super
->disks
; d
; d
= d
->next
)
580 if (d
->index
== index
)
585 /* retrieve a disk from the parsed metadata */
586 static struct imsm_disk
*get_imsm_disk(struct intel_super
*super
, __u8 index
)
590 dl
= get_imsm_dl_disk(super
, index
);
597 /* generate a checksum directly from the anchor when the anchor is known to be
598 * up-to-date, currently only at load or write_super after coalescing
600 static __u32
__gen_imsm_checksum(struct imsm_super
*mpb
)
602 __u32 end
= mpb
->mpb_size
/ sizeof(end
);
603 __u32
*p
= (__u32
*) mpb
;
607 sum
+= __le32_to_cpu(*p
);
611 return sum
- __le32_to_cpu(mpb
->check_sum
);
614 static size_t sizeof_imsm_map(struct imsm_map
*map
)
616 return sizeof(struct imsm_map
) + sizeof(__u32
) * (map
->num_members
- 1);
619 struct imsm_map
*get_imsm_map(struct imsm_dev
*dev
, int second_map
)
621 /* A device can have 2 maps if it is in the middle of a migration.
623 * 0 - we return the first map
624 * 1 - we return the second map if it exists, else NULL
625 * -1 - we return the second map if it exists, else the first
627 struct imsm_map
*map
= &dev
->vol
.map
[0];
629 if (second_map
== 1 && !dev
->vol
.migr_state
)
631 else if (second_map
== 1 ||
632 (second_map
< 0 && dev
->vol
.migr_state
)) {
635 return ptr
+ sizeof_imsm_map(map
);
641 /* return the size of the device.
642 * migr_state increases the returned size if map[0] were to be duplicated
644 static size_t sizeof_imsm_dev(struct imsm_dev
*dev
, int migr_state
)
646 size_t size
= sizeof(*dev
) - sizeof(struct imsm_map
) +
647 sizeof_imsm_map(get_imsm_map(dev
, 0));
649 /* migrating means an additional map */
650 if (dev
->vol
.migr_state
)
651 size
+= sizeof_imsm_map(get_imsm_map(dev
, 1));
653 size
+= sizeof_imsm_map(get_imsm_map(dev
, 0));
659 /* retrieve disk serial number list from a metadata update */
660 static struct disk_info
*get_disk_info(struct imsm_update_create_array
*update
)
663 struct disk_info
*inf
;
665 inf
= u
+ sizeof(*update
) - sizeof(struct imsm_dev
) +
666 sizeof_imsm_dev(&update
->dev
, 0);
672 static struct imsm_dev
*__get_imsm_dev(struct imsm_super
*mpb
, __u8 index
)
678 if (index
>= mpb
->num_raid_devs
)
681 /* devices start after all disks */
682 offset
= ((void *) &mpb
->disk
[mpb
->num_disks
]) - _mpb
;
684 for (i
= 0; i
<= index
; i
++)
686 return _mpb
+ offset
;
688 offset
+= sizeof_imsm_dev(_mpb
+ offset
, 0);
693 static struct imsm_dev
*get_imsm_dev(struct intel_super
*super
, __u8 index
)
695 struct intel_dev
*dv
;
697 if (index
>= super
->anchor
->num_raid_devs
)
699 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
700 if (dv
->index
== index
)
708 * == 1 get second map
709 * == -1 than get map according to the current migr_state
711 static __u32
get_imsm_ord_tbl_ent(struct imsm_dev
*dev
,
715 struct imsm_map
*map
;
717 map
= get_imsm_map(dev
, second_map
);
719 /* top byte identifies disk under rebuild */
720 return __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
723 #define ord_to_idx(ord) (((ord) << 8) >> 8)
724 static __u32
get_imsm_disk_idx(struct imsm_dev
*dev
, int slot
, int second_map
)
726 __u32 ord
= get_imsm_ord_tbl_ent(dev
, slot
, second_map
);
728 return ord_to_idx(ord
);
731 static void set_imsm_ord_tbl_ent(struct imsm_map
*map
, int slot
, __u32 ord
)
733 map
->disk_ord_tbl
[slot
] = __cpu_to_le32(ord
);
736 static int get_imsm_disk_slot(struct imsm_map
*map
, unsigned idx
)
741 for (slot
= 0; slot
< map
->num_members
; slot
++) {
742 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
743 if (ord_to_idx(ord
) == idx
)
750 static int get_imsm_raid_level(struct imsm_map
*map
)
752 if (map
->raid_level
== 1) {
753 if (map
->num_members
== 2)
759 return map
->raid_level
;
762 static int cmp_extent(const void *av
, const void *bv
)
764 const struct extent
*a
= av
;
765 const struct extent
*b
= bv
;
766 if (a
->start
< b
->start
)
768 if (a
->start
> b
->start
)
773 static int count_memberships(struct dl
*dl
, struct intel_super
*super
)
778 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
779 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
780 struct imsm_map
*map
= get_imsm_map(dev
, 0);
782 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
789 static struct extent
*get_extents(struct intel_super
*super
, struct dl
*dl
)
791 /* find a list of used extents on the given physical device */
792 struct extent
*rv
, *e
;
794 int memberships
= count_memberships(dl
, super
);
795 __u32 reservation
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
797 rv
= malloc(sizeof(struct extent
) * (memberships
+ 1));
802 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
803 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
804 struct imsm_map
*map
= get_imsm_map(dev
, 0);
806 if (get_imsm_disk_slot(map
, dl
->index
) >= 0) {
807 e
->start
= __le32_to_cpu(map
->pba_of_lba0
);
808 e
->size
= __le32_to_cpu(map
->blocks_per_member
);
812 qsort(rv
, memberships
, sizeof(*rv
), cmp_extent
);
814 /* determine the start of the metadata
815 * when no raid devices are defined use the default
816 * ...otherwise allow the metadata to truncate the value
817 * as is the case with older versions of imsm
820 struct extent
*last
= &rv
[memberships
- 1];
823 remainder
= __le32_to_cpu(dl
->disk
.total_blocks
) -
824 (last
->start
+ last
->size
);
825 /* round down to 1k block to satisfy precision of the kernel
829 /* make sure remainder is still sane */
830 if (remainder
< (unsigned)ROUND_UP(super
->len
, 512) >> 9)
831 remainder
= ROUND_UP(super
->len
, 512) >> 9;
832 if (reservation
> remainder
)
833 reservation
= remainder
;
835 e
->start
= __le32_to_cpu(dl
->disk
.total_blocks
) - reservation
;
840 /* try to determine how much space is reserved for metadata from
841 * the last get_extents() entry, otherwise fallback to the
844 static __u32
imsm_reserved_sectors(struct intel_super
*super
, struct dl
*dl
)
850 /* for spares just return a minimal reservation which will grow
851 * once the spare is picked up by an array
854 return MPB_SECTOR_CNT
;
856 e
= get_extents(super
, dl
);
858 return MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
860 /* scroll to last entry */
861 for (i
= 0; e
[i
].size
; i
++)
864 rv
= __le32_to_cpu(dl
->disk
.total_blocks
) - e
[i
].start
;
871 static int is_spare(struct imsm_disk
*disk
)
873 return (disk
->status
& SPARE_DISK
) == SPARE_DISK
;
876 static int is_configured(struct imsm_disk
*disk
)
878 return (disk
->status
& CONFIGURED_DISK
) == CONFIGURED_DISK
;
881 static int is_failed(struct imsm_disk
*disk
)
883 return (disk
->status
& FAILED_DISK
) == FAILED_DISK
;
886 /* Return minimum size of a spare that can be used in this array*/
887 static unsigned long long min_acceptable_spare_size_imsm(struct supertype
*st
)
889 struct intel_super
*super
= st
->sb
;
893 unsigned long long rv
= 0;
897 /* find first active disk in array */
899 while (dl
&& (is_failed(&dl
->disk
) || dl
->index
== -1))
903 /* find last lba used by subarrays */
904 e
= get_extents(super
, dl
);
907 for (i
= 0; e
[i
].size
; i
++)
910 rv
= e
[i
-1].start
+ e
[i
-1].size
;
912 /* add the amount of space needed for metadata */
913 rv
= rv
+ MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
918 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
);
920 static void print_imsm_dev(struct imsm_dev
*dev
, char *uuid
, int disk_idx
)
924 struct imsm_map
*map
= get_imsm_map(dev
, 0);
925 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
929 printf("[%.16s]:\n", dev
->volume
);
930 printf(" UUID : %s\n", uuid
);
931 printf(" RAID Level : %d", get_imsm_raid_level(map
));
933 printf(" <-- %d", get_imsm_raid_level(map2
));
935 printf(" Members : %d", map
->num_members
);
937 printf(" <-- %d", map2
->num_members
);
939 printf(" Slots : [");
940 for (i
= 0; i
< map
->num_members
; i
++) {
941 ord
= get_imsm_ord_tbl_ent(dev
, i
, 0);
942 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
947 for (i
= 0; i
< map2
->num_members
; i
++) {
948 ord
= get_imsm_ord_tbl_ent(dev
, i
, 1);
949 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
954 printf(" Failed disk : ");
955 if (map
->failed_disk_num
== 0xff)
958 printf("%i", map
->failed_disk_num
);
960 slot
= get_imsm_disk_slot(map
, disk_idx
);
962 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
963 printf(" This Slot : %d%s\n", slot
,
964 ord
& IMSM_ORD_REBUILD
? " (out-of-sync)" : "");
966 printf(" This Slot : ?\n");
967 sz
= __le32_to_cpu(dev
->size_high
);
969 sz
+= __le32_to_cpu(dev
->size_low
);
970 printf(" Array Size : %llu%s\n", (unsigned long long)sz
,
971 human_size(sz
* 512));
972 sz
= __le32_to_cpu(map
->blocks_per_member
);
973 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz
,
974 human_size(sz
* 512));
975 printf(" Sector Offset : %u\n",
976 __le32_to_cpu(map
->pba_of_lba0
));
977 printf(" Num Stripes : %u\n",
978 __le32_to_cpu(map
->num_data_stripes
));
979 printf(" Chunk Size : %u KiB",
980 __le16_to_cpu(map
->blocks_per_strip
) / 2);
982 printf(" <-- %u KiB",
983 __le16_to_cpu(map2
->blocks_per_strip
) / 2);
985 printf(" Reserved : %d\n", __le32_to_cpu(dev
->reserved_blocks
));
986 printf(" Migrate State : ");
987 if (dev
->vol
.migr_state
) {
988 if (migr_type(dev
) == MIGR_INIT
)
989 printf("initialize\n");
990 else if (migr_type(dev
) == MIGR_REBUILD
)
992 else if (migr_type(dev
) == MIGR_VERIFY
)
994 else if (migr_type(dev
) == MIGR_GEN_MIGR
)
995 printf("general migration\n");
996 else if (migr_type(dev
) == MIGR_STATE_CHANGE
)
997 printf("state change\n");
998 else if (migr_type(dev
) == MIGR_REPAIR
)
1001 printf("<unknown:%d>\n", migr_type(dev
));
1004 printf(" Map State : %s", map_state_str
[map
->map_state
]);
1005 if (dev
->vol
.migr_state
) {
1006 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1008 printf(" <-- %s", map_state_str
[map
->map_state
]);
1009 printf("\n Checkpoint : %u (%llu)",
1010 __le32_to_cpu(dev
->vol
.curr_migr_unit
),
1011 (unsigned long long)blocks_per_migr_unit(dev
));
1014 printf(" Dirty State : %s\n", dev
->vol
.dirty
? "dirty" : "clean");
1017 static void print_imsm_disk(struct imsm_super
*mpb
, int index
, __u32 reserved
)
1019 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, index
);
1020 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1023 if (index
< 0 || !disk
)
1027 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1028 printf(" Disk%02d Serial : %s\n", index
, str
);
1029 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1030 is_configured(disk
) ? " active" : "",
1031 is_failed(disk
) ? " failed" : "");
1032 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1033 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1034 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1035 human_size(sz
* 512));
1038 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
);
1040 static void examine_super_imsm(struct supertype
*st
, char *homehost
)
1042 struct intel_super
*super
= st
->sb
;
1043 struct imsm_super
*mpb
= super
->anchor
;
1044 char str
[MAX_SIGNATURE_LENGTH
];
1049 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
1052 snprintf(str
, MPB_SIG_LEN
, "%s", mpb
->sig
);
1053 printf(" Magic : %s\n", str
);
1054 snprintf(str
, strlen(MPB_VERSION_RAID0
), "%s", get_imsm_version(mpb
));
1055 printf(" Version : %s\n", get_imsm_version(mpb
));
1056 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb
->orig_family_num
));
1057 printf(" Family : %08x\n", __le32_to_cpu(mpb
->family_num
));
1058 printf(" Generation : %08x\n", __le32_to_cpu(mpb
->generation_num
));
1059 getinfo_super_imsm(st
, &info
, NULL
);
1060 fname_from_uuid(st
, &info
, nbuf
, ':');
1061 printf(" UUID : %s\n", nbuf
+ 5);
1062 sum
= __le32_to_cpu(mpb
->check_sum
);
1063 printf(" Checksum : %08x %s\n", sum
,
1064 __gen_imsm_checksum(mpb
) == sum
? "correct" : "incorrect");
1065 printf(" MPB Sectors : %d\n", mpb_sectors(mpb
));
1066 printf(" Disks : %d\n", mpb
->num_disks
);
1067 printf(" RAID Devices : %d\n", mpb
->num_raid_devs
);
1068 print_imsm_disk(mpb
, super
->disks
->index
, reserved
);
1069 if (super
->bbm_log
) {
1070 struct bbm_log
*log
= super
->bbm_log
;
1073 printf("Bad Block Management Log:\n");
1074 printf(" Log Size : %d\n", __le32_to_cpu(mpb
->bbm_log_size
));
1075 printf(" Signature : %x\n", __le32_to_cpu(log
->signature
));
1076 printf(" Entry Count : %d\n", __le32_to_cpu(log
->entry_count
));
1077 printf(" Spare Blocks : %d\n", __le32_to_cpu(log
->reserved_spare_block_count
));
1078 printf(" First Spare : %llx\n",
1079 (unsigned long long) __le64_to_cpu(log
->first_spare_lba
));
1081 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1083 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1085 super
->current_vol
= i
;
1086 getinfo_super_imsm(st
, &info
, NULL
);
1087 fname_from_uuid(st
, &info
, nbuf
, ':');
1088 print_imsm_dev(dev
, nbuf
+ 5, super
->disks
->index
);
1090 for (i
= 0; i
< mpb
->num_disks
; i
++) {
1091 if (i
== super
->disks
->index
)
1093 print_imsm_disk(mpb
, i
, reserved
);
1095 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1096 struct imsm_disk
*disk
;
1097 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1105 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1106 printf(" Disk Serial : %s\n", str
);
1107 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1108 is_configured(disk
) ? " active" : "",
1109 is_failed(disk
) ? " failed" : "");
1110 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1111 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1112 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1113 human_size(sz
* 512));
1117 static void brief_examine_super_imsm(struct supertype
*st
, int verbose
)
1119 /* We just write a generic IMSM ARRAY entry */
1122 struct intel_super
*super
= st
->sb
;
1124 if (!super
->anchor
->num_raid_devs
) {
1125 printf("ARRAY metadata=imsm\n");
1129 getinfo_super_imsm(st
, &info
, NULL
);
1130 fname_from_uuid(st
, &info
, nbuf
, ':');
1131 printf("ARRAY metadata=imsm UUID=%s\n", nbuf
+ 5);
1134 static void brief_examine_subarrays_imsm(struct supertype
*st
, int verbose
)
1136 /* We just write a generic IMSM ARRAY entry */
1140 struct intel_super
*super
= st
->sb
;
1143 if (!super
->anchor
->num_raid_devs
)
1146 getinfo_super_imsm(st
, &info
, NULL
);
1147 fname_from_uuid(st
, &info
, nbuf
, ':');
1148 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
1149 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1151 super
->current_vol
= i
;
1152 getinfo_super_imsm(st
, &info
, NULL
);
1153 fname_from_uuid(st
, &info
, nbuf1
, ':');
1154 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1155 dev
->volume
, nbuf
+ 5, i
, nbuf1
+ 5);
1159 static void export_examine_super_imsm(struct supertype
*st
)
1161 struct intel_super
*super
= st
->sb
;
1162 struct imsm_super
*mpb
= super
->anchor
;
1166 getinfo_super_imsm(st
, &info
, NULL
);
1167 fname_from_uuid(st
, &info
, nbuf
, ':');
1168 printf("MD_METADATA=imsm\n");
1169 printf("MD_LEVEL=container\n");
1170 printf("MD_UUID=%s\n", nbuf
+5);
1171 printf("MD_DEVICES=%u\n", mpb
->num_disks
);
1174 static void detail_super_imsm(struct supertype
*st
, char *homehost
)
1179 getinfo_super_imsm(st
, &info
, NULL
);
1180 fname_from_uuid(st
, &info
, nbuf
, ':');
1181 printf("\n UUID : %s\n", nbuf
+ 5);
1184 static void brief_detail_super_imsm(struct supertype
*st
)
1188 getinfo_super_imsm(st
, &info
, NULL
);
1189 fname_from_uuid(st
, &info
, nbuf
, ':');
1190 printf(" UUID=%s", nbuf
+ 5);
1193 static int imsm_read_serial(int fd
, char *devname
, __u8
*serial
);
1194 static void fd2devname(int fd
, char *name
);
1196 static int ahci_enumerate_ports(const char *hba_path
, int port_count
, int host_base
, int verbose
)
1198 /* dump an unsorted list of devices attached to AHCI Intel storage
1199 * controller, as well as non-connected ports
1201 int hba_len
= strlen(hba_path
) + 1;
1206 unsigned long port_mask
= (1 << port_count
) - 1;
1208 if (port_count
> (int)sizeof(port_mask
) * 8) {
1210 fprintf(stderr
, Name
": port_count %d out of range\n", port_count
);
1214 /* scroll through /sys/dev/block looking for devices attached to
1217 dir
= opendir("/sys/dev/block");
1218 for (ent
= dir
? readdir(dir
) : NULL
; ent
; ent
= readdir(dir
)) {
1229 if (sscanf(ent
->d_name
, "%d:%d", &major
, &minor
) != 2)
1231 path
= devt_to_devpath(makedev(major
, minor
));
1234 if (!path_attached_to_hba(path
, hba_path
)) {
1240 /* retrieve the scsi device type */
1241 if (asprintf(&device
, "/sys/dev/block/%d:%d/device/xxxxxxx", major
, minor
) < 0) {
1243 fprintf(stderr
, Name
": failed to allocate 'device'\n");
1247 sprintf(device
, "/sys/dev/block/%d:%d/device/type", major
, minor
);
1248 if (load_sys(device
, buf
) != 0) {
1250 fprintf(stderr
, Name
": failed to read device type for %s\n",
1256 type
= strtoul(buf
, NULL
, 10);
1258 /* if it's not a disk print the vendor and model */
1259 if (!(type
== 0 || type
== 7 || type
== 14)) {
1262 sprintf(device
, "/sys/dev/block/%d:%d/device/vendor", major
, minor
);
1263 if (load_sys(device
, buf
) == 0) {
1264 strncpy(vendor
, buf
, sizeof(vendor
));
1265 vendor
[sizeof(vendor
) - 1] = '\0';
1266 c
= (char *) &vendor
[sizeof(vendor
) - 1];
1267 while (isspace(*c
) || *c
== '\0')
1271 sprintf(device
, "/sys/dev/block/%d:%d/device/model", major
, minor
);
1272 if (load_sys(device
, buf
) == 0) {
1273 strncpy(model
, buf
, sizeof(model
));
1274 model
[sizeof(model
) - 1] = '\0';
1275 c
= (char *) &model
[sizeof(model
) - 1];
1276 while (isspace(*c
) || *c
== '\0')
1280 if (vendor
[0] && model
[0])
1281 sprintf(buf
, "%.64s %.64s", vendor
, model
);
1283 switch (type
) { /* numbers from hald/linux/device.c */
1284 case 1: sprintf(buf
, "tape"); break;
1285 case 2: sprintf(buf
, "printer"); break;
1286 case 3: sprintf(buf
, "processor"); break;
1288 case 5: sprintf(buf
, "cdrom"); break;
1289 case 6: sprintf(buf
, "scanner"); break;
1290 case 8: sprintf(buf
, "media_changer"); break;
1291 case 9: sprintf(buf
, "comm"); break;
1292 case 12: sprintf(buf
, "raid"); break;
1293 default: sprintf(buf
, "unknown");
1299 /* chop device path to 'host%d' and calculate the port number */
1300 c
= strchr(&path
[hba_len
], '/');
1303 fprintf(stderr
, Name
": %s - invalid path name\n", path
+ hba_len
);
1308 if (sscanf(&path
[hba_len
], "host%d", &port
) == 1)
1312 *c
= '/'; /* repair the full string */
1313 fprintf(stderr
, Name
": failed to determine port number for %s\n",
1320 /* mark this port as used */
1321 port_mask
&= ~(1 << port
);
1323 /* print out the device information */
1325 printf(" Port%d : - non-disk device (%s) -\n", port
, buf
);
1329 fd
= dev_open(ent
->d_name
, O_RDONLY
);
1331 printf(" Port%d : - disk info unavailable -\n", port
);
1333 fd2devname(fd
, buf
);
1334 printf(" Port%d : %s", port
, buf
);
1335 if (imsm_read_serial(fd
, NULL
, (__u8
*) buf
) == 0)
1336 printf(" (%s)\n", buf
);
1351 for (i
= 0; i
< port_count
; i
++)
1352 if (port_mask
& (1 << i
))
1353 printf(" Port%d : - no device attached -\n", i
);
1361 static void print_found_intel_controllers(struct sys_dev
*elem
)
1363 for (; elem
; elem
= elem
->next
) {
1364 fprintf(stderr
, Name
": found Intel(R) ");
1365 if (elem
->type
== SYS_DEV_SATA
)
1366 fprintf(stderr
, "SATA ");
1367 else if (elem
->type
== SYS_DEV_SAS
)
1368 fprintf(stderr
, "SAS ");
1369 fprintf(stderr
, "RAID controller");
1371 fprintf(stderr
, " at %s", elem
->pci_id
);
1372 fprintf(stderr
, ".\n");
1377 static int ahci_get_port_count(const char *hba_path
, int *port_count
)
1384 if ((dir
= opendir(hba_path
)) == NULL
)
1387 for (ent
= readdir(dir
); ent
; ent
= readdir(dir
)) {
1390 if (sscanf(ent
->d_name
, "host%d", &host
) != 1)
1392 if (*port_count
== 0)
1394 else if (host
< host_base
)
1397 if (host
+ 1 > *port_count
+ host_base
)
1398 *port_count
= host
+ 1 - host_base
;
1404 static void print_imsm_capability(const struct imsm_orom
*orom
)
1406 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1407 printf(" Version : %d.%d.%d.%d\n", orom
->major_ver
, orom
->minor_ver
,
1408 orom
->hotfix_ver
, orom
->build
);
1409 printf(" RAID Levels :%s%s%s%s%s\n",
1410 imsm_orom_has_raid0(orom
) ? " raid0" : "",
1411 imsm_orom_has_raid1(orom
) ? " raid1" : "",
1412 imsm_orom_has_raid1e(orom
) ? " raid1e" : "",
1413 imsm_orom_has_raid10(orom
) ? " raid10" : "",
1414 imsm_orom_has_raid5(orom
) ? " raid5" : "");
1415 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1416 imsm_orom_has_chunk(orom
, 2) ? " 2k" : "",
1417 imsm_orom_has_chunk(orom
, 4) ? " 4k" : "",
1418 imsm_orom_has_chunk(orom
, 8) ? " 8k" : "",
1419 imsm_orom_has_chunk(orom
, 16) ? " 16k" : "",
1420 imsm_orom_has_chunk(orom
, 32) ? " 32k" : "",
1421 imsm_orom_has_chunk(orom
, 64) ? " 64k" : "",
1422 imsm_orom_has_chunk(orom
, 128) ? " 128k" : "",
1423 imsm_orom_has_chunk(orom
, 256) ? " 256k" : "",
1424 imsm_orom_has_chunk(orom
, 512) ? " 512k" : "",
1425 imsm_orom_has_chunk(orom
, 1024*1) ? " 1M" : "",
1426 imsm_orom_has_chunk(orom
, 1024*2) ? " 2M" : "",
1427 imsm_orom_has_chunk(orom
, 1024*4) ? " 4M" : "",
1428 imsm_orom_has_chunk(orom
, 1024*8) ? " 8M" : "",
1429 imsm_orom_has_chunk(orom
, 1024*16) ? " 16M" : "",
1430 imsm_orom_has_chunk(orom
, 1024*32) ? " 32M" : "",
1431 imsm_orom_has_chunk(orom
, 1024*64) ? " 64M" : "");
1432 printf(" Max Disks : %d\n", orom
->tds
);
1433 printf(" Max Volumes : %d\n", orom
->vpa
);
1437 static int detail_platform_imsm(int verbose
, int enumerate_only
)
1439 /* There are two components to imsm platform support, the ahci SATA
1440 * controller and the option-rom. To find the SATA controller we
1441 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1442 * controller with the Intel vendor id is present. This approach
1443 * allows mdadm to leverage the kernel's ahci detection logic, with the
1444 * caveat that if ahci.ko is not loaded mdadm will not be able to
1445 * detect platform raid capabilities. The option-rom resides in a
1446 * platform "Adapter ROM". We scan for its signature to retrieve the
1447 * platform capabilities. If raid support is disabled in the BIOS the
1448 * option-rom capability structure will not be available.
1450 const struct imsm_orom
*orom
;
1451 struct sys_dev
*list
, *hba
;
1456 if (enumerate_only
) {
1457 if (check_env("IMSM_NO_PLATFORM"))
1459 list
= find_intel_devices();
1462 for (hba
= list
; hba
; hba
= hba
->next
) {
1463 orom
= find_imsm_capability(hba
->type
);
1469 free_sys_dev(&list
);
1473 list
= find_intel_devices();
1476 fprintf(stderr
, Name
": no active Intel(R) RAID "
1477 "controller found.\n");
1478 free_sys_dev(&list
);
1481 print_found_intel_controllers(list
);
1483 for (hba
= list
; hba
; hba
= hba
->next
) {
1484 orom
= find_imsm_capability(hba
->type
);
1486 fprintf(stderr
, Name
": imsm capabilities not found for controller: %s (type %s)\n",
1487 hba
->path
, get_sys_dev_type(hba
->type
));
1489 print_imsm_capability(orom
);
1492 for (hba
= list
; hba
; hba
= hba
->next
) {
1493 printf(" I/O Controller : %s (%s)\n",
1494 hba
->path
, get_sys_dev_type(hba
->type
));
1496 if (hba
->type
== SYS_DEV_SATA
) {
1497 host_base
= ahci_get_port_count(hba
->path
, &port_count
);
1498 if (ahci_enumerate_ports(hba
->path
, port_count
, host_base
, verbose
)) {
1500 fprintf(stderr
, Name
": failed to enumerate "
1501 "ports on SATA controller at %s.", hba
->pci_id
);
1507 free_sys_dev(&list
);
1512 static int match_home_imsm(struct supertype
*st
, char *homehost
)
1514 /* the imsm metadata format does not specify any host
1515 * identification information. We return -1 since we can never
1516 * confirm nor deny whether a given array is "meant" for this
1517 * host. We rely on compare_super and the 'family_num' fields to
1518 * exclude member disks that do not belong, and we rely on
1519 * mdadm.conf to specify the arrays that should be assembled.
1520 * Auto-assembly may still pick up "foreign" arrays.
1526 static void uuid_from_super_imsm(struct supertype
*st
, int uuid
[4])
1528 /* The uuid returned here is used for:
1529 * uuid to put into bitmap file (Create, Grow)
1530 * uuid for backup header when saving critical section (Grow)
1531 * comparing uuids when re-adding a device into an array
1532 * In these cases the uuid required is that of the data-array,
1533 * not the device-set.
1534 * uuid to recognise same set when adding a missing device back
1535 * to an array. This is a uuid for the device-set.
1537 * For each of these we can make do with a truncated
1538 * or hashed uuid rather than the original, as long as
1540 * In each case the uuid required is that of the data-array,
1541 * not the device-set.
1543 /* imsm does not track uuids, so we synthesize one using sha1 on
1544 * - The signature (which is constant for all imsm arrays, but no matter)
1545 * - the orig_family_num of the container
1546 * - the index number of the volume
1547 * - the 'serial' number of the volume.
1548 * Hopefully these are all constant.
1550 struct intel_super
*super
= st
->sb
;
1553 struct sha1_ctx ctx
;
1554 struct imsm_dev
*dev
= NULL
;
1557 /* some mdadm versions failed to set ->orig_family_num, in which
1558 * case fall back to ->family_num. orig_family_num will be
1559 * fixed up with the first metadata update.
1561 family_num
= super
->anchor
->orig_family_num
;
1562 if (family_num
== 0)
1563 family_num
= super
->anchor
->family_num
;
1564 sha1_init_ctx(&ctx
);
1565 sha1_process_bytes(super
->anchor
->sig
, MPB_SIG_LEN
, &ctx
);
1566 sha1_process_bytes(&family_num
, sizeof(__u32
), &ctx
);
1567 if (super
->current_vol
>= 0)
1568 dev
= get_imsm_dev(super
, super
->current_vol
);
1570 __u32 vol
= super
->current_vol
;
1571 sha1_process_bytes(&vol
, sizeof(vol
), &ctx
);
1572 sha1_process_bytes(dev
->volume
, MAX_RAID_SERIAL_LEN
, &ctx
);
1574 sha1_finish_ctx(&ctx
, buf
);
1575 memcpy(uuid
, buf
, 4*4);
1580 get_imsm_numerical_version(struct imsm_super
*mpb
, int *m
, int *p
)
1582 __u8
*v
= get_imsm_version(mpb
);
1583 __u8
*end
= mpb
->sig
+ MAX_SIGNATURE_LENGTH
;
1584 char major
[] = { 0, 0, 0 };
1585 char minor
[] = { 0 ,0, 0 };
1586 char patch
[] = { 0, 0, 0 };
1587 char *ver_parse
[] = { major
, minor
, patch
};
1591 while (*v
!= '\0' && v
< end
) {
1592 if (*v
!= '.' && j
< 2)
1593 ver_parse
[i
][j
++] = *v
;
1601 *m
= strtol(minor
, NULL
, 0);
1602 *p
= strtol(patch
, NULL
, 0);
1606 static __u32
migr_strip_blocks_resync(struct imsm_dev
*dev
)
1608 /* migr_strip_size when repairing or initializing parity */
1609 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1610 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1612 switch (get_imsm_raid_level(map
)) {
1617 return 128*1024 >> 9;
1621 static __u32
migr_strip_blocks_rebuild(struct imsm_dev
*dev
)
1623 /* migr_strip_size when rebuilding a degraded disk, no idea why
1624 * this is different from migr_strip_blocks_resync(), but it's good
1627 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1628 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1630 switch (get_imsm_raid_level(map
)) {
1633 if (map
->num_members
% map
->num_domains
== 0)
1634 return 128*1024 >> 9;
1638 return max((__u32
) 64*1024 >> 9, chunk
);
1640 return 128*1024 >> 9;
1644 static __u32
num_stripes_per_unit_resync(struct imsm_dev
*dev
)
1646 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1647 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1648 __u32 lo_chunk
= __le32_to_cpu(lo
->blocks_per_strip
);
1649 __u32 hi_chunk
= __le32_to_cpu(hi
->blocks_per_strip
);
1651 return max((__u32
) 1, hi_chunk
/ lo_chunk
);
1654 static __u32
num_stripes_per_unit_rebuild(struct imsm_dev
*dev
)
1656 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1657 int level
= get_imsm_raid_level(lo
);
1659 if (level
== 1 || level
== 10) {
1660 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1662 return hi
->num_domains
;
1664 return num_stripes_per_unit_resync(dev
);
1667 static __u8
imsm_num_data_members(struct imsm_dev
*dev
, int second_map
)
1669 /* named 'imsm_' because raid0, raid1 and raid10
1670 * counter-intuitively have the same number of data disks
1672 struct imsm_map
*map
= get_imsm_map(dev
, second_map
);
1674 switch (get_imsm_raid_level(map
)) {
1678 return map
->num_members
;
1680 return map
->num_members
- 1;
1682 dprintf("%s: unsupported raid level\n", __func__
);
1687 static __u32
parity_segment_depth(struct imsm_dev
*dev
)
1689 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1690 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1692 switch(get_imsm_raid_level(map
)) {
1695 return chunk
* map
->num_domains
;
1697 return chunk
* map
->num_members
;
1703 static __u32
map_migr_block(struct imsm_dev
*dev
, __u32 block
)
1705 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1706 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1707 __u32 strip
= block
/ chunk
;
1709 switch (get_imsm_raid_level(map
)) {
1712 __u32 vol_strip
= (strip
* map
->num_domains
) + 1;
1713 __u32 vol_stripe
= vol_strip
/ map
->num_members
;
1715 return vol_stripe
* chunk
+ block
% chunk
;
1717 __u32 stripe
= strip
/ (map
->num_members
- 1);
1719 return stripe
* chunk
+ block
% chunk
;
1726 static __u64
blocks_per_migr_unit(struct imsm_dev
*dev
)
1728 /* calculate the conversion factor between per member 'blocks'
1729 * (md/{resync,rebuild}_start) and imsm migration units, return
1730 * 0 for the 'not migrating' and 'unsupported migration' cases
1732 if (!dev
->vol
.migr_state
)
1735 switch (migr_type(dev
)) {
1740 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1741 __u32 stripes_per_unit
;
1742 __u32 blocks_per_unit
;
1751 /* yes, this is really the translation of migr_units to
1752 * per-member blocks in the 'resync' case
1754 stripes_per_unit
= num_stripes_per_unit_resync(dev
);
1755 migr_chunk
= migr_strip_blocks_resync(dev
);
1756 disks
= imsm_num_data_members(dev
, 0);
1757 blocks_per_unit
= stripes_per_unit
* migr_chunk
* disks
;
1758 stripe
= __le32_to_cpu(map
->blocks_per_strip
) * disks
;
1759 segment
= blocks_per_unit
/ stripe
;
1760 block_rel
= blocks_per_unit
- segment
* stripe
;
1761 parity_depth
= parity_segment_depth(dev
);
1762 block_map
= map_migr_block(dev
, block_rel
);
1763 return block_map
+ parity_depth
* segment
;
1765 case MIGR_REBUILD
: {
1766 __u32 stripes_per_unit
;
1769 stripes_per_unit
= num_stripes_per_unit_rebuild(dev
);
1770 migr_chunk
= migr_strip_blocks_rebuild(dev
);
1771 return migr_chunk
* stripes_per_unit
;
1773 case MIGR_STATE_CHANGE
:
1779 static int imsm_level_to_layout(int level
)
1787 return ALGORITHM_LEFT_ASYMMETRIC
;
1794 /*******************************************************************************
1795 * Function: read_imsm_migr_rec
1796 * Description: Function reads imsm migration record from last sector of disk
1798 * fd : disk descriptor
1799 * super : metadata info
1803 ******************************************************************************/
1804 static int read_imsm_migr_rec(int fd
, struct intel_super
*super
)
1807 unsigned long long dsize
;
1809 get_dev_size(fd
, NULL
, &dsize
);
1810 if (lseek64(fd
, dsize
- 512, SEEK_SET
) < 0) {
1812 Name
": Cannot seek to anchor block: %s\n",
1816 if (read(fd
, super
->migr_rec_buf
, 512) != 512) {
1818 Name
": Cannot read migr record block: %s\n",
1828 /*******************************************************************************
1829 * Function: load_imsm_migr_rec
1830 * Description: Function reads imsm migration record (it is stored at the last
1833 * super : imsm internal array info
1834 * info : general array info
1838 ******************************************************************************/
1839 static int load_imsm_migr_rec(struct intel_super
*super
, struct mdinfo
*info
)
1842 struct dl
*dl
= NULL
;
1848 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
1849 /* read only from one of the first two slots */
1850 if ((sd
->disk
.raid_disk
> 1) ||
1851 (sd
->disk
.raid_disk
< 0))
1853 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
1854 fd
= dev_open(nm
, O_RDONLY
);
1860 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1861 /* read only from one of the first two slots */
1864 sprintf(nm
, "%d:%d", dl
->major
, dl
->minor
);
1865 fd
= dev_open(nm
, O_RDONLY
);
1872 retval
= read_imsm_migr_rec(fd
, super
);
1880 /*******************************************************************************
1881 * Function: write_imsm_migr_rec
1882 * Description: Function writes imsm migration record
1883 * (at the last sector of disk)
1885 * super : imsm internal array info
1889 ******************************************************************************/
1890 static int write_imsm_migr_rec(struct supertype
*st
)
1892 struct intel_super
*super
= st
->sb
;
1893 unsigned long long dsize
;
1899 for (sd
= super
->disks
; sd
; sd
= sd
->next
) {
1900 /* write to 2 first slots only */
1901 if ((sd
->index
< 0) || (sd
->index
> 1))
1903 sprintf(nm
, "%d:%d", sd
->major
, sd
->minor
);
1904 fd
= dev_open(nm
, O_RDWR
);
1907 get_dev_size(fd
, NULL
, &dsize
);
1908 if (lseek64(fd
, dsize
- 512, SEEK_SET
) < 0) {
1910 Name
": Cannot seek to anchor block: %s\n",
1914 if (write(fd
, super
->migr_rec_buf
, 512) != 512) {
1916 Name
": Cannot write migr record block: %s\n",
1931 static void getinfo_super_imsm_volume(struct supertype
*st
, struct mdinfo
*info
, char *dmap
)
1933 struct intel_super
*super
= st
->sb
;
1934 struct imsm_dev
*dev
= get_imsm_dev(super
, super
->current_vol
);
1935 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1936 struct imsm_map
*prev_map
= get_imsm_map(dev
, 1);
1937 struct imsm_map
*map_to_analyse
= map
;
1940 unsigned int component_size_alligment
;
1941 int map_disks
= info
->array
.raid_disks
;
1943 memset(info
, 0, sizeof(*info
));
1945 map_to_analyse
= prev_map
;
1947 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
1948 if (dl
->raiddisk
== info
->disk
.raid_disk
)
1950 info
->container_member
= super
->current_vol
;
1951 info
->array
.raid_disks
= map
->num_members
;
1952 info
->array
.level
= get_imsm_raid_level(map_to_analyse
);
1953 info
->array
.layout
= imsm_level_to_layout(info
->array
.level
);
1954 info
->array
.md_minor
= -1;
1955 info
->array
.ctime
= 0;
1956 info
->array
.utime
= 0;
1957 info
->array
.chunk_size
=
1958 __le16_to_cpu(map_to_analyse
->blocks_per_strip
) << 9;
1959 info
->array
.state
= !dev
->vol
.dirty
;
1960 info
->custom_array_size
= __le32_to_cpu(dev
->size_high
);
1961 info
->custom_array_size
<<= 32;
1962 info
->custom_array_size
|= __le32_to_cpu(dev
->size_low
);
1963 if (prev_map
&& map
->map_state
== prev_map
->map_state
) {
1964 info
->reshape_active
= 1;
1965 info
->new_level
= get_imsm_raid_level(map
);
1966 info
->new_layout
= imsm_level_to_layout(info
->new_level
);
1967 info
->new_chunk
= __le16_to_cpu(map
->blocks_per_strip
) << 9;
1968 info
->delta_disks
= map
->num_members
- prev_map
->num_members
;
1969 if (info
->delta_disks
) {
1970 /* this needs to be applied to every array
1973 info
->reshape_active
= 2;
1975 /* The shape of the information that we give to md might have to be
1976 * modified to cope with md's requirements for reshaping arrays.
1977 * For example, when reshaping a RAID0, md requires it to be
1978 * presented as a degraded RAID4.
1979 * Also if a RAID0 is migrating to a RAID5 we need to specify
1980 * the array as already being RAID5, but the 'before' layout
1981 * is a RAID4-like layout.
1983 switch (info
->array
.level
) {
1985 switch(info
->new_level
) {
1987 /* conversion is happening as RAID4 */
1988 info
->array
.level
= 4;
1989 info
->array
.raid_disks
+= 1;
1992 /* conversion is happening as RAID5 */
1993 info
->array
.level
= 5;
1994 info
->array
.layout
= ALGORITHM_PARITY_N
;
1995 info
->array
.raid_disks
+= 1;
1996 info
->delta_disks
-= 1;
1999 /* FIXME error message */
2000 info
->array
.level
= UnSet
;
2006 info
->new_level
= UnSet
;
2007 info
->new_layout
= UnSet
;
2008 info
->new_chunk
= info
->array
.chunk_size
;
2009 info
->delta_disks
= 0;
2011 info
->disk
.major
= 0;
2012 info
->disk
.minor
= 0;
2014 info
->disk
.major
= dl
->major
;
2015 info
->disk
.minor
= dl
->minor
;
2018 info
->data_offset
= __le32_to_cpu(map_to_analyse
->pba_of_lba0
);
2019 info
->component_size
=
2020 __le32_to_cpu(map_to_analyse
->blocks_per_member
);
2022 /* check component size alignment
2024 component_size_alligment
=
2025 info
->component_size
% (info
->array
.chunk_size
/512);
2027 if (component_size_alligment
&&
2028 (info
->array
.level
!= 1) && (info
->array
.level
!= UnSet
)) {
2029 dprintf("imsm: reported component size alligned from %llu ",
2030 info
->component_size
);
2031 info
->component_size
-= component_size_alligment
;
2032 dprintf("to %llu (%i).\n",
2033 info
->component_size
, component_size_alligment
);
2036 memset(info
->uuid
, 0, sizeof(info
->uuid
));
2037 info
->recovery_start
= MaxSector
;
2039 info
->reshape_progress
= 0;
2040 info
->resync_start
= MaxSector
;
2041 if (map_to_analyse
->map_state
== IMSM_T_STATE_UNINITIALIZED
||
2043 info
->resync_start
= 0;
2045 if (dev
->vol
.migr_state
) {
2046 switch (migr_type(dev
)) {
2049 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
2050 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
2052 info
->resync_start
= blocks_per_unit
* units
;
2055 case MIGR_GEN_MIGR
: {
2056 __u64 blocks_per_unit
= blocks_per_migr_unit(dev
);
2057 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
2058 unsigned long long array_blocks
;
2061 info
->reshape_progress
= blocks_per_unit
* units
;
2063 dprintf("IMSM: General Migration checkpoint : %llu "
2064 "(%llu) -> read reshape progress : %llu\n",
2065 units
, blocks_per_unit
, info
->reshape_progress
);
2067 used_disks
= imsm_num_data_members(dev
, 1);
2068 if (used_disks
> 0) {
2069 array_blocks
= map
->blocks_per_member
*
2071 /* round array size down to closest MB
2073 info
->custom_array_size
= (array_blocks
2074 >> SECT_PER_MB_SHIFT
)
2075 << SECT_PER_MB_SHIFT
;
2079 /* we could emulate the checkpointing of
2080 * 'sync_action=check' migrations, but for now
2081 * we just immediately complete them
2084 /* this is handled by container_content_imsm() */
2085 case MIGR_STATE_CHANGE
:
2086 /* FIXME handle other migrations */
2088 /* we are not dirty, so... */
2089 info
->resync_start
= MaxSector
;
2093 strncpy(info
->name
, (char *) dev
->volume
, MAX_RAID_SERIAL_LEN
);
2094 info
->name
[MAX_RAID_SERIAL_LEN
] = 0;
2096 info
->array
.major_version
= -1;
2097 info
->array
.minor_version
= -2;
2098 devname
= devnum2devname(st
->container_dev
);
2099 *info
->text_version
= '\0';
2101 sprintf(info
->text_version
, "/%s/%d", devname
, info
->container_member
);
2103 info
->safe_mode_delay
= 4000; /* 4 secs like the Matrix driver */
2104 uuid_from_super_imsm(st
, info
->uuid
);
2108 for (i
=0; i
<map_disks
; i
++) {
2110 if (i
< info
->array
.raid_disks
) {
2111 struct imsm_disk
*dsk
;
2112 j
= get_imsm_disk_idx(dev
, i
, -1);
2113 dsk
= get_imsm_disk(super
, j
);
2114 if (dsk
&& (dsk
->status
& CONFIGURED_DISK
))
2121 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
);
2122 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
);
2124 static struct imsm_disk
*get_imsm_missing(struct intel_super
*super
, __u8 index
)
2128 for (d
= super
->missing
; d
; d
= d
->next
)
2129 if (d
->index
== index
)
2134 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
)
2136 struct intel_super
*super
= st
->sb
;
2137 struct imsm_disk
*disk
;
2138 int map_disks
= info
->array
.raid_disks
;
2139 int max_enough
= -1;
2141 struct imsm_super
*mpb
;
2143 if (super
->current_vol
>= 0) {
2144 getinfo_super_imsm_volume(st
, info
, map
);
2147 memset(info
, 0, sizeof(*info
));
2149 /* Set raid_disks to zero so that Assemble will always pull in valid
2152 info
->array
.raid_disks
= 0;
2153 info
->array
.level
= LEVEL_CONTAINER
;
2154 info
->array
.layout
= 0;
2155 info
->array
.md_minor
= -1;
2156 info
->array
.ctime
= 0; /* N/A for imsm */
2157 info
->array
.utime
= 0;
2158 info
->array
.chunk_size
= 0;
2160 info
->disk
.major
= 0;
2161 info
->disk
.minor
= 0;
2162 info
->disk
.raid_disk
= -1;
2163 info
->reshape_active
= 0;
2164 info
->array
.major_version
= -1;
2165 info
->array
.minor_version
= -2;
2166 strcpy(info
->text_version
, "imsm");
2167 info
->safe_mode_delay
= 0;
2168 info
->disk
.number
= -1;
2169 info
->disk
.state
= 0;
2171 info
->recovery_start
= MaxSector
;
2173 /* do we have all the in-sync disks that we expect? */
2174 mpb
= super
->anchor
;
2176 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
2177 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
2178 int failed
, enough
, j
, missing
= 0;
2179 struct imsm_map
*map
;
2182 failed
= imsm_count_failed(super
, dev
);
2183 state
= imsm_check_degraded(super
, dev
, failed
);
2184 map
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2186 /* any newly missing disks?
2187 * (catches single-degraded vs double-degraded)
2189 for (j
= 0; j
< map
->num_members
; j
++) {
2190 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
2191 __u32 idx
= ord_to_idx(ord
);
2193 if (!(ord
& IMSM_ORD_REBUILD
) &&
2194 get_imsm_missing(super
, idx
)) {
2200 if (state
== IMSM_T_STATE_FAILED
)
2202 else if (state
== IMSM_T_STATE_DEGRADED
&&
2203 (state
!= map
->map_state
|| missing
))
2205 else /* we're normal, or already degraded */
2208 /* in the missing/failed disk case check to see
2209 * if at least one array is runnable
2211 max_enough
= max(max_enough
, enough
);
2213 dprintf("%s: enough: %d\n", __func__
, max_enough
);
2214 info
->container_enough
= max_enough
;
2217 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
2219 disk
= &super
->disks
->disk
;
2220 info
->data_offset
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
2221 info
->component_size
= reserved
;
2222 info
->disk
.state
= is_configured(disk
) ? (1 << MD_DISK_ACTIVE
) : 0;
2223 /* we don't change info->disk.raid_disk here because
2224 * this state will be finalized in mdmon after we have
2225 * found the 'most fresh' version of the metadata
2227 info
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2228 info
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2231 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2232 * ->compare_super may have updated the 'num_raid_devs' field for spares
2234 if (info
->disk
.state
& (1 << MD_DISK_SYNC
) || super
->anchor
->num_raid_devs
)
2235 uuid_from_super_imsm(st
, info
->uuid
);
2237 memcpy(info
->uuid
, uuid_zero
, sizeof(uuid_zero
));
2239 /* I don't know how to compute 'map' on imsm, so use safe default */
2242 for (i
= 0; i
< map_disks
; i
++)
2248 /* allocates memory and fills disk in mdinfo structure
2249 * for each disk in array */
2250 struct mdinfo
*getinfo_super_disks_imsm(struct supertype
*st
)
2252 struct mdinfo
*mddev
= NULL
;
2253 struct intel_super
*super
= st
->sb
;
2254 struct imsm_disk
*disk
;
2257 if (!super
|| !super
->disks
)
2260 mddev
= malloc(sizeof(*mddev
));
2262 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2265 memset(mddev
, 0, sizeof(*mddev
));
2269 tmp
= malloc(sizeof(*tmp
));
2271 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2276 memset(tmp
, 0, sizeof(*tmp
));
2278 tmp
->next
= mddev
->devs
;
2280 tmp
->disk
.number
= count
++;
2281 tmp
->disk
.major
= dl
->major
;
2282 tmp
->disk
.minor
= dl
->minor
;
2283 tmp
->disk
.state
= is_configured(disk
) ?
2284 (1 << MD_DISK_ACTIVE
) : 0;
2285 tmp
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2286 tmp
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2287 tmp
->disk
.raid_disk
= -1;
2293 static int update_super_imsm(struct supertype
*st
, struct mdinfo
*info
,
2294 char *update
, char *devname
, int verbose
,
2295 int uuid_set
, char *homehost
)
2297 /* For 'assemble' and 'force' we need to return non-zero if any
2298 * change was made. For others, the return value is ignored.
2299 * Update options are:
2300 * force-one : This device looks a bit old but needs to be included,
2301 * update age info appropriately.
2302 * assemble: clear any 'faulty' flag to allow this device to
2304 * force-array: Array is degraded but being forced, mark it clean
2305 * if that will be needed to assemble it.
2307 * newdev: not used ????
2308 * grow: Array has gained a new device - this is currently for
2310 * resync: mark as dirty so a resync will happen.
2311 * name: update the name - preserving the homehost
2312 * uuid: Change the uuid of the array to match what is given
2314 * Following are not relevant for this imsm:
2315 * sparc2.2 : update from old dodgy metadata
2316 * super-minor: change the preferred_minor number
2317 * summaries: update redundant counters.
2318 * homehost: update the recorded homehost
2319 * _reshape_progress: record new reshape_progress position.
2322 struct intel_super
*super
= st
->sb
;
2323 struct imsm_super
*mpb
;
2325 /* we can only update container info */
2326 if (!super
|| super
->current_vol
>= 0 || !super
->anchor
)
2329 mpb
= super
->anchor
;
2331 if (strcmp(update
, "uuid") == 0 && uuid_set
&& !info
->update_private
)
2333 else if (strcmp(update
, "uuid") == 0 && uuid_set
&& info
->update_private
) {
2334 mpb
->orig_family_num
= *((__u32
*) info
->update_private
);
2336 } else if (strcmp(update
, "uuid") == 0) {
2337 __u32
*new_family
= malloc(sizeof(*new_family
));
2339 /* update orig_family_number with the incoming random
2340 * data, report the new effective uuid, and store the
2341 * new orig_family_num for future updates.
2344 memcpy(&mpb
->orig_family_num
, info
->uuid
, sizeof(__u32
));
2345 uuid_from_super_imsm(st
, info
->uuid
);
2346 *new_family
= mpb
->orig_family_num
;
2347 info
->update_private
= new_family
;
2350 } else if (strcmp(update
, "assemble") == 0)
2355 /* successful update? recompute checksum */
2357 mpb
->check_sum
= __le32_to_cpu(__gen_imsm_checksum(mpb
));
2362 static size_t disks_to_mpb_size(int disks
)
2366 size
= sizeof(struct imsm_super
);
2367 size
+= (disks
- 1) * sizeof(struct imsm_disk
);
2368 size
+= 2 * sizeof(struct imsm_dev
);
2369 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev) */
2370 size
+= (4 - 2) * sizeof(struct imsm_map
);
2371 /* 4 possible disk_ord_tbl's */
2372 size
+= 4 * (disks
- 1) * sizeof(__u32
);
2377 static __u64
avail_size_imsm(struct supertype
*st
, __u64 devsize
)
2379 if (devsize
< (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
))
2382 return devsize
- (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
);
2385 static void free_devlist(struct intel_super
*super
)
2387 struct intel_dev
*dv
;
2389 while (super
->devlist
) {
2390 dv
= super
->devlist
->next
;
2391 free(super
->devlist
->dev
);
2392 free(super
->devlist
);
2393 super
->devlist
= dv
;
/* Copy the raid device record 'src' over 'dest'.  The number of bytes
 * copied is whatever sizeof_imsm_dev(src, 0) reports, so 'dest' must have
 * room for at least that much.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
2402 static int compare_super_imsm(struct supertype
*st
, struct supertype
*tst
)
2406 * 0 same, or first was empty, and second was copied
2407 * 1 second had wrong number
2409 * 3 wrong other info
2411 struct intel_super
*first
= st
->sb
;
2412 struct intel_super
*sec
= tst
->sb
;
2419 /* in platform dependent environment test if the disks
2420 * use the same Intel hba
2422 if (!check_env("IMSM_NO_PLATFORM")) {
2423 if (!first
->hba
|| !sec
->hba
||
2424 (first
->hba
->type
!= sec
->hba
->type
)) {
2426 "HBAs of devices does not match %s != %s\n",
2427 first
->hba
? get_sys_dev_type(first
->hba
->type
) : NULL
,
2428 sec
->hba
? get_sys_dev_type(sec
->hba
->type
) : NULL
);
2433 /* if an anchor does not have num_raid_devs set then it is a free
2436 if (first
->anchor
->num_raid_devs
> 0 &&
2437 sec
->anchor
->num_raid_devs
> 0) {
2438 /* Determine if these disks might ever have been
2439 * related. Further disambiguation can only take place
2440 * in load_super_imsm_all
2442 __u32 first_family
= first
->anchor
->orig_family_num
;
2443 __u32 sec_family
= sec
->anchor
->orig_family_num
;
2445 if (memcmp(first
->anchor
->sig
, sec
->anchor
->sig
,
2446 MAX_SIGNATURE_LENGTH
) != 0)
2449 if (first_family
== 0)
2450 first_family
= first
->anchor
->family_num
;
2451 if (sec_family
== 0)
2452 sec_family
= sec
->anchor
->family_num
;
2454 if (first_family
!= sec_family
)
2460 /* if 'first' is a spare promote it to a populated mpb with sec's
2463 if (first
->anchor
->num_raid_devs
== 0 &&
2464 sec
->anchor
->num_raid_devs
> 0) {
2466 struct intel_dev
*dv
;
2467 struct imsm_dev
*dev
;
2469 /* we need to copy raid device info from sec if an allocation
2470 * fails here we don't associate the spare
2472 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++) {
2473 dv
= malloc(sizeof(*dv
));
2476 dev
= malloc(sizeof_imsm_dev(get_imsm_dev(sec
, i
), 1));
2483 dv
->next
= first
->devlist
;
2484 first
->devlist
= dv
;
2486 if (i
< sec
->anchor
->num_raid_devs
) {
2487 /* allocation failure */
2488 free_devlist(first
);
2489 fprintf(stderr
, "imsm: failed to associate spare\n");
2492 first
->anchor
->num_raid_devs
= sec
->anchor
->num_raid_devs
;
2493 first
->anchor
->orig_family_num
= sec
->anchor
->orig_family_num
;
2494 first
->anchor
->family_num
= sec
->anchor
->family_num
;
2495 memcpy(first
->anchor
->sig
, sec
->anchor
->sig
, MAX_SIGNATURE_LENGTH
);
2496 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++)
2497 imsm_copy_dev(get_imsm_dev(first
, i
), get_imsm_dev(sec
, i
));
2503 static void fd2devname(int fd
, char *name
)
2507 char dname
[PATH_MAX
];
2512 if (fstat(fd
, &st
) != 0)
2514 sprintf(path
, "/sys/dev/block/%d:%d",
2515 major(st
.st_rdev
), minor(st
.st_rdev
));
2517 rv
= readlink(path
, dname
, sizeof(dname
));
2522 nm
= strrchr(dname
, '/');
2524 snprintf(name
, MAX_RAID_SERIAL_LEN
, "/dev/%s", nm
);
2527 extern int scsi_get_serial(int fd
, void *buf
, size_t buf_len
);
2529 static int imsm_read_serial(int fd
, char *devname
,
2530 __u8 serial
[MAX_RAID_SERIAL_LEN
])
2532 unsigned char scsi_serial
[255];
2541 memset(scsi_serial
, 0, sizeof(scsi_serial
));
2543 rv
= scsi_get_serial(fd
, scsi_serial
, sizeof(scsi_serial
));
2545 if (rv
&& check_env("IMSM_DEVNAME_AS_SERIAL")) {
2546 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2547 fd2devname(fd
, (char *) serial
);
2554 Name
": Failed to retrieve serial for %s\n",
2559 rsp_len
= scsi_serial
[3];
2563 Name
": Failed to retrieve serial for %s\n",
2567 rsp_buf
= (char *) &scsi_serial
[4];
2569 /* trim all whitespace and non-printable characters and convert
2572 for (i
= 0, dest
= rsp_buf
; i
< rsp_len
; i
++) {
2575 /* ':' is reserved for use in placeholder serial
2576 * numbers for missing disks
2584 len
= dest
- rsp_buf
;
2587 /* truncate leading characters */
2588 if (len
> MAX_RAID_SERIAL_LEN
) {
2589 dest
+= len
- MAX_RAID_SERIAL_LEN
;
2590 len
= MAX_RAID_SERIAL_LEN
;
2593 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2594 memcpy(serial
, dest
, len
);
2599 static int serialcmp(__u8
*s1
, __u8
*s2
)
2601 return strncmp((char *) s1
, (char *) s2
, MAX_RAID_SERIAL_LEN
);
2604 static void serialcpy(__u8
*dest
, __u8
*src
)
2606 strncpy((char *) dest
, (char *) src
, MAX_RAID_SERIAL_LEN
);
2610 static struct dl
*serial_to_dl(__u8
*serial
, struct intel_super
*super
)
2614 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2615 if (serialcmp(dl
->serial
, serial
) == 0)
2622 static struct imsm_disk
*
2623 __serial_to_disk(__u8
*serial
, struct imsm_super
*mpb
, int *idx
)
2627 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2628 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
2630 if (serialcmp(disk
->serial
, serial
) == 0) {
2641 load_imsm_disk(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2643 struct imsm_disk
*disk
;
2648 __u8 serial
[MAX_RAID_SERIAL_LEN
];
2650 rv
= imsm_read_serial(fd
, devname
, serial
);
2655 dl
= calloc(1, sizeof(*dl
));
2659 Name
": failed to allocate disk buffer for %s\n",
2665 dl
->major
= major(stb
.st_rdev
);
2666 dl
->minor
= minor(stb
.st_rdev
);
2667 dl
->next
= super
->disks
;
2668 dl
->fd
= keep_fd
? fd
: -1;
2669 assert(super
->disks
== NULL
);
2671 serialcpy(dl
->serial
, serial
);
2674 fd2devname(fd
, name
);
2676 dl
->devname
= strdup(devname
);
2678 dl
->devname
= strdup(name
);
2680 /* look up this disk's index in the current anchor */
2681 disk
= __serial_to_disk(dl
->serial
, super
->anchor
, &dl
->index
);
2684 /* only set index on disks that are a member of a
2685 * populated contianer, i.e. one with raid_devs
2687 if (is_failed(&dl
->disk
))
2689 else if (is_spare(&dl
->disk
))
2697 /* When migrating map0 contains the 'destination' state while map1
2698 * contains the current state. When not migrating map0 contains the
2699 * current state. This routine assumes that map[0].map_state is set to
2700 * the current array state before being called.
2702 * Migration is indicated by one of the following states
2703 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2704 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2705 * map1state=unitialized)
2706 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2708 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2709 * map1state=degraded)
2710 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2713 static void migrate(struct imsm_dev
*dev
, struct intel_super
*super
,
2714 __u8 to_state
, int migr_type
)
2716 struct imsm_map
*dest
;
2717 struct imsm_map
*src
= get_imsm_map(dev
, 0);
2719 dev
->vol
.migr_state
= 1;
2720 set_migr_type(dev
, migr_type
);
2721 dev
->vol
.curr_migr_unit
= 0;
2722 dest
= get_imsm_map(dev
, 1);
2724 /* duplicate and then set the target end state in map[0] */
2725 memcpy(dest
, src
, sizeof_imsm_map(src
));
2726 if ((migr_type
== MIGR_REBUILD
) ||
2727 (migr_type
== MIGR_GEN_MIGR
)) {
2731 for (i
= 0; i
< src
->num_members
; i
++) {
2732 ord
= __le32_to_cpu(src
->disk_ord_tbl
[i
]);
2733 set_imsm_ord_tbl_ent(src
, i
, ord_to_idx(ord
));
2737 if (migr_type
== MIGR_GEN_MIGR
)
2738 /* Clear migration record */
2739 memset(super
->migr_rec
, 0, sizeof(struct migr_record
));
2741 src
->map_state
= to_state
;
2744 static void end_migration(struct imsm_dev
*dev
, __u8 map_state
)
2746 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2747 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2750 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2751 * completed in the last migration.
2753 * FIXME add support for raid-level-migration
2755 for (i
= 0; i
< prev
->num_members
; i
++)
2756 for (j
= 0; j
< map
->num_members
; j
++)
2757 /* during online capacity expansion
2758 * disks position can be changed if takeover is used
2760 if (ord_to_idx(map
->disk_ord_tbl
[j
]) ==
2761 ord_to_idx(prev
->disk_ord_tbl
[i
])) {
2762 map
->disk_ord_tbl
[j
] |= prev
->disk_ord_tbl
[i
];
2766 dev
->vol
.migr_state
= 0;
2767 dev
->vol
.migr_type
= 0;
2768 dev
->vol
.curr_migr_unit
= 0;
2769 map
->map_state
= map_state
;
2773 static int parse_raid_devices(struct intel_super
*super
)
2776 struct imsm_dev
*dev_new
;
2777 size_t len
, len_migr
;
2779 size_t space_needed
= 0;
2780 struct imsm_super
*mpb
= super
->anchor
;
2782 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2783 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2784 struct intel_dev
*dv
;
2786 len
= sizeof_imsm_dev(dev_iter
, 0);
2787 len_migr
= sizeof_imsm_dev(dev_iter
, 1);
2789 space_needed
+= len_migr
- len
;
2791 dv
= malloc(sizeof(*dv
));
2794 if (max_len
< len_migr
)
2796 if (max_len
> len_migr
)
2797 space_needed
+= max_len
- len_migr
;
2798 dev_new
= malloc(max_len
);
2803 imsm_copy_dev(dev_new
, dev_iter
);
2806 dv
->next
= super
->devlist
;
2807 super
->devlist
= dv
;
2810 /* ensure that super->buf is large enough when all raid devices
2813 if (__le32_to_cpu(mpb
->mpb_size
) + space_needed
> super
->len
) {
2816 len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + space_needed
, 512);
2817 if (posix_memalign(&buf
, 512, len
) != 0)
2820 memcpy(buf
, super
->buf
, super
->len
);
2821 memset(buf
+ super
->len
, 0, len
- super
->len
);
2830 /* retrieve a pointer to the bbm log which starts after all raid devices */
2831 struct bbm_log
*__get_imsm_bbm_log(struct imsm_super
*mpb
)
2835 if (__le32_to_cpu(mpb
->bbm_log_size
)) {
2837 ptr
+= mpb
->mpb_size
- __le32_to_cpu(mpb
->bbm_log_size
);
2843 /*******************************************************************************
2844 * Function: check_mpb_migr_compatibility
2845 * Description: Function checks for unsupported migration features:
2846 * - migration optimization area (pba_of_lba0)
2847 * - descending reshape (ascending_migr)
2849 * super : imsm metadata information
2851 * 0 : migration is compatible
2852 * -1 : migration is not compatible
2853 ******************************************************************************/
2854 int check_mpb_migr_compatibility(struct intel_super
*super
)
2856 struct imsm_map
*map0
, *map1
;
2857 struct migr_record
*migr_rec
= super
->migr_rec
;
2860 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2861 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2864 dev_iter
->vol
.migr_state
== 1 &&
2865 dev_iter
->vol
.migr_type
== MIGR_GEN_MIGR
) {
2866 /* This device is migrating */
2867 map0
= get_imsm_map(dev_iter
, 0);
2868 map1
= get_imsm_map(dev_iter
, 1);
2869 if (map0
->pba_of_lba0
!= map1
->pba_of_lba0
)
2870 /* migration optimization area was used */
2872 if (migr_rec
->ascending_migr
== 0
2873 && migr_rec
->dest_depth_per_unit
> 0)
2874 /* descending reshape not supported yet */
2881 static void __free_imsm(struct intel_super
*super
, int free_disks
);
2883 /* load_imsm_mpb - read matrix metadata
2884 * allocates super->mpb to be freed by free_imsm
2886 static int load_imsm_mpb(int fd
, struct intel_super
*super
, char *devname
)
2888 unsigned long long dsize
;
2889 unsigned long long sectors
;
2891 struct imsm_super
*anchor
;
2894 get_dev_size(fd
, NULL
, &dsize
);
2898 Name
": %s: device to small for imsm\n",
2903 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0) {
2906 Name
": Cannot seek to anchor block on %s: %s\n",
2907 devname
, strerror(errno
));
2911 if (posix_memalign((void**)&anchor
, 512, 512) != 0) {
2914 Name
": Failed to allocate imsm anchor buffer"
2915 " on %s\n", devname
);
2918 if (read(fd
, anchor
, 512) != 512) {
2921 Name
": Cannot read anchor block on %s: %s\n",
2922 devname
, strerror(errno
));
2927 if (strncmp((char *) anchor
->sig
, MPB_SIGNATURE
, MPB_SIG_LEN
) != 0) {
2930 Name
": no IMSM anchor on %s\n", devname
);
2935 __free_imsm(super
, 0);
2936 /* reload capability and hba */
2938 /* capability and hba must be updated with new super allocation */
2939 find_intel_hba_capability(fd
, super
, devname
);
2940 super
->len
= ROUND_UP(anchor
->mpb_size
, 512);
2941 if (posix_memalign(&super
->buf
, 512, super
->len
) != 0) {
2944 Name
": unable to allocate %zu byte mpb buffer\n",
2949 memcpy(super
->buf
, anchor
, 512);
2951 sectors
= mpb_sectors(anchor
) - 1;
2954 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
2955 fprintf(stderr
, Name
2956 ": %s could not allocate migr_rec buffer\n", __func__
);
2962 check_sum
= __gen_imsm_checksum(super
->anchor
);
2963 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2966 Name
": IMSM checksum %x != %x on %s\n",
2968 __le32_to_cpu(super
->anchor
->check_sum
),
2976 /* read the extended mpb */
2977 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0) {
2980 Name
": Cannot seek to extended mpb on %s: %s\n",
2981 devname
, strerror(errno
));
2985 if ((unsigned)read(fd
, super
->buf
+ 512, super
->len
- 512) != super
->len
- 512) {
2988 Name
": Cannot read extended mpb on %s: %s\n",
2989 devname
, strerror(errno
));
2993 check_sum
= __gen_imsm_checksum(super
->anchor
);
2994 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
2997 Name
": IMSM checksum %x != %x on %s\n",
2998 check_sum
, __le32_to_cpu(super
->anchor
->check_sum
),
3003 /* FIXME the BBM log is disk specific so we cannot use this global
3004 * buffer for all disks. Ok for now since we only look at the global
3005 * bbm_log_size parameter to gate assembly
3007 super
->bbm_log
= __get_imsm_bbm_log(super
->anchor
);
3012 static int read_imsm_migr_rec(int fd
, struct intel_super
*super
);
3015 load_and_parse_mpb(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
3019 err
= load_imsm_mpb(fd
, super
, devname
);
3022 err
= load_imsm_disk(fd
, super
, devname
, keep_fd
);
3025 err
= parse_raid_devices(super
);
3030 static void __free_imsm_disk(struct dl
*d
)
3042 static void free_imsm_disks(struct intel_super
*super
)
3046 while (super
->disks
) {
3048 super
->disks
= d
->next
;
3049 __free_imsm_disk(d
);
3051 while (super
->disk_mgmt_list
) {
3052 d
= super
->disk_mgmt_list
;
3053 super
->disk_mgmt_list
= d
->next
;
3054 __free_imsm_disk(d
);
3056 while (super
->missing
) {
3058 super
->missing
= d
->next
;
3059 __free_imsm_disk(d
);
3064 /* free all the pieces hanging off of a super pointer */
3065 static void __free_imsm(struct intel_super
*super
, int free_disks
)
3067 struct intel_hba
*elem
, *next
;
3073 /* unlink capability description */
3075 if (super
->migr_rec_buf
) {
3076 free(super
->migr_rec_buf
);
3077 super
->migr_rec_buf
= NULL
;
3080 free_imsm_disks(super
);
3081 free_devlist(super
);
3085 free((void *)elem
->path
);
3093 static void free_imsm(struct intel_super
*super
)
3095 __free_imsm(super
, 1);
3099 static void free_super_imsm(struct supertype
*st
)
3101 struct intel_super
*super
= st
->sb
;
3110 static struct intel_super
*alloc_super(void)
3112 struct intel_super
*super
= malloc(sizeof(*super
));
3115 memset(super
, 0, sizeof(*super
));
3116 super
->current_vol
= -1;
3117 super
->create_offset
= ~((__u32
) 0);
3123 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3125 static int find_intel_hba_capability(int fd
, struct intel_super
*super
, char *devname
)
3127 struct sys_dev
*hba_name
;
3130 if ((fd
< 0) || check_env("IMSM_NO_PLATFORM")) {
3135 hba_name
= find_disk_attached_hba(fd
, NULL
);
3139 Name
": %s is not attached to Intel(R) RAID controller.\n",
3143 rv
= attach_hba_to_super(super
, hba_name
);
3146 struct intel_hba
*hba
= super
->hba
;
3148 fprintf(stderr
, Name
": %s is attached to Intel(R) %s RAID "
3149 "controller (%s),\n"
3150 " but the container is assigned to Intel(R) "
3151 "%s RAID controller (",
3154 hba_name
->pci_id
? : "Err!",
3155 get_sys_dev_type(hba_name
->type
));
3158 fprintf(stderr
, "%s", hba
->pci_id
? : "Err!");
3160 fprintf(stderr
, ", ");
3164 fprintf(stderr
, ").\n"
3165 " Mixing devices attached to different controllers "
3166 "is not allowed.\n");
3168 free_sys_dev(&hba_name
);
3171 super
->orom
= find_imsm_capability(hba_name
->type
);
3172 free_sys_dev(&hba_name
);
3179 /* find_missing - helper routine for load_super_imsm_all that identifies
3180 * disks that have disappeared from the system. This routine relies on
3181 * the mpb being uptodate, which it is at load time.
3183 static int find_missing(struct intel_super
*super
)
3186 struct imsm_super
*mpb
= super
->anchor
;
3188 struct imsm_disk
*disk
;
3190 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3191 disk
= __get_imsm_disk(mpb
, i
);
3192 dl
= serial_to_dl(disk
->serial
, super
);
3196 dl
= malloc(sizeof(*dl
));
3202 dl
->devname
= strdup("missing");
3204 serialcpy(dl
->serial
, disk
->serial
);
3207 dl
->next
= super
->missing
;
3208 super
->missing
= dl
;
3214 static struct intel_disk
*disk_list_get(__u8
*serial
, struct intel_disk
*disk_list
)
3216 struct intel_disk
*idisk
= disk_list
;
3219 if (serialcmp(idisk
->disk
.serial
, serial
) == 0)
3221 idisk
= idisk
->next
;
3227 static int __prep_thunderdome(struct intel_super
**table
, int tbl_size
,
3228 struct intel_super
*super
,
3229 struct intel_disk
**disk_list
)
3231 struct imsm_disk
*d
= &super
->disks
->disk
;
3232 struct imsm_super
*mpb
= super
->anchor
;
3235 for (i
= 0; i
< tbl_size
; i
++) {
3236 struct imsm_super
*tbl_mpb
= table
[i
]->anchor
;
3237 struct imsm_disk
*tbl_d
= &table
[i
]->disks
->disk
;
3239 if (tbl_mpb
->family_num
== mpb
->family_num
) {
3240 if (tbl_mpb
->check_sum
== mpb
->check_sum
) {
3241 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3242 __func__
, super
->disks
->major
,
3243 super
->disks
->minor
,
3244 table
[i
]->disks
->major
,
3245 table
[i
]->disks
->minor
);
3249 if (((is_configured(d
) && !is_configured(tbl_d
)) ||
3250 is_configured(d
) == is_configured(tbl_d
)) &&
3251 tbl_mpb
->generation_num
< mpb
->generation_num
) {
3252 /* current version of the mpb is a
3253 * better candidate than the one in
3254 * super_table, but copy over "cross
3255 * generational" status
3257 struct intel_disk
*idisk
;
3259 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3260 __func__
, super
->disks
->major
,
3261 super
->disks
->minor
,
3262 table
[i
]->disks
->major
,
3263 table
[i
]->disks
->minor
);
3265 idisk
= disk_list_get(tbl_d
->serial
, *disk_list
);
3266 if (idisk
&& is_failed(&idisk
->disk
))
3267 tbl_d
->status
|= FAILED_DISK
;
3270 struct intel_disk
*idisk
;
3271 struct imsm_disk
*disk
;
3273 /* tbl_mpb is more up to date, but copy
3274 * over cross generational status before
3277 disk
= __serial_to_disk(d
->serial
, mpb
, NULL
);
3278 if (disk
&& is_failed(disk
))
3279 d
->status
|= FAILED_DISK
;
3281 idisk
= disk_list_get(d
->serial
, *disk_list
);
3284 if (disk
&& is_configured(disk
))
3285 idisk
->disk
.status
|= CONFIGURED_DISK
;
3288 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3289 __func__
, super
->disks
->major
,
3290 super
->disks
->minor
,
3291 table
[i
]->disks
->major
,
3292 table
[i
]->disks
->minor
);
3300 table
[tbl_size
++] = super
;
3304 /* update/extend the merged list of imsm_disk records */
3305 for (j
= 0; j
< mpb
->num_disks
; j
++) {
3306 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, j
);
3307 struct intel_disk
*idisk
;
3309 idisk
= disk_list_get(disk
->serial
, *disk_list
);
3311 idisk
->disk
.status
|= disk
->status
;
3312 if (is_configured(&idisk
->disk
) ||
3313 is_failed(&idisk
->disk
))
3314 idisk
->disk
.status
&= ~(SPARE_DISK
);
3316 idisk
= calloc(1, sizeof(*idisk
));
3319 idisk
->owner
= IMSM_UNKNOWN_OWNER
;
3320 idisk
->disk
= *disk
;
3321 idisk
->next
= *disk_list
;
3325 if (serialcmp(idisk
->disk
.serial
, d
->serial
) == 0)
3332 static struct intel_super
*
3333 validate_members(struct intel_super
*super
, struct intel_disk
*disk_list
,
3336 struct imsm_super
*mpb
= super
->anchor
;
3340 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3341 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
3342 struct intel_disk
*idisk
;
3344 idisk
= disk_list_get(disk
->serial
, disk_list
);
3346 if (idisk
->owner
== owner
||
3347 idisk
->owner
== IMSM_UNKNOWN_OWNER
)
3350 dprintf("%s: '%.16s' owner %d != %d\n",
3351 __func__
, disk
->serial
, idisk
->owner
,
3354 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3355 __func__
, __le32_to_cpu(mpb
->family_num
), i
,
3361 if (ok_count
== mpb
->num_disks
)
3366 static void show_conflicts(__u32 family_num
, struct intel_super
*super_list
)
3368 struct intel_super
*s
;
3370 for (s
= super_list
; s
; s
= s
->next
) {
3371 if (family_num
!= s
->anchor
->family_num
)
3373 fprintf(stderr
, "Conflict, offlining family %#x on '%s'\n",
3374 __le32_to_cpu(family_num
), s
->disks
->devname
);
3378 static struct intel_super
*
3379 imsm_thunderdome(struct intel_super
**super_list
, int len
)
3381 struct intel_super
*super_table
[len
];
3382 struct intel_disk
*disk_list
= NULL
;
3383 struct intel_super
*champion
, *spare
;
3384 struct intel_super
*s
, **del
;
3389 memset(super_table
, 0, sizeof(super_table
));
3390 for (s
= *super_list
; s
; s
= s
->next
)
3391 tbl_size
= __prep_thunderdome(super_table
, tbl_size
, s
, &disk_list
);
3393 for (i
= 0; i
< tbl_size
; i
++) {
3394 struct imsm_disk
*d
;
3395 struct intel_disk
*idisk
;
3396 struct imsm_super
*mpb
= super_table
[i
]->anchor
;
3399 d
= &s
->disks
->disk
;
3401 /* 'd' must appear in merged disk list for its
3402 * configuration to be valid
3404 idisk
= disk_list_get(d
->serial
, disk_list
);
3405 if (idisk
&& idisk
->owner
== i
)
3406 s
= validate_members(s
, disk_list
, i
);
3411 dprintf("%s: marking family: %#x from %d:%d offline\n",
3412 __func__
, mpb
->family_num
,
3413 super_table
[i
]->disks
->major
,
3414 super_table
[i
]->disks
->minor
);
3418 /* This is where the mdadm implementation differs from the Windows
3419 * driver which has no strict concept of a container. We can only
3420 * assemble one family from a container, so when returning a prodigal
3421 * array member to this system the code will not be able to disambiguate
3422 * the container contents that should be assembled ("foreign" versus
3423 * "local"). It requires user intervention to set the orig_family_num
3424 * to a new value to establish a new container. The Windows driver in
3425 * this situation fixes up the volume name in place and manages the
3426 * foreign array as an independent entity.
3431 for (i
= 0; i
< tbl_size
; i
++) {
3432 struct intel_super
*tbl_ent
= super_table
[i
];
3438 if (tbl_ent
->anchor
->num_raid_devs
== 0) {
3443 if (s
&& !is_spare
) {
3444 show_conflicts(tbl_ent
->anchor
->family_num
, *super_list
);
3446 } else if (!s
&& !is_spare
)
3459 fprintf(stderr
, "Chose family %#x on '%s', "
3460 "assemble conflicts to new container with '--update=uuid'\n",
3461 __le32_to_cpu(s
->anchor
->family_num
), s
->disks
->devname
);
3463 /* collect all dl's onto 'champion', and update them to
3464 * champion's version of the status
3466 for (s
= *super_list
; s
; s
= s
->next
) {
3467 struct imsm_super
*mpb
= champion
->anchor
;
3468 struct dl
*dl
= s
->disks
;
3473 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3474 struct imsm_disk
*disk
;
3476 disk
= __serial_to_disk(dl
->serial
, mpb
, &dl
->index
);
3479 /* only set index on disks that are a member of
3480 * a populated contianer, i.e. one with
3483 if (is_failed(&dl
->disk
))
3485 else if (is_spare(&dl
->disk
))
3491 if (i
>= mpb
->num_disks
) {
3492 struct intel_disk
*idisk
;
3494 idisk
= disk_list_get(dl
->serial
, disk_list
);
3495 if (idisk
&& is_spare(&idisk
->disk
) &&
3496 !is_failed(&idisk
->disk
) && !is_configured(&idisk
->disk
))
3504 dl
->next
= champion
->disks
;
3505 champion
->disks
= dl
;
3509 /* delete 'champion' from super_list */
3510 for (del
= super_list
; *del
; ) {
3511 if (*del
== champion
) {
3512 *del
= (*del
)->next
;
3515 del
= &(*del
)->next
;
3517 champion
->next
= NULL
;
3521 struct intel_disk
*idisk
= disk_list
;
3523 disk_list
= disk_list
->next
;
3530 static int load_super_imsm_all(struct supertype
*st
, int fd
, void **sbp
,
3534 struct intel_super
*super_list
= NULL
;
3535 struct intel_super
*super
= NULL
;
3536 int devnum
= fd2devnum(fd
);
3542 /* check if 'fd' an opened container */
3543 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3547 if (sra
->array
.major_version
!= -1 ||
3548 sra
->array
.minor_version
!= -2 ||
3549 strcmp(sra
->text_version
, "imsm") != 0) {
3554 for (sd
= sra
->devs
, i
= 0; sd
; sd
= sd
->next
, i
++) {
3555 struct intel_super
*s
= alloc_super();
3563 s
->next
= super_list
;
3567 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3568 dfd
= dev_open(nm
, O_RDWR
);
3572 rv
= find_intel_hba_capability(dfd
, s
, devname
);
3573 /* no orom/efi or non-intel hba of the disk */
3577 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3579 /* retry the load if we might have raced against mdmon */
3580 if (err
== 3 && mdmon_running(devnum
))
3581 for (retry
= 0; retry
< 3; retry
++) {
3583 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3591 /* all mpbs enter, maybe one leaves */
3592 super
= imsm_thunderdome(&super_list
, i
);
3598 if (find_missing(super
) != 0) {
3604 /* load migration record */
3605 err
= load_imsm_migr_rec(super
, NULL
);
3611 /* Check migration compatibility */
3612 if (check_mpb_migr_compatibility(super
) != 0) {
3613 fprintf(stderr
, Name
": Unsupported migration detected");
3615 fprintf(stderr
, " on %s\n", devname
);
3617 fprintf(stderr
, " (IMSM).\n");
3626 while (super_list
) {
3627 struct intel_super
*s
= super_list
;
3629 super_list
= super_list
->next
;
3638 st
->container_dev
= devnum
;
3639 if (err
== 0 && st
->ss
== NULL
) {
3640 st
->ss
= &super_imsm
;
3641 st
->minor_version
= 0;
3642 st
->max_devs
= IMSM_MAX_DEVICES
;
3647 static int load_container_imsm(struct supertype
*st
, int fd
, char *devname
)
3649 return load_super_imsm_all(st
, fd
, &st
->sb
, devname
);
3653 static int load_super_imsm(struct supertype
*st
, int fd
, char *devname
)
3655 struct intel_super
*super
;
3658 if (test_partition(fd
))
3659 /* IMSM not allowed on partitions */
3662 free_super_imsm(st
);
3664 super
= alloc_super();
3667 Name
": malloc of %zu failed.\n",
3671 /* Load hba and capabilities if they exist.
3672 * But do not preclude loading metadata in case capabilities or hba are
3673 * non-compliant and ignore_hw_compat is set.
3675 rv
= find_intel_hba_capability(fd
, super
, devname
);
3676 /* no orom/efi or non-intel hba of the disk */
3677 if ((rv
!= 0) && (st
->ignore_hw_compat
== 0)) {
3680 Name
": No OROM/EFI properties for %s\n", devname
);
3684 rv
= load_and_parse_mpb(fd
, super
, devname
, 0);
3689 Name
": Failed to load all information "
3690 "sections on %s\n", devname
);
3696 if (st
->ss
== NULL
) {
3697 st
->ss
= &super_imsm
;
3698 st
->minor_version
= 0;
3699 st
->max_devs
= IMSM_MAX_DEVICES
;
3702 /* load migration record */
3703 load_imsm_migr_rec(super
, NULL
);
3705 /* Check for unsupported migration features */
3706 if (check_mpb_migr_compatibility(super
) != 0) {
3707 fprintf(stderr
, Name
": Unsupported migration detected");
3709 fprintf(stderr
, " on %s\n", devname
);
3711 fprintf(stderr
, " (IMSM).\n");
3718 static __u16
info_to_blocks_per_strip(mdu_array_info_t
*info
)
3720 if (info
->level
== 1)
3722 return info
->chunk_size
>> 9;
3725 static __u32
info_to_num_data_stripes(mdu_array_info_t
*info
, int num_domains
)
3729 num_stripes
= (info
->size
* 2) / info_to_blocks_per_strip(info
);
3730 num_stripes
/= num_domains
;
3735 static __u32
info_to_blocks_per_member(mdu_array_info_t
*info
)
3737 if (info
->level
== 1)
3738 return info
->size
* 2;
3740 return (info
->size
* 2) & ~(info_to_blocks_per_strip(info
) - 1);
3743 static void imsm_update_version_info(struct intel_super
*super
)
3745 /* update the version and attributes */
3746 struct imsm_super
*mpb
= super
->anchor
;
3748 struct imsm_dev
*dev
;
3749 struct imsm_map
*map
;
3752 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3753 dev
= get_imsm_dev(super
, i
);
3754 map
= get_imsm_map(dev
, 0);
3755 if (__le32_to_cpu(dev
->size_high
) > 0)
3756 mpb
->attributes
|= MPB_ATTRIB_2TB
;
3758 /* FIXME detect when an array spans a port multiplier */
3760 mpb
->attributes
|= MPB_ATTRIB_PM
;
3763 if (mpb
->num_raid_devs
> 1 ||
3764 mpb
->attributes
!= MPB_ATTRIB_CHECKSUM_VERIFY
) {
3765 version
= MPB_VERSION_ATTRIBS
;
3766 switch (get_imsm_raid_level(map
)) {
3767 case 0: mpb
->attributes
|= MPB_ATTRIB_RAID0
; break;
3768 case 1: mpb
->attributes
|= MPB_ATTRIB_RAID1
; break;
3769 case 10: mpb
->attributes
|= MPB_ATTRIB_RAID10
; break;
3770 case 5: mpb
->attributes
|= MPB_ATTRIB_RAID5
; break;
3773 if (map
->num_members
>= 5)
3774 version
= MPB_VERSION_5OR6_DISK_ARRAY
;
3775 else if (dev
->status
== DEV_CLONE_N_GO
)
3776 version
= MPB_VERSION_CNG
;
3777 else if (get_imsm_raid_level(map
) == 5)
3778 version
= MPB_VERSION_RAID5
;
3779 else if (map
->num_members
>= 3)
3780 version
= MPB_VERSION_3OR4_DISK_ARRAY
;
3781 else if (get_imsm_raid_level(map
) == 1)
3782 version
= MPB_VERSION_RAID1
;
3784 version
= MPB_VERSION_RAID0
;
3786 strcpy(((char *) mpb
->sig
) + strlen(MPB_SIGNATURE
), version
);
3790 static int check_name(struct intel_super
*super
, char *name
, int quiet
)
3792 struct imsm_super
*mpb
= super
->anchor
;
3793 char *reason
= NULL
;
3796 if (strlen(name
) > MAX_RAID_SERIAL_LEN
)
3797 reason
= "must be 16 characters or less";
3799 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3800 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
3802 if (strncmp((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
) == 0) {
3803 reason
= "already exists";
3808 if (reason
&& !quiet
)
3809 fprintf(stderr
, Name
": imsm volume name %s\n", reason
);
3814 static int init_super_imsm_volume(struct supertype
*st
, mdu_array_info_t
*info
,
3815 unsigned long long size
, char *name
,
3816 char *homehost
, int *uuid
)
3818 /* We are creating a volume inside a pre-existing container.
3819 * so st->sb is already set.
3821 struct intel_super
*super
= st
->sb
;
3822 struct imsm_super
*mpb
= super
->anchor
;
3823 struct intel_dev
*dv
;
3824 struct imsm_dev
*dev
;
3825 struct imsm_vol
*vol
;
3826 struct imsm_map
*map
;
3827 int idx
= mpb
->num_raid_devs
;
3829 unsigned long long array_blocks
;
3830 size_t size_old
, size_new
;
3831 __u32 num_data_stripes
;
3833 if (super
->orom
&& mpb
->num_raid_devs
>= super
->orom
->vpa
) {
3834 fprintf(stderr
, Name
": This imsm-container already has the "
3835 "maximum of %d volumes\n", super
->orom
->vpa
);
3839 /* ensure the mpb is large enough for the new data */
3840 size_old
= __le32_to_cpu(mpb
->mpb_size
);
3841 size_new
= disks_to_mpb_size(info
->nr_disks
);
3842 if (size_new
> size_old
) {
3844 size_t size_round
= ROUND_UP(size_new
, 512);
3846 if (posix_memalign(&mpb_new
, 512, size_round
) != 0) {
3847 fprintf(stderr
, Name
": could not allocate new mpb\n");
3850 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
3851 fprintf(stderr
, Name
3852 ": %s could not allocate migr_rec buffer\n",
3858 memcpy(mpb_new
, mpb
, size_old
);
3861 super
->anchor
= mpb_new
;
3862 mpb
->mpb_size
= __cpu_to_le32(size_new
);
3863 memset(mpb_new
+ size_old
, 0, size_round
- size_old
);
3865 super
->current_vol
= idx
;
3866 /* when creating the first raid device in this container set num_disks
3867 * to zero, i.e. delete this spare and add raid member devices in
3868 * add_to_super_imsm_volume()
3870 if (super
->current_vol
== 0)
3873 if (!check_name(super
, name
, 0))
3875 dv
= malloc(sizeof(*dv
));
3877 fprintf(stderr
, Name
": failed to allocate device list entry\n");
3880 dev
= calloc(1, sizeof(*dev
) + sizeof(__u32
) * (info
->raid_disks
- 1));
3883 fprintf(stderr
, Name
": could not allocate raid device\n");
3887 strncpy((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
);
3888 if (info
->level
== 1)
3889 array_blocks
= info_to_blocks_per_member(info
);
3891 array_blocks
= calc_array_size(info
->level
, info
->raid_disks
,
3892 info
->layout
, info
->chunk_size
,
3894 /* round array size down to closest MB */
3895 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
3897 dev
->size_low
= __cpu_to_le32((__u32
) array_blocks
);
3898 dev
->size_high
= __cpu_to_le32((__u32
) (array_blocks
>> 32));
3899 dev
->status
= (DEV_READ_COALESCING
| DEV_WRITE_COALESCING
);
3901 vol
->migr_state
= 0;
3902 set_migr_type(dev
, MIGR_INIT
);
3904 vol
->curr_migr_unit
= 0;
3905 map
= get_imsm_map(dev
, 0);
3906 map
->pba_of_lba0
= __cpu_to_le32(super
->create_offset
);
3907 map
->blocks_per_member
= __cpu_to_le32(info_to_blocks_per_member(info
));
3908 map
->blocks_per_strip
= __cpu_to_le16(info_to_blocks_per_strip(info
));
3909 map
->failed_disk_num
= ~0;
3910 map
->map_state
= info
->level
? IMSM_T_STATE_UNINITIALIZED
:
3911 IMSM_T_STATE_NORMAL
;
3914 if (info
->level
== 1 && info
->raid_disks
> 2) {
3917 fprintf(stderr
, Name
": imsm does not support more than 2 disks"
3918 "in a raid1 volume\n");
3922 map
->raid_level
= info
->level
;
3923 if (info
->level
== 10) {
3924 map
->raid_level
= 1;
3925 map
->num_domains
= info
->raid_disks
/ 2;
3926 } else if (info
->level
== 1)
3927 map
->num_domains
= info
->raid_disks
;
3929 map
->num_domains
= 1;
3931 num_data_stripes
= info_to_num_data_stripes(info
, map
->num_domains
);
3932 map
->num_data_stripes
= __cpu_to_le32(num_data_stripes
);
3934 map
->num_members
= info
->raid_disks
;
3935 for (i
= 0; i
< map
->num_members
; i
++) {
3936 /* initialized in add_to_super */
3937 set_imsm_ord_tbl_ent(map
, i
, IMSM_ORD_REBUILD
);
3939 mpb
->num_raid_devs
++;
3942 dv
->index
= super
->current_vol
;
3943 dv
->next
= super
->devlist
;
3944 super
->devlist
= dv
;
3946 imsm_update_version_info(super
);
/*
 * init_super_imsm - prepare in-memory IMSM metadata when Create starts a
 * new array.
 *
 * From the visible code: one path hands the whole request to
 * init_super_imsm_volume(); the other sizes a fresh MPB with
 * disks_to_mpb_size(info->nr_disks), allocates 512-byte aligned buffers for
 * the MPB (super->buf) and the migration record (super->migr_rec_buf),
 * zeroes the MPB, and stamps it with its size, MPB_ATTRIB_CHECKSUM_VERIFY
 * and the signature string.
 * NOTE(review): the conditions selecting between these paths, the error
 * exits and the return values are missing from this listing - confirm
 * against the full source.
 */
3951 static int init_super_imsm(struct supertype
*st
, mdu_array_info_t
*info
,
3952 unsigned long long size
, char *name
,
3953 char *homehost
, int *uuid
)
3955 /* This is primarily called by Create when creating a new array.
3956 * We will then get add_to_super called for each component, and then
3957 * write_init_super called to write it out to each device.
3958 * For IMSM, Create can create on fresh devices or on a pre-existing
3960 * To create on a pre-existing array a different method will be called.
3961 * This one is just for fresh drives.
3963 struct intel_super
*super
;
3964 struct imsm_super
*mpb
;
3969 return init_super_imsm_volume(st
, info
, size
, name
, homehost
, uuid
);
3972 mpb_size
= disks_to_mpb_size(info
->nr_disks
);
3976 super
= alloc_super();
3977 if (super
&& posix_memalign(&super
->buf
, 512, mpb_size
) != 0) {
3982 fprintf(stderr
, Name
3983 ": %s could not allocate superblock\n", __func__
);
3986 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
3987 fprintf(stderr
, Name
3988 ": %s could not allocate migr_rec buffer\n", __func__
);
3993 memset(super
->buf
, 0, mpb_size
);
3995 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
3999 /* zeroing superblock */
4003 mpb
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
4005 version
= (char *) mpb
->sig
;
/* mpb->sig receives MPB_SIGNATURE immediately followed by the version string */
4006 strcpy(version
, MPB_SIGNATURE
);
4007 version
+= strlen(MPB_SIGNATURE
);
4008 strcpy(version
, MPB_VERSION_RAID0
);
/*
 * add_to_super_imsm_volume - bind one container disk to a slot of the
 * volume selected by super->current_vol.
 *
 * From the visible code: a disk whose state lacks MD_DISK_SYNC is rejected
 * with a message; the disk is looked up in super->disks either by its
 * pre-marked raiddisk number or by major/minor; a disk with a negative
 * index is given the next index in the anchor's disk table; a disk already
 * present in the map is reported as included twice; finally the ordinal
 * table entry dk->number is pointed at the disk and the disk is marked
 * CONFIGURED_DISK.  For volume 0 the family number is set from a sum that
 * includes __gen_imsm_checksum(mpb) and copied to orig_family_num.
 * NOTE(review): several branch conditions and all return values are missing
 * from this listing - confirm against the full source.
 */
4014 static int add_to_super_imsm_volume(struct supertype
*st
, mdu_disk_info_t
*dk
,
4015 int fd
, char *devname
)
4017 struct intel_super
*super
= st
->sb
;
4018 struct imsm_super
*mpb
= super
->anchor
;
4020 struct imsm_dev
*dev
;
4021 struct imsm_map
*map
;
4024 dev
= get_imsm_dev(super
, super
->current_vol
);
4025 map
= get_imsm_map(dev
, 0);
4027 if (! (dk
->state
& (1<<MD_DISK_SYNC
))) {
4028 fprintf(stderr
, Name
": %s: Cannot add spare devices to IMSM volume\n",
4034 /* we're doing autolayout so grab the pre-marked (in
4035 * validate_geometry) raid_disk
4037 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4038 if (dl
->raiddisk
== dk
->raid_disk
)
4041 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4042 if (dl
->major
== dk
->major
&&
4043 dl
->minor
== dk
->minor
)
4048 fprintf(stderr
, Name
": %s is not a member of the same container\n", devname
);
4052 /* add a pristine spare to the metadata */
4053 if (dl
->index
< 0) {
4054 dl
->index
= super
->anchor
->num_disks
;
4055 super
->anchor
->num_disks
++;
4057 /* Check the device has not already been added */
4058 slot
= get_imsm_disk_slot(map
, dl
->index
);
4060 (get_imsm_ord_tbl_ent(dev
, slot
, -1) & IMSM_ORD_REBUILD
) == 0) {
4061 fprintf(stderr
, Name
": %s has been included in this array twice\n",
4065 set_imsm_ord_tbl_ent(map
, dk
->number
, dl
->index
);
4066 dl
->disk
.status
= CONFIGURED_DISK
;
4068 /* if we are creating the first raid device update the family number */
4069 if (super
->current_vol
== 0) {
4071 struct imsm_dev
*_dev
= __get_imsm_dev(mpb
, 0);
4072 struct imsm_disk
*_disk
= __get_imsm_disk(mpb
, dl
->index
);
4074 if (!_dev
|| !_disk
) {
4075 fprintf(stderr
, Name
": BUG mpb setup error\n");
4081 sum
+= __gen_imsm_checksum(mpb
);
4082 mpb
->family_num
= __cpu_to_le32(sum
);
4083 mpb
->orig_family_num
= mpb
->family_num
;
/*
 * add_to_super_imsm - add a disk to the container, or to the volume
 * currently being created.
 *
 * From the visible code: find_intel_hba_capability() is consulted first.
 * When super->current_vol is non-negative the call is forwarded to
 * add_to_super_imsm_volume().  Otherwise a disk record is allocated, zeroed
 * and filled in (major/minor from stb.st_rdev, a strdup'd device name,
 * action DISK_ADD, serial from imsm_read_serial(), size from
 * get_dev_size(), status SPARE_DISK, and the SCSI id from
 * sysfs_disk_to_scsi_id() or 0).  With st->update_tail set the record is
 * pushed onto super->disk_mgmt_list; the other visible path starts from
 * super->disks (presumably pushing onto that list - confirm), and
 * super->updates_pending is incremented.
 * NOTE(review): error exits, any unit conversion of 'size' and the return
 * values are missing from this listing.
 */
4090 static int add_to_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
,
4091 int fd
, char *devname
)
4093 struct intel_super
*super
= st
->sb
;
4095 unsigned long long size
;
4100 /* If we are on a RAID enabled platform check that the disk is
4101 * attached to the raid controller.
4102 * We do not need to test disks attachment for container based additions,
4103 * they shall be already tested when container was created/assembled.
4105 rv
= find_intel_hba_capability(fd
, super
, devname
);
4106 /* no orom/efi or non-intel hba of the disk */
4108 dprintf("capability: %p fd: %d ret: %d\n",
4109 super
->orom
, fd
, rv
);
4113 if (super
->current_vol
>= 0)
4114 return add_to_super_imsm_volume(st
, dk
, fd
, devname
);
4117 dd
= malloc(sizeof(*dd
));
4120 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
4123 memset(dd
, 0, sizeof(*dd
));
4124 dd
->major
= major(stb
.st_rdev
);
4125 dd
->minor
= minor(stb
.st_rdev
);
4127 dd
->devname
= devname
? strdup(devname
) : NULL
;
4130 dd
->action
= DISK_ADD
;
4131 rv
= imsm_read_serial(fd
, devname
, dd
->serial
);
4134 Name
": failed to retrieve scsi serial, aborting\n");
4139 get_dev_size(fd
, NULL
, &size
);
4141 serialcpy(dd
->disk
.serial
, dd
->serial
);
4142 dd
->disk
.total_blocks
= __cpu_to_le32(size
);
4143 dd
->disk
.status
= SPARE_DISK
;
4144 if (sysfs_disk_to_scsi_id(fd
, &id
) == 0)
4145 dd
->disk
.scsi_id
= __cpu_to_le32(id
);
4147 dd
->disk
.scsi_id
= __cpu_to_le32(0);
4149 if (st
->update_tail
) {
4150 dd
->next
= super
->disk_mgmt_list
;
4151 super
->disk_mgmt_list
= dd
;
4153 dd
->next
= super
->disks
;
4155 super
->updates_pending
++;
/*
 * remove_from_super_imsm - queue a request to remove a disk.
 *
 * From the visible code: when st->update_tail is not set an "mdmon context
 * only" message is printed.  Otherwise a zeroed disk record carrying dk's
 * major/minor, status SPARE_DISK and action DISK_REMOVE is pushed onto
 * super->disk_mgmt_list.
 * NOTE(review): the allocation-failure check and the return values are
 * missing from this listing.
 */
4162 static int remove_from_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
)
4164 struct intel_super
*super
= st
->sb
;
4167 /* remove from super works only in mdmon - for communication
4168 * manager - monitor. Check if communication memory buffer
4171 if (!st
->update_tail
) {
4173 Name
": %s shall be used in mdmon context only"
4174 "(line %d).\n", __func__
, __LINE__
);
4177 dd
= malloc(sizeof(*dd
));
4180 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
4183 memset(dd
, 0, sizeof(*dd
));
4184 dd
->major
= dk
->major
;
4185 dd
->minor
= dk
->minor
;
4188 dd
->disk
.status
= SPARE_DISK
;
4189 dd
->action
= DISK_REMOVE
;
4191 dd
->next
= super
->disk_mgmt_list
;
4192 super
->disk_mgmt_list
= dd
;
// Forward declaration: store_imsm_mpb() is called by the metadata writers
// further down in this file before its definition is seen.
4198 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
);
// spare_record: 512-byte aligned object with an imsm_super member named
// 'anchor'; write_super_imsm_spares() assembles spare metadata in it.
// NOTE(review): the opening lines of this declaration are missing from this
// listing - confirm its full layout.
4202 struct imsm_super anchor
;
4203 } spare_record
__attribute__ ((aligned(512)));
4205 /* spare records have their own family number and do not have any defined raid
// write_super_imsm_spares - write a stand-alone MPB built in spare_record.
//
// From the visible code: the spare MPB is set up once (size of one
// imsm_super, generation 1, MPB_ATTRIB_CHECKSUM_VERIFY, one disk, no raid
// devices, cache_size copied from the anchor, power cycle count 1, and the
// MPB_SIGNATURE MPB_VERSION_RAID0 signature).  Then, walking super->disks,
// the disk record is copied into spare->disk[0], family_num is set to the
// checksum, orig_family_num is cleared, check_sum is recomputed and the
// result is stored with store_imsm_mpb(d->fd, spare); a failure is reported
// on stderr with strerror(errno).
// NOTE(review): which disks the loop skips, the use of 'doclose' and the
// return values are missing from this listing.
4208 static int write_super_imsm_spares(struct intel_super
*super
, int doclose
)
4210 struct imsm_super
*mpb
= super
->anchor
;
4211 struct imsm_super
*spare
= &spare_record
.anchor
;
4215 spare
->mpb_size
= __cpu_to_le32(sizeof(struct imsm_super
)),
4216 spare
->generation_num
= __cpu_to_le32(1UL),
4217 spare
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
4218 spare
->num_disks
= 1,
4219 spare
->num_raid_devs
= 0,
4220 spare
->cache_size
= mpb
->cache_size
,
4221 spare
->pwr_cycle_count
= __cpu_to_le32(1),
4223 snprintf((char *) spare
->sig
, MAX_SIGNATURE_LENGTH
,
4224 MPB_SIGNATURE MPB_VERSION_RAID0
);
4226 for (d
= super
->disks
; d
; d
= d
->next
) {
4230 spare
->disk
[0] = d
->disk
;
4231 sum
= __gen_imsm_checksum(spare
);
4232 spare
->family_num
= __cpu_to_le32(sum
);
4233 spare
->orig_family_num
= 0;
4234 sum
= __gen_imsm_checksum(spare
);
4235 spare
->check_sum
= __cpu_to_le32(sum
);
4237 if (store_imsm_mpb(d
->fd
, spare
)) {
4238 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4239 __func__
, d
->major
, d
->minor
, strerror(errno
));
// write_super_imsm - refresh the anchor MPB from the in-memory lists and
// store it on the disks.
//
// From the visible code: the generation number is read and written back, a
// zero orig_family_num is replaced by family_num, disk records from
// super->disks and super->missing are copied into mpb->disk[], each raid
// device is copied with imsm_copy_dev(), mpb_size is rebuilt from the
// header, the disk table, the devices and bbm_log_size, the checksum is
// recomputed, and store_imsm_mpb() is called per disk with failures
// reported on stderr.  One exit returns write_super_imsm_spares().
// NOTE(review): the generation increment, the per-disk filters, the
// computation of num_disks and the other return values are missing from
// this listing.
4251 static int write_super_imsm(struct supertype
*st
, int doclose
)
4253 struct intel_super
*super
= st
->sb
;
4254 struct imsm_super
*mpb
= super
->anchor
;
4260 __u32 mpb_size
= sizeof(struct imsm_super
) - sizeof(struct imsm_disk
);
4263 /* 'generation' is incremented every time the metadata is written */
4264 generation
= __le32_to_cpu(mpb
->generation_num
);
4266 mpb
->generation_num
= __cpu_to_le32(generation
);
4268 /* fix up cases where previous mdadm releases failed to set
4271 if (mpb
->orig_family_num
== 0)
4272 mpb
->orig_family_num
= mpb
->family_num
;
4274 for (d
= super
->disks
; d
; d
= d
->next
) {
4278 mpb
->disk
[d
->index
] = d
->disk
;
4282 for (d
= super
->missing
; d
; d
= d
->next
) {
4283 mpb
->disk
[d
->index
] = d
->disk
;
4286 mpb
->num_disks
= num_disks
;
4287 mpb_size
+= sizeof(struct imsm_disk
) * mpb
->num_disks
;
4289 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4290 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
4291 struct imsm_dev
*dev2
= get_imsm_dev(super
, i
);
4293 imsm_copy_dev(dev
, dev2
);
4294 mpb_size
+= sizeof_imsm_dev(dev
, 0);
4297 mpb_size
+= __le32_to_cpu(mpb
->bbm_log_size
);
4298 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4300 /* recalculate checksum */
4301 sum
= __gen_imsm_checksum(mpb
);
4302 mpb
->check_sum
= __cpu_to_le32(sum
);
4304 /* write the mpb for disks that compose raid devices */
4305 for (d
= super
->disks
; d
; d
= d
->next
) {
4308 if (store_imsm_mpb(d
->fd
, mpb
))
4309 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4310 __func__
, d
->major
, d
->minor
, strerror(errno
));
4318 return write_super_imsm_spares(super
, doclose
);
/*
 * create_array - queue an update_create_array metadata update for the
 * volume at index dev_idx.
 *
 * From the visible code: the update length is the update header with its
 * embedded imsm_dev replaced by the device's real size
 * (sizeof_imsm_dev(dev, 0)) plus one disk_info per map member.  The device
 * is copied in with imsm_copy_dev(), the serial of every member disk is
 * copied into the trailing disk_info array, and the buffer is handed to
 * append_metadata_update().
 * NOTE(review): the allocation itself and the return values are missing
 * from this listing.
 */
4324 static int create_array(struct supertype
*st
, int dev_idx
)
4327 struct imsm_update_create_array
*u
;
4328 struct intel_super
*super
= st
->sb
;
4329 struct imsm_dev
*dev
= get_imsm_dev(super
, dev_idx
);
4330 struct imsm_map
*map
= get_imsm_map(dev
, 0);
4331 struct disk_info
*inf
;
4332 struct imsm_disk
*disk
;
4335 len
= sizeof(*u
) - sizeof(*dev
) + sizeof_imsm_dev(dev
, 0) +
4336 sizeof(*inf
) * map
->num_members
;
4339 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4344 u
->type
= update_create_array
;
4345 u
->dev_idx
= dev_idx
;
4346 imsm_copy_dev(&u
->dev
, dev
);
4347 inf
= get_disk_info(u
);
4348 for (i
= 0; i
< map
->num_members
; i
++) {
4349 int idx
= get_imsm_disk_idx(dev
, i
, -1);
4351 disk
= get_imsm_disk(super
, idx
);
4352 serialcpy(inf
[i
].serial
, disk
->serial
);
4354 append_metadata_update(st
, u
, len
);
/*
 * mgmt_disk - queue an update_add_remove_disk metadata update.
 *
 * From the visible code: super->disk_mgmt_list is tested first; an update
 * buffer is then typed update_add_remove_disk and passed to
 * append_metadata_update().
 * NOTE(review): what happens when the list is empty, the allocation and the
 * return values are missing from this listing.
 */
4359 static int mgmt_disk(struct supertype
*st
)
4361 struct intel_super
*super
= st
->sb
;
4363 struct imsm_update_add_remove_disk
*u
;
4365 if (!super
->disk_mgmt_list
)
4371 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4376 u
->type
= update_add_remove_disk
;
4377 append_metadata_update(st
, u
, len
);
/*
 * write_init_super_imsm - commit the result of a create / add operation.
 *
 * From the visible code: super->current_vol is saved and reset to -1.  With
 * st->update_tail set the work is queued as a metadata update: mgmt_disk()
 * when the saved current_vol is negative, create_array() otherwise.  The
 * remaining visible path calls Kill() on every disk's devname and returns
 * write_super_imsm(st, 1).
 * NOTE(review): the else that separates the two paths and the handling of
 * 'rv' are missing from this listing.
 */
4382 static int write_init_super_imsm(struct supertype
*st
)
4384 struct intel_super
*super
= st
->sb
;
4385 int current_vol
= super
->current_vol
;
4387 /* we are done with current_vol reset it to point st at the container */
4388 super
->current_vol
= -1;
4390 if (st
->update_tail
) {
4391 /* queue the recently created array / added disk
4392 * as a metadata update */
4395 /* determine if we are creating a volume or adding a disk */
4396 if (current_vol
< 0) {
4397 /* in the mgmt (add/remove) disk case we are running
4398 * in mdmon context, so don't close fd's
4400 return mgmt_disk(st
);
4402 rv
= create_array(st
, current_vol
);
4407 for (d
= super
->disks
; d
; d
= d
->next
)
4408 Kill(d
->devname
, NULL
, 0, 1, 1);
4409 return write_super_imsm(st
, 1);
// store_super_imsm - write the anchor MPB of st->sb to 'fd' through
// store_imsm_mpb().  'mpb' is NULL when st->sb is NULL.
// NOTE(review): any check made before the call is missing from this
// listing.
4414 static int store_super_imsm(struct supertype
*st
, int fd
)
4416 struct intel_super
*super
= st
->sb
;
4417 struct imsm_super
*mpb
= super
? super
->anchor
: NULL
;
4423 return store_imsm_mpb(fd
, mpb
);
// imsm_bbm_log_size - return mpb->bbm_log_size converted from little-endian
// to CPU byte order.
4429 static int imsm_bbm_log_size(struct imsm_super
*mpb
)
4431 return __le32_to_cpu(mpb
->bbm_log_size
);
// validate_geometry_imsm_container - check that 'dev' can become part of an
// IMSM container.
//
// From the visible code: 'level' is compared with LEVEL_CONTAINER; the
// device is opened O_RDONLY|O_EXCL and its size read with get_dev_size(); a
// temporary super is allocated so find_intel_hba_capability() can be
// queried; 'raiddisks' is compared with super->orom->tds when an orom is
// present; and *freesize is set to avail_size_imsm(st, ldsize >> 9).
// NOTE(review): the remaining parameters, the error exits, cleanup of 'fd'
// and 'super', and the return values are missing from this listing.
4435 static int validate_geometry_imsm_container(struct supertype
*st
, int level
,
4436 int layout
, int raiddisks
, int chunk
,
4437 unsigned long long size
, char *dev
,
4438 unsigned long long *freesize
,
4442 unsigned long long ldsize
;
4443 struct intel_super
*super
=NULL
;
4446 if (level
!= LEVEL_CONTAINER
)
4451 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4454 fprintf(stderr
, Name
": imsm: Cannot open %s: %s\n",
4455 dev
, strerror(errno
));
4458 if (!get_dev_size(fd
, dev
, &ldsize
)) {
4463 /* capabilities retrieve could be possible
4464 * note that there is no fd for the disks in array.
4466 super
= alloc_super();
4469 Name
": malloc of %zu failed.\n",
4475 rv
= find_intel_hba_capability(fd
, super
, verbose
? dev
: NULL
);
4479 fd2devname(fd
, str
);
4480 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4481 fd
, str
, super
->orom
, rv
, raiddisks
);
4483 /* no orom/efi or non-intel hba of the disk */
4489 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4491 fprintf(stderr
, Name
": %d exceeds maximum number of"
4492 " platform supported disks: %d\n",
4493 raiddisks
, super
->orom
->tds
);
4499 *freesize
= avail_size_imsm(st
, ldsize
>> 9);
/*
 * find_size - length of the run of overlapping extents that begins at
 * e[*idx].
 *
 * From the visible code: scanning from index *idx up to num_extents, an
 * extent whose start lies within [base_start, end] stretches 'end' when it
 * reaches further; an extent starting beyond 'end' takes a separate branch.
 * The result is end - base_start.
 * NOTE(review): the handling of a zero-length base extent, the body of the
 * "start > end" branch and any update of *idx are missing from this
 * listing.
 */
4505 static unsigned long long find_size(struct extent
*e
, int *idx
, int num_extents
)
4507 const unsigned long long base_start
= e
[*idx
].start
;
4508 unsigned long long end
= base_start
+ e
[*idx
].size
;
4511 if (base_start
== end
)
4515 for (i
= *idx
; i
< num_extents
; i
++) {
4516 /* extend overlapping extents */
4517 if (e
[i
].start
>= base_start
&&
4518 e
[i
].start
<= end
) {
4521 if (e
[i
].start
+ e
[i
].size
> end
)
4522 end
= e
[i
].start
+ e
[i
].size
;
4523 } else if (e
[i
].start
> end
) {
4529 return end
- base_start
;
/*
 * merge_extents - combine the extents of the container's disks and work
 * out the space available for a new volume.
 *
 * From the visible code: a scratch array of sum_extents entries is
 * allocated, sorted with cmp_extent and collapsed through find_size(); the
 * gaps between the resulting extents are compared against 'maxsize';
 * IMSM_RESERVED_SECTORS is reserved when start_extent > 0; 0 is returned
 * when start + reserve would exceed the all-ones 32-bit value; otherwise
 * super->create_offset is set to start + reserve and maxsize - reserve is
 * returned.
 * NOTE(review): the allocation check, the copy of per-disk extents, the
 * release of the scratch array and the "maxsize < reserve" outcome are
 * missing from this listing.
 */
4532 static unsigned long long merge_extents(struct intel_super
*super
, int sum_extents
)
4534 /* build a composite disk with all known extents and generate a new
4535 * 'maxsize' given the "all disks in an array must share a common start
4536 * offset" constraint
4538 struct extent
*e
= calloc(sum_extents
, sizeof(*e
));
4542 unsigned long long pos
;
4543 unsigned long long start
= 0;
4544 unsigned long long maxsize
;
4545 unsigned long reserve
;
4550 /* coalesce and sort all extents. also, check to see if we need to
4551 * reserve space between member arrays
4554 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4557 for (i
= 0; i
< dl
->extent_cnt
; i
++)
4560 qsort(e
, sum_extents
, sizeof(*e
), cmp_extent
);
4565 while (i
< sum_extents
) {
4566 e
[j
].start
= e
[i
].start
;
4567 e
[j
].size
= find_size(e
, &i
, sum_extents
);
4569 if (e
[j
-1].size
== 0)
4578 unsigned long long esize
;
4580 esize
= e
[i
].start
- pos
;
4581 if (esize
>= maxsize
) {
4586 pos
= e
[i
].start
+ e
[i
].size
;
4588 } while (e
[i
-1].size
);
4594 /* FIXME assumes volume at offset 0 is the first volume in a
4597 if (start_extent
> 0)
4598 reserve
= IMSM_RESERVED_SECTORS
; /* gap between raid regions */
4602 if (maxsize
< reserve
)
4605 super
->create_offset
= ~((__u32
) 0);
4606 if (start
+ reserve
> super
->create_offset
)
4607 return 0; /* start overflows create_offset */
4608 super
->create_offset
= start
+ reserve
;
4610 return maxsize
- reserve
;
/*
 * is_raid_level_supported - check a RAID level / disk count pair against
 * the platform capabilities described by 'orom'.
 *
 * From the visible code: negative levels and levels 4 and 6 are tested up
 * front.  Level 0 requires imsm_orom_has_raid0(); level 10 requires
 * imsm_orom_has_raid10() with exactly 4 disks; level 5 requires
 * imsm_orom_has_raid5() with more than 2 disks.  The raid1e check and the
 * raid1 check (exactly 2 disks) belong to a case whose label is not visible
 * here.  The final return of 1 covers the no-orom situation, per its
 * comment.
 * NOTE(review): the result for the up-front test and the switch framing are
 * missing from this listing.
 */
4613 static int is_raid_level_supported(const struct imsm_orom
*orom
, int level
, int raiddisks
)
4615 if (level
< 0 || level
== 6 || level
== 4)
4618 /* if we have an orom prevent invalid raid levels */
4621 case 0: return imsm_orom_has_raid0(orom
);
4624 return imsm_orom_has_raid1e(orom
);
4625 return imsm_orom_has_raid1(orom
) && raiddisks
== 2;
4626 case 10: return imsm_orom_has_raid10(orom
) && raiddisks
== 4;
4627 case 5: return imsm_orom_has_raid5(orom
) && raiddisks
> 2;
4630 return 1; /* not on an Intel RAID platform so anything goes */
/*
 * pr_vrb - print a message prefixed with Name to stderr, but only when the
 * variable 'verbose' in the calling scope is non-zero.  The result is cast
 * to void so the macro can be used as a statement.
 */
4636 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
/*
 * validate_geometry_imsm_orom - check level, layout, disk count and chunk
 * size against super->orom.
 *
 * From the visible code: raiddisks must not exceed orom->tds; the level is
 * passed to is_raid_level_supported(); for levels other than 1 a zero or
 * UnSet *chunk is replaced by imsm_orom_default_chunk(), and any other
 * value is checked with imsm_orom_has_chunk(); the layout is compared with
 * imsm_level_to_layout(level).  The first three checks only run when
 * super->orom is set.  Callers in this file treat a zero result as failure.
 * NOTE(review): the return statements are missing from this listing.
 */
4638 * validate volume parameters with OROM/EFI capabilities
4641 validate_geometry_imsm_orom(struct intel_super
*super
, int level
, int layout
,
4642 int raiddisks
, int *chunk
, int verbose
)
4647 /* validate container capabilities */
4648 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4650 fprintf(stderr
, Name
": %d exceeds maximum number of"
4651 " platform supported disks: %d\n",
4652 raiddisks
, super
->orom
->tds
);
4656 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4657 if (super
->orom
&& (!is_raid_level_supported(super
->orom
, level
,
4659 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4660 level
, raiddisks
, raiddisks
> 1 ? "s" : "");
4663 if (super
->orom
&& level
!= 1) {
4664 if (chunk
&& (*chunk
== 0 || *chunk
== UnSet
))
4665 *chunk
= imsm_orom_default_chunk(super
->orom
);
4666 else if (chunk
&& !imsm_orom_has_chunk(super
->orom
, *chunk
)) {
4667 pr_vrb(": platform does not support a chunk size of: "
4672 if (layout
!= imsm_level_to_layout(level
)) {
4674 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4675 else if (level
== 10)
4676 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4678 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
/*
 * Summary of the function below, from the visible code: the geometry is
 * first checked with validate_geometry_imsm_orom().  One section walks
 * super->disks and, using get_extents(), looks for gaps of at least
 * 'minsize' that share a common start offset, failing when fewer than
 * 'raiddisks' disks qualify.  Another section requires 'dev' to be a block
 * device that is a member of super->disks, refuses a spare when an orom is
 * present and a volume already exists, finds that disk's largest free gap,
 * compares it with 'size', then totals the extent counts, calls
 * merge_extents() and stores the result in *freesize.
 * NOTE(review): how the two sections are selected, the remaining
 * parameters and the return values are missing from this listing.
 */
4685 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4686 * FIX ME add ahci details
4688 static int validate_geometry_imsm_volume(struct supertype
*st
, int level
,
4689 int layout
, int raiddisks
, int *chunk
,
4690 unsigned long long size
, char *dev
,
4691 unsigned long long *freesize
,
4695 struct intel_super
*super
= st
->sb
;
4696 struct imsm_super
*mpb
= super
->anchor
;
4698 unsigned long long pos
= 0;
4699 unsigned long long maxsize
;
4703 /* We must have the container info already read in. */
4707 if (!validate_geometry_imsm_orom(super
, level
, layout
, raiddisks
, chunk
, verbose
)) {
4708 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4709 "Cannot proceed with the action(s).\n");
4713 /* General test: make sure there is space for
4714 * 'raiddisks' device extents of size 'size' at a given
4717 unsigned long long minsize
= size
;
4718 unsigned long long start_offset
= MaxSector
;
4721 minsize
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
4722 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4727 e
= get_extents(super
, dl
);
4730 unsigned long long esize
;
4731 esize
= e
[i
].start
- pos
;
4732 if (esize
>= minsize
)
4734 if (found
&& start_offset
== MaxSector
) {
4737 } else if (found
&& pos
!= start_offset
) {
4741 pos
= e
[i
].start
+ e
[i
].size
;
4743 } while (e
[i
-1].size
);
4748 if (dcnt
< raiddisks
) {
4750 fprintf(stderr
, Name
": imsm: Not enough "
4751 "devices with space for this array "
4759 /* This device must be a member of the set */
4760 if (stat(dev
, &stb
) < 0)
4762 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
4764 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4765 if (dl
->major
== (int)major(stb
.st_rdev
) &&
4766 dl
->minor
== (int)minor(stb
.st_rdev
))
4771 fprintf(stderr
, Name
": %s is not in the "
4772 "same imsm set\n", dev
);
4774 } else if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
) {
4775 /* If a volume is present then the current creation attempt
4776 * cannot incorporate new spares because the orom may not
4777 * understand this configuration (all member disks must be
4778 * members of each array in the container).
4780 fprintf(stderr
, Name
": %s is a spare and a volume"
4781 " is already defined for this container\n", dev
);
4782 fprintf(stderr
, Name
": The option-rom requires all member"
4783 " disks to be a member of all volumes\n");
4787 /* retrieve the largest free space block */
4788 e
= get_extents(super
, dl
);
4793 unsigned long long esize
;
4795 esize
= e
[i
].start
- pos
;
4796 if (esize
>= maxsize
)
4798 pos
= e
[i
].start
+ e
[i
].size
;
4800 } while (e
[i
-1].size
);
4805 fprintf(stderr
, Name
": unable to determine free space for: %s\n",
4809 if (maxsize
< size
) {
4811 fprintf(stderr
, Name
": %s not enough space (%llu < %llu)\n",
4812 dev
, maxsize
, size
);
4816 /* count total number of extents for merge */
4818 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4820 i
+= dl
->extent_cnt
;
4822 maxsize
= merge_extents(super
, i
);
4823 if (maxsize
< size
|| maxsize
== 0) {
4825 fprintf(stderr
, Name
": not enough space after merge (%llu < %llu)\n",
4830 *freesize
= maxsize
;
/*
 * reserve_space - pick disks and space for a volume laid out automatically.
 *
 * From the visible code: super->disks is walked, skipping new spares when
 * an orom is present and a volume already exists; extents are gathered with
 * get_extents() and counted; merge_extents() yields 'maxsize'; the minimum
 * size is chunk * 2.  The attempt fails (returning 0 after a message) when
 * too few disks qualify, when an orom is present and the number of disks
 * already in use is non-zero and differs from 'raiddisks', or when
 * 'maxsize' is below 'minsize'.  On success the chosen disks receive
 * consecutive raiddisk numbers.
 * NOTE(review): part of the failure condition, the use of 'size' and
 * 'freesize', the filter in the final loop and the success return value are
 * missing from this listing.
 */
4835 static int reserve_space(struct supertype
*st
, int raiddisks
,
4836 unsigned long long size
, int chunk
,
4837 unsigned long long *freesize
)
4839 struct intel_super
*super
= st
->sb
;
4840 struct imsm_super
*mpb
= super
->anchor
;
4845 unsigned long long maxsize
;
4846 unsigned long long minsize
;
4850 /* find the largest common start free region of the possible disks */
4854 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4860 /* don't activate new spares if we are orom constrained
4861 * and there is already a volume active in the container
4863 if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
)
4866 e
= get_extents(super
, dl
);
4869 for (i
= 1; e
[i
-1].size
; i
++)
4877 maxsize
= merge_extents(super
, extent_cnt
);
4881 minsize
= chunk
* 2;
4883 if (cnt
< raiddisks
||
4884 (super
->orom
&& used
&& used
!= raiddisks
) ||
4885 maxsize
< minsize
||
4887 fprintf(stderr
, Name
": not enough devices with space to create array.\n");
4888 return 0; /* Not enough free spaces that are large enough */
4900 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4902 dl
->raiddisk
= cnt
++;
/*
 * validate_geometry_imsm - entry point for geometry validation.
 *
 * From the visible code: LEVEL_CONTAINER requests go to
 * validate_geometry_imsm_container().  When st->sb and freesize are both
 * set the geometry is checked with validate_geometry_imsm_orom() and space
 * is reserved with reserve_space().  A request inside a loaded container
 * goes to validate_geometry_imsm_volume().  Otherwise 'dev' is opened
 * (retrying without O_EXCL when the first open fails with EBUSY), its
 * container is found with open_container(), and if sysfs reports an "imsm"
 * external container the container is loaded with load_super_imsm_all() and
 * the volume check is run against it.
 * NOTE(review): the conditions between these paths, descriptor cleanup and
 * the return values are missing from this listing.
 */
4909 static int validate_geometry_imsm(struct supertype
*st
, int level
, int layout
,
4910 int raiddisks
, int *chunk
, unsigned long long size
,
4911 char *dev
, unsigned long long *freesize
,
4919 * if given unused devices create a container
4920 * if given given devices in a container create a member volume
4922 if (level
== LEVEL_CONTAINER
) {
4923 /* Must be a fresh device to add to a container */
4924 return validate_geometry_imsm_container(st
, level
, layout
,
4926 chunk
?*chunk
:0, size
,
4932 if (st
->sb
&& freesize
) {
4933 /* we are being asked to automatically layout a
4934 * new volume based on the current contents of
4935 * the container. If the parameters can be
4936 * satisfied reserve_space will record the disks,
4937 * start offset, and size of the volume to be
4938 * created. add_to_super and getinfo_super
4939 * detect when autolayout is in progress.
4941 if (!validate_geometry_imsm_orom(st
->sb
, level
, layout
,
4945 return reserve_space(st
, raiddisks
, size
,
4946 chunk
?*chunk
:0, freesize
);
4951 /* creating in a given container */
4952 return validate_geometry_imsm_volume(st
, level
, layout
,
4953 raiddisks
, chunk
, size
,
4954 dev
, freesize
, verbose
);
4957 /* This device needs to be a device in an 'imsm' container */
4958 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4962 Name
": Cannot create this array on device %s\n",
4967 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
4969 fprintf(stderr
, Name
": Cannot open %s: %s\n",
4970 dev
, strerror(errno
));
4973 /* Well, it is in use by someone, maybe an 'imsm' container. */
4974 cfd
= open_container(fd
);
4978 fprintf(stderr
, Name
": Cannot use %s: It is busy\n",
4982 sra
= sysfs_read(cfd
, 0, GET_VERSION
);
4983 if (sra
&& sra
->array
.major_version
== -1 &&
4984 strcmp(sra
->text_version
, "imsm") == 0)
4988 /* This is a member of an imsm container. Load the container
4989 * and try to create a volume
4991 struct intel_super
*super
;
4993 if (load_super_imsm_all(st
, cfd
, (void **) &super
, NULL
) == 0) {
4995 st
->container_dev
= fd2devnum(cfd
);
4997 return validate_geometry_imsm_volume(st
, level
, layout
,
5005 fprintf(stderr
, Name
": failed container membership check\n");
// default_geometry_imsm - fill in geometry values the caller left unset.
// An UnSet *level becomes LEVEL_CONTAINER; an UnSet *layout becomes
// imsm_level_to_layout(*level) (only when 'level' is also non-NULL); an
// UnSet or zero *chunk becomes imsm_orom_default_chunk() when st->sb and
// its orom are available.  NULL pointer arguments are left alone.
5011 static void default_geometry_imsm(struct supertype
*st
, int *level
, int *layout
, int *chunk
)
5013 struct intel_super
*super
= st
->sb
;
5015 if (level
&& *level
== UnSet
)
5016 *level
= LEVEL_CONTAINER
;
5018 if (level
&& layout
&& *layout
== UnSet
)
5019 *layout
= imsm_level_to_layout(*level
);
5021 if (chunk
&& (*chunk
== UnSet
|| *chunk
== 0) &&
5022 super
&& super
->orom
)
5023 *chunk
= imsm_orom_default_chunk(super
->orom
);
// Forward declaration: handle_missing() is called by kill_subarray_imsm()
// and update_subarray_imsm() before its definition appears.
5026 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
);
// kill_subarray_imsm - remove the volume selected by super->current_vol.
//
// From the visible code: current_vol is captured and the cursor reset to
// -1; the raid devices are checked with is_subarray_active() and a
// "would change the UUID of active subarray" message is printed for a
// conflict; with st->update_tail set an update_kill_array update carrying
// the index is queued.  The direct path walks super->devlist comparing each
// entry's index with current_vol and calling handle_missing(); when
// num_raid_devs drops to 0 every disk with index > -2 is marked SPARE_DISK;
// super->updates_pending is incremented.
// NOTE(review): current_vol is held in a __u8 while super->current_vol is
// compared with < 0 - confirm the intended handling of negative values.
// List unlinking, index renumbering and return values are missing from this
// listing.
5028 static int kill_subarray_imsm(struct supertype
*st
)
5030 /* remove the subarray currently referenced by ->current_vol */
5032 struct intel_dev
**dp
;
5033 struct intel_super
*super
= st
->sb
;
5034 __u8 current_vol
= super
->current_vol
;
5035 struct imsm_super
*mpb
= super
->anchor
;
5037 if (super
->current_vol
< 0)
5039 super
->current_vol
= -1; /* invalidate subarray cursor */
5041 /* block deletions that would change the uuid of active subarrays
5043 * FIXME when immutable ids are available, but note that we'll
5044 * also need to fixup the invalidated/active subarray indexes in
5047 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5050 if (i
< current_vol
)
5052 sprintf(subarray
, "%u", i
);
5053 if (is_subarray_active(subarray
, st
->devname
)) {
5055 Name
": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
5062 if (st
->update_tail
) {
5063 struct imsm_update_kill_array
*u
= malloc(sizeof(*u
));
5067 u
->type
= update_kill_array
;
5068 u
->dev_idx
= current_vol
;
5069 append_metadata_update(st
, u
, sizeof(*u
));
5074 for (dp
= &super
->devlist
; *dp
;)
5075 if ((*dp
)->index
== current_vol
) {
5078 handle_missing(super
, (*dp
)->dev
);
5079 if ((*dp
)->index
> current_vol
)
5084 /* no more raid devices, all active components are now spares,
5085 * but of course failed are still failed
5087 if (--mpb
->num_raid_devs
== 0) {
5090 for (d
= super
->disks
; d
; d
= d
->next
)
5091 if (d
->index
> -2) {
5093 d
->disk
.status
= SPARE_DISK
;
5097 super
->updates_pending
++;
// update_subarray_imsm - apply an update to the volume named by 'subarray'.
//
// From the visible code only the "name" update is handled here: an active
// subarray is refused with a message, the new name is checked with
// check_name(), and 'subarray' must parse fully as a decimal index below
// num_raid_devs.  With st->update_tail set an update_rename_array update is
// queued with the name limited to MAX_RAID_SERIAL_LEN by snprintf();
// otherwise the name is written into dev->volume, handle_missing() is run
// on every raid device and super->updates_pending is incremented.
// NOTE(review): allocation checks, the handling of other update strings
// and the return values are missing from this listing.
5102 static int update_subarray_imsm(struct supertype
*st
, char *subarray
,
5103 char *update
, struct mddev_ident
*ident
)
5105 /* update the subarray currently referenced by ->current_vol */
5106 struct intel_super
*super
= st
->sb
;
5107 struct imsm_super
*mpb
= super
->anchor
;
5109 if (strcmp(update
, "name") == 0) {
5110 char *name
= ident
->name
;
5114 if (is_subarray_active(subarray
, st
->devname
)) {
5116 Name
": Unable to update name of active subarray\n");
5120 if (!check_name(super
, name
, 0))
5123 vol
= strtoul(subarray
, &ep
, 10);
5124 if (*ep
!= '\0' || vol
>= super
->anchor
->num_raid_devs
)
5127 if (st
->update_tail
) {
5128 struct imsm_update_rename_array
*u
= malloc(sizeof(*u
));
5132 u
->type
= update_rename_array
;
5134 snprintf((char *) u
->name
, MAX_RAID_SERIAL_LEN
, "%s", name
);
5135 append_metadata_update(st
, u
, sizeof(*u
));
5137 struct imsm_dev
*dev
;
5140 dev
= get_imsm_dev(super
, vol
);
5141 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
5142 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5143 dev
= get_imsm_dev(super
, i
);
5144 handle_missing(super
, dev
);
5146 super
->updates_pending
++;
/*
 * is_gen_migration - test whether 'dev' is in a general migration.
 * The visible code checks dev->vol.migr_state and then compares
 * migr_type(dev) with MIGR_GEN_MIGR.
 * NOTE(review): the return statements are missing from this listing;
 * presumably non-zero only when both tests hold - confirm.
 */
5154 static int is_gen_migration(struct imsm_dev
*dev
)
5156 if (!dev
->vol
.migr_state
)
5159 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5164 #endif /* MDASSEMBLE */
/*
 * is_rebuilding - test whether 'dev' is being rebuilt.
 * The visible code checks dev->vol.migr_state, requires migr_type(dev) to
 * be MIGR_REBUILD, and then compares the state of map 1 with
 * IMSM_T_STATE_DEGRADED.
 * NOTE(review): the return statements are missing from this listing.
 */
5166 static int is_rebuilding(struct imsm_dev
*dev
)
5168 struct imsm_map
*migr_map
;
5170 if (!dev
->vol
.migr_state
)
5173 if (migr_type(dev
) != MIGR_REBUILD
)
5176 migr_map
= get_imsm_map(dev
, 1);
5178 if (migr_map
->map_state
== IMSM_T_STATE_DEGRADED
)
/*
 * update_recovery_start - set the recovery checkpoint of the disk being
 * rebuilt.
 *
 * From the visible code: nothing is done unless is_rebuilding(dev); the
 * members of array->devs are searched for one whose recovery_start is 0; a
 * debug message is printed when no out-of-sync disk is found; the chosen
 * disk's recovery_start becomes curr_migr_unit * blocks_per_migr_unit(dev).
 * NOTE(review): how more than one candidate is handled is missing from this
 * listing.
 */
5184 static void update_recovery_start(struct imsm_dev
*dev
, struct mdinfo
*array
)
5186 struct mdinfo
*rebuild
= NULL
;
5190 if (!is_rebuilding(dev
))
5193 /* Find the rebuild target, but punt on the dual rebuild case */
5194 for (d
= array
->devs
; d
; d
= d
->next
)
5195 if (d
->recovery_start
== 0) {
5202 /* (?) none of the disks are marked with
5203 * IMSM_ORD_REBUILD, so assume they are missing and the
5204 * disk_ord_tbl was not correctly updated
5206 dprintf("%s: failed to locate out-of-sync disk\n", __func__
);
5210 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
5211 rebuild
->recovery_start
= units
* blocks_per_migr_unit(dev
);
// container_content_imsm - build mdinfo records for the volumes of a loaded
// container, optionally restricted to the one named by 'subarray'.
//
// From the visible code: the bad-block log size is checked; spare disks are
// counted; then for each raid device a volume in a MIGR_STATE_CHANGE
// migration is reported as unsupported, the geometry is checked with
// validate_geometry_imsm_orom(), an mdinfo is allocated, zeroed and filled
// by getinfo_super_imsm_volume(), and for each slot of map 0 the matching
// disk is looked up and a per-disk mdinfo is added with its number,
// major/minor, raid_disk, recovery_start, events, data_offset and
// component_size.  Afterwards update_recovery_start() is called and the
// spare count is added to array.spare_disks.  MD_SB_BBM_ERRORS is set in
// the state of 'rest' under a condition not visible here.
// NOTE(review): the skip conditions, cleanup on allocation failure, list
// linking of 'this' into 'rest' and the return value are missing from this
// listing.
5215 static struct mdinfo
*container_content_imsm(struct supertype
*st
, char *subarray
)
5217 /* Given a container loaded by load_super_imsm_all,
5218 * extract information about all the arrays into
5220 * If 'subarray' is given, just extract info about that array.
5222 * For each imsm_dev create an mdinfo, fill it in,
5223 * then look for matching devices in super->disks
5224 * and create appropriate device mdinfo.
5226 struct intel_super
*super
= st
->sb
;
5227 struct imsm_super
*mpb
= super
->anchor
;
5228 struct mdinfo
*rest
= NULL
;
5232 int spare_disks
= 0;
5234 /* check for bad blocks */
5235 if (imsm_bbm_log_size(super
->anchor
))
5238 /* count spare devices, not used in maps
5240 for (d
= super
->disks
; d
; d
= d
->next
)
5244 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5245 struct imsm_dev
*dev
;
5246 struct imsm_map
*map
;
5247 struct imsm_map
*map2
;
5248 struct mdinfo
*this;
5253 (i
!= strtoul(subarray
, &ep
, 10) || *ep
!= '\0'))
5256 dev
= get_imsm_dev(super
, i
);
5257 map
= get_imsm_map(dev
, 0);
5258 map2
= get_imsm_map(dev
, 1);
5260 /* do not publish arrays that are in the middle of an
5261 * unsupported migration
5263 if (dev
->vol
.migr_state
&&
5264 (migr_type(dev
) == MIGR_STATE_CHANGE
)) {
5265 fprintf(stderr
, Name
": cannot assemble volume '%.16s':"
5266 " unsupported migration in progress\n",
5270 /* do not publish arrays that are not supported by the controller's
5274 chunk
= __le16_to_cpu(map
->blocks_per_strip
) >> 1;
5276 if (!validate_geometry_imsm_orom(super
,
5277 get_imsm_raid_level(map
), /* RAID level */
5278 imsm_level_to_layout(get_imsm_raid_level(map
)),
5279 map
->num_members
, /* raid disks */
5282 fprintf(stderr
, Name
": RAID gemetry validation failed. "
5283 "Cannot proceed with the action(s).\n");
5286 #endif /* MDASSEMBLE */
5287 this = malloc(sizeof(*this));
5289 fprintf(stderr
, Name
": failed to allocate %zu bytes\n",
5293 memset(this, 0, sizeof(*this));
5296 super
->current_vol
= i
;
5297 getinfo_super_imsm_volume(st
, this, NULL
);
5298 for (slot
= 0 ; slot
< map
->num_members
; slot
++) {
5299 unsigned long long recovery_start
;
5300 struct mdinfo
*info_d
;
5307 idx
= get_imsm_disk_idx(dev
, slot
, 0);
5308 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
5309 for (d
= super
->disks
; d
; d
= d
->next
)
5310 if (d
->index
== idx
)
5313 recovery_start
= MaxSector
;
5316 if (d
&& is_failed(&d
->disk
))
5318 if (ord
& IMSM_ORD_REBUILD
)
5322 * if we skip some disks the array will be assmebled degraded;
5323 * reset resync start to avoid a dirty-degraded
5324 * situation when performing the intial sync
5326 * FIXME handle dirty degraded
5328 if ((skip
|| recovery_start
== 0) && !dev
->vol
.dirty
)
5329 this->resync_start
= MaxSector
;
5333 info_d
= calloc(1, sizeof(*info_d
));
5335 fprintf(stderr
, Name
": failed to allocate disk"
5336 " for volume %.16s\n", dev
->volume
);
5337 info_d
= this->devs
;
5339 struct mdinfo
*d
= info_d
->next
;
5348 info_d
->next
= this->devs
;
5349 this->devs
= info_d
;
5351 info_d
->disk
.number
= d
->index
;
5352 info_d
->disk
.major
= d
->major
;
5353 info_d
->disk
.minor
= d
->minor
;
5354 info_d
->disk
.raid_disk
= slot
;
5355 info_d
->recovery_start
= recovery_start
;
5357 if (slot
< map2
->num_members
)
5358 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5360 this->array
.spare_disks
++;
5362 if (slot
< map
->num_members
)
5363 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5365 this->array
.spare_disks
++;
5367 if (info_d
->recovery_start
== MaxSector
)
5368 this->array
.working_disks
++;
5370 info_d
->events
= __le32_to_cpu(mpb
->generation_num
);
5371 info_d
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5372 info_d
->component_size
= __le32_to_cpu(map
->blocks_per_member
);
5374 /* now that the disk list is up-to-date fixup recovery_start */
5375 update_recovery_start(dev
, this);
5376 this->array
.spare_disks
+= spare_disks
;
5380 /* if array has bad blocks, set suitable bit in array status */
5382 rest
->array
.state
|= (1<<MD_SB_BBM_ERRORS
);
/*
 * imsm_check_degraded - derive a map state for 'dev' from the number of
 * failed members.
 *
 * From the visible code: one path returns IMSM_T_STATE_UNINITIALIZED when
 * map 0 is already in that state and IMSM_T_STATE_NORMAL otherwise; the
 * switch on get_imsm_raid_level(map) returns IMSM_T_STATE_FAILED or
 * IMSM_T_STATE_DEGRADED depending on the level and on 'failed'; one case
 * walks the ordinal table checking each member disk for failure or
 * IMSM_ORD_REBUILD; the final return falls back to map->map_state.
 * NOTE(review): the case labels and most conditions are missing from this
 * listing, so the per-level rules cannot be stated from here.
 */
5388 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
)
5390 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5393 return map
->map_state
== IMSM_T_STATE_UNINITIALIZED
?
5394 IMSM_T_STATE_UNINITIALIZED
: IMSM_T_STATE_NORMAL
;
5396 switch (get_imsm_raid_level(map
)) {
5398 return IMSM_T_STATE_FAILED
;
5401 if (failed
< map
->num_members
)
5402 return IMSM_T_STATE_DEGRADED
;
5404 return IMSM_T_STATE_FAILED
;
5409 * check to see if any mirrors have failed, otherwise we
5410 * are degraded. Even numbered slots are mirrored on
5414 /* gcc -Os complains that this is unused */
5415 int insync
= insync
;
5417 for (i
= 0; i
< map
->num_members
; i
++) {
5418 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
5419 int idx
= ord_to_idx(ord
);
5420 struct imsm_disk
*disk
;
5422 /* reset the potential in-sync count on even-numbered
5423 * slots. num_copies is always 2 for imsm raid10
5428 disk
= get_imsm_disk(super
, idx
);
5429 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5432 /* no in-sync disks left in this mirror the
5436 return IMSM_T_STATE_FAILED
;
5439 return IMSM_T_STATE_DEGRADED
;
5443 return IMSM_T_STATE_DEGRADED
;
5445 return IMSM_T_STATE_FAILED
;
5451 return map
->map_state
;
// imsm_count_failed - examine the members of 'dev' for failed disks.
//
// From the visible code: 'prev' is map number dev->vol.migr_state and 'map'
// is map 0.  For every slot of 'prev' the ordinal entries of both maps are
// OR-ed together, the disk is looked up by index, and the slot is tested
// for a missing disk, a failed disk or IMSM_ORD_REBUILD.
// NOTE(review): the counter update and the return statement are missing
// from this listing; presumably the number of such slots is returned -
// confirm.
5454 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
)
5458 struct imsm_disk
*disk
;
5459 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5460 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
5464 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5465 * disks that are being rebuilt. New failures are recorded to
5466 * map[0]. So we look through all the disks we started with and
5467 * see if any failures are still present, or if any new ones
5470 * FIXME add support for online capacity expansion and
5471 * raid-level-migration
5473 for (i
= 0; i
< prev
->num_members
; i
++) {
5474 ord
= __le32_to_cpu(prev
->disk_ord_tbl
[i
]);
5475 ord
|= __le32_to_cpu(map
->disk_ord_tbl
[i
]);
5476 idx
= ord_to_idx(ord
);
5478 disk
= get_imsm_disk(super
, idx
);
5479 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
// imsm_open_new - record which container member an active array refers to.
//
// From the visible code: atoi(inst) is compared with mpb->num_raid_devs and
// an out-of-range message is printed when it is not below that count;
// otherwise a->info.container_member is set to atoi(inst).
// NOTE(review): the declaration of 'inst' and the return values are missing
// from this listing.  atoi() reports no conversion errors, so a
// non-numeric 'inst' is treated as 0.
5487 static int imsm_open_new(struct supertype
*c
, struct active_array
*a
,
5490 struct intel_super
*super
= c
->sb
;
5491 struct imsm_super
*mpb
= super
->anchor
;
5493 if (atoi(inst
) >= mpb
->num_raid_devs
) {
5494 fprintf(stderr
, "%s: subarry index %d, out of range\n",
5495 __func__
, atoi(inst
));
5499 dprintf("imsm: open_new %s\n", inst
);
5500 a
->info
.container_member
= atoi(inst
);
// is_resyncing - test whether 'dev' is resyncing.
//
// From the visible code: dev->vol.migr_state is checked first; the
// migration types MIGR_INIT and MIGR_REPAIR are tested together,
// MIGR_GEN_MIGR separately; finally map 1 is checked for
// IMSM_T_STATE_NORMAL while the migration type is not MIGR_GEN_MIGR.
// NOTE(review): the return statements are missing from this listing, so the
// result of each test cannot be stated from here.
5504 static int is_resyncing(struct imsm_dev
*dev
)
5506 struct imsm_map
*migr_map
;
5508 if (!dev
->vol
.migr_state
)
5511 if (migr_type(dev
) == MIGR_INIT
||
5512 migr_type(dev
) == MIGR_REPAIR
)
5515 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5518 migr_map
= get_imsm_map(dev
, 1);
5520 if ((migr_map
->map_state
== IMSM_T_STATE_NORMAL
) &&
5521 (dev
->vol
.migr_type
!= MIGR_GEN_MIGR
))
5527 /* return true if we recorded new information */
5528 static int mark_failure(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5532 struct imsm_map
*map
;
5534 /* new failures are always set in map[0] */
5535 map
= get_imsm_map(dev
, 0);
5537 slot
= get_imsm_disk_slot(map
, idx
);
5541 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
5542 if (is_failed(disk
) && (ord
& IMSM_ORD_REBUILD
))
5545 disk
->status
|= FAILED_DISK
;
5546 set_imsm_ord_tbl_ent(map
, slot
, idx
| IMSM_ORD_REBUILD
);
5547 if (map
->failed_disk_num
== 0xff)
5548 map
->failed_disk_num
= slot
;
5552 static void mark_missing(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5554 mark_failure(dev
, disk
, idx
);
5556 if (disk
->scsi_id
== __cpu_to_le32(~(__u32
)0))
5559 disk
->scsi_id
= __cpu_to_le32(~(__u32
)0);
5560 memmove(&disk
->serial
[0], &disk
->serial
[1], MAX_RAID_SERIAL_LEN
- 1);
5563 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
)
5569 if (!super
->missing
)
5571 failed
= imsm_count_failed(super
, dev
);
5572 map_state
= imsm_check_degraded(super
, dev
, failed
);
5574 dprintf("imsm: mark missing\n");
5575 end_migration(dev
, map_state
);
5576 for (dl
= super
->missing
; dl
; dl
= dl
->next
)
5577 mark_missing(dev
, &dl
->disk
, dl
->index
);
5578 super
->updates_pending
++;
5581 static unsigned long long imsm_set_array_size(struct imsm_dev
*dev
)
5583 int used_disks
= imsm_num_data_members(dev
, 0);
5584 unsigned long long array_blocks
;
5585 struct imsm_map
*map
;
5587 if (used_disks
== 0) {
5588 /* when problems occures
5589 * return current array_blocks value
5591 array_blocks
= __le32_to_cpu(dev
->size_high
);
5592 array_blocks
= array_blocks
<< 32;
5593 array_blocks
+= __le32_to_cpu(dev
->size_low
);
5595 return array_blocks
;
5598 /* set array size in metadata
5600 map
= get_imsm_map(dev
, 0);
5601 array_blocks
= map
->blocks_per_member
* used_disks
;
5603 /* round array size down to closest MB
5605 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
5606 dev
->size_low
= __cpu_to_le32((__u32
)array_blocks
);
5607 dev
->size_high
= __cpu_to_le32((__u32
)(array_blocks
>> 32));
5609 return array_blocks
;
5612 static void imsm_set_disk(struct active_array
*a
, int n
, int state
);
5614 static void imsm_progress_container_reshape(struct intel_super
*super
)
5616 /* if no device has a migr_state, but some device has a
5617 * different number of members than the previous device, start
5618 * changing the number of devices in this device to match
5621 struct imsm_super
*mpb
= super
->anchor
;
5622 int prev_disks
= -1;
5626 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5627 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
5628 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5629 struct imsm_map
*map2
;
5630 int prev_num_members
;
5632 if (dev
->vol
.migr_state
)
5635 if (prev_disks
== -1)
5636 prev_disks
= map
->num_members
;
5637 if (prev_disks
== map
->num_members
)
5640 /* OK, this array needs to enter reshape mode.
5641 * i.e it needs a migr_state
5644 copy_map_size
= sizeof_imsm_map(map
);
5645 prev_num_members
= map
->num_members
;
5646 map
->num_members
= prev_disks
;
5647 dev
->vol
.migr_state
= 1;
5648 dev
->vol
.curr_migr_unit
= 0;
5649 dev
->vol
.migr_type
= MIGR_GEN_MIGR
;
5650 for (i
= prev_num_members
;
5651 i
< map
->num_members
; i
++)
5652 set_imsm_ord_tbl_ent(map
, i
, i
);
5653 map2
= get_imsm_map(dev
, 1);
5654 /* Copy the current map */
5655 memcpy(map2
, map
, copy_map_size
);
5656 map2
->num_members
= prev_num_members
;
5658 imsm_set_array_size(dev
);
5659 super
->updates_pending
++;
5663 /* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
5664 * states are handled in imsm_set_disk() with one exception, when a
5665 * resync is stopped due to a new failure this routine will set the
5666 * 'degraded' state for the array.
5668 static int imsm_set_array_state(struct active_array
*a
, int consistent
)
5670 int inst
= a
->info
.container_member
;
5671 struct intel_super
*super
= a
->container
->sb
;
5672 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5673 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5674 int failed
= imsm_count_failed(super
, dev
);
5675 __u8 map_state
= imsm_check_degraded(super
, dev
, failed
);
5676 __u32 blocks_per_unit
;
5678 if (dev
->vol
.migr_state
&&
5679 dev
->vol
.migr_type
== MIGR_GEN_MIGR
) {
5680 /* array state change is blocked due to reshape action
5682 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5683 * - finish the reshape (if last_checkpoint is big and action != reshape)
5684 * - update curr_migr_unit
5686 if (a
->curr_action
== reshape
) {
5687 /* still reshaping, maybe update curr_migr_unit */
5688 goto mark_checkpoint
;
5690 if (a
->last_checkpoint
== 0 && a
->prev_action
== reshape
) {
5691 /* for some reason we aborted the reshape.
5694 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
5695 dev
->vol
.migr_state
= 0;
5696 dev
->vol
.migr_type
= 0;
5697 dev
->vol
.curr_migr_unit
= 0;
5698 memcpy(map
, map2
, sizeof_imsm_map(map2
));
5699 super
->updates_pending
++;
5701 if (a
->last_checkpoint
>= a
->info
.component_size
) {
5702 unsigned long long array_blocks
;
5706 used_disks
= imsm_num_data_members(dev
, 0);
5707 if (used_disks
> 0) {
5709 map
->blocks_per_member
*
5711 /* round array size down to closest MB
5713 array_blocks
= (array_blocks
5714 >> SECT_PER_MB_SHIFT
)
5715 << SECT_PER_MB_SHIFT
;
5716 a
->info
.custom_array_size
= array_blocks
;
5717 /* encourage manager to update array
5721 a
->check_reshape
= 1;
5723 /* finalize online capacity expansion/reshape */
5724 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
5726 mdi
->disk
.raid_disk
,
5729 imsm_progress_container_reshape(super
);
5734 /* before we activate this array handle any missing disks */
5735 if (consistent
== 2)
5736 handle_missing(super
, dev
);
5738 if (consistent
== 2 &&
5739 (!is_resync_complete(&a
->info
) ||
5740 map_state
!= IMSM_T_STATE_NORMAL
||
5741 dev
->vol
.migr_state
))
5744 if (is_resync_complete(&a
->info
)) {
5745 /* complete intialization / resync,
5746 * recovery and interrupted recovery is completed in
5749 if (is_resyncing(dev
)) {
5750 dprintf("imsm: mark resync done\n");
5751 end_migration(dev
, map_state
);
5752 super
->updates_pending
++;
5753 a
->last_checkpoint
= 0;
5755 } else if (!is_resyncing(dev
) && !failed
) {
5756 /* mark the start of the init process if nothing is failed */
5757 dprintf("imsm: mark resync start\n");
5758 if (map
->map_state
== IMSM_T_STATE_UNINITIALIZED
)
5759 migrate(dev
, super
, IMSM_T_STATE_NORMAL
, MIGR_INIT
);
5761 migrate(dev
, super
, IMSM_T_STATE_NORMAL
, MIGR_REPAIR
);
5762 super
->updates_pending
++;
5766 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5767 blocks_per_unit
= blocks_per_migr_unit(dev
);
5768 if (blocks_per_unit
) {
5772 units
= a
->last_checkpoint
/ blocks_per_unit
;
5775 /* check that we did not overflow 32-bits, and that
5776 * curr_migr_unit needs updating
5778 if (units32
== units
&&
5780 __le32_to_cpu(dev
->vol
.curr_migr_unit
) != units32
) {
5781 dprintf("imsm: mark checkpoint (%u)\n", units32
);
5782 dev
->vol
.curr_migr_unit
= __cpu_to_le32(units32
);
5783 super
->updates_pending
++;
5787 /* mark dirty / clean */
5788 if (dev
->vol
.dirty
!= !consistent
) {
5789 dprintf("imsm: mark '%s'\n", consistent
? "clean" : "dirty");
5794 super
->updates_pending
++;
5800 static void imsm_set_disk(struct active_array
*a
, int n
, int state
)
5802 int inst
= a
->info
.container_member
;
5803 struct intel_super
*super
= a
->container
->sb
;
5804 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5805 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5806 struct imsm_disk
*disk
;
5811 if (n
> map
->num_members
)
5812 fprintf(stderr
, "imsm: set_disk %d out of range 0..%d\n",
5813 n
, map
->num_members
- 1);
5818 dprintf("imsm: set_disk %d:%x\n", n
, state
);
5820 ord
= get_imsm_ord_tbl_ent(dev
, n
, -1);
5821 disk
= get_imsm_disk(super
, ord_to_idx(ord
));
5823 /* check for new failures */
5824 if (state
& DS_FAULTY
) {
5825 if (mark_failure(dev
, disk
, ord_to_idx(ord
)))
5826 super
->updates_pending
++;
5829 /* check if in_sync */
5830 if (state
& DS_INSYNC
&& ord
& IMSM_ORD_REBUILD
&& is_rebuilding(dev
)) {
5831 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
5833 set_imsm_ord_tbl_ent(migr_map
, n
, ord_to_idx(ord
));
5834 super
->updates_pending
++;
5837 failed
= imsm_count_failed(super
, dev
);
5838 map_state
= imsm_check_degraded(super
, dev
, failed
);
5840 /* check if recovery complete, newly degraded, or failed */
5841 if (map_state
== IMSM_T_STATE_NORMAL
&& is_rebuilding(dev
)) {
5842 end_migration(dev
, map_state
);
5843 map
= get_imsm_map(dev
, 0);
5844 map
->failed_disk_num
= ~0;
5845 super
->updates_pending
++;
5846 a
->last_checkpoint
= 0;
5847 } else if (map_state
== IMSM_T_STATE_DEGRADED
&&
5848 map
->map_state
!= map_state
&&
5849 !dev
->vol
.migr_state
) {
5850 dprintf("imsm: mark degraded\n");
5851 map
->map_state
= map_state
;
5852 super
->updates_pending
++;
5853 a
->last_checkpoint
= 0;
5854 } else if (map_state
== IMSM_T_STATE_FAILED
&&
5855 map
->map_state
!= map_state
) {
5856 dprintf("imsm: mark failed\n");
5857 end_migration(dev
, map_state
);
5858 super
->updates_pending
++;
5859 a
->last_checkpoint
= 0;
5860 } else if (is_gen_migration(dev
)) {
5861 dprintf("imsm: Detected General Migration in state: ");
5862 if (map_state
== IMSM_T_STATE_NORMAL
) {
5863 end_migration(dev
, map_state
);
5864 map
= get_imsm_map(dev
, 0);
5865 map
->failed_disk_num
= ~0;
5866 dprintf("normal\n");
5868 if (map_state
== IMSM_T_STATE_DEGRADED
) {
5869 printf("degraded\n");
5870 end_migration(dev
, map_state
);
5872 dprintf("failed\n");
5874 map
->map_state
= map_state
;
5876 super
->updates_pending
++;
5880 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
)
5883 __u32 mpb_size
= __le32_to_cpu(mpb
->mpb_size
);
5884 unsigned long long dsize
;
5885 unsigned long long sectors
;
5887 get_dev_size(fd
, NULL
, &dsize
);
5889 if (mpb_size
> 512) {
5890 /* -1 to account for anchor */
5891 sectors
= mpb_sectors(mpb
) - 1;
5893 /* write the extended mpb to the sectors preceeding the anchor */
5894 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0)
5897 if ((unsigned long long)write(fd
, buf
+ 512, 512 * sectors
)
5902 /* first block is stored on second to last sector of the disk */
5903 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0)
5906 if (write(fd
, buf
, 512) != 512)
5912 static void imsm_sync_metadata(struct supertype
*container
)
5914 struct intel_super
*super
= container
->sb
;
5916 dprintf("sync metadata: %d\n", super
->updates_pending
);
5917 if (!super
->updates_pending
)
5920 write_super_imsm(container
, 0);
5922 super
->updates_pending
= 0;
5925 static struct dl
*imsm_readd(struct intel_super
*super
, int idx
, struct active_array
*a
)
5927 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5928 int i
= get_imsm_disk_idx(dev
, idx
, -1);
5931 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
5935 if (dl
&& is_failed(&dl
->disk
))
5939 dprintf("%s: found %x:%x\n", __func__
, dl
->major
, dl
->minor
);
5944 static struct dl
*imsm_add_spare(struct intel_super
*super
, int slot
,
5945 struct active_array
*a
, int activate_new
,
5946 struct mdinfo
*additional_test_list
)
5948 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
5949 int idx
= get_imsm_disk_idx(dev
, slot
, -1);
5950 struct imsm_super
*mpb
= super
->anchor
;
5951 struct imsm_map
*map
;
5952 unsigned long long pos
;
5957 __u32 array_start
= 0;
5958 __u32 array_end
= 0;
5960 struct mdinfo
*test_list
;
5962 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
5963 /* If in this array, skip */
5964 for (d
= a
->info
.devs
; d
; d
= d
->next
)
5965 if (d
->state_fd
>= 0 &&
5966 d
->disk
.major
== dl
->major
&&
5967 d
->disk
.minor
== dl
->minor
) {
5968 dprintf("%x:%x already in array\n",
5969 dl
->major
, dl
->minor
);
5974 test_list
= additional_test_list
;
5976 if (test_list
->disk
.major
== dl
->major
&&
5977 test_list
->disk
.minor
== dl
->minor
) {
5978 dprintf("%x:%x already in additional test list\n",
5979 dl
->major
, dl
->minor
);
5982 test_list
= test_list
->next
;
5987 /* skip in use or failed drives */
5988 if (is_failed(&dl
->disk
) || idx
== dl
->index
||
5990 dprintf("%x:%x status (failed: %d index: %d)\n",
5991 dl
->major
, dl
->minor
, is_failed(&dl
->disk
), idx
);
5995 /* skip pure spares when we are looking for partially
5996 * assimilated drives
5998 if (dl
->index
== -1 && !activate_new
)
6001 /* Does this unused device have the requisite free space?
6002 * It needs to be able to cover all member volumes
6004 ex
= get_extents(super
, dl
);
6006 dprintf("cannot get extents\n");
6009 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6010 dev
= get_imsm_dev(super
, i
);
6011 map
= get_imsm_map(dev
, 0);
6013 /* check if this disk is already a member of
6016 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
6022 array_start
= __le32_to_cpu(map
->pba_of_lba0
);
6023 array_end
= array_start
+
6024 __le32_to_cpu(map
->blocks_per_member
) - 1;
6027 /* check that we can start at pba_of_lba0 with
6028 * blocks_per_member of space
6030 if (array_start
>= pos
&& array_end
< ex
[j
].start
) {
6034 pos
= ex
[j
].start
+ ex
[j
].size
;
6036 } while (ex
[j
-1].size
);
6043 if (i
< mpb
->num_raid_devs
) {
6044 dprintf("%x:%x does not have %u to %u available\n",
6045 dl
->major
, dl
->minor
, array_start
, array_end
);
6056 static int imsm_rebuild_allowed(struct supertype
*cont
, int dev_idx
, int failed
)
6058 struct imsm_dev
*dev2
;
6059 struct imsm_map
*map
;
6065 dev2
= get_imsm_dev(cont
->sb
, dev_idx
);
6067 state
= imsm_check_degraded(cont
->sb
, dev2
, failed
);
6068 if (state
== IMSM_T_STATE_FAILED
) {
6069 map
= get_imsm_map(dev2
, 0);
6072 for (slot
= 0; slot
< map
->num_members
; slot
++) {
6074 * Check if failed disks are deleted from intel
6075 * disk list or are marked to be deleted
6077 idx
= get_imsm_disk_idx(dev2
, slot
, -1);
6078 idisk
= get_imsm_dl_disk(cont
->sb
, idx
);
6080 * Do not rebuild the array if failed disks
6081 * from failed sub-array are not removed from
6085 is_failed(&idisk
->disk
) &&
6086 (idisk
->action
!= DISK_REMOVE
))
6094 static struct mdinfo
*imsm_activate_spare(struct active_array
*a
,
6095 struct metadata_update
**updates
)
6098 * Find a device with unused free space and use it to replace a
6099 * failed/vacant region in an array. We replace failed regions one a
6100 * array at a time. The result is that a new spare disk will be added
6101 * to the first failed array and after the monitor has finished
6102 * propagating failures the remainder will be consumed.
6104 * FIXME add a capability for mdmon to request spares from another
6108 struct intel_super
*super
= a
->container
->sb
;
6109 int inst
= a
->info
.container_member
;
6110 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
6111 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6112 int failed
= a
->info
.array
.raid_disks
;
6113 struct mdinfo
*rv
= NULL
;
6116 struct metadata_update
*mu
;
6118 struct imsm_update_activate_spare
*u
;
6123 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
6124 if ((d
->curr_state
& DS_FAULTY
) &&
6126 /* wait for Removal to happen */
6128 if (d
->state_fd
>= 0)
6132 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
6133 inst
, failed
, a
->info
.array
.raid_disks
, a
->info
.array
.level
);
6135 if (dev
->vol
.migr_state
&&
6136 dev
->vol
.migr_type
== MIGR_GEN_MIGR
)
6137 /* No repair during migration */
6140 if (a
->info
.array
.level
== 4)
6141 /* No repair for takeovered array
6142 * imsm doesn't support raid4
6146 if (imsm_check_degraded(super
, dev
, failed
) != IMSM_T_STATE_DEGRADED
)
6150 * If there are any failed disks check state of the other volume.
6151 * Block rebuild if the another one is failed until failed disks
6152 * are removed from container.
6155 dprintf("found failed disks in %s, check if there another"
6156 "failed sub-array.\n",
6158 /* check if states of the other volumes allow for rebuild */
6159 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
6161 allowed
= imsm_rebuild_allowed(a
->container
,
6169 /* For each slot, if it is not working, find a spare */
6170 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
6171 for (d
= a
->info
.devs
; d
; d
= d
->next
)
6172 if (d
->disk
.raid_disk
== i
)
6174 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
6175 if (d
&& (d
->state_fd
>= 0))
6179 * OK, this device needs recovery. Try to re-add the
6180 * previous occupant of this slot, if this fails see if
6181 * we can continue the assimilation of a spare that was
6182 * partially assimilated, finally try to activate a new
6185 dl
= imsm_readd(super
, i
, a
);
6187 dl
= imsm_add_spare(super
, i
, a
, 0, NULL
);
6189 dl
= imsm_add_spare(super
, i
, a
, 1, NULL
);
6193 /* found a usable disk with enough space */
6194 di
= malloc(sizeof(*di
));
6197 memset(di
, 0, sizeof(*di
));
6199 /* dl->index will be -1 in the case we are activating a
6200 * pristine spare. imsm_process_update() will create a
6201 * new index in this case. Once a disk is found to be
6202 * failed in all member arrays it is kicked from the
6205 di
->disk
.number
= dl
->index
;
6207 /* (ab)use di->devs to store a pointer to the device
6210 di
->devs
= (struct mdinfo
*) dl
;
6212 di
->disk
.raid_disk
= i
;
6213 di
->disk
.major
= dl
->major
;
6214 di
->disk
.minor
= dl
->minor
;
6216 di
->recovery_start
= 0;
6217 di
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
6218 di
->component_size
= a
->info
.component_size
;
6219 di
->container_member
= inst
;
6220 super
->random
= random32();
6224 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
6225 i
, di
->data_offset
);
6231 /* No spares found */
6233 /* Now 'rv' has a list of devices to return.
6234 * Create a metadata_update record to update the
6235 * disk_ord_tbl for the array
6237 mu
= malloc(sizeof(*mu
));
6239 mu
->buf
= malloc(sizeof(struct imsm_update_activate_spare
) * num_spares
);
6240 if (mu
->buf
== NULL
) {
6247 struct mdinfo
*n
= rv
->next
;
6256 mu
->space_list
= NULL
;
6257 mu
->len
= sizeof(struct imsm_update_activate_spare
) * num_spares
;
6258 mu
->next
= *updates
;
6259 u
= (struct imsm_update_activate_spare
*) mu
->buf
;
6261 for (di
= rv
; di
; di
= di
->next
) {
6262 u
->type
= update_activate_spare
;
6263 u
->dl
= (struct dl
*) di
->devs
;
6265 u
->slot
= di
->disk
.raid_disk
;
6276 static int disks_overlap(struct intel_super
*super
, int idx
, struct imsm_update_create_array
*u
)
6278 struct imsm_dev
*dev
= get_imsm_dev(super
, idx
);
6279 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6280 struct imsm_map
*new_map
= get_imsm_map(&u
->dev
, 0);
6281 struct disk_info
*inf
= get_disk_info(u
);
6282 struct imsm_disk
*disk
;
6286 for (i
= 0; i
< map
->num_members
; i
++) {
6287 disk
= get_imsm_disk(super
, get_imsm_disk_idx(dev
, i
, -1));
6288 for (j
= 0; j
< new_map
->num_members
; j
++)
6289 if (serialcmp(disk
->serial
, inf
[j
].serial
) == 0)
6297 static struct dl
*get_disk_super(struct intel_super
*super
, int major
, int minor
)
6299 struct dl
*dl
= NULL
;
6300 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6301 if ((dl
->major
== major
) && (dl
->minor
== minor
))
6306 static int remove_disk_super(struct intel_super
*super
, int major
, int minor
)
6308 struct dl
*prev
= NULL
;
6312 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6313 if ((dl
->major
== major
) && (dl
->minor
== minor
)) {
6316 prev
->next
= dl
->next
;
6318 super
->disks
= dl
->next
;
6320 __free_imsm_disk(dl
);
6321 dprintf("%s: removed %x:%x\n",
6322 __func__
, major
, minor
);
6330 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
);
6332 static int add_remove_disk_update(struct intel_super
*super
)
6334 int check_degraded
= 0;
6335 struct dl
*disk
= NULL
;
6336 /* add/remove some spares to/from the metadata/contrainer */
6337 while (super
->disk_mgmt_list
) {
6338 struct dl
*disk_cfg
;
6340 disk_cfg
= super
->disk_mgmt_list
;
6341 super
->disk_mgmt_list
= disk_cfg
->next
;
6342 disk_cfg
->next
= NULL
;
6344 if (disk_cfg
->action
== DISK_ADD
) {
6345 disk_cfg
->next
= super
->disks
;
6346 super
->disks
= disk_cfg
;
6348 dprintf("%s: added %x:%x\n",
6349 __func__
, disk_cfg
->major
,
6351 } else if (disk_cfg
->action
== DISK_REMOVE
) {
6352 dprintf("Disk remove action processed: %x.%x\n",
6353 disk_cfg
->major
, disk_cfg
->minor
);
6354 disk
= get_disk_super(super
,
6358 /* store action status */
6359 disk
->action
= DISK_REMOVE
;
6360 /* remove spare disks only */
6361 if (disk
->index
== -1) {
6362 remove_disk_super(super
,
6367 /* release allocate disk structure */
6368 __free_imsm_disk(disk_cfg
);
6371 return check_degraded
;
6375 static int apply_reshape_migration_update(struct imsm_update_reshape_migration
*u
,
6376 struct intel_super
*super
,
6379 struct intel_dev
*id
;
6380 void **tofree
= NULL
;
6383 dprintf("apply_reshape_migration_update()\n");
6384 if ((u
->subdev
< 0) ||
6386 dprintf("imsm: Error: Wrong subdev: %i\n", u
->subdev
);
6389 if ((space_list
== NULL
) || (*space_list
== NULL
)) {
6390 dprintf("imsm: Error: Memory is not allocated\n");
6394 for (id
= super
->devlist
; id
; id
= id
->next
) {
6395 if (id
->index
== (unsigned)u
->subdev
) {
6396 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subdev
);
6397 struct imsm_map
*map
;
6398 struct imsm_dev
*new_dev
=
6399 (struct imsm_dev
*)*space_list
;
6400 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
6402 struct dl
*new_disk
;
6404 if (new_dev
== NULL
)
6406 *space_list
= **space_list
;
6407 memcpy(new_dev
, dev
, sizeof_imsm_dev(dev
, 0));
6408 map
= get_imsm_map(new_dev
, 0);
6410 dprintf("imsm: Error: migration in progress");
6414 to_state
= map
->map_state
;
6415 if ((u
->new_level
== 5) && (map
->raid_level
== 0)) {
6417 /* this should not happen */
6418 if (u
->new_disks
[0] < 0) {
6419 map
->failed_disk_num
=
6420 map
->num_members
- 1;
6421 to_state
= IMSM_T_STATE_DEGRADED
;
6423 to_state
= IMSM_T_STATE_NORMAL
;
6425 migrate(new_dev
, super
, to_state
, MIGR_GEN_MIGR
);
6426 if (u
->new_level
> -1)
6427 map
->raid_level
= u
->new_level
;
6428 migr_map
= get_imsm_map(new_dev
, 1);
6429 if ((u
->new_level
== 5) &&
6430 (migr_map
->raid_level
== 0)) {
6431 int ord
= map
->num_members
- 1;
6432 migr_map
->num_members
--;
6433 if (u
->new_disks
[0] < 0)
6434 ord
|= IMSM_ORD_REBUILD
;
6435 set_imsm_ord_tbl_ent(map
,
6436 map
->num_members
- 1,
6440 tofree
= (void **)dev
;
6442 /* update chunk size
6444 if (u
->new_chunksize
> 0)
6445 map
->blocks_per_strip
=
6446 __cpu_to_le16(u
->new_chunksize
* 2);
6450 if ((u
->new_level
!= 5) ||
6451 (migr_map
->raid_level
!= 0) ||
6452 (migr_map
->raid_level
== map
->raid_level
))
6455 if (u
->new_disks
[0] >= 0) {
6458 new_disk
= get_disk_super(super
,
6459 major(u
->new_disks
[0]),
6460 minor(u
->new_disks
[0]));
6461 dprintf("imsm: new disk for reshape is: %i:%i "
6462 "(%p, index = %i)\n",
6463 major(u
->new_disks
[0]),
6464 minor(u
->new_disks
[0]),
6465 new_disk
, new_disk
->index
);
6466 if (new_disk
== NULL
)
6467 goto error_disk_add
;
6469 new_disk
->index
= map
->num_members
- 1;
6470 /* slot to fill in autolayout
6472 new_disk
->raiddisk
= new_disk
->index
;
6473 new_disk
->disk
.status
|= CONFIGURED_DISK
;
6474 new_disk
->disk
.status
&= ~SPARE_DISK
;
6476 goto error_disk_add
;
6479 *tofree
= *space_list
;
6480 /* calculate new size
6482 imsm_set_array_size(new_dev
);
6489 *space_list
= tofree
;
6493 dprintf("Error: imsm: Cannot find disk.\n");
6498 static int apply_reshape_container_disks_update(struct imsm_update_reshape
*u
,
6499 struct intel_super
*super
,
6502 struct dl
*new_disk
;
6503 struct intel_dev
*id
;
6505 int delta_disks
= u
->new_raid_disks
- u
->old_raid_disks
;
6506 int disk_count
= u
->old_raid_disks
;
6507 void **tofree
= NULL
;
6508 int devices_to_reshape
= 1;
6509 struct imsm_super
*mpb
= super
->anchor
;
6511 unsigned int dev_id
;
6513 dprintf("imsm: apply_reshape_container_disks_update()\n");
6515 /* enable spares to use in array */
6516 for (i
= 0; i
< delta_disks
; i
++) {
6517 new_disk
= get_disk_super(super
,
6518 major(u
->new_disks
[i
]),
6519 minor(u
->new_disks
[i
]));
6520 dprintf("imsm: new disk for reshape is: %i:%i "
6521 "(%p, index = %i)\n",
6522 major(u
->new_disks
[i
]), minor(u
->new_disks
[i
]),
6523 new_disk
, new_disk
->index
);
6524 if ((new_disk
== NULL
) ||
6525 ((new_disk
->index
>= 0) &&
6526 (new_disk
->index
< u
->old_raid_disks
)))
6527 goto update_reshape_exit
;
6528 new_disk
->index
= disk_count
++;
6529 /* slot to fill in autolayout
6531 new_disk
->raiddisk
= new_disk
->index
;
6532 new_disk
->disk
.status
|=
6534 new_disk
->disk
.status
&= ~SPARE_DISK
;
6537 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6538 mpb
->num_raid_devs
);
6539 /* manage changes in volume
6541 for (dev_id
= 0; dev_id
< mpb
->num_raid_devs
; dev_id
++) {
6542 void **sp
= *space_list
;
6543 struct imsm_dev
*newdev
;
6544 struct imsm_map
*newmap
, *oldmap
;
6546 for (id
= super
->devlist
; id
; id
= id
->next
) {
6547 if (id
->index
== dev_id
)
6556 /* Copy the dev, but not (all of) the map */
6557 memcpy(newdev
, id
->dev
, sizeof(*newdev
));
6558 oldmap
= get_imsm_map(id
->dev
, 0);
6559 newmap
= get_imsm_map(newdev
, 0);
6560 /* Copy the current map */
6561 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6562 /* update one device only
6564 if (devices_to_reshape
) {
6565 dprintf("imsm: modifying subdev: %i\n",
6567 devices_to_reshape
--;
6568 newdev
->vol
.migr_state
= 1;
6569 newdev
->vol
.curr_migr_unit
= 0;
6570 newdev
->vol
.migr_type
= MIGR_GEN_MIGR
;
6571 newmap
->num_members
= u
->new_raid_disks
;
6572 for (i
= 0; i
< delta_disks
; i
++) {
6573 set_imsm_ord_tbl_ent(newmap
,
6574 u
->old_raid_disks
+ i
,
6575 u
->old_raid_disks
+ i
);
6577 /* New map is correct, now need to save old map
6579 newmap
= get_imsm_map(newdev
, 1);
6580 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6582 imsm_set_array_size(newdev
);
6585 sp
= (void **)id
->dev
;
6590 /* Clear migration record */
6591 memset(super
->migr_rec
, 0, sizeof(struct migr_record
));
6594 *space_list
= tofree
;
6597 update_reshape_exit
:
6602 static int apply_takeover_update(struct imsm_update_takeover
*u
,
6603 struct intel_super
*super
,
6606 struct imsm_dev
*dev
= NULL
;
6607 struct intel_dev
*dv
;
6608 struct imsm_dev
*dev_new
;
6609 struct imsm_map
*map
;
6613 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
6614 if (dv
->index
== (unsigned int)u
->subarray
) {
6622 map
= get_imsm_map(dev
, 0);
6624 if (u
->direction
== R10_TO_R0
) {
6625 /* Number of failed disks must be half of initial disk number */
6626 if (imsm_count_failed(super
, dev
) != (map
->num_members
/ 2))
6629 /* iterate through devices to mark removed disks as spare */
6630 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6631 if (dm
->disk
.status
& FAILED_DISK
) {
6632 int idx
= dm
->index
;
6633 /* update indexes on the disk list */
6634 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6635 the index values will end up being correct.... NB */
6636 for (du
= super
->disks
; du
; du
= du
->next
)
6637 if (du
->index
> idx
)
6639 /* mark as spare disk */
6640 dm
->disk
.status
= SPARE_DISK
;
6645 map
->num_members
= map
->num_members
/ 2;
6646 map
->map_state
= IMSM_T_STATE_NORMAL
;
6647 map
->num_domains
= 1;
6648 map
->raid_level
= 0;
6649 map
->failed_disk_num
= -1;
6652 if (u
->direction
== R0_TO_R10
) {
6654 /* update slots in current disk list */
6655 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6659 /* create new *missing* disks */
6660 for (i
= 0; i
< map
->num_members
; i
++) {
6661 space
= *space_list
;
6664 *space_list
= *space
;
6666 memcpy(du
, super
->disks
, sizeof(*du
));
6670 du
->index
= (i
* 2) + 1;
6671 sprintf((char *)du
->disk
.serial
,
6672 " MISSING_%d", du
->index
);
6673 sprintf((char *)du
->serial
,
6674 "MISSING_%d", du
->index
);
6675 du
->next
= super
->missing
;
6676 super
->missing
= du
;
6678 /* create new dev and map */
6679 space
= *space_list
;
6682 *space_list
= *space
;
6683 dev_new
= (void *)space
;
6684 memcpy(dev_new
, dev
, sizeof(*dev
));
6685 /* update new map */
6686 map
= get_imsm_map(dev_new
, 0);
6687 map
->num_members
= map
->num_members
* 2;
6688 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6689 map
->num_domains
= 2;
6690 map
->raid_level
= 1;
6691 /* replace dev<->dev_new */
6694 /* update disk order table */
6695 for (du
= super
->disks
; du
; du
= du
->next
)
6697 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6698 for (du
= super
->missing
; du
; du
= du
->next
)
6699 if (du
->index
>= 0) {
6700 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6701 mark_missing(dev_new
, &du
->disk
, du
->index
);
6707 static void imsm_process_update(struct supertype
*st
,
6708 struct metadata_update
*update
)
6711 * crack open the metadata_update envelope to find the update record
6712 * update can be one of:
6713 * update_reshape_container_disks - all the arrays in the container
6714 * are being reshaped to have more devices. We need to mark
6715 * the arrays for general migration and convert selected spares
6716 * into active devices.
6717 * update_activate_spare - a spare device has replaced a failed
6718 * device in an array, update the disk_ord_tbl. If this disk is
6719 * present in all member arrays then also clear the SPARE_DISK
6721 * update_create_array
6723 * update_rename_array
6724 * update_add_remove_disk
6726 struct intel_super
*super
= st
->sb
;
6727 struct imsm_super
*mpb
;
6728 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6730 /* update requires a larger buf but the allocation failed */
6731 if (super
->next_len
&& !super
->next_buf
) {
6732 super
->next_len
= 0;
6736 if (super
->next_buf
) {
6737 memcpy(super
->next_buf
, super
->buf
, super
->len
);
6739 super
->len
= super
->next_len
;
6740 super
->buf
= super
->next_buf
;
6742 super
->next_len
= 0;
6743 super
->next_buf
= NULL
;
6746 mpb
= super
->anchor
;
6749 case update_takeover
: {
6750 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6751 if (apply_takeover_update(u
, super
, &update
->space_list
)) {
6752 imsm_update_version_info(super
);
6753 super
->updates_pending
++;
6758 case update_reshape_container_disks
: {
6759 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6760 if (apply_reshape_container_disks_update(
6761 u
, super
, &update
->space_list
))
6762 super
->updates_pending
++;
6765 case update_reshape_migration
: {
6766 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
6767 if (apply_reshape_migration_update(
6768 u
, super
, &update
->space_list
))
6769 super
->updates_pending
++;
6772 case update_activate_spare
: {
6773 struct imsm_update_activate_spare
*u
= (void *) update
->buf
;
6774 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->array
);
6775 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6776 struct imsm_map
*migr_map
;
6777 struct active_array
*a
;
6778 struct imsm_disk
*disk
;
6783 int victim
= get_imsm_disk_idx(dev
, u
->slot
, -1);
6786 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6791 fprintf(stderr
, "error: imsm_activate_spare passed "
6792 "an unknown disk (index: %d)\n",
6797 super
->updates_pending
++;
6798 /* count failures (excluding rebuilds and the victim)
6799 * to determine map[0] state
6802 for (i
= 0; i
< map
->num_members
; i
++) {
6805 disk
= get_imsm_disk(super
,
6806 get_imsm_disk_idx(dev
, i
, -1));
6807 if (!disk
|| is_failed(disk
))
6811 /* adding a pristine spare, assign a new index */
6812 if (dl
->index
< 0) {
6813 dl
->index
= super
->anchor
->num_disks
;
6814 super
->anchor
->num_disks
++;
6817 disk
->status
|= CONFIGURED_DISK
;
6818 disk
->status
&= ~SPARE_DISK
;
6821 to_state
= imsm_check_degraded(super
, dev
, failed
);
6822 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6823 migrate(dev
, super
, to_state
, MIGR_REBUILD
);
6824 migr_map
= get_imsm_map(dev
, 1);
6825 set_imsm_ord_tbl_ent(map
, u
->slot
, dl
->index
);
6826 set_imsm_ord_tbl_ent(migr_map
, u
->slot
, dl
->index
| IMSM_ORD_REBUILD
);
6828 /* update the family_num to mark a new container
6829 * generation, being careful to record the existing
6830 * family_num in orig_family_num to clean up after
6831 * earlier mdadm versions that neglected to set it.
6833 if (mpb
->orig_family_num
== 0)
6834 mpb
->orig_family_num
= mpb
->family_num
;
6835 mpb
->family_num
+= super
->random
;
6837 /* count arrays using the victim in the metadata */
6839 for (a
= st
->arrays
; a
; a
= a
->next
) {
6840 dev
= get_imsm_dev(super
, a
->info
.container_member
);
6841 map
= get_imsm_map(dev
, 0);
6843 if (get_imsm_disk_slot(map
, victim
) >= 0)
6847 /* delete the victim if it is no longer being
6853 /* We know that 'manager' isn't touching anything,
6854 * so it is safe to delete
6856 for (dlp
= &super
->disks
; *dlp
; dlp
= &(*dlp
)->next
)
6857 if ((*dlp
)->index
== victim
)
6860 /* victim may be on the missing list */
6862 for (dlp
= &super
->missing
; *dlp
; dlp
= &(*dlp
)->next
)
6863 if ((*dlp
)->index
== victim
)
6865 imsm_delete(super
, dlp
, victim
);
6869 case update_create_array
: {
6870 /* someone wants to create a new array, we need to be aware of
6871 * a few races/collisions:
6872 * 1/ 'Create' called by two separate instances of mdadm
6873 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6874 * devices that have since been assimilated via
6876 * In the event this update can not be carried out mdadm will
6877 * (FIX ME) notice that its update did not take hold.
6879 struct imsm_update_create_array
*u
= (void *) update
->buf
;
6880 struct intel_dev
*dv
;
6881 struct imsm_dev
*dev
;
6882 struct imsm_map
*map
, *new_map
;
6883 unsigned long long start
, end
;
6884 unsigned long long new_start
, new_end
;
6886 struct disk_info
*inf
;
6889 /* handle racing creates: first come first serve */
6890 if (u
->dev_idx
< mpb
->num_raid_devs
) {
6891 dprintf("%s: subarray %d already defined\n",
6892 __func__
, u
->dev_idx
);
6896 /* check update is next in sequence */
6897 if (u
->dev_idx
!= mpb
->num_raid_devs
) {
6898 dprintf("%s: can not create array %d expected index %d\n",
6899 __func__
, u
->dev_idx
, mpb
->num_raid_devs
);
6903 new_map
= get_imsm_map(&u
->dev
, 0);
6904 new_start
= __le32_to_cpu(new_map
->pba_of_lba0
);
6905 new_end
= new_start
+ __le32_to_cpu(new_map
->blocks_per_member
);
6906 inf
= get_disk_info(u
);
6908 /* handle activate_spare versus create race:
6909 * check to make sure that overlapping arrays do not include
6912 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6913 dev
= get_imsm_dev(super
, i
);
6914 map
= get_imsm_map(dev
, 0);
6915 start
= __le32_to_cpu(map
->pba_of_lba0
);
6916 end
= start
+ __le32_to_cpu(map
->blocks_per_member
);
6917 if ((new_start
>= start
&& new_start
<= end
) ||
6918 (start
>= new_start
&& start
<= new_end
))
6923 if (disks_overlap(super
, i
, u
)) {
6924 dprintf("%s: arrays overlap\n", __func__
);
6929 /* check that prepare update was successful */
6930 if (!update
->space
) {
6931 dprintf("%s: prepare update failed\n", __func__
);
6935 /* check that all disks are still active before committing
6936 * changes. FIXME: could we instead handle this by creating a
6937 * degraded array? That's probably not what the user expects,
6938 * so better to drop this update on the floor.
6940 for (i
= 0; i
< new_map
->num_members
; i
++) {
6941 dl
= serial_to_dl(inf
[i
].serial
, super
);
6943 dprintf("%s: disk disappeared\n", __func__
);
6948 super
->updates_pending
++;
6950 /* convert spares to members and fixup ord_tbl */
6951 for (i
= 0; i
< new_map
->num_members
; i
++) {
6952 dl
= serial_to_dl(inf
[i
].serial
, super
);
6953 if (dl
->index
== -1) {
6954 dl
->index
= mpb
->num_disks
;
6956 dl
->disk
.status
|= CONFIGURED_DISK
;
6957 dl
->disk
.status
&= ~SPARE_DISK
;
6959 set_imsm_ord_tbl_ent(new_map
, i
, dl
->index
);
6964 update
->space
= NULL
;
6965 imsm_copy_dev(dev
, &u
->dev
);
6966 dv
->index
= u
->dev_idx
;
6967 dv
->next
= super
->devlist
;
6968 super
->devlist
= dv
;
6969 mpb
->num_raid_devs
++;
6971 imsm_update_version_info(super
);
6974 /* mdmon knows how to release update->space, but not
6975 * ((struct intel_dev *) update->space)->dev
6977 if (update
->space
) {
6983 case update_kill_array
: {
6984 struct imsm_update_kill_array
*u
= (void *) update
->buf
;
6985 int victim
= u
->dev_idx
;
6986 struct active_array
*a
;
6987 struct intel_dev
**dp
;
6988 struct imsm_dev
*dev
;
6990 /* sanity check that we are not affecting the uuid of
6991 * active arrays, or deleting an active array
6993 * FIXME when immutable ids are available, but note that
6994 * we'll also need to fixup the invalidated/active
6995 * subarray indexes in mdstat
6997 for (a
= st
->arrays
; a
; a
= a
->next
)
6998 if (a
->info
.container_member
>= victim
)
7000 /* by definition if mdmon is running at least one array
7001 * is active in the container, so checking
7002 * mpb->num_raid_devs is just extra paranoia
7004 dev
= get_imsm_dev(super
, victim
);
7005 if (a
|| !dev
|| mpb
->num_raid_devs
== 1) {
7006 dprintf("failed to delete subarray-%d\n", victim
);
7010 for (dp
= &super
->devlist
; *dp
;)
7011 if ((*dp
)->index
== (unsigned)super
->current_vol
) {
7014 if ((*dp
)->index
> (unsigned)victim
)
7018 mpb
->num_raid_devs
--;
7019 super
->updates_pending
++;
7022 case update_rename_array
: {
7023 struct imsm_update_rename_array
*u
= (void *) update
->buf
;
7024 char name
[MAX_RAID_SERIAL_LEN
+1];
7025 int target
= u
->dev_idx
;
7026 struct active_array
*a
;
7027 struct imsm_dev
*dev
;
7029 /* sanity check that we are not affecting the uuid of
7032 snprintf(name
, MAX_RAID_SERIAL_LEN
, "%s", (char *) u
->name
);
7033 name
[MAX_RAID_SERIAL_LEN
] = '\0';
7034 for (a
= st
->arrays
; a
; a
= a
->next
)
7035 if (a
->info
.container_member
== target
)
7037 dev
= get_imsm_dev(super
, u
->dev_idx
);
7038 if (a
|| !dev
|| !check_name(super
, name
, 1)) {
7039 dprintf("failed to rename subarray-%d\n", target
);
7043 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
7044 super
->updates_pending
++;
7047 case update_add_remove_disk
: {
7048 /* we may be able to repair some arrays if disks are
7049 * being added, check teh status of add_remove_disk
7050 * if discs has been added.
7052 if (add_remove_disk_update(super
)) {
7053 struct active_array
*a
;
7055 super
->updates_pending
++;
7056 for (a
= st
->arrays
; a
; a
= a
->next
)
7057 a
->check_degraded
= 1;
7062 fprintf(stderr
, "error: unsuported process update type:"
7063 "(type: %d)\n", type
);
7067 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
);
7069 static void imsm_prepare_update(struct supertype
*st
,
7070 struct metadata_update
*update
)
7073 * Allocate space to hold new disk entries, raid-device entries or a new
7074 * mpb if necessary. The manager synchronously waits for updates to
7075 * complete in the monitor, so new mpb buffers allocated here can be
7076 * integrated by the monitor thread without worrying about live pointers
7077 * in the manager thread.
7079 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
7080 struct intel_super
*super
= st
->sb
;
7081 struct imsm_super
*mpb
= super
->anchor
;
7086 case update_takeover
: {
7087 struct imsm_update_takeover
*u
= (void *)update
->buf
;
7088 if (u
->direction
== R0_TO_R10
) {
7089 void **tail
= (void **)&update
->space_list
;
7090 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subarray
);
7091 struct imsm_map
*map
= get_imsm_map(dev
, 0);
7092 int num_members
= map
->num_members
;
7096 /* allocate memory for added disks */
7097 for (i
= 0; i
< num_members
; i
++) {
7098 size
= sizeof(struct dl
);
7099 space
= malloc(size
);
7108 /* allocate memory for new device */
7109 size
= sizeof_imsm_dev(super
->devlist
->dev
, 0) +
7110 (num_members
* sizeof(__u32
));
7111 space
= malloc(size
);
7120 len
= disks_to_mpb_size(num_members
* 2);
7122 /* if allocation didn't success, free buffer */
7123 while (update
->space_list
) {
7124 void **sp
= update
->space_list
;
7125 update
->space_list
= *sp
;
7133 case update_reshape_container_disks
: {
7134 /* Every raid device in the container is about to
7135 * gain some more devices, and we will enter a
7137 * So each 'imsm_map' will be bigger, and the imsm_vol
7138 * will now hold 2 of them.
7139 * Thus we need new 'struct imsm_dev' allocations sized
7140 * as sizeof_imsm_dev but with more devices in both maps.
7142 struct imsm_update_reshape
*u
= (void *)update
->buf
;
7143 struct intel_dev
*dl
;
7144 void **space_tail
= (void**)&update
->space_list
;
7146 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7148 for (dl
= super
->devlist
; dl
; dl
= dl
->next
) {
7149 int size
= sizeof_imsm_dev(dl
->dev
, 1);
7151 if (u
->new_raid_disks
> u
->old_raid_disks
)
7152 size
+= sizeof(__u32
)*2*
7153 (u
->new_raid_disks
- u
->old_raid_disks
);
7162 len
= disks_to_mpb_size(u
->new_raid_disks
);
7163 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
7166 case update_reshape_migration
: {
7167 /* for migration level 0->5 we need to add disks
7168 * so the same as for container operation we will copy
7169 * device to the bigger location.
7170 * in memory prepared device and new disk area are prepared
7171 * for usage in process update
7173 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
7174 struct intel_dev
*id
;
7175 void **space_tail
= (void **)&update
->space_list
;
7178 int current_level
= -1;
7180 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7182 /* add space for bigger array in update
7184 for (id
= super
->devlist
; id
; id
= id
->next
) {
7185 if (id
->index
== (unsigned)u
->subdev
) {
7186 size
= sizeof_imsm_dev(id
->dev
, 1);
7187 if (u
->new_raid_disks
> u
->old_raid_disks
)
7188 size
+= sizeof(__u32
)*2*
7189 (u
->new_raid_disks
- u
->old_raid_disks
);
7199 if (update
->space_list
== NULL
)
7202 /* add space for disk in update
7204 size
= sizeof(struct dl
);
7207 free(update
->space_list
);
7208 update
->space_list
= NULL
;
7215 /* add spare device to update
7217 for (id
= super
->devlist
; id
; id
= id
->next
)
7218 if (id
->index
== (unsigned)u
->subdev
) {
7219 struct imsm_dev
*dev
;
7220 struct imsm_map
*map
;
7222 dev
= get_imsm_dev(super
, u
->subdev
);
7223 map
= get_imsm_map(dev
, 0);
7224 current_level
= map
->raid_level
;
7227 if ((u
->new_level
== 5) && (u
->new_level
!= current_level
)) {
7228 struct mdinfo
*spares
;
7230 spares
= get_spares_for_grow(st
);
7238 makedev(dev
->disk
.major
,
7240 dl
= get_disk_super(super
,
7243 dl
->index
= u
->old_raid_disks
;
7249 len
= disks_to_mpb_size(u
->new_raid_disks
);
7250 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
7253 case update_create_array
: {
7254 struct imsm_update_create_array
*u
= (void *) update
->buf
;
7255 struct intel_dev
*dv
;
7256 struct imsm_dev
*dev
= &u
->dev
;
7257 struct imsm_map
*map
= get_imsm_map(dev
, 0);
7259 struct disk_info
*inf
;
7263 inf
= get_disk_info(u
);
7264 len
= sizeof_imsm_dev(dev
, 1);
7265 /* allocate a new super->devlist entry */
7266 dv
= malloc(sizeof(*dv
));
7268 dv
->dev
= malloc(len
);
7273 update
->space
= NULL
;
7277 /* count how many spares will be converted to members */
7278 for (i
= 0; i
< map
->num_members
; i
++) {
7279 dl
= serial_to_dl(inf
[i
].serial
, super
);
7281 /* hmm maybe it failed?, nothing we can do about
7286 if (count_memberships(dl
, super
) == 0)
7289 len
+= activate
* sizeof(struct imsm_disk
);
7296 /* check if we need a larger metadata buffer */
7297 if (super
->next_buf
)
7298 buf_len
= super
->next_len
;
7300 buf_len
= super
->len
;
7302 if (__le32_to_cpu(mpb
->mpb_size
) + len
> buf_len
) {
7303 /* ok we need a larger buf than what is currently allocated
7304 * if this allocation fails process_update will notice that
7305 * ->next_len is set and ->next_buf is NULL
7307 buf_len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + len
, 512);
7308 if (super
->next_buf
)
7309 free(super
->next_buf
);
7311 super
->next_len
= buf_len
;
7312 if (posix_memalign(&super
->next_buf
, 512, buf_len
) == 0)
7313 memset(super
->next_buf
, 0, buf_len
);
7315 super
->next_buf
= NULL
;
7319 /* must be called while manager is quiesced */
7320 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
)
7322 struct imsm_super
*mpb
= super
->anchor
;
7324 struct imsm_dev
*dev
;
7325 struct imsm_map
*map
;
7326 int i
, j
, num_members
;
7329 dprintf("%s: deleting device[%d] from imsm_super\n",
7332 /* shift all indexes down one */
7333 for (iter
= super
->disks
; iter
; iter
= iter
->next
)
7334 if (iter
->index
> (int)index
)
7336 for (iter
= super
->missing
; iter
; iter
= iter
->next
)
7337 if (iter
->index
> (int)index
)
7340 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
7341 dev
= get_imsm_dev(super
, i
);
7342 map
= get_imsm_map(dev
, 0);
7343 num_members
= map
->num_members
;
7344 for (j
= 0; j
< num_members
; j
++) {
7345 /* update ord entries being careful not to propagate
7346 * ord-flags to the first map
7348 ord
= get_imsm_ord_tbl_ent(dev
, j
, -1);
7350 if (ord_to_idx(ord
) <= index
)
7353 map
= get_imsm_map(dev
, 0);
7354 set_imsm_ord_tbl_ent(map
, j
, ord_to_idx(ord
- 1));
7355 map
= get_imsm_map(dev
, 1);
7357 set_imsm_ord_tbl_ent(map
, j
, ord
- 1);
7362 super
->updates_pending
++;
7364 struct dl
*dl
= *dlp
;
7366 *dlp
= (*dlp
)->next
;
7367 __free_imsm_disk(dl
);
7371 /*******************************************************************************
7372 * Function: open_backup_targets
7373 * Description: Function opens file descriptors for all devices given in
7376 * info : general array info
7377 * raid_disks : number of disks
7378 * raid_fds : table of device's file descriptors
7382 ******************************************************************************/
7383 int open_backup_targets(struct mdinfo
*info
, int raid_disks
, int *raid_fds
)
7387 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
7390 if (sd
->disk
.state
& (1<<MD_DISK_FAULTY
)) {
7391 dprintf("disk is faulty!!\n");
7395 if ((sd
->disk
.raid_disk
>= raid_disks
) ||
7396 (sd
->disk
.raid_disk
< 0))
7399 dn
= map_dev(sd
->disk
.major
,
7401 raid_fds
[sd
->disk
.raid_disk
] = dev_open(dn
, O_RDWR
);
7402 if (raid_fds
[sd
->disk
.raid_disk
] < 0) {
7403 fprintf(stderr
, "cannot open component\n");
7410 /*******************************************************************************
7411 * Function: init_migr_record_imsm
7412 * Description: Function inits imsm migration record
7414 * super : imsm internal array info
7415 * dev : device under migration
7416 * info : general array info to find the smallest device
7419 ******************************************************************************/
7420 void init_migr_record_imsm(struct supertype
*st
, struct imsm_dev
*dev
,
7421 struct mdinfo
*info
)
7423 struct intel_super
*super
= st
->sb
;
7424 struct migr_record
*migr_rec
= super
->migr_rec
;
7426 unsigned long long dsize
, dev_sectors
;
7427 long long unsigned min_dev_sectors
= -1LLU;
7431 struct imsm_map
*map_dest
= get_imsm_map(dev
, 0);
7432 struct imsm_map
*map_src
= get_imsm_map(dev
, 1);
7433 unsigned long long num_migr_units
;
7435 unsigned long long array_blocks
=
7436 (((unsigned long long)__le32_to_cpu(dev
->size_high
)) << 32) +
7437 __le32_to_cpu(dev
->size_low
);
7439 memset(migr_rec
, 0, sizeof(struct migr_record
));
7440 migr_rec
->family_num
= __cpu_to_le32(super
->anchor
->family_num
);
7442 /* only ascending reshape supported now */
7443 migr_rec
->ascending_migr
= __cpu_to_le32(1);
7445 migr_rec
->dest_depth_per_unit
= GEN_MIGR_AREA_SIZE
/
7446 max(map_dest
->blocks_per_strip
, map_src
->blocks_per_strip
);
7447 migr_rec
->dest_depth_per_unit
*= map_dest
->blocks_per_strip
;
7448 new_data_disks
= imsm_num_data_members(dev
, 0);
7449 migr_rec
->blocks_per_unit
=
7450 __cpu_to_le32(migr_rec
->dest_depth_per_unit
* new_data_disks
);
7451 migr_rec
->dest_depth_per_unit
=
7452 __cpu_to_le32(migr_rec
->dest_depth_per_unit
);
7455 array_blocks
/ __le32_to_cpu(migr_rec
->blocks_per_unit
);
7457 if (array_blocks
% __le32_to_cpu(migr_rec
->blocks_per_unit
))
7459 migr_rec
->num_migr_units
= __cpu_to_le32(num_migr_units
);
7461 migr_rec
->post_migr_vol_cap
= dev
->size_low
;
7462 migr_rec
->post_migr_vol_cap_hi
= dev
->size_high
;
7465 /* Find the smallest dev */
7466 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
7467 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
7468 fd
= dev_open(nm
, O_RDONLY
);
7471 get_dev_size(fd
, NULL
, &dsize
);
7472 dev_sectors
= dsize
/ 512;
7473 if (dev_sectors
< min_dev_sectors
)
7474 min_dev_sectors
= dev_sectors
;
7477 migr_rec
->ckpt_area_pba
= __cpu_to_le32(min_dev_sectors
-
7478 RAID_DISK_RESERVED_BLOCKS_IMSM_HI
);
7480 write_imsm_migr_rec(st
);
7485 /*******************************************************************************
7486 * Function: save_backup_imsm
7487 * Description: Function saves critical data stripes to Migration Copy Area
7488 * and updates the current migration unit status.
7489 * Use restore_stripes() to form a destination stripe,
7490 * and to write it to the Copy Area.
7492 * st : supertype information
7493 * info : general array info
7494 * buf : input buffer
7495 * write_offset : address of data to backup
7496 * length : length of data to backup (blocks_per_unit)
7500 ******************************************************************************/
7501 int save_backup_imsm(struct supertype
*st
,
7502 struct imsm_dev
*dev
,
7503 struct mdinfo
*info
,
7509 struct intel_super
*super
= st
->sb
;
7510 unsigned long long *target_offsets
= NULL
;
7511 int *targets
= NULL
;
7513 struct imsm_map
*map_dest
= get_imsm_map(dev
, 0);
7514 int new_disks
= map_dest
->num_members
;
7516 targets
= malloc(new_disks
* sizeof(int));
7520 target_offsets
= malloc(new_disks
* sizeof(unsigned long long));
7521 if (!target_offsets
)
7524 for (i
= 0; i
< new_disks
; i
++) {
7526 target_offsets
[i
] = (unsigned long long)
7527 __le32_to_cpu(super
->migr_rec
->ckpt_area_pba
) * 512;
7530 if (open_backup_targets(info
, new_disks
, targets
))
7533 if (restore_stripes(targets
, /* list of dest devices */
7534 target_offsets
, /* migration record offsets */
7539 -1, /* source backup file descriptor */
7540 0, /* input buf offset
7541 * always 0 buf is already offset */
7545 fprintf(stderr
, Name
": Error restoring stripes\n");
7553 for (i
= 0; i
< new_disks
; i
++)
7554 if (targets
[i
] >= 0)
7558 free(target_offsets
);
7563 /*******************************************************************************
7564 * Function: save_checkpoint_imsm
7565 * Description: Function called for current unit status update
7566 * in the migration record. It writes it to disk.
7568 * super : imsm internal array info
7569 * info : general array info
7573 ******************************************************************************/
7574 int save_checkpoint_imsm(struct supertype
*st
, struct mdinfo
*info
, int state
)
7576 struct intel_super
*super
= st
->sb
;
7577 load_imsm_migr_rec(super
, info
);
7578 if (__le32_to_cpu(super
->migr_rec
->blocks_per_unit
) == 0) {
7579 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7583 super
->migr_rec
->curr_migr_unit
=
7584 __cpu_to_le32(info
->reshape_progress
/
7585 __le32_to_cpu(super
->migr_rec
->blocks_per_unit
));
7586 super
->migr_rec
->rec_status
= __cpu_to_le32(state
);
7587 super
->migr_rec
->dest_1st_member_lba
=
7588 __cpu_to_le32((__le32_to_cpu(super
->migr_rec
->curr_migr_unit
))
7589 * __le32_to_cpu(super
->migr_rec
->dest_depth_per_unit
));
7590 if (write_imsm_migr_rec(st
) < 0) {
7591 dprintf("imsm: Cannot write migration record "
7592 "outside backup area\n");
7599 static char disk_by_path
[] = "/dev/disk/by-path/";
7601 static const char *imsm_get_disk_controller_domain(const char *path
)
7603 char disk_path
[PATH_MAX
];
7607 strncpy(disk_path
, disk_by_path
, PATH_MAX
- 1);
7608 strncat(disk_path
, path
, PATH_MAX
- strlen(disk_path
) - 1);
7609 if (stat(disk_path
, &st
) == 0) {
7610 struct sys_dev
* hba
;
7613 path
= devt_to_devpath(st
.st_rdev
);
7616 hba
= find_disk_attached_hba(-1, path
);
7617 if (hba
&& hba
->type
== SYS_DEV_SAS
)
7619 else if (hba
&& hba
->type
== SYS_DEV_SATA
)
7623 dprintf("path: %s hba: %s attached: %s\n",
7624 path
, (hba
) ? hba
->path
: "NULL", drv
);
7632 static int imsm_find_array_minor_by_subdev(int subdev
, int container
, int *minor
)
7634 char subdev_name
[20];
7635 struct mdstat_ent
*mdstat
;
7637 sprintf(subdev_name
, "%d", subdev
);
7638 mdstat
= mdstat_by_subdev(subdev_name
, container
);
7642 *minor
= mdstat
->devnum
;
7643 free_mdstat(mdstat
);
7647 static int imsm_reshape_is_allowed_on_container(struct supertype
*st
,
7648 struct geo_params
*geo
,
7649 int *old_raid_disks
)
7651 /* currently we only support increasing the number of devices
7652 * for a container. This increases the number of device for each
7653 * member array. They must all be RAID0 or RAID5.
7656 struct mdinfo
*info
, *member
;
7657 int devices_that_can_grow
= 0;
7659 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7660 "st->devnum = (%i)\n",
7663 if (geo
->size
!= -1 ||
7664 geo
->level
!= UnSet
||
7665 geo
->layout
!= UnSet
||
7666 geo
->chunksize
!= 0 ||
7667 geo
->raid_disks
== UnSet
) {
7668 dprintf("imsm: Container operation is allowed for "
7669 "raid disks number change only.\n");
7673 info
= container_content_imsm(st
, NULL
);
7674 for (member
= info
; member
; member
= member
->next
) {
7678 dprintf("imsm: checking device_num: %i\n",
7679 member
->container_member
);
7681 if (geo
->raid_disks
<= member
->array
.raid_disks
) {
7682 /* we work on container for Online Capacity Expansion
7683 * only so raid_disks has to grow
7685 dprintf("imsm: for container operation raid disks "
7686 "increase is required\n");
7690 if ((info
->array
.level
!= 0) &&
7691 (info
->array
.level
!= 5)) {
7692 /* we cannot use this container with other raid level
7694 dprintf("imsm: for container operation wrong"
7695 " raid level (%i) detected\n",
7699 /* check for platform support
7700 * for this raid level configuration
7702 struct intel_super
*super
= st
->sb
;
7703 if (!is_raid_level_supported(super
->orom
,
7704 member
->array
.level
,
7706 dprintf("platform does not support raid%d with"
7710 geo
->raid_disks
> 1 ? "s" : "");
7713 /* check if component size is aligned to chunk size
7715 if (info
->component_size
%
7716 (info
->array
.chunk_size
/512)) {
7717 dprintf("Component size is not aligned to "
7723 if (*old_raid_disks
&&
7724 info
->array
.raid_disks
!= *old_raid_disks
)
7726 *old_raid_disks
= info
->array
.raid_disks
;
7728 /* All raid5 and raid0 volumes in container
7729 * have to be ready for Online Capacity Expansion
7730 * so they need to be assembled. We have already
7731 * checked that no recovery etc is happening.
7733 result
= imsm_find_array_minor_by_subdev(member
->container_member
,
7737 dprintf("imsm: cannot find array\n");
7740 devices_that_can_grow
++;
7743 if (!member
&& devices_that_can_grow
)
7747 dprintf("\tContainer operation allowed\n");
7749 dprintf("\tError: %i\n", ret_val
);
7754 /* Function: get_spares_for_grow
7755 * Description: Allocates memory and creates list of spare devices
7756 * avaliable in container. Checks if spare drive size is acceptable.
7757 * Parameters: Pointer to the supertype structure
7758 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7761 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
)
7763 unsigned long long min_size
= min_acceptable_spare_size_imsm(st
);
7764 return container_choose_spares(st
, min_size
, NULL
, NULL
, NULL
, 0);
7767 /******************************************************************************
7768 * function: imsm_create_metadata_update_for_reshape
7769 * Function creates update for whole IMSM container.
7771 ******************************************************************************/
7772 static int imsm_create_metadata_update_for_reshape(
7773 struct supertype
*st
,
7774 struct geo_params
*geo
,
7776 struct imsm_update_reshape
**updatep
)
7778 struct intel_super
*super
= st
->sb
;
7779 struct imsm_super
*mpb
= super
->anchor
;
7780 int update_memory_size
= 0;
7781 struct imsm_update_reshape
*u
= NULL
;
7782 struct mdinfo
*spares
= NULL
;
7784 int delta_disks
= 0;
7787 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7790 delta_disks
= geo
->raid_disks
- old_raid_disks
;
7792 /* size of all update data without anchor */
7793 update_memory_size
= sizeof(struct imsm_update_reshape
);
7795 /* now add space for spare disks that we need to add. */
7796 update_memory_size
+= sizeof(u
->new_disks
[0]) * (delta_disks
- 1);
7798 u
= calloc(1, update_memory_size
);
7801 "cannot get memory for imsm_update_reshape update\n");
7804 u
->type
= update_reshape_container_disks
;
7805 u
->old_raid_disks
= old_raid_disks
;
7806 u
->new_raid_disks
= geo
->raid_disks
;
7808 /* now get spare disks list
7810 spares
= get_spares_for_grow(st
);
7813 || delta_disks
> spares
->array
.spare_disks
) {
7814 fprintf(stderr
, Name
": imsm: ERROR: Cannot get spare devices "
7815 "for %s.\n", geo
->dev_name
);
7819 /* we have got spares
7820 * update disk list in imsm_disk list table in anchor
7822 dprintf("imsm: %i spares are available.\n\n",
7823 spares
->array
.spare_disks
);
7826 for (i
= 0; i
< delta_disks
; i
++) {
7831 u
->new_disks
[i
] = makedev(dev
->disk
.major
,
7833 dl
= get_disk_super(super
, dev
->disk
.major
, dev
->disk
.minor
);
7834 dl
->index
= mpb
->num_disks
;
7844 dprintf("imsm: reshape update preparation :");
7845 if (i
== delta_disks
) {
7848 return update_memory_size
;
7851 dprintf(" Error\n");
7856 /******************************************************************************
7857 * function: imsm_create_metadata_update_for_migration()
7858 * Creates update for IMSM array.
7860 ******************************************************************************/
7861 static int imsm_create_metadata_update_for_migration(
7862 struct supertype
*st
,
7863 struct geo_params
*geo
,
7864 struct imsm_update_reshape_migration
**updatep
)
7866 struct intel_super
*super
= st
->sb
;
7867 int update_memory_size
= 0;
7868 struct imsm_update_reshape_migration
*u
= NULL
;
7869 struct imsm_dev
*dev
;
7870 int previous_level
= -1;
7872 dprintf("imsm_create_metadata_update_for_migration(enter)"
7873 " New Level = %i\n", geo
->level
);
7875 /* size of all update data without anchor */
7876 update_memory_size
= sizeof(struct imsm_update_reshape_migration
);
7878 u
= calloc(1, update_memory_size
);
7880 dprintf("error: cannot get memory for "
7881 "imsm_create_metadata_update_for_migration\n");
7884 u
->type
= update_reshape_migration
;
7885 u
->subdev
= super
->current_vol
;
7886 u
->new_level
= geo
->level
;
7887 u
->new_layout
= geo
->layout
;
7888 u
->new_raid_disks
= u
->old_raid_disks
= geo
->raid_disks
;
7889 u
->new_disks
[0] = -1;
7890 u
->new_chunksize
= -1;
7892 dev
= get_imsm_dev(super
, u
->subdev
);
7894 struct imsm_map
*map
;
7896 map
= get_imsm_map(dev
, 0);
7898 int current_chunk_size
=
7899 __le16_to_cpu(map
->blocks_per_strip
) / 2;
7901 if (geo
->chunksize
!= current_chunk_size
) {
7902 u
->new_chunksize
= geo
->chunksize
/ 1024;
7904 "chunk size change from %i to %i\n",
7905 current_chunk_size
, u
->new_chunksize
);
7907 previous_level
= map
->raid_level
;
7910 if ((geo
->level
== 5) && (previous_level
== 0)) {
7911 struct mdinfo
*spares
= NULL
;
7913 u
->new_raid_disks
++;
7914 spares
= get_spares_for_grow(st
);
7915 if ((spares
== NULL
) || (spares
->array
.spare_disks
< 1)) {
7918 update_memory_size
= 0;
7919 dprintf("error: cannot get spare device "
7920 "for requested migration");
7925 dprintf("imsm: reshape update preparation : OK\n");
7928 return update_memory_size
;
7931 static void imsm_update_metadata_locally(struct supertype
*st
,
7934 struct metadata_update mu
;
7939 mu
.space_list
= NULL
;
7941 imsm_prepare_update(st
, &mu
);
7942 imsm_process_update(st
, &mu
);
7944 while (mu
.space_list
) {
7945 void **space
= mu
.space_list
;
7946 mu
.space_list
= *space
;
7951 /***************************************************************************
7952 * Function: imsm_analyze_change
7953 * Description: Function analyze change for single volume
7954 * and validate if transition is supported
7955 * Parameters: Geometry parameters, supertype structure
7956 * Returns: Operation type code on success, -1 if fail
7957 ****************************************************************************/
7958 enum imsm_reshape_type
imsm_analyze_change(struct supertype
*st
,
7959 struct geo_params
*geo
)
7966 getinfo_super_imsm_volume(st
, &info
, NULL
);
7968 if ((geo
->level
!= info
.array
.level
) &&
7969 (geo
->level
>= 0) &&
7970 (geo
->level
!= UnSet
)) {
7971 switch (info
.array
.level
) {
7973 if (geo
->level
== 5) {
7974 change
= CH_MIGRATION
;
7977 if (geo
->level
== 10) {
7978 change
= CH_TAKEOVER
;
7983 if (geo
->level
== 0) {
7984 change
= CH_TAKEOVER
;
7989 if (geo
->level
== 0) {
7990 change
= CH_TAKEOVER
;
7997 Name
" Error. Level Migration from %d to %d "
7999 info
.array
.level
, geo
->level
);
8000 goto analyse_change_exit
;
8003 geo
->level
= info
.array
.level
;
8005 if ((geo
->layout
!= info
.array
.layout
)
8006 && ((geo
->layout
!= UnSet
) && (geo
->layout
!= -1))) {
8007 change
= CH_MIGRATION
;
8008 if ((info
.array
.layout
== 0)
8009 && (info
.array
.level
== 5)
8010 && (geo
->layout
== 5)) {
8011 /* reshape 5 -> 4 */
8012 } else if ((info
.array
.layout
== 5)
8013 && (info
.array
.level
== 5)
8014 && (geo
->layout
== 0)) {
8015 /* reshape 4 -> 5 */
8020 Name
" Error. Layout Migration from %d to %d "
8022 info
.array
.layout
, geo
->layout
);
8024 goto analyse_change_exit
;
8027 geo
->layout
= info
.array
.layout
;
8029 if ((geo
->chunksize
> 0) && (geo
->chunksize
!= UnSet
)
8030 && (geo
->chunksize
!= info
.array
.chunk_size
))
8031 change
= CH_MIGRATION
;
8033 geo
->chunksize
= info
.array
.chunk_size
;
8035 chunk
= geo
->chunksize
/ 1024;
8036 if (!validate_geometry_imsm(st
,
8046 struct intel_super
*super
= st
->sb
;
8047 struct imsm_super
*mpb
= super
->anchor
;
8049 if (mpb
->num_raid_devs
> 1) {
8051 Name
" Error. Cannot perform operation on %s"
8052 "- for this operation it MUST be single "
8053 "array in container\n",
8059 analyse_change_exit
:
8064 int imsm_takeover(struct supertype
*st
, struct geo_params
*geo
)
8066 struct intel_super
*super
= st
->sb
;
8067 struct imsm_update_takeover
*u
;
8069 u
= malloc(sizeof(struct imsm_update_takeover
));
8073 u
->type
= update_takeover
;
8074 u
->subarray
= super
->current_vol
;
8076 /* 10->0 transition */
8077 if (geo
->level
== 0)
8078 u
->direction
= R10_TO_R0
;
8080 /* 0->10 transition */
8081 if (geo
->level
== 10)
8082 u
->direction
= R0_TO_R10
;
8084 /* update metadata locally */
8085 imsm_update_metadata_locally(st
, u
,
8086 sizeof(struct imsm_update_takeover
));
8087 /* and possibly remotely */
8088 if (st
->update_tail
)
8089 append_metadata_update(st
, u
,
8090 sizeof(struct imsm_update_takeover
));
/*******************************************************************************
 * Function:	warn_user_about_risk
 * Description:	Presents the experimental-feature warning text below (data on
 *		the RAID volumes can be lost, the grow process must not be
 *		interrupted, containers with a boot array cannot be grown) and
 *		then asks the user for confirmation through ask().
 * Parameters:	none
 * Notes:
 *	The answer from ask() is stored in rv and a newline is written to
 *	stderr afterwards. The return statement is not visible in this
 *	listing; presumably rv is returned - TODO confirm.
 ******************************************************************************/
8097 static int warn_user_about_risk(void)
8102 "\nThis is an experimental feature. Data on the RAID volume(s) "
8103 "can be lost!!!\n\n"
8104 "To continue command execution please make sure that\n"
8105 "the grow process will not be interrupted. Use safe power\n"
8106 "supply to avoid unexpected system reboot. Make sure that\n"
8107 "reshaped container is not assembled automatically during\n"
8109 "If reshape is interrupted, assemble array manually\n"
8110 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
8111 "Assembly in scan mode is not possible in such case.\n"
8112 "Growing container with boot array is not possible.\n"
8113 "If boot array reshape is interrupted, whole file system\n"
8114 "can be lost.\n\n");
8115 rv
= ask("Do you want to continue? ");
8116 fprintf(stderr
, "\n");
/*******************************************************************************
 * Function:	imsm_reshape_super
 * Description:	Entry point for an IMSM reshape request. Collects the request
 *		into a zeroed struct geo_params and then takes one of two
 *		paths:
 *		- container operation (st->container_dev == st->devnum):
 *		  warn_user_about_risk(), then
 *		  imsm_reshape_is_allowed_on_container() and
 *		  imsm_create_metadata_update_for_reshape(); the resulting
 *		  update is applied locally and, when st->update_tail is set,
 *		  appended for remote processing.
 *		- volume operation: the subarray whose minor number (from
 *		  imsm_find_array_minor_by_subdev()) equals geo.dev_id is
 *		  looked up, super->current_vol is set to its index and
 *		  imsm_analyze_change() selects the action: imsm_takeover()
 *		  or, for CH_MIGRATION,
 *		  imsm_create_metadata_update_for_migration() followed by the
 *		  same local/remote update steps.
 * Parameters:
 *	st		: supertype structure
 *	size		: not referenced in the visible code
 *	level		: requested level (its copy into geo is not visible in
 *			  this listing, but geo.level is printed)
 *	layout		: copied to geo.layout
 *	chunksize	: copied to geo.chunksize
 *	raid_disks	: copied to geo.raid_disks
 *	delta_disks	: added to geo.raid_disks unless it equals UnSet
 *	backup, dev	: not referenced by value in the visible code; note
 *			  that the volume path declares a local
 *			  struct intel_dev *dev with the same name
 *	(the remainder of the parameter list is not visible in this listing)
 * Notes:
 *	experimental() == 0 is checked before any work is done; the action
 *	taken is not visible here.
 *	All visible error paths jump to exit_imsm_reshape_super, where
 *	ret_val is printed as the exit code.
 ******************************************************************************/
8121 static int imsm_reshape_super(struct supertype
*st
, long long size
, int level
,
8122 int layout
, int chunksize
, int raid_disks
,
8123 int delta_disks
, char *backup
, char *dev
,
8127 struct geo_params geo
;
8129 dprintf("imsm: reshape_super called.\n");
8131 memset(&geo
, 0, sizeof(struct geo_params
));
8134 geo
.dev_id
= st
->devnum
;
8137 geo
.layout
= layout
;
8138 geo
.chunksize
= chunksize
;
8139 geo
.raid_disks
= raid_disks
;
8140 if (delta_disks
!= UnSet
)
8141 geo
.raid_disks
+= delta_disks
;
8143 dprintf("\tfor level : %i\n", geo
.level
);
8144 dprintf("\tfor raid_disks : %i\n", geo
.raid_disks
);
8146 if (experimental() == 0)
8149 if (st
->container_dev
== st
->devnum
) {
8150 /* On container level we can only increase number of devices. */
8151 dprintf("imsm: info: Container operation\n");
8152 int old_raid_disks
= 0;
8154 /* this warning will be removed when imsm checkpointing
8155 * will be implemented, and restoring from check-point
8156 * operation will be transparent for reboot process
8158 if (warn_user_about_risk() == 0)
8161 if (imsm_reshape_is_allowed_on_container(
8162 st
, &geo
, &old_raid_disks
)) {
8163 struct imsm_update_reshape
*u
= NULL
;
8166 len
= imsm_create_metadata_update_for_reshape(
8167 st
, &geo
, old_raid_disks
, &u
);
8170 dprintf("imsm: Cannot prepare update\n");
8171 goto exit_imsm_reshape_super
;
8175 /* update metadata locally */
8176 imsm_update_metadata_locally(st
, u
, len
);
8177 /* and possibly remotely */
8178 if (st
->update_tail
)
8179 append_metadata_update(st
, u
, len
);
8184 fprintf(stderr
, Name
": (imsm) Operation "
8185 "is not allowed on this container\n");
8188 /* On volume level we support following operations
8189 * - takeover: raid10 -> raid0; raid0 -> raid10
8190 * - chunk size migration
8191 * - migration: raid5 -> raid0; raid0 -> raid5
8193 struct intel_super
*super
= st
->sb
;
8194 struct intel_dev
*dev
= super
->devlist
;
8196 dprintf("imsm: info: Volume operation\n");
8197 /* find requested device */
8199 imsm_find_array_minor_by_subdev(dev
->index
, st
->container_dev
, &devnum
);
8200 if (devnum
== geo
.dev_id
)
8205 fprintf(stderr
, Name
" Cannot find %s (%i) subarray\n",
8206 geo
.dev_name
, geo
.dev_id
);
8207 goto exit_imsm_reshape_super
;
8209 super
->current_vol
= dev
->index
;
8210 change
= imsm_analyze_change(st
, &geo
);
8213 ret_val
= imsm_takeover(st
, &geo
);
8215 case CH_MIGRATION
: {
8216 struct imsm_update_reshape_migration
*u
= NULL
;
8218 imsm_create_metadata_update_for_migration(
8222 "Cannot prepare update\n");
8226 /* update metadata locally */
8227 imsm_update_metadata_locally(st
, u
, len
);
8228 /* and possibly remotely */
8229 if (st
->update_tail
)
8230 append_metadata_update(st
, u
, len
);
8240 exit_imsm_reshape_super
:
8241 dprintf("imsm: reshape_super Exit code = %i\n", ret_val
);
8245 /*******************************************************************************
8246 * Function: wait_for_reshape_imsm
8247 * Description: Function writes new sync_max value and waits until
8248 * reshape process reaches the new position
8250 * sra : general array info
8251 * to_complete : new sync_max position
8252 * ndata : number of disks in new array's layout
8255 * 1 : there is no reshape in progress,
 * Notes (taken from the code below):
 *	- progress is read from the sysfs attribute reshape_position through
 *	  the descriptor returned by sysfs_get_fd()
 *	- to_complete == 0 means reshape till the end of the array: sync_max
 *	  is set to the string max and to_complete becomes MaxSector
 *	- otherwise sync_max is written as to_complete / ndata
 *	- the wait loop uses select() with a 30 second timeout, re-reads
 *	  reshape_position, checks that sync_action still starts with
 *	  reshape, and repeats while completed < to_complete
 *	- the remaining return values and the early-exit actions are not
 *	  visible in this listing - TODO confirm against the full file
8257 ******************************************************************************/
8258 int wait_for_reshape_imsm(struct mdinfo
*sra
, unsigned long long to_complete
,
8261 int fd
= sysfs_get_fd(sra
, NULL
, "reshape_position");
8262 unsigned long long completed
;
8264 struct timeval timeout
;
8269 sysfs_fd_get_ll(fd
, &completed
);
8271 if (to_complete
== 0) {/* reshape till the end of array */
8272 sysfs_set_str(sra
, NULL
, "sync_max", "max");
8273 to_complete
= MaxSector
;
8275 if (completed
> to_complete
)
8277 if (sysfs_set_num(sra
, NULL
, "sync_max",
8278 to_complete
/ ndata
) != 0) {
8284 /* FIXME should not need a timeout at all */
8285 timeout
.tv_sec
= 30;
8286 timeout
.tv_usec
= 0;
/* rfds is passed as the fourth (exceptfds) argument of select(), so the
 * wait is for an exceptional condition on fd, not for readability
 */
8292 select(fd
+1, NULL
, NULL
, &rfds
, &timeout
);
8293 if (sysfs_fd_get_ll(fd
, &completed
) < 0) {
8297 if (sysfs_get_str(sra
, NULL
, "sync_action",
8299 strncmp(action
, "reshape", 7) != 0)
8301 } while (completed
< to_complete
);
8307 /*******************************************************************************
8308 * Function: check_degradation_change
8309 * Description: Check that array hasn't become failed.
8311 * info : for sysfs access
8312 * sources : source disks descriptors
8313 * degraded: previous degradation level
 * Returns (as visible below):
 *	the degraded value freshly read from sysfs (new_degraded); it is an
 *	unsigned long long returned through an int
 * Notes (taken from the code below):
 *	- devices are re-checked only when the sysfs value differs from the
 *	  degraded argument
 *	- a device flagged MD_DISK_SYNC is marked MD_DISK_FAULTY when its
 *	  sysfs state cannot be read, contains faulty, or lacks in_sync
 *	- for such a device the matching sources[] entry is handled when
 *	  raid_disk >= 0 and the entry is >= 0; the call made on it and the
 *	  value stored back are not visible in this listing
 *	  (presumably the descriptor is closed and invalidated - TODO confirm)
8316 ******************************************************************************/
8317 int check_degradation_change(struct mdinfo
*info
,
8321 unsigned long long new_degraded
;
8322 sysfs_get_ll(info
, NULL
, "degraded", &new_degraded
);
8323 if (new_degraded
!= (unsigned long long)degraded
) {
8324 /* check each device to ensure it is still working */
8327 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
8328 if (sd
->disk
.state
& (1<<MD_DISK_FAULTY
))
8330 if (sd
->disk
.state
& (1<<MD_DISK_SYNC
)) {
8332 if (sysfs_get_str(info
,
8333 sd
, "state", sbuf
, 20) < 0 ||
8334 strstr(sbuf
, "faulty") ||
8335 strstr(sbuf
, "in_sync") == NULL
) {
8336 /* this device is dead */
8337 sd
->disk
.state
= (1<<MD_DISK_FAULTY
);
8338 if (sd
->disk
.raid_disk
>= 0 &&
8339 sources
[sd
->disk
.raid_disk
] >= 0) {
8341 sd
->disk
.raid_disk
]);
8342 sources
[sd
->disk
.raid_disk
] =
8351 return new_degraded
;
8354 /*******************************************************************************
8355 * Function: imsm_manage_reshape
8356 * Description: Function finds array under reshape and it manages reshape
8357 * process. It creates stripes backups (if required) and sets
8360 * afd : Backup handle (native) - not used
8361 * sra : general array info
8362 * reshape : reshape parameters - not used
8363 * st : supertype structure
8364 * backup_blocks : size of critical section [blocks]
8365 * fds : table of source device descriptor
8366 * offsets : start of array (offset per devices)
8368 * destfd : table of destination device descriptor
8369 * destoffsets : table of destination offsets (per device)
8371 * 1 : success, reshape is done
 * Overview (taken from the code below):
 *	- fds, offsets, destfd, destoffsets and sra must all be non-NULL
 *	- exactly one volume with migr_type MIGR_GEN_MIGR and migr_state 1 is
 *	  expected; any other count is reported as an error
 *	- map index 1 is the source (old) map, map index 0 the destination
 *	- the checkpoint buffer is sized as blocks_per_unit plus
 *	  dest_depth_per_unit (both times 512) plus one old data stripe
 *	- the main loop runs while curr_migr_unit < num_migr_units; per unit
 *	  it calls check_degradation_change(), computes the read start
 *	  aligned to the old geometry and, when border does not exceed
 *	  dest_depth_per_unit, saves the stripes (save_stripes,
 *	  save_backup_imsm, save_checkpoint_imsm with UNIT_SRC_IN_CP_AREA);
 *	  the exact extent of that branch is not visible in this listing
 *	- it then sets suspend_lo / suspend_hi, waits in
 *	  wait_for_reshape_imsm() (an error there is only logged), advances
 *	  sra->reshape_progress and writes a UNIT_SRC_NORMAL checkpoint
8373 ******************************************************************************/
8374 static int imsm_manage_reshape(
8375 int afd
, struct mdinfo
*sra
, struct reshape
*reshape
,
8376 struct supertype
*st
, unsigned long backup_blocks
,
8377 int *fds
, unsigned long long *offsets
,
8378 int dests
, int *destfd
, unsigned long long *destoffsets
)
8381 struct intel_super
*super
= st
->sb
;
8382 struct intel_dev
*dv
= NULL
;
8383 struct imsm_dev
*dev
= NULL
;
8384 struct imsm_map
*map_src
, *map_dest
;
8385 int migr_vol_qan
= 0;
8386 int ndata
, odata
; /* data member counts: destination map (0), source map (1) */
8387 int chunk
; /* [bytes] */
8388 struct migr_record
*migr_rec
;
8390 unsigned int buf_size
; /* [bytes] */
8391 unsigned long long max_position
; /* array size [bytes] */
8392 unsigned long long next_step
; /* [blocks]/[bytes] */
8393 unsigned long long old_data_stripe_length
;
8394 unsigned long long new_data_stripe_length
;
8395 unsigned long long start_src
; /* [bytes] */
8396 unsigned long long start
; /* [bytes] */
8397 unsigned long long start_buf_shift
; /* [bytes] */
8400 if (!fds
|| !offsets
|| !destfd
|| !destoffsets
|| !sra
)
8403 /* Find volume during the reshape */
8404 for (dv
= super
->devlist
; dv
; dv
= dv
->next
) {
8405 if (dv
->dev
->vol
.migr_type
== MIGR_GEN_MIGR
8406 && dv
->dev
->vol
.migr_state
== 1) {
8411 /* Only one volume can migrate at the same time */
8412 if (migr_vol_qan
!= 1) {
8413 fprintf(stderr
, Name
" : %s", migr_vol_qan
?
8414 "Number of migrating volumes greater than 1\n" :
8415 "There is no volume during migrationg\n");
8419 map_src
= get_imsm_map(dev
, 1);
8420 if (map_src
== NULL
)
8422 map_dest
= get_imsm_map(dev
, 0);
8424 ndata
= imsm_num_data_members(dev
, 0);
8425 odata
= imsm_num_data_members(dev
, 1);
8427 chunk
= map_src
->blocks_per_strip
* 512;
8428 old_data_stripe_length
= odata
* chunk
;
8430 migr_rec
= super
->migr_rec
;
8433 sra
->new_chunk
= __le16_to_cpu(map_dest
->blocks_per_strip
) * 512;
8434 sra
->new_level
= map_dest
->raid_level
;
8435 new_data_stripe_length
= sra
->new_chunk
* ndata
;
8437 /* initialize migration record for start condition */
8438 if (sra
->reshape_progress
== 0)
8439 init_migr_record_imsm(st
, dev
, sra
);
8442 buf_size
= __le32_to_cpu(migr_rec
->blocks_per_unit
) * 512;
8443 /* extend buffer size for parity disk */
8444 buf_size
+= __le32_to_cpu(migr_rec
->dest_depth_per_unit
) * 512;
8445 /* add space for stripe alignment */
8446 buf_size
+= old_data_stripe_length
;
8447 if (posix_memalign((void **)&buf
, 4096, buf_size
)) {
8448 dprintf("imsm: Cannot allocate checpoint buffer\n");
8453 __le32_to_cpu(migr_rec
->post_migr_vol_cap
) +
8454 ((unsigned long long)__le32_to_cpu(
8455 migr_rec
->post_migr_vol_cap_hi
) << 32);
8457 while (__le32_to_cpu(migr_rec
->curr_migr_unit
) <
8458 __le32_to_cpu(migr_rec
->num_migr_units
)) {
8459 /* current reshape position [blocks] */
8460 unsigned long long current_position
=
8461 __le32_to_cpu(migr_rec
->blocks_per_unit
)
8462 * __le32_to_cpu(migr_rec
->curr_migr_unit
);
8463 unsigned long long border
;
8465 /* Check that array hasn't become failed.
8467 degraded
= check_degradation_change(sra
, fds
, degraded
);
8469 dprintf("imsm: Abort reshape due to degradation"
8470 " level (%i)\n", degraded
);
8474 next_step
= __le32_to_cpu(migr_rec
->blocks_per_unit
);
8476 if ((current_position
+ next_step
) > max_position
)
8477 next_step
= max_position
- current_position
;
8479 start
= (map_src
->pba_of_lba0
+ dev
->reserved_blocks
+
8480 current_position
) * 512;
8482 /* align reading start to old geometry */
8483 start_buf_shift
= start
% old_data_stripe_length
;
8484 start_src
= start
- start_buf_shift
;
8486 border
= (start_src
/ odata
) - (start
/ ndata
);
8488 if (border
<= __le32_to_cpu(migr_rec
->dest_depth_per_unit
)) {
8489 /* save critical stripes to buf
8490 * start - start address of current unit
8492 * start_src - start address of current unit
8493 * to backup aligned to source array
8496 unsigned long long next_step_filler
= 0;
8497 unsigned long long copy_length
= next_step
* 512;
8499 /* align copy area length to stripe in old geometry */
8500 next_step_filler
= ((copy_length
+ start_buf_shift
)
8501 % old_data_stripe_length
);
8502 if (next_step_filler
)
8503 next_step_filler
= (old_data_stripe_length
8504 - next_step_filler
);
8505 dprintf("save_stripes() parameters: start = %llu,"
8506 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8507 "\tstart_in_buf_shift = %llu,"
8508 "\tnext_step_filler = %llu\n",
8509 start
, start_src
, copy_length
,
8510 start_buf_shift
, next_step_filler
);
8512 if (save_stripes(fds
, offsets
, map_src
->num_members
,
8513 chunk
, sra
->array
.level
,
8514 sra
->array
.layout
, 0, NULL
, start_src
,
8516 next_step_filler
+ start_buf_shift
,
8518 dprintf("imsm: Cannot save stripes"
8522 /* Convert data to destination format and store it
8523 * in backup general migration area
8525 if (save_backup_imsm(st
, dev
, sra
,
8526 buf
+ start_buf_shift
,
8527 ndata
, copy_length
)) {
8528 dprintf("imsm: Cannot save stripes to "
8529 "target devices\n");
8532 if (save_checkpoint_imsm(st
, sra
,
8533 UNIT_SRC_IN_CP_AREA
)) {
8534 dprintf("imsm: Cannot write checkpoint to "
8535 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8538 /* decrease backup_blocks */
8539 if (backup_blocks
> (unsigned long)next_step
)
8540 backup_blocks
-= next_step
;
8544 /* When data backed up, checkpoint stored,
8545 * kick the kernel to reshape unit of data
8547 next_step
= next_step
+ sra
->reshape_progress
;
8548 sysfs_set_num(sra
, NULL
, "suspend_lo", sra
->reshape_progress
);
8549 sysfs_set_num(sra
, NULL
, "suspend_hi", next_step
);
8551 /* wait until reshape finish */
8552 if (wait_for_reshape_imsm(sra
, next_step
, ndata
) < 0)
8553 dprintf("wait_for_reshape_imsm returned error,"
8554 " but we ignore it!\n");
8556 sra
->reshape_progress
= next_step
;
8558 if (save_checkpoint_imsm(st
, sra
, UNIT_SRC_NORMAL
)) {
8559 dprintf("imsm: Cannot write checkpoint to "
8560 "migration record (UNIT_SRC_NORMAL)\n");
8566 /* return '1' if done */
8574 #endif /* MDASSEMBLE */
8576 struct superswitch super_imsm
= {
8578 .examine_super
= examine_super_imsm
,
8579 .brief_examine_super
= brief_examine_super_imsm
,
8580 .brief_examine_subarrays
= brief_examine_subarrays_imsm
,
8581 .export_examine_super
= export_examine_super_imsm
,
8582 .detail_super
= detail_super_imsm
,
8583 .brief_detail_super
= brief_detail_super_imsm
,
8584 .write_init_super
= write_init_super_imsm
,
8585 .validate_geometry
= validate_geometry_imsm
,
8586 .add_to_super
= add_to_super_imsm
,
8587 .remove_from_super
= remove_from_super_imsm
,
8588 .detail_platform
= detail_platform_imsm
,
8589 .kill_subarray
= kill_subarray_imsm
,
8590 .update_subarray
= update_subarray_imsm
,
8591 .load_container
= load_container_imsm
,
8592 .default_geometry
= default_geometry_imsm
,
8593 .get_disk_controller_domain
= imsm_get_disk_controller_domain
,
8594 .reshape_super
= imsm_reshape_super
,
8595 .manage_reshape
= imsm_manage_reshape
,
8597 .match_home
= match_home_imsm
,
8598 .uuid_from_super
= uuid_from_super_imsm
,
8599 .getinfo_super
= getinfo_super_imsm
,
8600 .getinfo_super_disks
= getinfo_super_disks_imsm
,
8601 .update_super
= update_super_imsm
,
8603 .avail_size
= avail_size_imsm
,
8604 .min_acceptable_spare_size
= min_acceptable_spare_size_imsm
,
8606 .compare_super
= compare_super_imsm
,
8608 .load_super
= load_super_imsm
,
8609 .init_super
= init_super_imsm
,
8610 .store_super
= store_super_imsm
,
8611 .free_super
= free_super_imsm
,
8612 .match_metadata_desc
= match_metadata_desc_imsm
,
8613 .container_content
= container_content_imsm
,
8620 .open_new
= imsm_open_new
,
8621 .set_array_state
= imsm_set_array_state
,
8622 .set_disk
= imsm_set_disk
,
8623 .sync_metadata
= imsm_sync_metadata
,
8624 .activate_spare
= imsm_activate_spare
,
8625 .process_update
= imsm_process_update
,
8626 .prepare_update
= imsm_prepare_update
,
8627 #endif /* MDASSEMBLE */