2 * mdadm - Intel(R) Matrix Storage Manager Support
4 * Copyright (C) 2002-2008 Intel Corporation
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 #define HAVE_STDINT_H 1
24 #include "platform-intel.h"
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
54 #define MPB_SECTOR_CNT 2210
55 #define IMSM_RESERVED_SECTORS 4096
56 #define SECT_PER_MB_SHIFT 11
58 /* Disk configuration info. */
59 #define IMSM_MAX_DEVICES 255
61 __u8 serial
[MAX_RAID_SERIAL_LEN
];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks
; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id
; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status
; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num
; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler
[IMSM_DISK_FILLERS
]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
73 /* RAID map configuration infos. */
75 __u32 pba_of_lba0
; /* start address of partition */
76 __u32 blocks_per_member
;/* blocks per member */
77 __u32 num_data_stripes
; /* number of data stripes */
78 __u16 blocks_per_strip
;
79 __u8 map_state
; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members
; /* number of member disks */
89 __u8 num_domains
; /* number of parity domains */
90 __u8 failed_disk_num
; /* valid only when state is degraded */
92 __u32 filler
[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl
[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
97 } __attribute__ ((packed
));
100 __u32 curr_migr_unit
;
101 __u32 checkpoint_id
; /* id to access curr_migr_unit */
102 __u8 migr_state
; /* Normal or Migrating */
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type
; /* Initializing, Rebuilding, ... */
111 __u8 fs_state
; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors
; /* number of mismatches */
113 __u16 bad_blocks
; /* number of bad blocks during verify */
115 struct imsm_map map
[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed
));
120 __u8 volume
[MAX_RAID_SERIAL_LEN
];
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status
; /* Persistent RaidDev status */
137 __u32 reserved_blocks
; /* Reserved blocks at beginning of volume */
141 __u8 cng_master_disk
;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler
[IMSM_DEV_FILLERS
];
148 } __attribute__ ((packed
));
151 __u8 sig
[MAX_SIGNATURE_LENGTH
]; /* 0x00 - 0x1F */
152 __u32 check_sum
; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size
; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num
; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num
; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size
; /* 0x30 - 0x33 in bytes */
157 __u32 attributes
; /* 0x34 - 0x37 */
158 __u8 num_disks
; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs
; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos
; /* 0x3A */
161 __u8 fill
[1]; /* 0x3B */
162 __u32 cache_size
; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num
; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count
; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size
; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler
[IMSM_FILLERS
]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk
[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed
));
173 #define BBM_LOG_MAX_ENTRIES 254
175 struct bbm_log_entry
{
176 __u64 defective_block_start
;
177 #define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset
;
179 __u16 remapped_marked_count
;
181 } __attribute__ ((__packed__
));
184 __u32 signature
; /* 0xABADB10C */
186 __u32 reserved_spare_block_count
; /* 0 */
187 __u32 reserved
; /* 0xFFFF */
188 __u64 first_spare_lba
;
189 struct bbm_log_entry mapped_block_entries
[BBM_LOG_MAX_ENTRIES
];
190 } __attribute__ ((__packed__
));
/* Human-readable names for imsm_map.map_state, indexed by IMSM_T_STATE_* */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
197 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
199 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
201 #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
202 * be recovered using srcMap */
203 #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
204 * already been migrated and must
205 * be recovered from checkpoint area */
207 __u32 rec_status
; /* Status used to determine how to restart
208 * migration in case it aborts
210 __u32 curr_migr_unit
; /* 0..numMigrUnits-1 */
211 __u32 family_num
; /* Family number of MPB
212 * containing the RaidDev
213 * that is migrating */
214 __u32 ascending_migr
; /* True if migrating in increasing
216 __u32 blocks_per_unit
; /* Num disk blocks per unit of operation */
217 __u32 dest_depth_per_unit
; /* Num member blocks each destMap
219 * advances per unit-of-operation */
220 __u32 ckpt_area_pba
; /* Pba of first block of ckpt copy area */
221 __u32 dest_1st_member_lba
; /* First member lba on first
222 * stripe of destination */
223 __u32 num_migr_units
; /* Total num migration units-of-op */
224 __u32 post_migr_vol_cap
; /* Size of volume after
225 * migration completes */
226 __u32 post_migr_vol_cap_hi
; /* Expansion space for LBA64 */
227 __u32 ckpt_read_disk_num
; /* Which member disk in destSubMap[0] the
228 * migration ckpt record was read from
229 * (for recovered migrations) */
230 } __attribute__ ((__packed__
));
232 static __u8
migr_type(struct imsm_dev
*dev
)
234 if (dev
->vol
.migr_type
== MIGR_VERIFY
&&
235 dev
->status
& DEV_VERIFY_AND_FIX
)
238 return dev
->vol
.migr_type
;
241 static void set_migr_type(struct imsm_dev
*dev
, __u8 migr_type
)
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
246 if (migr_type
== MIGR_REPAIR
) {
247 dev
->vol
.migr_type
= MIGR_VERIFY
;
248 dev
->status
|= DEV_VERIFY_AND_FIX
;
250 dev
->vol
.migr_type
= migr_type
;
251 dev
->status
&= ~DEV_VERIFY_AND_FIX
;
255 static unsigned int sector_count(__u32 bytes
)
257 return ((bytes
+ (512-1)) & (~(512-1))) / 512;
260 static unsigned int mpb_sectors(struct imsm_super
*mpb
)
262 return sector_count(__le32_to_cpu(mpb
->mpb_size
));
266 struct imsm_dev
*dev
;
267 struct intel_dev
*next
;
272 enum sys_dev_type type
;
275 struct intel_hba
*next
;
282 /* internal representation of IMSM metadata */
285 void *buf
; /* O_DIRECT buffer for reading/writing metadata */
286 struct imsm_super
*anchor
; /* immovable parameters */
289 void *migr_rec_buf
; /* buffer for I/O operations */
290 struct migr_record
*migr_rec
; /* migration record */
292 size_t len
; /* size of the 'buf' allocation */
293 void *next_buf
; /* for realloc'ing buf from the manager */
295 int updates_pending
; /* count of pending updates for mdmon */
296 int current_vol
; /* index of raid device undergoing creation */
297 __u32 create_offset
; /* common start for 'current_vol' */
298 __u32 random
; /* random data for seeding new family numbers */
299 struct intel_dev
*devlist
;
303 __u8 serial
[MAX_RAID_SERIAL_LEN
];
306 struct imsm_disk disk
;
309 struct extent
*e
; /* for determining freespace @ create */
310 int raiddisk
; /* slot to fill in autolayout */
313 struct dl
*disk_mgmt_list
; /* list of disks to add/remove while mdmon
315 struct dl
*missing
; /* disks removed while we weren't looking */
316 struct bbm_log
*bbm_log
;
317 struct intel_hba
*hba
; /* device path of the raid controller for this metadata */
318 const struct imsm_orom
*orom
; /* platform firmware support */
319 struct intel_super
*next
; /* (temp) list for disambiguating family_num */
323 struct imsm_disk disk
;
324 #define IMSM_UNKNOWN_OWNER (-1)
326 struct intel_disk
*next
;
330 unsigned long long start
, size
;
333 /* definitions of reshape process types */
334 enum imsm_reshape_type
{
339 /* definition of messages passed to imsm_process_update */
340 enum imsm_update_type
{
341 update_activate_spare
,
345 update_add_remove_disk
,
346 update_reshape_container_disks
,
347 update_reshape_migration
,
349 update_general_migration_checkpoint
,
352 struct imsm_update_activate_spare
{
353 enum imsm_update_type type
;
357 struct imsm_update_activate_spare
*next
;
370 enum takeover_direction
{
374 struct imsm_update_takeover
{
375 enum imsm_update_type type
;
377 enum takeover_direction direction
;
380 struct imsm_update_reshape
{
381 enum imsm_update_type type
;
385 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
388 struct imsm_update_reshape_migration
{
389 enum imsm_update_type type
;
392 /* fields for array migration changes
399 int new_disks
[1]; /* new_raid_disks - old_raid_disks makedev number */
402 struct imsm_update_general_migration_checkpoint
{
403 enum imsm_update_type type
;
404 __u32 curr_migr_unit
;
408 __u8 serial
[MAX_RAID_SERIAL_LEN
];
411 struct imsm_update_create_array
{
412 enum imsm_update_type type
;
417 struct imsm_update_kill_array
{
418 enum imsm_update_type type
;
422 struct imsm_update_rename_array
{
423 enum imsm_update_type type
;
424 __u8 name
[MAX_RAID_SERIAL_LEN
];
428 struct imsm_update_add_remove_disk
{
429 enum imsm_update_type type
;
433 static const char *_sys_dev_type
[] = {
434 [SYS_DEV_UNKNOWN
] = "Unknown",
435 [SYS_DEV_SAS
] = "SAS",
436 [SYS_DEV_SATA
] = "SATA"
439 const char *get_sys_dev_type(enum sys_dev_type type
)
441 if (type
>= SYS_DEV_MAX
)
442 type
= SYS_DEV_UNKNOWN
;
444 return _sys_dev_type
[type
];
447 static struct intel_hba
* alloc_intel_hba(struct sys_dev
*device
)
449 struct intel_hba
*result
= malloc(sizeof(*result
));
451 result
->type
= device
->type
;
452 result
->path
= strdup(device
->path
);
454 if (result
->path
&& (result
->pci_id
= strrchr(result
->path
, '/')) != NULL
)
460 static struct intel_hba
* find_intel_hba(struct intel_hba
*hba
, struct sys_dev
*device
)
462 struct intel_hba
*result
=NULL
;
463 for (result
= hba
; result
; result
= result
->next
) {
464 if (result
->type
== device
->type
&& strcmp(result
->path
, device
->path
) == 0)
470 static int attach_hba_to_super(struct intel_super
*super
, struct sys_dev
*device
)
472 struct intel_hba
*hba
;
474 /* check if disk attached to Intel HBA */
475 hba
= find_intel_hba(super
->hba
, device
);
478 /* Check if HBA is already attached to super */
479 if (super
->hba
== NULL
) {
480 super
->hba
= alloc_intel_hba(device
);
485 /* Intel metadata allows for all disks attached to the same type HBA.
486 * Do not sypport odf HBA types mixing
488 if (device
->type
!= hba
->type
)
494 hba
->next
= alloc_intel_hba(device
);
498 static struct sys_dev
* find_disk_attached_hba(int fd
, const char *devname
)
500 struct sys_dev
*list
, *elem
, *prev
;
503 if ((list
= find_intel_devices()) == NULL
)
507 disk_path
= (char *) devname
;
509 disk_path
= diskfd_to_devpath(fd
);
516 for (prev
= NULL
, elem
= list
; elem
; prev
= elem
, elem
= elem
->next
) {
517 if (path_attached_to_hba(disk_path
, elem
->path
)) {
521 prev
->next
= elem
->next
;
523 if (disk_path
!= devname
)
529 if (disk_path
!= devname
)
537 static int find_intel_hba_capability(int fd
, struct intel_super
*super
,
540 static struct supertype
*match_metadata_desc_imsm(char *arg
)
542 struct supertype
*st
;
544 if (strcmp(arg
, "imsm") != 0 &&
545 strcmp(arg
, "default") != 0
549 st
= malloc(sizeof(*st
));
552 memset(st
, 0, sizeof(*st
));
553 st
->container_dev
= NoMdDev
;
554 st
->ss
= &super_imsm
;
555 st
->max_devs
= IMSM_MAX_DEVICES
;
556 st
->minor_version
= 0;
562 static __u8
*get_imsm_version(struct imsm_super
*mpb
)
564 return &mpb
->sig
[MPB_SIG_LEN
];
568 /* retrieve a disk directly from the anchor when the anchor is known to be
569 * up-to-date, currently only at load time
571 static struct imsm_disk
*__get_imsm_disk(struct imsm_super
*mpb
, __u8 index
)
573 if (index
>= mpb
->num_disks
)
575 return &mpb
->disk
[index
];
578 /* retrieve the disk description based on a index of the disk
581 static struct dl
*get_imsm_dl_disk(struct intel_super
*super
, __u8 index
)
585 for (d
= super
->disks
; d
; d
= d
->next
)
586 if (d
->index
== index
)
591 /* retrieve a disk from the parsed metadata */
592 static struct imsm_disk
*get_imsm_disk(struct intel_super
*super
, __u8 index
)
596 dl
= get_imsm_dl_disk(super
, index
);
603 /* generate a checksum directly from the anchor when the anchor is known to be
604 * up-to-date, currently only at load or write_super after coalescing
606 static __u32
__gen_imsm_checksum(struct imsm_super
*mpb
)
608 __u32 end
= mpb
->mpb_size
/ sizeof(end
);
609 __u32
*p
= (__u32
*) mpb
;
613 sum
+= __le32_to_cpu(*p
);
617 return sum
- __le32_to_cpu(mpb
->check_sum
);
620 static size_t sizeof_imsm_map(struct imsm_map
*map
)
622 return sizeof(struct imsm_map
) + sizeof(__u32
) * (map
->num_members
- 1);
625 struct imsm_map
*get_imsm_map(struct imsm_dev
*dev
, int second_map
)
627 /* A device can have 2 maps if it is in the middle of a migration.
629 * 0 - we return the first map
630 * 1 - we return the second map if it exists, else NULL
631 * -1 - we return the second map if it exists, else the first
633 struct imsm_map
*map
= &dev
->vol
.map
[0];
635 if (second_map
== 1 && !dev
->vol
.migr_state
)
637 else if (second_map
== 1 ||
638 (second_map
< 0 && dev
->vol
.migr_state
)) {
641 return ptr
+ sizeof_imsm_map(map
);
647 /* return the size of the device.
648 * migr_state increases the returned size if map[0] were to be duplicated
650 static size_t sizeof_imsm_dev(struct imsm_dev
*dev
, int migr_state
)
652 size_t size
= sizeof(*dev
) - sizeof(struct imsm_map
) +
653 sizeof_imsm_map(get_imsm_map(dev
, 0));
655 /* migrating means an additional map */
656 if (dev
->vol
.migr_state
)
657 size
+= sizeof_imsm_map(get_imsm_map(dev
, 1));
659 size
+= sizeof_imsm_map(get_imsm_map(dev
, 0));
665 /* retrieve disk serial number list from a metadata update */
666 static struct disk_info
*get_disk_info(struct imsm_update_create_array
*update
)
669 struct disk_info
*inf
;
671 inf
= u
+ sizeof(*update
) - sizeof(struct imsm_dev
) +
672 sizeof_imsm_dev(&update
->dev
, 0);
678 static struct imsm_dev
*__get_imsm_dev(struct imsm_super
*mpb
, __u8 index
)
684 if (index
>= mpb
->num_raid_devs
)
687 /* devices start after all disks */
688 offset
= ((void *) &mpb
->disk
[mpb
->num_disks
]) - _mpb
;
690 for (i
= 0; i
<= index
; i
++)
692 return _mpb
+ offset
;
694 offset
+= sizeof_imsm_dev(_mpb
+ offset
, 0);
699 static struct imsm_dev
*get_imsm_dev(struct intel_super
*super
, __u8 index
)
701 struct intel_dev
*dv
;
703 if (index
>= super
->anchor
->num_raid_devs
)
705 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
706 if (dv
->index
== index
)
714 * == 1 get second map
715 * == -1 than get map according to the current migr_state
717 static __u32
get_imsm_ord_tbl_ent(struct imsm_dev
*dev
,
721 struct imsm_map
*map
;
723 map
= get_imsm_map(dev
, second_map
);
725 /* top byte identifies disk under rebuild */
726 return __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
729 #define ord_to_idx(ord) (((ord) << 8) >> 8)
730 static __u32
get_imsm_disk_idx(struct imsm_dev
*dev
, int slot
, int second_map
)
732 __u32 ord
= get_imsm_ord_tbl_ent(dev
, slot
, second_map
);
734 return ord_to_idx(ord
);
737 static void set_imsm_ord_tbl_ent(struct imsm_map
*map
, int slot
, __u32 ord
)
739 map
->disk_ord_tbl
[slot
] = __cpu_to_le32(ord
);
742 static int get_imsm_disk_slot(struct imsm_map
*map
, unsigned idx
)
747 for (slot
= 0; slot
< map
->num_members
; slot
++) {
748 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
749 if (ord_to_idx(ord
) == idx
)
756 static int get_imsm_raid_level(struct imsm_map
*map
)
758 if (map
->raid_level
== 1) {
759 if (map
->num_members
== 2)
765 return map
->raid_level
;
768 static int cmp_extent(const void *av
, const void *bv
)
770 const struct extent
*a
= av
;
771 const struct extent
*b
= bv
;
772 if (a
->start
< b
->start
)
774 if (a
->start
> b
->start
)
779 static int count_memberships(struct dl
*dl
, struct intel_super
*super
)
784 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
785 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
786 struct imsm_map
*map
= get_imsm_map(dev
, 0);
788 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
795 static struct extent
*get_extents(struct intel_super
*super
, struct dl
*dl
)
797 /* find a list of used extents on the given physical device */
798 struct extent
*rv
, *e
;
800 int memberships
= count_memberships(dl
, super
);
801 __u32 reservation
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
803 rv
= malloc(sizeof(struct extent
) * (memberships
+ 1));
808 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
809 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
810 struct imsm_map
*map
= get_imsm_map(dev
, 0);
812 if (get_imsm_disk_slot(map
, dl
->index
) >= 0) {
813 e
->start
= __le32_to_cpu(map
->pba_of_lba0
);
814 e
->size
= __le32_to_cpu(map
->blocks_per_member
);
818 qsort(rv
, memberships
, sizeof(*rv
), cmp_extent
);
820 /* determine the start of the metadata
821 * when no raid devices are defined use the default
822 * ...otherwise allow the metadata to truncate the value
823 * as is the case with older versions of imsm
826 struct extent
*last
= &rv
[memberships
- 1];
829 remainder
= __le32_to_cpu(dl
->disk
.total_blocks
) -
830 (last
->start
+ last
->size
);
831 /* round down to 1k block to satisfy precision of the kernel
835 /* make sure remainder is still sane */
836 if (remainder
< (unsigned)ROUND_UP(super
->len
, 512) >> 9)
837 remainder
= ROUND_UP(super
->len
, 512) >> 9;
838 if (reservation
> remainder
)
839 reservation
= remainder
;
841 e
->start
= __le32_to_cpu(dl
->disk
.total_blocks
) - reservation
;
846 /* try to determine how much space is reserved for metadata from
847 * the last get_extents() entry, otherwise fallback to the
850 static __u32
imsm_reserved_sectors(struct intel_super
*super
, struct dl
*dl
)
856 /* for spares just return a minimal reservation which will grow
857 * once the spare is picked up by an array
860 return MPB_SECTOR_CNT
;
862 e
= get_extents(super
, dl
);
864 return MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
866 /* scroll to last entry */
867 for (i
= 0; e
[i
].size
; i
++)
870 rv
= __le32_to_cpu(dl
->disk
.total_blocks
) - e
[i
].start
;
877 static int is_spare(struct imsm_disk
*disk
)
879 return (disk
->status
& SPARE_DISK
) == SPARE_DISK
;
882 static int is_configured(struct imsm_disk
*disk
)
884 return (disk
->status
& CONFIGURED_DISK
) == CONFIGURED_DISK
;
887 static int is_failed(struct imsm_disk
*disk
)
889 return (disk
->status
& FAILED_DISK
) == FAILED_DISK
;
892 /* Return minimum size of a spare that can be used in this array*/
893 static unsigned long long min_acceptable_spare_size_imsm(struct supertype
*st
)
895 struct intel_super
*super
= st
->sb
;
899 unsigned long long rv
= 0;
903 /* find first active disk in array */
905 while (dl
&& (is_failed(&dl
->disk
) || dl
->index
== -1))
909 /* find last lba used by subarrays */
910 e
= get_extents(super
, dl
);
913 for (i
= 0; e
[i
].size
; i
++)
916 rv
= e
[i
-1].start
+ e
[i
-1].size
;
918 /* add the amount of space needed for metadata */
919 rv
= rv
+ MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
924 static __u64
blocks_per_migr_unit(struct intel_super
*super
,
925 struct imsm_dev
*dev
);
927 static void print_imsm_dev(struct intel_super
*super
,
928 struct imsm_dev
*dev
,
934 struct imsm_map
*map
= get_imsm_map(dev
, 0);
935 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
939 printf("[%.16s]:\n", dev
->volume
);
940 printf(" UUID : %s\n", uuid
);
941 printf(" RAID Level : %d", get_imsm_raid_level(map
));
943 printf(" <-- %d", get_imsm_raid_level(map2
));
945 printf(" Members : %d", map
->num_members
);
947 printf(" <-- %d", map2
->num_members
);
949 printf(" Slots : [");
950 for (i
= 0; i
< map
->num_members
; i
++) {
951 ord
= get_imsm_ord_tbl_ent(dev
, i
, 0);
952 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
957 for (i
= 0; i
< map2
->num_members
; i
++) {
958 ord
= get_imsm_ord_tbl_ent(dev
, i
, 1);
959 printf("%s", ord
& IMSM_ORD_REBUILD
? "_" : "U");
964 printf(" Failed disk : ");
965 if (map
->failed_disk_num
== 0xff)
968 printf("%i", map
->failed_disk_num
);
970 slot
= get_imsm_disk_slot(map
, disk_idx
);
972 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
973 printf(" This Slot : %d%s\n", slot
,
974 ord
& IMSM_ORD_REBUILD
? " (out-of-sync)" : "");
976 printf(" This Slot : ?\n");
977 sz
= __le32_to_cpu(dev
->size_high
);
979 sz
+= __le32_to_cpu(dev
->size_low
);
980 printf(" Array Size : %llu%s\n", (unsigned long long)sz
,
981 human_size(sz
* 512));
982 sz
= __le32_to_cpu(map
->blocks_per_member
);
983 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz
,
984 human_size(sz
* 512));
985 printf(" Sector Offset : %u\n",
986 __le32_to_cpu(map
->pba_of_lba0
));
987 printf(" Num Stripes : %u\n",
988 __le32_to_cpu(map
->num_data_stripes
));
989 printf(" Chunk Size : %u KiB",
990 __le16_to_cpu(map
->blocks_per_strip
) / 2);
992 printf(" <-- %u KiB",
993 __le16_to_cpu(map2
->blocks_per_strip
) / 2);
995 printf(" Reserved : %d\n", __le32_to_cpu(dev
->reserved_blocks
));
996 printf(" Migrate State : ");
997 if (dev
->vol
.migr_state
) {
998 if (migr_type(dev
) == MIGR_INIT
)
999 printf("initialize\n");
1000 else if (migr_type(dev
) == MIGR_REBUILD
)
1001 printf("rebuild\n");
1002 else if (migr_type(dev
) == MIGR_VERIFY
)
1004 else if (migr_type(dev
) == MIGR_GEN_MIGR
)
1005 printf("general migration\n");
1006 else if (migr_type(dev
) == MIGR_STATE_CHANGE
)
1007 printf("state change\n");
1008 else if (migr_type(dev
) == MIGR_REPAIR
)
1011 printf("<unknown:%d>\n", migr_type(dev
));
1014 printf(" Map State : %s", map_state_str
[map
->map_state
]);
1015 if (dev
->vol
.migr_state
) {
1016 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1018 printf(" <-- %s", map_state_str
[map
->map_state
]);
1019 printf("\n Checkpoint : %u (%llu)",
1020 __le32_to_cpu(dev
->vol
.curr_migr_unit
),
1021 (unsigned long long)blocks_per_migr_unit(super
, dev
));
1024 printf(" Dirty State : %s\n", dev
->vol
.dirty
? "dirty" : "clean");
1027 static void print_imsm_disk(struct imsm_super
*mpb
, int index
, __u32 reserved
)
1029 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, index
);
1030 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1033 if (index
< 0 || !disk
)
1037 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1038 printf(" Disk%02d Serial : %s\n", index
, str
);
1039 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1040 is_configured(disk
) ? " active" : "",
1041 is_failed(disk
) ? " failed" : "");
1042 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1043 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1044 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1045 human_size(sz
* 512));
1048 static int is_gen_migration(struct imsm_dev
*dev
);
1050 void examine_migr_rec_imsm(struct intel_super
*super
)
1052 struct migr_record
*migr_rec
= super
->migr_rec
;
1053 struct imsm_super
*mpb
= super
->anchor
;
1056 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1057 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1058 if (is_gen_migration(dev
) == 0)
1061 printf("\nMigration Record Information:");
1062 if (super
->disks
->index
> 1) {
1063 printf(" Empty\n ");
1064 printf("Examine one of first two disks in array\n");
1067 printf("\n Status : ");
1068 if (__le32_to_cpu(migr_rec
->rec_status
) == UNIT_SRC_NORMAL
)
1071 printf("Contains Data\n");
1072 printf(" Current Unit : %u\n",
1073 __le32_to_cpu(migr_rec
->curr_migr_unit
));
1074 printf(" Family : %u\n",
1075 __le32_to_cpu(migr_rec
->family_num
));
1076 printf(" Ascending : %u\n",
1077 __le32_to_cpu(migr_rec
->ascending_migr
));
1078 printf(" Blocks Per Unit : %u\n",
1079 __le32_to_cpu(migr_rec
->blocks_per_unit
));
1080 printf(" Dest. Depth Per Unit : %u\n",
1081 __le32_to_cpu(migr_rec
->dest_depth_per_unit
));
1082 printf(" Checkpoint Area pba : %u\n",
1083 __le32_to_cpu(migr_rec
->ckpt_area_pba
));
1084 printf(" First member lba : %u\n",
1085 __le32_to_cpu(migr_rec
->dest_1st_member_lba
));
1086 printf(" Total Number of Units : %u\n",
1087 __le32_to_cpu(migr_rec
->num_migr_units
));
1088 printf(" Size of volume : %u\n",
1089 __le32_to_cpu(migr_rec
->post_migr_vol_cap
));
1090 printf(" Expansion space for LBA64 : %u\n",
1091 __le32_to_cpu(migr_rec
->post_migr_vol_cap_hi
));
1092 printf(" Record was read from : %u\n",
1093 __le32_to_cpu(migr_rec
->ckpt_read_disk_num
));
1099 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
);
1101 static void examine_super_imsm(struct supertype
*st
, char *homehost
)
1103 struct intel_super
*super
= st
->sb
;
1104 struct imsm_super
*mpb
= super
->anchor
;
1105 char str
[MAX_SIGNATURE_LENGTH
];
1110 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
1113 snprintf(str
, MPB_SIG_LEN
, "%s", mpb
->sig
);
1114 printf(" Magic : %s\n", str
);
1115 snprintf(str
, strlen(MPB_VERSION_RAID0
), "%s", get_imsm_version(mpb
));
1116 printf(" Version : %s\n", get_imsm_version(mpb
));
1117 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb
->orig_family_num
));
1118 printf(" Family : %08x\n", __le32_to_cpu(mpb
->family_num
));
1119 printf(" Generation : %08x\n", __le32_to_cpu(mpb
->generation_num
));
1120 getinfo_super_imsm(st
, &info
, NULL
);
1121 fname_from_uuid(st
, &info
, nbuf
, ':');
1122 printf(" UUID : %s\n", nbuf
+ 5);
1123 sum
= __le32_to_cpu(mpb
->check_sum
);
1124 printf(" Checksum : %08x %s\n", sum
,
1125 __gen_imsm_checksum(mpb
) == sum
? "correct" : "incorrect");
1126 printf(" MPB Sectors : %d\n", mpb_sectors(mpb
));
1127 printf(" Disks : %d\n", mpb
->num_disks
);
1128 printf(" RAID Devices : %d\n", mpb
->num_raid_devs
);
1129 print_imsm_disk(mpb
, super
->disks
->index
, reserved
);
1130 if (super
->bbm_log
) {
1131 struct bbm_log
*log
= super
->bbm_log
;
1134 printf("Bad Block Management Log:\n");
1135 printf(" Log Size : %d\n", __le32_to_cpu(mpb
->bbm_log_size
));
1136 printf(" Signature : %x\n", __le32_to_cpu(log
->signature
));
1137 printf(" Entry Count : %d\n", __le32_to_cpu(log
->entry_count
));
1138 printf(" Spare Blocks : %d\n", __le32_to_cpu(log
->reserved_spare_block_count
));
1139 printf(" First Spare : %llx\n",
1140 (unsigned long long) __le64_to_cpu(log
->first_spare_lba
));
1142 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
1144 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
1146 super
->current_vol
= i
;
1147 getinfo_super_imsm(st
, &info
, NULL
);
1148 fname_from_uuid(st
, &info
, nbuf
, ':');
1149 print_imsm_dev(super
, dev
, nbuf
+ 5, super
->disks
->index
);
1151 for (i
= 0; i
< mpb
->num_disks
; i
++) {
1152 if (i
== super
->disks
->index
)
1154 print_imsm_disk(mpb
, i
, reserved
);
1156 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1157 struct imsm_disk
*disk
;
1158 char str
[MAX_RAID_SERIAL_LEN
+ 1];
1166 snprintf(str
, MAX_RAID_SERIAL_LEN
+ 1, "%s", disk
->serial
);
1167 printf(" Disk Serial : %s\n", str
);
1168 printf(" State :%s%s%s\n", is_spare(disk
) ? " spare" : "",
1169 is_configured(disk
) ? " active" : "",
1170 is_failed(disk
) ? " failed" : "");
1171 printf(" Id : %08x\n", __le32_to_cpu(disk
->scsi_id
));
1172 sz
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
1173 printf(" Usable Size : %llu%s\n", (unsigned long long)sz
,
1174 human_size(sz
* 512));
1177 examine_migr_rec_imsm(super
);
1180 static void brief_examine_super_imsm(struct supertype
*st
, int verbose
)
1182 /* We just write a generic IMSM ARRAY entry */
1185 struct intel_super
*super
= st
->sb
;
1187 if (!super
->anchor
->num_raid_devs
) {
1188 printf("ARRAY metadata=imsm\n");
1192 getinfo_super_imsm(st
, &info
, NULL
);
1193 fname_from_uuid(st
, &info
, nbuf
, ':');
1194 printf("ARRAY metadata=imsm UUID=%s\n", nbuf
+ 5);
1197 static void brief_examine_subarrays_imsm(struct supertype
*st
, int verbose
)
1199 /* We just write a generic IMSM ARRAY entry */
1203 struct intel_super
*super
= st
->sb
;
1206 if (!super
->anchor
->num_raid_devs
)
1209 getinfo_super_imsm(st
, &info
, NULL
);
1210 fname_from_uuid(st
, &info
, nbuf
, ':');
1211 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
1212 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
1214 super
->current_vol
= i
;
1215 getinfo_super_imsm(st
, &info
, NULL
);
1216 fname_from_uuid(st
, &info
, nbuf1
, ':');
1217 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1218 dev
->volume
, nbuf
+ 5, i
, nbuf1
+ 5);
1222 static void export_examine_super_imsm(struct supertype
*st
)
1224 struct intel_super
*super
= st
->sb
;
1225 struct imsm_super
*mpb
= super
->anchor
;
1229 getinfo_super_imsm(st
, &info
, NULL
);
1230 fname_from_uuid(st
, &info
, nbuf
, ':');
1231 printf("MD_METADATA=imsm\n");
1232 printf("MD_LEVEL=container\n");
1233 printf("MD_UUID=%s\n", nbuf
+5);
1234 printf("MD_DEVICES=%u\n", mpb
->num_disks
);
1237 static void detail_super_imsm(struct supertype
*st
, char *homehost
)
1242 getinfo_super_imsm(st
, &info
, NULL
);
1243 fname_from_uuid(st
, &info
, nbuf
, ':');
1244 printf("\n UUID : %s\n", nbuf
+ 5);
1247 static void brief_detail_super_imsm(struct supertype
*st
)
1251 getinfo_super_imsm(st
, &info
, NULL
);
1252 fname_from_uuid(st
, &info
, nbuf
, ':');
1253 printf(" UUID=%s", nbuf
+ 5);
1256 static int imsm_read_serial(int fd
, char *devname
, __u8
*serial
);
1257 static void fd2devname(int fd
, char *name
);
1259 static int ahci_enumerate_ports(const char *hba_path
, int port_count
, int host_base
, int verbose
)
1261 /* dump an unsorted list of devices attached to AHCI Intel storage
1262 * controller, as well as non-connected ports
1264 int hba_len
= strlen(hba_path
) + 1;
1269 unsigned long port_mask
= (1 << port_count
) - 1;
1271 if (port_count
> (int)sizeof(port_mask
) * 8) {
1273 fprintf(stderr
, Name
": port_count %d out of range\n", port_count
);
1277 /* scroll through /sys/dev/block looking for devices attached to
1280 dir
= opendir("/sys/dev/block");
1281 for (ent
= dir
? readdir(dir
) : NULL
; ent
; ent
= readdir(dir
)) {
1292 if (sscanf(ent
->d_name
, "%d:%d", &major
, &minor
) != 2)
1294 path
= devt_to_devpath(makedev(major
, minor
));
1297 if (!path_attached_to_hba(path
, hba_path
)) {
1303 /* retrieve the scsi device type */
1304 if (asprintf(&device
, "/sys/dev/block/%d:%d/device/xxxxxxx", major
, minor
) < 0) {
1306 fprintf(stderr
, Name
": failed to allocate 'device'\n");
1310 sprintf(device
, "/sys/dev/block/%d:%d/device/type", major
, minor
);
1311 if (load_sys(device
, buf
) != 0) {
1313 fprintf(stderr
, Name
": failed to read device type for %s\n",
1319 type
= strtoul(buf
, NULL
, 10);
1321 /* if it's not a disk print the vendor and model */
1322 if (!(type
== 0 || type
== 7 || type
== 14)) {
1325 sprintf(device
, "/sys/dev/block/%d:%d/device/vendor", major
, minor
);
1326 if (load_sys(device
, buf
) == 0) {
1327 strncpy(vendor
, buf
, sizeof(vendor
));
1328 vendor
[sizeof(vendor
) - 1] = '\0';
1329 c
= (char *) &vendor
[sizeof(vendor
) - 1];
1330 while (isspace(*c
) || *c
== '\0')
1334 sprintf(device
, "/sys/dev/block/%d:%d/device/model", major
, minor
);
1335 if (load_sys(device
, buf
) == 0) {
1336 strncpy(model
, buf
, sizeof(model
));
1337 model
[sizeof(model
) - 1] = '\0';
1338 c
= (char *) &model
[sizeof(model
) - 1];
1339 while (isspace(*c
) || *c
== '\0')
1343 if (vendor
[0] && model
[0])
1344 sprintf(buf
, "%.64s %.64s", vendor
, model
);
1346 switch (type
) { /* numbers from hald/linux/device.c */
1347 case 1: sprintf(buf
, "tape"); break;
1348 case 2: sprintf(buf
, "printer"); break;
1349 case 3: sprintf(buf
, "processor"); break;
1351 case 5: sprintf(buf
, "cdrom"); break;
1352 case 6: sprintf(buf
, "scanner"); break;
1353 case 8: sprintf(buf
, "media_changer"); break;
1354 case 9: sprintf(buf
, "comm"); break;
1355 case 12: sprintf(buf
, "raid"); break;
1356 default: sprintf(buf
, "unknown");
1362 /* chop device path to 'host%d' and calculate the port number */
1363 c
= strchr(&path
[hba_len
], '/');
1366 fprintf(stderr
, Name
": %s - invalid path name\n", path
+ hba_len
);
1371 if (sscanf(&path
[hba_len
], "host%d", &port
) == 1)
1375 *c
= '/'; /* repair the full string */
1376 fprintf(stderr
, Name
": failed to determine port number for %s\n",
1383 /* mark this port as used */
1384 port_mask
&= ~(1 << port
);
1386 /* print out the device information */
1388 printf(" Port%d : - non-disk device (%s) -\n", port
, buf
);
1392 fd
= dev_open(ent
->d_name
, O_RDONLY
);
1394 printf(" Port%d : - disk info unavailable -\n", port
);
1396 fd2devname(fd
, buf
);
1397 printf(" Port%d : %s", port
, buf
);
1398 if (imsm_read_serial(fd
, NULL
, (__u8
*) buf
) == 0)
1399 printf(" (%s)\n", buf
);
1414 for (i
= 0; i
< port_count
; i
++)
1415 if (port_mask
& (1 << i
))
1416 printf(" Port%d : - no device attached -\n", i
);
1424 static void print_found_intel_controllers(struct sys_dev
*elem
)
1426 for (; elem
; elem
= elem
->next
) {
1427 fprintf(stderr
, Name
": found Intel(R) ");
1428 if (elem
->type
== SYS_DEV_SATA
)
1429 fprintf(stderr
, "SATA ");
1430 else if (elem
->type
== SYS_DEV_SAS
)
1431 fprintf(stderr
, "SAS ");
1432 fprintf(stderr
, "RAID controller");
1434 fprintf(stderr
, " at %s", elem
->pci_id
);
1435 fprintf(stderr
, ".\n");
1440 static int ahci_get_port_count(const char *hba_path
, int *port_count
)
1447 if ((dir
= opendir(hba_path
)) == NULL
)
1450 for (ent
= readdir(dir
); ent
; ent
= readdir(dir
)) {
1453 if (sscanf(ent
->d_name
, "host%d", &host
) != 1)
1455 if (*port_count
== 0)
1457 else if (host
< host_base
)
1460 if (host
+ 1 > *port_count
+ host_base
)
1461 *port_count
= host
+ 1 - host_base
;
1467 static void print_imsm_capability(const struct imsm_orom
*orom
)
1469 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1470 printf(" Version : %d.%d.%d.%d\n", orom
->major_ver
, orom
->minor_ver
,
1471 orom
->hotfix_ver
, orom
->build
);
1472 printf(" RAID Levels :%s%s%s%s%s\n",
1473 imsm_orom_has_raid0(orom
) ? " raid0" : "",
1474 imsm_orom_has_raid1(orom
) ? " raid1" : "",
1475 imsm_orom_has_raid1e(orom
) ? " raid1e" : "",
1476 imsm_orom_has_raid10(orom
) ? " raid10" : "",
1477 imsm_orom_has_raid5(orom
) ? " raid5" : "");
1478 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1479 imsm_orom_has_chunk(orom
, 2) ? " 2k" : "",
1480 imsm_orom_has_chunk(orom
, 4) ? " 4k" : "",
1481 imsm_orom_has_chunk(orom
, 8) ? " 8k" : "",
1482 imsm_orom_has_chunk(orom
, 16) ? " 16k" : "",
1483 imsm_orom_has_chunk(orom
, 32) ? " 32k" : "",
1484 imsm_orom_has_chunk(orom
, 64) ? " 64k" : "",
1485 imsm_orom_has_chunk(orom
, 128) ? " 128k" : "",
1486 imsm_orom_has_chunk(orom
, 256) ? " 256k" : "",
1487 imsm_orom_has_chunk(orom
, 512) ? " 512k" : "",
1488 imsm_orom_has_chunk(orom
, 1024*1) ? " 1M" : "",
1489 imsm_orom_has_chunk(orom
, 1024*2) ? " 2M" : "",
1490 imsm_orom_has_chunk(orom
, 1024*4) ? " 4M" : "",
1491 imsm_orom_has_chunk(orom
, 1024*8) ? " 8M" : "",
1492 imsm_orom_has_chunk(orom
, 1024*16) ? " 16M" : "",
1493 imsm_orom_has_chunk(orom
, 1024*32) ? " 32M" : "",
1494 imsm_orom_has_chunk(orom
, 1024*64) ? " 64M" : "");
1495 printf(" Max Disks : %d\n", orom
->tds
);
1496 printf(" Max Volumes : %d\n", orom
->vpa
);
1500 static int detail_platform_imsm(int verbose
, int enumerate_only
)
1502 /* There are two components to imsm platform support, the ahci SATA
1503 * controller and the option-rom. To find the SATA controller we
1504 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1505 * controller with the Intel vendor id is present. This approach
1506 * allows mdadm to leverage the kernel's ahci detection logic, with the
1507 * caveat that if ahci.ko is not loaded mdadm will not be able to
1508 * detect platform raid capabilities. The option-rom resides in a
1509 * platform "Adapter ROM". We scan for its signature to retrieve the
1510 * platform capabilities. If raid support is disabled in the BIOS the
1511 * option-rom capability structure will not be available.
1513 const struct imsm_orom
*orom
;
1514 struct sys_dev
*list
, *hba
;
1519 if (enumerate_only
) {
1520 if (check_env("IMSM_NO_PLATFORM"))
1522 list
= find_intel_devices();
1525 for (hba
= list
; hba
; hba
= hba
->next
) {
1526 orom
= find_imsm_capability(hba
->type
);
1532 free_sys_dev(&list
);
1536 list
= find_intel_devices();
1539 fprintf(stderr
, Name
": no active Intel(R) RAID "
1540 "controller found.\n");
1541 free_sys_dev(&list
);
1544 print_found_intel_controllers(list
);
1546 for (hba
= list
; hba
; hba
= hba
->next
) {
1547 orom
= find_imsm_capability(hba
->type
);
1549 fprintf(stderr
, Name
": imsm capabilities not found for controller: %s (type %s)\n",
1550 hba
->path
, get_sys_dev_type(hba
->type
));
1552 print_imsm_capability(orom
);
1555 for (hba
= list
; hba
; hba
= hba
->next
) {
1556 printf(" I/O Controller : %s (%s)\n",
1557 hba
->path
, get_sys_dev_type(hba
->type
));
1559 if (hba
->type
== SYS_DEV_SATA
) {
1560 host_base
= ahci_get_port_count(hba
->path
, &port_count
);
1561 if (ahci_enumerate_ports(hba
->path
, port_count
, host_base
, verbose
)) {
1563 fprintf(stderr
, Name
": failed to enumerate "
1564 "ports on SATA controller at %s.", hba
->pci_id
);
1570 free_sys_dev(&list
);
/* homehost matching is not possible for imsm: the metadata carries no host
 * identity, so this always answers "cannot tell". */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* The imsm metadata format does not specify any host
	 * identification information, so we can never confirm nor deny
	 * whether a given array is "meant" for this host.  We rely on
	 * compare_super and the 'family_num' fields to exclude member
	 * disks that do not belong, and on mdadm.conf to specify the
	 * arrays that should be assembled.  Auto-assembly may still pick
	 * up "foreign" arrays.
	 */
	return -1;
}
1589 static void uuid_from_super_imsm(struct supertype
*st
, int uuid
[4])
1591 /* The uuid returned here is used for:
1592 * uuid to put into bitmap file (Create, Grow)
1593 * uuid for backup header when saving critical section (Grow)
1594 * comparing uuids when re-adding a device into an array
1595 * In these cases the uuid required is that of the data-array,
1596 * not the device-set.
1597 * uuid to recognise same set when adding a missing device back
1598 * to an array. This is a uuid for the device-set.
1600 * For each of these we can make do with a truncated
1601 * or hashed uuid rather than the original, as long as
1603 * In each case the uuid required is that of the data-array,
1604 * not the device-set.
1606 /* imsm does not track uuid's so we synthesis one using sha1 on
1607 * - The signature (Which is constant for all imsm array, but no matter)
1608 * - the orig_family_num of the container
1609 * - the index number of the volume
1610 * - the 'serial' number of the volume.
1611 * Hopefully these are all constant.
1613 struct intel_super
*super
= st
->sb
;
1616 struct sha1_ctx ctx
;
1617 struct imsm_dev
*dev
= NULL
;
1620 /* some mdadm versions failed to set ->orig_family_num, in which
1621 * case fall back to ->family_num. orig_family_num will be
1622 * fixed up with the first metadata update.
1624 family_num
= super
->anchor
->orig_family_num
;
1625 if (family_num
== 0)
1626 family_num
= super
->anchor
->family_num
;
1627 sha1_init_ctx(&ctx
);
1628 sha1_process_bytes(super
->anchor
->sig
, MPB_SIG_LEN
, &ctx
);
1629 sha1_process_bytes(&family_num
, sizeof(__u32
), &ctx
);
1630 if (super
->current_vol
>= 0)
1631 dev
= get_imsm_dev(super
, super
->current_vol
);
1633 __u32 vol
= super
->current_vol
;
1634 sha1_process_bytes(&vol
, sizeof(vol
), &ctx
);
1635 sha1_process_bytes(dev
->volume
, MAX_RAID_SERIAL_LEN
, &ctx
);
1637 sha1_finish_ctx(&ctx
, buf
);
1638 memcpy(uuid
, buf
, 4*4);
1643 get_imsm_numerical_version(struct imsm_super
*mpb
, int *m
, int *p
)
1645 __u8
*v
= get_imsm_version(mpb
);
1646 __u8
*end
= mpb
->sig
+ MAX_SIGNATURE_LENGTH
;
1647 char major
[] = { 0, 0, 0 };
1648 char minor
[] = { 0 ,0, 0 };
1649 char patch
[] = { 0, 0, 0 };
1650 char *ver_parse
[] = { major
, minor
, patch
};
1654 while (*v
!= '\0' && v
< end
) {
1655 if (*v
!= '.' && j
< 2)
1656 ver_parse
[i
][j
++] = *v
;
1664 *m
= strtol(minor
, NULL
, 0);
1665 *p
= strtol(patch
, NULL
, 0);
1669 static __u32
migr_strip_blocks_resync(struct imsm_dev
*dev
)
1671 /* migr_strip_size when repairing or initializing parity */
1672 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1673 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1675 switch (get_imsm_raid_level(map
)) {
1680 return 128*1024 >> 9;
1684 static __u32
migr_strip_blocks_rebuild(struct imsm_dev
*dev
)
1686 /* migr_strip_size when rebuilding a degraded disk, no idea why
1687 * this is different than migr_strip_size_resync(), but it's good
1690 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1691 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1693 switch (get_imsm_raid_level(map
)) {
1696 if (map
->num_members
% map
->num_domains
== 0)
1697 return 128*1024 >> 9;
1701 return max((__u32
) 64*1024 >> 9, chunk
);
1703 return 128*1024 >> 9;
1707 static __u32
num_stripes_per_unit_resync(struct imsm_dev
*dev
)
1709 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1710 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1711 __u32 lo_chunk
= __le32_to_cpu(lo
->blocks_per_strip
);
1712 __u32 hi_chunk
= __le32_to_cpu(hi
->blocks_per_strip
);
1714 return max((__u32
) 1, hi_chunk
/ lo_chunk
);
1717 static __u32
num_stripes_per_unit_rebuild(struct imsm_dev
*dev
)
1719 struct imsm_map
*lo
= get_imsm_map(dev
, 0);
1720 int level
= get_imsm_raid_level(lo
);
1722 if (level
== 1 || level
== 10) {
1723 struct imsm_map
*hi
= get_imsm_map(dev
, 1);
1725 return hi
->num_domains
;
1727 return num_stripes_per_unit_resync(dev
);
1730 static __u8
imsm_num_data_members(struct imsm_dev
*dev
, int second_map
)
1732 /* named 'imsm_' because raid0, raid1 and raid10
1733 * counter-intuitively have the same number of data disks
1735 struct imsm_map
*map
= get_imsm_map(dev
, second_map
);
1737 switch (get_imsm_raid_level(map
)) {
1741 return map
->num_members
;
1743 return map
->num_members
- 1;
1745 dprintf("%s: unsupported raid level\n", __func__
);
1750 static __u32
parity_segment_depth(struct imsm_dev
*dev
)
1752 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1753 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1755 switch(get_imsm_raid_level(map
)) {
1758 return chunk
* map
->num_domains
;
1760 return chunk
* map
->num_members
;
1766 static __u32
map_migr_block(struct imsm_dev
*dev
, __u32 block
)
1768 struct imsm_map
*map
= get_imsm_map(dev
, 1);
1769 __u32 chunk
= __le32_to_cpu(map
->blocks_per_strip
);
1770 __u32 strip
= block
/ chunk
;
1772 switch (get_imsm_raid_level(map
)) {
1775 __u32 vol_strip
= (strip
* map
->num_domains
) + 1;
1776 __u32 vol_stripe
= vol_strip
/ map
->num_members
;
1778 return vol_stripe
* chunk
+ block
% chunk
;
1780 __u32 stripe
= strip
/ (map
->num_members
- 1);
1782 return stripe
* chunk
+ block
% chunk
;
1789 static __u64
blocks_per_migr_unit(struct intel_super
*super
,
1790 struct imsm_dev
*dev
)
1792 /* calculate the conversion factor between per member 'blocks'
1793 * (md/{resync,rebuild}_start) and imsm migration units, return
1794 * 0 for the 'not migrating' and 'unsupported migration' cases
1796 if (!dev
->vol
.migr_state
)
1799 switch (migr_type(dev
)) {
1800 case MIGR_GEN_MIGR
: {
1801 struct migr_record
*migr_rec
= super
->migr_rec
;
1802 return __le32_to_cpu(migr_rec
->blocks_per_unit
);
1807 struct imsm_map
*map
= get_imsm_map(dev
, 0);
1808 __u32 stripes_per_unit
;
1809 __u32 blocks_per_unit
;
1818 /* yes, this is really the translation of migr_units to
1819 * per-member blocks in the 'resync' case
1821 stripes_per_unit
= num_stripes_per_unit_resync(dev
);
1822 migr_chunk
= migr_strip_blocks_resync(dev
);
1823 disks
= imsm_num_data_members(dev
, 0);
1824 blocks_per_unit
= stripes_per_unit
* migr_chunk
* disks
;
1825 stripe
= __le32_to_cpu(map
->blocks_per_strip
) * disks
;
1826 segment
= blocks_per_unit
/ stripe
;
1827 block_rel
= blocks_per_unit
- segment
* stripe
;
1828 parity_depth
= parity_segment_depth(dev
);
1829 block_map
= map_migr_block(dev
, block_rel
);
1830 return block_map
+ parity_depth
* segment
;
1832 case MIGR_REBUILD
: {
1833 __u32 stripes_per_unit
;
1836 stripes_per_unit
= num_stripes_per_unit_rebuild(dev
);
1837 migr_chunk
= migr_strip_blocks_rebuild(dev
);
1838 return migr_chunk
* stripes_per_unit
;
1840 case MIGR_STATE_CHANGE
:
1846 static int imsm_level_to_layout(int level
)
1854 return ALGORITHM_LEFT_ASYMMETRIC
;
1861 /*******************************************************************************
1862 * Function: read_imsm_migr_rec
1863 * Description: Function reads imsm migration record from last sector of disk
1865 * fd : disk descriptor
1866 * super : metadata info
1870 ******************************************************************************/
1871 static int read_imsm_migr_rec(int fd
, struct intel_super
*super
)
1874 unsigned long long dsize
;
1876 get_dev_size(fd
, NULL
, &dsize
);
1877 if (lseek64(fd
, dsize
- 512, SEEK_SET
) < 0) {
1879 Name
": Cannot seek to anchor block: %s\n",
1883 if (read(fd
, super
->migr_rec_buf
, 512) != 512) {
1885 Name
": Cannot read migr record block: %s\n",
1895 /*******************************************************************************
1896 * Function: load_imsm_migr_rec
1897 * Description: Function reads imsm migration record (it is stored at the last
1900 * super : imsm internal array info
1901 * info : general array info
1905 ******************************************************************************/
1906 static int load_imsm_migr_rec(struct intel_super
*super
, struct mdinfo
*info
)
1909 struct dl
*dl
= NULL
;
1915 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
1916 /* read only from one of the first two slots */
1917 if ((sd
->disk
.raid_disk
> 1) ||
1918 (sd
->disk
.raid_disk
< 0))
1920 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
1921 fd
= dev_open(nm
, O_RDONLY
);
1927 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
1928 /* read only from one of the first two slots */
1931 sprintf(nm
, "%d:%d", dl
->major
, dl
->minor
);
1932 fd
= dev_open(nm
, O_RDONLY
);
1939 retval
= read_imsm_migr_rec(fd
, super
);
1947 /*******************************************************************************
1948 * function: imsm_create_metadata_checkpoint_update
1949 * Description: It creates update for checkpoint change.
1951 * super : imsm internal array info
1952 * u : pointer to prepared update
1955 * If length is equal to 0, input pointer u contains no update
1956 ******************************************************************************/
1957 static int imsm_create_metadata_checkpoint_update(
1958 struct intel_super
*super
,
1959 struct imsm_update_general_migration_checkpoint
**u
)
1962 int update_memory_size
= 0;
1964 dprintf("imsm_create_metadata_checkpoint_update(enter)\n");
1970 /* size of all update data without anchor */
1971 update_memory_size
=
1972 sizeof(struct imsm_update_general_migration_checkpoint
);
1974 *u
= calloc(1, update_memory_size
);
1976 dprintf("error: cannot get memory for "
1977 "imsm_create_metadata_checkpoint_update update\n");
1980 (*u
)->type
= update_general_migration_checkpoint
;
1981 (*u
)->curr_migr_unit
= __le32_to_cpu(super
->migr_rec
->curr_migr_unit
);
1982 dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n",
1983 (*u
)->curr_migr_unit
);
1985 return update_memory_size
;
1989 static void imsm_update_metadata_locally(struct supertype
*st
,
1990 void *buf
, int len
);
1992 /*******************************************************************************
1993 * Function: write_imsm_migr_rec
1994 * Description: Function writes imsm migration record
1995 * (at the last sector of disk)
1997 * super : imsm internal array info
2001 ******************************************************************************/
2002 static int write_imsm_migr_rec(struct supertype
*st
)
2004 struct intel_super
*super
= st
->sb
;
2005 unsigned long long dsize
;
2011 struct imsm_update_general_migration_checkpoint
*u
;
2013 for (sd
= super
->disks
; sd
; sd
= sd
->next
) {
2014 /* write to 2 first slots only */
2015 if ((sd
->index
< 0) || (sd
->index
> 1))
2017 sprintf(nm
, "%d:%d", sd
->major
, sd
->minor
);
2018 fd
= dev_open(nm
, O_RDWR
);
2021 get_dev_size(fd
, NULL
, &dsize
);
2022 if (lseek64(fd
, dsize
- 512, SEEK_SET
) < 0) {
2024 Name
": Cannot seek to anchor block: %s\n",
2028 if (write(fd
, super
->migr_rec_buf
, 512) != 512) {
2030 Name
": Cannot write migr record block: %s\n",
2037 /* update checkpoint information in metadata */
2038 len
= imsm_create_metadata_checkpoint_update(super
, &u
);
2041 dprintf("imsm: Cannot prepare update\n");
2044 /* update metadata locally */
2045 imsm_update_metadata_locally(st
, u
, len
);
2046 /* and possibly remotely */
2047 if (st
->update_tail
) {
2048 append_metadata_update(st
, u
, len
);
2049 /* during reshape we do all work inside metadata handler
2050 * manage_reshape(), so metadata update has to be triggered
2053 flush_metadata_updates(st
);
2054 st
->update_tail
= &st
->updates
;
2065 static void getinfo_super_imsm_volume(struct supertype
*st
, struct mdinfo
*info
, char *dmap
)
2067 struct intel_super
*super
= st
->sb
;
2068 struct migr_record
*migr_rec
= super
->migr_rec
;
2069 struct imsm_dev
*dev
= get_imsm_dev(super
, super
->current_vol
);
2070 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2071 struct imsm_map
*prev_map
= get_imsm_map(dev
, 1);
2072 struct imsm_map
*map_to_analyse
= map
;
2075 unsigned int component_size_alligment
;
2076 int map_disks
= info
->array
.raid_disks
;
2078 memset(info
, 0, sizeof(*info
));
2080 map_to_analyse
= prev_map
;
2082 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2083 if (dl
->raiddisk
== info
->disk
.raid_disk
)
2085 info
->container_member
= super
->current_vol
;
2086 info
->array
.raid_disks
= map
->num_members
;
2087 info
->array
.level
= get_imsm_raid_level(map_to_analyse
);
2088 info
->array
.layout
= imsm_level_to_layout(info
->array
.level
);
2089 info
->array
.md_minor
= -1;
2090 info
->array
.ctime
= 0;
2091 info
->array
.utime
= 0;
2092 info
->array
.chunk_size
=
2093 __le16_to_cpu(map_to_analyse
->blocks_per_strip
) << 9;
2094 info
->array
.state
= !dev
->vol
.dirty
;
2095 info
->custom_array_size
= __le32_to_cpu(dev
->size_high
);
2096 info
->custom_array_size
<<= 32;
2097 info
->custom_array_size
|= __le32_to_cpu(dev
->size_low
);
2098 if (prev_map
&& map
->map_state
== prev_map
->map_state
) {
2099 info
->reshape_active
= 1;
2100 info
->new_level
= get_imsm_raid_level(map
);
2101 info
->new_layout
= imsm_level_to_layout(info
->new_level
);
2102 info
->new_chunk
= __le16_to_cpu(map
->blocks_per_strip
) << 9;
2103 info
->delta_disks
= map
->num_members
- prev_map
->num_members
;
2104 if (info
->delta_disks
) {
2105 /* this needs to be applied to every array
2108 info
->reshape_active
= 2;
2110 /* We shape information that we give to md might have to be
2111 * modify to cope with md's requirement for reshaping arrays.
2112 * For example, when reshaping a RAID0, md requires it to be
2113 * presented as a degraded RAID4.
2114 * Also if a RAID0 is migrating to a RAID5 we need to specify
2115 * the array as already being RAID5, but the 'before' layout
2116 * is a RAID4-like layout.
2118 switch (info
->array
.level
) {
2120 switch(info
->new_level
) {
2122 /* conversion is happening as RAID4 */
2123 info
->array
.level
= 4;
2124 info
->array
.raid_disks
+= 1;
2127 /* conversion is happening as RAID5 */
2128 info
->array
.level
= 5;
2129 info
->array
.layout
= ALGORITHM_PARITY_N
;
2130 info
->array
.raid_disks
+= 1;
2131 info
->delta_disks
-= 1;
2134 /* FIXME error message */
2135 info
->array
.level
= UnSet
;
2141 info
->new_level
= UnSet
;
2142 info
->new_layout
= UnSet
;
2143 info
->new_chunk
= info
->array
.chunk_size
;
2144 info
->delta_disks
= 0;
2146 info
->disk
.major
= 0;
2147 info
->disk
.minor
= 0;
2149 info
->disk
.major
= dl
->major
;
2150 info
->disk
.minor
= dl
->minor
;
2153 info
->data_offset
= __le32_to_cpu(map_to_analyse
->pba_of_lba0
);
2154 info
->component_size
=
2155 __le32_to_cpu(map_to_analyse
->blocks_per_member
);
2157 /* check component size aligment
2159 component_size_alligment
=
2160 info
->component_size
% (info
->array
.chunk_size
/512);
2162 if (component_size_alligment
&&
2163 (info
->array
.level
!= 1) && (info
->array
.level
!= UnSet
)) {
2164 dprintf("imsm: reported component size alligned from %llu ",
2165 info
->component_size
);
2166 info
->component_size
-= component_size_alligment
;
2167 dprintf("to %llu (%i).\n",
2168 info
->component_size
, component_size_alligment
);
2171 memset(info
->uuid
, 0, sizeof(info
->uuid
));
2172 info
->recovery_start
= MaxSector
;
2174 info
->reshape_progress
= 0;
2175 info
->resync_start
= MaxSector
;
2176 if (map_to_analyse
->map_state
== IMSM_T_STATE_UNINITIALIZED
||
2178 info
->resync_start
= 0;
2180 if (dev
->vol
.migr_state
) {
2181 switch (migr_type(dev
)) {
2184 __u64 blocks_per_unit
= blocks_per_migr_unit(super
,
2186 __u64 units
= __le32_to_cpu(dev
->vol
.curr_migr_unit
);
2188 info
->resync_start
= blocks_per_unit
* units
;
2191 case MIGR_GEN_MIGR
: {
2192 __u64 blocks_per_unit
= blocks_per_migr_unit(super
,
2194 __u64 units
= __le32_to_cpu(migr_rec
->curr_migr_unit
);
2195 unsigned long long array_blocks
;
2198 info
->reshape_progress
= blocks_per_unit
* units
;
2200 dprintf("IMSM: General Migration checkpoint : %llu "
2201 "(%llu) -> read reshape progress : %llu\n",
2202 units
, blocks_per_unit
, info
->reshape_progress
);
2204 used_disks
= imsm_num_data_members(dev
, 1);
2205 if (used_disks
> 0) {
2206 array_blocks
= map
->blocks_per_member
*
2208 /* round array size down to closest MB
2210 info
->custom_array_size
= (array_blocks
2211 >> SECT_PER_MB_SHIFT
)
2212 << SECT_PER_MB_SHIFT
;
2216 /* we could emulate the checkpointing of
2217 * 'sync_action=check' migrations, but for now
2218 * we just immediately complete them
2221 /* this is handled by container_content_imsm() */
2222 case MIGR_STATE_CHANGE
:
2223 /* FIXME handle other migrations */
2225 /* we are not dirty, so... */
2226 info
->resync_start
= MaxSector
;
2230 strncpy(info
->name
, (char *) dev
->volume
, MAX_RAID_SERIAL_LEN
);
2231 info
->name
[MAX_RAID_SERIAL_LEN
] = 0;
2233 info
->array
.major_version
= -1;
2234 info
->array
.minor_version
= -2;
2235 devname
= devnum2devname(st
->container_dev
);
2236 *info
->text_version
= '\0';
2238 sprintf(info
->text_version
, "/%s/%d", devname
, info
->container_member
);
2240 info
->safe_mode_delay
= 4000; /* 4 secs like the Matrix driver */
2241 uuid_from_super_imsm(st
, info
->uuid
);
2245 for (i
=0; i
<map_disks
; i
++) {
2247 if (i
< info
->array
.raid_disks
) {
2248 struct imsm_disk
*dsk
;
2249 j
= get_imsm_disk_idx(dev
, i
, -1);
2250 dsk
= get_imsm_disk(super
, j
);
2251 if (dsk
&& (dsk
->status
& CONFIGURED_DISK
))
2258 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
);
2259 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
);
2261 static struct imsm_disk
*get_imsm_missing(struct intel_super
*super
, __u8 index
)
2265 for (d
= super
->missing
; d
; d
= d
->next
)
2266 if (d
->index
== index
)
2271 static void getinfo_super_imsm(struct supertype
*st
, struct mdinfo
*info
, char *map
)
2273 struct intel_super
*super
= st
->sb
;
2274 struct imsm_disk
*disk
;
2275 int map_disks
= info
->array
.raid_disks
;
2276 int max_enough
= -1;
2278 struct imsm_super
*mpb
;
2280 if (super
->current_vol
>= 0) {
2281 getinfo_super_imsm_volume(st
, info
, map
);
2284 memset(info
, 0, sizeof(*info
));
2286 /* Set raid_disks to zero so that Assemble will always pull in valid
2289 info
->array
.raid_disks
= 0;
2290 info
->array
.level
= LEVEL_CONTAINER
;
2291 info
->array
.layout
= 0;
2292 info
->array
.md_minor
= -1;
2293 info
->array
.ctime
= 0; /* N/A for imsm */
2294 info
->array
.utime
= 0;
2295 info
->array
.chunk_size
= 0;
2297 info
->disk
.major
= 0;
2298 info
->disk
.minor
= 0;
2299 info
->disk
.raid_disk
= -1;
2300 info
->reshape_active
= 0;
2301 info
->array
.major_version
= -1;
2302 info
->array
.minor_version
= -2;
2303 strcpy(info
->text_version
, "imsm");
2304 info
->safe_mode_delay
= 0;
2305 info
->disk
.number
= -1;
2306 info
->disk
.state
= 0;
2308 info
->recovery_start
= MaxSector
;
2310 /* do we have the all the insync disks that we expect? */
2311 mpb
= super
->anchor
;
2313 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
2314 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
2315 int failed
, enough
, j
, missing
= 0;
2316 struct imsm_map
*map
;
2319 failed
= imsm_count_failed(super
, dev
);
2320 state
= imsm_check_degraded(super
, dev
, failed
);
2321 map
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2323 /* any newly missing disks?
2324 * (catches single-degraded vs double-degraded)
2326 for (j
= 0; j
< map
->num_members
; j
++) {
2327 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
2328 __u32 idx
= ord_to_idx(ord
);
2330 if (!(ord
& IMSM_ORD_REBUILD
) &&
2331 get_imsm_missing(super
, idx
)) {
2337 if (state
== IMSM_T_STATE_FAILED
)
2339 else if (state
== IMSM_T_STATE_DEGRADED
&&
2340 (state
!= map
->map_state
|| missing
))
2342 else /* we're normal, or already degraded */
2345 /* in the missing/failed disk case check to see
2346 * if at least one array is runnable
2348 max_enough
= max(max_enough
, enough
);
2350 dprintf("%s: enough: %d\n", __func__
, max_enough
);
2351 info
->container_enough
= max_enough
;
2354 __u32 reserved
= imsm_reserved_sectors(super
, super
->disks
);
2356 disk
= &super
->disks
->disk
;
2357 info
->data_offset
= __le32_to_cpu(disk
->total_blocks
) - reserved
;
2358 info
->component_size
= reserved
;
2359 info
->disk
.state
= is_configured(disk
) ? (1 << MD_DISK_ACTIVE
) : 0;
2360 /* we don't change info->disk.raid_disk here because
2361 * this state will be finalized in mdmon after we have
2362 * found the 'most fresh' version of the metadata
2364 info
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2365 info
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2368 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2369 * ->compare_super may have updated the 'num_raid_devs' field for spares
2371 if (info
->disk
.state
& (1 << MD_DISK_SYNC
) || super
->anchor
->num_raid_devs
)
2372 uuid_from_super_imsm(st
, info
->uuid
);
2374 memcpy(info
->uuid
, uuid_zero
, sizeof(uuid_zero
));
2376 /* I don't know how to compute 'map' on imsm, so use safe default */
2379 for (i
= 0; i
< map_disks
; i
++)
2385 /* allocates memory and fills disk in mdinfo structure
2386 * for each disk in array */
2387 struct mdinfo
*getinfo_super_disks_imsm(struct supertype
*st
)
2389 struct mdinfo
*mddev
= NULL
;
2390 struct intel_super
*super
= st
->sb
;
2391 struct imsm_disk
*disk
;
2394 if (!super
|| !super
->disks
)
2397 mddev
= malloc(sizeof(*mddev
));
2399 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2402 memset(mddev
, 0, sizeof(*mddev
));
2406 tmp
= malloc(sizeof(*tmp
));
2408 fprintf(stderr
, Name
": Failed to allocate memory.\n");
2413 memset(tmp
, 0, sizeof(*tmp
));
2415 tmp
->next
= mddev
->devs
;
2417 tmp
->disk
.number
= count
++;
2418 tmp
->disk
.major
= dl
->major
;
2419 tmp
->disk
.minor
= dl
->minor
;
2420 tmp
->disk
.state
= is_configured(disk
) ?
2421 (1 << MD_DISK_ACTIVE
) : 0;
2422 tmp
->disk
.state
|= is_failed(disk
) ? (1 << MD_DISK_FAULTY
) : 0;
2423 tmp
->disk
.state
|= is_spare(disk
) ? 0 : (1 << MD_DISK_SYNC
);
2424 tmp
->disk
.raid_disk
= -1;
2430 static int update_super_imsm(struct supertype
*st
, struct mdinfo
*info
,
2431 char *update
, char *devname
, int verbose
,
2432 int uuid_set
, char *homehost
)
2434 /* For 'assemble' and 'force' we need to return non-zero if any
2435 * change was made. For others, the return value is ignored.
2436 * Update options are:
2437 * force-one : This device looks a bit old but needs to be included,
2438 * update age info appropriately.
2439 * assemble: clear any 'faulty' flag to allow this device to
2441 * force-array: Array is degraded but being forced, mark it clean
2442 * if that will be needed to assemble it.
2444 * newdev: not used ????
2445 * grow: Array has gained a new device - this is currently for
2447 * resync: mark as dirty so a resync will happen.
2448 * name: update the name - preserving the homehost
2449 * uuid: Change the uuid of the array to match watch is given
2451 * Following are not relevant for this imsm:
2452 * sparc2.2 : update from old dodgey metadata
2453 * super-minor: change the preferred_minor number
2454 * summaries: update redundant counters.
2455 * homehost: update the recorded homehost
2456 * _reshape_progress: record new reshape_progress position.
2459 struct intel_super
*super
= st
->sb
;
2460 struct imsm_super
*mpb
;
2462 /* we can only update container info */
2463 if (!super
|| super
->current_vol
>= 0 || !super
->anchor
)
2466 mpb
= super
->anchor
;
2468 if (strcmp(update
, "uuid") == 0 && uuid_set
&& !info
->update_private
)
2470 else if (strcmp(update
, "uuid") == 0 && uuid_set
&& info
->update_private
) {
2471 mpb
->orig_family_num
= *((__u32
*) info
->update_private
);
2473 } else if (strcmp(update
, "uuid") == 0) {
2474 __u32
*new_family
= malloc(sizeof(*new_family
));
2476 /* update orig_family_number with the incoming random
2477 * data, report the new effective uuid, and store the
2478 * new orig_family_num for future updates.
2481 memcpy(&mpb
->orig_family_num
, info
->uuid
, sizeof(__u32
));
2482 uuid_from_super_imsm(st
, info
->uuid
);
2483 *new_family
= mpb
->orig_family_num
;
2484 info
->update_private
= new_family
;
2487 } else if (strcmp(update
, "assemble") == 0)
2492 /* successful update? recompute checksum */
2494 mpb
->check_sum
= __le32_to_cpu(__gen_imsm_checksum(mpb
));
2499 static size_t disks_to_mpb_size(int disks
)
2503 size
= sizeof(struct imsm_super
);
2504 size
+= (disks
- 1) * sizeof(struct imsm_disk
);
2505 size
+= 2 * sizeof(struct imsm_dev
);
2506 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2507 size
+= (4 - 2) * sizeof(struct imsm_map
);
2508 /* 4 possible disk_ord_tbl's */
2509 size
+= 4 * (disks
- 1) * sizeof(__u32
);
2514 static __u64
avail_size_imsm(struct supertype
*st
, __u64 devsize
)
2516 if (devsize
< (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
))
2519 return devsize
- (MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
);
2522 static void free_devlist(struct intel_super
*super
)
2524 struct intel_dev
*dv
;
2526 while (super
->devlist
) {
2527 dv
= super
->devlist
->next
;
2528 free(super
->devlist
->dev
);
2529 free(super
->devlist
);
2530 super
->devlist
= dv
;
/* imsm_copy_dev() - copy one raid device description.
 * NOTE(review): the second argument to sizeof_imsm_dev() presumably
 * selects the non-migration (single map) size -- confirm against
 * sizeof_imsm_dev()'s definition.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2539 static int compare_super_imsm(struct supertype
*st
, struct supertype
*tst
)
2543 * 0 same, or first was empty, and second was copied
2544 * 1 second had wrong number
2546 * 3 wrong other info
2548 struct intel_super
*first
= st
->sb
;
2549 struct intel_super
*sec
= tst
->sb
;
2556 /* in platform dependent environment test if the disks
2557 * use the same Intel hba
2559 if (!check_env("IMSM_NO_PLATFORM")) {
2560 if (!first
->hba
|| !sec
->hba
||
2561 (first
->hba
->type
!= sec
->hba
->type
)) {
2563 "HBAs of devices does not match %s != %s\n",
2564 first
->hba
? get_sys_dev_type(first
->hba
->type
) : NULL
,
2565 sec
->hba
? get_sys_dev_type(sec
->hba
->type
) : NULL
);
2570 /* if an anchor does not have num_raid_devs set then it is a free
2573 if (first
->anchor
->num_raid_devs
> 0 &&
2574 sec
->anchor
->num_raid_devs
> 0) {
2575 /* Determine if these disks might ever have been
2576 * related. Further disambiguation can only take place
2577 * in load_super_imsm_all
2579 __u32 first_family
= first
->anchor
->orig_family_num
;
2580 __u32 sec_family
= sec
->anchor
->orig_family_num
;
2582 if (memcmp(first
->anchor
->sig
, sec
->anchor
->sig
,
2583 MAX_SIGNATURE_LENGTH
) != 0)
2586 if (first_family
== 0)
2587 first_family
= first
->anchor
->family_num
;
2588 if (sec_family
== 0)
2589 sec_family
= sec
->anchor
->family_num
;
2591 if (first_family
!= sec_family
)
2597 /* if 'first' is a spare promote it to a populated mpb with sec's
2600 if (first
->anchor
->num_raid_devs
== 0 &&
2601 sec
->anchor
->num_raid_devs
> 0) {
2603 struct intel_dev
*dv
;
2604 struct imsm_dev
*dev
;
2606 /* we need to copy raid device info from sec if an allocation
2607 * fails here we don't associate the spare
2609 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++) {
2610 dv
= malloc(sizeof(*dv
));
2613 dev
= malloc(sizeof_imsm_dev(get_imsm_dev(sec
, i
), 1));
2620 dv
->next
= first
->devlist
;
2621 first
->devlist
= dv
;
2623 if (i
< sec
->anchor
->num_raid_devs
) {
2624 /* allocation failure */
2625 free_devlist(first
);
2626 fprintf(stderr
, "imsm: failed to associate spare\n");
2629 first
->anchor
->num_raid_devs
= sec
->anchor
->num_raid_devs
;
2630 first
->anchor
->orig_family_num
= sec
->anchor
->orig_family_num
;
2631 first
->anchor
->family_num
= sec
->anchor
->family_num
;
2632 memcpy(first
->anchor
->sig
, sec
->anchor
->sig
, MAX_SIGNATURE_LENGTH
);
2633 for (i
= 0; i
< sec
->anchor
->num_raid_devs
; i
++)
2634 imsm_copy_dev(get_imsm_dev(first
, i
), get_imsm_dev(sec
, i
));
2640 static void fd2devname(int fd
, char *name
)
2644 char dname
[PATH_MAX
];
2649 if (fstat(fd
, &st
) != 0)
2651 sprintf(path
, "/sys/dev/block/%d:%d",
2652 major(st
.st_rdev
), minor(st
.st_rdev
));
2654 rv
= readlink(path
, dname
, sizeof(dname
));
2659 nm
= strrchr(dname
, '/');
2661 snprintf(name
, MAX_RAID_SERIAL_LEN
, "/dev/%s", nm
);
2664 extern int scsi_get_serial(int fd
, void *buf
, size_t buf_len
);
2666 static int imsm_read_serial(int fd
, char *devname
,
2667 __u8 serial
[MAX_RAID_SERIAL_LEN
])
2669 unsigned char scsi_serial
[255];
2678 memset(scsi_serial
, 0, sizeof(scsi_serial
));
2680 rv
= scsi_get_serial(fd
, scsi_serial
, sizeof(scsi_serial
));
2682 if (rv
&& check_env("IMSM_DEVNAME_AS_SERIAL")) {
2683 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2684 fd2devname(fd
, (char *) serial
);
2691 Name
": Failed to retrieve serial for %s\n",
2696 rsp_len
= scsi_serial
[3];
2700 Name
": Failed to retrieve serial for %s\n",
2704 rsp_buf
= (char *) &scsi_serial
[4];
2706 /* trim all whitespace and non-printable characters and convert
2709 for (i
= 0, dest
= rsp_buf
; i
< rsp_len
; i
++) {
2712 /* ':' is reserved for use in placeholder serial
2713 * numbers for missing disks
2721 len
= dest
- rsp_buf
;
2724 /* truncate leading characters */
2725 if (len
> MAX_RAID_SERIAL_LEN
) {
2726 dest
+= len
- MAX_RAID_SERIAL_LEN
;
2727 len
= MAX_RAID_SERIAL_LEN
;
2730 memset(serial
, 0, MAX_RAID_SERIAL_LEN
);
2731 memcpy(serial
, dest
, len
);
2736 static int serialcmp(__u8
*s1
, __u8
*s2
)
2738 return strncmp((char *) s1
, (char *) s2
, MAX_RAID_SERIAL_LEN
);
2741 static void serialcpy(__u8
*dest
, __u8
*src
)
2743 strncpy((char *) dest
, (char *) src
, MAX_RAID_SERIAL_LEN
);
2747 static struct dl
*serial_to_dl(__u8
*serial
, struct intel_super
*super
)
2751 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
2752 if (serialcmp(dl
->serial
, serial
) == 0)
2759 static struct imsm_disk
*
2760 __serial_to_disk(__u8
*serial
, struct imsm_super
*mpb
, int *idx
)
2764 for (i
= 0; i
< mpb
->num_disks
; i
++) {
2765 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
2767 if (serialcmp(disk
->serial
, serial
) == 0) {
2778 load_imsm_disk(int fd
, struct intel_super
*super
, char *devname
, int keep_fd
)
2780 struct imsm_disk
*disk
;
2785 __u8 serial
[MAX_RAID_SERIAL_LEN
];
2787 rv
= imsm_read_serial(fd
, devname
, serial
);
2792 dl
= calloc(1, sizeof(*dl
));
2796 Name
": failed to allocate disk buffer for %s\n",
2802 dl
->major
= major(stb
.st_rdev
);
2803 dl
->minor
= minor(stb
.st_rdev
);
2804 dl
->next
= super
->disks
;
2805 dl
->fd
= keep_fd
? fd
: -1;
2806 assert(super
->disks
== NULL
);
2808 serialcpy(dl
->serial
, serial
);
2811 fd2devname(fd
, name
);
2813 dl
->devname
= strdup(devname
);
2815 dl
->devname
= strdup(name
);
2817 /* look up this disk's index in the current anchor */
2818 disk
= __serial_to_disk(dl
->serial
, super
->anchor
, &dl
->index
);
2821 /* only set index on disks that are a member of a
2822 * populated container, i.e. one with raid_devs
2824 if (is_failed(&dl
->disk
))
2826 else if (is_spare(&dl
->disk
))
2834 /* When migrating map0 contains the 'destination' state while map1
2835 * contains the current state. When not migrating map0 contains the
2836 * current state. This routine assumes that map[0].map_state is set to
2837 * the current array state before being called.
2839 * Migration is indicated by one of the following states
2840 * 1/ Idle (migr_state=0 map0state=normal||uninitialized||degraded||failed)
2841 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2842 * map1state=uninitialized)
2843 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2845 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2846 * map1state=degraded)
2847 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2850 static void migrate(struct imsm_dev
*dev
, struct intel_super
*super
,
2851 __u8 to_state
, int migr_type
)
2853 struct imsm_map
*dest
;
2854 struct imsm_map
*src
= get_imsm_map(dev
, 0);
2856 dev
->vol
.migr_state
= 1;
2857 set_migr_type(dev
, migr_type
);
2858 dev
->vol
.curr_migr_unit
= 0;
2859 dest
= get_imsm_map(dev
, 1);
2861 /* duplicate and then set the target end state in map[0] */
2862 memcpy(dest
, src
, sizeof_imsm_map(src
));
2863 if ((migr_type
== MIGR_REBUILD
) ||
2864 (migr_type
== MIGR_GEN_MIGR
)) {
2868 for (i
= 0; i
< src
->num_members
; i
++) {
2869 ord
= __le32_to_cpu(src
->disk_ord_tbl
[i
]);
2870 set_imsm_ord_tbl_ent(src
, i
, ord_to_idx(ord
));
2874 if (migr_type
== MIGR_GEN_MIGR
)
2875 /* Clear migration record */
2876 memset(super
->migr_rec
, 0, sizeof(struct migr_record
));
2878 src
->map_state
= to_state
;
2881 static void end_migration(struct imsm_dev
*dev
, __u8 map_state
)
2883 struct imsm_map
*map
= get_imsm_map(dev
, 0);
2884 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
2887 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2888 * completed in the last migration.
2890 * FIXME add support for raid-level-migration
2892 for (i
= 0; i
< prev
->num_members
; i
++)
2893 for (j
= 0; j
< map
->num_members
; j
++)
2894 /* during online capacity expansion
2895 * disks position can be changed if takeover is used
2897 if (ord_to_idx(map
->disk_ord_tbl
[j
]) ==
2898 ord_to_idx(prev
->disk_ord_tbl
[i
])) {
2899 map
->disk_ord_tbl
[j
] |= prev
->disk_ord_tbl
[i
];
2903 dev
->vol
.migr_state
= 0;
2904 dev
->vol
.migr_type
= 0;
2905 dev
->vol
.curr_migr_unit
= 0;
2906 map
->map_state
= map_state
;
2910 static int parse_raid_devices(struct intel_super
*super
)
2913 struct imsm_dev
*dev_new
;
2914 size_t len
, len_migr
;
2916 size_t space_needed
= 0;
2917 struct imsm_super
*mpb
= super
->anchor
;
2919 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2920 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
2921 struct intel_dev
*dv
;
2923 len
= sizeof_imsm_dev(dev_iter
, 0);
2924 len_migr
= sizeof_imsm_dev(dev_iter
, 1);
2926 space_needed
+= len_migr
- len
;
2928 dv
= malloc(sizeof(*dv
));
2931 if (max_len
< len_migr
)
2933 if (max_len
> len_migr
)
2934 space_needed
+= max_len
- len_migr
;
2935 dev_new
= malloc(max_len
);
2940 imsm_copy_dev(dev_new
, dev_iter
);
2943 dv
->next
= super
->devlist
;
2944 super
->devlist
= dv
;
2947 /* ensure that super->buf is large enough when all raid devices
2950 if (__le32_to_cpu(mpb
->mpb_size
) + space_needed
> super
->len
) {
2953 len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + space_needed
, 512);
2954 if (posix_memalign(&buf
, 512, len
) != 0)
2957 memcpy(buf
, super
->buf
, super
->len
);
2958 memset(buf
+ super
->len
, 0, len
- super
->len
);
2967 /* retrieve a pointer to the bbm log which starts after all raid devices */
2968 struct bbm_log
*__get_imsm_bbm_log(struct imsm_super
*mpb
)
2972 if (__le32_to_cpu(mpb
->bbm_log_size
)) {
2974 ptr
+= mpb
->mpb_size
- __le32_to_cpu(mpb
->bbm_log_size
);
2980 /*******************************************************************************
2981 * Function: check_mpb_migr_compatibility
2982 * Description: Function checks for unsupported migration features:
2983 * - migration optimization area (pba_of_lba0)
2984 * - descending reshape (ascending_migr)
2986 * super : imsm metadata information
2988 * 0 : migration is compatible
2989 * -1 : migration is not compatible
2990 ******************************************************************************/
2991 int check_mpb_migr_compatibility(struct intel_super
*super
)
2993 struct imsm_map
*map0
, *map1
;
2994 struct migr_record
*migr_rec
= super
->migr_rec
;
2997 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
2998 struct imsm_dev
*dev_iter
= __get_imsm_dev(super
->anchor
, i
);
3001 dev_iter
->vol
.migr_state
== 1 &&
3002 dev_iter
->vol
.migr_type
== MIGR_GEN_MIGR
) {
3003 /* This device is migrating */
3004 map0
= get_imsm_map(dev_iter
, 0);
3005 map1
= get_imsm_map(dev_iter
, 1);
3006 if (map0
->pba_of_lba0
!= map1
->pba_of_lba0
)
3007 /* migration optimization area was used */
3009 if (migr_rec
->ascending_migr
== 0
3010 && migr_rec
->dest_depth_per_unit
> 0)
3011 /* descending reshape not supported yet */
3018 static void __free_imsm(struct intel_super
*super
, int free_disks
);
3020 /* load_imsm_mpb - read matrix metadata
3021 * allocates super->mpb to be freed by free_imsm
3023 static int load_imsm_mpb(int fd
, struct intel_super
*super
, char *devname
)
3025 unsigned long long dsize
;
3026 unsigned long long sectors
;
3028 struct imsm_super
*anchor
;
3031 get_dev_size(fd
, NULL
, &dsize
);
3035 Name
": %s: device to small for imsm\n",
3040 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0) {
3043 Name
": Cannot seek to anchor block on %s: %s\n",
3044 devname
, strerror(errno
));
3048 if (posix_memalign((void**)&anchor
, 512, 512) != 0) {
3051 Name
": Failed to allocate imsm anchor buffer"
3052 " on %s\n", devname
);
3055 if (read(fd
, anchor
, 512) != 512) {
3058 Name
": Cannot read anchor block on %s: %s\n",
3059 devname
, strerror(errno
));
3064 if (strncmp((char *) anchor
->sig
, MPB_SIGNATURE
, MPB_SIG_LEN
) != 0) {
3067 Name
": no IMSM anchor on %s\n", devname
);
3072 __free_imsm(super
, 0);
3073 /* reload capability and hba */
3075 /* capability and hba must be updated with new super allocation */
3076 find_intel_hba_capability(fd
, super
, devname
);
3077 super
->len
= ROUND_UP(anchor
->mpb_size
, 512);
3078 if (posix_memalign(&super
->buf
, 512, super
->len
) != 0) {
3081 Name
": unable to allocate %zu byte mpb buffer\n",
3086 memcpy(super
->buf
, anchor
, 512);
3088 sectors
= mpb_sectors(anchor
) - 1;
3091 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
3092 fprintf(stderr
, Name
3093 ": %s could not allocate migr_rec buffer\n", __func__
);
3099 check_sum
= __gen_imsm_checksum(super
->anchor
);
3100 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
3103 Name
": IMSM checksum %x != %x on %s\n",
3105 __le32_to_cpu(super
->anchor
->check_sum
),
3113 /* read the extended mpb */
3114 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0) {
3117 Name
": Cannot seek to extended mpb on %s: %s\n",
3118 devname
, strerror(errno
));
3122 if ((unsigned)read(fd
, super
->buf
+ 512, super
->len
- 512) != super
->len
- 512) {
3125 Name
": Cannot read extended mpb on %s: %s\n",
3126 devname
, strerror(errno
));
3130 check_sum
= __gen_imsm_checksum(super
->anchor
);
3131 if (check_sum
!= __le32_to_cpu(super
->anchor
->check_sum
)) {
3134 Name
": IMSM checksum %x != %x on %s\n",
3135 check_sum
, __le32_to_cpu(super
->anchor
->check_sum
),
3140 /* FIXME the BBM log is disk specific so we cannot use this global
3141 * buffer for all disks. Ok for now since we only look at the global
3142 * bbm_log_size parameter to gate assembly
3144 super
->bbm_log
= __get_imsm_bbm_log(super
->anchor
);
3149 static int read_imsm_migr_rec(int fd
, struct intel_super
*super
);
/* load_and_parse_mpb() - read and unpack a device's IMSM metadata.
 * @fd: open device file descriptor.
 * @super: container structure to populate.
 * @devname: name used in error messages (may be NULL).
 * @keep_fd: when non-zero, the dl entry keeps @fd open.
 *
 * Runs the three load stages in order -- anchor, disk entry, raid
 * devices -- stopping at the first failure.
 * Returns: 0 on success, the failing stage's non-zero error otherwise.
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (err)
		return err;
	err = load_imsm_disk(fd, super, devname, keep_fd);
	if (err)
		return err;

	return parse_raid_devices(super);
}
3167 static void __free_imsm_disk(struct dl
*d
)
3179 static void free_imsm_disks(struct intel_super
*super
)
3183 while (super
->disks
) {
3185 super
->disks
= d
->next
;
3186 __free_imsm_disk(d
);
3188 while (super
->disk_mgmt_list
) {
3189 d
= super
->disk_mgmt_list
;
3190 super
->disk_mgmt_list
= d
->next
;
3191 __free_imsm_disk(d
);
3193 while (super
->missing
) {
3195 super
->missing
= d
->next
;
3196 __free_imsm_disk(d
);
3201 /* free all the pieces hanging off of a super pointer */
3202 static void __free_imsm(struct intel_super
*super
, int free_disks
)
3204 struct intel_hba
*elem
, *next
;
3210 /* unlink capability description */
3212 if (super
->migr_rec_buf
) {
3213 free(super
->migr_rec_buf
);
3214 super
->migr_rec_buf
= NULL
;
3217 free_imsm_disks(super
);
3218 free_devlist(super
);
3222 free((void *)elem
->path
);
/* free_imsm() - fully tear down @super: everything hanging off it
 * (including the disk lists, hence free_disks=1) plus the struct itself.
 */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
3236 static void free_super_imsm(struct supertype
*st
)
3238 struct intel_super
*super
= st
->sb
;
3247 static struct intel_super
*alloc_super(void)
3249 struct intel_super
*super
= malloc(sizeof(*super
));
3252 memset(super
, 0, sizeof(*super
));
3253 super
->current_vol
= -1;
3254 super
->create_offset
= ~((__u32
) 0);
3260 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3262 static int find_intel_hba_capability(int fd
, struct intel_super
*super
, char *devname
)
3264 struct sys_dev
*hba_name
;
3267 if ((fd
< 0) || check_env("IMSM_NO_PLATFORM")) {
3272 hba_name
= find_disk_attached_hba(fd
, NULL
);
3276 Name
": %s is not attached to Intel(R) RAID controller.\n",
3280 rv
= attach_hba_to_super(super
, hba_name
);
3283 struct intel_hba
*hba
= super
->hba
;
3285 fprintf(stderr
, Name
": %s is attached to Intel(R) %s RAID "
3286 "controller (%s),\n"
3287 " but the container is assigned to Intel(R) "
3288 "%s RAID controller (",
3291 hba_name
->pci_id
? : "Err!",
3292 get_sys_dev_type(hba_name
->type
));
3295 fprintf(stderr
, "%s", hba
->pci_id
? : "Err!");
3297 fprintf(stderr
, ", ");
3301 fprintf(stderr
, ").\n"
3302 " Mixing devices attached to different controllers "
3303 "is not allowed.\n");
3305 free_sys_dev(&hba_name
);
3308 super
->orom
= find_imsm_capability(hba_name
->type
);
3309 free_sys_dev(&hba_name
);
3316 /* find_missing - helper routine for load_super_imsm_all that identifies
3317 * disks that have disappeared from the system. This routine relies on
3318 * the mpb being uptodate, which it is at load time.
3320 static int find_missing(struct intel_super
*super
)
3323 struct imsm_super
*mpb
= super
->anchor
;
3325 struct imsm_disk
*disk
;
3327 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3328 disk
= __get_imsm_disk(mpb
, i
);
3329 dl
= serial_to_dl(disk
->serial
, super
);
3333 dl
= malloc(sizeof(*dl
));
3339 dl
->devname
= strdup("missing");
3341 serialcpy(dl
->serial
, disk
->serial
);
3344 dl
->next
= super
->missing
;
3345 super
->missing
= dl
;
3351 static struct intel_disk
*disk_list_get(__u8
*serial
, struct intel_disk
*disk_list
)
3353 struct intel_disk
*idisk
= disk_list
;
3356 if (serialcmp(idisk
->disk
.serial
, serial
) == 0)
3358 idisk
= idisk
->next
;
3364 static int __prep_thunderdome(struct intel_super
**table
, int tbl_size
,
3365 struct intel_super
*super
,
3366 struct intel_disk
**disk_list
)
3368 struct imsm_disk
*d
= &super
->disks
->disk
;
3369 struct imsm_super
*mpb
= super
->anchor
;
3372 for (i
= 0; i
< tbl_size
; i
++) {
3373 struct imsm_super
*tbl_mpb
= table
[i
]->anchor
;
3374 struct imsm_disk
*tbl_d
= &table
[i
]->disks
->disk
;
3376 if (tbl_mpb
->family_num
== mpb
->family_num
) {
3377 if (tbl_mpb
->check_sum
== mpb
->check_sum
) {
3378 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3379 __func__
, super
->disks
->major
,
3380 super
->disks
->minor
,
3381 table
[i
]->disks
->major
,
3382 table
[i
]->disks
->minor
);
3386 if (((is_configured(d
) && !is_configured(tbl_d
)) ||
3387 is_configured(d
) == is_configured(tbl_d
)) &&
3388 tbl_mpb
->generation_num
< mpb
->generation_num
) {
3389 /* current version of the mpb is a
3390 * better candidate than the one in
3391 * super_table, but copy over "cross
3392 * generational" status
3394 struct intel_disk
*idisk
;
3396 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3397 __func__
, super
->disks
->major
,
3398 super
->disks
->minor
,
3399 table
[i
]->disks
->major
,
3400 table
[i
]->disks
->minor
);
3402 idisk
= disk_list_get(tbl_d
->serial
, *disk_list
);
3403 if (idisk
&& is_failed(&idisk
->disk
))
3404 tbl_d
->status
|= FAILED_DISK
;
3407 struct intel_disk
*idisk
;
3408 struct imsm_disk
*disk
;
3410 /* tbl_mpb is more up to date, but copy
3411 * over cross generational status before
3414 disk
= __serial_to_disk(d
->serial
, mpb
, NULL
);
3415 if (disk
&& is_failed(disk
))
3416 d
->status
|= FAILED_DISK
;
3418 idisk
= disk_list_get(d
->serial
, *disk_list
);
3421 if (disk
&& is_configured(disk
))
3422 idisk
->disk
.status
|= CONFIGURED_DISK
;
3425 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3426 __func__
, super
->disks
->major
,
3427 super
->disks
->minor
,
3428 table
[i
]->disks
->major
,
3429 table
[i
]->disks
->minor
);
3437 table
[tbl_size
++] = super
;
3441 /* update/extend the merged list of imsm_disk records */
3442 for (j
= 0; j
< mpb
->num_disks
; j
++) {
3443 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, j
);
3444 struct intel_disk
*idisk
;
3446 idisk
= disk_list_get(disk
->serial
, *disk_list
);
3448 idisk
->disk
.status
|= disk
->status
;
3449 if (is_configured(&idisk
->disk
) ||
3450 is_failed(&idisk
->disk
))
3451 idisk
->disk
.status
&= ~(SPARE_DISK
);
3453 idisk
= calloc(1, sizeof(*idisk
));
3456 idisk
->owner
= IMSM_UNKNOWN_OWNER
;
3457 idisk
->disk
= *disk
;
3458 idisk
->next
= *disk_list
;
3462 if (serialcmp(idisk
->disk
.serial
, d
->serial
) == 0)
3469 static struct intel_super
*
3470 validate_members(struct intel_super
*super
, struct intel_disk
*disk_list
,
3473 struct imsm_super
*mpb
= super
->anchor
;
3477 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3478 struct imsm_disk
*disk
= __get_imsm_disk(mpb
, i
);
3479 struct intel_disk
*idisk
;
3481 idisk
= disk_list_get(disk
->serial
, disk_list
);
3483 if (idisk
->owner
== owner
||
3484 idisk
->owner
== IMSM_UNKNOWN_OWNER
)
3487 dprintf("%s: '%.16s' owner %d != %d\n",
3488 __func__
, disk
->serial
, idisk
->owner
,
3491 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3492 __func__
, __le32_to_cpu(mpb
->family_num
), i
,
3498 if (ok_count
== mpb
->num_disks
)
3503 static void show_conflicts(__u32 family_num
, struct intel_super
*super_list
)
3505 struct intel_super
*s
;
3507 for (s
= super_list
; s
; s
= s
->next
) {
3508 if (family_num
!= s
->anchor
->family_num
)
3510 fprintf(stderr
, "Conflict, offlining family %#x on '%s'\n",
3511 __le32_to_cpu(family_num
), s
->disks
->devname
);
3515 static struct intel_super
*
3516 imsm_thunderdome(struct intel_super
**super_list
, int len
)
3518 struct intel_super
*super_table
[len
];
3519 struct intel_disk
*disk_list
= NULL
;
3520 struct intel_super
*champion
, *spare
;
3521 struct intel_super
*s
, **del
;
3526 memset(super_table
, 0, sizeof(super_table
));
3527 for (s
= *super_list
; s
; s
= s
->next
)
3528 tbl_size
= __prep_thunderdome(super_table
, tbl_size
, s
, &disk_list
);
3530 for (i
= 0; i
< tbl_size
; i
++) {
3531 struct imsm_disk
*d
;
3532 struct intel_disk
*idisk
;
3533 struct imsm_super
*mpb
= super_table
[i
]->anchor
;
3536 d
= &s
->disks
->disk
;
3538 /* 'd' must appear in merged disk list for its
3539 * configuration to be valid
3541 idisk
= disk_list_get(d
->serial
, disk_list
);
3542 if (idisk
&& idisk
->owner
== i
)
3543 s
= validate_members(s
, disk_list
, i
);
3548 dprintf("%s: marking family: %#x from %d:%d offline\n",
3549 __func__
, mpb
->family_num
,
3550 super_table
[i
]->disks
->major
,
3551 super_table
[i
]->disks
->minor
);
3555 /* This is where the mdadm implementation differs from the Windows
3556 * driver which has no strict concept of a container. We can only
3557 * assemble one family from a container, so when returning a prodigal
3558 * array member to this system the code will not be able to disambiguate
3559 * the container contents that should be assembled ("foreign" versus
3560 * "local"). It requires user intervention to set the orig_family_num
3561 * to a new value to establish a new container. The Windows driver in
3562 * this situation fixes up the volume name in place and manages the
3563 * foreign array as an independent entity.
3568 for (i
= 0; i
< tbl_size
; i
++) {
3569 struct intel_super
*tbl_ent
= super_table
[i
];
3575 if (tbl_ent
->anchor
->num_raid_devs
== 0) {
3580 if (s
&& !is_spare
) {
3581 show_conflicts(tbl_ent
->anchor
->family_num
, *super_list
);
3583 } else if (!s
&& !is_spare
)
3596 fprintf(stderr
, "Chose family %#x on '%s', "
3597 "assemble conflicts to new container with '--update=uuid'\n",
3598 __le32_to_cpu(s
->anchor
->family_num
), s
->disks
->devname
);
3600 /* collect all dl's onto 'champion', and update them to
3601 * champion's version of the status
3603 for (s
= *super_list
; s
; s
= s
->next
) {
3604 struct imsm_super
*mpb
= champion
->anchor
;
3605 struct dl
*dl
= s
->disks
;
3610 for (i
= 0; i
< mpb
->num_disks
; i
++) {
3611 struct imsm_disk
*disk
;
3613 disk
= __serial_to_disk(dl
->serial
, mpb
, &dl
->index
);
3616 /* only set index on disks that are a member of
3617 * a populated container, i.e. one with
3620 if (is_failed(&dl
->disk
))
3622 else if (is_spare(&dl
->disk
))
3628 if (i
>= mpb
->num_disks
) {
3629 struct intel_disk
*idisk
;
3631 idisk
= disk_list_get(dl
->serial
, disk_list
);
3632 if (idisk
&& is_spare(&idisk
->disk
) &&
3633 !is_failed(&idisk
->disk
) && !is_configured(&idisk
->disk
))
3641 dl
->next
= champion
->disks
;
3642 champion
->disks
= dl
;
3646 /* delete 'champion' from super_list */
3647 for (del
= super_list
; *del
; ) {
3648 if (*del
== champion
) {
3649 *del
= (*del
)->next
;
3652 del
= &(*del
)->next
;
3654 champion
->next
= NULL
;
3658 struct intel_disk
*idisk
= disk_list
;
3660 disk_list
= disk_list
->next
;
3667 static int load_super_imsm_all(struct supertype
*st
, int fd
, void **sbp
,
3671 struct intel_super
*super_list
= NULL
;
3672 struct intel_super
*super
= NULL
;
3673 int devnum
= fd2devnum(fd
);
3679 /* check if 'fd' an opened container */
3680 sra
= sysfs_read(fd
, 0, GET_LEVEL
|GET_VERSION
|GET_DEVS
|GET_STATE
);
3684 if (sra
->array
.major_version
!= -1 ||
3685 sra
->array
.minor_version
!= -2 ||
3686 strcmp(sra
->text_version
, "imsm") != 0) {
3691 for (sd
= sra
->devs
, i
= 0; sd
; sd
= sd
->next
, i
++) {
3692 struct intel_super
*s
= alloc_super();
3700 s
->next
= super_list
;
3704 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
3705 dfd
= dev_open(nm
, O_RDWR
);
3709 rv
= find_intel_hba_capability(dfd
, s
, devname
);
3710 /* no orom/efi or non-intel hba of the disk */
3714 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3716 /* retry the load if we might have raced against mdmon */
3717 if (err
== 3 && mdmon_running(devnum
))
3718 for (retry
= 0; retry
< 3; retry
++) {
3720 err
= load_and_parse_mpb(dfd
, s
, NULL
, 1);
3728 /* all mpbs enter, maybe one leaves */
3729 super
= imsm_thunderdome(&super_list
, i
);
3735 if (find_missing(super
) != 0) {
3741 /* load migration record */
3742 err
= load_imsm_migr_rec(super
, NULL
);
3748 /* Check migration compatibility */
3749 if (check_mpb_migr_compatibility(super
) != 0) {
3750 fprintf(stderr
, Name
": Unsupported migration detected");
3752 fprintf(stderr
, " on %s\n", devname
);
3754 fprintf(stderr
, " (IMSM).\n");
3763 while (super_list
) {
3764 struct intel_super
*s
= super_list
;
3766 super_list
= super_list
->next
;
3775 st
->container_dev
= devnum
;
3776 if (err
== 0 && st
->ss
== NULL
) {
3777 st
->ss
= &super_imsm
;
3778 st
->minor_version
= 0;
3779 st
->max_devs
= IMSM_MAX_DEVICES
;
3784 static int load_container_imsm(struct supertype
*st
, int fd
, char *devname
)
3786 return load_super_imsm_all(st
, fd
, &st
->sb
, devname
);
3790 static int load_super_imsm(struct supertype
*st
, int fd
, char *devname
)
3792 struct intel_super
*super
;
3795 if (test_partition(fd
))
3796 /* IMSM not allowed on partitions */
3799 free_super_imsm(st
);
3801 super
= alloc_super();
3804 Name
": malloc of %zu failed.\n",
3808 /* Load hba and capabilities if they exist.
3809 * But do not preclude loading metadata in case capabilities or hba are
3810 * non-compliant and ignore_hw_compat is set.
3812 rv
= find_intel_hba_capability(fd
, super
, devname
);
3813 /* no orom/efi or non-intel hba of the disk */
3814 if ((rv
!= 0) && (st
->ignore_hw_compat
== 0)) {
3817 Name
": No OROM/EFI properties for %s\n", devname
);
3821 rv
= load_and_parse_mpb(fd
, super
, devname
, 0);
3826 Name
": Failed to load all information "
3827 "sections on %s\n", devname
);
3833 if (st
->ss
== NULL
) {
3834 st
->ss
= &super_imsm
;
3835 st
->minor_version
= 0;
3836 st
->max_devs
= IMSM_MAX_DEVICES
;
3839 /* load migration record */
3840 load_imsm_migr_rec(super
, NULL
);
3842 /* Check for unsupported migration features */
3843 if (check_mpb_migr_compatibility(super
) != 0) {
3844 fprintf(stderr
, Name
": Unsupported migration detected");
3846 fprintf(stderr
, " on %s\n", devname
);
3848 fprintf(stderr
, " (IMSM).\n");
3855 static __u16
info_to_blocks_per_strip(mdu_array_info_t
*info
)
3857 if (info
->level
== 1)
3859 return info
->chunk_size
>> 9;
3862 static __u32
info_to_num_data_stripes(mdu_array_info_t
*info
, int num_domains
)
3866 num_stripes
= (info
->size
* 2) / info_to_blocks_per_strip(info
);
3867 num_stripes
/= num_domains
;
3872 static __u32
info_to_blocks_per_member(mdu_array_info_t
*info
)
3874 if (info
->level
== 1)
3875 return info
->size
* 2;
3877 return (info
->size
* 2) & ~(info_to_blocks_per_strip(info
) - 1);
3880 static void imsm_update_version_info(struct intel_super
*super
)
3882 /* update the version and attributes */
3883 struct imsm_super
*mpb
= super
->anchor
;
3885 struct imsm_dev
*dev
;
3886 struct imsm_map
*map
;
3889 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3890 dev
= get_imsm_dev(super
, i
);
3891 map
= get_imsm_map(dev
, 0);
3892 if (__le32_to_cpu(dev
->size_high
) > 0)
3893 mpb
->attributes
|= MPB_ATTRIB_2TB
;
3895 /* FIXME detect when an array spans a port multiplier */
3897 mpb
->attributes
|= MPB_ATTRIB_PM
;
3900 if (mpb
->num_raid_devs
> 1 ||
3901 mpb
->attributes
!= MPB_ATTRIB_CHECKSUM_VERIFY
) {
3902 version
= MPB_VERSION_ATTRIBS
;
3903 switch (get_imsm_raid_level(map
)) {
3904 case 0: mpb
->attributes
|= MPB_ATTRIB_RAID0
; break;
3905 case 1: mpb
->attributes
|= MPB_ATTRIB_RAID1
; break;
3906 case 10: mpb
->attributes
|= MPB_ATTRIB_RAID10
; break;
3907 case 5: mpb
->attributes
|= MPB_ATTRIB_RAID5
; break;
3910 if (map
->num_members
>= 5)
3911 version
= MPB_VERSION_5OR6_DISK_ARRAY
;
3912 else if (dev
->status
== DEV_CLONE_N_GO
)
3913 version
= MPB_VERSION_CNG
;
3914 else if (get_imsm_raid_level(map
) == 5)
3915 version
= MPB_VERSION_RAID5
;
3916 else if (map
->num_members
>= 3)
3917 version
= MPB_VERSION_3OR4_DISK_ARRAY
;
3918 else if (get_imsm_raid_level(map
) == 1)
3919 version
= MPB_VERSION_RAID1
;
3921 version
= MPB_VERSION_RAID0
;
3923 strcpy(((char *) mpb
->sig
) + strlen(MPB_SIGNATURE
), version
);
3927 static int check_name(struct intel_super
*super
, char *name
, int quiet
)
3929 struct imsm_super
*mpb
= super
->anchor
;
3930 char *reason
= NULL
;
3933 if (strlen(name
) > MAX_RAID_SERIAL_LEN
)
3934 reason
= "must be 16 characters or less";
3936 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
3937 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
3939 if (strncmp((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
) == 0) {
3940 reason
= "already exists";
3945 if (reason
&& !quiet
)
3946 fprintf(stderr
, Name
": imsm volume name %s\n", reason
);
3951 static int init_super_imsm_volume(struct supertype
*st
, mdu_array_info_t
*info
,
3952 unsigned long long size
, char *name
,
3953 char *homehost
, int *uuid
)
3955 /* We are creating a volume inside a pre-existing container.
3956 * so st->sb is already set.
3958 struct intel_super
*super
= st
->sb
;
3959 struct imsm_super
*mpb
= super
->anchor
;
3960 struct intel_dev
*dv
;
3961 struct imsm_dev
*dev
;
3962 struct imsm_vol
*vol
;
3963 struct imsm_map
*map
;
3964 int idx
= mpb
->num_raid_devs
;
3966 unsigned long long array_blocks
;
3967 size_t size_old
, size_new
;
3968 __u32 num_data_stripes
;
3970 if (super
->orom
&& mpb
->num_raid_devs
>= super
->orom
->vpa
) {
3971 fprintf(stderr
, Name
": This imsm-container already has the "
3972 "maximum of %d volumes\n", super
->orom
->vpa
);
3976 /* ensure the mpb is large enough for the new data */
3977 size_old
= __le32_to_cpu(mpb
->mpb_size
);
3978 size_new
= disks_to_mpb_size(info
->nr_disks
);
3979 if (size_new
> size_old
) {
3981 size_t size_round
= ROUND_UP(size_new
, 512);
3983 if (posix_memalign(&mpb_new
, 512, size_round
) != 0) {
3984 fprintf(stderr
, Name
": could not allocate new mpb\n");
3987 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
3988 fprintf(stderr
, Name
3989 ": %s could not allocate migr_rec buffer\n",
3995 memcpy(mpb_new
, mpb
, size_old
);
3998 super
->anchor
= mpb_new
;
3999 mpb
->mpb_size
= __cpu_to_le32(size_new
);
4000 memset(mpb_new
+ size_old
, 0, size_round
- size_old
);
4002 super
->current_vol
= idx
;
4003 /* when creating the first raid device in this container set num_disks
4004 * to zero, i.e. delete this spare and add raid member devices in
4005 * add_to_super_imsm_volume()
4007 if (super
->current_vol
== 0)
4010 if (!check_name(super
, name
, 0))
4012 dv
= malloc(sizeof(*dv
));
4014 fprintf(stderr
, Name
": failed to allocate device list entry\n");
4017 dev
= calloc(1, sizeof(*dev
) + sizeof(__u32
) * (info
->raid_disks
- 1));
4020 fprintf(stderr
, Name
": could not allocate raid device\n");
4024 strncpy((char *) dev
->volume
, name
, MAX_RAID_SERIAL_LEN
);
4025 if (info
->level
== 1)
4026 array_blocks
= info_to_blocks_per_member(info
);
4028 array_blocks
= calc_array_size(info
->level
, info
->raid_disks
,
4029 info
->layout
, info
->chunk_size
,
4031 /* round array size down to closest MB */
4032 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
4034 dev
->size_low
= __cpu_to_le32((__u32
) array_blocks
);
4035 dev
->size_high
= __cpu_to_le32((__u32
) (array_blocks
>> 32));
4036 dev
->status
= (DEV_READ_COALESCING
| DEV_WRITE_COALESCING
);
4038 vol
->migr_state
= 0;
4039 set_migr_type(dev
, MIGR_INIT
);
4041 vol
->curr_migr_unit
= 0;
4042 map
= get_imsm_map(dev
, 0);
4043 map
->pba_of_lba0
= __cpu_to_le32(super
->create_offset
);
4044 map
->blocks_per_member
= __cpu_to_le32(info_to_blocks_per_member(info
));
4045 map
->blocks_per_strip
= __cpu_to_le16(info_to_blocks_per_strip(info
));
4046 map
->failed_disk_num
= ~0;
4047 map
->map_state
= info
->level
? IMSM_T_STATE_UNINITIALIZED
:
4048 IMSM_T_STATE_NORMAL
;
4051 if (info
->level
== 1 && info
->raid_disks
> 2) {
4054 fprintf(stderr
, Name
": imsm does not support more than 2 disks"
4055 "in a raid1 volume\n");
4059 map
->raid_level
= info
->level
;
4060 if (info
->level
== 10) {
4061 map
->raid_level
= 1;
4062 map
->num_domains
= info
->raid_disks
/ 2;
4063 } else if (info
->level
== 1)
4064 map
->num_domains
= info
->raid_disks
;
4066 map
->num_domains
= 1;
4068 num_data_stripes
= info_to_num_data_stripes(info
, map
->num_domains
);
4069 map
->num_data_stripes
= __cpu_to_le32(num_data_stripes
);
4071 map
->num_members
= info
->raid_disks
;
4072 for (i
= 0; i
< map
->num_members
; i
++) {
4073 /* initialized in add_to_super */
4074 set_imsm_ord_tbl_ent(map
, i
, IMSM_ORD_REBUILD
);
4076 mpb
->num_raid_devs
++;
4079 dv
->index
= super
->current_vol
;
4080 dv
->next
= super
->devlist
;
4081 super
->devlist
= dv
;
4083 imsm_update_version_info(super
);
4088 static int init_super_imsm(struct supertype
*st
, mdu_array_info_t
*info
,
4089 unsigned long long size
, char *name
,
4090 char *homehost
, int *uuid
)
4092 /* This is primarily called by Create when creating a new array.
4093 * We will then get add_to_super called for each component, and then
4094 * write_init_super called to write it out to each device.
4095 * For IMSM, Create can create on fresh devices or on a pre-existing
4097 * To create on a pre-existing array a different method will be called.
4098 * This one is just for fresh drives.
4100 struct intel_super
*super
;
4101 struct imsm_super
*mpb
;
4106 return init_super_imsm_volume(st
, info
, size
, name
, homehost
, uuid
);
4109 mpb_size
= disks_to_mpb_size(info
->nr_disks
);
4113 super
= alloc_super();
4114 if (super
&& posix_memalign(&super
->buf
, 512, mpb_size
) != 0) {
4119 fprintf(stderr
, Name
4120 ": %s could not allocate superblock\n", __func__
);
4123 if (posix_memalign(&super
->migr_rec_buf
, 512, 512) != 0) {
4124 fprintf(stderr
, Name
4125 ": %s could not allocate migr_rec buffer\n", __func__
);
4130 memset(super
->buf
, 0, mpb_size
);
4132 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4136 /* zeroing superblock */
4140 mpb
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
4142 version
= (char *) mpb
->sig
;
4143 strcpy(version
, MPB_SIGNATURE
);
4144 version
+= strlen(MPB_SIGNATURE
);
4145 strcpy(version
, MPB_VERSION_RAID0
);
4151 static int add_to_super_imsm_volume(struct supertype
*st
, mdu_disk_info_t
*dk
,
4152 int fd
, char *devname
)
4154 struct intel_super
*super
= st
->sb
;
4155 struct imsm_super
*mpb
= super
->anchor
;
4157 struct imsm_dev
*dev
;
4158 struct imsm_map
*map
;
4161 dev
= get_imsm_dev(super
, super
->current_vol
);
4162 map
= get_imsm_map(dev
, 0);
4164 if (! (dk
->state
& (1<<MD_DISK_SYNC
))) {
4165 fprintf(stderr
, Name
": %s: Cannot add spare devices to IMSM volume\n",
4171 /* we're doing autolayout so grab the pre-marked (in
4172 * validate_geometry) raid_disk
4174 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4175 if (dl
->raiddisk
== dk
->raid_disk
)
4178 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4179 if (dl
->major
== dk
->major
&&
4180 dl
->minor
== dk
->minor
)
4185 fprintf(stderr
, Name
": %s is not a member of the same container\n", devname
);
4189 /* add a pristine spare to the metadata */
4190 if (dl
->index
< 0) {
4191 dl
->index
= super
->anchor
->num_disks
;
4192 super
->anchor
->num_disks
++;
4194 /* Check the device has not already been added */
4195 slot
= get_imsm_disk_slot(map
, dl
->index
);
4197 (get_imsm_ord_tbl_ent(dev
, slot
, -1) & IMSM_ORD_REBUILD
) == 0) {
4198 fprintf(stderr
, Name
": %s has been included in this array twice\n",
4202 set_imsm_ord_tbl_ent(map
, dk
->number
, dl
->index
);
4203 dl
->disk
.status
= CONFIGURED_DISK
;
4205 /* if we are creating the first raid device update the family number */
4206 if (super
->current_vol
== 0) {
4208 struct imsm_dev
*_dev
= __get_imsm_dev(mpb
, 0);
4209 struct imsm_disk
*_disk
= __get_imsm_disk(mpb
, dl
->index
);
4211 if (!_dev
|| !_disk
) {
4212 fprintf(stderr
, Name
": BUG mpb setup error\n");
4218 sum
+= __gen_imsm_checksum(mpb
);
4219 mpb
->family_num
= __cpu_to_le32(sum
);
4220 mpb
->orig_family_num
= mpb
->family_num
;
4227 static int add_to_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
,
4228 int fd
, char *devname
)
4230 struct intel_super
*super
= st
->sb
;
4232 unsigned long long size
;
4237 /* If we are on an RAID enabled platform check that the disk is
4238 * attached to the raid controller.
4239 * We do not need to test disks attachment for container based additions,
4240 * they shall be already tested when container was created/assembled.
4242 rv
= find_intel_hba_capability(fd
, super
, devname
);
4243 /* no orom/efi or non-intel hba of the disk */
4245 dprintf("capability: %p fd: %d ret: %d\n",
4246 super
->orom
, fd
, rv
);
4250 if (super
->current_vol
>= 0)
4251 return add_to_super_imsm_volume(st
, dk
, fd
, devname
);
4254 dd
= malloc(sizeof(*dd
));
4257 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
4260 memset(dd
, 0, sizeof(*dd
));
4261 dd
->major
= major(stb
.st_rdev
);
4262 dd
->minor
= minor(stb
.st_rdev
);
4264 dd
->devname
= devname
? strdup(devname
) : NULL
;
4267 dd
->action
= DISK_ADD
;
4268 rv
= imsm_read_serial(fd
, devname
, dd
->serial
);
4271 Name
": failed to retrieve scsi serial, aborting\n");
4276 get_dev_size(fd
, NULL
, &size
);
4278 serialcpy(dd
->disk
.serial
, dd
->serial
);
4279 dd
->disk
.total_blocks
= __cpu_to_le32(size
);
4280 dd
->disk
.status
= SPARE_DISK
;
4281 if (sysfs_disk_to_scsi_id(fd
, &id
) == 0)
4282 dd
->disk
.scsi_id
= __cpu_to_le32(id
);
4284 dd
->disk
.scsi_id
= __cpu_to_le32(0);
4286 if (st
->update_tail
) {
4287 dd
->next
= super
->disk_mgmt_list
;
4288 super
->disk_mgmt_list
= dd
;
4290 dd
->next
= super
->disks
;
4292 super
->updates_pending
++;
4299 static int remove_from_super_imsm(struct supertype
*st
, mdu_disk_info_t
*dk
)
4301 struct intel_super
*super
= st
->sb
;
4304 /* remove from super works only in mdmon - for communication
4305 * manager - monitor. Check if communication memory buffer
4308 if (!st
->update_tail
) {
4310 Name
": %s shall be used in mdmon context only"
4311 "(line %d).\n", __func__
, __LINE__
);
4314 dd
= malloc(sizeof(*dd
));
4317 Name
": malloc failed %s:%d.\n", __func__
, __LINE__
);
4320 memset(dd
, 0, sizeof(*dd
));
4321 dd
->major
= dk
->major
;
4322 dd
->minor
= dk
->minor
;
4325 dd
->disk
.status
= SPARE_DISK
;
4326 dd
->action
= DISK_REMOVE
;
4328 dd
->next
= super
->disk_mgmt_list
;
4329 super
->disk_mgmt_list
= dd
;
4335 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
);
4339 struct imsm_super anchor
;
4340 } spare_record
__attribute__ ((aligned(512)));
4342 /* spare records have their own family number and do not have any defined raid
4345 static int write_super_imsm_spares(struct intel_super
*super
, int doclose
)
4347 struct imsm_super
*mpb
= super
->anchor
;
4348 struct imsm_super
*spare
= &spare_record
.anchor
;
4352 spare
->mpb_size
= __cpu_to_le32(sizeof(struct imsm_super
)),
4353 spare
->generation_num
= __cpu_to_le32(1UL),
4354 spare
->attributes
= MPB_ATTRIB_CHECKSUM_VERIFY
;
4355 spare
->num_disks
= 1,
4356 spare
->num_raid_devs
= 0,
4357 spare
->cache_size
= mpb
->cache_size
,
4358 spare
->pwr_cycle_count
= __cpu_to_le32(1),
4360 snprintf((char *) spare
->sig
, MAX_SIGNATURE_LENGTH
,
4361 MPB_SIGNATURE MPB_VERSION_RAID0
);
4363 for (d
= super
->disks
; d
; d
= d
->next
) {
4367 spare
->disk
[0] = d
->disk
;
4368 sum
= __gen_imsm_checksum(spare
);
4369 spare
->family_num
= __cpu_to_le32(sum
);
4370 spare
->orig_family_num
= 0;
4371 sum
= __gen_imsm_checksum(spare
);
4372 spare
->check_sum
= __cpu_to_le32(sum
);
4374 if (store_imsm_mpb(d
->fd
, spare
)) {
4375 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4376 __func__
, d
->major
, d
->minor
, strerror(errno
));
4388 static int is_gen_migration(struct imsm_dev
*dev
);
4390 static int write_super_imsm(struct supertype
*st
, int doclose
)
4392 struct intel_super
*super
= st
->sb
;
4393 struct imsm_super
*mpb
= super
->anchor
;
4399 __u32 mpb_size
= sizeof(struct imsm_super
) - sizeof(struct imsm_disk
);
4401 int clear_migration_record
= 1;
4403 /* 'generation' is incremented everytime the metadata is written */
4404 generation
= __le32_to_cpu(mpb
->generation_num
);
4406 mpb
->generation_num
= __cpu_to_le32(generation
);
4408 /* fix up cases where previous mdadm releases failed to set
4411 if (mpb
->orig_family_num
== 0)
4412 mpb
->orig_family_num
= mpb
->family_num
;
4414 for (d
= super
->disks
; d
; d
= d
->next
) {
4418 mpb
->disk
[d
->index
] = d
->disk
;
4422 for (d
= super
->missing
; d
; d
= d
->next
) {
4423 mpb
->disk
[d
->index
] = d
->disk
;
4426 mpb
->num_disks
= num_disks
;
4427 mpb_size
+= sizeof(struct imsm_disk
) * mpb
->num_disks
;
4429 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
4430 struct imsm_dev
*dev
= __get_imsm_dev(mpb
, i
);
4431 struct imsm_dev
*dev2
= get_imsm_dev(super
, i
);
4433 imsm_copy_dev(dev
, dev2
);
4434 mpb_size
+= sizeof_imsm_dev(dev
, 0);
4436 if (is_gen_migration(dev2
))
4437 clear_migration_record
= 0;
4439 mpb_size
+= __le32_to_cpu(mpb
->bbm_log_size
);
4440 mpb
->mpb_size
= __cpu_to_le32(mpb_size
);
4442 /* recalculate checksum */
4443 sum
= __gen_imsm_checksum(mpb
);
4444 mpb
->check_sum
= __cpu_to_le32(sum
);
4446 if (clear_migration_record
)
4447 memset(super
->migr_rec_buf
, 0, 512);
4449 /* write the mpb for disks that compose raid devices */
4450 for (d
= super
->disks
; d
; d
= d
->next
) {
4453 if (store_imsm_mpb(d
->fd
, mpb
))
4454 fprintf(stderr
, "%s: failed for device %d:%d %s\n",
4455 __func__
, d
->major
, d
->minor
, strerror(errno
));
4456 if (clear_migration_record
) {
4457 unsigned long long dsize
;
4459 get_dev_size(d
->fd
, NULL
, &dsize
);
4460 if (lseek64(d
->fd
, dsize
- 512, SEEK_SET
) >= 0) {
4461 write(d
->fd
, super
->migr_rec_buf
, 512);
4471 return write_super_imsm_spares(super
, doclose
);
4477 static int create_array(struct supertype
*st
, int dev_idx
)
4480 struct imsm_update_create_array
*u
;
4481 struct intel_super
*super
= st
->sb
;
4482 struct imsm_dev
*dev
= get_imsm_dev(super
, dev_idx
);
4483 struct imsm_map
*map
= get_imsm_map(dev
, 0);
4484 struct disk_info
*inf
;
4485 struct imsm_disk
*disk
;
4488 len
= sizeof(*u
) - sizeof(*dev
) + sizeof_imsm_dev(dev
, 0) +
4489 sizeof(*inf
) * map
->num_members
;
4492 fprintf(stderr
, "%s: failed to allocate update buffer\n",
4497 u
->type
= update_create_array
;
4498 u
->dev_idx
= dev_idx
;
4499 imsm_copy_dev(&u
->dev
, dev
);
4500 inf
= get_disk_info(u
);
4501 for (i
= 0; i
< map
->num_members
; i
++) {
4502 int idx
= get_imsm_disk_idx(dev
, i
, -1);
4504 disk
= get_imsm_disk(super
, idx
);
4505 serialcpy(inf
[i
].serial
, disk
->serial
);
4507 append_metadata_update(st
, u
, len
);
/* Queue an add/remove-disk metadata update (for mdmon) covering the
 * disks accumulated on super->disk_mgmt_list.  Returns 0 in all cases,
 * including when there is nothing to do or allocation fails.
 */
static int mgmt_disk(struct supertype *st)
{
	struct intel_super *super = st->sb;
	size_t len;
	struct imsm_update_add_remove_disk *u;

	/* nothing queued for management - nothing to update */
	if (!super->disk_mgmt_list)
		return 0;

	len = sizeof(*u);
	u = malloc(len);
	if (!u) {
		fprintf(stderr, "%s: failed to allocate update buffer\n",
			__func__);
		return 0;
	}

	u->type = update_add_remove_disk;
	append_metadata_update(st, u, len);

	return 0;
}
4535 static int write_init_super_imsm(struct supertype
*st
)
4537 struct intel_super
*super
= st
->sb
;
4538 int current_vol
= super
->current_vol
;
4540 /* we are done with current_vol reset it to point st at the container */
4541 super
->current_vol
= -1;
4543 if (st
->update_tail
) {
4544 /* queue the recently created array / added disk
4545 * as a metadata update */
4548 /* determine if we are creating a volume or adding a disk */
4549 if (current_vol
< 0) {
4550 /* in the mgmt (add/remove) disk case we are running
4551 * in mdmon context, so don't close fd's
4553 return mgmt_disk(st
);
4555 rv
= create_array(st
, current_vol
);
4560 for (d
= super
->disks
; d
; d
= d
->next
)
4561 Kill(d
->devname
, NULL
, 0, 1, 1);
4562 return write_super_imsm(st
, 1);
/* Write the current anchor mpb to the device behind 'fd'.
 * Returns non-zero on failure, including when no superblock has been
 * loaded yet (super or its anchor is NULL).
 */
static int store_super_imsm(struct supertype *st, int fd)
{
	struct intel_super *super = st->sb;
	/* tolerate st->sb == NULL so callers can probe safely */
	struct imsm_super *mpb = super ? super->anchor : NULL;

	if (!mpb)
		return 1;

#ifndef MDASSEMBLE
	return store_imsm_mpb(fd, mpb);
#else
	return 1;
#endif
}
/* Size (in bytes) of the bad-block-management log carried in the mpb,
 * converted from the on-disk little-endian representation.  A non-zero
 * value means the metadata carries BBM data this implementation cannot
 * preserve (see the caller in container_content_imsm).
 */
static int imsm_bbm_log_size(struct imsm_super *mpb)
{
	return __le32_to_cpu(mpb->bbm_log_size);
}
4588 static int validate_geometry_imsm_container(struct supertype
*st
, int level
,
4589 int layout
, int raiddisks
, int chunk
,
4590 unsigned long long size
, char *dev
,
4591 unsigned long long *freesize
,
4595 unsigned long long ldsize
;
4596 struct intel_super
*super
=NULL
;
4599 if (level
!= LEVEL_CONTAINER
)
4604 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
4607 fprintf(stderr
, Name
": imsm: Cannot open %s: %s\n",
4608 dev
, strerror(errno
));
4611 if (!get_dev_size(fd
, dev
, &ldsize
)) {
4616 /* capabilities retrieve could be possible
4617 * note that there is no fd for the disks in array.
4619 super
= alloc_super();
4622 Name
": malloc of %zu failed.\n",
4628 rv
= find_intel_hba_capability(fd
, super
, verbose
? dev
: NULL
);
4632 fd2devname(fd
, str
);
4633 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4634 fd
, str
, super
->orom
, rv
, raiddisks
);
4636 /* no orom/efi or non-intel hba of the disk */
4642 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4644 fprintf(stderr
, Name
": %d exceeds maximum number of"
4645 " platform supported disks: %d\n",
4646 raiddisks
, super
->orom
->tds
);
4652 *freesize
= avail_size_imsm(st
, ldsize
>> 9);
/* Starting from extent e[*idx], coalesce every following extent that
 * overlaps the growing region and return the merged region's total
 * size.  *idx is advanced to the first extent past the merged run so
 * the caller can continue scanning.  Assumes 'e' is sorted by start
 * (see the qsort with cmp_extent in merge_extents); a zero-size extent
 * acts as a terminator.
 */
static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
{
	const unsigned long long base_start = e[*idx].start;
	unsigned long long end = base_start + e[*idx].size;
	int i;

	/* zero-length starting extent: nothing to merge */
	if (base_start == end)
		return 0;

	*idx = *idx + 1;
	for (i = *idx; i < num_extents; i++) {
		/* extend overlapping extents */
		if (e[i].start >= base_start &&
		    e[i].start <= end) {
			if (e[i].size == 0)
				break;
			if (e[i].start + e[i].size > end)
				end = e[i].start + e[i].size;
		} else if (e[i].start > end) {
			/* gap found: the next merge resumes here */
			*idx = i;
			break;
		}
	}

	return end - base_start;
}
4685 static unsigned long long merge_extents(struct intel_super
*super
, int sum_extents
)
4687 /* build a composite disk with all known extents and generate a new
4688 * 'maxsize' given the "all disks in an array must share a common start
4689 * offset" constraint
4691 struct extent
*e
= calloc(sum_extents
, sizeof(*e
));
4695 unsigned long long pos
;
4696 unsigned long long start
= 0;
4697 unsigned long long maxsize
;
4698 unsigned long reserve
;
4703 /* coalesce and sort all extents. also, check to see if we need to
4704 * reserve space between member arrays
4707 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4710 for (i
= 0; i
< dl
->extent_cnt
; i
++)
4713 qsort(e
, sum_extents
, sizeof(*e
), cmp_extent
);
4718 while (i
< sum_extents
) {
4719 e
[j
].start
= e
[i
].start
;
4720 e
[j
].size
= find_size(e
, &i
, sum_extents
);
4722 if (e
[j
-1].size
== 0)
4731 unsigned long long esize
;
4733 esize
= e
[i
].start
- pos
;
4734 if (esize
>= maxsize
) {
4739 pos
= e
[i
].start
+ e
[i
].size
;
4741 } while (e
[i
-1].size
);
4747 /* FIXME assumes volume at offset 0 is the first volume in a
4750 if (start_extent
> 0)
4751 reserve
= IMSM_RESERVED_SECTORS
; /* gap between raid regions */
4755 if (maxsize
< reserve
)
4758 super
->create_offset
= ~((__u32
) 0);
4759 if (start
+ reserve
> super
->create_offset
)
4760 return 0; /* start overflows create_offset */
4761 super
->create_offset
= start
+ reserve
;
4763 return maxsize
- reserve
;
/* Check whether 'level' with 'raiddisks' members is a configuration
 * this platform can handle.  When an option-rom is present its
 * capability bits constrain the answer; without one (non-Intel
 * platform) anything imsm itself supports is allowed.  Returns
 * non-zero when the configuration is acceptable.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	/* imsm never supports raid4 or raid6 */
	if (level < 0 || level == 6 || level == 4)
		return 0;

	/* if we have an orom prevent invalid raid levels */
	if (orom)
		switch (level) {
		case 0: return imsm_orom_has_raid0(orom);
		case 1:
			/* more than 2 mirrors is "raid1e" in imsm terms */
			if (raiddisks > 2)
				return imsm_orom_has_raid1e(orom);
			return imsm_orom_has_raid1(orom) && raiddisks == 2;
		case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
		case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
		}
	else
		return 1; /* not on an Intel RAID platform so anything goes */

	return 0;
}
4789 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4791 * validate volume parameters with OROM/EFI capabilities
4794 validate_geometry_imsm_orom(struct intel_super
*super
, int level
, int layout
,
4795 int raiddisks
, int *chunk
, int verbose
)
4800 /* validate container capabilities */
4801 if (super
->orom
&& raiddisks
> super
->orom
->tds
) {
4803 fprintf(stderr
, Name
": %d exceeds maximum number of"
4804 " platform supported disks: %d\n",
4805 raiddisks
, super
->orom
->tds
);
4809 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4810 if (super
->orom
&& (!is_raid_level_supported(super
->orom
, level
,
4812 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4813 level
, raiddisks
, raiddisks
> 1 ? "s" : "");
4816 if (super
->orom
&& level
!= 1) {
4817 if (chunk
&& (*chunk
== 0 || *chunk
== UnSet
))
4818 *chunk
= imsm_orom_default_chunk(super
->orom
);
4819 else if (chunk
&& !imsm_orom_has_chunk(super
->orom
, *chunk
)) {
4820 pr_vrb(": platform does not support a chunk size of: "
4825 if (layout
!= imsm_level_to_layout(level
)) {
4827 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4828 else if (level
== 10)
4829 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4831 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4838 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4839 * FIX ME add ahci details
4841 static int validate_geometry_imsm_volume(struct supertype
*st
, int level
,
4842 int layout
, int raiddisks
, int *chunk
,
4843 unsigned long long size
, char *dev
,
4844 unsigned long long *freesize
,
4848 struct intel_super
*super
= st
->sb
;
4849 struct imsm_super
*mpb
= super
->anchor
;
4851 unsigned long long pos
= 0;
4852 unsigned long long maxsize
;
4856 /* We must have the container info already read in. */
4860 if (!validate_geometry_imsm_orom(super
, level
, layout
, raiddisks
, chunk
, verbose
)) {
4861 fprintf(stderr
, Name
": RAID gemetry validation failed. "
4862 "Cannot proceed with the action(s).\n");
4866 /* General test: make sure there is space for
4867 * 'raiddisks' device extents of size 'size' at a given
4870 unsigned long long minsize
= size
;
4871 unsigned long long start_offset
= MaxSector
;
4874 minsize
= MPB_SECTOR_CNT
+ IMSM_RESERVED_SECTORS
;
4875 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4880 e
= get_extents(super
, dl
);
4883 unsigned long long esize
;
4884 esize
= e
[i
].start
- pos
;
4885 if (esize
>= minsize
)
4887 if (found
&& start_offset
== MaxSector
) {
4890 } else if (found
&& pos
!= start_offset
) {
4894 pos
= e
[i
].start
+ e
[i
].size
;
4896 } while (e
[i
-1].size
);
4901 if (dcnt
< raiddisks
) {
4903 fprintf(stderr
, Name
": imsm: Not enough "
4904 "devices with space for this array "
4912 /* This device must be a member of the set */
4913 if (stat(dev
, &stb
) < 0)
4915 if ((S_IFMT
& stb
.st_mode
) != S_IFBLK
)
4917 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
4918 if (dl
->major
== (int)major(stb
.st_rdev
) &&
4919 dl
->minor
== (int)minor(stb
.st_rdev
))
4924 fprintf(stderr
, Name
": %s is not in the "
4925 "same imsm set\n", dev
);
4927 } else if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
) {
4928 /* If a volume is present then the current creation attempt
4929 * cannot incorporate new spares because the orom may not
4930 * understand this configuration (all member disks must be
4931 * members of each array in the container).
4933 fprintf(stderr
, Name
": %s is a spare and a volume"
4934 " is already defined for this container\n", dev
);
4935 fprintf(stderr
, Name
": The option-rom requires all member"
4936 " disks to be a member of all volumes\n");
4940 /* retrieve the largest free space block */
4941 e
= get_extents(super
, dl
);
4946 unsigned long long esize
;
4948 esize
= e
[i
].start
- pos
;
4949 if (esize
>= maxsize
)
4951 pos
= e
[i
].start
+ e
[i
].size
;
4953 } while (e
[i
-1].size
);
4958 fprintf(stderr
, Name
": unable to determine free space for: %s\n",
4962 if (maxsize
< size
) {
4964 fprintf(stderr
, Name
": %s not enough space (%llu < %llu)\n",
4965 dev
, maxsize
, size
);
4969 /* count total number of extents for merge */
4971 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
4973 i
+= dl
->extent_cnt
;
4975 maxsize
= merge_extents(super
, i
);
4976 if (maxsize
< size
|| maxsize
== 0) {
4978 fprintf(stderr
, Name
": not enough space after merge (%llu < %llu)\n",
4983 *freesize
= maxsize
;
4988 static int reserve_space(struct supertype
*st
, int raiddisks
,
4989 unsigned long long size
, int chunk
,
4990 unsigned long long *freesize
)
4992 struct intel_super
*super
= st
->sb
;
4993 struct imsm_super
*mpb
= super
->anchor
;
4998 unsigned long long maxsize
;
4999 unsigned long long minsize
;
5003 /* find the largest common start free region of the possible disks */
5007 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
5013 /* don't activate new spares if we are orom constrained
5014 * and there is already a volume active in the container
5016 if (super
->orom
&& dl
->index
< 0 && mpb
->num_raid_devs
)
5019 e
= get_extents(super
, dl
);
5022 for (i
= 1; e
[i
-1].size
; i
++)
5030 maxsize
= merge_extents(super
, extent_cnt
);
5034 minsize
= chunk
* 2;
5036 if (cnt
< raiddisks
||
5037 (super
->orom
&& used
&& used
!= raiddisks
) ||
5038 maxsize
< minsize
||
5040 fprintf(stderr
, Name
": not enough devices with space to create array.\n");
5041 return 0; /* No enough free spaces large enough */
5053 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
5055 dl
->raiddisk
= cnt
++;
5062 static int validate_geometry_imsm(struct supertype
*st
, int level
, int layout
,
5063 int raiddisks
, int *chunk
, unsigned long long size
,
5064 char *dev
, unsigned long long *freesize
,
5072 * if given unused devices create a container
5073 * if given given devices in a container create a member volume
5075 if (level
== LEVEL_CONTAINER
) {
5076 /* Must be a fresh device to add to a container */
5077 return validate_geometry_imsm_container(st
, level
, layout
,
5079 chunk
?*chunk
:0, size
,
5085 if (st
->sb
&& freesize
) {
5086 /* we are being asked to automatically layout a
5087 * new volume based on the current contents of
5088 * the container. If the the parameters can be
5089 * satisfied reserve_space will record the disks,
5090 * start offset, and size of the volume to be
5091 * created. add_to_super and getinfo_super
5092 * detect when autolayout is in progress.
5094 if (!validate_geometry_imsm_orom(st
->sb
, level
, layout
,
5098 return reserve_space(st
, raiddisks
, size
,
5099 chunk
?*chunk
:0, freesize
);
5104 /* creating in a given container */
5105 return validate_geometry_imsm_volume(st
, level
, layout
,
5106 raiddisks
, chunk
, size
,
5107 dev
, freesize
, verbose
);
5110 /* This device needs to be a device in an 'imsm' container */
5111 fd
= open(dev
, O_RDONLY
|O_EXCL
, 0);
5115 Name
": Cannot create this array on device %s\n",
5120 if (errno
!= EBUSY
|| (fd
= open(dev
, O_RDONLY
, 0)) < 0) {
5122 fprintf(stderr
, Name
": Cannot open %s: %s\n",
5123 dev
, strerror(errno
));
5126 /* Well, it is in use by someone, maybe an 'imsm' container. */
5127 cfd
= open_container(fd
);
5131 fprintf(stderr
, Name
": Cannot use %s: It is busy\n",
5135 sra
= sysfs_read(cfd
, 0, GET_VERSION
);
5136 if (sra
&& sra
->array
.major_version
== -1 &&
5137 strcmp(sra
->text_version
, "imsm") == 0)
5141 /* This is a member of a imsm container. Load the container
5142 * and try to create a volume
5144 struct intel_super
*super
;
5146 if (load_super_imsm_all(st
, cfd
, (void **) &super
, NULL
) == 0) {
5148 st
->container_dev
= fd2devnum(cfd
);
5150 return validate_geometry_imsm_volume(st
, level
, layout
,
5158 fprintf(stderr
, Name
": failed container membership check\n");
/* Fill in defaults for geometry values the user left UnSet.  With no
 * explicit level we default to creating a container; layout is derived
 * from the (possibly just-defaulted) level, and the chunk size default
 * comes from the platform option-rom when one is available.  All
 * pointer arguments may be NULL.
 */
static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
{
	struct intel_super *super = st->sb;

	if (level && *level == UnSet)
		*level = LEVEL_CONTAINER;

	if (level && layout && *layout == UnSet)
		*layout = imsm_level_to_layout(*level);

	if (chunk && (*chunk == UnSet || *chunk == 0) &&
	    super && super->orom)
		*chunk = imsm_orom_default_chunk(super->orom);
}
5179 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
);
5181 static int kill_subarray_imsm(struct supertype
*st
)
5183 /* remove the subarray currently referenced by ->current_vol */
5185 struct intel_dev
**dp
;
5186 struct intel_super
*super
= st
->sb
;
5187 __u8 current_vol
= super
->current_vol
;
5188 struct imsm_super
*mpb
= super
->anchor
;
5190 if (super
->current_vol
< 0)
5192 super
->current_vol
= -1; /* invalidate subarray cursor */
5194 /* block deletions that would change the uuid of active subarrays
5196 * FIXME when immutable ids are available, but note that we'll
5197 * also need to fixup the invalidated/active subarray indexes in
5200 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5203 if (i
< current_vol
)
5205 sprintf(subarray
, "%u", i
);
5206 if (is_subarray_active(subarray
, st
->devname
)) {
5208 Name
": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
5215 if (st
->update_tail
) {
5216 struct imsm_update_kill_array
*u
= malloc(sizeof(*u
));
5220 u
->type
= update_kill_array
;
5221 u
->dev_idx
= current_vol
;
5222 append_metadata_update(st
, u
, sizeof(*u
));
5227 for (dp
= &super
->devlist
; *dp
;)
5228 if ((*dp
)->index
== current_vol
) {
5231 handle_missing(super
, (*dp
)->dev
);
5232 if ((*dp
)->index
> current_vol
)
5237 /* no more raid devices, all active components are now spares,
5238 * but of course failed are still failed
5240 if (--mpb
->num_raid_devs
== 0) {
5243 for (d
= super
->disks
; d
; d
= d
->next
)
5244 if (d
->index
> -2) {
5246 d
->disk
.status
= SPARE_DISK
;
5250 super
->updates_pending
++;
5255 static int update_subarray_imsm(struct supertype
*st
, char *subarray
,
5256 char *update
, struct mddev_ident
*ident
)
5258 /* update the subarray currently referenced by ->current_vol */
5259 struct intel_super
*super
= st
->sb
;
5260 struct imsm_super
*mpb
= super
->anchor
;
5262 if (strcmp(update
, "name") == 0) {
5263 char *name
= ident
->name
;
5267 if (is_subarray_active(subarray
, st
->devname
)) {
5269 Name
": Unable to update name of active subarray\n");
5273 if (!check_name(super
, name
, 0))
5276 vol
= strtoul(subarray
, &ep
, 10);
5277 if (*ep
!= '\0' || vol
>= super
->anchor
->num_raid_devs
)
5280 if (st
->update_tail
) {
5281 struct imsm_update_rename_array
*u
= malloc(sizeof(*u
));
5285 u
->type
= update_rename_array
;
5287 snprintf((char *) u
->name
, MAX_RAID_SERIAL_LEN
, "%s", name
);
5288 append_metadata_update(st
, u
, sizeof(*u
));
5290 struct imsm_dev
*dev
;
5293 dev
= get_imsm_dev(super
, vol
);
5294 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
5295 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5296 dev
= get_imsm_dev(super
, i
);
5297 handle_missing(super
, dev
);
5299 super
->updates_pending
++;
5307 static int is_gen_migration(struct imsm_dev
*dev
)
5309 if (!dev
->vol
.migr_state
)
5312 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5317 #endif /* MDASSEMBLE */
5319 static int is_rebuilding(struct imsm_dev
*dev
)
5321 struct imsm_map
*migr_map
;
5323 if (!dev
->vol
.migr_state
)
5326 if (migr_type(dev
) != MIGR_REBUILD
)
5329 migr_map
= get_imsm_map(dev
, 1);
5331 if (migr_map
->map_state
== IMSM_T_STATE_DEGRADED
)
/* For a device in the middle of a rebuild, translate the metadata's
 * migration checkpoint (curr_migr_unit) into the md 'recovery_start'
 * sector offset on the disk being rebuilt.  No-op when the device is
 * not rebuilding, when no rebuild target can be identified, or when
 * more than one disk claims to be rebuilding (punt on dual rebuild).
 */
static void update_recovery_start(struct intel_super *super,
				  struct imsm_dev *dev,
				  struct mdinfo *array)
{
	struct mdinfo *rebuild = NULL;
	struct mdinfo *d;
	__u32 units;

	if (!is_rebuilding(dev))
		return;

	/* Find the rebuild target, but punt on the dual rebuild case */
	for (d = array->devs; d; d = d->next)
		if (d->recovery_start == 0) {
			if (rebuild)
				return;	/* more than one target: bail */
			rebuild = d;
		}

	if (!rebuild) {
		/* (?) none of the disks are marked with
		 * IMSM_ORD_REBUILD, so assume they are missing and the
		 * disk_ord_tbl was not correctly updated
		 */
		dprintf("%s: failed to locate out-of-sync disk\n", __func__);
		return;
	}

	/* checkpoint units are converted to sectors via the per-device
	 * migration unit size */
	units = __le32_to_cpu(dev->vol.curr_migr_unit);
	rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
}
5369 static int recover_backup_imsm(struct supertype
*st
, struct mdinfo
*info
);
5371 static struct mdinfo
*container_content_imsm(struct supertype
*st
, char *subarray
)
5373 /* Given a container loaded by load_super_imsm_all,
5374 * extract information about all the arrays into
5376 * If 'subarray' is given, just extract info about that array.
5378 * For each imsm_dev create an mdinfo, fill it in,
5379 * then look for matching devices in super->disks
5380 * and create appropriate device mdinfo.
5382 struct intel_super
*super
= st
->sb
;
5383 struct imsm_super
*mpb
= super
->anchor
;
5384 struct mdinfo
*rest
= NULL
;
5388 int spare_disks
= 0;
5390 /* check for bad blocks */
5391 if (imsm_bbm_log_size(super
->anchor
))
5394 /* count spare devices, not used in maps
5396 for (d
= super
->disks
; d
; d
= d
->next
)
5400 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5401 struct imsm_dev
*dev
;
5402 struct imsm_map
*map
;
5403 struct imsm_map
*map2
;
5404 struct mdinfo
*this;
5409 (i
!= strtoul(subarray
, &ep
, 10) || *ep
!= '\0'))
5412 dev
= get_imsm_dev(super
, i
);
5413 map
= get_imsm_map(dev
, 0);
5414 map2
= get_imsm_map(dev
, 1);
5416 /* do not publish arrays that are in the middle of an
5417 * unsupported migration
5419 if (dev
->vol
.migr_state
&&
5420 (migr_type(dev
) == MIGR_STATE_CHANGE
)) {
5421 fprintf(stderr
, Name
": cannot assemble volume '%.16s':"
5422 " unsupported migration in progress\n",
5426 /* do not publish arrays that are not support by controller's
5430 chunk
= __le16_to_cpu(map
->blocks_per_strip
) >> 1;
5432 if (!validate_geometry_imsm_orom(super
,
5433 get_imsm_raid_level(map
), /* RAID level */
5434 imsm_level_to_layout(get_imsm_raid_level(map
)),
5435 map
->num_members
, /* raid disks */
5438 fprintf(stderr
, Name
": RAID gemetry validation failed. "
5439 "Cannot proceed with the action(s).\n");
5442 #endif /* MDASSEMBLE */
5443 this = malloc(sizeof(*this));
5445 fprintf(stderr
, Name
": failed to allocate %zu bytes\n",
5449 memset(this, 0, sizeof(*this));
5452 super
->current_vol
= i
;
5453 getinfo_super_imsm_volume(st
, this, NULL
);
5454 for (slot
= 0 ; slot
< map
->num_members
; slot
++) {
5455 unsigned long long recovery_start
;
5456 struct mdinfo
*info_d
;
5463 idx
= get_imsm_disk_idx(dev
, slot
, 0);
5464 ord
= get_imsm_ord_tbl_ent(dev
, slot
, -1);
5465 for (d
= super
->disks
; d
; d
= d
->next
)
5466 if (d
->index
== idx
)
5469 recovery_start
= MaxSector
;
5472 if (d
&& is_failed(&d
->disk
))
5474 if (ord
& IMSM_ORD_REBUILD
)
5478 * if we skip some disks the array will be assmebled degraded;
5479 * reset resync start to avoid a dirty-degraded
5480 * situation when performing the intial sync
5482 * FIXME handle dirty degraded
5484 if ((skip
|| recovery_start
== 0) && !dev
->vol
.dirty
)
5485 this->resync_start
= MaxSector
;
5489 info_d
= calloc(1, sizeof(*info_d
));
5491 fprintf(stderr
, Name
": failed to allocate disk"
5492 " for volume %.16s\n", dev
->volume
);
5493 info_d
= this->devs
;
5495 struct mdinfo
*d
= info_d
->next
;
5504 info_d
->next
= this->devs
;
5505 this->devs
= info_d
;
5507 info_d
->disk
.number
= d
->index
;
5508 info_d
->disk
.major
= d
->major
;
5509 info_d
->disk
.minor
= d
->minor
;
5510 info_d
->disk
.raid_disk
= slot
;
5511 info_d
->recovery_start
= recovery_start
;
5513 if (slot
< map2
->num_members
)
5514 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5516 this->array
.spare_disks
++;
5518 if (slot
< map
->num_members
)
5519 info_d
->disk
.state
= (1 << MD_DISK_ACTIVE
);
5521 this->array
.spare_disks
++;
5523 if (info_d
->recovery_start
== MaxSector
)
5524 this->array
.working_disks
++;
5526 info_d
->events
= __le32_to_cpu(mpb
->generation_num
);
5527 info_d
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
5528 info_d
->component_size
= __le32_to_cpu(map
->blocks_per_member
);
5530 /* now that the disk list is up-to-date fixup recovery_start */
5531 update_recovery_start(super
, dev
, this);
5532 this->array
.spare_disks
+= spare_disks
;
5534 /* check for reshape */
5535 if (this->reshape_active
== 1)
5536 recover_backup_imsm(st
, this);
5541 /* if array has bad blocks, set suitable bit in array status */
5543 rest
->array
.state
|= (1<<MD_SB_BBM_ERRORS
);
5549 static __u8
imsm_check_degraded(struct intel_super
*super
, struct imsm_dev
*dev
, int failed
)
5551 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5554 return map
->map_state
== IMSM_T_STATE_UNINITIALIZED
?
5555 IMSM_T_STATE_UNINITIALIZED
: IMSM_T_STATE_NORMAL
;
5557 switch (get_imsm_raid_level(map
)) {
5559 return IMSM_T_STATE_FAILED
;
5562 if (failed
< map
->num_members
)
5563 return IMSM_T_STATE_DEGRADED
;
5565 return IMSM_T_STATE_FAILED
;
5570 * check to see if any mirrors have failed, otherwise we
5571 * are degraded. Even numbered slots are mirrored on
5575 /* gcc -Os complains that this is unused */
5576 int insync
= insync
;
5578 for (i
= 0; i
< map
->num_members
; i
++) {
5579 __u32 ord
= get_imsm_ord_tbl_ent(dev
, i
, -1);
5580 int idx
= ord_to_idx(ord
);
5581 struct imsm_disk
*disk
;
5583 /* reset the potential in-sync count on even-numbered
5584 * slots. num_copies is always 2 for imsm raid10
5589 disk
= get_imsm_disk(super
, idx
);
5590 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5593 /* no in-sync disks left in this mirror the
5597 return IMSM_T_STATE_FAILED
;
5600 return IMSM_T_STATE_DEGRADED
;
5604 return IMSM_T_STATE_DEGRADED
;
5606 return IMSM_T_STATE_FAILED
;
5612 return map
->map_state
;
5615 static int imsm_count_failed(struct intel_super
*super
, struct imsm_dev
*dev
)
5619 struct imsm_disk
*disk
;
5620 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5621 struct imsm_map
*prev
= get_imsm_map(dev
, dev
->vol
.migr_state
);
5625 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5626 * disks that are being rebuilt. New failures are recorded to
5627 * map[0]. So we look through all the disks we started with and
5628 * see if any failures are still present, or if any new ones
5631 * FIXME add support for online capacity expansion and
5632 * raid-level-migration
5634 for (i
= 0; i
< prev
->num_members
; i
++) {
5635 ord
= __le32_to_cpu(prev
->disk_ord_tbl
[i
]);
5636 ord
|= __le32_to_cpu(map
->disk_ord_tbl
[i
]);
5637 idx
= ord_to_idx(ord
);
5639 disk
= get_imsm_disk(super
, idx
);
5640 if (!disk
|| is_failed(disk
) || ord
& IMSM_ORD_REBUILD
)
5648 static int imsm_open_new(struct supertype
*c
, struct active_array
*a
,
5651 struct intel_super
*super
= c
->sb
;
5652 struct imsm_super
*mpb
= super
->anchor
;
5654 if (atoi(inst
) >= mpb
->num_raid_devs
) {
5655 fprintf(stderr
, "%s: subarry index %d, out of range\n",
5656 __func__
, atoi(inst
));
5660 dprintf("imsm: open_new %s\n", inst
);
5661 a
->info
.container_member
= atoi(inst
);
5665 static int is_resyncing(struct imsm_dev
*dev
)
5667 struct imsm_map
*migr_map
;
5669 if (!dev
->vol
.migr_state
)
5672 if (migr_type(dev
) == MIGR_INIT
||
5673 migr_type(dev
) == MIGR_REPAIR
)
5676 if (migr_type(dev
) == MIGR_GEN_MIGR
)
5679 migr_map
= get_imsm_map(dev
, 1);
5681 if ((migr_map
->map_state
== IMSM_T_STATE_NORMAL
) &&
5682 (dev
->vol
.migr_type
!= MIGR_GEN_MIGR
))
5688 /* return true if we recorded new information */
5689 static int mark_failure(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5693 struct imsm_map
*map
;
5695 /* new failures are always set in map[0] */
5696 map
= get_imsm_map(dev
, 0);
5698 slot
= get_imsm_disk_slot(map
, idx
);
5702 ord
= __le32_to_cpu(map
->disk_ord_tbl
[slot
]);
5703 if (is_failed(disk
) && (ord
& IMSM_ORD_REBUILD
))
5706 disk
->status
|= FAILED_DISK
;
5707 set_imsm_ord_tbl_ent(map
, slot
, idx
| IMSM_ORD_REBUILD
);
5708 if (map
->failed_disk_num
== 0xff)
5709 map
->failed_disk_num
= slot
;
5713 static void mark_missing(struct imsm_dev
*dev
, struct imsm_disk
*disk
, int idx
)
5715 mark_failure(dev
, disk
, idx
);
5717 if (disk
->scsi_id
== __cpu_to_le32(~(__u32
)0))
5720 disk
->scsi_id
= __cpu_to_le32(~(__u32
)0);
5721 memmove(&disk
->serial
[0], &disk
->serial
[1], MAX_RAID_SERIAL_LEN
- 1);
5724 static void handle_missing(struct intel_super
*super
, struct imsm_dev
*dev
)
5730 if (!super
->missing
)
5732 failed
= imsm_count_failed(super
, dev
);
5733 map_state
= imsm_check_degraded(super
, dev
, failed
);
5735 dprintf("imsm: mark missing\n");
5736 end_migration(dev
, map_state
);
5737 for (dl
= super
->missing
; dl
; dl
= dl
->next
)
5738 mark_missing(dev
, &dl
->disk
, dl
->index
);
5739 super
->updates_pending
++;
5742 static unsigned long long imsm_set_array_size(struct imsm_dev
*dev
)
5744 int used_disks
= imsm_num_data_members(dev
, 0);
5745 unsigned long long array_blocks
;
5746 struct imsm_map
*map
;
5748 if (used_disks
== 0) {
5749 /* when problems occures
5750 * return current array_blocks value
5752 array_blocks
= __le32_to_cpu(dev
->size_high
);
5753 array_blocks
= array_blocks
<< 32;
5754 array_blocks
+= __le32_to_cpu(dev
->size_low
);
5756 return array_blocks
;
5759 /* set array size in metadata
5761 map
= get_imsm_map(dev
, 0);
5762 array_blocks
= map
->blocks_per_member
* used_disks
;
5764 /* round array size down to closest MB
5766 array_blocks
= (array_blocks
>> SECT_PER_MB_SHIFT
) << SECT_PER_MB_SHIFT
;
5767 dev
->size_low
= __cpu_to_le32((__u32
)array_blocks
);
5768 dev
->size_high
= __cpu_to_le32((__u32
)(array_blocks
>> 32));
5770 return array_blocks
;
5773 static void imsm_set_disk(struct active_array
*a
, int n
, int state
);
5775 static void imsm_progress_container_reshape(struct intel_super
*super
)
5777 /* if no device has a migr_state, but some device has a
5778 * different number of members than the previous device, start
5779 * changing the number of devices in this device to match
5782 struct imsm_super
*mpb
= super
->anchor
;
5783 int prev_disks
= -1;
5787 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
5788 struct imsm_dev
*dev
= get_imsm_dev(super
, i
);
5789 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5790 struct imsm_map
*map2
;
5791 int prev_num_members
;
5793 if (dev
->vol
.migr_state
)
5796 if (prev_disks
== -1)
5797 prev_disks
= map
->num_members
;
5798 if (prev_disks
== map
->num_members
)
5801 /* OK, this array needs to enter reshape mode.
5802 * i.e it needs a migr_state
5805 copy_map_size
= sizeof_imsm_map(map
);
5806 prev_num_members
= map
->num_members
;
5807 map
->num_members
= prev_disks
;
5808 dev
->vol
.migr_state
= 1;
5809 dev
->vol
.curr_migr_unit
= 0;
5810 dev
->vol
.migr_type
= MIGR_GEN_MIGR
;
5811 for (i
= prev_num_members
;
5812 i
< map
->num_members
; i
++)
5813 set_imsm_ord_tbl_ent(map
, i
, i
);
5814 map2
= get_imsm_map(dev
, 1);
5815 /* Copy the current map */
5816 memcpy(map2
, map
, copy_map_size
);
5817 map2
->num_members
= prev_num_members
;
5819 imsm_set_array_size(dev
);
5820 super
->updates_pending
++;
5824 /* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild
5825 * states are handled in imsm_set_disk() with one exception, when a
5826 * resync is stopped due to a new failure this routine will set the
5827 * 'degraded' state for the array.
5829 static int imsm_set_array_state(struct active_array
*a
, int consistent
)
5831 int inst
= a
->info
.container_member
;
5832 struct intel_super
*super
= a
->container
->sb
;
5833 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5834 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5835 int failed
= imsm_count_failed(super
, dev
);
5836 __u8 map_state
= imsm_check_degraded(super
, dev
, failed
);
5837 __u32 blocks_per_unit
;
5839 if (dev
->vol
.migr_state
&&
5840 dev
->vol
.migr_type
== MIGR_GEN_MIGR
) {
5841 /* array state change is blocked due to reshape action
5843 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5844 * - finish the reshape (if last_checkpoint is big and action != reshape)
5845 * - update curr_migr_unit
5847 if (a
->curr_action
== reshape
) {
5848 /* still reshaping, maybe update curr_migr_unit */
5849 goto mark_checkpoint
;
5851 if (a
->last_checkpoint
== 0 && a
->prev_action
== reshape
) {
5852 /* for some reason we aborted the reshape.
5855 struct imsm_map
*map2
= get_imsm_map(dev
, 1);
5856 dev
->vol
.migr_state
= 0;
5857 dev
->vol
.migr_type
= 0;
5858 dev
->vol
.curr_migr_unit
= 0;
5859 memcpy(map
, map2
, sizeof_imsm_map(map2
));
5860 super
->updates_pending
++;
5862 if (a
->last_checkpoint
>= a
->info
.component_size
) {
5863 unsigned long long array_blocks
;
5867 used_disks
= imsm_num_data_members(dev
, 0);
5868 if (used_disks
> 0) {
5870 map
->blocks_per_member
*
5872 /* round array size down to closest MB
5874 array_blocks
= (array_blocks
5875 >> SECT_PER_MB_SHIFT
)
5876 << SECT_PER_MB_SHIFT
;
5877 a
->info
.custom_array_size
= array_blocks
;
5878 /* encourage manager to update array
5882 a
->check_reshape
= 1;
5884 /* finalize online capacity expansion/reshape */
5885 for (mdi
= a
->info
.devs
; mdi
; mdi
= mdi
->next
)
5887 mdi
->disk
.raid_disk
,
5890 imsm_progress_container_reshape(super
);
5895 /* before we activate this array handle any missing disks */
5896 if (consistent
== 2)
5897 handle_missing(super
, dev
);
5899 if (consistent
== 2 &&
5900 (!is_resync_complete(&a
->info
) ||
5901 map_state
!= IMSM_T_STATE_NORMAL
||
5902 dev
->vol
.migr_state
))
5905 if (is_resync_complete(&a
->info
)) {
5906 /* complete intialization / resync,
5907 * recovery and interrupted recovery is completed in
5910 if (is_resyncing(dev
)) {
5911 dprintf("imsm: mark resync done\n");
5912 end_migration(dev
, map_state
);
5913 super
->updates_pending
++;
5914 a
->last_checkpoint
= 0;
5916 } else if (!is_resyncing(dev
) && !failed
) {
5917 /* mark the start of the init process if nothing is failed */
5918 dprintf("imsm: mark resync start\n");
5919 if (map
->map_state
== IMSM_T_STATE_UNINITIALIZED
)
5920 migrate(dev
, super
, IMSM_T_STATE_NORMAL
, MIGR_INIT
);
5922 migrate(dev
, super
, IMSM_T_STATE_NORMAL
, MIGR_REPAIR
);
5923 super
->updates_pending
++;
5927 /* skip checkpointing for general migration,
5928 * it is controlled in mdadm
5930 if (is_gen_migration(dev
))
5931 goto skip_mark_checkpoint
;
5933 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5934 blocks_per_unit
= blocks_per_migr_unit(super
, dev
);
5935 if (blocks_per_unit
) {
5939 units
= a
->last_checkpoint
/ blocks_per_unit
;
5942 /* check that we did not overflow 32-bits, and that
5943 * curr_migr_unit needs updating
5945 if (units32
== units
&&
5947 __le32_to_cpu(dev
->vol
.curr_migr_unit
) != units32
) {
5948 dprintf("imsm: mark checkpoint (%u)\n", units32
);
5949 dev
->vol
.curr_migr_unit
= __cpu_to_le32(units32
);
5950 super
->updates_pending
++;
5954 skip_mark_checkpoint
:
5955 /* mark dirty / clean */
5956 if (dev
->vol
.dirty
!= !consistent
) {
5957 dprintf("imsm: mark '%s'\n", consistent
? "clean" : "dirty");
5962 super
->updates_pending
++;
5968 static void imsm_set_disk(struct active_array
*a
, int n
, int state
)
5970 int inst
= a
->info
.container_member
;
5971 struct intel_super
*super
= a
->container
->sb
;
5972 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
5973 struct imsm_map
*map
= get_imsm_map(dev
, 0);
5974 struct imsm_disk
*disk
;
5979 if (n
> map
->num_members
)
5980 fprintf(stderr
, "imsm: set_disk %d out of range 0..%d\n",
5981 n
, map
->num_members
- 1);
5986 dprintf("imsm: set_disk %d:%x\n", n
, state
);
5988 ord
= get_imsm_ord_tbl_ent(dev
, n
, -1);
5989 disk
= get_imsm_disk(super
, ord_to_idx(ord
));
5991 /* check for new failures */
5992 if (state
& DS_FAULTY
) {
5993 if (mark_failure(dev
, disk
, ord_to_idx(ord
)))
5994 super
->updates_pending
++;
5997 /* check if in_sync */
5998 if (state
& DS_INSYNC
&& ord
& IMSM_ORD_REBUILD
&& is_rebuilding(dev
)) {
5999 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
6001 set_imsm_ord_tbl_ent(migr_map
, n
, ord_to_idx(ord
));
6002 super
->updates_pending
++;
6005 failed
= imsm_count_failed(super
, dev
);
6006 map_state
= imsm_check_degraded(super
, dev
, failed
);
6008 /* check if recovery complete, newly degraded, or failed */
6009 if (map_state
== IMSM_T_STATE_NORMAL
&& is_rebuilding(dev
)) {
6010 end_migration(dev
, map_state
);
6011 map
= get_imsm_map(dev
, 0);
6012 map
->failed_disk_num
= ~0;
6013 super
->updates_pending
++;
6014 a
->last_checkpoint
= 0;
6015 } else if (map_state
== IMSM_T_STATE_DEGRADED
&&
6016 map
->map_state
!= map_state
&&
6017 !dev
->vol
.migr_state
) {
6018 dprintf("imsm: mark degraded\n");
6019 map
->map_state
= map_state
;
6020 super
->updates_pending
++;
6021 a
->last_checkpoint
= 0;
6022 } else if (map_state
== IMSM_T_STATE_FAILED
&&
6023 map
->map_state
!= map_state
) {
6024 dprintf("imsm: mark failed\n");
6025 end_migration(dev
, map_state
);
6026 super
->updates_pending
++;
6027 a
->last_checkpoint
= 0;
6028 } else if (is_gen_migration(dev
)) {
6029 dprintf("imsm: Detected General Migration in state: ");
6030 if (map_state
== IMSM_T_STATE_NORMAL
) {
6031 end_migration(dev
, map_state
);
6032 map
= get_imsm_map(dev
, 0);
6033 map
->failed_disk_num
= ~0;
6034 dprintf("normal\n");
6036 if (map_state
== IMSM_T_STATE_DEGRADED
) {
6037 printf("degraded\n");
6038 end_migration(dev
, map_state
);
6040 dprintf("failed\n");
6042 map
->map_state
= map_state
;
6044 super
->updates_pending
++;
6048 static int store_imsm_mpb(int fd
, struct imsm_super
*mpb
)
6051 __u32 mpb_size
= __le32_to_cpu(mpb
->mpb_size
);
6052 unsigned long long dsize
;
6053 unsigned long long sectors
;
6055 get_dev_size(fd
, NULL
, &dsize
);
6057 if (mpb_size
> 512) {
6058 /* -1 to account for anchor */
6059 sectors
= mpb_sectors(mpb
) - 1;
6061 /* write the extended mpb to the sectors preceeding the anchor */
6062 if (lseek64(fd
, dsize
- (512 * (2 + sectors
)), SEEK_SET
) < 0)
6065 if ((unsigned long long)write(fd
, buf
+ 512, 512 * sectors
)
6070 /* first block is stored on second to last sector of the disk */
6071 if (lseek64(fd
, dsize
- (512 * 2), SEEK_SET
) < 0)
6074 if (write(fd
, buf
, 512) != 512)
6080 static void imsm_sync_metadata(struct supertype
*container
)
6082 struct intel_super
*super
= container
->sb
;
6084 dprintf("sync metadata: %d\n", super
->updates_pending
);
6085 if (!super
->updates_pending
)
6088 write_super_imsm(container
, 0);
6090 super
->updates_pending
= 0;
6093 static struct dl
*imsm_readd(struct intel_super
*super
, int idx
, struct active_array
*a
)
6095 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
6096 int i
= get_imsm_disk_idx(dev
, idx
, -1);
6099 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6103 if (dl
&& is_failed(&dl
->disk
))
6107 dprintf("%s: found %x:%x\n", __func__
, dl
->major
, dl
->minor
);
6112 static struct dl
*imsm_add_spare(struct intel_super
*super
, int slot
,
6113 struct active_array
*a
, int activate_new
,
6114 struct mdinfo
*additional_test_list
)
6116 struct imsm_dev
*dev
= get_imsm_dev(super
, a
->info
.container_member
);
6117 int idx
= get_imsm_disk_idx(dev
, slot
, -1);
6118 struct imsm_super
*mpb
= super
->anchor
;
6119 struct imsm_map
*map
;
6120 unsigned long long pos
;
6125 __u32 array_start
= 0;
6126 __u32 array_end
= 0;
6128 struct mdinfo
*test_list
;
6130 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6131 /* If in this array, skip */
6132 for (d
= a
->info
.devs
; d
; d
= d
->next
)
6133 if (d
->state_fd
>= 0 &&
6134 d
->disk
.major
== dl
->major
&&
6135 d
->disk
.minor
== dl
->minor
) {
6136 dprintf("%x:%x already in array\n",
6137 dl
->major
, dl
->minor
);
6142 test_list
= additional_test_list
;
6144 if (test_list
->disk
.major
== dl
->major
&&
6145 test_list
->disk
.minor
== dl
->minor
) {
6146 dprintf("%x:%x already in additional test list\n",
6147 dl
->major
, dl
->minor
);
6150 test_list
= test_list
->next
;
6155 /* skip in use or failed drives */
6156 if (is_failed(&dl
->disk
) || idx
== dl
->index
||
6158 dprintf("%x:%x status (failed: %d index: %d)\n",
6159 dl
->major
, dl
->minor
, is_failed(&dl
->disk
), idx
);
6163 /* skip pure spares when we are looking for partially
6164 * assimilated drives
6166 if (dl
->index
== -1 && !activate_new
)
6169 /* Does this unused device have the requisite free space?
6170 * It needs to be able to cover all member volumes
6172 ex
= get_extents(super
, dl
);
6174 dprintf("cannot get extents\n");
6177 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
6178 dev
= get_imsm_dev(super
, i
);
6179 map
= get_imsm_map(dev
, 0);
6181 /* check if this disk is already a member of
6184 if (get_imsm_disk_slot(map
, dl
->index
) >= 0)
6190 array_start
= __le32_to_cpu(map
->pba_of_lba0
);
6191 array_end
= array_start
+
6192 __le32_to_cpu(map
->blocks_per_member
) - 1;
6195 /* check that we can start at pba_of_lba0 with
6196 * blocks_per_member of space
6198 if (array_start
>= pos
&& array_end
< ex
[j
].start
) {
6202 pos
= ex
[j
].start
+ ex
[j
].size
;
6204 } while (ex
[j
-1].size
);
6211 if (i
< mpb
->num_raid_devs
) {
6212 dprintf("%x:%x does not have %u to %u available\n",
6213 dl
->major
, dl
->minor
, array_start
, array_end
);
6224 static int imsm_rebuild_allowed(struct supertype
*cont
, int dev_idx
, int failed
)
6226 struct imsm_dev
*dev2
;
6227 struct imsm_map
*map
;
6233 dev2
= get_imsm_dev(cont
->sb
, dev_idx
);
6235 state
= imsm_check_degraded(cont
->sb
, dev2
, failed
);
6236 if (state
== IMSM_T_STATE_FAILED
) {
6237 map
= get_imsm_map(dev2
, 0);
6240 for (slot
= 0; slot
< map
->num_members
; slot
++) {
6242 * Check if failed disks are deleted from intel
6243 * disk list or are marked to be deleted
6245 idx
= get_imsm_disk_idx(dev2
, slot
, -1);
6246 idisk
= get_imsm_dl_disk(cont
->sb
, idx
);
6248 * Do not rebuild the array if failed disks
6249 * from failed sub-array are not removed from
6253 is_failed(&idisk
->disk
) &&
6254 (idisk
->action
!= DISK_REMOVE
))
6262 static struct mdinfo
*imsm_activate_spare(struct active_array
*a
,
6263 struct metadata_update
**updates
)
6266 * Find a device with unused free space and use it to replace a
6267 * failed/vacant region in an array. We replace failed regions one a
6268 * array at a time. The result is that a new spare disk will be added
6269 * to the first failed array and after the monitor has finished
6270 * propagating failures the remainder will be consumed.
6272 * FIXME add a capability for mdmon to request spares from another
6276 struct intel_super
*super
= a
->container
->sb
;
6277 int inst
= a
->info
.container_member
;
6278 struct imsm_dev
*dev
= get_imsm_dev(super
, inst
);
6279 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6280 int failed
= a
->info
.array
.raid_disks
;
6281 struct mdinfo
*rv
= NULL
;
6284 struct metadata_update
*mu
;
6286 struct imsm_update_activate_spare
*u
;
6291 for (d
= a
->info
.devs
; d
; d
= d
->next
) {
6292 if ((d
->curr_state
& DS_FAULTY
) &&
6294 /* wait for Removal to happen */
6296 if (d
->state_fd
>= 0)
6300 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
6301 inst
, failed
, a
->info
.array
.raid_disks
, a
->info
.array
.level
);
6303 if (dev
->vol
.migr_state
&&
6304 dev
->vol
.migr_type
== MIGR_GEN_MIGR
)
6305 /* No repair during migration */
6308 if (a
->info
.array
.level
== 4)
6309 /* No repair for takeovered array
6310 * imsm doesn't support raid4
6314 if (imsm_check_degraded(super
, dev
, failed
) != IMSM_T_STATE_DEGRADED
)
6318 * If there are any failed disks check state of the other volume.
6319 * Block rebuild if the another one is failed until failed disks
6320 * are removed from container.
6323 dprintf("found failed disks in %s, check if there another"
6324 "failed sub-array.\n",
6326 /* check if states of the other volumes allow for rebuild */
6327 for (i
= 0; i
< super
->anchor
->num_raid_devs
; i
++) {
6329 allowed
= imsm_rebuild_allowed(a
->container
,
6337 /* For each slot, if it is not working, find a spare */
6338 for (i
= 0; i
< a
->info
.array
.raid_disks
; i
++) {
6339 for (d
= a
->info
.devs
; d
; d
= d
->next
)
6340 if (d
->disk
.raid_disk
== i
)
6342 dprintf("found %d: %p %x\n", i
, d
, d
?d
->curr_state
:0);
6343 if (d
&& (d
->state_fd
>= 0))
6347 * OK, this device needs recovery. Try to re-add the
6348 * previous occupant of this slot, if this fails see if
6349 * we can continue the assimilation of a spare that was
6350 * partially assimilated, finally try to activate a new
6353 dl
= imsm_readd(super
, i
, a
);
6355 dl
= imsm_add_spare(super
, i
, a
, 0, NULL
);
6357 dl
= imsm_add_spare(super
, i
, a
, 1, NULL
);
6361 /* found a usable disk with enough space */
6362 di
= malloc(sizeof(*di
));
6365 memset(di
, 0, sizeof(*di
));
6367 /* dl->index will be -1 in the case we are activating a
6368 * pristine spare. imsm_process_update() will create a
6369 * new index in this case. Once a disk is found to be
6370 * failed in all member arrays it is kicked from the
6373 di
->disk
.number
= dl
->index
;
6375 /* (ab)use di->devs to store a pointer to the device
6378 di
->devs
= (struct mdinfo
*) dl
;
6380 di
->disk
.raid_disk
= i
;
6381 di
->disk
.major
= dl
->major
;
6382 di
->disk
.minor
= dl
->minor
;
6384 di
->recovery_start
= 0;
6385 di
->data_offset
= __le32_to_cpu(map
->pba_of_lba0
);
6386 di
->component_size
= a
->info
.component_size
;
6387 di
->container_member
= inst
;
6388 super
->random
= random32();
6392 dprintf("%x:%x to be %d at %llu\n", dl
->major
, dl
->minor
,
6393 i
, di
->data_offset
);
6399 /* No spares found */
6401 /* Now 'rv' has a list of devices to return.
6402 * Create a metadata_update record to update the
6403 * disk_ord_tbl for the array
6405 mu
= malloc(sizeof(*mu
));
6407 mu
->buf
= malloc(sizeof(struct imsm_update_activate_spare
) * num_spares
);
6408 if (mu
->buf
== NULL
) {
6415 struct mdinfo
*n
= rv
->next
;
6424 mu
->space_list
= NULL
;
6425 mu
->len
= sizeof(struct imsm_update_activate_spare
) * num_spares
;
6426 mu
->next
= *updates
;
6427 u
= (struct imsm_update_activate_spare
*) mu
->buf
;
6429 for (di
= rv
; di
; di
= di
->next
) {
6430 u
->type
= update_activate_spare
;
6431 u
->dl
= (struct dl
*) di
->devs
;
6433 u
->slot
= di
->disk
.raid_disk
;
6444 static int disks_overlap(struct intel_super
*super
, int idx
, struct imsm_update_create_array
*u
)
6446 struct imsm_dev
*dev
= get_imsm_dev(super
, idx
);
6447 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6448 struct imsm_map
*new_map
= get_imsm_map(&u
->dev
, 0);
6449 struct disk_info
*inf
= get_disk_info(u
);
6450 struct imsm_disk
*disk
;
6454 for (i
= 0; i
< map
->num_members
; i
++) {
6455 disk
= get_imsm_disk(super
, get_imsm_disk_idx(dev
, i
, -1));
6456 for (j
= 0; j
< new_map
->num_members
; j
++)
6457 if (serialcmp(disk
->serial
, inf
[j
].serial
) == 0)
6465 static struct dl
*get_disk_super(struct intel_super
*super
, int major
, int minor
)
6467 struct dl
*dl
= NULL
;
6468 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6469 if ((dl
->major
== major
) && (dl
->minor
== minor
))
6474 static int remove_disk_super(struct intel_super
*super
, int major
, int minor
)
6476 struct dl
*prev
= NULL
;
6480 for (dl
= super
->disks
; dl
; dl
= dl
->next
) {
6481 if ((dl
->major
== major
) && (dl
->minor
== minor
)) {
6484 prev
->next
= dl
->next
;
6486 super
->disks
= dl
->next
;
6488 __free_imsm_disk(dl
);
6489 dprintf("%s: removed %x:%x\n",
6490 __func__
, major
, minor
);
6498 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
);
6500 static int add_remove_disk_update(struct intel_super
*super
)
6502 int check_degraded
= 0;
6503 struct dl
*disk
= NULL
;
6504 /* add/remove some spares to/from the metadata/contrainer */
6505 while (super
->disk_mgmt_list
) {
6506 struct dl
*disk_cfg
;
6508 disk_cfg
= super
->disk_mgmt_list
;
6509 super
->disk_mgmt_list
= disk_cfg
->next
;
6510 disk_cfg
->next
= NULL
;
6512 if (disk_cfg
->action
== DISK_ADD
) {
6513 disk_cfg
->next
= super
->disks
;
6514 super
->disks
= disk_cfg
;
6516 dprintf("%s: added %x:%x\n",
6517 __func__
, disk_cfg
->major
,
6519 } else if (disk_cfg
->action
== DISK_REMOVE
) {
6520 dprintf("Disk remove action processed: %x.%x\n",
6521 disk_cfg
->major
, disk_cfg
->minor
);
6522 disk
= get_disk_super(super
,
6526 /* store action status */
6527 disk
->action
= DISK_REMOVE
;
6528 /* remove spare disks only */
6529 if (disk
->index
== -1) {
6530 remove_disk_super(super
,
6535 /* release allocate disk structure */
6536 __free_imsm_disk(disk_cfg
);
6539 return check_degraded
;
6543 static int apply_reshape_migration_update(struct imsm_update_reshape_migration
*u
,
6544 struct intel_super
*super
,
6547 struct intel_dev
*id
;
6548 void **tofree
= NULL
;
6551 dprintf("apply_reshape_migration_update()\n");
6552 if ((u
->subdev
< 0) ||
6554 dprintf("imsm: Error: Wrong subdev: %i\n", u
->subdev
);
6557 if ((space_list
== NULL
) || (*space_list
== NULL
)) {
6558 dprintf("imsm: Error: Memory is not allocated\n");
6562 for (id
= super
->devlist
; id
; id
= id
->next
) {
6563 if (id
->index
== (unsigned)u
->subdev
) {
6564 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subdev
);
6565 struct imsm_map
*map
;
6566 struct imsm_dev
*new_dev
=
6567 (struct imsm_dev
*)*space_list
;
6568 struct imsm_map
*migr_map
= get_imsm_map(dev
, 1);
6570 struct dl
*new_disk
;
6572 if (new_dev
== NULL
)
6574 *space_list
= **space_list
;
6575 memcpy(new_dev
, dev
, sizeof_imsm_dev(dev
, 0));
6576 map
= get_imsm_map(new_dev
, 0);
6578 dprintf("imsm: Error: migration in progress");
6582 to_state
= map
->map_state
;
6583 if ((u
->new_level
== 5) && (map
->raid_level
== 0)) {
6585 /* this should not happen */
6586 if (u
->new_disks
[0] < 0) {
6587 map
->failed_disk_num
=
6588 map
->num_members
- 1;
6589 to_state
= IMSM_T_STATE_DEGRADED
;
6591 to_state
= IMSM_T_STATE_NORMAL
;
6593 migrate(new_dev
, super
, to_state
, MIGR_GEN_MIGR
);
6594 if (u
->new_level
> -1)
6595 map
->raid_level
= u
->new_level
;
6596 migr_map
= get_imsm_map(new_dev
, 1);
6597 if ((u
->new_level
== 5) &&
6598 (migr_map
->raid_level
== 0)) {
6599 int ord
= map
->num_members
- 1;
6600 migr_map
->num_members
--;
6601 if (u
->new_disks
[0] < 0)
6602 ord
|= IMSM_ORD_REBUILD
;
6603 set_imsm_ord_tbl_ent(map
,
6604 map
->num_members
- 1,
6608 tofree
= (void **)dev
;
6610 /* update chunk size
6612 if (u
->new_chunksize
> 0)
6613 map
->blocks_per_strip
=
6614 __cpu_to_le16(u
->new_chunksize
* 2);
6618 if ((u
->new_level
!= 5) ||
6619 (migr_map
->raid_level
!= 0) ||
6620 (migr_map
->raid_level
== map
->raid_level
))
6623 if (u
->new_disks
[0] >= 0) {
6626 new_disk
= get_disk_super(super
,
6627 major(u
->new_disks
[0]),
6628 minor(u
->new_disks
[0]));
6629 dprintf("imsm: new disk for reshape is: %i:%i "
6630 "(%p, index = %i)\n",
6631 major(u
->new_disks
[0]),
6632 minor(u
->new_disks
[0]),
6633 new_disk
, new_disk
->index
);
6634 if (new_disk
== NULL
)
6635 goto error_disk_add
;
6637 new_disk
->index
= map
->num_members
- 1;
6638 /* slot to fill in autolayout
6640 new_disk
->raiddisk
= new_disk
->index
;
6641 new_disk
->disk
.status
|= CONFIGURED_DISK
;
6642 new_disk
->disk
.status
&= ~SPARE_DISK
;
6644 goto error_disk_add
;
6647 *tofree
= *space_list
;
6648 /* calculate new size
6650 imsm_set_array_size(new_dev
);
6657 *space_list
= tofree
;
6661 dprintf("Error: imsm: Cannot find disk.\n");
6666 static int apply_reshape_container_disks_update(struct imsm_update_reshape
*u
,
6667 struct intel_super
*super
,
6670 struct dl
*new_disk
;
6671 struct intel_dev
*id
;
6673 int delta_disks
= u
->new_raid_disks
- u
->old_raid_disks
;
6674 int disk_count
= u
->old_raid_disks
;
6675 void **tofree
= NULL
;
6676 int devices_to_reshape
= 1;
6677 struct imsm_super
*mpb
= super
->anchor
;
6679 unsigned int dev_id
;
6681 dprintf("imsm: apply_reshape_container_disks_update()\n");
6683 /* enable spares to use in array */
6684 for (i
= 0; i
< delta_disks
; i
++) {
6685 new_disk
= get_disk_super(super
,
6686 major(u
->new_disks
[i
]),
6687 minor(u
->new_disks
[i
]));
6688 dprintf("imsm: new disk for reshape is: %i:%i "
6689 "(%p, index = %i)\n",
6690 major(u
->new_disks
[i
]), minor(u
->new_disks
[i
]),
6691 new_disk
, new_disk
->index
);
6692 if ((new_disk
== NULL
) ||
6693 ((new_disk
->index
>= 0) &&
6694 (new_disk
->index
< u
->old_raid_disks
)))
6695 goto update_reshape_exit
;
6696 new_disk
->index
= disk_count
++;
6697 /* slot to fill in autolayout
6699 new_disk
->raiddisk
= new_disk
->index
;
6700 new_disk
->disk
.status
|=
6702 new_disk
->disk
.status
&= ~SPARE_DISK
;
6705 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6706 mpb
->num_raid_devs
);
6707 /* manage changes in volume
6709 for (dev_id
= 0; dev_id
< mpb
->num_raid_devs
; dev_id
++) {
6710 void **sp
= *space_list
;
6711 struct imsm_dev
*newdev
;
6712 struct imsm_map
*newmap
, *oldmap
;
6714 for (id
= super
->devlist
; id
; id
= id
->next
) {
6715 if (id
->index
== dev_id
)
6724 /* Copy the dev, but not (all of) the map */
6725 memcpy(newdev
, id
->dev
, sizeof(*newdev
));
6726 oldmap
= get_imsm_map(id
->dev
, 0);
6727 newmap
= get_imsm_map(newdev
, 0);
6728 /* Copy the current map */
6729 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6730 /* update one device only
6732 if (devices_to_reshape
) {
6733 dprintf("imsm: modifying subdev: %i\n",
6735 devices_to_reshape
--;
6736 newdev
->vol
.migr_state
= 1;
6737 newdev
->vol
.curr_migr_unit
= 0;
6738 newdev
->vol
.migr_type
= MIGR_GEN_MIGR
;
6739 newmap
->num_members
= u
->new_raid_disks
;
6740 for (i
= 0; i
< delta_disks
; i
++) {
6741 set_imsm_ord_tbl_ent(newmap
,
6742 u
->old_raid_disks
+ i
,
6743 u
->old_raid_disks
+ i
);
6745 /* New map is correct, now need to save old map
6747 newmap
= get_imsm_map(newdev
, 1);
6748 memcpy(newmap
, oldmap
, sizeof_imsm_map(oldmap
));
6750 imsm_set_array_size(newdev
);
6753 sp
= (void **)id
->dev
;
6758 /* Clear migration record */
6759 memset(super
->migr_rec
, 0, sizeof(struct migr_record
));
6762 *space_list
= tofree
;
6765 update_reshape_exit
:
6770 static int apply_takeover_update(struct imsm_update_takeover
*u
,
6771 struct intel_super
*super
,
6774 struct imsm_dev
*dev
= NULL
;
6775 struct intel_dev
*dv
;
6776 struct imsm_dev
*dev_new
;
6777 struct imsm_map
*map
;
6781 for (dv
= super
->devlist
; dv
; dv
= dv
->next
)
6782 if (dv
->index
== (unsigned int)u
->subarray
) {
6790 map
= get_imsm_map(dev
, 0);
6792 if (u
->direction
== R10_TO_R0
) {
6793 /* Number of failed disks must be half of initial disk number */
6794 if (imsm_count_failed(super
, dev
) != (map
->num_members
/ 2))
6797 /* iterate through devices to mark removed disks as spare */
6798 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6799 if (dm
->disk
.status
& FAILED_DISK
) {
6800 int idx
= dm
->index
;
6801 /* update indexes on the disk list */
6802 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6803 the index values will end up being correct.... NB */
6804 for (du
= super
->disks
; du
; du
= du
->next
)
6805 if (du
->index
> idx
)
6807 /* mark as spare disk */
6808 dm
->disk
.status
= SPARE_DISK
;
6813 map
->num_members
= map
->num_members
/ 2;
6814 map
->map_state
= IMSM_T_STATE_NORMAL
;
6815 map
->num_domains
= 1;
6816 map
->raid_level
= 0;
6817 map
->failed_disk_num
= -1;
6820 if (u
->direction
== R0_TO_R10
) {
6822 /* update slots in current disk list */
6823 for (dm
= super
->disks
; dm
; dm
= dm
->next
) {
6827 /* create new *missing* disks */
6828 for (i
= 0; i
< map
->num_members
; i
++) {
6829 space
= *space_list
;
6832 *space_list
= *space
;
6834 memcpy(du
, super
->disks
, sizeof(*du
));
6838 du
->index
= (i
* 2) + 1;
6839 sprintf((char *)du
->disk
.serial
,
6840 " MISSING_%d", du
->index
);
6841 sprintf((char *)du
->serial
,
6842 "MISSING_%d", du
->index
);
6843 du
->next
= super
->missing
;
6844 super
->missing
= du
;
6846 /* create new dev and map */
6847 space
= *space_list
;
6850 *space_list
= *space
;
6851 dev_new
= (void *)space
;
6852 memcpy(dev_new
, dev
, sizeof(*dev
));
6853 /* update new map */
6854 map
= get_imsm_map(dev_new
, 0);
6855 map
->num_members
= map
->num_members
* 2;
6856 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6857 map
->num_domains
= 2;
6858 map
->raid_level
= 1;
6859 /* replace dev<->dev_new */
6862 /* update disk order table */
6863 for (du
= super
->disks
; du
; du
= du
->next
)
6865 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6866 for (du
= super
->missing
; du
; du
= du
->next
)
6867 if (du
->index
>= 0) {
6868 set_imsm_ord_tbl_ent(map
, du
->index
, du
->index
);
6869 mark_missing(dev_new
, &du
->disk
, du
->index
);
6875 static void imsm_process_update(struct supertype
*st
,
6876 struct metadata_update
*update
)
6879 * crack open the metadata_update envelope to find the update record
6880 * update can be one of:
6881 * update_reshape_container_disks - all the arrays in the container
6882 * are being reshaped to have more devices. We need to mark
6883 * the arrays for general migration and convert selected spares
6884 * into active devices.
6885 * update_activate_spare - a spare device has replaced a failed
6886 * device in an array, update the disk_ord_tbl. If this disk is
6887 * present in all member arrays then also clear the SPARE_DISK
6889 * update_create_array
6891 * update_rename_array
6892 * update_add_remove_disk
6894 struct intel_super
*super
= st
->sb
;
6895 struct imsm_super
*mpb
;
6896 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
6898 /* update requires a larger buf but the allocation failed */
6899 if (super
->next_len
&& !super
->next_buf
) {
6900 super
->next_len
= 0;
6904 if (super
->next_buf
) {
6905 memcpy(super
->next_buf
, super
->buf
, super
->len
);
6907 super
->len
= super
->next_len
;
6908 super
->buf
= super
->next_buf
;
6910 super
->next_len
= 0;
6911 super
->next_buf
= NULL
;
6914 mpb
= super
->anchor
;
6917 case update_takeover
: {
6918 struct imsm_update_takeover
*u
= (void *)update
->buf
;
6919 if (apply_takeover_update(u
, super
, &update
->space_list
)) {
6920 imsm_update_version_info(super
);
6921 super
->updates_pending
++;
6926 case update_reshape_container_disks
: {
6927 struct imsm_update_reshape
*u
= (void *)update
->buf
;
6928 if (apply_reshape_container_disks_update(
6929 u
, super
, &update
->space_list
))
6930 super
->updates_pending
++;
6933 case update_reshape_migration
: {
6934 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
6935 if (apply_reshape_migration_update(
6936 u
, super
, &update
->space_list
))
6937 super
->updates_pending
++;
6940 case update_activate_spare
: {
6941 struct imsm_update_activate_spare
*u
= (void *) update
->buf
;
6942 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->array
);
6943 struct imsm_map
*map
= get_imsm_map(dev
, 0);
6944 struct imsm_map
*migr_map
;
6945 struct active_array
*a
;
6946 struct imsm_disk
*disk
;
6951 int victim
= get_imsm_disk_idx(dev
, u
->slot
, -1);
6954 for (dl
= super
->disks
; dl
; dl
= dl
->next
)
6959 fprintf(stderr
, "error: imsm_activate_spare passed "
6960 "an unknown disk (index: %d)\n",
6965 super
->updates_pending
++;
6966 /* count failures (excluding rebuilds and the victim)
6967 * to determine map[0] state
6970 for (i
= 0; i
< map
->num_members
; i
++) {
6973 disk
= get_imsm_disk(super
,
6974 get_imsm_disk_idx(dev
, i
, -1));
6975 if (!disk
|| is_failed(disk
))
6979 /* adding a pristine spare, assign a new index */
6980 if (dl
->index
< 0) {
6981 dl
->index
= super
->anchor
->num_disks
;
6982 super
->anchor
->num_disks
++;
6985 disk
->status
|= CONFIGURED_DISK
;
6986 disk
->status
&= ~SPARE_DISK
;
6989 to_state
= imsm_check_degraded(super
, dev
, failed
);
6990 map
->map_state
= IMSM_T_STATE_DEGRADED
;
6991 migrate(dev
, super
, to_state
, MIGR_REBUILD
);
6992 migr_map
= get_imsm_map(dev
, 1);
6993 set_imsm_ord_tbl_ent(map
, u
->slot
, dl
->index
);
6994 set_imsm_ord_tbl_ent(migr_map
, u
->slot
, dl
->index
| IMSM_ORD_REBUILD
);
6996 /* update the family_num to mark a new container
6997 * generation, being careful to record the existing
6998 * family_num in orig_family_num to clean up after
6999 * earlier mdadm versions that neglected to set it.
7001 if (mpb
->orig_family_num
== 0)
7002 mpb
->orig_family_num
= mpb
->family_num
;
7003 mpb
->family_num
+= super
->random
;
7005 /* count arrays using the victim in the metadata */
7007 for (a
= st
->arrays
; a
; a
= a
->next
) {
7008 dev
= get_imsm_dev(super
, a
->info
.container_member
);
7009 map
= get_imsm_map(dev
, 0);
7011 if (get_imsm_disk_slot(map
, victim
) >= 0)
7015 /* delete the victim if it is no longer being
7021 /* We know that 'manager' isn't touching anything,
7022 * so it is safe to delete
7024 for (dlp
= &super
->disks
; *dlp
; dlp
= &(*dlp
)->next
)
7025 if ((*dlp
)->index
== victim
)
7028 /* victim may be on the missing list */
7030 for (dlp
= &super
->missing
; *dlp
; dlp
= &(*dlp
)->next
)
7031 if ((*dlp
)->index
== victim
)
7033 imsm_delete(super
, dlp
, victim
);
7037 case update_create_array
: {
7038 /* someone wants to create a new array, we need to be aware of
7039 * a few races/collisions:
7040 * 1/ 'Create' called by two separate instances of mdadm
7041 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
7042 * devices that have since been assimilated via
7044 * In the event this update can not be carried out mdadm will
7045 * (FIX ME) notice that its update did not take hold.
7047 struct imsm_update_create_array
*u
= (void *) update
->buf
;
7048 struct intel_dev
*dv
;
7049 struct imsm_dev
*dev
;
7050 struct imsm_map
*map
, *new_map
;
7051 unsigned long long start
, end
;
7052 unsigned long long new_start
, new_end
;
7054 struct disk_info
*inf
;
7057 /* handle racing creates: first come first serve */
7058 if (u
->dev_idx
< mpb
->num_raid_devs
) {
7059 dprintf("%s: subarray %d already defined\n",
7060 __func__
, u
->dev_idx
);
7064 /* check update is next in sequence */
7065 if (u
->dev_idx
!= mpb
->num_raid_devs
) {
7066 dprintf("%s: can not create array %d expected index %d\n",
7067 __func__
, u
->dev_idx
, mpb
->num_raid_devs
);
7071 new_map
= get_imsm_map(&u
->dev
, 0);
7072 new_start
= __le32_to_cpu(new_map
->pba_of_lba0
);
7073 new_end
= new_start
+ __le32_to_cpu(new_map
->blocks_per_member
);
7074 inf
= get_disk_info(u
);
7076 /* handle activate_spare versus create race:
7077 * check to make sure that overlapping arrays do not include
7080 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
7081 dev
= get_imsm_dev(super
, i
);
7082 map
= get_imsm_map(dev
, 0);
7083 start
= __le32_to_cpu(map
->pba_of_lba0
);
7084 end
= start
+ __le32_to_cpu(map
->blocks_per_member
);
7085 if ((new_start
>= start
&& new_start
<= end
) ||
7086 (start
>= new_start
&& start
<= new_end
))
7091 if (disks_overlap(super
, i
, u
)) {
7092 dprintf("%s: arrays overlap\n", __func__
);
7097 /* check that prepare update was successful */
7098 if (!update
->space
) {
7099 dprintf("%s: prepare update failed\n", __func__
);
7103 /* check that all disks are still active before committing
7104 * changes. FIXME: could we instead handle this by creating a
7105 * degraded array? That's probably not what the user expects,
7106 * so better to drop this update on the floor.
7108 for (i
= 0; i
< new_map
->num_members
; i
++) {
7109 dl
= serial_to_dl(inf
[i
].serial
, super
);
7111 dprintf("%s: disk disappeared\n", __func__
);
7116 super
->updates_pending
++;
7118 /* convert spares to members and fixup ord_tbl */
7119 for (i
= 0; i
< new_map
->num_members
; i
++) {
7120 dl
= serial_to_dl(inf
[i
].serial
, super
);
7121 if (dl
->index
== -1) {
7122 dl
->index
= mpb
->num_disks
;
7124 dl
->disk
.status
|= CONFIGURED_DISK
;
7125 dl
->disk
.status
&= ~SPARE_DISK
;
7127 set_imsm_ord_tbl_ent(new_map
, i
, dl
->index
);
7132 update
->space
= NULL
;
7133 imsm_copy_dev(dev
, &u
->dev
);
7134 dv
->index
= u
->dev_idx
;
7135 dv
->next
= super
->devlist
;
7136 super
->devlist
= dv
;
7137 mpb
->num_raid_devs
++;
7139 imsm_update_version_info(super
);
7142 /* mdmon knows how to release update->space, but not
7143 * ((struct intel_dev *) update->space)->dev
7145 if (update
->space
) {
7151 case update_kill_array
: {
7152 struct imsm_update_kill_array
*u
= (void *) update
->buf
;
7153 int victim
= u
->dev_idx
;
7154 struct active_array
*a
;
7155 struct intel_dev
**dp
;
7156 struct imsm_dev
*dev
;
7158 /* sanity check that we are not affecting the uuid of
7159 * active arrays, or deleting an active array
7161 * FIXME when immutable ids are available, but note that
7162 * we'll also need to fixup the invalidated/active
7163 * subarray indexes in mdstat
7165 for (a
= st
->arrays
; a
; a
= a
->next
)
7166 if (a
->info
.container_member
>= victim
)
7168 /* by definition if mdmon is running at least one array
7169 * is active in the container, so checking
7170 * mpb->num_raid_devs is just extra paranoia
7172 dev
= get_imsm_dev(super
, victim
);
7173 if (a
|| !dev
|| mpb
->num_raid_devs
== 1) {
7174 dprintf("failed to delete subarray-%d\n", victim
);
7178 for (dp
= &super
->devlist
; *dp
;)
7179 if ((*dp
)->index
== (unsigned)super
->current_vol
) {
7182 if ((*dp
)->index
> (unsigned)victim
)
7186 mpb
->num_raid_devs
--;
7187 super
->updates_pending
++;
7190 case update_rename_array
: {
7191 struct imsm_update_rename_array
*u
= (void *) update
->buf
;
7192 char name
[MAX_RAID_SERIAL_LEN
+1];
7193 int target
= u
->dev_idx
;
7194 struct active_array
*a
;
7195 struct imsm_dev
*dev
;
7197 /* sanity check that we are not affecting the uuid of
7200 snprintf(name
, MAX_RAID_SERIAL_LEN
, "%s", (char *) u
->name
);
7201 name
[MAX_RAID_SERIAL_LEN
] = '\0';
7202 for (a
= st
->arrays
; a
; a
= a
->next
)
7203 if (a
->info
.container_member
== target
)
7205 dev
= get_imsm_dev(super
, u
->dev_idx
);
7206 if (a
|| !dev
|| !check_name(super
, name
, 1)) {
7207 dprintf("failed to rename subarray-%d\n", target
);
7211 snprintf((char *) dev
->volume
, MAX_RAID_SERIAL_LEN
, "%s", name
);
7212 super
->updates_pending
++;
7215 case update_add_remove_disk
: {
7216 /* we may be able to repair some arrays if disks are
7217 * being added, check teh status of add_remove_disk
7218 * if discs has been added.
7220 if (add_remove_disk_update(super
)) {
7221 struct active_array
*a
;
7223 super
->updates_pending
++;
7224 for (a
= st
->arrays
; a
; a
= a
->next
)
7225 a
->check_degraded
= 1;
7230 fprintf(stderr
, "error: unsuported process update type:"
7231 "(type: %d)\n", type
);
7235 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
);
7237 static void imsm_prepare_update(struct supertype
*st
,
7238 struct metadata_update
*update
)
7241 * Allocate space to hold new disk entries, raid-device entries or a new
7242 * mpb if necessary. The manager synchronously waits for updates to
7243 * complete in the monitor, so new mpb buffers allocated here can be
7244 * integrated by the monitor thread without worrying about live pointers
7245 * in the manager thread.
7247 enum imsm_update_type type
= *(enum imsm_update_type
*) update
->buf
;
7248 struct intel_super
*super
= st
->sb
;
7249 struct imsm_super
*mpb
= super
->anchor
;
7254 case update_takeover
: {
7255 struct imsm_update_takeover
*u
= (void *)update
->buf
;
7256 if (u
->direction
== R0_TO_R10
) {
7257 void **tail
= (void **)&update
->space_list
;
7258 struct imsm_dev
*dev
= get_imsm_dev(super
, u
->subarray
);
7259 struct imsm_map
*map
= get_imsm_map(dev
, 0);
7260 int num_members
= map
->num_members
;
7264 /* allocate memory for added disks */
7265 for (i
= 0; i
< num_members
; i
++) {
7266 size
= sizeof(struct dl
);
7267 space
= malloc(size
);
7276 /* allocate memory for new device */
7277 size
= sizeof_imsm_dev(super
->devlist
->dev
, 0) +
7278 (num_members
* sizeof(__u32
));
7279 space
= malloc(size
);
7288 len
= disks_to_mpb_size(num_members
* 2);
7290 /* if allocation didn't success, free buffer */
7291 while (update
->space_list
) {
7292 void **sp
= update
->space_list
;
7293 update
->space_list
= *sp
;
7301 case update_reshape_container_disks
: {
7302 /* Every raid device in the container is about to
7303 * gain some more devices, and we will enter a
7305 * So each 'imsm_map' will be bigger, and the imsm_vol
7306 * will now hold 2 of them.
7307 * Thus we need new 'struct imsm_dev' allocations sized
7308 * as sizeof_imsm_dev but with more devices in both maps.
7310 struct imsm_update_reshape
*u
= (void *)update
->buf
;
7311 struct intel_dev
*dl
;
7312 void **space_tail
= (void**)&update
->space_list
;
7314 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7316 for (dl
= super
->devlist
; dl
; dl
= dl
->next
) {
7317 int size
= sizeof_imsm_dev(dl
->dev
, 1);
7319 if (u
->new_raid_disks
> u
->old_raid_disks
)
7320 size
+= sizeof(__u32
)*2*
7321 (u
->new_raid_disks
- u
->old_raid_disks
);
7330 len
= disks_to_mpb_size(u
->new_raid_disks
);
7331 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
7334 case update_reshape_migration
: {
7335 /* for migration level 0->5 we need to add disks
7336 * so the same as for container operation we will copy
7337 * device to the bigger location.
7338 * in memory prepared device and new disk area are prepared
7339 * for usage in process update
7341 struct imsm_update_reshape_migration
*u
= (void *)update
->buf
;
7342 struct intel_dev
*id
;
7343 void **space_tail
= (void **)&update
->space_list
;
7346 int current_level
= -1;
7348 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7350 /* add space for bigger array in update
7352 for (id
= super
->devlist
; id
; id
= id
->next
) {
7353 if (id
->index
== (unsigned)u
->subdev
) {
7354 size
= sizeof_imsm_dev(id
->dev
, 1);
7355 if (u
->new_raid_disks
> u
->old_raid_disks
)
7356 size
+= sizeof(__u32
)*2*
7357 (u
->new_raid_disks
- u
->old_raid_disks
);
7367 if (update
->space_list
== NULL
)
7370 /* add space for disk in update
7372 size
= sizeof(struct dl
);
7375 free(update
->space_list
);
7376 update
->space_list
= NULL
;
7383 /* add spare device to update
7385 for (id
= super
->devlist
; id
; id
= id
->next
)
7386 if (id
->index
== (unsigned)u
->subdev
) {
7387 struct imsm_dev
*dev
;
7388 struct imsm_map
*map
;
7390 dev
= get_imsm_dev(super
, u
->subdev
);
7391 map
= get_imsm_map(dev
, 0);
7392 current_level
= map
->raid_level
;
7395 if ((u
->new_level
== 5) && (u
->new_level
!= current_level
)) {
7396 struct mdinfo
*spares
;
7398 spares
= get_spares_for_grow(st
);
7406 makedev(dev
->disk
.major
,
7408 dl
= get_disk_super(super
,
7411 dl
->index
= u
->old_raid_disks
;
7417 len
= disks_to_mpb_size(u
->new_raid_disks
);
7418 dprintf("New anchor length is %llu\n", (unsigned long long)len
);
7421 case update_create_array
: {
7422 struct imsm_update_create_array
*u
= (void *) update
->buf
;
7423 struct intel_dev
*dv
;
7424 struct imsm_dev
*dev
= &u
->dev
;
7425 struct imsm_map
*map
= get_imsm_map(dev
, 0);
7427 struct disk_info
*inf
;
7431 inf
= get_disk_info(u
);
7432 len
= sizeof_imsm_dev(dev
, 1);
7433 /* allocate a new super->devlist entry */
7434 dv
= malloc(sizeof(*dv
));
7436 dv
->dev
= malloc(len
);
7441 update
->space
= NULL
;
7445 /* count how many spares will be converted to members */
7446 for (i
= 0; i
< map
->num_members
; i
++) {
7447 dl
= serial_to_dl(inf
[i
].serial
, super
);
7449 /* hmm maybe it failed?, nothing we can do about
7454 if (count_memberships(dl
, super
) == 0)
7457 len
+= activate
* sizeof(struct imsm_disk
);
7464 /* check if we need a larger metadata buffer */
7465 if (super
->next_buf
)
7466 buf_len
= super
->next_len
;
7468 buf_len
= super
->len
;
7470 if (__le32_to_cpu(mpb
->mpb_size
) + len
> buf_len
) {
7471 /* ok we need a larger buf than what is currently allocated
7472 * if this allocation fails process_update will notice that
7473 * ->next_len is set and ->next_buf is NULL
7475 buf_len
= ROUND_UP(__le32_to_cpu(mpb
->mpb_size
) + len
, 512);
7476 if (super
->next_buf
)
7477 free(super
->next_buf
);
7479 super
->next_len
= buf_len
;
7480 if (posix_memalign(&super
->next_buf
, 512, buf_len
) == 0)
7481 memset(super
->next_buf
, 0, buf_len
);
7483 super
->next_buf
= NULL
;
7487 /* must be called while manager is quiesced */
7488 static void imsm_delete(struct intel_super
*super
, struct dl
**dlp
, unsigned index
)
7490 struct imsm_super
*mpb
= super
->anchor
;
7492 struct imsm_dev
*dev
;
7493 struct imsm_map
*map
;
7494 int i
, j
, num_members
;
7497 dprintf("%s: deleting device[%d] from imsm_super\n",
7500 /* shift all indexes down one */
7501 for (iter
= super
->disks
; iter
; iter
= iter
->next
)
7502 if (iter
->index
> (int)index
)
7504 for (iter
= super
->missing
; iter
; iter
= iter
->next
)
7505 if (iter
->index
> (int)index
)
7508 for (i
= 0; i
< mpb
->num_raid_devs
; i
++) {
7509 dev
= get_imsm_dev(super
, i
);
7510 map
= get_imsm_map(dev
, 0);
7511 num_members
= map
->num_members
;
7512 for (j
= 0; j
< num_members
; j
++) {
7513 /* update ord entries being careful not to propagate
7514 * ord-flags to the first map
7516 ord
= get_imsm_ord_tbl_ent(dev
, j
, -1);
7518 if (ord_to_idx(ord
) <= index
)
7521 map
= get_imsm_map(dev
, 0);
7522 set_imsm_ord_tbl_ent(map
, j
, ord_to_idx(ord
- 1));
7523 map
= get_imsm_map(dev
, 1);
7525 set_imsm_ord_tbl_ent(map
, j
, ord
- 1);
7530 super
->updates_pending
++;
7532 struct dl
*dl
= *dlp
;
7534 *dlp
= (*dlp
)->next
;
7535 __free_imsm_disk(dl
);
7539 /*******************************************************************************
7540 * Function: open_backup_targets
7541 * Description: Function opens file descriptors for all devices given in
7544 * info : general array info
7545 * raid_disks : number of disks
7546 * raid_fds : table of device's file descriptors
7550 ******************************************************************************/
7551 int open_backup_targets(struct mdinfo
*info
, int raid_disks
, int *raid_fds
)
/* Open an O_RDWR file descriptor for each usable member disk of the
 * array described by 'info', storing it in raid_fds[] indexed by the
 * disk's raid slot.  NOTE(review): several original lines are missing
 * from this extract (loop body continuations, error returns). */
7555 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
/* Faulty members are skipped — they cannot serve as backup targets. */
7558 if (sd
->disk
.state
& (1<<MD_DISK_FAULTY
)) {
7559 dprintf("disk is faulty!!\n");
/* Ignore disks whose raid slot lies outside [0, raid_disks). */
7563 if ((sd
->disk
.raid_disk
>= raid_disks
) ||
7564 (sd
->disk
.raid_disk
< 0))
/* Map major:minor to a device node name, then open it read-write. */
7567 dn
= map_dev(sd
->disk
.major
,
7569 raid_fds
[sd
->disk
.raid_disk
] = dev_open(dn
, O_RDWR
);
/* Report a component that could not be opened. */
7570 if (raid_fds
[sd
->disk
.raid_disk
] < 0) {
7571 fprintf(stderr
, "cannot open component\n");
7578 /*******************************************************************************
7579 * Function: init_migr_record_imsm
7580 * Description: Function inits imsm migration record
7582 * super : imsm internal array info
7583 * dev : device under migration
7584 * info : general array info to find the smallest device
7587 ******************************************************************************/
7588 void init_migr_record_imsm(struct supertype
*st
, struct imsm_dev
*dev
,
7589 struct mdinfo
*info
)
7591 struct intel_super
*super
= st
->sb
;
7592 struct migr_record
*migr_rec
= super
->migr_rec
;
7594 unsigned long long dsize
, dev_sectors
;
7595 long long unsigned min_dev_sectors
= -1LLU;
7599 struct imsm_map
*map_dest
= get_imsm_map(dev
, 0);
7600 struct imsm_map
*map_src
= get_imsm_map(dev
, 1);
7601 unsigned long long num_migr_units
;
7603 unsigned long long array_blocks
=
7604 (((unsigned long long)__le32_to_cpu(dev
->size_high
)) << 32) +
7605 __le32_to_cpu(dev
->size_low
);
7607 memset(migr_rec
, 0, sizeof(struct migr_record
));
7608 migr_rec
->family_num
= __cpu_to_le32(super
->anchor
->family_num
);
7610 /* only ascending reshape supported now */
7611 migr_rec
->ascending_migr
= __cpu_to_le32(1);
7613 migr_rec
->dest_depth_per_unit
= GEN_MIGR_AREA_SIZE
/
7614 max(map_dest
->blocks_per_strip
, map_src
->blocks_per_strip
);
7615 migr_rec
->dest_depth_per_unit
*= map_dest
->blocks_per_strip
;
7616 new_data_disks
= imsm_num_data_members(dev
, 0);
7617 migr_rec
->blocks_per_unit
=
7618 __cpu_to_le32(migr_rec
->dest_depth_per_unit
* new_data_disks
);
7619 migr_rec
->dest_depth_per_unit
=
7620 __cpu_to_le32(migr_rec
->dest_depth_per_unit
);
7623 array_blocks
/ __le32_to_cpu(migr_rec
->blocks_per_unit
);
7625 if (array_blocks
% __le32_to_cpu(migr_rec
->blocks_per_unit
))
7627 migr_rec
->num_migr_units
= __cpu_to_le32(num_migr_units
);
7629 migr_rec
->post_migr_vol_cap
= dev
->size_low
;
7630 migr_rec
->post_migr_vol_cap_hi
= dev
->size_high
;
7633 /* Find the smallest dev */
7634 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
7635 sprintf(nm
, "%d:%d", sd
->disk
.major
, sd
->disk
.minor
);
7636 fd
= dev_open(nm
, O_RDONLY
);
7639 get_dev_size(fd
, NULL
, &dsize
);
7640 dev_sectors
= dsize
/ 512;
7641 if (dev_sectors
< min_dev_sectors
)
7642 min_dev_sectors
= dev_sectors
;
7645 migr_rec
->ckpt_area_pba
= __cpu_to_le32(min_dev_sectors
-
7646 RAID_DISK_RESERVED_BLOCKS_IMSM_HI
);
7648 write_imsm_migr_rec(st
);
7653 /*******************************************************************************
7654 * Function: save_backup_imsm
7655 * Description: Function saves critical data stripes to Migration Copy Area
7656 * and updates the current migration unit status.
7657 * Use restore_stripes() to form a destination stripe,
7658 * and to write it to the Copy Area.
7660 * st : supertype information
7661 * info : general array info
7662 * buf : input buffer
7663 * write_offset : address of data to backup
7664 * length : length of data to backup (blocks_per_unit)
7668 ******************************************************************************/
7669 int save_backup_imsm(struct supertype
*st
,
7670 struct imsm_dev
*dev
,
7671 struct mdinfo
*info
,
7677 struct intel_super
*super
= st
->sb
;
7678 unsigned long long *target_offsets
= NULL
;
7679 int *targets
= NULL
;
7681 struct imsm_map
*map_dest
= get_imsm_map(dev
, 0);
7682 int new_disks
= map_dest
->num_members
;
7684 targets
= malloc(new_disks
* sizeof(int));
7688 target_offsets
= malloc(new_disks
* sizeof(unsigned long long));
7689 if (!target_offsets
)
7692 for (i
= 0; i
< new_disks
; i
++) {
7694 target_offsets
[i
] = (unsigned long long)
7695 __le32_to_cpu(super
->migr_rec
->ckpt_area_pba
) * 512;
7698 if (open_backup_targets(info
, new_disks
, targets
))
7701 if (restore_stripes(targets
, /* list of dest devices */
7702 target_offsets
, /* migration record offsets */
7707 -1, /* source backup file descriptor */
7708 0, /* input buf offset
7709 * always 0 buf is already offset */
7713 fprintf(stderr
, Name
": Error restoring stripes\n");
7721 for (i
= 0; i
< new_disks
; i
++)
7722 if (targets
[i
] >= 0)
7726 free(target_offsets
);
7731 /*******************************************************************************
7732 * Function: save_checkpoint_imsm
7733 * Description: Function called for current unit status update
7734 * in the migration record. It writes it to disk.
7736 * super : imsm internal array info
7737 * info : general array info
7741 ******************************************************************************/
7742 int save_checkpoint_imsm(struct supertype
*st
, struct mdinfo
*info
, int state
)
/* Record the current reshape checkpoint in the on-disk migration
 * record: reload the record, recompute curr_migr_unit from
 * info->reshape_progress, store 'state' in rec_status, and write the
 * record back via write_imsm_migr_rec(). */
7744 struct intel_super
*super
= st
->sb
;
7745 load_imsm_migr_rec(super
, info
);
/* Guard against dividing by zero below (corrupt or uninitialized
 * migration record). */
7746 if (__le32_to_cpu(super
->migr_rec
->blocks_per_unit
) == 0) {
7747 dprintf("ERROR: blocks_per_unit = 0!!!\n");
/* Units completed = progress (in blocks) / blocks per migration unit.
 * All migr_rec fields are little-endian on disk, hence the
 * cpu<->le conversions around every access. */
7751 super
->migr_rec
->curr_migr_unit
=
7752 __cpu_to_le32(info
->reshape_progress
/
7753 __le32_to_cpu(super
->migr_rec
->blocks_per_unit
));
7754 super
->migr_rec
->rec_status
= __cpu_to_le32(state
);
/* First destination LBA = units completed * destination depth per
 * unit. */
7755 super
->migr_rec
->dest_1st_member_lba
=
7756 __cpu_to_le32((__le32_to_cpu(super
->migr_rec
->curr_migr_unit
))
7757 * __le32_to_cpu(super
->migr_rec
->dest_depth_per_unit
));
7758 if (write_imsm_migr_rec(st
) < 0) {
7759 dprintf("imsm: Cannot write migration record "
7760 "outside backup area\n");
7767 static __u64
blocks_per_migr_unit(struct intel_super
*super
,
7768 struct imsm_dev
*dev
);
7770 /*******************************************************************************
7771 * Function: recover_backup_imsm
7772 * Description: Function recovers critical data from the Migration Copy Area
7773 * while assembling an array.
7775 * super : imsm internal array info
7776 * info : general array info
7778 * 0 : success (or there is no data to recover)
7780 ******************************************************************************/
7781 int recover_backup_imsm(struct supertype
*st
, struct mdinfo
*info
)
7783 struct intel_super
*super
= st
->sb
;
7784 struct migr_record
*migr_rec
= super
->migr_rec
;
7785 struct imsm_map
*map_dest
= NULL
;
7786 struct intel_dev
*id
= NULL
;
7787 unsigned long long read_offset
;
7788 unsigned long long write_offset
;
7790 int *targets
= NULL
;
7791 int new_disks
, i
, err
;
7794 unsigned long curr_migr_unit
= __le32_to_cpu(migr_rec
->curr_migr_unit
);
7795 unsigned long num_migr_units
= __le32_to_cpu(migr_rec
->num_migr_units
);
7796 int ascending
= __le32_to_cpu(migr_rec
->ascending_migr
);
7799 err
= sysfs_get_str(info
, NULL
, "array_state", (char *)buffer
, 20);
7803 /* recover data only during assemblation */
7804 if (strncmp(buffer
, "inactive", 8) != 0)
7806 /* no data to recover */
7807 if (__le32_to_cpu(migr_rec
->rec_status
) == UNIT_SRC_NORMAL
)
7809 if (curr_migr_unit
>= num_migr_units
)
7812 /* find device during reshape */
7813 for (id
= super
->devlist
; id
; id
= id
->next
)
7814 if (is_gen_migration(id
->dev
))
7819 map_dest
= get_imsm_map(id
->dev
, 0);
7820 new_disks
= map_dest
->num_members
;
7822 read_offset
= (unsigned long long)
7823 __le32_to_cpu(migr_rec
->ckpt_area_pba
) * 512;
7825 write_offset
= ((unsigned long long)
7826 __le32_to_cpu(migr_rec
->dest_1st_member_lba
) +
7827 info
->data_offset
) * 512;
7829 unit_len
= __le32_to_cpu(migr_rec
->dest_depth_per_unit
) * 512;
7830 if (posix_memalign((void **)&buf
, 512, unit_len
) != 0)
7832 targets
= malloc(new_disks
* sizeof(int));
7836 open_backup_targets(info
, new_disks
, targets
);
7838 for (i
= 0; i
< new_disks
; i
++) {
7839 if (lseek64(targets
[i
], read_offset
, SEEK_SET
) < 0) {
7841 Name
": Cannot seek to block: %s\n",
7845 if (read(targets
[i
], buf
, unit_len
) != unit_len
) {
7847 Name
": Cannot read copy area block: %s\n",
7851 if (lseek64(targets
[i
], write_offset
, SEEK_SET
) < 0) {
7853 Name
": Cannot seek to block: %s\n",
7857 if (write(targets
[i
], buf
, unit_len
) != unit_len
) {
7859 Name
": Cannot restore block: %s\n",
7865 if (ascending
&& curr_migr_unit
< (num_migr_units
-1))
7868 migr_rec
->curr_migr_unit
= __le32_to_cpu(curr_migr_unit
);
7869 super
->migr_rec
->rec_status
= __cpu_to_le32(UNIT_SRC_NORMAL
);
7870 if (write_imsm_migr_rec(st
) == 0) {
7871 __u64 blocks_per_unit
= blocks_per_migr_unit(super
, id
->dev
);
7872 info
->reshape_progress
= curr_migr_unit
* blocks_per_unit
;
7878 for (i
= 0; i
< new_disks
; i
++)
7887 static char disk_by_path
[] = "/dev/disk/by-path/";
7889 static const char *imsm_get_disk_controller_domain(const char *path
)
/* Classify which controller domain (SAS vs SATA HBA) the disk named by
 * 'path' — a name under the disk_by_path prefix ("/dev/disk/by-path/")
 * — is attached to.  The exact returned strings are on lines missing
 * from this extract; the SAS/SATA branches set them. */
7891 char disk_path
[PATH_MAX
];
/* Build "<disk_by_path><path>".  NOTE(review): strncpy does not
 * guarantee NUL-termination when the source fills the buffer; safe
 * here only because disk_by_path is a short fixed prefix — consider
 * snprintf. */
7895 strncpy(disk_path
, disk_by_path
, PATH_MAX
- 1);
7896 strncat(disk_path
, path
, PATH_MAX
- strlen(disk_path
) - 1);
7897 if (stat(disk_path
, &st
) == 0) {
7898 struct sys_dev
* hba
;
/* Resolve the device number to its sysfs device path, then look up
 * the HBA the disk hangs off. */
7901 path
= devt_to_devpath(st
.st_rdev
);
7904 hba
= find_disk_attached_hba(-1, path
);
7905 if (hba
&& hba
->type
== SYS_DEV_SAS
)
7907 else if (hba
&& hba
->type
== SYS_DEV_SATA
)
7911 dprintf("path: %s hba: %s attached: %s\n",
7912 path
, (hba
) ? hba
->path
: "NULL", drv
);
7920 static int imsm_find_array_minor_by_subdev(int subdev
, int container
, int *minor
)
/* Look up the md device minor of the member array identified by
 * 'subdev' within the given container, via the mdstat entry list.
 * On success the minor number is stored in *minor.
 * NOTE(review): the mdstat NULL-check / error return (original lines
 * 7927-7929) is missing from this extract. */
7922 char subdev_name
[20];
7923 struct mdstat_ent
*mdstat
;
/* mdstat_by_subdev() matches on the textual subdev index. */
7925 sprintf(subdev_name
, "%d", subdev
);
7926 mdstat
= mdstat_by_subdev(subdev_name
, container
);
7930 *minor
= mdstat
->devnum
;
7931 free_mdstat(mdstat
);
7935 static int imsm_reshape_is_allowed_on_container(struct supertype
*st
,
7936 struct geo_params
*geo
,
7937 int *old_raid_disks
)
7939 /* currently we only support increasing the number of devices
7940 * for a container. This increases the number of device for each
7941 * member array. They must all be RAID0 or RAID5.
7944 struct mdinfo
*info
, *member
;
7945 int devices_that_can_grow
= 0;
7947 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7948 "st->devnum = (%i)\n",
7951 if (geo
->size
!= -1 ||
7952 geo
->level
!= UnSet
||
7953 geo
->layout
!= UnSet
||
7954 geo
->chunksize
!= 0 ||
7955 geo
->raid_disks
== UnSet
) {
7956 dprintf("imsm: Container operation is allowed for "
7957 "raid disks number change only.\n");
7961 info
= container_content_imsm(st
, NULL
);
7962 for (member
= info
; member
; member
= member
->next
) {
7966 dprintf("imsm: checking device_num: %i\n",
7967 member
->container_member
);
7969 if (geo
->raid_disks
<= member
->array
.raid_disks
) {
7970 /* we work on container for Online Capacity Expansion
7971 * only so raid_disks has to grow
7973 dprintf("imsm: for container operation raid disks "
7974 "increase is required\n");
7978 if ((info
->array
.level
!= 0) &&
7979 (info
->array
.level
!= 5)) {
7980 /* we cannot use this container with other raid level
7982 dprintf("imsm: for container operation wrong"
7983 " raid level (%i) detected\n",
7987 /* check for platform support
7988 * for this raid level configuration
7990 struct intel_super
*super
= st
->sb
;
7991 if (!is_raid_level_supported(super
->orom
,
7992 member
->array
.level
,
7994 dprintf("platform does not support raid%d with"
7998 geo
->raid_disks
> 1 ? "s" : "");
8001 /* check if component size is aligned to chunk size
8003 if (info
->component_size
%
8004 (info
->array
.chunk_size
/512)) {
8005 dprintf("Component size is not aligned to "
8011 if (*old_raid_disks
&&
8012 info
->array
.raid_disks
!= *old_raid_disks
)
8014 *old_raid_disks
= info
->array
.raid_disks
;
8016 /* All raid5 and raid0 volumes in container
8017 * have to be ready for Online Capacity Expansion
8018 * so they need to be assembled. We have already
8019 * checked that no recovery etc is happening.
8021 result
= imsm_find_array_minor_by_subdev(member
->container_member
,
8025 dprintf("imsm: cannot find array\n");
8028 devices_that_can_grow
++;
8031 if (!member
&& devices_that_can_grow
)
8035 dprintf("\tContainer operation allowed\n");
8037 dprintf("\tError: %i\n", ret_val
);
8042 /* Function: get_spares_for_grow
8043 * Description: Allocates memory and creates list of spare devices
8044 * available in container. Checks if spare drive size is acceptable.
8045 * Parameters: Pointer to the supertype structure
8046 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
/* NOTE(review): this chunk is a lossy extraction — original line numbers are
 * embedded in the text and some lines (braces, NULL-return doc) are missing.
 * Code below is preserved byte-for-byte; only comments were touched. */
8049 static struct mdinfo
*get_spares_for_grow(struct supertype
*st
)
/* minimum usable spare size is derived from the container's metadata */
8051 unsigned long long min_size
= min_acceptable_spare_size_imsm(st
);
/* delegate actual selection to the generic container spare chooser */
8052 return container_choose_spares(st
, min_size
, NULL
, NULL
, NULL
, 0);
8055 /******************************************************************************
8056 * function: imsm_create_metadata_update_for_reshape
8057 * Function creates update for whole IMSM container.
8059 ******************************************************************************/
/* NOTE(review): lossy extraction — embedded line numbers, dropped lines
 * (e.g. the calloc NULL check body). Code preserved byte-for-byte below. */
8060 static int imsm_create_metadata_update_for_reshape(
8061 struct supertype
*st
,
8062 struct geo_params
*geo
,
8064 struct imsm_update_reshape
**updatep
)
8066 struct intel_super
*super
= st
->sb
;
8067 struct imsm_super
*mpb
= super
->anchor
;
8068 int update_memory_size
= 0;
8069 struct imsm_update_reshape
*u
= NULL
;
8070 struct mdinfo
*spares
= NULL
;
8072 int delta_disks
= 0;
8075 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
/* number of disks the container grows by */
8078 delta_disks
= geo
->raid_disks
- old_raid_disks
;
8080 /* size of all update data without anchor */
8081 update_memory_size
= sizeof(struct imsm_update_reshape
);
8083 /* now add space for spare disks that we need to add. */
/* the struct already holds one new_disks[] slot, hence (delta_disks - 1) */
8084 update_memory_size
+= sizeof(u
->new_disks
[0]) * (delta_disks
- 1);
8086 u
= calloc(1, update_memory_size
);
8089 "cannot get memory for imsm_update_reshape update\n");
/* fill in the update header: kind of update plus old/new disk counts */
8092 u
->type
= update_reshape_container_disks
;
8093 u
->old_raid_disks
= old_raid_disks
;
8094 u
->new_raid_disks
= geo
->raid_disks
;
8096 /* now get spare disks list
8098 spares
= get_spares_for_grow(st
);
/* bail out when there are no spares or fewer spares than disks added */
8101 || delta_disks
> spares
->array
.spare_disks
) {
8102 fprintf(stderr
, Name
": imsm: ERROR: Cannot get spare devices "
8103 "for %s.\n", geo
->dev_name
);
8107 /* we have got spares
8108 * update disk list in imsm_disk list table in anchor
8110 dprintf("imsm: %i spares are available.\n\n",
8111 spares
->array
.spare_disks
);
/* record each chosen spare's dev_t and give it an index in the anchor */
8114 for (i
= 0; i
< delta_disks
; i
++) {
8119 u
->new_disks
[i
] = makedev(dev
->disk
.major
,
8121 dl
= get_disk_super(super
, dev
->disk
.major
, dev
->disk
.minor
);
8122 dl
->index
= mpb
->num_disks
;
8132 dprintf("imsm: reshape update preparation :");
/* success only when a spare was assigned for every added disk */
8133 if (i
== delta_disks
) {
8136 return update_memory_size
;
8139 dprintf(" Error\n");
8144 /******************************************************************************
8145 * function: imsm_create_metadata_update_for_migration()
8146 * Creates update for IMSM array.
8148 ******************************************************************************/
/* NOTE(review): lossy extraction — embedded line numbers and dropped lines.
 * Code preserved byte-for-byte; only comments were added. */
8149 static int imsm_create_metadata_update_for_migration(
8150 struct supertype
*st
,
8151 struct geo_params
*geo
,
8152 struct imsm_update_reshape_migration
**updatep
)
8154 struct intel_super
*super
= st
->sb
;
8155 int update_memory_size
= 0;
8156 struct imsm_update_reshape_migration
*u
= NULL
;
8157 struct imsm_dev
*dev
;
8158 int previous_level
= -1;
8160 dprintf("imsm_create_metadata_update_for_migration(enter)"
8161 " New Level = %i\n", geo
->level
);
8163 /* size of all update data without anchor */
8164 update_memory_size
= sizeof(struct imsm_update_reshape_migration
);
8166 u
= calloc(1, update_memory_size
);
8168 dprintf("error: cannot get memory for "
8169 "imsm_create_metadata_update_for_migration\n");
/* describe the requested migration for the currently selected volume */
8172 u
->type
= update_reshape_migration
;
8173 u
->subdev
= super
->current_vol
;
8174 u
->new_level
= geo
->level
;
8175 u
->new_layout
= geo
->layout
;
8176 u
->new_raid_disks
= u
->old_raid_disks
= geo
->raid_disks
;
/* -1 sentinels mean "no new disk" / "chunk size unchanged" */
8177 u
->new_disks
[0] = -1;
8178 u
->new_chunksize
= -1;
8180 dev
= get_imsm_dev(super
, u
->subdev
);
8182 struct imsm_map
*map
;
8184 map
= get_imsm_map(dev
, 0);
/* blocks_per_strip is in 512-byte sectors; /2 converts to KiB */
8186 int current_chunk_size
=
8187 __le16_to_cpu(map
->blocks_per_strip
) / 2;
8189 if (geo
->chunksize
!= current_chunk_size
) {
8190 u
->new_chunksize
= geo
->chunksize
/ 1024;
8192 "chunk size change from %i to %i\n",
8193 current_chunk_size
, u
->new_chunksize
);
8195 previous_level
= map
->raid_level
;
/* raid0 -> raid5 takeover needs one extra disk taken from the spares */
8198 if ((geo
->level
== 5) && (previous_level
== 0)) {
8199 struct mdinfo
*spares
= NULL
;
8201 u
->new_raid_disks
++;
8202 spares
= get_spares_for_grow(st
);
8203 if ((spares
== NULL
) || (spares
->array
.spare_disks
< 1)) {
8206 update_memory_size
= 0;
8207 dprintf("error: cannot get spare device "
8208 "for requested migration");
8213 dprintf("imsm: reshape update preparation : OK\n");
8216 return update_memory_size
;
/* Apply a metadata update to the local superblock copy: wrap it in a
 * struct metadata_update, run the normal prepare/process path, then free
 * any scratch buffers queued on the space_list.
 * NOTE(review): lossy extraction — some parameter and free() lines are
 * missing from this view; code preserved byte-for-byte. */
8219 static void imsm_update_metadata_locally(struct supertype
*st
,
8222 struct metadata_update mu
;
8227 mu
.space_list
= NULL
;
8229 imsm_prepare_update(st
, &mu
);
8230 imsm_process_update(st
, &mu
);
/* drain the space list: each entry's first word links to the next */
8232 while (mu
.space_list
) {
8233 void **space
= mu
.space_list
;
8234 mu
.space_list
= *space
;
8239 /***************************************************************************
8240 * Function: imsm_analyze_change
8241 * Description: Function analyze change for single volume
8242 * and validate if transition is supported
8243 * Parameters: Geometry parameters, supertype structure
8244 * Returns: Operation type code on success, -1 if fail
8245 ****************************************************************************/
/* NOTE(review): lossy extraction — embedded line numbers, missing case
 * labels and braces. Code preserved byte-for-byte below. */
8246 enum imsm_reshape_type
imsm_analyze_change(struct supertype
*st
,
8247 struct geo_params
*geo
)
8254 getinfo_super_imsm_volume(st
, &info
, NULL
);
/* level change requested: decide takeover vs. migration per current level */
8256 if ((geo
->level
!= info
.array
.level
) &&
8257 (geo
->level
>= 0) &&
8258 (geo
->level
!= UnSet
)) {
8259 switch (info
.array
.level
) {
8261 if (geo
->level
== 5) {
8262 change
= CH_MIGRATION
;
8265 if (geo
->level
== 10) {
8266 change
= CH_TAKEOVER
;
8271 if (geo
->level
== 0) {
8272 change
= CH_TAKEOVER
;
8277 if (geo
->level
== 0) {
8278 change
= CH_TAKEOVER
;
8285 Name
" Error. Level Migration from %d to %d "
8287 info
.array
.level
, geo
->level
);
8288 goto analyse_change_exit
;
/* no level change: normalize geo to the current level */
8291 geo
->level
= info
.array
.level
;
/* layout change: only raid5 layout 0 <-> 5 transitions are accepted */
8293 if ((geo
->layout
!= info
.array
.layout
)
8294 && ((geo
->layout
!= UnSet
) && (geo
->layout
!= -1))) {
8295 change
= CH_MIGRATION
;
8296 if ((info
.array
.layout
== 0)
8297 && (info
.array
.level
== 5)
8298 && (geo
->layout
== 5)) {
8299 /* reshape 5 -> 4 */
8300 } else if ((info
.array
.layout
== 5)
8301 && (info
.array
.level
== 5)
8302 && (geo
->layout
== 0)) {
8303 /* reshape 4 -> 5 */
8308 Name
" Error. Layout Migration from %d to %d "
8310 info
.array
.layout
, geo
->layout
);
8312 goto analyse_change_exit
;
8315 geo
->layout
= info
.array
.layout
;
/* a differing, valid chunk size request is handled as a migration */
8317 if ((geo
->chunksize
> 0) && (geo
->chunksize
!= UnSet
)
8318 && (geo
->chunksize
!= info
.array
.chunk_size
))
8319 change
= CH_MIGRATION
;
8321 geo
->chunksize
= info
.array
.chunk_size
;
8323 chunk
= geo
->chunksize
/ 1024;
/* let platform/orom validation veto the resulting geometry */
8324 if (!validate_geometry_imsm(st
,
/* takeover is only supported when the container holds a single array */
8334 struct intel_super
*super
= st
->sb
;
8335 struct imsm_super
*mpb
= super
->anchor
;
8337 if (mpb
->num_raid_devs
> 1) {
8339 Name
" Error. Cannot perform operation on %s"
8340 "- for this operation it MUST be single "
8341 "array in container\n",
8347 analyse_change_exit
:
/* Perform a raid10 <-> raid0 takeover on the current volume by building an
 * imsm_update_takeover record, applying it to the local metadata, and
 * queueing it for mdmon when update_tail is active.
 * NOTE(review): lossy extraction — the malloc NULL check and return lines
 * are missing from this view; code preserved byte-for-byte. */
8352 int imsm_takeover(struct supertype
*st
, struct geo_params
*geo
)
8354 struct intel_super
*super
= st
->sb
;
8355 struct imsm_update_takeover
*u
;
8357 u
= malloc(sizeof(struct imsm_update_takeover
));
8361 u
->type
= update_takeover
;
8362 u
->subarray
= super
->current_vol
;
8364 /* 10->0 transition */
8365 if (geo
->level
== 0)
8366 u
->direction
= R10_TO_R0
;
8368 /* 0->10 transition */
8369 if (geo
->level
== 10)
8370 u
->direction
= R0_TO_R10
;
8372 /* update metadata locally */
8373 imsm_update_metadata_locally(st
, u
,
8374 sizeof(struct imsm_update_takeover
));
8375 /* and possibly remotely */
8376 if (st
->update_tail
)
8377 append_metadata_update(st
, u
,
8378 sizeof(struct imsm_update_takeover
));
/* Print a prominent warning about the experimental container-grow feature
 * and ask the user for interactive confirmation.
 * Returns the value of ask() — non-zero to continue, 0 to abort (based on
 * the caller's `== 0` check; return line itself is outside this view). */
8385 static int warn_user_about_risk(void)
8390 "\nThis is an experimental feature. Data on the RAID volume(s) "
8391 "can be lost!!!\n\n"
8392 "To continue command execution please make sure that\n"
8393 "the grow process will not be interrupted. Use safe power\n"
8394 "supply to avoid unexpected system reboot. Make sure that\n"
8395 "reshaped container is not assembled automatically during\n"
8397 "If reshape is interrupted, assemble array manually\n"
8398 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
8399 "Assembly in scan mode is not possible in such case.\n"
8400 "Growing container with boot array is not possible.\n"
8401 "If boot array reshape is interrupted, whole file system\n"
8402 "can be lost.\n\n");
8403 rv
= ask("Do you want to continue? ");
8404 fprintf(stderr
, "\n");
/* superswitch .reshape_super entry point: dispatch a grow/reshape request
 * either to the container path (add disks) or to the volume path
 * (takeover / migration), building and applying the matching metadata
 * update.
 * NOTE(review): lossy extraction — embedded line numbers, dropped lines
 * (e.g. geo.dev_name/size assignment, loop over devlist, switch header).
 * Code preserved byte-for-byte; only comments were added. */
8409 static int imsm_reshape_super(struct supertype
*st
, long long size
, int level
,
8410 int layout
, int chunksize
, int raid_disks
,
8411 int delta_disks
, char *backup
, char *dev
,
8415 struct geo_params geo
;
8417 dprintf("imsm: reshape_super called.\n");
8419 memset(&geo
, 0, sizeof(struct geo_params
));
8422 geo
.dev_id
= st
->devnum
;
8425 geo
.layout
= layout
;
8426 geo
.chunksize
= chunksize
;
8427 geo
.raid_disks
= raid_disks
;
/* delta_disks is relative; fold it into the absolute disk count */
8428 if (delta_disks
!= UnSet
)
8429 geo
.raid_disks
+= delta_disks
;
8431 dprintf("\tfor level : %i\n", geo
.level
);
8432 dprintf("\tfor raid_disks : %i\n", geo
.raid_disks
);
/* feature is gated behind mdadm's "experimental" switch */
8434 if (experimental() == 0)
/* container-level operation: st refers to the container itself */
8437 if (st
->container_dev
== st
->devnum
) {
8438 /* On container level we can only increase number of devices. */
8439 dprintf("imsm: info: Container operation\n");
8440 int old_raid_disks
= 0;
8442 /* this warning will be removed when imsm checkpointing
8443 * will be implemented, and restoring from check-point
8444 * operation will be transparent for reboot process
8446 if (warn_user_about_risk() == 0)
8449 if (imsm_reshape_is_allowed_on_container(
8450 st
, &geo
, &old_raid_disks
)) {
8451 struct imsm_update_reshape
*u
= NULL
;
8454 len
= imsm_create_metadata_update_for_reshape(
8455 st
, &geo
, old_raid_disks
, &u
);
8458 dprintf("imsm: Cannot prepare update\n");
8459 goto exit_imsm_reshape_super
;
8463 /* update metadata locally */
8464 imsm_update_metadata_locally(st
, u
, len
);
8465 /* and possibly remotely */
8466 if (st
->update_tail
)
8467 append_metadata_update(st
, u
, len
);
8472 fprintf(stderr
, Name
": (imsm) Operation "
8473 "is not allowed on this container\n");
8476 /* On volume level we support following operations
8477 * - takeover: raid10 -> raid0; raid0 -> raid10
8478 * - chunk size migration
8479 * - migration: raid5 -> raid0; raid0 -> raid5
8481 struct intel_super
*super
= st
->sb
;
8482 struct intel_dev
*dev
= super
->devlist
;
8484 dprintf("imsm: info: Volume operation\n");
8485 /* find requested device */
8487 imsm_find_array_minor_by_subdev(dev
->index
, st
->container_dev
, &devnum
);
8488 if (devnum
== geo
.dev_id
)
8493 fprintf(stderr
, Name
" Cannot find %s (%i) subarray\n",
8494 geo
.dev_name
, geo
.dev_id
);
8495 goto exit_imsm_reshape_super
;
/* select the found volume and classify the requested change */
8497 super
->current_vol
= dev
->index
;
8498 change
= imsm_analyze_change(st
, &geo
);
8501 ret_val
= imsm_takeover(st
, &geo
);
8503 case CH_MIGRATION
: {
8504 struct imsm_update_reshape_migration
*u
= NULL
;
8506 imsm_create_metadata_update_for_migration(
8510 "Cannot prepare update\n");
8514 /* update metadata locally */
8515 imsm_update_metadata_locally(st
, u
, len
);
8516 /* and possibly remotely */
8517 if (st
->update_tail
)
8518 append_metadata_update(st
, u
, len
);
8528 exit_imsm_reshape_super
:
8529 dprintf("imsm: reshape_super Exit code = %i\n", ret_val
);
8533 /*******************************************************************************
8534 * Function: wait_for_reshape_imsm
8535 * Description: Function writes new sync_max value and waits until
8536 * reshape process reach new position
8538 * sra : general array info
8539 * to_complete : new sync_max position
8540 * ndata : number of disks in new array's layout
8543 * 1 : there is no reshape in progress,
8545 ******************************************************************************/
/* NOTE(review): lossy extraction — fd error check, fd_set setup and return
 * statements are missing from this view; code preserved byte-for-byte. */
8546 int wait_for_reshape_imsm(struct mdinfo
*sra
, unsigned long long to_complete
,
8549 int fd
= sysfs_get_fd(sra
, NULL
, "reshape_position");
8550 unsigned long long completed
;
8552 struct timeval timeout
;
8557 sysfs_fd_get_ll(fd
, &completed
);
8559 if (to_complete
== 0) {/* reshape till the end of array */
8560 sysfs_set_str(sra
, NULL
, "sync_max", "max");
8561 to_complete
= MaxSector
;
8563 if (completed
> to_complete
)
/* sync_max is per-disk: total position divided by data-disk count */
8565 if (sysfs_set_num(sra
, NULL
, "sync_max",
8566 to_complete
/ ndata
) != 0) {
8572 /* FIXME should not need a timeout at all */
8573 timeout
.tv_sec
= 30;
8574 timeout
.tv_usec
= 0;
/* block on reshape_position updates (with timeout), then re-read it */
8580 select(fd
+1, NULL
, NULL
, &rfds
, &timeout
);
8581 if (sysfs_fd_get_ll(fd
, &completed
) < 0) {
/* stop waiting if the kernel is no longer reshaping */
8585 if (sysfs_get_str(sra
, NULL
, "sync_action",
8587 strncmp(action
, "reshape", 7) != 0)
8589 } while (completed
< to_complete
);
8595 /*******************************************************************************
8596 * Function: check_degradation_change
8597 * Description: Check that array hasn't become failed.
8599 * info : for sysfs access
8600 * sources : source disks descriptors
8601 * degraded: previous degradation level
8604 ******************************************************************************/
/* NOTE(review): lossy extraction — the close() of the failed source fd and
 * the -1 assignment tail are missing from this view; code preserved
 * byte-for-byte. Returns the freshly read degradation count. */
8605 int check_degradation_change(struct mdinfo
*info
,
8609 unsigned long long new_degraded
;
8610 sysfs_get_ll(info
, NULL
, "degraded", &new_degraded
);
/* only rescan member disks when the degraded count actually changed */
8611 if (new_degraded
!= (unsigned long long)degraded
) {
8612 /* check each device to ensure it is still working */
8615 for (sd
= info
->devs
; sd
; sd
= sd
->next
) {
8616 if (sd
->disk
.state
& (1<<MD_DISK_FAULTY
))
8618 if (sd
->disk
.state
& (1<<MD_DISK_SYNC
)) {
/* a disk is dead if sysfs reports faulty or no longer in_sync */
8620 if (sysfs_get_str(info
,
8621 sd
, "state", sbuf
, 20) < 0 ||
8622 strstr(sbuf
, "faulty") ||
8623 strstr(sbuf
, "in_sync") == NULL
) {
8624 /* this device is dead */
8625 sd
->disk
.state
= (1<<MD_DISK_FAULTY
);
8626 if (sd
->disk
.raid_disk
>= 0 &&
8627 sources
[sd
->disk
.raid_disk
] >= 0) {
8629 sd
->disk
.raid_disk
]);
8630 sources
[sd
->disk
.raid_disk
] =
8639 return new_degraded
;
8642 /*******************************************************************************
8643 * Function: imsm_manage_reshape
8644 * Description: Function finds array under reshape and it manages reshape
8645 * process. It creates stripes backups (if required) and sets
8648 * afd : Backup handle (native) - not used
8649 * sra : general array info
8650 * reshape : reshape parameters - not used
8651 * st : supertype structure
8652 * blocks : size of critical section [blocks]
8653 * fds : table of source device descriptor
8654 * offsets : start of array (offset per devices)
8656 * destfd : table of destination device descriptor
8657 * destoffsets : table of destination offsets (per device)
8659 * 1 : success, reshape is done
8661 ******************************************************************************/
/* NOTE(review): lossy extraction — embedded line numbers and many dropped
 * lines (loop bodies, gotos, cleanup/return path). Code preserved
 * byte-for-byte; only comments were touched. */
8662 static int imsm_manage_reshape(
8663 int afd
, struct mdinfo
*sra
, struct reshape
*reshape
,
8664 struct supertype
*st
, unsigned long backup_blocks
,
8665 int *fds
, unsigned long long *offsets
,
8666 int dests
, int *destfd
, unsigned long long *destoffsets
)
8669 struct intel_super
*super
= st
->sb
;
8670 struct intel_dev
*dv
= NULL
;
8671 struct imsm_dev
*dev
= NULL
;
8672 struct imsm_map
*map_src
, *map_dest
;
8673 int migr_vol_qan
= 0;
8674 int ndata
, odata
; /* [bytes] */
8675 int chunk
; /* [bytes] */
8676 struct migr_record
*migr_rec
;
8678 unsigned int buf_size
; /* [bytes] */
8679 unsigned long long max_position
; /* array size [bytes] */
8680 unsigned long long next_step
; /* [blocks]/[bytes] */
8681 unsigned long long old_data_stripe_length
;
8682 unsigned long long new_data_stripe_length
;
8683 unsigned long long start_src
; /* [bytes] */
8684 unsigned long long start
; /* [bytes] */
8685 unsigned long long start_buf_shift
; /* [bytes] */
/* refuse to run without all descriptor/offset tables and array info */
8688 if (!fds
|| !offsets
|| !destfd
|| !destoffsets
|| !sra
)
8691 /* Find volume during the reshape */
8692 for (dv
= super
->devlist
; dv
; dv
= dv
->next
) {
8693 if (dv
->dev
->vol
.migr_type
== MIGR_GEN_MIGR
8694 && dv
->dev
->vol
.migr_state
== 1) {
8699 /* Only one volume can migrate at the same time */
8700 if (migr_vol_qan
!= 1) {
8701 fprintf(stderr
, Name
" : %s", migr_vol_qan
?
8702 "Number of migrating volumes greater than 1\n" :
8703 "There is no volume during migrationg\n");
/* map 1 = source geometry, map 0 = destination geometry */
8707 map_src
= get_imsm_map(dev
, 1);
8708 if (map_src
== NULL
)
8710 map_dest
= get_imsm_map(dev
, 0);
8712 ndata
= imsm_num_data_members(dev
, 0);
8713 odata
= imsm_num_data_members(dev
, 1);
8715 chunk
= map_src
->blocks_per_strip
* 512;
8716 old_data_stripe_length
= odata
* chunk
;
8718 migr_rec
= super
->migr_rec
;
8721 sra
->new_chunk
= __le16_to_cpu(map_dest
->blocks_per_strip
) * 512;
8722 sra
->new_level
= map_dest
->raid_level
;
8723 new_data_stripe_length
= sra
->new_chunk
* ndata
;
8725 /* initialize migration record for start condition */
8726 if (sra
->reshape_progress
== 0)
8727 init_migr_record_imsm(st
, dev
, sra
);
/* backup buffer: one migration unit + parity depth + stripe alignment */
8730 buf_size
= __le32_to_cpu(migr_rec
->blocks_per_unit
) * 512;
8731 /* extend buffer size for parity disk */
8732 buf_size
+= __le32_to_cpu(migr_rec
->dest_depth_per_unit
) * 512;
8733 /* add space for stripe alignment */
8734 buf_size
+= old_data_stripe_length
;
8735 if (posix_memalign((void **)&buf
, 4096, buf_size
)) {
8736 dprintf("imsm: Cannot allocate checpoint buffer\n");
/* 64-bit end-of-array position assembled from two le32 halves */
8741 __le32_to_cpu(migr_rec
->post_migr_vol_cap
) +
8742 ((unsigned long long)__le32_to_cpu(
8743 migr_rec
->post_migr_vol_cap_hi
) << 32);
/* main loop: one migration unit per iteration until all units done */
8745 while (__le32_to_cpu(migr_rec
->curr_migr_unit
) <
8746 __le32_to_cpu(migr_rec
->num_migr_units
)) {
8747 /* current reshape position [blocks] */
8748 unsigned long long current_position
=
8749 __le32_to_cpu(migr_rec
->blocks_per_unit
)
8750 * __le32_to_cpu(migr_rec
->curr_migr_unit
);
8751 unsigned long long border
;
8753 /* Check that array hasn't become failed.
8755 degraded
= check_degradation_change(sra
, fds
, degraded
);
8757 dprintf("imsm: Abort reshape due to degradation"
8758 " level (%i)\n", degraded
);
8762 next_step
= __le32_to_cpu(migr_rec
->blocks_per_unit
);
/* clamp the last unit so we never step past the end of the array */
8764 if ((current_position
+ next_step
) > max_position
)
8765 next_step
= max_position
- current_position
;
8767 start
= (map_src
->pba_of_lba0
+ dev
->reserved_blocks
+
8768 current_position
) * 512;
8770 /* align reading start to old geometry */
8771 start_buf_shift
= start
% old_data_stripe_length
;
8772 start_src
= start
- start_buf_shift
;
/* distance between source and destination read/write fronts */
8774 border
= (start_src
/ odata
) - (start
/ ndata
);
8776 if (border
<= __le32_to_cpu(migr_rec
->dest_depth_per_unit
)) {
8777 /* save critical stripes to buf
8778 * start - start address of current unit
8780 * start_src - start address of current unit
8781 * to backup aligned to source array
8784 unsigned long long next_step_filler
= 0;
8785 unsigned long long copy_length
= next_step
* 512;
8787 /* align copy area length to stripe in old geometry */
8788 next_step_filler
= ((copy_length
+ start_buf_shift
)
8789 % old_data_stripe_length
);
8790 if (next_step_filler
)
8791 next_step_filler
= (old_data_stripe_length
8792 - next_step_filler
);
8793 dprintf("save_stripes() parameters: start = %llu,"
8794 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8795 "\tstart_in_buf_shift = %llu,"
8796 "\tnext_step_filler = %llu\n",
8797 start
, start_src
, copy_length
,
8798 start_buf_shift
, next_step_filler
);
8800 if (save_stripes(fds
, offsets
, map_src
->num_members
,
8801 chunk
, sra
->array
.level
,
8802 sra
->array
.layout
, 0, NULL
, start_src
,
8804 next_step_filler
+ start_buf_shift
,
8806 dprintf("imsm: Cannot save stripes"
8810 /* Convert data to destination format and store it
8811 * in backup general migration area
8813 if (save_backup_imsm(st
, dev
, sra
,
8814 buf
+ start_buf_shift
,
8815 ndata
, copy_length
)) {
8816 dprintf("imsm: Cannot save stripes to "
8817 "target devices\n");
/* checkpoint: mark that valid backup data sits in the copy area */
8820 if (save_checkpoint_imsm(st
, sra
,
8821 UNIT_SRC_IN_CP_AREA
)) {
8822 dprintf("imsm: Cannot write checkpoint to "
8823 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8826 /* decrease backup_blocks */
8827 if (backup_blocks
> (unsigned long)next_step
)
8828 backup_blocks
-= next_step
;
8832 /* When data backed up, checkpoint stored,
8833 * kick the kernel to reshape unit of data
8835 next_step
= next_step
+ sra
->reshape_progress
;
/* widen the suspend window over the unit being reshaped */
8836 sysfs_set_num(sra
, NULL
, "suspend_lo", sra
->reshape_progress
);
8837 sysfs_set_num(sra
, NULL
, "suspend_hi", next_step
);
8839 /* wait until reshape finish */
8840 if (wait_for_reshape_imsm(sra
, next_step
, ndata
) < 0) {
8841 dprintf("wait_for_reshape_imsm returned error!\n");
8845 sra
->reshape_progress
= next_step
;
8847 if (save_checkpoint_imsm(st
, sra
, UNIT_SRC_NORMAL
)) {
8848 dprintf("imsm: Cannot write checkpoint to "
8849 "migration record (UNIT_SRC_NORMAL)\n");
8855 /* return '1' if done */
8863 #endif /* MDASSEMBLE */
8865 struct superswitch super_imsm
= {
8867 .examine_super
= examine_super_imsm
,
8868 .brief_examine_super
= brief_examine_super_imsm
,
8869 .brief_examine_subarrays
= brief_examine_subarrays_imsm
,
8870 .export_examine_super
= export_examine_super_imsm
,
8871 .detail_super
= detail_super_imsm
,
8872 .brief_detail_super
= brief_detail_super_imsm
,
8873 .write_init_super
= write_init_super_imsm
,
8874 .validate_geometry
= validate_geometry_imsm
,
8875 .add_to_super
= add_to_super_imsm
,
8876 .remove_from_super
= remove_from_super_imsm
,
8877 .detail_platform
= detail_platform_imsm
,
8878 .kill_subarray
= kill_subarray_imsm
,
8879 .update_subarray
= update_subarray_imsm
,
8880 .load_container
= load_container_imsm
,
8881 .default_geometry
= default_geometry_imsm
,
8882 .get_disk_controller_domain
= imsm_get_disk_controller_domain
,
8883 .reshape_super
= imsm_reshape_super
,
8884 .manage_reshape
= imsm_manage_reshape
,
8886 .match_home
= match_home_imsm
,
8887 .uuid_from_super
= uuid_from_super_imsm
,
8888 .getinfo_super
= getinfo_super_imsm
,
8889 .getinfo_super_disks
= getinfo_super_disks_imsm
,
8890 .update_super
= update_super_imsm
,
8892 .avail_size
= avail_size_imsm
,
8893 .min_acceptable_spare_size
= min_acceptable_spare_size_imsm
,
8895 .compare_super
= compare_super_imsm
,
8897 .load_super
= load_super_imsm
,
8898 .init_super
= init_super_imsm
,
8899 .store_super
= store_super_imsm
,
8900 .free_super
= free_super_imsm
,
8901 .match_metadata_desc
= match_metadata_desc_imsm
,
8902 .container_content
= container_content_imsm
,
8904 .recover_backup
= recover_backup_imsm
,
8911 .open_new
= imsm_open_new
,
8912 .set_array_state
= imsm_set_array_state
,
8913 .set_disk
= imsm_set_disk
,
8914 .sync_metadata
= imsm_sync_metadata
,
8915 .activate_spare
= imsm_activate_spare
,
8916 .process_update
= imsm_process_update
,
8917 .prepare_update
= imsm_prepare_update
,
8918 #endif /* MDASSEMBLE */