cdddbdbc
DW
1/*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
a54d5262 4 * Copyright (C) 2002-2008 Intel Corporation
cdddbdbc
DW
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
51006d85 20#define HAVE_STDINT_H 1
cdddbdbc 21#include "mdadm.h"
c2a1e7da 22#include "mdmon.h"
51006d85 23#include "sha1.h"
88c32bb1 24#include "platform-intel.h"
cdddbdbc
DW
25#include <values.h>
26#include <scsi/sg.h>
27#include <ctype.h>
d665cc31 28#include <dirent.h>
cdddbdbc
DW
29
30/* MPB == Metadata Parameter Block */
31#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33#define MPB_VERSION_RAID0 "1.0.00"
34#define MPB_VERSION_RAID1 "1.1.00"
fe7ed8cb
DW
35#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
cdddbdbc 37#define MPB_VERSION_RAID5 "1.2.02"
fe7ed8cb
DW
38#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39#define MPB_VERSION_CNG "1.2.06"
40#define MPB_VERSION_ATTRIBS "1.3.00"
cdddbdbc
DW
41#define MAX_SIGNATURE_LENGTH 32
42#define MAX_RAID_SERIAL_LEN 16
fe7ed8cb
DW
43
44#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
53
8e59f3d8 54#define MPB_SECTOR_CNT 2210
c2c087e6 55#define IMSM_RESERVED_SECTORS 4096
979d38be 56#define SECT_PER_MB_SHIFT 11
cdddbdbc
DW
57
58/* Disk configuration info. */
59#define IMSM_MAX_DEVICES 255
60struct imsm_disk {
61 __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
f2f27e63
DW
64#define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65#define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66#define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
cdddbdbc 67 __u32 status; /* 0xF0 - 0xF3 */
fe7ed8cb
DW
68 __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
69#define IMSM_DISK_FILLERS 4
cdddbdbc
DW
70 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
71};
72
73/* RAID map configuration info. */
74struct imsm_map {
75 __u32 pba_of_lba0; /* start address of partition */
76 __u32 blocks_per_member;/* blocks per member */
77 __u32 num_data_stripes; /* number of data stripes */
78 __u16 blocks_per_strip;
79 __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
80#define IMSM_T_STATE_NORMAL 0
81#define IMSM_T_STATE_UNINITIALIZED 1
e3bba0e0
DW
82#define IMSM_T_STATE_DEGRADED 2
83#define IMSM_T_STATE_FAILED 3
cdddbdbc
DW
84 __u8 raid_level;
85#define IMSM_T_RAID0 0
86#define IMSM_T_RAID1 1
87#define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members; /* number of member disks */
fe7ed8cb
DW
89 __u8 num_domains; /* number of parity domains */
90 __u8 failed_disk_num; /* valid only when state is degraded */
252d23c0 91 __u8 ddf;
cdddbdbc 92 __u32 filler[7]; /* expansion area */
7eef0453 93#define IMSM_ORD_REBUILD (1 << 24)
cdddbdbc 94 __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
7eef0453
DW
95 * top byte contains some flags
96 */
cdddbdbc
DW
97} __attribute__ ((packed));
98
99struct imsm_vol {
f8f603f1 100 __u32 curr_migr_unit;
fe7ed8cb 101 __u32 checkpoint_id; /* id to access curr_migr_unit */
cdddbdbc 102 __u8 migr_state; /* Normal or Migrating */
e3bba0e0
DW
103#define MIGR_INIT 0
104#define MIGR_REBUILD 1
105#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
106#define MIGR_GEN_MIGR 3
107#define MIGR_STATE_CHANGE 4
1484e727 108#define MIGR_REPAIR 5
cdddbdbc
DW
109 __u8 migr_type; /* Initializing, Rebuilding, ... */
110 __u8 dirty;
fe7ed8cb
DW
111 __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors; /* number of mismatches */
113 __u16 bad_blocks; /* number of bad blocks during verify */
114 __u32 filler[4];
cdddbdbc
DW
115 struct imsm_map map[1];
116 /* here comes another one if migr_state */
117} __attribute__ ((packed));
118
119struct imsm_dev {
fe7ed8cb 120 __u8 volume[MAX_RAID_SERIAL_LEN];
cdddbdbc
DW
121 __u32 size_low;
122 __u32 size_high;
fe7ed8cb
DW
123#define DEV_BOOTABLE __cpu_to_le32(0x01)
124#define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125#define DEV_READ_COALESCING __cpu_to_le32(0x04)
126#define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127#define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128#define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129#define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130#define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131#define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132#define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133#define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134#define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135#define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
cdddbdbc
DW
136 __u32 status; /* Persistent RaidDev status */
137 __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
fe7ed8cb
DW
138 __u8 migr_priority;
139 __u8 num_sub_vols;
140 __u8 tid;
141 __u8 cng_master_disk;
142 __u16 cache_policy;
143 __u8 cng_state;
144 __u8 cng_sub_state;
145#define IMSM_DEV_FILLERS 10
cdddbdbc
DW
146 __u32 filler[IMSM_DEV_FILLERS];
147 struct imsm_vol vol;
148} __attribute__ ((packed));
149
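/* Illustrative sketch (not part of the original handler): the volume size in
 * imsm_dev is stored as two little-endian 32-bit halves; callers such as
 * print_imsm_dev() below recombine them like this before converting the
 * sector count into bytes.
 */
static inline __u64 example_imsm_dev_size(struct imsm_dev *dev)
{
	__u64 sz = __le32_to_cpu(dev->size_high);

	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	return sz; /* size in 512-byte sectors */
}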
150struct imsm_super {
151 __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
152 __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
604b746f
JD
156 __u32 error_log_size; /* 0x30 - 0x33 in bytes */
157 __u32 attributes; /* 0x34 - 0x37 */
cdddbdbc
DW
158 __u8 num_disks; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs; /* 0x39 Number of configured volumes */
604b746f
JD
160 __u8 error_log_pos; /* 0x3A */
161 __u8 fill[1]; /* 0x3B */
162 __u32 cache_size; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166#define IMSM_FILLERS 35
167 __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
cdddbdbc
DW
168 struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
604b746f 170 /* here comes BBM logs */
cdddbdbc
DW
171} __attribute__ ((packed));
172
604b746f
JD
173#define BBM_LOG_MAX_ENTRIES 254
174
175struct bbm_log_entry {
176 __u64 defective_block_start;
177#define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset;
179 __u16 remapped_marked_count;
180 __u16 disk_ordinal;
181} __attribute__ ((__packed__));
182
183struct bbm_log {
184 __u32 signature; /* 0xABADB10C */
185 __u32 entry_count;
186 __u32 reserved_spare_block_count; /* 0 */
187 __u32 reserved; /* 0xFFFF */
188 __u64 first_spare_lba;
189 struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
190} __attribute__ ((__packed__));
191
192
cdddbdbc
DW
193#ifndef MDASSEMBLE
194static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
195#endif
196
8e59f3d8
AK
197#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
198
199#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
200
201#define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
202 * be recovered using srcMap */
203#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
204 * already been migrated and must
205 * be recovered from checkpoint area */
206struct migr_record {
207 __u32 rec_status; /* Status used to determine how to restart
208 * migration in case it aborts
209 * in some fashion */
210 __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
211 __u32 family_num; /* Family number of MPB
212 * containing the RaidDev
213 * that is migrating */
214 __u32 ascending_migr; /* True if migrating in increasing
215 * order of lbas */
216 __u32 blocks_per_unit; /* Num disk blocks per unit of operation */
217 __u32 dest_depth_per_unit; /* Num member blocks each destMap
218 * member disk
219 * advances per unit-of-operation */
220 __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
221 __u32 dest_1st_member_lba; /* First member lba on first
222 * stripe of destination */
223 __u32 num_migr_units; /* Total num migration units-of-op */
224 __u32 post_migr_vol_cap; /* Size of volume after
225 * migration completes */
226 __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
227 __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
228 * migration ckpt record was read from
229 * (for recovered migrations) */
230} __attribute__ ((__packed__));
231
1484e727
DW
232static __u8 migr_type(struct imsm_dev *dev)
233{
234 if (dev->vol.migr_type == MIGR_VERIFY &&
235 dev->status & DEV_VERIFY_AND_FIX)
236 return MIGR_REPAIR;
237 else
238 return dev->vol.migr_type;
239}
240
241static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
242{
243 /* for compatibility with older oroms convert MIGR_REPAIR into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
245 */
246 if (migr_type == MIGR_REPAIR) {
247 dev->vol.migr_type = MIGR_VERIFY;
248 dev->status |= DEV_VERIFY_AND_FIX;
249 } else {
250 dev->vol.migr_type = migr_type;
251 dev->status &= ~DEV_VERIFY_AND_FIX;
252 }
253}
254
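/* Minimal sketch of the compatibility mapping above: MIGR_REPAIR is never
 * written to media directly, it is stored as MIGR_VERIFY plus the
 * DEV_VERIFY_AND_FIX status bit and reconstructed on read (function name is
 * illustrative only).
 */
static int example_repair_round_trip(struct imsm_dev *dev)
{
	set_migr_type(dev, MIGR_REPAIR);
	/* on-media view: verify with the fix-up flag set */
	if (dev->vol.migr_type != MIGR_VERIFY ||
	    !(dev->status & DEV_VERIFY_AND_FIX))
		return 0;
	/* reader view: the repair intent is reconstructed */
	return migr_type(dev) == MIGR_REPAIR;
}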
87eb16df 255static unsigned int sector_count(__u32 bytes)
cdddbdbc 256{
87eb16df
DW
257 return ((bytes + (512-1)) & (~(512-1))) / 512;
258}
cdddbdbc 259
87eb16df
DW
260static unsigned int mpb_sectors(struct imsm_super *mpb)
261{
262 return sector_count(__le32_to_cpu(mpb->mpb_size));
cdddbdbc
DW
263}
264
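/* Worked example of the rounding above: sector_count() rounds a byte count up
 * to whole 512-byte sectors, so sector_count(512) == 1, sector_count(513) == 2
 * and sector_count(1025) == 3; mpb_sectors() simply applies this to the
 * little-endian mpb_size field of the anchor.
 */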
ba2de7ba
DW
265struct intel_dev {
266 struct imsm_dev *dev;
267 struct intel_dev *next;
f21e18ca 268 unsigned index;
ba2de7ba
DW
269};
270
88654014
LM
271struct intel_hba {
272 enum sys_dev_type type;
273 char *path;
274 char *pci_id;
275 struct intel_hba *next;
276};
277
1a64be56
LM
278enum action {
279 DISK_REMOVE = 1,
280 DISK_ADD
281};
cdddbdbc
DW
282/* internal representation of IMSM metadata */
283struct intel_super {
284 union {
949c47a0
DW
285 void *buf; /* O_DIRECT buffer for reading/writing metadata */
286 struct imsm_super *anchor; /* immovable parameters */
cdddbdbc 287 };
8e59f3d8
AK
288 union {
289 void *migr_rec_buf; /* buffer for I/O operations */
290 struct migr_record *migr_rec; /* migration record */
291 };
949c47a0 292 size_t len; /* size of the 'buf' allocation */
4d7b1503
DW
293 void *next_buf; /* for realloc'ing buf from the manager */
294 size_t next_len;
c2c087e6 295 int updates_pending; /* count of pending updates for mdmon */
bf5a934a 296 int current_vol; /* index of raid device undergoing creation */
0dcecb2e 297 __u32 create_offset; /* common start for 'current_vol' */
148acb7b 298 __u32 random; /* random data for seeding new family numbers */
ba2de7ba 299 struct intel_dev *devlist;
cdddbdbc
DW
300 struct dl {
301 struct dl *next;
302 int index;
303 __u8 serial[MAX_RAID_SERIAL_LEN];
304 int major, minor;
305 char *devname;
b9f594fe 306 struct imsm_disk disk;
cdddbdbc 307 int fd;
0dcecb2e
DW
308 int extent_cnt;
309 struct extent *e; /* for determining freespace @ create */
efb30e7f 310 int raiddisk; /* slot to fill in autolayout */
1a64be56 311 enum action action;
cdddbdbc 312 } *disks;
1a64be56
LM
313 struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
314 active */
47ee5a45 315 struct dl *missing; /* disks removed while we weren't looking */
43dad3d6 316 struct bbm_log *bbm_log;
88654014 317 struct intel_hba *hba; /* device path of the raid controller for this metadata */
88c32bb1 318 const struct imsm_orom *orom; /* platform firmware support */
a2b97981
DW
319 struct intel_super *next; /* (temp) list for disambiguating family_num */
320};
321
322struct intel_disk {
323 struct imsm_disk disk;
324 #define IMSM_UNKNOWN_OWNER (-1)
325 int owner;
326 struct intel_disk *next;
cdddbdbc
DW
327};
328
c2c087e6
DW
329struct extent {
330 unsigned long long start, size;
331};
332
694575e7
KW
333/* definitions of reshape process types */
334enum imsm_reshape_type {
335 CH_TAKEOVER,
b5347799 336 CH_MIGRATION,
694575e7
KW
337};
338
88758e9d
DW
339/* definition of messages passed to imsm_process_update */
340enum imsm_update_type {
341 update_activate_spare,
8273f55e 342 update_create_array,
33414a01 343 update_kill_array,
aa534678 344 update_rename_array,
1a64be56 345 update_add_remove_disk,
78b10e66 346 update_reshape_container_disks,
48c5303a 347 update_reshape_migration,
bb025c2f 348 update_takeover
88758e9d
DW
349};
350
351struct imsm_update_activate_spare {
352 enum imsm_update_type type;
d23fe947 353 struct dl *dl;
88758e9d
DW
354 int slot;
355 int array;
356 struct imsm_update_activate_spare *next;
357};
358
78b10e66
N
359struct geo_params {
360 int dev_id;
361 char *dev_name;
362 long long size;
363 int level;
364 int layout;
365 int chunksize;
366 int raid_disks;
367};
368
bb025c2f
KW
369enum takeover_direction {
370 R10_TO_R0,
371 R0_TO_R10
372};
373struct imsm_update_takeover {
374 enum imsm_update_type type;
375 int subarray;
376 enum takeover_direction direction;
377};
78b10e66
N
378
379struct imsm_update_reshape {
380 enum imsm_update_type type;
381 int old_raid_disks;
382 int new_raid_disks;
48c5303a
PC
383
384 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
385};
386
387struct imsm_update_reshape_migration {
388 enum imsm_update_type type;
389 int old_raid_disks;
390 int new_raid_disks;
391 /* fields for array migration changes
392 */
393 int subdev;
394 int new_level;
395 int new_layout;
4bba0439 396 int new_chunksize;
48c5303a 397
d195167d 398 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
78b10e66
N
399};
400
54c2c1ea
DW
401struct disk_info {
402 __u8 serial[MAX_RAID_SERIAL_LEN];
403};
404
8273f55e
DW
405struct imsm_update_create_array {
406 enum imsm_update_type type;
8273f55e 407 int dev_idx;
6a3e913e 408 struct imsm_dev dev;
8273f55e
DW
409};
410
33414a01
DW
411struct imsm_update_kill_array {
412 enum imsm_update_type type;
413 int dev_idx;
414};
415
aa534678
DW
416struct imsm_update_rename_array {
417 enum imsm_update_type type;
418 __u8 name[MAX_RAID_SERIAL_LEN];
419 int dev_idx;
420};
421
1a64be56 422struct imsm_update_add_remove_disk {
43dad3d6
DW
423 enum imsm_update_type type;
424};
425
88654014
LM
426
427static const char *_sys_dev_type[] = {
428 [SYS_DEV_UNKNOWN] = "Unknown",
429 [SYS_DEV_SAS] = "SAS",
430 [SYS_DEV_SATA] = "SATA"
431};
432
433const char *get_sys_dev_type(enum sys_dev_type type)
434{
435 if (type >= SYS_DEV_MAX)
436 type = SYS_DEV_UNKNOWN;
437
438 return _sys_dev_type[type];
439}
440
441static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
442{
443 struct intel_hba *result = malloc(sizeof(*result));
444 if (result) {
445 result->type = device->type;
446 result->path = strdup(device->path);
447 result->next = NULL;
448 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
449 result->pci_id++;
450 }
451 return result;
452}
453
454static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
455{
456 struct intel_hba *result=NULL;
457 for (result = hba; result; result = result->next) {
458 if (result->type == device->type && strcmp(result->path, device->path) == 0)
459 break;
460 }
461 return result;
462}
463
b4cf4cba 464static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
88654014
LM
465{
466 struct intel_hba *hba;
467
468 /* check if disk attached to Intel HBA */
469 hba = find_intel_hba(super->hba, device);
470 if (hba != NULL)
471 return 1;
472 /* Check if HBA is already attached to super */
473 if (super->hba == NULL) {
474 super->hba = alloc_intel_hba(device);
475 return 1;
476 }
477
478 hba = super->hba;
479 /* Intel metadata allows for all disks attached to the same type HBA.
480 * Do not support mixing of HBA types
481 */
482 if (device->type != hba->type)
483 return 2;
484
485 while (hba->next)
486 hba = hba->next;
487
488 hba->next = alloc_intel_hba(device);
489 return 1;
490}
491
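/* Hypothetical caller sketch: attach_hba_to_super() returns 1 when the
 * device's HBA is (now) recorded in 'super' and 2 when the device sits on a
 * different HBA type than the one already attached, which imsm does not allow
 * within a single container.
 */
static int example_check_disk_hba(struct intel_super *super,
				  struct sys_dev *device, char *devname)
{
	if (attach_hba_to_super(super, device) == 2) {
		fprintf(stderr, Name ": %s is attached to a different "
			"controller type than the rest of the container\n",
			devname);
		return 1;
	}
	return 0;
}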
492static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
493{
494 struct sys_dev *list, *elem, *prev;
495 char *disk_path;
496
497 if ((list = find_intel_devices()) == NULL)
498 return 0;
499
500 if (fd < 0)
501 disk_path = (char *) devname;
502 else
503 disk_path = diskfd_to_devpath(fd);
504
505 if (!disk_path) {
506 free_sys_dev(&list);
507 return 0;
508 }
509
510 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
511 if (path_attached_to_hba(disk_path, elem->path)) {
512 if (prev == NULL)
513 list = list->next;
514 else
515 prev->next = elem->next;
516 elem->next = NULL;
517 if (disk_path != devname)
518 free(disk_path);
519 free_sys_dev(&list);
520 return elem;
521 }
522 }
523 if (disk_path != devname)
524 free(disk_path);
525 free_sys_dev(&list);
526
527 return NULL;
528}
529
530
d424212e
N
531static int find_intel_hba_capability(int fd, struct intel_super *super,
532 char *devname);
f2f5c343 533
cdddbdbc
DW
534static struct supertype *match_metadata_desc_imsm(char *arg)
535{
536 struct supertype *st;
537
538 if (strcmp(arg, "imsm") != 0 &&
539 strcmp(arg, "default") != 0
540 )
541 return NULL;
542
543 st = malloc(sizeof(*st));
4e9d2186
AW
544 if (!st)
545 return NULL;
ef609477 546 memset(st, 0, sizeof(*st));
d1d599ea 547 st->container_dev = NoMdDev;
cdddbdbc
DW
548 st->ss = &super_imsm;
549 st->max_devs = IMSM_MAX_DEVICES;
550 st->minor_version = 0;
551 st->sb = NULL;
552 return st;
553}
554
0e600426 555#ifndef MDASSEMBLE
cdddbdbc
DW
556static __u8 *get_imsm_version(struct imsm_super *mpb)
557{
558 return &mpb->sig[MPB_SIG_LEN];
559}
0e600426 560#endif
cdddbdbc 561
949c47a0
DW
562/* retrieve a disk directly from the anchor when the anchor is known to be
563 * up-to-date, currently only at load time
564 */
565static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
cdddbdbc 566{
949c47a0 567 if (index >= mpb->num_disks)
cdddbdbc
DW
568 return NULL;
569 return &mpb->disk[index];
570}
571
95d07a2c
LM
572/* retrieve the disk description based on a index of the disk
573 * in the sub-array
574 */
575static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
949c47a0 576{
b9f594fe
DW
577 struct dl *d;
578
579 for (d = super->disks; d; d = d->next)
580 if (d->index == index)
95d07a2c
LM
581 return d;
582
583 return NULL;
584}
585/* retrieve a disk from the parsed metadata */
586static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
587{
588 struct dl *dl;
589
590 dl = get_imsm_dl_disk(super, index);
591 if (dl)
592 return &dl->disk;
593
b9f594fe 594 return NULL;
949c47a0
DW
595}
596
597/* generate a checksum directly from the anchor when the anchor is known to be
598 * up-to-date, currently only at load or write_super after coalescing
599 */
600static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
cdddbdbc
DW
601{
602 __u32 end = mpb->mpb_size / sizeof(end);
603 __u32 *p = (__u32 *) mpb;
604 __u32 sum = 0;
605
97f734fd
N
606 while (end--) {
607 sum += __le32_to_cpu(*p);
608 p++;
609 }
cdddbdbc
DW
610
611 return sum - __le32_to_cpu(mpb->check_sum);
612}
613
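/* Minimal sketch of how the sum above is used to validate an anchor that has
 * already been read into memory: the stored check_sum is excluded by the
 * subtraction, so a plain comparison suffices.
 */
static int example_mpb_checksum_ok(struct imsm_super *mpb)
{
	return __gen_imsm_checksum(mpb) == __le32_to_cpu(mpb->check_sum);
}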
a965f303
DW
614static size_t sizeof_imsm_map(struct imsm_map *map)
615{
616 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
617}
618
619struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
cdddbdbc 620{
5e7b0330
AK
621 /* A device can have 2 maps if it is in the middle of a migration.
622 * If second_map is:
623 * 0 - we return the first map
624 * 1 - we return the second map if it exists, else NULL
625 * -1 - we return the second map if it exists, else the first
626 */
a965f303
DW
627 struct imsm_map *map = &dev->vol.map[0];
628
5e7b0330 629 if (second_map == 1 && !dev->vol.migr_state)
a965f303 630 return NULL;
5e7b0330
AK
631 else if (second_map == 1 ||
632 (second_map < 0 && dev->vol.migr_state)) {
a965f303
DW
633 void *ptr = map;
634
635 return ptr + sizeof_imsm_map(map);
636 } else
637 return map;
5e7b0330 638
a965f303 639}
cdddbdbc 640
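/* Usage sketch for the second_map convention documented above: passing -1
 * yields the second map while a migration is in flight and map[0] otherwise,
 * which is convenient for status reporting (helper name is illustrative).
 */
static struct imsm_map *example_effective_map(struct imsm_dev *dev)
{
	return get_imsm_map(dev, -1);
}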
3393c6af
DW
641/* return the size of the device.
642 * migr_state increases the returned size if map[0] were to be duplicated
643 */
644static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
a965f303
DW
645{
646 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
647 sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
648
649 /* migrating means an additional map */
a965f303
DW
650 if (dev->vol.migr_state)
651 size += sizeof_imsm_map(get_imsm_map(dev, 1));
3393c6af
DW
652 else if (migr_state)
653 size += sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
654
655 return size;
656}
657
54c2c1ea
DW
658#ifndef MDASSEMBLE
659/* retrieve disk serial number list from a metadata update */
660static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
661{
662 void *u = update;
663 struct disk_info *inf;
664
665 inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
666 sizeof_imsm_dev(&update->dev, 0);
667
668 return inf;
669}
670#endif
671
949c47a0 672static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
cdddbdbc
DW
673{
674 int offset;
675 int i;
676 void *_mpb = mpb;
677
949c47a0 678 if (index >= mpb->num_raid_devs)
cdddbdbc
DW
679 return NULL;
680
681 /* devices start after all disks */
682 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
683
684 for (i = 0; i <= index; i++)
685 if (i == index)
686 return _mpb + offset;
687 else
3393c6af 688 offset += sizeof_imsm_dev(_mpb + offset, 0);
cdddbdbc
DW
689
690 return NULL;
691}
692
949c47a0
DW
693static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
694{
ba2de7ba
DW
695 struct intel_dev *dv;
696
949c47a0
DW
697 if (index >= super->anchor->num_raid_devs)
698 return NULL;
ba2de7ba
DW
699 for (dv = super->devlist; dv; dv = dv->next)
700 if (dv->index == index)
701 return dv->dev;
702 return NULL;
949c47a0
DW
703}
704
98130f40
AK
705/*
706 * for second_map:
707 * == 0 get first map
708 * == 1 get second map
709 * == -1 then get map according to the current migr_state
710 */
711static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
712 int slot,
713 int second_map)
7eef0453
DW
714{
715 struct imsm_map *map;
716
5e7b0330 717 map = get_imsm_map(dev, second_map);
7eef0453 718
ff077194
DW
719 /* top byte identifies disk under rebuild */
720 return __le32_to_cpu(map->disk_ord_tbl[slot]);
721}
722
723#define ord_to_idx(ord) (((ord) << 8) >> 8)
98130f40 724static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
ff077194 725{
98130f40 726 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
ff077194
DW
727
728 return ord_to_idx(ord);
7eef0453
DW
729}
730
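/* Sketch of the ord encoding handled above: each disk_ord_tbl entry carries
 * the disk index in its low 24 bits and per-slot flags in the top byte, so a
 * slot that is being rebuilt has IMSM_ORD_REBUILD set alongside the index.
 */
static int example_slot_is_rebuilding(struct imsm_dev *dev, int slot)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, -1);

	return (ord & IMSM_ORD_REBUILD) != 0;
}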
be73972f
DW
731static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
732{
733 map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
734}
735
f21e18ca 736static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
620b1713
DW
737{
738 int slot;
739 __u32 ord;
740
741 for (slot = 0; slot < map->num_members; slot++) {
742 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
743 if (ord_to_idx(ord) == idx)
744 return slot;
745 }
746
747 return -1;
748}
749
cdddbdbc
DW
750static int get_imsm_raid_level(struct imsm_map *map)
751{
752 if (map->raid_level == 1) {
753 if (map->num_members == 2)
754 return 1;
755 else
756 return 10;
757 }
758
759 return map->raid_level;
760}
761
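/* Example of the mapping above: the metadata records both a two-disk mirror
 * and a mirror-of-stripes as raid_level 1, so only the member count tells
 * them apart; a map with raid_level 1 and four members is reported as raid10.
 */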
c2c087e6
DW
762static int cmp_extent(const void *av, const void *bv)
763{
764 const struct extent *a = av;
765 const struct extent *b = bv;
766 if (a->start < b->start)
767 return -1;
768 if (a->start > b->start)
769 return 1;
770 return 0;
771}
772
0dcecb2e 773static int count_memberships(struct dl *dl, struct intel_super *super)
c2c087e6 774{
c2c087e6 775 int memberships = 0;
620b1713 776 int i;
c2c087e6 777
949c47a0
DW
778 for (i = 0; i < super->anchor->num_raid_devs; i++) {
779 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 780 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 781
620b1713
DW
782 if (get_imsm_disk_slot(map, dl->index) >= 0)
783 memberships++;
c2c087e6 784 }
0dcecb2e
DW
785
786 return memberships;
787}
788
789static struct extent *get_extents(struct intel_super *super, struct dl *dl)
790{
791 /* find a list of used extents on the given physical device */
792 struct extent *rv, *e;
620b1713 793 int i;
0dcecb2e
DW
794 int memberships = count_memberships(dl, super);
795 __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
796
c2c087e6
DW
797 rv = malloc(sizeof(struct extent) * (memberships + 1));
798 if (!rv)
799 return NULL;
800 e = rv;
801
949c47a0
DW
802 for (i = 0; i < super->anchor->num_raid_devs; i++) {
803 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 804 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 805
620b1713
DW
806 if (get_imsm_disk_slot(map, dl->index) >= 0) {
807 e->start = __le32_to_cpu(map->pba_of_lba0);
808 e->size = __le32_to_cpu(map->blocks_per_member);
809 e++;
c2c087e6
DW
810 }
811 }
812 qsort(rv, memberships, sizeof(*rv), cmp_extent);
813
14e8215b
DW
814 /* determine the start of the metadata
815 * when no raid devices are defined, use the default
816 * ...otherwise allow the metadata to truncate the value
817 * as is the case with older versions of imsm
818 */
819 if (memberships) {
820 struct extent *last = &rv[memberships - 1];
821 __u32 remainder;
822
823 remainder = __le32_to_cpu(dl->disk.total_blocks) -
824 (last->start + last->size);
dda5855f
DW
825 /* round down to 1k block to satisfy precision of the kernel
826 * 'size' interface
827 */
828 remainder &= ~1UL;
829 /* make sure remainder is still sane */
f21e18ca 830 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
dda5855f 831 remainder = ROUND_UP(super->len, 512) >> 9;
14e8215b
DW
832 if (reservation > remainder)
833 reservation = remainder;
834 }
835 e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
c2c087e6
DW
836 e->size = 0;
837 return rv;
838}
839
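/* Usage sketch for get_extents(): the returned list is sorted by start and
 * terminated by a zero-size sentinel marking where the reserved metadata
 * region begins, so scanning the gaps between consecutive entries gives the
 * largest run of free blocks on the disk (helper name is illustrative).
 */
static unsigned long long example_largest_free_run(struct intel_super *super,
						   struct dl *dl)
{
	struct extent *e = get_extents(super, dl);
	unsigned long long pos = 0, best = 0;
	int i;

	if (!e)
		return 0;
	for (i = 0; ; i++) {
		if (e[i].start > pos && e[i].start - pos > best)
			best = e[i].start - pos;
		if (e[i].size == 0)
			break;
		pos = e[i].start + e[i].size;
	}
	free(e);
	return best;
}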
14e8215b
DW
840/* try to determine how much space is reserved for metadata from
841 * the last get_extents() entry, otherwise fallback to the
842 * default
843 */
844static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
845{
846 struct extent *e;
847 int i;
848 __u32 rv;
849
850 /* for spares just return a minimal reservation which will grow
851 * once the spare is picked up by an array
852 */
853 if (dl->index == -1)
854 return MPB_SECTOR_CNT;
855
856 e = get_extents(super, dl);
857 if (!e)
858 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
859
860 /* scroll to last entry */
861 for (i = 0; e[i].size; i++)
862 continue;
863
864 rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;
865
866 free(e);
867
868 return rv;
869}
870
25ed7e59
DW
871static int is_spare(struct imsm_disk *disk)
872{
873 return (disk->status & SPARE_DISK) == SPARE_DISK;
874}
875
876static int is_configured(struct imsm_disk *disk)
877{
878 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
879}
880
881static int is_failed(struct imsm_disk *disk)
882{
883 return (disk->status & FAILED_DISK) == FAILED_DISK;
884}
885
80e7f8c3
AC
886/* Return minimum size of a spare that can be used in this array */
887static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
888{
889 struct intel_super *super = st->sb;
890 struct dl *dl;
891 struct extent *e;
892 int i;
893 unsigned long long rv = 0;
894
895 if (!super)
896 return rv;
897 /* find first active disk in array */
898 dl = super->disks;
899 while (dl && (is_failed(&dl->disk) || dl->index == -1))
900 dl = dl->next;
901 if (!dl)
902 return rv;
903 /* find last lba used by subarrays */
904 e = get_extents(super, dl);
905 if (!e)
906 return rv;
907 for (i = 0; e[i].size; i++)
908 continue;
909 if (i > 0)
910 rv = e[i-1].start + e[i-1].size;
911 free(e);
912 /* add the amount of space needed for metadata */
913 rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
914 return rv * 512;
915}
916
1799c9e8 917#ifndef MDASSEMBLE
1e5c6983
DW
918static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
919
44470971 920static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
cdddbdbc
DW
921{
922 __u64 sz;
0d80bb2f 923 int slot, i;
a965f303 924 struct imsm_map *map = get_imsm_map(dev, 0);
dd8bcb3b 925 struct imsm_map *map2 = get_imsm_map(dev, 1);
b10b37b8 926 __u32 ord;
cdddbdbc
DW
927
928 printf("\n");
1e7bc0ed 929 printf("[%.16s]:\n", dev->volume);
44470971 930 printf(" UUID : %s\n", uuid);
dd8bcb3b
AK
931 printf(" RAID Level : %d", get_imsm_raid_level(map));
932 if (map2)
933 printf(" <-- %d", get_imsm_raid_level(map2));
934 printf("\n");
935 printf(" Members : %d", map->num_members);
936 if (map2)
937 printf(" <-- %d", map2->num_members);
938 printf("\n");
0d80bb2f
DW
939 printf(" Slots : [");
940 for (i = 0; i < map->num_members; i++) {
dd8bcb3b 941 ord = get_imsm_ord_tbl_ent(dev, i, 0);
0d80bb2f
DW
942 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
943 }
dd8bcb3b
AK
944 printf("]");
945 if (map2) {
946 printf(" <-- [");
947 for (i = 0; i < map2->num_members; i++) {
948 ord = get_imsm_ord_tbl_ent(dev, i, 1);
949 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
950 }
951 printf("]");
952 }
953 printf("\n");
7095bccb
AK
954 printf(" Failed disk : ");
955 if (map->failed_disk_num == 0xff)
956 printf("none");
957 else
958 printf("%i", map->failed_disk_num);
959 printf("\n");
620b1713
DW
960 slot = get_imsm_disk_slot(map, disk_idx);
961 if (slot >= 0) {
98130f40 962 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
b10b37b8
DW
963 printf(" This Slot : %d%s\n", slot,
964 ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
965 } else
cdddbdbc
DW
966 printf(" This Slot : ?\n");
967 sz = __le32_to_cpu(dev->size_high);
968 sz <<= 32;
969 sz += __le32_to_cpu(dev->size_low);
970 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
971 human_size(sz * 512));
972 sz = __le32_to_cpu(map->blocks_per_member);
973 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
974 human_size(sz * 512));
975 printf(" Sector Offset : %u\n",
976 __le32_to_cpu(map->pba_of_lba0));
977 printf(" Num Stripes : %u\n",
978 __le32_to_cpu(map->num_data_stripes));
dd8bcb3b 979 printf(" Chunk Size : %u KiB",
cdddbdbc 980 __le16_to_cpu(map->blocks_per_strip) / 2);
dd8bcb3b
AK
981 if (map2)
982 printf(" <-- %u KiB",
983 __le16_to_cpu(map2->blocks_per_strip) / 2);
984 printf("\n");
cdddbdbc 985 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
8655a7b1 986 printf(" Migrate State : ");
1484e727
DW
987 if (dev->vol.migr_state) {
988 if (migr_type(dev) == MIGR_INIT)
8655a7b1 989 printf("initialize\n");
1484e727 990 else if (migr_type(dev) == MIGR_REBUILD)
8655a7b1 991 printf("rebuild\n");
1484e727 992 else if (migr_type(dev) == MIGR_VERIFY)
8655a7b1 993 printf("check\n");
1484e727 994 else if (migr_type(dev) == MIGR_GEN_MIGR)
8655a7b1 995 printf("general migration\n");
1484e727 996 else if (migr_type(dev) == MIGR_STATE_CHANGE)
8655a7b1 997 printf("state change\n");
1484e727 998 else if (migr_type(dev) == MIGR_REPAIR)
8655a7b1 999 printf("repair\n");
1484e727 1000 else
8655a7b1
DW
1001 printf("<unknown:%d>\n", migr_type(dev));
1002 } else
1003 printf("idle\n");
3393c6af
DW
1004 printf(" Map State : %s", map_state_str[map->map_state]);
1005 if (dev->vol.migr_state) {
1006 struct imsm_map *map = get_imsm_map(dev, 1);
1e5c6983 1007
b10b37b8 1008 printf(" <-- %s", map_state_str[map->map_state]);
1e5c6983
DW
1009 printf("\n Checkpoint : %u (%llu)",
1010 __le32_to_cpu(dev->vol.curr_migr_unit),
94fcb80a 1011 (unsigned long long)blocks_per_migr_unit(dev));
3393c6af
DW
1012 }
1013 printf("\n");
cdddbdbc 1014 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
cdddbdbc
DW
1015}
1016
14e8215b 1017static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
cdddbdbc 1018{
949c47a0 1019 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
1f24f035 1020 char str[MAX_RAID_SERIAL_LEN + 1];
cdddbdbc
DW
1021 __u64 sz;
1022
d362da3d 1023 if (index < 0 || !disk)
e9d82038
DW
1024 return;
1025
cdddbdbc 1026 printf("\n");
1f24f035 1027 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
cdddbdbc 1028 printf(" Disk%02d Serial : %s\n", index, str);
25ed7e59
DW
1029 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1030 is_configured(disk) ? " active" : "",
1031 is_failed(disk) ? " failed" : "");
cdddbdbc 1032 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
14e8215b 1033 sz = __le32_to_cpu(disk->total_blocks) - reserved;
cdddbdbc
DW
1034 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1035 human_size(sz * 512));
1036}
1037
a5d85af7 1038static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
44470971 1039
cdddbdbc
DW
1040static void examine_super_imsm(struct supertype *st, char *homehost)
1041{
1042 struct intel_super *super = st->sb;
949c47a0 1043 struct imsm_super *mpb = super->anchor;
cdddbdbc
DW
1044 char str[MAX_SIGNATURE_LENGTH];
1045 int i;
27fd6274
DW
1046 struct mdinfo info;
1047 char nbuf[64];
cdddbdbc 1048 __u32 sum;
14e8215b 1049 __u32 reserved = imsm_reserved_sectors(super, super->disks);
94827db3 1050 struct dl *dl;
27fd6274 1051
cdddbdbc
DW
1052 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
1053 printf(" Magic : %s\n", str);
1054 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1055 printf(" Version : %s\n", get_imsm_version(mpb));
148acb7b 1056 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
cdddbdbc
DW
1057 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1058 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
a5d85af7 1059 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1060 fname_from_uuid(st, &info, nbuf, ':');
27fd6274 1061 printf(" UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1062 sum = __le32_to_cpu(mpb->check_sum);
1063 printf(" Checksum : %08x %s\n", sum,
949c47a0 1064 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
87eb16df 1065 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
cdddbdbc
DW
1066 printf(" Disks : %d\n", mpb->num_disks);
1067 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
14e8215b 1068 print_imsm_disk(mpb, super->disks->index, reserved);
604b746f
JD
1069 if (super->bbm_log) {
1070 struct bbm_log *log = super->bbm_log;
1071
1072 printf("\n");
1073 printf("Bad Block Management Log:\n");
1074 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1075 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1076 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1077 printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
13a3b65d
N
1078 printf(" First Spare : %llx\n",
1079 (unsigned long long) __le64_to_cpu(log->first_spare_lba));
604b746f 1080 }
44470971
DW
1081 for (i = 0; i < mpb->num_raid_devs; i++) {
1082 struct mdinfo info;
1083 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1084
1085 super->current_vol = i;
a5d85af7 1086 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1087 fname_from_uuid(st, &info, nbuf, ':');
44470971
DW
1088 print_imsm_dev(dev, nbuf + 5, super->disks->index);
1089 }
cdddbdbc
DW
1090 for (i = 0; i < mpb->num_disks; i++) {
1091 if (i == super->disks->index)
1092 continue;
14e8215b 1093 print_imsm_disk(mpb, i, reserved);
cdddbdbc 1094 }
94827db3
N
1095 for (dl = super->disks ; dl; dl = dl->next) {
1096 struct imsm_disk *disk;
1097 char str[MAX_RAID_SERIAL_LEN + 1];
1098 __u64 sz;
1099
1100 if (dl->index >= 0)
1101 continue;
1102
1103 disk = &dl->disk;
1104 printf("\n");
1105 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1106 printf(" Disk Serial : %s\n", str);
1107 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1108 is_configured(disk) ? " active" : "",
1109 is_failed(disk) ? " failed" : "");
1110 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1111 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1112 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1113 human_size(sz * 512));
1114 }
cdddbdbc
DW
1115}
1116
061f2c6a 1117static void brief_examine_super_imsm(struct supertype *st, int verbose)
cdddbdbc 1118{
27fd6274 1119 /* We just write a generic IMSM ARRAY entry */
ff54de6e
N
1120 struct mdinfo info;
1121 char nbuf[64];
1e7bc0ed 1122 struct intel_super *super = st->sb;
1e7bc0ed 1123
0d5a423f
DW
1124 if (!super->anchor->num_raid_devs) {
1125 printf("ARRAY metadata=imsm\n");
1e7bc0ed 1126 return;
0d5a423f 1127 }
ff54de6e 1128
a5d85af7 1129 getinfo_super_imsm(st, &info, NULL);
4737ae25
N
1130 fname_from_uuid(st, &info, nbuf, ':');
1131 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1132}
1133
1134static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1135{
1136 /* We just write a generic IMSM ARRAY entry */
1137 struct mdinfo info;
1138 char nbuf[64];
1139 char nbuf1[64];
1140 struct intel_super *super = st->sb;
1141 int i;
1142
1143 if (!super->anchor->num_raid_devs)
1144 return;
1145
a5d85af7 1146 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1147 fname_from_uuid(st, &info, nbuf, ':');
1e7bc0ed
DW
1148 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1149 struct imsm_dev *dev = get_imsm_dev(super, i);
1150
1151 super->current_vol = i;
a5d85af7 1152 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1153 fname_from_uuid(st, &info, nbuf1, ':');
1124b3cf 1154 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
cf8de691 1155 dev->volume, nbuf + 5, i, nbuf1 + 5);
1e7bc0ed 1156 }
cdddbdbc
DW
1157}
1158
9d84c8ea
DW
1159static void export_examine_super_imsm(struct supertype *st)
1160{
1161 struct intel_super *super = st->sb;
1162 struct imsm_super *mpb = super->anchor;
1163 struct mdinfo info;
1164 char nbuf[64];
1165
a5d85af7 1166 getinfo_super_imsm(st, &info, NULL);
9d84c8ea
DW
1167 fname_from_uuid(st, &info, nbuf, ':');
1168 printf("MD_METADATA=imsm\n");
1169 printf("MD_LEVEL=container\n");
1170 printf("MD_UUID=%s\n", nbuf+5);
1171 printf("MD_DEVICES=%u\n", mpb->num_disks);
1172}
1173
cdddbdbc
DW
1174static void detail_super_imsm(struct supertype *st, char *homehost)
1175{
3ebe00a1
DW
1176 struct mdinfo info;
1177 char nbuf[64];
1178
a5d85af7 1179 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1180 fname_from_uuid(st, &info, nbuf, ':');
3ebe00a1 1181 printf("\n UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1182}
1183
1184static void brief_detail_super_imsm(struct supertype *st)
1185{
ff54de6e
N
1186 struct mdinfo info;
1187 char nbuf[64];
a5d85af7 1188 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1189 fname_from_uuid(st, &info, nbuf, ':');
ff54de6e 1190 printf(" UUID=%s", nbuf + 5);
cdddbdbc 1191}
d665cc31
DW
1192
1193static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1194static void fd2devname(int fd, char *name);
1195
120dc887 1196static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
d665cc31 1197{
120dc887
LM
1198 /* dump an unsorted list of devices attached to AHCI Intel storage
1199 * controller, as well as non-connected ports
d665cc31
DW
1200 */
1201 int hba_len = strlen(hba_path) + 1;
1202 struct dirent *ent;
1203 DIR *dir;
1204 char *path = NULL;
1205 int err = 0;
1206 unsigned long port_mask = (1 << port_count) - 1;
1207
f21e18ca 1208 if (port_count > (int)sizeof(port_mask) * 8) {
d665cc31
DW
1209 if (verbose)
1210 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1211 return 2;
1212 }
1213
1214 /* scroll through /sys/dev/block looking for devices attached to
1215 * this hba
1216 */
1217 dir = opendir("/sys/dev/block");
1218 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1219 int fd;
1220 char model[64];
1221 char vendor[64];
1222 char buf[1024];
1223 int major, minor;
1224 char *device;
1225 char *c;
1226 int port;
1227 int type;
1228
1229 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1230 continue;
1231 path = devt_to_devpath(makedev(major, minor));
1232 if (!path)
1233 continue;
1234 if (!path_attached_to_hba(path, hba_path)) {
1235 free(path);
1236 path = NULL;
1237 continue;
1238 }
1239
1240 /* retrieve the scsi device type */
1241 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1242 if (verbose)
1243 fprintf(stderr, Name ": failed to allocate 'device'\n");
1244 err = 2;
1245 break;
1246 }
1247 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1248 if (load_sys(device, buf) != 0) {
1249 if (verbose)
1250 fprintf(stderr, Name ": failed to read device type for %s\n",
1251 path);
1252 err = 2;
1253 free(device);
1254 break;
1255 }
1256 type = strtoul(buf, NULL, 10);
1257
1258 /* if it's not a disk print the vendor and model */
1259 if (!(type == 0 || type == 7 || type == 14)) {
1260 vendor[0] = '\0';
1261 model[0] = '\0';
1262 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1263 if (load_sys(device, buf) == 0) {
1264 strncpy(vendor, buf, sizeof(vendor));
1265 vendor[sizeof(vendor) - 1] = '\0';
1266 c = (char *) &vendor[sizeof(vendor) - 1];
1267 while (isspace(*c) || *c == '\0')
1268 *c-- = '\0';
1269
1270 }
1271 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1272 if (load_sys(device, buf) == 0) {
1273 strncpy(model, buf, sizeof(model));
1274 model[sizeof(model) - 1] = '\0';
1275 c = (char *) &model[sizeof(model) - 1];
1276 while (isspace(*c) || *c == '\0')
1277 *c-- = '\0';
1278 }
1279
1280 if (vendor[0] && model[0])
1281 sprintf(buf, "%.64s %.64s", vendor, model);
1282 else
1283 switch (type) { /* numbers from hald/linux/device.c */
1284 case 1: sprintf(buf, "tape"); break;
1285 case 2: sprintf(buf, "printer"); break;
1286 case 3: sprintf(buf, "processor"); break;
1287 case 4:
1288 case 5: sprintf(buf, "cdrom"); break;
1289 case 6: sprintf(buf, "scanner"); break;
1290 case 8: sprintf(buf, "media_changer"); break;
1291 case 9: sprintf(buf, "comm"); break;
1292 case 12: sprintf(buf, "raid"); break;
1293 default: sprintf(buf, "unknown");
1294 }
1295 } else
1296 buf[0] = '\0';
1297 free(device);
1298
1299 /* chop device path to 'host%d' and calculate the port number */
1300 c = strchr(&path[hba_len], '/');
4e5e717d
AW
1301 if (!c) {
1302 if (verbose)
1303 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1304 err = 2;
1305 break;
1306 }
d665cc31
DW
1307 *c = '\0';
1308 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1309 port -= host_base;
1310 else {
1311 if (verbose) {
1312 *c = '/'; /* repair the full string */
1313 fprintf(stderr, Name ": failed to determine port number for %s\n",
1314 path);
1315 }
1316 err = 2;
1317 break;
1318 }
1319
1320 /* mark this port as used */
1321 port_mask &= ~(1 << port);
1322
1323 /* print out the device information */
1324 if (buf[0]) {
1325 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1326 continue;
1327 }
1328
1329 fd = dev_open(ent->d_name, O_RDONLY);
1330 if (fd < 0)
1331 printf(" Port%d : - disk info unavailable -\n", port);
1332 else {
1333 fd2devname(fd, buf);
1334 printf(" Port%d : %s", port, buf);
1335 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1336 printf(" (%s)\n", buf);
1337 else
1338 printf("()\n");
1339 }
1340 close(fd);
1341 free(path);
1342 path = NULL;
1343 }
1344 if (path)
1345 free(path);
1346 if (dir)
1347 closedir(dir);
1348 if (err == 0) {
1349 int i;
1350
1351 for (i = 0; i < port_count; i++)
1352 if (port_mask & (1 << i))
1353 printf(" Port%d : - no device attached -\n", i);
1354 }
1355
1356 return err;
1357}
1358
120dc887 1359
155cbb4c 1360
120dc887
LM
1361static void print_found_intel_controllers(struct sys_dev *elem)
1362{
1363 for (; elem; elem = elem->next) {
1364 fprintf(stderr, Name ": found Intel(R) ");
1365 if (elem->type == SYS_DEV_SATA)
1366 fprintf(stderr, "SATA ");
155cbb4c
LM
1367 else if (elem->type == SYS_DEV_SAS)
1368 fprintf(stderr, "SAS ");
120dc887
LM
1369 fprintf(stderr, "RAID controller");
1370 if (elem->pci_id)
1371 fprintf(stderr, " at %s", elem->pci_id);
1372 fprintf(stderr, ".\n");
1373 }
1374 fflush(stderr);
1375}
1376
120dc887
LM
1377static int ahci_get_port_count(const char *hba_path, int *port_count)
1378{
1379 struct dirent *ent;
1380 DIR *dir;
1381 int host_base = -1;
1382
1383 *port_count = 0;
1384 if ((dir = opendir(hba_path)) == NULL)
1385 return -1;
1386
1387 for (ent = readdir(dir); ent; ent = readdir(dir)) {
1388 int host;
1389
1390 if (sscanf(ent->d_name, "host%d", &host) != 1)
1391 continue;
1392 if (*port_count == 0)
1393 host_base = host;
1394 else if (host < host_base)
1395 host_base = host;
1396
1397 if (host + 1 > *port_count + host_base)
1398 *port_count = host + 1 - host_base;
1399 }
1400 closedir(dir);
1401 return host_base;
1402}
1403
a891a3c2
LM
1404static void print_imsm_capability(const struct imsm_orom *orom)
1405{
1406 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1407 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1408 orom->hotfix_ver, orom->build);
1409 printf(" RAID Levels :%s%s%s%s%s\n",
1410 imsm_orom_has_raid0(orom) ? " raid0" : "",
1411 imsm_orom_has_raid1(orom) ? " raid1" : "",
1412 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1413 imsm_orom_has_raid10(orom) ? " raid10" : "",
1414 imsm_orom_has_raid5(orom) ? " raid5" : "");
1415 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1416 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1417 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1418 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1419 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1420 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1421 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1422 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1423 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1424 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1425 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1426 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1427 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1428 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1429 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1430 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1431 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1432 printf(" Max Disks : %d\n", orom->tds);
1433 printf(" Max Volumes : %d\n", orom->vpa);
1434 return;
1435}
1436
5615172f 1437static int detail_platform_imsm(int verbose, int enumerate_only)
d665cc31
DW
1438{
1439 /* There are two components to imsm platform support, the ahci SATA
1440 * controller and the option-rom. To find the SATA controller we
1441 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1442 * controller with the Intel vendor id is present. This approach
1443 * allows mdadm to leverage the kernel's ahci detection logic, with the
1444 * caveat that if ahci.ko is not loaded mdadm will not be able to
1445 * detect platform raid capabilities. The option-rom resides in a
1446 * platform "Adapter ROM". We scan for its signature to retrieve the
1447 * platform capabilities. If raid support is disabled in the BIOS the
1448 * option-rom capability structure will not be available.
1449 */
1450 const struct imsm_orom *orom;
1451 struct sys_dev *list, *hba;
d665cc31
DW
1452 int host_base = 0;
1453 int port_count = 0;
120dc887 1454 int result=0;
d665cc31 1455
5615172f 1456 if (enumerate_only) {
a891a3c2 1457 if (check_env("IMSM_NO_PLATFORM"))
5615172f 1458 return 0;
a891a3c2
LM
1459 list = find_intel_devices();
1460 if (!list)
1461 return 2;
1462 for (hba = list; hba; hba = hba->next) {
1463 orom = find_imsm_capability(hba->type);
1464 if (!orom) {
1465 result = 2;
1466 break;
1467 }
1468 }
1469 free_sys_dev(&list);
1470 return result;
5615172f
DW
1471 }
1472
155cbb4c
LM
1473 list = find_intel_devices();
1474 if (!list) {
d665cc31 1475 if (verbose)
155cbb4c
LM
1476 fprintf(stderr, Name ": no active Intel(R) RAID "
1477 "controller found.\n");
d665cc31
DW
1478 free_sys_dev(&list);
1479 return 2;
1480 } else if (verbose)
155cbb4c 1481 print_found_intel_controllers(list);
d665cc31 1482
a891a3c2
LM
1483 for (hba = list; hba; hba = hba->next) {
1484 orom = find_imsm_capability(hba->type);
1485 if (!orom)
1486 fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
1487 hba->path, get_sys_dev_type(hba->type));
1488 else
1489 print_imsm_capability(orom);
d665cc31
DW
1490 }
1491
120dc887
LM
1492 for (hba = list; hba; hba = hba->next) {
1493 printf(" I/O Controller : %s (%s)\n",
1494 hba->path, get_sys_dev_type(hba->type));
d665cc31 1495
120dc887
LM
1496 if (hba->type == SYS_DEV_SATA) {
1497 host_base = ahci_get_port_count(hba->path, &port_count);
1498 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
1499 if (verbose)
1500 fprintf(stderr, Name ": failed to enumerate "
1501 "ports on SATA controller at %s.", hba->pci_id);
1502 result |= 2;
1503 }
1504 }
d665cc31 1505 }
155cbb4c 1506
120dc887
LM
1507 free_sys_dev(&list);
1508 return result;
d665cc31 1509}
cdddbdbc
DW
1510#endif
1511
1512static int match_home_imsm(struct supertype *st, char *homehost)
1513{
5115ca67
DW
1514 /* the imsm metadata format does not specify any host
1515 * identification information. We return -1 since we can never
1516 * confirm nor deny whether a given array is "meant" for this
148acb7b 1517 * host. We rely on compare_super and the 'family_num' fields to
5115ca67
DW
1518 * exclude member disks that do not belong, and we rely on
1519 * mdadm.conf to specify the arrays that should be assembled.
1520 * Auto-assembly may still pick up "foreign" arrays.
1521 */
cdddbdbc 1522
9362c1c8 1523 return -1;
cdddbdbc
DW
1524}
1525
1526static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1527{
51006d85
N
1528 /* The uuid returned here is used for:
1529 * uuid to put into bitmap file (Create, Grow)
1530 * uuid for backup header when saving critical section (Grow)
1531 * comparing uuids when re-adding a device into an array
1532 * In these cases the uuid required is that of the data-array,
1533 * not the device-set.
1534 * uuid to recognise same set when adding a missing device back
1535 * to an array. This is a uuid for the device-set.
1536 *
1537 * For each of these we can make do with a truncated
1538 * or hashed uuid rather than the original, as long as
1539 * everyone agrees.
1540 * In each case the uuid required is that of the data-array,
1541 * not the device-set.
43dad3d6 1542 */
51006d85
N
1543 /* imsm does not track uuids, so we synthesize one using sha1 on
1544 * - The signature (which is constant for all imsm arrays, but no matter)
148acb7b 1545 * - the orig_family_num of the container
51006d85
N
1546 * - the index number of the volume
1547 * - the 'serial' number of the volume.
1548 * Hopefully these are all constant.
1549 */
1550 struct intel_super *super = st->sb;
43dad3d6 1551
51006d85
N
1552 char buf[20];
1553 struct sha1_ctx ctx;
1554 struct imsm_dev *dev = NULL;
148acb7b 1555 __u32 family_num;
51006d85 1556
148acb7b
DW
1557 /* some mdadm versions failed to set ->orig_family_num, in which
1558 * case fall back to ->family_num. orig_family_num will be
1559 * fixed up with the first metadata update.
1560 */
1561 family_num = super->anchor->orig_family_num;
1562 if (family_num == 0)
1563 family_num = super->anchor->family_num;
51006d85 1564 sha1_init_ctx(&ctx);
92bd8f8d 1565 sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
148acb7b 1566 sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
51006d85
N
1567 if (super->current_vol >= 0)
1568 dev = get_imsm_dev(super, super->current_vol);
1569 if (dev) {
1570 __u32 vol = super->current_vol;
1571 sha1_process_bytes(&vol, sizeof(vol), &ctx);
1572 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
1573 }
1574 sha1_finish_ctx(&ctx, buf);
1575 memcpy(uuid, buf, 4*4);
cdddbdbc
DW
1576}
1577
0d481d37 1578#if 0
4f5bc454
DW
1579static void
1580get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
cdddbdbc 1581{
cdddbdbc
DW
1582 __u8 *v = get_imsm_version(mpb);
1583 __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
1584 char major[] = { 0, 0, 0 };
1585 char minor[] = { 0 ,0, 0 };
1586 char patch[] = { 0, 0, 0 };
1587 char *ver_parse[] = { major, minor, patch };
1588 int i, j;
1589
1590 i = j = 0;
1591 while (*v != '\0' && v < end) {
1592 if (*v != '.' && j < 2)
1593 ver_parse[i][j++] = *v;
1594 else {
1595 i++;
1596 j = 0;
1597 }
1598 v++;
1599 }
1600
4f5bc454
DW
1601 *m = strtol(minor, NULL, 0);
1602 *p = strtol(patch, NULL, 0);
1603}
0d481d37 1604#endif
4f5bc454 1605
1e5c6983
DW
1606static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1607{
1608 /* migr_strip_size when repairing or initializing parity */
1609 struct imsm_map *map = get_imsm_map(dev, 0);
1610 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1611
1612 switch (get_imsm_raid_level(map)) {
1613 case 5:
1614 case 10:
1615 return chunk;
1616 default:
1617 return 128*1024 >> 9;
1618 }
1619}
1620
1621static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1622{
1623 /* migr_strip_size when rebuilding a degraded disk, no idea why
1624 * this is different from migr_strip_blocks_resync(), but it's good
1625 * to be compatible
1626 */
1627 struct imsm_map *map = get_imsm_map(dev, 1);
1628 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1629
1630 switch (get_imsm_raid_level(map)) {
1631 case 1:
1632 case 10:
1633 if (map->num_members % map->num_domains == 0)
1634 return 128*1024 >> 9;
1635 else
1636 return chunk;
1637 case 5:
1638 return max((__u32) 64*1024 >> 9, chunk);
1639 default:
1640 return 128*1024 >> 9;
1641 }
1642}
1643
1644static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1645{
1646 struct imsm_map *lo = get_imsm_map(dev, 0);
1647 struct imsm_map *hi = get_imsm_map(dev, 1);
1648 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1649 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1650
1651 return max((__u32) 1, hi_chunk / lo_chunk);
1652}
1653
1654static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1655{
1656 struct imsm_map *lo = get_imsm_map(dev, 0);
1657 int level = get_imsm_raid_level(lo);
1658
1659 if (level == 1 || level == 10) {
1660 struct imsm_map *hi = get_imsm_map(dev, 1);
1661
1662 return hi->num_domains;
1663 } else
1664 return num_stripes_per_unit_resync(dev);
1665}
1666
98130f40 1667static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1e5c6983
DW
1668{
1669 /* named 'imsm_' because raid0, raid1 and raid10
1670 * counter-intuitively have the same number of data disks
1671 */
98130f40 1672 struct imsm_map *map = get_imsm_map(dev, second_map);
1e5c6983
DW
1673
1674 switch (get_imsm_raid_level(map)) {
1675 case 0:
1676 case 1:
1677 case 10:
1678 return map->num_members;
1679 case 5:
1680 return map->num_members - 1;
1681 default:
1682 dprintf("%s: unsupported raid level\n", __func__);
1683 return 0;
1684 }
1685}
1686
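/* Arithmetic sketch using the helper above: the usable capacity of a volume
 * is (roughly) the per-member block count times the number of data members,
 * e.g. a 4-disk RAID5 exposes three members' worth of data blocks.
 */
static __u64 example_data_capacity(struct imsm_dev *dev)
{
	struct imsm_map *map = get_imsm_map(dev, 0);

	return (__u64) __le32_to_cpu(map->blocks_per_member) *
	       imsm_num_data_members(dev, 0);
}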
1687static __u32 parity_segment_depth(struct imsm_dev *dev)
1688{
1689 struct imsm_map *map = get_imsm_map(dev, 0);
1690 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1691
1692 switch(get_imsm_raid_level(map)) {
1693 case 1:
1694 case 10:
1695 return chunk * map->num_domains;
1696 case 5:
1697 return chunk * map->num_members;
1698 default:
1699 return chunk;
1700 }
1701}
1702
1703static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
1704{
1705 struct imsm_map *map = get_imsm_map(dev, 1);
1706 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1707 __u32 strip = block / chunk;
1708
1709 switch (get_imsm_raid_level(map)) {
1710 case 1:
1711 case 10: {
1712 __u32 vol_strip = (strip * map->num_domains) + 1;
1713 __u32 vol_stripe = vol_strip / map->num_members;
1714
1715 return vol_stripe * chunk + block % chunk;
1716 } case 5: {
1717 __u32 stripe = strip / (map->num_members - 1);
1718
1719 return stripe * chunk + block % chunk;
1720 }
1721 default:
1722 return 0;
1723 }
1724}
1725
1726static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
1727{
1728 /* calculate the conversion factor between per member 'blocks'
1729 * (md/{resync,rebuild}_start) and imsm migration units, return
1730 * 0 for the 'not migrating' and 'unsupported migration' cases
1731 */
1732 if (!dev->vol.migr_state)
1733 return 0;
1734
1735 switch (migr_type(dev)) {
6345120e 1736 case MIGR_GEN_MIGR:
1e5c6983
DW
1737 case MIGR_VERIFY:
1738 case MIGR_REPAIR:
1739 case MIGR_INIT: {
1740 struct imsm_map *map = get_imsm_map(dev, 0);
1741 __u32 stripes_per_unit;
1742 __u32 blocks_per_unit;
1743 __u32 parity_depth;
1744 __u32 migr_chunk;
1745 __u32 block_map;
1746 __u32 block_rel;
1747 __u32 segment;
1748 __u32 stripe;
1749 __u8 disks;
1750
1751 /* yes, this is really the translation of migr_units to
1752 * per-member blocks in the 'resync' case
1753 */
1754 stripes_per_unit = num_stripes_per_unit_resync(dev);
1755 migr_chunk = migr_strip_blocks_resync(dev);
98130f40 1756 disks = imsm_num_data_members(dev, 0);
1e5c6983
DW
1757 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
1758 stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
1759 segment = blocks_per_unit / stripe;
1760 block_rel = blocks_per_unit - segment * stripe;
1761 parity_depth = parity_segment_depth(dev);
1762 block_map = map_migr_block(dev, block_rel);
1763 return block_map + parity_depth * segment;
1764 }
1765 case MIGR_REBUILD: {
1766 __u32 stripes_per_unit;
1767 __u32 migr_chunk;
1768
1769 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
1770 migr_chunk = migr_strip_blocks_rebuild(dev);
1771 return migr_chunk * stripes_per_unit;
1772 }
1e5c6983
DW
1773 case MIGR_STATE_CHANGE:
1774 default:
1775 return 0;
1776 }
1777}
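The conversion above is easiest to see with concrete numbers; the volume shape used below (4-disk RAID5, 64 KiB strip, MIGR_REPAIR) is purely illustrative and not taken from this file.

/* Worked example (illustrative values only): a 4-disk RAID5 volume with a
 * 64 KiB strip (blocks_per_strip = 128 sectors) in a MIGR_REPAIR pass:
 *
 *   stripes_per_unit = num_stripes_per_unit_resync() = max(1, 128/128) = 1
 *   migr_chunk       = migr_strip_blocks_resync()    = 128
 *   disks            = imsm_num_data_members()       = 4 - 1 = 3
 *   blocks_per_unit  = 1 * 128 * 3                   = 384
 *   stripe           = 128 * 3                       = 384
 *   segment          = 384 / 384 = 1,  block_rel = 0
 *   parity_depth     = parity_segment_depth()        = 128 * 4 = 512
 *   block_map        = map_migr_block(dev, 0)        = 0
 *
 * so one migration unit corresponds to 0 + 512 * 1 = 512 per-member sectors
 * (256 KiB) of resync progress.
 */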
1778
c2c087e6
DW
1779static int imsm_level_to_layout(int level)
1780{
1781 switch (level) {
1782 case 0:
1783 case 1:
1784 return 0;
1785 case 5:
1786 case 6:
a380c027 1787 return ALGORITHM_LEFT_ASYMMETRIC;
c2c087e6 1788 case 10:
c92a2527 1789 return 0x102;
c2c087e6 1790 }
a18a888e 1791 return UnSet;
c2c087e6
DW
1792}
1793
8e59f3d8
AK
1794/*******************************************************************************
1795 * Function: read_imsm_migr_rec
1796 * Description: Function reads imsm migration record from last sector of disk
1797 * Parameters:
1798 * fd : disk descriptor
1799 * super : metadata info
1800 * Returns:
1801 * 0 : success,
1802 * -1 : fail
1803 ******************************************************************************/
1804static int read_imsm_migr_rec(int fd, struct intel_super *super)
1805{
1806 int ret_val = -1;
1807 unsigned long long dsize;
1808
1809 get_dev_size(fd, NULL, &dsize);
1810 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
1811 fprintf(stderr,
1812 Name ": Cannot seek to anchor block: %s\n",
1813 strerror(errno));
1814 goto out;
1815 }
1816 if (read(fd, super->migr_rec_buf, 512) != 512) {
1817 fprintf(stderr,
1818 Name ": Cannot read migr record block: %s\n",
1819 strerror(errno));
1820 goto out;
1821 }
1822 ret_val = 0;
1823
1824out:
1825 return ret_val;
1826}
1827
1828/*******************************************************************************
1829 * Function: load_imsm_migr_rec
1830 * Description: Function reads imsm migration record (it is stored at the last
1831 * sector of disk)
1832 * Parameters:
1833 * super : imsm internal array info
1834 * info : general array info
1835 * Returns:
1836 * 0 : success
1837 * -1 : fail
1838 ******************************************************************************/
1839static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
1840{
1841 struct mdinfo *sd;
1842 struct dl *dl = NULL;
1843 char nm[30];
1844 int retval = -1;
1845 int fd = -1;
1846
1847 if (info) {
1848 for (sd = info->devs ; sd ; sd = sd->next) {
1849 /* read only from one of the first two slots */
1850 if ((sd->disk.raid_disk > 1) ||
1851 (sd->disk.raid_disk < 0))
1852 continue;
1853 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1854 fd = dev_open(nm, O_RDONLY);
1855 if (fd >= 0)
1856 break;
1857 }
1858 }
1859 if (fd < 0) {
1860 for (dl = super->disks; dl; dl = dl->next) {
1861 /* read only from one of the first two slots */
1862 if (dl->index > 1)
1863 continue;
1864 sprintf(nm, "%d:%d", dl->major, dl->minor);
1865 fd = dev_open(nm, O_RDONLY);
1866 if (fd >= 0)
1867 break;
1868 }
1869 }
1870 if (fd < 0)
1871 goto out;
1872 retval = read_imsm_migr_rec(fd, super);
1873
1874out:
1875 if (fd >= 0)
1876 close(fd);
1877 return retval;
1878}
1879
a5d85af7 1880static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
bf5a934a
DW
1881{
1882 struct intel_super *super = st->sb;
949c47a0 1883 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
a965f303 1884 struct imsm_map *map = get_imsm_map(dev, 0);
81ac8b4d 1885 struct imsm_map *prev_map = get_imsm_map(dev, 1);
b335e593 1886 struct imsm_map *map_to_analyse = map;
efb30e7f 1887 struct dl *dl;
e207da2f 1888 char *devname;
139dae11 1889 unsigned int component_size_alligment;
a5d85af7 1890 int map_disks = info->array.raid_disks;
bf5a934a 1891
95eeceeb 1892 memset(info, 0, sizeof(*info));
b335e593
AK
1893 if (prev_map)
1894 map_to_analyse = prev_map;
1895
efb30e7f
DW
1896 for (dl = super->disks; dl; dl = dl->next)
1897 if (dl->raiddisk == info->disk.raid_disk)
1898 break;
bf5a934a 1899 info->container_member = super->current_vol;
cd0430a1 1900 info->array.raid_disks = map->num_members;
b335e593 1901 info->array.level = get_imsm_raid_level(map_to_analyse);
bf5a934a
DW
1902 info->array.layout = imsm_level_to_layout(info->array.level);
1903 info->array.md_minor = -1;
1904 info->array.ctime = 0;
1905 info->array.utime = 0;
b335e593
AK
1906 info->array.chunk_size =
1907 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
301406c9 1908 info->array.state = !dev->vol.dirty;
da9b4a62
DW
1909 info->custom_array_size = __le32_to_cpu(dev->size_high);
1910 info->custom_array_size <<= 32;
1911 info->custom_array_size |= __le32_to_cpu(dev->size_low);
3f83228a
N
1912 if (prev_map && map->map_state == prev_map->map_state) {
1913 info->reshape_active = 1;
b335e593
AK
1914 info->new_level = get_imsm_raid_level(map);
1915 info->new_layout = imsm_level_to_layout(info->new_level);
1916 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
3f83228a 1917 info->delta_disks = map->num_members - prev_map->num_members;
493f5dd6
N
1918 if (info->delta_disks) {
1919 /* this needs to be applied to every array
1920 * in the container.
1921 */
1922 info->reshape_active = 2;
1923 }
3f83228a
N
1924 /* The shape information that we give to md might have to be
1925 * modified to cope with md's requirements for reshaping arrays.
1926 * For example, when reshaping a RAID0, md requires it to be
1927 * presented as a degraded RAID4.
1928 * Also if a RAID0 is migrating to a RAID5 we need to specify
1929 * the array as already being RAID5, but the 'before' layout
1930 * is a RAID4-like layout.
1931 */
1932 switch (info->array.level) {
1933 case 0:
1934 switch(info->new_level) {
1935 case 0:
1936 /* conversion is happening as RAID4 */
1937 info->array.level = 4;
1938 info->array.raid_disks += 1;
1939 break;
1940 case 5:
1941 /* conversion is happening as RAID5 */
1942 info->array.level = 5;
1943 info->array.layout = ALGORITHM_PARITY_N;
1944 info->array.raid_disks += 1;
1945 info->delta_disks -= 1;
1946 break;
1947 default:
1948 /* FIXME error message */
1949 info->array.level = UnSet;
1950 break;
1951 }
1952 break;
1953 }
b335e593
AK
1954 } else {
1955 info->new_level = UnSet;
1956 info->new_layout = UnSet;
1957 info->new_chunk = info->array.chunk_size;
3f83228a 1958 info->delta_disks = 0;
b335e593 1959 }
301406c9
DW
1960 info->disk.major = 0;
1961 info->disk.minor = 0;
efb30e7f
DW
1962 if (dl) {
1963 info->disk.major = dl->major;
1964 info->disk.minor = dl->minor;
1965 }
bf5a934a 1966
b335e593
AK
1967 info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
1968 info->component_size =
1969 __le32_to_cpu(map_to_analyse->blocks_per_member);
139dae11
AK
1970
1971 /* check component size alignment
1972 */
1973 component_size_alligment =
1974 info->component_size % (info->array.chunk_size/512);
1975
1976 if (component_size_alligment &&
1977 (info->array.level != 1) && (info->array.level != UnSet)) {
1978 dprintf("imsm: reported component size alligned from %llu ",
1979 info->component_size);
1980 info->component_size -= component_size_alligment;
1981 dprintf("to %llu (%i).\n",
1982 info->component_size, component_size_alligment);
1983 }
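/* Worked example of the trim above (illustrative values only): with a
 * 64 KiB chunk (info->array.chunk_size / 512 = 128 sectors) and a
 * reported component_size of 1000000 sectors, the remainder is
 * 1000000 % 128 = 64, so the size exposed to md becomes 999936 sectors
 * (7812 whole chunks).
 */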
1984
301406c9 1985 memset(info->uuid, 0, sizeof(info->uuid));
921d9e16 1986 info->recovery_start = MaxSector;
bf5a934a 1987
d2e6d5d6 1988 info->reshape_progress = 0;
b6796ce1 1989 info->resync_start = MaxSector;
b335e593
AK
1990 if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
1991 dev->vol.dirty) {
301406c9 1992 info->resync_start = 0;
b6796ce1
AK
1993 }
1994 if (dev->vol.migr_state) {
1e5c6983
DW
1995 switch (migr_type(dev)) {
1996 case MIGR_REPAIR:
1997 case MIGR_INIT: {
1998 __u64 blocks_per_unit = blocks_per_migr_unit(dev);
1999 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
2000
2001 info->resync_start = blocks_per_unit * units;
2002 break;
2003 }
d2e6d5d6
AK
2004 case MIGR_GEN_MIGR: {
2005 __u64 blocks_per_unit = blocks_per_migr_unit(dev);
2006 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
04fa9523
AK
2007 unsigned long long array_blocks;
2008 int used_disks;
d2e6d5d6
AK
2009
2010 info->reshape_progress = blocks_per_unit * units;
6289d1e0 2011
d2e6d5d6
AK
2012 dprintf("IMSM: General Migration checkpoint : %llu "
2013 "(%llu) -> read reshape progress : %llu\n",
2014 units, blocks_per_unit, info->reshape_progress);
75156c46
AK
2015
2016 used_disks = imsm_num_data_members(dev, 1);
2017 if (used_disks > 0) {
2018 array_blocks = map->blocks_per_member *
2019 used_disks;
2020 /* round array size down to a whole number of MB
2021 */
2022 info->custom_array_size = (array_blocks
2023 >> SECT_PER_MB_SHIFT)
2024 << SECT_PER_MB_SHIFT;
2025 }
d2e6d5d6 2026 }
1e5c6983
DW
2027 case MIGR_VERIFY:
2028 /* we could emulate the checkpointing of
2029 * 'sync_action=check' migrations, but for now
2030 * we just immediately complete them
2031 */
2032 case MIGR_REBUILD:
2033 /* this is handled by container_content_imsm() */
1e5c6983
DW
2034 case MIGR_STATE_CHANGE:
2035 /* FIXME handle other migrations */
2036 default:
2037 /* we are not dirty, so... */
2038 info->resync_start = MaxSector;
2039 }
b6796ce1 2040 }
301406c9
DW
2041
2042 strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
2043 info->name[MAX_RAID_SERIAL_LEN] = 0;
bf5a934a 2044
f35f2525
N
2045 info->array.major_version = -1;
2046 info->array.minor_version = -2;
e207da2f
AW
2047 devname = devnum2devname(st->container_dev);
2048 *info->text_version = '\0';
2049 if (devname)
2050 sprintf(info->text_version, "/%s/%d", devname, info->container_member);
2051 free(devname);
a67dd8cc 2052 info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
51006d85 2053 uuid_from_super_imsm(st, info->uuid);
a5d85af7
N
2054
2055 if (dmap) {
2056 int i, j;
2057 for (i=0; i<map_disks; i++) {
2058 dmap[i] = 0;
2059 if (i < info->array.raid_disks) {
2060 struct imsm_disk *dsk;
98130f40 2061 j = get_imsm_disk_idx(dev, i, -1);
a5d85af7
N
2062 dsk = get_imsm_disk(super, j);
2063 if (dsk && (dsk->status & CONFIGURED_DISK))
2064 dmap[i] = 1;
2065 }
2066 }
2067 }
81ac8b4d 2068}
bf5a934a 2069
97b4d0e9
DW
2070static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
2071static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
2072
2073static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2074{
2075 struct dl *d;
2076
2077 for (d = super->missing; d; d = d->next)
2078 if (d->index == index)
2079 return &d->disk;
2080 return NULL;
2081}
2082
a5d85af7 2083static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
4f5bc454
DW
2084{
2085 struct intel_super *super = st->sb;
4f5bc454 2086 struct imsm_disk *disk;
a5d85af7 2087 int map_disks = info->array.raid_disks;
ab3cb6b3
N
2088 int max_enough = -1;
2089 int i;
2090 struct imsm_super *mpb;
4f5bc454 2091
bf5a934a 2092 if (super->current_vol >= 0) {
a5d85af7 2093 getinfo_super_imsm_volume(st, info, map);
bf5a934a
DW
2094 return;
2095 }
95eeceeb 2096 memset(info, 0, sizeof(*info));
d23fe947
DW
2097
2098 /* Set raid_disks to zero so that Assemble will always pull in valid
2099 * spares
2100 */
2101 info->array.raid_disks = 0;
cdddbdbc
DW
2102 info->array.level = LEVEL_CONTAINER;
2103 info->array.layout = 0;
2104 info->array.md_minor = -1;
c2c087e6 2105 info->array.ctime = 0; /* N/A for imsm */
cdddbdbc
DW
2106 info->array.utime = 0;
2107 info->array.chunk_size = 0;
2108
2109 info->disk.major = 0;
2110 info->disk.minor = 0;
cdddbdbc 2111 info->disk.raid_disk = -1;
c2c087e6 2112 info->reshape_active = 0;
f35f2525
N
2113 info->array.major_version = -1;
2114 info->array.minor_version = -2;
c2c087e6 2115 strcpy(info->text_version, "imsm");
a67dd8cc 2116 info->safe_mode_delay = 0;
c2c087e6
DW
2117 info->disk.number = -1;
2118 info->disk.state = 0;
c5afc314 2119 info->name[0] = 0;
921d9e16 2120 info->recovery_start = MaxSector;
c2c087e6 2121
97b4d0e9 2122 /* do we have all the in-sync disks that we expect? */
ab3cb6b3 2123 mpb = super->anchor;
97b4d0e9 2124
ab3cb6b3
N
2125 for (i = 0; i < mpb->num_raid_devs; i++) {
2126 struct imsm_dev *dev = get_imsm_dev(super, i);
2127 int failed, enough, j, missing = 0;
2128 struct imsm_map *map;
2129 __u8 state;
97b4d0e9 2130
ab3cb6b3
N
2131 failed = imsm_count_failed(super, dev);
2132 state = imsm_check_degraded(super, dev, failed);
2133 map = get_imsm_map(dev, dev->vol.migr_state);
2134
2135 /* any newly missing disks?
2136 * (catches single-degraded vs double-degraded)
2137 */
2138 for (j = 0; j < map->num_members; j++) {
98130f40 2139 __u32 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ab3cb6b3
N
2140 __u32 idx = ord_to_idx(ord);
2141
2142 if (!(ord & IMSM_ORD_REBUILD) &&
2143 get_imsm_missing(super, idx)) {
2144 missing = 1;
2145 break;
2146 }
97b4d0e9 2147 }
ab3cb6b3
N
2148
2149 if (state == IMSM_T_STATE_FAILED)
2150 enough = -1;
2151 else if (state == IMSM_T_STATE_DEGRADED &&
2152 (state != map->map_state || missing))
2153 enough = 0;
2154 else /* we're normal, or already degraded */
2155 enough = 1;
2156
2157 /* in the missing/failed disk case check to see
2158 * if at least one array is runnable
2159 */
2160 max_enough = max(max_enough, enough);
2161 }
2162 dprintf("%s: enough: %d\n", __func__, max_enough);
2163 info->container_enough = max_enough;
97b4d0e9 2164
4a04ec6c 2165 if (super->disks) {
14e8215b
DW
2166 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2167
b9f594fe 2168 disk = &super->disks->disk;
14e8215b
DW
2169 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2170 info->component_size = reserved;
25ed7e59 2171 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
df474657
DW
2172 /* we don't change info->disk.raid_disk here because
2173 * this state will be finalized in mdmon after we have
2174 * found the 'most fresh' version of the metadata
2175 */
25ed7e59
DW
2176 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2177 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
cdddbdbc 2178 }
a575e2a7
DW
2179
2180 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2181 * ->compare_super may have updated the 'num_raid_devs' field for spares
2182 */
2183 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
36ba7d48 2184 uuid_from_super_imsm(st, info->uuid);
22e263f6
AC
2185 else
2186 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
a5d85af7
N
2187
2188 /* I don't know how to compute 'map' on imsm, so use safe default */
2189 if (map) {
2190 int i;
2191 for (i = 0; i < map_disks; i++)
2192 map[i] = 1;
2193 }
2194
cdddbdbc
DW
2195}
2196
5c4cd5da
AC
2197/* allocates memory and fills in a disk entry in the mdinfo structure
2198 * for each disk in the array */
2199struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2200{
2201 struct mdinfo *mddev = NULL;
2202 struct intel_super *super = st->sb;
2203 struct imsm_disk *disk;
2204 int count = 0;
2205 struct dl *dl;
2206 if (!super || !super->disks)
2207 return NULL;
2208 dl = super->disks;
2209 mddev = malloc(sizeof(*mddev));
2210 if (!mddev) {
2211 fprintf(stderr, Name ": Failed to allocate memory.\n");
2212 return NULL;
2213 }
2214 memset(mddev, 0, sizeof(*mddev));
2215 while (dl) {
2216 struct mdinfo *tmp;
2217 disk = &dl->disk;
2218 tmp = malloc(sizeof(*tmp));
2219 if (!tmp) {
2220 fprintf(stderr, Name ": Failed to allocate memory.\n");
2221 if (mddev)
2222 sysfs_free(mddev);
2223 return NULL;
2224 }
2225 memset(tmp, 0, sizeof(*tmp));
2226 if (mddev->devs)
2227 tmp->next = mddev->devs;
2228 mddev->devs = tmp;
2229 tmp->disk.number = count++;
2230 tmp->disk.major = dl->major;
2231 tmp->disk.minor = dl->minor;
2232 tmp->disk.state = is_configured(disk) ?
2233 (1 << MD_DISK_ACTIVE) : 0;
2234 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2235 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2236 tmp->disk.raid_disk = -1;
2237 dl = dl->next;
2238 }
2239 return mddev;
2240}
2241
cdddbdbc
DW
2242static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2243 char *update, char *devname, int verbose,
2244 int uuid_set, char *homehost)
2245{
f352c545
DW
2246 /* For 'assemble' and 'force' we need to return non-zero if any
2247 * change was made. For others, the return value is ignored.
2248 * Update options are:
2249 * force-one : This device looks a bit old but needs to be included,
2250 * update age info appropriately.
2251 * assemble: clear any 'faulty' flag to allow this device to
2252 * be assembled.
2253 * force-array: Array is degraded but being forced, mark it clean
2254 * if that will be needed to assemble it.
2255 *
2256 * newdev: not used ????
2257 * grow: Array has gained a new device - this is currently for
2258 * linear only
2259 * resync: mark as dirty so a resync will happen.
2260 * name: update the name - preserving the homehost
6e46bf34 2261 * uuid: Change the uuid of the array to match what is given
f352c545
DW
2262 *
2263 * Following are not relevant for this imsm:
2264 * sparc2.2 : update from old dodgy metadata
2265 * super-minor: change the preferred_minor number
2266 * summaries: update redundant counters.
f352c545
DW
2267 * homehost: update the recorded homehost
2268 * _reshape_progress: record new reshape_progress position.
2269 */
6e46bf34
DW
2270 int rv = 1;
2271 struct intel_super *super = st->sb;
2272 struct imsm_super *mpb;
f352c545 2273
6e46bf34
DW
2274 /* we can only update container info */
2275 if (!super || super->current_vol >= 0 || !super->anchor)
2276 return 1;
2277
2278 mpb = super->anchor;
2279
2280 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
1e2b2765 2281 rv = -1;
6e46bf34
DW
2282 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2283 mpb->orig_family_num = *((__u32 *) info->update_private);
2284 rv = 0;
2285 } else if (strcmp(update, "uuid") == 0) {
2286 __u32 *new_family = malloc(sizeof(*new_family));
2287
2288 /* update orig_family_number with the incoming random
2289 * data, report the new effective uuid, and store the
2290 * new orig_family_num for future updates.
2291 */
2292 if (new_family) {
2293 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2294 uuid_from_super_imsm(st, info->uuid);
2295 *new_family = mpb->orig_family_num;
2296 info->update_private = new_family;
2297 rv = 0;
2298 }
2299 } else if (strcmp(update, "assemble") == 0)
2300 rv = 0;
2301 else
1e2b2765 2302 rv = -1;
f352c545 2303
6e46bf34
DW
2304 /* successful update? recompute checksum */
2305 if (rv == 0)
2306 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
f352c545
DW
2307
2308 return rv;
cdddbdbc
DW
2309}
2310
c2c087e6 2311static size_t disks_to_mpb_size(int disks)
cdddbdbc 2312{
c2c087e6 2313 size_t size;
cdddbdbc 2314
c2c087e6
DW
2315 size = sizeof(struct imsm_super);
2316 size += (disks - 1) * sizeof(struct imsm_disk);
2317 size += 2 * sizeof(struct imsm_dev);
2318 /* up to 2 maps per raid device (-2 for the imsm_maps already included in imsm_dev) */
2319 size += (4 - 2) * sizeof(struct imsm_map);
2320 /* 4 possible disk_ord_tbl's */
2321 size += 4 * (disks - 1) * sizeof(__u32);
2322
2323 return size;
2324}
2325
2326static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2327{
2328 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2329 return 0;
2330
2331 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
cdddbdbc
DW
2332}
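A minimal usage sketch follows; example_usable_sectors() is hypothetical and not part of mdadm, it only illustrates how avail_size_imsm() is meant to be consumed.

/* Hypothetical helper (sketch only): report the sectors left for data once
 * the metadata area at the end of the disk has been reserved; 0 means the
 * device cannot even hold the imsm metadata.
 */
static unsigned long long example_usable_sectors(struct supertype *st,
						 unsigned long long dev_sectors)
{
	unsigned long long usable = avail_size_imsm(st, dev_sectors);

	if (usable == 0)
		fprintf(stderr, Name ": device too small for imsm\n");
	return usable;
}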
2333
ba2de7ba
DW
2334static void free_devlist(struct intel_super *super)
2335{
2336 struct intel_dev *dv;
2337
2338 while (super->devlist) {
2339 dv = super->devlist->next;
2340 free(super->devlist->dev);
2341 free(super->devlist);
2342 super->devlist = dv;
2343 }
2344}
2345
2346static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
2347{
2348 memcpy(dest, src, sizeof_imsm_dev(src, 0));
2349}
2350
cdddbdbc
DW
2351static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2352{
2353 /*
2354 * return:
2355 * 0 same, or first was empty, and second was copied
2356 * 1 second had wrong number
2357 * 2 wrong uuid
2358 * 3 wrong other info
2359 */
2360 struct intel_super *first = st->sb;
2361 struct intel_super *sec = tst->sb;
2362
2363 if (!first) {
2364 st->sb = tst->sb;
2365 tst->sb = NULL;
2366 return 0;
2367 }
8603ea6f
LM
2368 /* in platform dependent environment test if the disks
2369 * use the same Intel hba
2370 */
2371 if (!check_env("IMSM_NO_PLATFORM")) {
ea2bc72b
LM
2372 if (!first->hba || !sec->hba ||
2373 (first->hba->type != sec->hba->type)) {
8603ea6f
LM
2374 fprintf(stderr,
2375 "HBAs of devices does not match %s != %s\n",
ea2bc72b
LM
2376 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2377 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
8603ea6f
LM
2378 return 3;
2379 }
2380 }
cdddbdbc 2381
d23fe947
DW
2382 /* if an anchor does not have num_raid_devs set then it is a free
2383 * floating spare
2384 */
2385 if (first->anchor->num_raid_devs > 0 &&
2386 sec->anchor->num_raid_devs > 0) {
a2b97981
DW
2387 /* Determine if these disks might ever have been
2388 * related. Further disambiguation can only take place
2389 * in load_super_imsm_all
2390 */
2391 __u32 first_family = first->anchor->orig_family_num;
2392 __u32 sec_family = sec->anchor->orig_family_num;
2393
f796af5d
DW
2394 if (memcmp(first->anchor->sig, sec->anchor->sig,
2395 MAX_SIGNATURE_LENGTH) != 0)
2396 return 3;
2397
a2b97981
DW
2398 if (first_family == 0)
2399 first_family = first->anchor->family_num;
2400 if (sec_family == 0)
2401 sec_family = sec->anchor->family_num;
2402
2403 if (first_family != sec_family)
d23fe947 2404 return 3;
f796af5d 2405
d23fe947 2406 }
cdddbdbc 2407
f796af5d 2408
3e372e5a
DW
2409 /* if 'first' is a spare promote it to a populated mpb with sec's
2410 * family number
2411 */
2412 if (first->anchor->num_raid_devs == 0 &&
2413 sec->anchor->num_raid_devs > 0) {
78d30f94 2414 int i;
ba2de7ba
DW
2415 struct intel_dev *dv;
2416 struct imsm_dev *dev;
78d30f94
DW
2417
2418 /* we need to copy raid device info from sec if an allocation
2419 * fails here we don't associate the spare
2420 */
2421 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
ba2de7ba
DW
2422 dv = malloc(sizeof(*dv));
2423 if (!dv)
2424 break;
2425 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2426 if (!dev) {
2427 free(dv);
2428 break;
78d30f94 2429 }
ba2de7ba
DW
2430 dv->dev = dev;
2431 dv->index = i;
2432 dv->next = first->devlist;
2433 first->devlist = dv;
78d30f94 2434 }
709743c5 2435 if (i < sec->anchor->num_raid_devs) {
ba2de7ba
DW
2436 /* allocation failure */
2437 free_devlist(first);
2438 fprintf(stderr, "imsm: failed to associate spare\n");
2439 return 3;
78d30f94 2440 }
3e372e5a 2441 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
148acb7b 2442 first->anchor->orig_family_num = sec->anchor->orig_family_num;
3e372e5a 2443 first->anchor->family_num = sec->anchor->family_num;
ac6449be 2444 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
709743c5
DW
2445 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2446 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
3e372e5a
DW
2447 }
2448
cdddbdbc
DW
2449 return 0;
2450}
2451
0030e8d6
DW
2452static void fd2devname(int fd, char *name)
2453{
2454 struct stat st;
2455 char path[256];
33a6535d 2456 char dname[PATH_MAX];
0030e8d6
DW
2457 char *nm;
2458 int rv;
2459
2460 name[0] = '\0';
2461 if (fstat(fd, &st) != 0)
2462 return;
2463 sprintf(path, "/sys/dev/block/%d:%d",
2464 major(st.st_rdev), minor(st.st_rdev));
2465
2466 rv = readlink(path, dname, sizeof(dname));
2467 if (rv <= 0)
2468 return;
2469
2470 dname[rv] = '\0';
2471 nm = strrchr(dname, '/');
2472 nm++;
2473 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2474}
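For illustration, one concrete (made-up) resolution performed by fd2devname():

/* Example (illustrative): for an fd referring to /dev/sdb (8:16),
 * readlink("/sys/dev/block/8:16") returns a path ending in ".../block/sdb";
 * the basename "sdb" is extracted and the result is "/dev/sdb".
 */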
2475
cdddbdbc
DW
2476extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2477
2478static int imsm_read_serial(int fd, char *devname,
2479 __u8 serial[MAX_RAID_SERIAL_LEN])
2480{
2481 unsigned char scsi_serial[255];
cdddbdbc
DW
2482 int rv;
2483 int rsp_len;
1f24f035 2484 int len;
316e2bf4
DW
2485 char *dest;
2486 char *src;
2487 char *rsp_buf;
2488 int i;
cdddbdbc
DW
2489
2490 memset(scsi_serial, 0, sizeof(scsi_serial));
cdddbdbc 2491
f9ba0ff1
DW
2492 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
2493
40ebbb9c 2494 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
f9ba0ff1
DW
2495 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2496 fd2devname(fd, (char *) serial);
0030e8d6
DW
2497 return 0;
2498 }
2499
cdddbdbc
DW
2500 if (rv != 0) {
2501 if (devname)
2502 fprintf(stderr,
2503 Name ": Failed to retrieve serial for %s\n",
2504 devname);
2505 return rv;
2506 }
2507
2508 rsp_len = scsi_serial[3];
03cd4cc8
DW
2509 if (!rsp_len) {
2510 if (devname)
2511 fprintf(stderr,
2512 Name ": Failed to retrieve serial for %s\n",
2513 devname);
2514 return 2;
2515 }
1f24f035 2516 rsp_buf = (char *) &scsi_serial[4];
5c3db629 2517
316e2bf4
DW
2518 /* trim all whitespace and non-printable characters and convert
2519 * ':' to ';'
2520 */
2521 for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
2522 src = &rsp_buf[i];
2523 if (*src > 0x20) {
2524 /* ':' is reserved for use in placeholder serial
2525 * numbers for missing disks
2526 */
2527 if (*src == ':')
2528 *dest++ = ';';
2529 else
2530 *dest++ = *src;
2531 }
2532 }
2533 len = dest - rsp_buf;
2534 dest = rsp_buf;
2535
2536 /* truncate leading characters */
2537 if (len > MAX_RAID_SERIAL_LEN) {
2538 dest += len - MAX_RAID_SERIAL_LEN;
1f24f035 2539 len = MAX_RAID_SERIAL_LEN;
316e2bf4 2540 }
5c3db629 2541
5c3db629 2542 memset(serial, 0, MAX_RAID_SERIAL_LEN);
316e2bf4 2543 memcpy(serial, dest, len);
cdddbdbc
DW
2544
2545 return 0;
2546}
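A short worked example of the sanitisation above; the raw serial string is made up.

/* Example (illustrative): a raw INQUIRY page 0x80 payload of
 * "  Z1E:ABC 123 " loses its spaces, has ':' rewritten to ';', and becomes
 * "Z1E;ABC123". Had the cleaned string exceeded MAX_RAID_SERIAL_LEN (16)
 * characters, only the trailing 16 would have been kept.
 */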
2547
1f24f035
DW
2548static int serialcmp(__u8 *s1, __u8 *s2)
2549{
2550 return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
2551}
2552
2553static void serialcpy(__u8 *dest, __u8 *src)
2554{
2555 strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
2556}
2557
1799c9e8 2558#ifndef MDASSEMBLE
54c2c1ea
DW
2559static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2560{
2561 struct dl *dl;
2562
2563 for (dl = super->disks; dl; dl = dl->next)
2564 if (serialcmp(dl->serial, serial) == 0)
2565 break;
2566
2567 return dl;
2568}
1799c9e8 2569#endif
54c2c1ea 2570
a2b97981
DW
2571static struct imsm_disk *
2572__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2573{
2574 int i;
2575
2576 for (i = 0; i < mpb->num_disks; i++) {
2577 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2578
2579 if (serialcmp(disk->serial, serial) == 0) {
2580 if (idx)
2581 *idx = i;
2582 return disk;
2583 }
2584 }
2585
2586 return NULL;
2587}
2588
cdddbdbc
DW
2589static int
2590load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2591{
a2b97981 2592 struct imsm_disk *disk;
cdddbdbc
DW
2593 struct dl *dl;
2594 struct stat stb;
cdddbdbc 2595 int rv;
a2b97981 2596 char name[40];
d23fe947
DW
2597 __u8 serial[MAX_RAID_SERIAL_LEN];
2598
2599 rv = imsm_read_serial(fd, devname, serial);
2600
2601 if (rv != 0)
2602 return 2;
2603
a2b97981 2604 dl = calloc(1, sizeof(*dl));
b9f594fe 2605 if (!dl) {
cdddbdbc
DW
2606 if (devname)
2607 fprintf(stderr,
2608 Name ": failed to allocate disk buffer for %s\n",
2609 devname);
2610 return 2;
2611 }
cdddbdbc 2612
a2b97981
DW
2613 fstat(fd, &stb);
2614 dl->major = major(stb.st_rdev);
2615 dl->minor = minor(stb.st_rdev);
2616 dl->next = super->disks;
2617 dl->fd = keep_fd ? fd : -1;
2618 assert(super->disks == NULL);
2619 super->disks = dl;
2620 serialcpy(dl->serial, serial);
2621 dl->index = -2;
2622 dl->e = NULL;
2623 fd2devname(fd, name);
2624 if (devname)
2625 dl->devname = strdup(devname);
2626 else
2627 dl->devname = strdup(name);
cdddbdbc 2628
d23fe947 2629 /* look up this disk's index in the current anchor */
a2b97981
DW
2630 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2631 if (disk) {
2632 dl->disk = *disk;
2633 /* only set index on disks that are a member of a
2634 * populated container, i.e. one with raid_devs
2635 */
2636 if (is_failed(&dl->disk))
3f6efecc 2637 dl->index = -2;
a2b97981
DW
2638 else if (is_spare(&dl->disk))
2639 dl->index = -1;
3f6efecc
DW
2640 }
2641
949c47a0
DW
2642 return 0;
2643}
2644
0e600426 2645#ifndef MDASSEMBLE
0c046afd
DW
2646/* When migrating map0 contains the 'destination' state while map1
2647 * contains the current state. When not migrating map0 contains the
2648 * current state. This routine assumes that map[0].map_state is set to
2649 * the current array state before being called.
2650 *
2651 * Migration is indicated by one of the following states
2652 * 1/ Idle (migr_state=0 map0state=normal||uninitialized||degraded||failed)
e3bba0e0 2653 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
0c046afd 2654 * map1state=uninitialized)
1484e727 2655 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
0c046afd 2656 * map1state=normal)
e3bba0e0 2657 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
0c046afd 2658 * map1state=degraded)
8e59f3d8
AK
2659 * 5/ Migration (migr_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2660 * map1state=normal)
0c046afd 2661 */
8e59f3d8
AK
2662static void migrate(struct imsm_dev *dev, struct intel_super *super,
2663 __u8 to_state, int migr_type)
3393c6af 2664{
0c046afd 2665 struct imsm_map *dest;
3393c6af
DW
2666 struct imsm_map *src = get_imsm_map(dev, 0);
2667
0c046afd 2668 dev->vol.migr_state = 1;
1484e727 2669 set_migr_type(dev, migr_type);
f8f603f1 2670 dev->vol.curr_migr_unit = 0;
0c046afd
DW
2671 dest = get_imsm_map(dev, 1);
2672
0556e1a2 2673 /* duplicate and then set the target end state in map[0] */
3393c6af 2674 memcpy(dest, src, sizeof_imsm_map(src));
28bce06f
AK
2675 if ((migr_type == MIGR_REBUILD) ||
2676 (migr_type == MIGR_GEN_MIGR)) {
0556e1a2
DW
2677 __u32 ord;
2678 int i;
2679
2680 for (i = 0; i < src->num_members; i++) {
2681 ord = __le32_to_cpu(src->disk_ord_tbl[i]);
2682 set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
2683 }
2684 }
2685
8e59f3d8
AK
2686 if (migr_type == MIGR_GEN_MIGR)
2687 /* Clear migration record */
2688 memset(super->migr_rec, 0, sizeof(struct migr_record));
2689
0c046afd 2690 src->map_state = to_state;
949c47a0 2691}
f8f603f1
DW
2692
2693static void end_migration(struct imsm_dev *dev, __u8 map_state)
2694{
2695 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2 2696 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
28bce06f 2697 int i, j;
0556e1a2
DW
2698
2699 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2700 * completed in the last migration.
2701 *
28bce06f 2702 * FIXME add support for raid-level-migration
0556e1a2
DW
2703 */
2704 for (i = 0; i < prev->num_members; i++)
28bce06f
AK
2705 for (j = 0; j < map->num_members; j++)
2706 /* during online capacity expansion
2707 * disk positions can change if takeover is used
2708 */
2709 if (ord_to_idx(map->disk_ord_tbl[j]) ==
2710 ord_to_idx(prev->disk_ord_tbl[i])) {
2711 map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
2712 break;
2713 }
f8f603f1
DW
2714
2715 dev->vol.migr_state = 0;
28bce06f 2716 dev->vol.migr_type = 0;
f8f603f1
DW
2717 dev->vol.curr_migr_unit = 0;
2718 map->map_state = map_state;
2719}
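The following sketch shows how the two helpers above are intended to bracket a rebuild; example_rebuild_cycle() is hypothetical and is not called anywhere in mdadm.

/* Sketch only: open a rebuild migration and close it again.
 * migrate() duplicates map[0] into map[1] and sets the target end state
 * in map[0]; end_migration() collapses back to a single map.
 */
static void example_rebuild_cycle(struct imsm_dev *dev,
				  struct intel_super *super)
{
	/* map[0].map_state must already hold the current (degraded) state */
	migrate(dev, super, IMSM_T_STATE_DEGRADED, MIGR_REBUILD);

	/* ... rebuild runs; mdmon advances dev->vol.curr_migr_unit ... */

	end_migration(dev, IMSM_T_STATE_NORMAL);
}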
0e600426 2720#endif
949c47a0
DW
2721
2722static int parse_raid_devices(struct intel_super *super)
2723{
2724 int i;
2725 struct imsm_dev *dev_new;
4d7b1503 2726 size_t len, len_migr;
401d313b 2727 size_t max_len = 0;
4d7b1503
DW
2728 size_t space_needed = 0;
2729 struct imsm_super *mpb = super->anchor;
949c47a0
DW
2730
2731 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2732 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
ba2de7ba 2733 struct intel_dev *dv;
949c47a0 2734
4d7b1503
DW
2735 len = sizeof_imsm_dev(dev_iter, 0);
2736 len_migr = sizeof_imsm_dev(dev_iter, 1);
2737 if (len_migr > len)
2738 space_needed += len_migr - len;
2739
ba2de7ba
DW
2740 dv = malloc(sizeof(*dv));
2741 if (!dv)
2742 return 1;
401d313b
AK
2743 if (max_len < len_migr)
2744 max_len = len_migr;
2745 if (max_len > len_migr)
2746 space_needed += max_len - len_migr;
2747 dev_new = malloc(max_len);
ba2de7ba
DW
2748 if (!dev_new) {
2749 free(dv);
949c47a0 2750 return 1;
ba2de7ba 2751 }
949c47a0 2752 imsm_copy_dev(dev_new, dev_iter);
ba2de7ba
DW
2753 dv->dev = dev_new;
2754 dv->index = i;
2755 dv->next = super->devlist;
2756 super->devlist = dv;
949c47a0 2757 }
cdddbdbc 2758
4d7b1503
DW
2759 /* ensure that super->buf is large enough when all raid devices
2760 * are migrating
2761 */
2762 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
2763 void *buf;
2764
2765 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
2766 if (posix_memalign(&buf, 512, len) != 0)
2767 return 1;
2768
1f45a8ad
DW
2769 memcpy(buf, super->buf, super->len);
2770 memset(buf + super->len, 0, len - super->len);
4d7b1503
DW
2771 free(super->buf);
2772 super->buf = buf;
2773 super->len = len;
2774 }
2775
cdddbdbc
DW
2776 return 0;
2777}
2778
604b746f
JD
2779/* retrieve a pointer to the bbm log which starts after all raid devices */
2780struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2781{
2782 void *ptr = NULL;
2783
2784 if (__le32_to_cpu(mpb->bbm_log_size)) {
2785 ptr = mpb;
2786 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2787 }
2788
2789 return ptr;
2790}
2791
d23fe947 2792static void __free_imsm(struct intel_super *super, int free_disks);
9ca2c81c 2793
cdddbdbc 2794/* load_imsm_mpb - read matrix metadata
f2f5c343 2795 * allocates super->mpb to be freed by free_imsm
cdddbdbc
DW
2796 */
2797static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2798{
2799 unsigned long long dsize;
cdddbdbc
DW
2800 unsigned long long sectors;
2801 struct stat;
6416d527 2802 struct imsm_super *anchor;
cdddbdbc
DW
2803 __u32 check_sum;
2804
cdddbdbc 2805 get_dev_size(fd, NULL, &dsize);
64436f06
N
2806 if (dsize < 1024) {
2807 if (devname)
2808 fprintf(stderr,
2809 Name ": %s: device to small for imsm\n",
2810 devname);
2811 return 1;
2812 }
cdddbdbc
DW
2813
2814 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2815 if (devname)
2816 fprintf(stderr,
2817 Name ": Cannot seek to anchor block on %s: %s\n",
2818 devname, strerror(errno));
2819 return 1;
2820 }
2821
949c47a0 2822 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
ad97895e
DW
2823 if (devname)
2824 fprintf(stderr,
2825 Name ": Failed to allocate imsm anchor buffer"
2826 " on %s\n", devname);
2827 return 1;
2828 }
949c47a0 2829 if (read(fd, anchor, 512) != 512) {
cdddbdbc
DW
2830 if (devname)
2831 fprintf(stderr,
2832 Name ": Cannot read anchor block on %s: %s\n",
2833 devname, strerror(errno));
6416d527 2834 free(anchor);
cdddbdbc
DW
2835 return 1;
2836 }
2837
6416d527 2838 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
cdddbdbc
DW
2839 if (devname)
2840 fprintf(stderr,
2841 Name ": no IMSM anchor on %s\n", devname);
6416d527 2842 free(anchor);
cdddbdbc
DW
2843 return 2;
2844 }
2845
d23fe947 2846 __free_imsm(super, 0);
f2f5c343
LM
2847 /* reload capability and hba */
2848
2849 /* capability and hba must be updated with new super allocation */
d424212e 2850 find_intel_hba_capability(fd, super, devname);
949c47a0
DW
2851 super->len = ROUND_UP(anchor->mpb_size, 512);
2852 if (posix_memalign(&super->buf, 512, super->len) != 0) {
cdddbdbc
DW
2853 if (devname)
2854 fprintf(stderr,
2855 Name ": unable to allocate %zu byte mpb buffer\n",
949c47a0 2856 super->len);
6416d527 2857 free(anchor);
cdddbdbc
DW
2858 return 2;
2859 }
949c47a0 2860 memcpy(super->buf, anchor, 512);
cdddbdbc 2861
6416d527
NB
2862 sectors = mpb_sectors(anchor) - 1;
2863 free(anchor);
8e59f3d8
AK
2864
2865 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
2866 fprintf(stderr, Name
2867 ": %s could not allocate migr_rec buffer\n", __func__);
2868 free(super->buf);
2869 return 2;
2870 }
2871
949c47a0 2872 if (!sectors) {
ecf45690
DW
2873 check_sum = __gen_imsm_checksum(super->anchor);
2874 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2875 if (devname)
2876 fprintf(stderr,
2877 Name ": IMSM checksum %x != %x on %s\n",
2878 check_sum,
2879 __le32_to_cpu(super->anchor->check_sum),
2880 devname);
2881 return 2;
2882 }
2883
a2b97981 2884 return 0;
949c47a0 2885 }
cdddbdbc
DW
2886
2887 /* read the extended mpb */
2888 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
2889 if (devname)
2890 fprintf(stderr,
2891 Name ": Cannot seek to extended mpb on %s: %s\n",
2892 devname, strerror(errno));
2893 return 1;
2894 }
2895
f21e18ca 2896 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
cdddbdbc
DW
2897 if (devname)
2898 fprintf(stderr,
2899 Name ": Cannot read extended mpb on %s: %s\n",
2900 devname, strerror(errno));
2901 return 2;
2902 }
2903
949c47a0
DW
2904 check_sum = __gen_imsm_checksum(super->anchor);
2905 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
cdddbdbc
DW
2906 if (devname)
2907 fprintf(stderr,
2908 Name ": IMSM checksum %x != %x on %s\n",
949c47a0 2909 check_sum, __le32_to_cpu(super->anchor->check_sum),
cdddbdbc 2910 devname);
db575f3b 2911 return 3;
cdddbdbc
DW
2912 }
2913
604b746f
JD
2914 /* FIXME the BBM log is disk specific so we cannot use this global
2915 * buffer for all disks. Ok for now since we only look at the global
2916 * bbm_log_size parameter to gate assembly
2917 */
2918 super->bbm_log = __get_imsm_bbm_log(super->anchor);
2919
a2b97981
DW
2920 return 0;
2921}
2922
8e59f3d8
AK
2923static int read_imsm_migr_rec(int fd, struct intel_super *super);
2924
a2b97981
DW
2925static int
2926load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
2927{
2928 int err;
2929
2930 err = load_imsm_mpb(fd, super, devname);
2931 if (err)
2932 return err;
2933 err = load_imsm_disk(fd, super, devname, keep_fd);
2934 if (err)
2935 return err;
2936 err = parse_raid_devices(super);
4d7b1503 2937
a2b97981 2938 return err;
cdddbdbc
DW
2939}
2940
ae6aad82
DW
2941static void __free_imsm_disk(struct dl *d)
2942{
2943 if (d->fd >= 0)
2944 close(d->fd);
2945 if (d->devname)
2946 free(d->devname);
0dcecb2e
DW
2947 if (d->e)
2948 free(d->e);
ae6aad82
DW
2949 free(d);
2950
2951}
1a64be56 2952
cdddbdbc
DW
2953static void free_imsm_disks(struct intel_super *super)
2954{
47ee5a45 2955 struct dl *d;
cdddbdbc 2956
47ee5a45
DW
2957 while (super->disks) {
2958 d = super->disks;
cdddbdbc 2959 super->disks = d->next;
ae6aad82 2960 __free_imsm_disk(d);
cdddbdbc 2961 }
cb82edca
AK
2962 while (super->disk_mgmt_list) {
2963 d = super->disk_mgmt_list;
2964 super->disk_mgmt_list = d->next;
2965 __free_imsm_disk(d);
2966 }
47ee5a45
DW
2967 while (super->missing) {
2968 d = super->missing;
2969 super->missing = d->next;
2970 __free_imsm_disk(d);
2971 }
2972
cdddbdbc
DW
2973}
2974
9ca2c81c 2975/* free all the pieces hanging off of a super pointer */
d23fe947 2976static void __free_imsm(struct intel_super *super, int free_disks)
cdddbdbc 2977{
88654014
LM
2978 struct intel_hba *elem, *next;
2979
9ca2c81c 2980 if (super->buf) {
949c47a0 2981 free(super->buf);
9ca2c81c
DW
2982 super->buf = NULL;
2983 }
f2f5c343
LM
2984 /* unlink capability description */
2985 super->orom = NULL;
8e59f3d8
AK
2986 if (super->migr_rec_buf) {
2987 free(super->migr_rec_buf);
2988 super->migr_rec_buf = NULL;
2989 }
d23fe947
DW
2990 if (free_disks)
2991 free_imsm_disks(super);
ba2de7ba 2992 free_devlist(super);
88654014
LM
2993 elem = super->hba;
2994 while (elem) {
2995 if (elem->path)
2996 free((void *)elem->path);
2997 next = elem->next;
2998 free(elem);
2999 elem = next;
88c32bb1 3000 }
88654014 3001 super->hba = NULL;
cdddbdbc
DW
3002}
3003
9ca2c81c
DW
3004static void free_imsm(struct intel_super *super)
3005{
d23fe947 3006 __free_imsm(super, 1);
9ca2c81c
DW
3007 free(super);
3008}
cdddbdbc
DW
3009
3010static void free_super_imsm(struct supertype *st)
3011{
3012 struct intel_super *super = st->sb;
3013
3014 if (!super)
3015 return;
3016
3017 free_imsm(super);
3018 st->sb = NULL;
3019}
3020
49133e57 3021static struct intel_super *alloc_super(void)
c2c087e6
DW
3022{
3023 struct intel_super *super = malloc(sizeof(*super));
3024
3025 if (super) {
3026 memset(super, 0, sizeof(*super));
bf5a934a 3027 super->current_vol = -1;
0dcecb2e 3028 super->create_offset = ~((__u32 ) 0);
c2c087e6 3029 }
c2c087e6
DW
3030 return super;
3031}
3032
f0f5a016
LM
3033/*
3034 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3035 */
d424212e 3036static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
f0f5a016
LM
3037{
3038 struct sys_dev *hba_name;
3039 int rv = 0;
3040
3041 if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
f2f5c343 3042 super->orom = NULL;
f0f5a016
LM
3043 super->hba = NULL;
3044 return 0;
3045 }
3046 hba_name = find_disk_attached_hba(fd, NULL);
3047 if (!hba_name) {
d424212e 3048 if (devname)
f0f5a016
LM
3049 fprintf(stderr,
3050 Name ": %s is not attached to Intel(R) RAID controller.\n",
d424212e 3051 devname);
f0f5a016
LM
3052 return 1;
3053 }
3054 rv = attach_hba_to_super(super, hba_name);
3055 if (rv == 2) {
d424212e
N
3056 if (devname) {
3057 struct intel_hba *hba = super->hba;
f0f5a016 3058
f0f5a016
LM
3059 fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
3060 "controller (%s),\n"
3061 " but the container is assigned to Intel(R) "
3062 "%s RAID controller (",
d424212e 3063 devname,
f0f5a016
LM
3064 hba_name->path,
3065 hba_name->pci_id ? : "Err!",
3066 get_sys_dev_type(hba_name->type));
3067
f0f5a016
LM
3068 while (hba) {
3069 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
3070 if (hba->next)
3071 fprintf(stderr, ", ");
3072 hba = hba->next;
3073 }
3074
3075 fprintf(stderr, ").\n"
3076 " Mixing devices attached to different controllers "
3077 "is not allowed.\n");
3078 }
3079 free_sys_dev(&hba_name);
3080 return 2;
3081 }
f2f5c343 3082 super->orom = find_imsm_capability(hba_name->type);
f0f5a016 3083 free_sys_dev(&hba_name);
f2f5c343
LM
3084 if (!super->orom)
3085 return 3;
f0f5a016
LM
3086 return 0;
3087}
3088
cdddbdbc 3089#ifndef MDASSEMBLE
47ee5a45
DW
3090/* find_missing - helper routine for load_super_imsm_all that identifies
3091 * disks that have disappeared from the system. This routine relies on
3092 * the mpb being up to date, which it is at load time.
3093 */
3094static int find_missing(struct intel_super *super)
3095{
3096 int i;
3097 struct imsm_super *mpb = super->anchor;
3098 struct dl *dl;
3099 struct imsm_disk *disk;
47ee5a45
DW
3100
3101 for (i = 0; i < mpb->num_disks; i++) {
3102 disk = __get_imsm_disk(mpb, i);
54c2c1ea 3103 dl = serial_to_dl(disk->serial, super);
47ee5a45
DW
3104 if (dl)
3105 continue;
47ee5a45
DW
3106
3107 dl = malloc(sizeof(*dl));
3108 if (!dl)
3109 return 1;
3110 dl->major = 0;
3111 dl->minor = 0;
3112 dl->fd = -1;
3113 dl->devname = strdup("missing");
3114 dl->index = i;
3115 serialcpy(dl->serial, disk->serial);
3116 dl->disk = *disk;
689c9bf3 3117 dl->e = NULL;
47ee5a45
DW
3118 dl->next = super->missing;
3119 super->missing = dl;
3120 }
3121
3122 return 0;
3123}
3124
a2b97981
DW
3125static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3126{
3127 struct intel_disk *idisk = disk_list;
3128
3129 while (idisk) {
3130 if (serialcmp(idisk->disk.serial, serial) == 0)
3131 break;
3132 idisk = idisk->next;
3133 }
3134
3135 return idisk;
3136}
3137
3138static int __prep_thunderdome(struct intel_super **table, int tbl_size,
3139 struct intel_super *super,
3140 struct intel_disk **disk_list)
3141{
3142 struct imsm_disk *d = &super->disks->disk;
3143 struct imsm_super *mpb = super->anchor;
3144 int i, j;
3145
3146 for (i = 0; i < tbl_size; i++) {
3147 struct imsm_super *tbl_mpb = table[i]->anchor;
3148 struct imsm_disk *tbl_d = &table[i]->disks->disk;
3149
3150 if (tbl_mpb->family_num == mpb->family_num) {
3151 if (tbl_mpb->check_sum == mpb->check_sum) {
3152 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3153 __func__, super->disks->major,
3154 super->disks->minor,
3155 table[i]->disks->major,
3156 table[i]->disks->minor);
3157 break;
3158 }
3159
3160 if (((is_configured(d) && !is_configured(tbl_d)) ||
3161 is_configured(d) == is_configured(tbl_d)) &&
3162 tbl_mpb->generation_num < mpb->generation_num) {
3163 /* current version of the mpb is a
3164 * better candidate than the one in
3165 * super_table, but copy over "cross
3166 * generational" status
3167 */
3168 struct intel_disk *idisk;
3169
3170 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3171 __func__, super->disks->major,
3172 super->disks->minor,
3173 table[i]->disks->major,
3174 table[i]->disks->minor);
3175
3176 idisk = disk_list_get(tbl_d->serial, *disk_list);
3177 if (idisk && is_failed(&idisk->disk))
3178 tbl_d->status |= FAILED_DISK;
3179 break;
3180 } else {
3181 struct intel_disk *idisk;
3182 struct imsm_disk *disk;
3183
3184 /* tbl_mpb is more up to date, but copy
3185 * over cross generational status before
3186 * returning
3187 */
3188 disk = __serial_to_disk(d->serial, mpb, NULL);
3189 if (disk && is_failed(disk))
3190 d->status |= FAILED_DISK;
3191
3192 idisk = disk_list_get(d->serial, *disk_list);
3193 if (idisk) {
3194 idisk->owner = i;
3195 if (disk && is_configured(disk))
3196 idisk->disk.status |= CONFIGURED_DISK;
3197 }
3198
3199 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3200 __func__, super->disks->major,
3201 super->disks->minor,
3202 table[i]->disks->major,
3203 table[i]->disks->minor);
3204
3205 return tbl_size;
3206 }
3207 }
3208 }
3209
3210 if (i >= tbl_size)
3211 table[tbl_size++] = super;
3212 else
3213 table[i] = super;
3214
3215 /* update/extend the merged list of imsm_disk records */
3216 for (j = 0; j < mpb->num_disks; j++) {
3217 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
3218 struct intel_disk *idisk;
3219
3220 idisk = disk_list_get(disk->serial, *disk_list);
3221 if (idisk) {
3222 idisk->disk.status |= disk->status;
3223 if (is_configured(&idisk->disk) ||
3224 is_failed(&idisk->disk))
3225 idisk->disk.status &= ~(SPARE_DISK);
3226 } else {
3227 idisk = calloc(1, sizeof(*idisk));
3228 if (!idisk)
3229 return -1;
3230 idisk->owner = IMSM_UNKNOWN_OWNER;
3231 idisk->disk = *disk;
3232 idisk->next = *disk_list;
3233 *disk_list = idisk;
3234 }
3235
3236 if (serialcmp(idisk->disk.serial, d->serial) == 0)
3237 idisk->owner = i;
3238 }
3239
3240 return tbl_size;
3241}
3242
3243static struct intel_super *
3244validate_members(struct intel_super *super, struct intel_disk *disk_list,
3245 const int owner)
3246{
3247 struct imsm_super *mpb = super->anchor;
3248 int ok_count = 0;
3249 int i;
3250
3251 for (i = 0; i < mpb->num_disks; i++) {
3252 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3253 struct intel_disk *idisk;
3254
3255 idisk = disk_list_get(disk->serial, disk_list);
3256 if (idisk) {
3257 if (idisk->owner == owner ||
3258 idisk->owner == IMSM_UNKNOWN_OWNER)
3259 ok_count++;
3260 else
3261 dprintf("%s: '%.16s' owner %d != %d\n",
3262 __func__, disk->serial, idisk->owner,
3263 owner);
3264 } else {
3265 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3266 __func__, __le32_to_cpu(mpb->family_num), i,
3267 disk->serial);
3268 break;
3269 }
3270 }
3271
3272 if (ok_count == mpb->num_disks)
3273 return super;
3274 return NULL;
3275}
3276
3277static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3278{
3279 struct intel_super *s;
3280
3281 for (s = super_list; s; s = s->next) {
3282 if (family_num != s->anchor->family_num)
3283 continue;
3284 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3285 __le32_to_cpu(family_num), s->disks->devname);
3286 }
3287}
3288
3289static struct intel_super *
3290imsm_thunderdome(struct intel_super **super_list, int len)
3291{
3292 struct intel_super *super_table[len];
3293 struct intel_disk *disk_list = NULL;
3294 struct intel_super *champion, *spare;
3295 struct intel_super *s, **del;
3296 int tbl_size = 0;
3297 int conflict;
3298 int i;
3299
3300 memset(super_table, 0, sizeof(super_table));
3301 for (s = *super_list; s; s = s->next)
3302 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
3303
3304 for (i = 0; i < tbl_size; i++) {
3305 struct imsm_disk *d;
3306 struct intel_disk *idisk;
3307 struct imsm_super *mpb = super_table[i]->anchor;
3308
3309 s = super_table[i];
3310 d = &s->disks->disk;
3311
3312 /* 'd' must appear in merged disk list for its
3313 * configuration to be valid
3314 */
3315 idisk = disk_list_get(d->serial, disk_list);
3316 if (idisk && idisk->owner == i)
3317 s = validate_members(s, disk_list, i);
3318 else
3319 s = NULL;
3320
3321 if (!s)
3322 dprintf("%s: marking family: %#x from %d:%d offline\n",
3323 __func__, mpb->family_num,
3324 super_table[i]->disks->major,
3325 super_table[i]->disks->minor);
3326 super_table[i] = s;
3327 }
3328
3329 /* This is where the mdadm implementation differs from the Windows
3330 * driver which has no strict concept of a container. We can only
3331 * assemble one family from a container, so when returning a prodigal
3332 * array member to this system the code will not be able to disambiguate
3333 * the container contents that should be assembled ("foreign" versus
3334 * "local"). It requires user intervention to set the orig_family_num
3335 * to a new value to establish a new container. The Windows driver in
3336 * this situation fixes up the volume name in place and manages the
3337 * foreign array as an independent entity.
3338 */
3339 s = NULL;
3340 spare = NULL;
3341 conflict = 0;
3342 for (i = 0; i < tbl_size; i++) {
3343 struct intel_super *tbl_ent = super_table[i];
3344 int is_spare = 0;
3345
3346 if (!tbl_ent)
3347 continue;
3348
3349 if (tbl_ent->anchor->num_raid_devs == 0) {
3350 spare = tbl_ent;
3351 is_spare = 1;
3352 }
3353
3354 if (s && !is_spare) {
3355 show_conflicts(tbl_ent->anchor->family_num, *super_list);
3356 conflict++;
3357 } else if (!s && !is_spare)
3358 s = tbl_ent;
3359 }
3360
3361 if (!s)
3362 s = spare;
3363 if (!s) {
3364 champion = NULL;
3365 goto out;
3366 }
3367 champion = s;
3368
3369 if (conflict)
3370 fprintf(stderr, "Chose family %#x on '%s', "
3371 "assemble conflicts to new container with '--update=uuid'\n",
3372 __le32_to_cpu(s->anchor->family_num), s->disks->devname);
3373
3374 /* collect all dl's onto 'champion', and update them to
3375 * champion's version of the status
3376 */
3377 for (s = *super_list; s; s = s->next) {
3378 struct imsm_super *mpb = champion->anchor;
3379 struct dl *dl = s->disks;
3380
3381 if (s == champion)
3382 continue;
3383
3384 for (i = 0; i < mpb->num_disks; i++) {
3385 struct imsm_disk *disk;
3386
3387 disk = __serial_to_disk(dl->serial, mpb, &dl->index);
3388 if (disk) {
3389 dl->disk = *disk;
3390 /* only set index on disks that are a member of
3391 * a populated container, i.e. one with
3392 * raid_devs
3393 */
3394 if (is_failed(&dl->disk))
3395 dl->index = -2;
3396 else if (is_spare(&dl->disk))
3397 dl->index = -1;
3398 break;
3399 }
3400 }
3401
3402 if (i >= mpb->num_disks) {
3403 struct intel_disk *idisk;
3404
3405 idisk = disk_list_get(dl->serial, disk_list);
ecf408e9 3406 if (idisk && is_spare(&idisk->disk) &&
a2b97981
DW
3407 !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
3408 dl->index = -1;
3409 else {
3410 dl->index = -2;
3411 continue;
3412 }
3413 }
3414
3415 dl->next = champion->disks;
3416 champion->disks = dl;
3417 s->disks = NULL;
3418 }
3419
3420 /* delete 'champion' from super_list */
3421 for (del = super_list; *del; ) {
3422 if (*del == champion) {
3423 *del = (*del)->next;
3424 break;
3425 } else
3426 del = &(*del)->next;
3427 }
3428 champion->next = NULL;
3429
3430 out:
3431 while (disk_list) {
3432 struct intel_disk *idisk = disk_list;
3433
3434 disk_list = disk_list->next;
3435 free(idisk);
3436 }
3437
3438 return champion;
3439}
3440
cdddbdbc 3441static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
e1902a7b 3442 char *devname)
cdddbdbc
DW
3443{
3444 struct mdinfo *sra;
a2b97981
DW
3445 struct intel_super *super_list = NULL;
3446 struct intel_super *super = NULL;
db575f3b 3447 int devnum = fd2devnum(fd);
a2b97981 3448 struct mdinfo *sd;
db575f3b 3449 int retry;
a2b97981
DW
3450 int err = 0;
3451 int i;
dab4a513
DW
3452
3453 /* check if 'fd' is an opened container */
b526e52d 3454 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
cdddbdbc
DW
3455 if (!sra)
3456 return 1;
3457
3458 if (sra->array.major_version != -1 ||
3459 sra->array.minor_version != -2 ||
1602d52c
AW
3460 strcmp(sra->text_version, "imsm") != 0) {
3461 err = 1;
3462 goto error;
3463 }
a2b97981
DW
3464 /* load all mpbs */
3465 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
49133e57 3466 struct intel_super *s = alloc_super();
7a6ecd55 3467 char nm[32];
a2b97981 3468 int dfd;
f2f5c343 3469 int rv;
a2b97981
DW
3470
3471 err = 1;
3472 if (!s)
3473 goto error;
3474 s->next = super_list;
3475 super_list = s;
cdddbdbc 3476
a2b97981 3477 err = 2;
cdddbdbc 3478 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3479 dfd = dev_open(nm, O_RDWR);
a2b97981
DW
3480 if (dfd < 0)
3481 goto error;
3482
d424212e 3483 rv = find_intel_hba_capability(dfd, s, devname);
f2f5c343
LM
3484 /* no orom/efi capability, or the disk is attached to a non-Intel hba */
3485 if (rv != 0)
3486 goto error;
3487
e1902a7b 3488 err = load_and_parse_mpb(dfd, s, NULL, 1);
db575f3b
DW
3489
3490 /* retry the load if we might have raced against mdmon */
a2b97981 3491 if (err == 3 && mdmon_running(devnum))
db575f3b
DW
3492 for (retry = 0; retry < 3; retry++) {
3493 usleep(3000);
e1902a7b 3494 err = load_and_parse_mpb(dfd, s, NULL, 1);
a2b97981 3495 if (err != 3)
db575f3b
DW
3496 break;
3497 }
a2b97981
DW
3498 if (err)
3499 goto error;
cdddbdbc
DW
3500 }
3501
a2b97981
DW
3502 /* all mpbs enter, maybe one leaves */
3503 super = imsm_thunderdome(&super_list, i);
3504 if (!super) {
3505 err = 1;
3506 goto error;
cdddbdbc
DW
3507 }
3508
47ee5a45
DW
3509 if (find_missing(super) != 0) {
3510 free_imsm(super);
a2b97981
DW
3511 err = 2;
3512 goto error;
47ee5a45 3513 }
8e59f3d8
AK
3514
3515 /* load migration record */
3516 err = load_imsm_migr_rec(super, NULL);
3517 if (err) {
3518 err = 4;
3519 goto error;
3520 }
a2b97981
DW
3521 err = 0;
3522
3523 error:
3524 while (super_list) {
3525 struct intel_super *s = super_list;
3526
3527 super_list = super_list->next;
3528 free_imsm(s);
3529 }
1602d52c 3530 sysfs_free(sra);
a2b97981
DW
3531
3532 if (err)
3533 return err;
f7e7067b 3534
cdddbdbc 3535 *sbp = super;
db575f3b 3536 st->container_dev = devnum;
a2b97981 3537 if (err == 0 && st->ss == NULL) {
bf5a934a 3538 st->ss = &super_imsm;
cdddbdbc
DW
3539 st->minor_version = 0;
3540 st->max_devs = IMSM_MAX_DEVICES;
3541 }
cdddbdbc
DW
3542 return 0;
3543}
2b959fbf
N
3544
3545static int load_container_imsm(struct supertype *st, int fd, char *devname)
3546{
3547 return load_super_imsm_all(st, fd, &st->sb, devname);
3548}
cdddbdbc
DW
3549#endif
3550
3551static int load_super_imsm(struct supertype *st, int fd, char *devname)
3552{
3553 struct intel_super *super;
3554 int rv;
3555
691c6ee1
N
3556 if (test_partition(fd))
3557 /* IMSM not allowed on partitions */
3558 return 1;
3559
37424f13
DW
3560 free_super_imsm(st);
3561
49133e57 3562 super = alloc_super();
cdddbdbc
DW
3563 if (!super) {
3564 fprintf(stderr,
3565 Name ": malloc of %zu failed.\n",
3566 sizeof(*super));
3567 return 1;
3568 }
ea2bc72b
LM
3569 /* Load hba and capabilities if they exist.
3570 * But do not preclude loading metadata in case capabilities or hba are
3571 * non-compliant and ignore_hw_compat is set.
3572 */
d424212e 3573 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 3574 /* no orom/efi capability, or the disk is attached to a non-Intel hba */
ea2bc72b 3575 if ((rv != 0) && (st->ignore_hw_compat == 0)) {
f2f5c343
LM
3576 if (devname)
3577 fprintf(stderr,
3578 Name ": No OROM/EFI properties for %s\n", devname);
3579 free_imsm(super);
3580 return 2;
3581 }
a2b97981 3582 rv = load_and_parse_mpb(fd, super, devname, 0);
cdddbdbc
DW
3583
3584 if (rv) {
3585 if (devname)
3586 fprintf(stderr,
3587 Name ": Failed to load all information "
3588 "sections on %s\n", devname);
3589 free_imsm(super);
3590 return rv;
3591 }
3592
3593 st->sb = super;
3594 if (st->ss == NULL) {
3595 st->ss = &super_imsm;
3596 st->minor_version = 0;
3597 st->max_devs = IMSM_MAX_DEVICES;
3598 }
8e59f3d8
AK
3599
3600 /* load migration record */
3601 load_imsm_migr_rec(super, NULL);
3602
cdddbdbc
DW
3603 return 0;
3604}
3605
ef6ffade
DW
3606static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3607{
3608 if (info->level == 1)
3609 return 128;
3610 return info->chunk_size >> 9;
3611}
3612
ff596308 3613static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
ef6ffade
DW
3614{
3615 __u32 num_stripes;
3616
3617 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
ff596308 3618 num_stripes /= num_domains;
ef6ffade
DW
3619
3620 return num_stripes;
3621}
3622
fcfd9599
DW
3623static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3624{
4025c288
DW
3625 if (info->level == 1)
3626 return info->size * 2;
3627 else
3628 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
fcfd9599
DW
3629}
3630
4d1313e9
DW
3631static void imsm_update_version_info(struct intel_super *super)
3632{
3633 /* update the version and attributes */
3634 struct imsm_super *mpb = super->anchor;
3635 char *version;
3636 struct imsm_dev *dev;
3637 struct imsm_map *map;
3638 int i;
3639
3640 for (i = 0; i < mpb->num_raid_devs; i++) {
3641 dev = get_imsm_dev(super, i);
3642 map = get_imsm_map(dev, 0);
3643 if (__le32_to_cpu(dev->size_high) > 0)
3644 mpb->attributes |= MPB_ATTRIB_2TB;
3645
3646 /* FIXME detect when an array spans a port multiplier */
3647 #if 0
3648 mpb->attributes |= MPB_ATTRIB_PM;
3649 #endif
3650
3651 if (mpb->num_raid_devs > 1 ||
3652 mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
3653 version = MPB_VERSION_ATTRIBS;
3654 switch (get_imsm_raid_level(map)) {
3655 case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
3656 case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
3657 case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
3658 case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
3659 }
3660 } else {
3661 if (map->num_members >= 5)
3662 version = MPB_VERSION_5OR6_DISK_ARRAY;
3663 else if (dev->status == DEV_CLONE_N_GO)
3664 version = MPB_VERSION_CNG;
3665 else if (get_imsm_raid_level(map) == 5)
3666 version = MPB_VERSION_RAID5;
3667 else if (map->num_members >= 3)
3668 version = MPB_VERSION_3OR4_DISK_ARRAY;
3669 else if (get_imsm_raid_level(map) == 1)
3670 version = MPB_VERSION_RAID1;
3671 else
3672 version = MPB_VERSION_RAID0;
3673 }
3674 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
3675 }
3676}
3677
aa534678
DW
3678static int check_name(struct intel_super *super, char *name, int quiet)
3679{
3680 struct imsm_super *mpb = super->anchor;
3681 char *reason = NULL;
3682 int i;
3683
3684 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3685 reason = "must be 16 characters or less";
3686
3687 for (i = 0; i < mpb->num_raid_devs; i++) {
3688 struct imsm_dev *dev = get_imsm_dev(super, i);
3689
3690 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3691 reason = "already exists";
3692 break;
3693 }
3694 }
3695
3696 if (reason && !quiet)
3697 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3698
3699 return !reason;
3700}
3701
8b353278
DW
3702static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3703 unsigned long long size, char *name,
3704 char *homehost, int *uuid)
cdddbdbc 3705{
c2c087e6
DW
3706 /* We are creating a volume inside a pre-existing container,
3707 * so st->sb is already set.
3708 */
3709 struct intel_super *super = st->sb;
949c47a0 3710 struct imsm_super *mpb = super->anchor;
ba2de7ba 3711 struct intel_dev *dv;
c2c087e6
DW
3712 struct imsm_dev *dev;
3713 struct imsm_vol *vol;
3714 struct imsm_map *map;
3715 int idx = mpb->num_raid_devs;
3716 int i;
3717 unsigned long long array_blocks;
2c092cad 3718 size_t size_old, size_new;
ff596308 3719 __u32 num_data_stripes;
cdddbdbc 3720
88c32bb1 3721 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
c2c087e6 3722 fprintf(stderr, Name": This imsm-container already has the "
88c32bb1 3723 "maximum of %d volumes\n", super->orom->vpa);
c2c087e6
DW
3724 return 0;
3725 }
3726
2c092cad
DW
3727 /* ensure the mpb is large enough for the new data */
3728 size_old = __le32_to_cpu(mpb->mpb_size);
3729 size_new = disks_to_mpb_size(info->nr_disks);
3730 if (size_new > size_old) {
3731 void *mpb_new;
3732 size_t size_round = ROUND_UP(size_new, 512);
3733
3734 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3735 fprintf(stderr, Name": could not allocate new mpb\n");
3736 return 0;
3737 }
8e59f3d8
AK
3738 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3739 fprintf(stderr, Name
3740 ": %s could not allocate migr_rec buffer\n",
3741 __func__);
3742 free(super->buf);
3743 free(super);
3744 return 0;
3745 }
2c092cad
DW
3746 memcpy(mpb_new, mpb, size_old);
3747 free(mpb);
3748 mpb = mpb_new;
949c47a0 3749 super->anchor = mpb_new;
2c092cad
DW
3750 mpb->mpb_size = __cpu_to_le32(size_new);
3751 memset(mpb_new + size_old, 0, size_round - size_old);
3752 }
bf5a934a 3753 super->current_vol = idx;
d23fe947
DW
3754 /* when creating the first raid device in this container set num_disks
3755 * to zero, i.e. delete this spare and add raid member devices in
3756 * add_to_super_imsm_volume()
3757 */
3758 if (super->current_vol == 0)
3759 mpb->num_disks = 0;
5a038140 3760
aa534678
DW
3761 if (!check_name(super, name, 0))
3762 return 0;
ba2de7ba
DW
3763 dv = malloc(sizeof(*dv));
3764 if (!dv) {
3765 fprintf(stderr, Name ": failed to allocate device list entry\n");
3766 return 0;
3767 }
1a2487c2 3768 dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
949c47a0 3769 if (!dev) {
ba2de7ba 3770 free(dv);
949c47a0
DW
3771 fprintf(stderr, Name": could not allocate raid device\n");
3772 return 0;
3773 }
1a2487c2 3774
c2c087e6 3775 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
03bcbc65
DW
3776 if (info->level == 1)
3777 array_blocks = info_to_blocks_per_member(info);
3778 else
3779 array_blocks = calc_array_size(info->level, info->raid_disks,
3780 info->layout, info->chunk_size,
3781 info->size*2);
979d38be
DW
3782 /* round array size down to closest MB */
3783 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
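/* Illustrative example: with SECT_PER_MB_SHIFT == 11 (2048 sectors per
 * MiB) an array_blocks value of 2097407 sectors becomes
 * (2097407 >> 11) << 11 = 2097152 sectors, i.e. exactly 1024 MiB; the
 * trailing partial megabyte is discarded.
 */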
3784
c2c087e6
DW
3785 dev->size_low = __cpu_to_le32((__u32) array_blocks);
3786 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1a2487c2 3787 dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
c2c087e6
DW
3788 vol = &dev->vol;
3789 vol->migr_state = 0;
1484e727 3790 set_migr_type(dev, MIGR_INIT);
c2c087e6 3791 vol->dirty = 0;
f8f603f1 3792 vol->curr_migr_unit = 0;
a965f303 3793 map = get_imsm_map(dev, 0);
0dcecb2e 3794 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
fcfd9599 3795 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
ef6ffade 3796 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
0556e1a2 3797 map->failed_disk_num = ~0;
c2c087e6
DW
3798 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
3799 IMSM_T_STATE_NORMAL;
252d23c0 3800 map->ddf = 1;
ef6ffade
DW
3801
3802 if (info->level == 1 && info->raid_disks > 2) {
38950822
AW
3803 free(dev);
3804 free(dv);
ef6ffade
DW
3805 fprintf(stderr, Name": imsm does not support more than 2 disks"
3806 "in a raid1 volume\n");
3807 return 0;
3808 }
81062a36
DW
3809
3810 map->raid_level = info->level;
4d1313e9 3811 if (info->level == 10) {
c2c087e6 3812 map->raid_level = 1;
4d1313e9 3813 map->num_domains = info->raid_disks / 2;
81062a36
DW
3814 } else if (info->level == 1)
3815 map->num_domains = info->raid_disks;
3816 else
ff596308 3817 map->num_domains = 1;
81062a36 3818
ff596308
DW
3819 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
3820 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
ef6ffade 3821
c2c087e6
DW
3822 map->num_members = info->raid_disks;
3823 for (i = 0; i < map->num_members; i++) {
3824 /* initialized in add_to_super */
4eb26970 3825 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
c2c087e6 3826 }
949c47a0 3827 mpb->num_raid_devs++;
ba2de7ba
DW
3828
3829 dv->dev = dev;
3830 dv->index = super->current_vol;
3831 dv->next = super->devlist;
3832 super->devlist = dv;
c2c087e6 3833
4d1313e9
DW
3834 imsm_update_version_info(super);
3835
c2c087e6 3836 return 1;
cdddbdbc
DW
3837}
3838
bf5a934a
DW
3839static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
3840 unsigned long long size, char *name,
3841 char *homehost, int *uuid)
3842{
3843 /* This is primarily called by Create when creating a new array.
3844 * We will then get add_to_super called for each component, and then
3845 * write_init_super called to write it out to each device.
3846 * For IMSM, Create can create on fresh devices or on a pre-existing
3847 * array.
3848 * To create on a pre-existing array a different method will be called.
3849 * This one is just for fresh drives.
3850 */
3851 struct intel_super *super;
3852 struct imsm_super *mpb;
3853 size_t mpb_size;
4d1313e9 3854 char *version;
bf5a934a 3855
bf5a934a 3856 if (st->sb)
e683ca88
DW
3857 return init_super_imsm_volume(st, info, size, name, homehost, uuid);
3858
3859 if (info)
3860 mpb_size = disks_to_mpb_size(info->nr_disks);
3861 else
3862 mpb_size = 512;
bf5a934a 3863
49133e57 3864 super = alloc_super();
e683ca88 3865 if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
bf5a934a 3866 free(super);
e683ca88
DW
3867 super = NULL;
3868 }
3869 if (!super) {
3870 fprintf(stderr, Name
3871 ": %s could not allocate superblock\n", __func__);
bf5a934a
DW
3872 return 0;
3873 }
8e59f3d8
AK
3874 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3875 fprintf(stderr, Name
3876 ": %s could not allocate migr_rec buffer\n", __func__);
3877 free(super->buf);
3878 free(super);
3879 return 0;
3880 }
e683ca88 3881 memset(super->buf, 0, mpb_size);
ef649044 3882 mpb = super->buf;
e683ca88
DW
3883 mpb->mpb_size = __cpu_to_le32(mpb_size);
3884 st->sb = super;
3885
3886 if (info == NULL) {
3887 /* zeroing superblock */
3888 return 0;
3889 }
bf5a934a 3890
4d1313e9
DW
3891 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
3892
3893 version = (char *) mpb->sig;
3894 strcpy(version, MPB_SIGNATURE);
3895 version += strlen(MPB_SIGNATURE);
3896 strcpy(version, MPB_VERSION_RAID0);
bf5a934a 3897
bf5a934a
DW
3898 return 1;
3899}
3900
0e600426 3901#ifndef MDASSEMBLE
f20c3968 3902static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
bf5a934a
DW
3903 int fd, char *devname)
3904{
3905 struct intel_super *super = st->sb;
d23fe947 3906 struct imsm_super *mpb = super->anchor;
bf5a934a
DW
3907 struct dl *dl;
3908 struct imsm_dev *dev;
3909 struct imsm_map *map;
4eb26970 3910 int slot;
bf5a934a 3911
949c47a0 3912 dev = get_imsm_dev(super, super->current_vol);
a965f303 3913 map = get_imsm_map(dev, 0);
bf5a934a 3914
208933a7
N
3915 if (! (dk->state & (1<<MD_DISK_SYNC))) {
3916 fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
3917 devname);
3918 return 1;
3919 }
3920
efb30e7f
DW
3921 if (fd == -1) {
3922 /* we're doing autolayout so grab the pre-marked (in
3923 * validate_geometry) raid_disk
3924 */
3925 for (dl = super->disks; dl; dl = dl->next)
3926 if (dl->raiddisk == dk->raid_disk)
3927 break;
3928 } else {
3929 for (dl = super->disks; dl ; dl = dl->next)
3930 if (dl->major == dk->major &&
3931 dl->minor == dk->minor)
3932 break;
3933 }
d23fe947 3934
208933a7
N
3935 if (!dl) {
3936 fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
f20c3968 3937 return 1;
208933a7 3938 }
bf5a934a 3939
d23fe947
DW
3940 /* add a pristine spare to the metadata */
3941 if (dl->index < 0) {
3942 dl->index = super->anchor->num_disks;
3943 super->anchor->num_disks++;
3944 }
4eb26970
DW
3945 /* Check the device has not already been added */
3946 slot = get_imsm_disk_slot(map, dl->index);
3947 if (slot >= 0 &&
98130f40 3948 (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
4eb26970
DW
3949 fprintf(stderr, Name ": %s has been included in this array twice\n",
3950 devname);
3951 return 1;
3952 }
be73972f 3953 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
ee5aad5a 3954 dl->disk.status = CONFIGURED_DISK;
d23fe947
DW
3955
3956 /* if we are creating the first raid device update the family number */
3957 if (super->current_vol == 0) {
3958 __u32 sum;
3959 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
3960 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
3961
791b666a
AW
3962 if (!_dev || !_disk) {
3963 fprintf(stderr, Name ": BUG mpb setup error\n");
3964 return 1;
3965 }
d23fe947
DW
3966 *_dev = *dev;
3967 *_disk = dl->disk;
148acb7b
DW
3968 sum = random32();
3969 sum += __gen_imsm_checksum(mpb);
d23fe947 3970 mpb->family_num = __cpu_to_le32(sum);
148acb7b 3971 mpb->orig_family_num = mpb->family_num;
d23fe947 3972 }
f20c3968
DW
3973
3974 return 0;
bf5a934a
DW
3975}
3976
88654014 3977
f20c3968 3978static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
88654014 3979 int fd, char *devname)
cdddbdbc 3980{
c2c087e6 3981 struct intel_super *super = st->sb;
c2c087e6
DW
3982 struct dl *dd;
3983 unsigned long long size;
f2f27e63 3984 __u32 id;
c2c087e6
DW
3985 int rv;
3986 struct stat stb;
3987
88654014
LM
3988 /* If we are on a RAID enabled platform check that the disk is
3989 * attached to the raid controller.
3990 * We do not need to test disk attachment for container based additions,
3991 * they shall be already tested when the container was created/assembled.
88c32bb1 3992 */
d424212e 3993 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 3994 /* no OROM/EFI, or the disk is attached to a non-Intel HBA */
f0f5a016
LM
3995 if (rv != 0) {
3996 dprintf("capability: %p fd: %d ret: %d\n",
3997 super->orom, fd, rv);
3998 return 1;
88c32bb1
DW
3999 }
4000
f20c3968
DW
4001 if (super->current_vol >= 0)
4002 return add_to_super_imsm_volume(st, dk, fd, devname);
bf5a934a 4003
c2c087e6
DW
4004 fstat(fd, &stb);
4005 dd = malloc(sizeof(*dd));
b9f594fe 4006 if (!dd) {
c2c087e6
DW
4007 fprintf(stderr,
4008 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
f20c3968 4009 return 1;
c2c087e6
DW
4010 }
4011 memset(dd, 0, sizeof(*dd));
4012 dd->major = major(stb.st_rdev);
4013 dd->minor = minor(stb.st_rdev);
b9f594fe 4014 dd->index = -1;
c2c087e6 4015 dd->devname = devname ? strdup(devname) : NULL;
c2c087e6 4016 dd->fd = fd;
689c9bf3 4017 dd->e = NULL;
1a64be56 4018 dd->action = DISK_ADD;
c2c087e6 4019 rv = imsm_read_serial(fd, devname, dd->serial);
32ba9157 4020 if (rv) {
c2c087e6 4021 fprintf(stderr,
0030e8d6 4022 Name ": failed to retrieve scsi serial, aborting\n");
949c47a0 4023 free(dd);
0030e8d6 4024 abort();
c2c087e6
DW
4025 }
4026
c2c087e6
DW
4027 get_dev_size(fd, NULL, &size);
4028 size /= 512;
1f24f035 4029 serialcpy(dd->disk.serial, dd->serial);
b9f594fe 4030 dd->disk.total_blocks = __cpu_to_le32(size);
ee5aad5a 4031 dd->disk.status = SPARE_DISK;
c2c087e6 4032 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
b9f594fe 4033 dd->disk.scsi_id = __cpu_to_le32(id);
c2c087e6 4034 else
b9f594fe 4035 dd->disk.scsi_id = __cpu_to_le32(0);
43dad3d6
DW
4036
4037 if (st->update_tail) {
1a64be56
LM
4038 dd->next = super->disk_mgmt_list;
4039 super->disk_mgmt_list = dd;
43dad3d6
DW
4040 } else {
4041 dd->next = super->disks;
4042 super->disks = dd;
ceaf0ee1 4043 super->updates_pending++;
43dad3d6 4044 }
f20c3968
DW
4045
4046 return 0;
cdddbdbc
DW
4047}
4048
1a64be56
LM
4049
4050static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
4051{
4052 struct intel_super *super = st->sb;
4053 struct dl *dd;
4054
4055 /* remove from super works only in mdmon - for communication
4056 * between manager and monitor. Check if the communication memory
4057 * buffer is prepared.
4058 */
4059 if (!st->update_tail) {
4060 fprintf(stderr,
4061 Name ": %s shall be used in mdmon context only"
4062 "(line %d).\n", __func__, __LINE__);
4063 return 1;
4064 }
4065 dd = malloc(sizeof(*dd));
4066 if (!dd) {
4067 fprintf(stderr,
4068 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4069 return 1;
4070 }
4071 memset(dd, 0, sizeof(*dd));
4072 dd->major = dk->major;
4073 dd->minor = dk->minor;
4074 dd->index = -1;
4075 dd->fd = -1;
4076 dd->disk.status = SPARE_DISK;
4077 dd->action = DISK_REMOVE;
4078
4079 dd->next = super->disk_mgmt_list;
4080 super->disk_mgmt_list = dd;
4081
4082
4083 return 0;
4084}
4085
f796af5d
DW
4086static int store_imsm_mpb(int fd, struct imsm_super *mpb);
4087
4088static union {
4089 char buf[512];
4090 struct imsm_super anchor;
4091} spare_record __attribute__ ((aligned(512)));
c2c087e6 4092
d23fe947
DW
4093/* spare records have their own family number and do not have any defined raid
4094 * devices
4095 */
4096static int write_super_imsm_spares(struct intel_super *super, int doclose)
4097{
d23fe947 4098 struct imsm_super *mpb = super->anchor;
f796af5d 4099 struct imsm_super *spare = &spare_record.anchor;
d23fe947
DW
4100 __u32 sum;
4101 struct dl *d;
4102
f796af5d
DW
4103 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
4104 spare->generation_num = __cpu_to_le32(1UL),
4105 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4106 spare->num_disks = 1,
4107 spare->num_raid_devs = 0,
4108 spare->cache_size = mpb->cache_size,
4109 spare->pwr_cycle_count = __cpu_to_le32(1),
4110
4111 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
4112 MPB_SIGNATURE MPB_VERSION_RAID0);
d23fe947
DW
4113
4114 for (d = super->disks; d; d = d->next) {
8796fdc4 4115 if (d->index != -1)
d23fe947
DW
4116 continue;
4117
f796af5d
DW
4118 spare->disk[0] = d->disk;
4119 sum = __gen_imsm_checksum(spare);
4120 spare->family_num = __cpu_to_le32(sum);
4121 spare->orig_family_num = 0;
4122 sum = __gen_imsm_checksum(spare);
4123 spare->check_sum = __cpu_to_le32(sum);
d23fe947 4124
f796af5d 4125 if (store_imsm_mpb(d->fd, spare)) {
d23fe947
DW
4126 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4127 __func__, d->major, d->minor, strerror(errno));
e74255d9 4128 return 1;
d23fe947
DW
4129 }
4130 if (doclose) {
4131 close(d->fd);
4132 d->fd = -1;
4133 }
4134 }
4135
e74255d9 4136 return 0;
d23fe947
DW
4137}
4138
36988a3d 4139static int write_super_imsm(struct supertype *st, int doclose)
cdddbdbc 4140{
36988a3d 4141 struct intel_super *super = st->sb;
949c47a0 4142 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4143 struct dl *d;
4144 __u32 generation;
4145 __u32 sum;
d23fe947 4146 int spares = 0;
949c47a0 4147 int i;
a48ac0a8 4148 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
36988a3d 4149 int num_disks = 0;
cdddbdbc 4150
c2c087e6
DW
4151 /* 'generation' is incremented every time the metadata is written */
4152 generation = __le32_to_cpu(mpb->generation_num);
4153 generation++;
4154 mpb->generation_num = __cpu_to_le32(generation);
4155
148acb7b
DW
4156 /* fix up cases where previous mdadm releases failed to set
4157 * orig_family_num
4158 */
4159 if (mpb->orig_family_num == 0)
4160 mpb->orig_family_num = mpb->family_num;
4161
d23fe947 4162 for (d = super->disks; d; d = d->next) {
8796fdc4 4163 if (d->index == -1)
d23fe947 4164 spares++;
36988a3d 4165 else {
d23fe947 4166 mpb->disk[d->index] = d->disk;
36988a3d
AK
4167 num_disks++;
4168 }
d23fe947 4169 }
36988a3d 4170 for (d = super->missing; d; d = d->next) {
47ee5a45 4171 mpb->disk[d->index] = d->disk;
36988a3d
AK
4172 num_disks++;
4173 }
4174 mpb->num_disks = num_disks;
4175 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
b9f594fe 4176
949c47a0
DW
4177 for (i = 0; i < mpb->num_raid_devs; i++) {
4178 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
36988a3d
AK
4179 struct imsm_dev *dev2 = get_imsm_dev(super, i);
4180 if (dev && dev2) {
4181 imsm_copy_dev(dev, dev2);
4182 mpb_size += sizeof_imsm_dev(dev, 0);
4183 }
949c47a0 4184 }
a48ac0a8
DW
4185 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
4186 mpb->mpb_size = __cpu_to_le32(mpb_size);
949c47a0 4187
c2c087e6 4188 /* recalculate checksum */
949c47a0 4189 sum = __gen_imsm_checksum(mpb);
c2c087e6
DW
4190 mpb->check_sum = __cpu_to_le32(sum);
4191
d23fe947 4192 /* write the mpb for disks that compose raid devices */
c2c087e6 4193 for (d = super->disks; d ; d = d->next) {
d23fe947
DW
4194 if (d->index < 0)
4195 continue;
f796af5d 4196 if (store_imsm_mpb(d->fd, mpb))
c2c087e6
DW
4197 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4198 __func__, d->major, d->minor, strerror(errno));
c2c087e6
DW
4199 if (doclose) {
4200 close(d->fd);
4201 d->fd = -1;
4202 }
4203 }
4204
d23fe947
DW
4205 if (spares)
4206 return write_super_imsm_spares(super, doclose);
4207
e74255d9 4208 return 0;
c2c087e6
DW
4209}
4210
0e600426 4211
9b1fb677 4212static int create_array(struct supertype *st, int dev_idx)
43dad3d6
DW
4213{
4214 size_t len;
4215 struct imsm_update_create_array *u;
4216 struct intel_super *super = st->sb;
9b1fb677 4217 struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
54c2c1ea
DW
4218 struct imsm_map *map = get_imsm_map(dev, 0);
4219 struct disk_info *inf;
4220 struct imsm_disk *disk;
4221 int i;
43dad3d6 4222
54c2c1ea
DW
4223 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
4224 sizeof(*inf) * map->num_members;
43dad3d6
DW
4225 u = malloc(len);
4226 if (!u) {
4227 fprintf(stderr, "%s: failed to allocate update buffer\n",
4228 __func__);
4229 return 1;
4230 }
4231
4232 u->type = update_create_array;
9b1fb677 4233 u->dev_idx = dev_idx;
43dad3d6 4234 imsm_copy_dev(&u->dev, dev);
54c2c1ea
DW
4235 inf = get_disk_info(u);
4236 for (i = 0; i < map->num_members; i++) {
98130f40 4237 int idx = get_imsm_disk_idx(dev, i, -1);
9b1fb677 4238
54c2c1ea
DW
4239 disk = get_imsm_disk(super, idx);
4240 serialcpy(inf[i].serial, disk->serial);
4241 }
43dad3d6
DW
4242 append_metadata_update(st, u, len);
4243
4244 return 0;
4245}
4246
1a64be56 4247static int mgmt_disk(struct supertype *st)
43dad3d6
DW
4248{
4249 struct intel_super *super = st->sb;
4250 size_t len;
1a64be56 4251 struct imsm_update_add_remove_disk *u;
43dad3d6 4252
1a64be56 4253 if (!super->disk_mgmt_list)
43dad3d6
DW
4254 return 0;
4255
4256 len = sizeof(*u);
4257 u = malloc(len);
4258 if (!u) {
4259 fprintf(stderr, "%s: failed to allocate update buffer\n",
4260 __func__);
4261 return 1;
4262 }
4263
1a64be56 4264 u->type = update_add_remove_disk;
43dad3d6
DW
4265 append_metadata_update(st, u, len);
4266
4267 return 0;
4268}
4269
c2c087e6
DW
4270static int write_init_super_imsm(struct supertype *st)
4271{
9b1fb677
DW
4272 struct intel_super *super = st->sb;
4273 int current_vol = super->current_vol;
4274
4275 /* we are done with current_vol reset it to point st at the container */
4276 super->current_vol = -1;
4277
8273f55e 4278 if (st->update_tail) {
43dad3d6
DW
4279 /* queue the recently created array / added disk
4280 * as a metadata update */
43dad3d6 4281 int rv;
8273f55e 4282
43dad3d6 4283 /* determine if we are creating a volume or adding a disk */
9b1fb677 4284 if (current_vol < 0) {
1a64be56
LM
4285 /* in the mgmt (add/remove) disk case we are running
4286 * in mdmon context, so don't close fd's
43dad3d6 4287 */
1a64be56 4288 return mgmt_disk(st);
43dad3d6 4289 } else
9b1fb677 4290 rv = create_array(st, current_vol);
8273f55e 4291
43dad3d6 4292 return rv;
d682f344
N
4293 } else {
4294 struct dl *d;
4295 for (d = super->disks; d; d = d->next)
4296 Kill(d->devname, NULL, 0, 1, 1);
36988a3d 4297 return write_super_imsm(st, 1);
d682f344 4298 }
cdddbdbc 4299}
0e600426 4300#endif
cdddbdbc 4301
e683ca88 4302static int store_super_imsm(struct supertype *st, int fd)
cdddbdbc 4303{
e683ca88
DW
4304 struct intel_super *super = st->sb;
4305 struct imsm_super *mpb = super ? super->anchor : NULL;
551c80c1 4306
e683ca88 4307 if (!mpb)
ad97895e
DW
4308 return 1;
4309
1799c9e8 4310#ifndef MDASSEMBLE
e683ca88 4311 return store_imsm_mpb(fd, mpb);
1799c9e8
N
4312#else
4313 return 1;
4314#endif
cdddbdbc
DW
4315}
4316
0e600426
N
4317static int imsm_bbm_log_size(struct imsm_super *mpb)
4318{
4319 return __le32_to_cpu(mpb->bbm_log_size);
4320}
4321
4322#ifndef MDASSEMBLE
cdddbdbc
DW
4323static int validate_geometry_imsm_container(struct supertype *st, int level,
4324 int layout, int raiddisks, int chunk,
c2c087e6 4325 unsigned long long size, char *dev,
2c514b71
NB
4326 unsigned long long *freesize,
4327 int verbose)
cdddbdbc 4328{
c2c087e6
DW
4329 int fd;
4330 unsigned long long ldsize;
f2f5c343
LM
4331 struct intel_super *super=NULL;
4332 int rv = 0;
cdddbdbc 4333
c2c087e6
DW
4334 if (level != LEVEL_CONTAINER)
4335 return 0;
4336 if (!dev)
4337 return 1;
4338
4339 fd = open(dev, O_RDONLY|O_EXCL, 0);
4340 if (fd < 0) {
2c514b71
NB
4341 if (verbose)
4342 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
4343 dev, strerror(errno));
c2c087e6
DW
4344 return 0;
4345 }
4346 if (!get_dev_size(fd, dev, &ldsize)) {
4347 close(fd);
4348 return 0;
4349 }
f2f5c343
LM
4350
4351 /* capabilities retrieve could be possible
4352 * note that there is no fd for the disks in array.
4353 */
4354 super = alloc_super();
4355 if (!super) {
4356 fprintf(stderr,
4357 Name ": malloc of %zu failed.\n",
4358 sizeof(*super));
4359 close(fd);
4360 return 0;
4361 }
4362
d424212e 4363 rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
f2f5c343
LM
4364 if (rv != 0) {
4365#if DEBUG
4366 char str[256];
4367 fd2devname(fd, str);
4368 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4369 fd, str, super->orom, rv, raiddisks);
4370#endif
4371 /* no OROM/EFI, or the disk is attached to a non-Intel HBA */
4372 close(fd);
4373 free_imsm(super);
4374 return 0;
4375 }
c2c087e6 4376 close(fd);
f2f5c343
LM
4377 if (super->orom && raiddisks > super->orom->tds) {
4378 if (verbose)
4379 fprintf(stderr, Name ": %d exceeds maximum number of"
4380 " platform supported disks: %d\n",
4381 raiddisks, super->orom->tds);
4382
4383 free_imsm(super);
4384 return 0;
4385 }
c2c087e6
DW
4386
4387 *freesize = avail_size_imsm(st, ldsize >> 9);
f2f5c343 4388 free_imsm(super);
c2c087e6
DW
4389
4390 return 1;
cdddbdbc
DW
4391}
4392
0dcecb2e
DW
4393static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4394{
4395 const unsigned long long base_start = e[*idx].start;
4396 unsigned long long end = base_start + e[*idx].size;
4397 int i;
4398
4399 if (base_start == end)
4400 return 0;
4401
4402 *idx = *idx + 1;
4403 for (i = *idx; i < num_extents; i++) {
4404 /* extend overlapping extents */
4405 if (e[i].start >= base_start &&
4406 e[i].start <= end) {
4407 if (e[i].size == 0)
4408 return 0;
4409 if (e[i].start + e[i].size > end)
4410 end = e[i].start + e[i].size;
4411 } else if (e[i].start > end) {
4412 *idx = i;
4413 break;
4414 }
4415 }
4416
4417 return end - base_start;
4418}
4419
4420static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
4421{
4422 /* build a composite disk with all known extents and generate a new
4423 * 'maxsize' given the "all disks in an array must share a common start
4424 * offset" constraint
4425 */
4426 struct extent *e = calloc(sum_extents, sizeof(*e));
4427 struct dl *dl;
4428 int i, j;
4429 int start_extent;
4430 unsigned long long pos;
b9d77223 4431 unsigned long long start = 0;
0dcecb2e
DW
4432 unsigned long long maxsize;
4433 unsigned long reserve;
4434
4435 if (!e)
a7dd165b 4436 return 0;
0dcecb2e
DW
4437
4438 /* coalesce and sort all extents. also, check to see if we need to
4439 * reserve space between member arrays
4440 */
4441 j = 0;
4442 for (dl = super->disks; dl; dl = dl->next) {
4443 if (!dl->e)
4444 continue;
4445 for (i = 0; i < dl->extent_cnt; i++)
4446 e[j++] = dl->e[i];
4447 }
4448 qsort(e, sum_extents, sizeof(*e), cmp_extent);
4449
4450 /* merge extents */
4451 i = 0;
4452 j = 0;
4453 while (i < sum_extents) {
4454 e[j].start = e[i].start;
4455 e[j].size = find_size(e, &i, sum_extents);
4456 j++;
4457 if (e[j-1].size == 0)
4458 break;
4459 }
4460
4461 pos = 0;
4462 maxsize = 0;
4463 start_extent = 0;
4464 i = 0;
4465 do {
4466 unsigned long long esize;
4467
4468 esize = e[i].start - pos;
4469 if (esize >= maxsize) {
4470 maxsize = esize;
4471 start = pos;
4472 start_extent = i;
4473 }
4474 pos = e[i].start + e[i].size;
4475 i++;
4476 } while (e[i-1].size);
4477 free(e);
4478
a7dd165b
DW
4479 if (maxsize == 0)
4480 return 0;
4481
4482 /* FIXME assumes volume at offset 0 is the first volume in a
4483 * container
4484 */
0dcecb2e
DW
4485 if (start_extent > 0)
4486 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
4487 else
4488 reserve = 0;
4489
4490 if (maxsize < reserve)
a7dd165b 4491 return 0;
0dcecb2e
DW
4492
4493 super->create_offset = ~((__u32) 0);
4494 if (start + reserve > super->create_offset)
a7dd165b 4495 return 0; /* start overflows create_offset */
0dcecb2e
DW
4496 super->create_offset = start + reserve;
4497
4498 return maxsize - reserve;
4499}
4500
88c32bb1
DW
4501static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
4502{
4503 if (level < 0 || level == 6 || level == 4)
4504 return 0;
4505
4506 /* if we have an orom prevent invalid raid levels */
4507 if (orom)
4508 switch (level) {
4509 case 0: return imsm_orom_has_raid0(orom);
4510 case 1:
4511 if (raiddisks > 2)
4512 return imsm_orom_has_raid1e(orom);
1c556e92
DW
4513 return imsm_orom_has_raid1(orom) && raiddisks == 2;
4514 case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
4515 case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
88c32bb1
DW
4516 }
4517 else
4518 return 1; /* not on an Intel RAID platform so anything goes */
4519
4520 return 0;
4521}
4522
73408129 4523
35f81cbb 4524#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
73408129
LM
4525/*
4526 * validate volume parameters with OROM/EFI capabilities
4527 */
6592ce37
DW
4528static int
4529validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
c21e737b 4530 int raiddisks, int *chunk, int verbose)
6592ce37 4531{
73408129
LM
4532#if DEBUG
4533 verbose = 1;
4534#endif
4535 /* validate container capabilities */
4536 if (super->orom && raiddisks > super->orom->tds) {
4537 if (verbose)
4538 fprintf(stderr, Name ": %d exceeds maximum number of"
4539 " platform supported disks: %d\n",
4540 raiddisks, super->orom->tds);
4541 return 0;
4542 }
4543
4544 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4545 if (super->orom && (!is_raid_level_supported(super->orom, level,
4546 raiddisks))) {
6592ce37
DW
4547 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4548 level, raiddisks, raiddisks > 1 ? "s" : "");
4549 return 0;
4550 }
c21e737b
CA
4551 if (super->orom && level != 1) {
4552 if (chunk && (*chunk == 0 || *chunk == UnSet))
4553 *chunk = imsm_orom_default_chunk(super->orom);
4554 else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
4555 pr_vrb(": platform does not support a chunk size of: "
4556 "%d\n", *chunk);
4557 return 0;
4558 }
6592ce37
DW
4559 }
4560 if (layout != imsm_level_to_layout(level)) {
4561 if (level == 5)
4562 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4563 else if (level == 10)
4564 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4565 else
4566 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4567 layout, level);
4568 return 0;
4569 }
6592ce37
DW
4570 return 1;
4571}
4572
c2c087e6
DW
4573/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4574 * FIX ME add ahci details
4575 */
8b353278 4576static int validate_geometry_imsm_volume(struct supertype *st, int level,
c21e737b 4577 int layout, int raiddisks, int *chunk,
c2c087e6 4578 unsigned long long size, char *dev,
2c514b71
NB
4579 unsigned long long *freesize,
4580 int verbose)
cdddbdbc 4581{
c2c087e6
DW
4582 struct stat stb;
4583 struct intel_super *super = st->sb;
a20d2ba5 4584 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4585 struct dl *dl;
4586 unsigned long long pos = 0;
4587 unsigned long long maxsize;
4588 struct extent *e;
4589 int i;
cdddbdbc 4590
88c32bb1
DW
4591 /* We must have the container info already read in. */
4592 if (!super)
c2c087e6
DW
4593 return 0;
4594
d54559f0
LM
4595 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4596 fprintf(stderr, Name ": RAID gemetry validation failed. "
4597 "Cannot proceed with the action(s).\n");
c2c087e6 4598 return 0;
d54559f0 4599 }
c2c087e6
DW
4600 if (!dev) {
4601 /* General test: make sure there is space for
2da8544a
DW
4602 * 'raiddisks' device extents of size 'size' at a given
4603 * offset
c2c087e6 4604 */
e46273eb 4605 unsigned long long minsize = size;
b7528a20 4606 unsigned long long start_offset = MaxSector;
c2c087e6
DW
4607 int dcnt = 0;
4608 if (minsize == 0)
4609 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4610 for (dl = super->disks; dl ; dl = dl->next) {
4611 int found = 0;
4612
bf5a934a 4613 pos = 0;
c2c087e6
DW
4614 i = 0;
4615 e = get_extents(super, dl);
4616 if (!e) continue;
4617 do {
4618 unsigned long long esize;
4619 esize = e[i].start - pos;
4620 if (esize >= minsize)
4621 found = 1;
b7528a20 4622 if (found && start_offset == MaxSector) {
2da8544a
DW
4623 start_offset = pos;
4624 break;
4625 } else if (found && pos != start_offset) {
4626 found = 0;
4627 break;
4628 }
c2c087e6
DW
4629 pos = e[i].start + e[i].size;
4630 i++;
4631 } while (e[i-1].size);
4632 if (found)
4633 dcnt++;
4634 free(e);
4635 }
4636 if (dcnt < raiddisks) {
2c514b71
NB
4637 if (verbose)
4638 fprintf(stderr, Name ": imsm: Not enough "
4639 "devices with space for this array "
4640 "(%d < %d)\n",
4641 dcnt, raiddisks);
c2c087e6
DW
4642 return 0;
4643 }
4644 return 1;
4645 }
0dcecb2e 4646
c2c087e6
DW
4647 /* This device must be a member of the set */
4648 if (stat(dev, &stb) < 0)
4649 return 0;
4650 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4651 return 0;
4652 for (dl = super->disks ; dl ; dl = dl->next) {
f21e18ca
N
4653 if (dl->major == (int)major(stb.st_rdev) &&
4654 dl->minor == (int)minor(stb.st_rdev))
c2c087e6
DW
4655 break;
4656 }
4657 if (!dl) {
2c514b71
NB
4658 if (verbose)
4659 fprintf(stderr, Name ": %s is not in the "
4660 "same imsm set\n", dev);
c2c087e6 4661 return 0;
a20d2ba5
DW
4662 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4663 /* If a volume is present then the current creation attempt
4664 * cannot incorporate new spares because the orom may not
4665 * understand this configuration (all member disks must be
4666 * members of each array in the container).
4667 */
4668 fprintf(stderr, Name ": %s is a spare and a volume"
4669 " is already defined for this container\n", dev);
4670 fprintf(stderr, Name ": The option-rom requires all member"
4671 " disks to be a member of all volumes\n");
4672 return 0;
c2c087e6 4673 }
0dcecb2e
DW
4674
4675 /* retrieve the largest free space block */
c2c087e6
DW
4676 e = get_extents(super, dl);
4677 maxsize = 0;
4678 i = 0;
0dcecb2e
DW
4679 if (e) {
4680 do {
4681 unsigned long long esize;
4682
4683 esize = e[i].start - pos;
4684 if (esize >= maxsize)
4685 maxsize = esize;
4686 pos = e[i].start + e[i].size;
4687 i++;
4688 } while (e[i-1].size);
4689 dl->e = e;
4690 dl->extent_cnt = i;
4691 } else {
4692 if (verbose)
4693 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4694 dev);
4695 return 0;
4696 }
4697 if (maxsize < size) {
4698 if (verbose)
4699 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4700 dev, maxsize, size);
4701 return 0;
4702 }
4703
4704 /* count total number of extents for merge */
4705 i = 0;
4706 for (dl = super->disks; dl; dl = dl->next)
4707 if (dl->e)
4708 i += dl->extent_cnt;
4709
4710 maxsize = merge_extents(super, i);
a7dd165b 4711 if (maxsize < size || maxsize == 0) {
0dcecb2e
DW
4712 if (verbose)
4713 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4714 maxsize, size);
4715 return 0;
0dcecb2e
DW
4716 }
4717
c2c087e6
DW
4718 *freesize = maxsize;
4719
4720 return 1;
cdddbdbc
DW
4721}
4722
efb30e7f
DW
4723static int reserve_space(struct supertype *st, int raiddisks,
4724 unsigned long long size, int chunk,
4725 unsigned long long *freesize)
4726{
4727 struct intel_super *super = st->sb;
4728 struct imsm_super *mpb = super->anchor;
4729 struct dl *dl;
4730 int i;
4731 int extent_cnt;
4732 struct extent *e;
4733 unsigned long long maxsize;
4734 unsigned long long minsize;
4735 int cnt;
4736 int used;
4737
4738 /* find the largest common start free region of the possible disks */
4739 used = 0;
4740 extent_cnt = 0;
4741 cnt = 0;
4742 for (dl = super->disks; dl; dl = dl->next) {
4743 dl->raiddisk = -1;
4744
4745 if (dl->index >= 0)
4746 used++;
4747
4748 /* don't activate new spares if we are orom constrained
4749 * and there is already a volume active in the container
4750 */
4751 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
4752 continue;
4753
4754 e = get_extents(super, dl);
4755 if (!e)
4756 continue;
4757 for (i = 1; e[i-1].size; i++)
4758 ;
4759 dl->e = e;
4760 dl->extent_cnt = i;
4761 extent_cnt += i;
4762 cnt++;
4763 }
4764
4765 maxsize = merge_extents(super, extent_cnt);
4766 minsize = size;
4767 if (size == 0)
612e59d8
CA
4768 /* chunk is in K */
4769 minsize = chunk * 2;
efb30e7f
DW
4770
4771 if (cnt < raiddisks ||
4772 (super->orom && used && used != raiddisks) ||
a7dd165b
DW
4773 maxsize < minsize ||
4774 maxsize == 0) {
efb30e7f
DW
4775 fprintf(stderr, Name ": not enough devices with space to create array.\n");
4776 return 0; /* no free region large enough */
4777 }
4778
4779 if (size == 0) {
4780 size = maxsize;
4781 if (chunk) {
612e59d8
CA
4782 size /= 2 * chunk;
4783 size *= 2 * chunk;
efb30e7f
DW
4784 }
4785 }
4786
4787 cnt = 0;
4788 for (dl = super->disks; dl; dl = dl->next)
4789 if (dl->e)
4790 dl->raiddisk = cnt++;
4791
4792 *freesize = size;
4793
4794 return 1;
4795}
4796
bf5a934a 4797static int validate_geometry_imsm(struct supertype *st, int level, int layout,
c21e737b 4798 int raiddisks, int *chunk, unsigned long long size,
bf5a934a
DW
4799 char *dev, unsigned long long *freesize,
4800 int verbose)
4801{
4802 int fd, cfd;
4803 struct mdinfo *sra;
20cbe8d2 4804 int is_member = 0;
bf5a934a 4805
d54559f0
LM
4806 /* load capability
4807 * if given unused devices create a container
bf5a934a
DW
4808 * if given devices in a container create a member volume
4809 */
4810 if (level == LEVEL_CONTAINER) {
4811 /* Must be a fresh device to add to a container */
4812 return validate_geometry_imsm_container(st, level, layout,
c21e737b
CA
4813 raiddisks,
4814 chunk?*chunk:0, size,
bf5a934a
DW
4815 dev, freesize,
4816 verbose);
4817 }
4818
8592f29d
N
4819 if (!dev) {
4820 if (st->sb && freesize) {
efb30e7f
DW
4821 /* we are being asked to automatically layout a
4822 * new volume based on the current contents of
4823 * the container. If the parameters can be
4824 * satisfied reserve_space will record the disks,
4825 * start offset, and size of the volume to be
4826 * created. add_to_super and getinfo_super
4827 * detect when autolayout is in progress.
4828 */
6592ce37
DW
4829 if (!validate_geometry_imsm_orom(st->sb, level, layout,
4830 raiddisks, chunk,
4831 verbose))
4832 return 0;
c21e737b
CA
4833 return reserve_space(st, raiddisks, size,
4834 chunk?*chunk:0, freesize);
8592f29d
N
4835 }
4836 return 1;
4837 }
bf5a934a
DW
4838 if (st->sb) {
4839 /* creating in a given container */
4840 return validate_geometry_imsm_volume(st, level, layout,
4841 raiddisks, chunk, size,
4842 dev, freesize, verbose);
4843 }
4844
bf5a934a
DW
4845 /* This device needs to be a device in an 'imsm' container */
4846 fd = open(dev, O_RDONLY|O_EXCL, 0);
4847 if (fd >= 0) {
4848 if (verbose)
4849 fprintf(stderr,
4850 Name ": Cannot create this array on device %s\n",
4851 dev);
4852 close(fd);
4853 return 0;
4854 }
4855 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
4856 if (verbose)
4857 fprintf(stderr, Name ": Cannot open %s: %s\n",
4858 dev, strerror(errno));
4859 return 0;
4860 }
4861 /* Well, it is in use by someone, maybe an 'imsm' container. */
4862 cfd = open_container(fd);
20cbe8d2 4863 close(fd);
bf5a934a 4864 if (cfd < 0) {
bf5a934a
DW
4865 if (verbose)
4866 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
4867 dev);
4868 return 0;
4869 }
4870 sra = sysfs_read(cfd, 0, GET_VERSION);
bf5a934a 4871 if (sra && sra->array.major_version == -1 &&
20cbe8d2
AW
4872 strcmp(sra->text_version, "imsm") == 0)
4873 is_member = 1;
4874 sysfs_free(sra);
4875 if (is_member) {
bf5a934a
DW
4876 /* This is a member of an imsm container. Load the container
4877 * and try to create a volume
4878 */
4879 struct intel_super *super;
4880
e1902a7b 4881 if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
bf5a934a
DW
4882 st->sb = super;
4883 st->container_dev = fd2devnum(cfd);
4884 close(cfd);
4885 return validate_geometry_imsm_volume(st, level, layout,
4886 raiddisks, chunk,
4887 size, dev,
4888 freesize, verbose);
4889 }
20cbe8d2 4890 }
bf5a934a 4891
20cbe8d2
AW
4892 if (verbose)
4893 fprintf(stderr, Name ": failed container membership check\n");
4894
4895 close(cfd);
4896 return 0;
bf5a934a 4897}
0bd16cf2 4898
30f58b22 4899static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
0bd16cf2
DJ
4900{
4901 struct intel_super *super = st->sb;
4902
30f58b22
DW
4903 if (level && *level == UnSet)
4904 *level = LEVEL_CONTAINER;
4905
4906 if (level && layout && *layout == UnSet)
4907 *layout = imsm_level_to_layout(*level);
0bd16cf2 4908
1d54f286
N
4909 if (chunk && (*chunk == UnSet || *chunk == 0) &&
4910 super && super->orom)
30f58b22 4911 *chunk = imsm_orom_default_chunk(super->orom);
0bd16cf2
DJ
4912}
4913
33414a01
DW
4914static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
4915
4916static int kill_subarray_imsm(struct supertype *st)
4917{
4918 /* remove the subarray currently referenced by ->current_vol */
4919 __u8 i;
4920 struct intel_dev **dp;
4921 struct intel_super *super = st->sb;
4922 __u8 current_vol = super->current_vol;
4923 struct imsm_super *mpb = super->anchor;
4924
4925 if (super->current_vol < 0)
4926 return 2;
4927 super->current_vol = -1; /* invalidate subarray cursor */
4928
4929 /* block deletions that would change the uuid of active subarrays
4930 *
4931 * FIXME when immutable ids are available, but note that we'll
4932 * also need to fixup the invalidated/active subarray indexes in
4933 * mdstat
4934 */
4935 for (i = 0; i < mpb->num_raid_devs; i++) {
4936 char subarray[4];
4937
4938 if (i < current_vol)
4939 continue;
4940 sprintf(subarray, "%u", i);
4941 if (is_subarray_active(subarray, st->devname)) {
4942 fprintf(stderr,
4943 Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
4944 current_vol, i);
4945
4946 return 2;
4947 }
4948 }
4949
4950 if (st->update_tail) {
4951 struct imsm_update_kill_array *u = malloc(sizeof(*u));
4952
4953 if (!u)
4954 return 2;
4955 u->type = update_kill_array;
4956 u->dev_idx = current_vol;
4957 append_metadata_update(st, u, sizeof(*u));
4958
4959 return 0;
4960 }
4961
4962 for (dp = &super->devlist; *dp;)
4963 if ((*dp)->index == current_vol) {
4964 *dp = (*dp)->next;
4965 } else {
4966 handle_missing(super, (*dp)->dev);
4967 if ((*dp)->index > current_vol)
4968 (*dp)->index--;
4969 dp = &(*dp)->next;
4970 }
4971
4972 /* no more raid devices, all active components are now spares,
4973 * but of course failed are still failed
4974 */
4975 if (--mpb->num_raid_devs == 0) {
4976 struct dl *d;
4977
4978 for (d = super->disks; d; d = d->next)
4979 if (d->index > -2) {
4980 d->index = -1;
4981 d->disk.status = SPARE_DISK;
4982 }
4983 }
4984
4985 super->updates_pending++;
4986
4987 return 0;
4988}
aa534678 4989
a951a4f7 4990static int update_subarray_imsm(struct supertype *st, char *subarray,
fa56eddb 4991 char *update, struct mddev_ident *ident)
aa534678
DW
4992{
4993 /* update the subarray currently referenced by ->current_vol */
4994 struct intel_super *super = st->sb;
4995 struct imsm_super *mpb = super->anchor;
4996
aa534678
DW
4997 if (strcmp(update, "name") == 0) {
4998 char *name = ident->name;
a951a4f7
N
4999 char *ep;
5000 int vol;
aa534678 5001
a951a4f7 5002 if (is_subarray_active(subarray, st->devname)) {
aa534678
DW
5003 fprintf(stderr,
5004 Name ": Unable to update name of active subarray\n");
5005 return 2;
5006 }
5007
5008 if (!check_name(super, name, 0))
5009 return 2;
5010
a951a4f7
N
5011 vol = strtoul(subarray, &ep, 10);
5012 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
5013 return 2;
5014
aa534678
DW
5015 if (st->update_tail) {
5016 struct imsm_update_rename_array *u = malloc(sizeof(*u));
5017
5018 if (!u)
5019 return 2;
5020 u->type = update_rename_array;
a951a4f7 5021 u->dev_idx = vol;
aa534678
DW
5022 snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
5023 append_metadata_update(st, u, sizeof(*u));
5024 } else {
5025 struct imsm_dev *dev;
5026 int i;
5027
a951a4f7 5028 dev = get_imsm_dev(super, vol);
aa534678
DW
5029 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
5030 for (i = 0; i < mpb->num_raid_devs; i++) {
5031 dev = get_imsm_dev(super, i);
5032 handle_missing(super, dev);
5033 }
5034 super->updates_pending++;
5035 }
5036 } else
5037 return 2;
5038
5039 return 0;
5040}
bf5a934a 5041
28bce06f
AK
5042static int is_gen_migration(struct imsm_dev *dev)
5043{
5044 if (!dev->vol.migr_state)
5045 return 0;
5046
5047 if (migr_type(dev) == MIGR_GEN_MIGR)
5048 return 1;
5049
5050 return 0;
5051}
71204a50 5052#endif /* MDASSEMBLE */
28bce06f 5053
1e5c6983
DW
5054static int is_rebuilding(struct imsm_dev *dev)
5055{
5056 struct imsm_map *migr_map;
5057
5058 if (!dev->vol.migr_state)
5059 return 0;
5060
5061 if (migr_type(dev) != MIGR_REBUILD)
5062 return 0;
5063
5064 migr_map = get_imsm_map(dev, 1);
5065
5066 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
5067 return 1;
5068 else
5069 return 0;
5070}
5071
5072static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
5073{
5074 struct mdinfo *rebuild = NULL;
5075 struct mdinfo *d;
5076 __u32 units;
5077
5078 if (!is_rebuilding(dev))
5079 return;
5080
5081 /* Find the rebuild target, but punt on the dual rebuild case */
5082 for (d = array->devs; d; d = d->next)
5083 if (d->recovery_start == 0) {
5084 if (rebuild)
5085 return;
5086 rebuild = d;
5087 }
5088
4363fd80
DW
5089 if (!rebuild) {
5090 /* (?) none of the disks are marked with
5091 * IMSM_ORD_REBUILD, so assume they are missing and the
5092 * disk_ord_tbl was not correctly updated
5093 */
5094 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
5095 return;
5096 }
5097
1e5c6983
DW
5098 units = __le32_to_cpu(dev->vol.curr_migr_unit);
5099 rebuild->recovery_start = units * blocks_per_migr_unit(dev);
5100}
5101
5102
00bbdbda 5103static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
cdddbdbc 5104{
4f5bc454
DW
5105 /* Given a container loaded by load_super_imsm_all,
5106 * extract information about all the arrays into
5107 * an mdinfo tree.
00bbdbda 5108 * If 'subarray' is given, just extract info about that array.
4f5bc454
DW
5109 *
5110 * For each imsm_dev create an mdinfo, fill it in,
5111 * then look for matching devices in super->disks
5112 * and create appropriate device mdinfo.
5113 */
5114 struct intel_super *super = st->sb;
949c47a0 5115 struct imsm_super *mpb = super->anchor;
4f5bc454 5116 struct mdinfo *rest = NULL;
00bbdbda 5117 unsigned int i;
a06d022d 5118 int bbm_errors = 0;
abef11a3
AK
5119 struct dl *d;
5120 int spare_disks = 0;
cdddbdbc 5121
a06d022d
KW
5122 /* check for bad blocks */
5123 if (imsm_bbm_log_size(super->anchor))
5124 bbm_errors = 1;
604b746f 5125
abef11a3
AK
5126 /* count spare devices, not used in maps
5127 */
5128 for (d = super->disks; d; d = d->next)
5129 if (d->index == -1)
5130 spare_disks++;
5131
4f5bc454 5132 for (i = 0; i < mpb->num_raid_devs; i++) {
00bbdbda
N
5133 struct imsm_dev *dev;
5134 struct imsm_map *map;
86e3692b 5135 struct imsm_map *map2;
4f5bc454 5136 struct mdinfo *this;
2db86302 5137 int slot, chunk;
00bbdbda
N
5138 char *ep;
5139
5140 if (subarray &&
5141 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
5142 continue;
5143
5144 dev = get_imsm_dev(super, i);
5145 map = get_imsm_map(dev, 0);
86e3692b 5146 map2 = get_imsm_map(dev, 1);
4f5bc454 5147
1ce0101c
DW
5148 /* do not publish arrays that are in the middle of an
5149 * unsupported migration
5150 */
5151 if (dev->vol.migr_state &&
28bce06f 5152 (migr_type(dev) == MIGR_STATE_CHANGE)) {
1ce0101c
DW
5153 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
5154 " unsupported migration in progress\n",
5155 dev->volume);
5156 continue;
5157 }
2db86302
LM
5158 /* do not publish arrays that are not supported by the controller's
5159 * OROM/EFI
5160 */
1ce0101c 5161
2db86302 5162 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
7b0bbd0f 5163#ifndef MDASSEMBLE
2db86302
LM
5164 if (!validate_geometry_imsm_orom(super,
5165 get_imsm_raid_level(map), /* RAID level */
5166 imsm_level_to_layout(get_imsm_raid_level(map)),
5167 map->num_members, /* raid disks */
5168 &chunk,
5169 1 /* verbose */)) {
5170 fprintf(stderr, Name ": RAID gemetry validation failed. "
5171 "Cannot proceed with the action(s).\n");
5172 continue;
5173 }
7b0bbd0f 5174#endif /* MDASSEMBLE */
4f5bc454 5175 this = malloc(sizeof(*this));
0fbd635c 5176 if (!this) {
cf1be220 5177 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
0fbd635c
AW
5178 sizeof(*this));
5179 break;
5180 }
4f5bc454
DW
5181 memset(this, 0, sizeof(*this));
5182 this->next = rest;
4f5bc454 5183
301406c9 5184 super->current_vol = i;
a5d85af7 5185 getinfo_super_imsm_volume(st, this, NULL);
4f5bc454 5186 for (slot = 0 ; slot < map->num_members; slot++) {
1e5c6983 5187 unsigned long long recovery_start;
4f5bc454
DW
5188 struct mdinfo *info_d;
5189 struct dl *d;
5190 int idx;
9a1608e5 5191 int skip;
7eef0453 5192 __u32 ord;
4f5bc454 5193
9a1608e5 5194 skip = 0;
98130f40 5195 idx = get_imsm_disk_idx(dev, slot, 0);
196b0d44 5196 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
4f5bc454
DW
5197 for (d = super->disks; d ; d = d->next)
5198 if (d->index == idx)
0fbd635c 5199 break;
4f5bc454 5200
1e5c6983 5201 recovery_start = MaxSector;
4f5bc454 5202 if (d == NULL)
9a1608e5 5203 skip = 1;
25ed7e59 5204 if (d && is_failed(&d->disk))
9a1608e5 5205 skip = 1;
7eef0453 5206 if (ord & IMSM_ORD_REBUILD)
1e5c6983 5207 recovery_start = 0;
9a1608e5
DW
5208
5209 /*
5210 * if we skip some disks the array will be assembled degraded;
1e5c6983
DW
5211 * reset resync start to avoid a dirty-degraded
5212 * situation when performing the initial sync
9a1608e5
DW
5213 *
5214 * FIXME handle dirty degraded
5215 */
1e5c6983 5216 if ((skip || recovery_start == 0) && !dev->vol.dirty)
b7528a20 5217 this->resync_start = MaxSector;
9a1608e5
DW
5218 if (skip)
5219 continue;
4f5bc454 5220
1e5c6983 5221 info_d = calloc(1, sizeof(*info_d));
9a1608e5
DW
5222 if (!info_d) {
5223 fprintf(stderr, Name ": failed to allocate disk"
1ce0101c 5224 " for volume %.16s\n", dev->volume);
1e5c6983
DW
5225 info_d = this->devs;
5226 while (info_d) {
5227 struct mdinfo *d = info_d->next;
5228
5229 free(info_d);
5230 info_d = d;
5231 }
9a1608e5
DW
5232 free(this);
5233 this = rest;
5234 break;
5235 }
4f5bc454
DW
5236 info_d->next = this->devs;
5237 this->devs = info_d;
5238
4f5bc454
DW
5239 info_d->disk.number = d->index;
5240 info_d->disk.major = d->major;
5241 info_d->disk.minor = d->minor;
5242 info_d->disk.raid_disk = slot;
1e5c6983 5243 info_d->recovery_start = recovery_start;
86e3692b
AK
5244 if (map2) {
5245 if (slot < map2->num_members)
5246 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5247 else
5248 this->array.spare_disks++;
86e3692b
AK
5249 } else {
5250 if (slot < map->num_members)
5251 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5252 else
5253 this->array.spare_disks++;
86e3692b 5254 }
1e5c6983
DW
5255 if (info_d->recovery_start == MaxSector)
5256 this->array.working_disks++;
4f5bc454
DW
5257
5258 info_d->events = __le32_to_cpu(mpb->generation_num);
5259 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5260 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
4f5bc454 5261 }
1e5c6983
DW
5262 /* now that the disk list is up-to-date fixup recovery_start */
5263 update_recovery_start(dev, this);
abef11a3 5264 this->array.spare_disks += spare_disks;
9a1608e5 5265 rest = this;
4f5bc454
DW
5266 }
5267
a06d022d
KW
5268 /* if array has bad blocks, set suitable bit in array status */
5269 if (bbm_errors)
5270 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5271
4f5bc454 5272 return rest;
cdddbdbc
DW
5273}
5274
845dea95 5275
fb49eef2 5276static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
c2a1e7da 5277{
a965f303 5278 struct imsm_map *map = get_imsm_map(dev, 0);
c2a1e7da
DW
5279
5280 if (!failed)
3393c6af
DW
5281 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
5282 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
c2a1e7da
DW
5283
5284 switch (get_imsm_raid_level(map)) {
5285 case 0:
5286 return IMSM_T_STATE_FAILED;
5287 break;
5288 case 1:
5289 if (failed < map->num_members)
5290 return IMSM_T_STATE_DEGRADED;
5291 else
5292 return IMSM_T_STATE_FAILED;
5293 break;
5294 case 10:
5295 {
5296 /**
c92a2527
DW
5297 * check to see if any mirrors have failed, otherwise we
5298 * are degraded. Even numbered slots are mirrored on
5299 * slot+1
c2a1e7da 5300 */
c2a1e7da 5301 int i;
d9b420a5
N
5302 /* gcc -Os complains that this is unused */
5303 int insync = insync;
c2a1e7da
DW
5304
5305 for (i = 0; i < map->num_members; i++) {
98130f40 5306 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
c92a2527
DW
5307 int idx = ord_to_idx(ord);
5308 struct imsm_disk *disk;
c2a1e7da 5309
c92a2527
DW
5310 /* reset the potential in-sync count on even-numbered
5311 * slots. num_copies is always 2 for imsm raid10
5312 */
5313 if ((i & 1) == 0)
5314 insync = 2;
c2a1e7da 5315
c92a2527 5316 disk = get_imsm_disk(super, idx);
25ed7e59 5317 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
c92a2527 5318 insync--;
c2a1e7da 5319
c92a2527
DW
5320 /* no in-sync disks left in this mirror, the
5321 * array has failed
5322 */
5323 if (insync == 0)
5324 return IMSM_T_STATE_FAILED;
c2a1e7da
DW
5325 }
5326
5327 return IMSM_T_STATE_DEGRADED;
5328 }
5329 case 5:
5330 if (failed < 2)
5331 return IMSM_T_STATE_DEGRADED;
5332 else
5333 return IMSM_T_STATE_FAILED;
5334 break;
5335 default:
5336 break;
5337 }
5338
5339 return map->map_state;
5340}
5341
ff077194 5342static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
c2a1e7da
DW
5343{
5344 int i;
5345 int failed = 0;
5346 struct imsm_disk *disk;
ff077194 5347 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2
DW
5348 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5349 __u32 ord;
5350 int idx;
c2a1e7da 5351
0556e1a2
DW
5352 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5353 * disks that are being rebuilt. New failures are recorded to
5354 * map[0]. So we look through all the disks we started with and
5355 * see if any failures are still present, or if any new ones
5356 * have arrived
5357 *
5358 * FIXME add support for online capacity expansion and
5359 * raid-level-migration
5360 */
5361 for (i = 0; i < prev->num_members; i++) {
5362 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5363 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5364 idx = ord_to_idx(ord);
c2a1e7da 5365
949c47a0 5366 disk = get_imsm_disk(super, idx);
25ed7e59 5367 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
fcb84475 5368 failed++;
c2a1e7da
DW
5369 }
5370
5371 return failed;
845dea95
NB
5372}
5373
97b4d0e9
DW
5374#ifndef MDASSEMBLE
5375static int imsm_open_new(struct supertype *c, struct active_array *a,
5376 char *inst)
5377{
5378 struct intel_super *super = c->sb;
5379 struct imsm_super *mpb = super->anchor;
5380
5381 if (atoi(inst) >= mpb->num_raid_devs) {
5382 fprintf(stderr, "%s: subarry index %d, out of range\n",
5383 __func__, atoi(inst));
5384 return -ENODEV;
5385 }
5386
5387 dprintf("imsm: open_new %s\n", inst);
5388 a->info.container_member = atoi(inst);
5389 return 0;
5390}
5391
0c046afd
DW
5392static int is_resyncing(struct imsm_dev *dev)
5393{
5394 struct imsm_map *migr_map;
5395
5396 if (!dev->vol.migr_state)
5397 return 0;
5398
1484e727
DW
5399 if (migr_type(dev) == MIGR_INIT ||
5400 migr_type(dev) == MIGR_REPAIR)
0c046afd
DW
5401 return 1;
5402
4c9bc37b
AK
5403 if (migr_type(dev) == MIGR_GEN_MIGR)
5404 return 0;
5405
0c046afd
DW
5406 migr_map = get_imsm_map(dev, 1);
5407
4c9bc37b
AK
5408 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5409 (dev->vol.migr_type != MIGR_GEN_MIGR))
0c046afd
DW
5410 return 1;
5411 else
5412 return 0;
5413}
5414
0556e1a2
DW
5415/* return true if we recorded new information */
5416static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
47ee5a45 5417{
0556e1a2
DW
5418 __u32 ord;
5419 int slot;
5420 struct imsm_map *map;
5421
5422 /* new failures are always set in map[0] */
5423 map = get_imsm_map(dev, 0);
5424
5425 slot = get_imsm_disk_slot(map, idx);
5426 if (slot < 0)
5427 return 0;
5428
5429 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
25ed7e59 5430 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
0556e1a2
DW
5431 return 0;
5432
f2f27e63 5433 disk->status |= FAILED_DISK;
0556e1a2 5434 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
f21e18ca 5435 if (map->failed_disk_num == 0xff)
0556e1a2
DW
5436 map->failed_disk_num = slot;
5437 return 1;
5438}
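/* Note that mark_failure() only records the failure in map[0]: the
 * slot's ord entry gets IMSM_ORD_REBUILD or'd in and the disk gains
 * FAILED_DISK, so calling it again for a slot that is already marked
 * returns 0 and no redundant metadata write is queued.
 */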
5439
5440static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5441{
5442 mark_failure(dev, disk, idx);
5443
5444 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5445 return;
5446
47ee5a45
DW
5447 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5448 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5449}
5450
33414a01
DW
5451static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5452{
5453 __u8 map_state;
5454 struct dl *dl;
5455 int failed;
5456
5457 if (!super->missing)
5458 return;
5459 failed = imsm_count_failed(super, dev);
5460 map_state = imsm_check_degraded(super, dev, failed);
5461
5462 dprintf("imsm: mark missing\n");
5463 end_migration(dev, map_state);
5464 for (dl = super->missing; dl; dl = dl->next)
5465 mark_missing(dev, &dl->disk, dl->index);
5466 super->updates_pending++;
5467}
5468
70bdf0dc
AK
5469static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5470{
5471 int used_disks = imsm_num_data_members(dev, 0);
5472 unsigned long long array_blocks;
5473 struct imsm_map *map;
5474
5475 if (used_disks == 0) {
 5476 /* when problems occur,
 5477 * return the current array_blocks value
5478 */
5479 array_blocks = __le32_to_cpu(dev->size_high);
5480 array_blocks = array_blocks << 32;
5481 array_blocks += __le32_to_cpu(dev->size_low);
5482
5483 return array_blocks;
5484 }
5485
5486 /* set array size in metadata
5487 */
5488 map = get_imsm_map(dev, 0);
5489 array_blocks = map->blocks_per_member * used_disks;
5490
5491 /* round array size down to closest MB
5492 */
5493 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5494 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5495 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5496
5497 return array_blocks;
5498}
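/* Worked example (illustrative numbers): with used_disks = 2 and
 * blocks_per_member = 1000000 sectors, array_blocks starts at 2000000;
 * shifting right then left by SECT_PER_MB_SHIFT (11) keeps only whole
 * 2048-sector megabytes, so the size stored in size_low/size_high is
 * 1998848 sectors.
 */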
5499
28bce06f
AK
5500static void imsm_set_disk(struct active_array *a, int n, int state);
5501
0e2d1a4e
AK
5502static void imsm_progress_container_reshape(struct intel_super *super)
5503{
 5504 /* if no device has a migr_state, but some device has a
 5505 * different number of members than the previous device, start
 5506 * changing the number of members in this device to match the
 5507 * previous one.
5508 */
5509 struct imsm_super *mpb = super->anchor;
5510 int prev_disks = -1;
5511 int i;
1dfaa380 5512 int copy_map_size;
0e2d1a4e
AK
5513
5514 for (i = 0; i < mpb->num_raid_devs; i++) {
5515 struct imsm_dev *dev = get_imsm_dev(super, i);
5516 struct imsm_map *map = get_imsm_map(dev, 0);
5517 struct imsm_map *map2;
5518 int prev_num_members;
0e2d1a4e
AK
5519
5520 if (dev->vol.migr_state)
5521 return;
5522
5523 if (prev_disks == -1)
5524 prev_disks = map->num_members;
5525 if (prev_disks == map->num_members)
5526 continue;
5527
5528 /* OK, this array needs to enter reshape mode.
5529 * i.e it needs a migr_state
5530 */
5531
1dfaa380 5532 copy_map_size = sizeof_imsm_map(map);
0e2d1a4e
AK
5533 prev_num_members = map->num_members;
5534 map->num_members = prev_disks;
5535 dev->vol.migr_state = 1;
5536 dev->vol.curr_migr_unit = 0;
5537 dev->vol.migr_type = MIGR_GEN_MIGR;
5538 for (i = prev_num_members;
5539 i < map->num_members; i++)
5540 set_imsm_ord_tbl_ent(map, i, i);
5541 map2 = get_imsm_map(dev, 1);
5542 /* Copy the current map */
1dfaa380 5543 memcpy(map2, map, copy_map_size);
0e2d1a4e
AK
5544 map2->num_members = prev_num_members;
5545
70bdf0dc 5546 imsm_set_array_size(dev);
0e2d1a4e
AK
5547 super->updates_pending++;
5548 }
5549}
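/* Example: if the first volume in the container already has five
 * members while the next volume still has four, the four-member volume
 * is switched into MIGR_GEN_MIGR here with a five-member map[0] (the
 * extra slot seeded with its own index) and the original four-member
 * map preserved as map[1], so the volumes are brought to the same
 * width one at a time.
 */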
5550
aad6f216 5551 /* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
0c046afd
DW
5552 * states are handled in imsm_set_disk() with one exception, when a
5553 * resync is stopped due to a new failure this routine will set the
5554 * 'degraded' state for the array.
5555 */
01f157d7 5556static int imsm_set_array_state(struct active_array *a, int consistent)
a862209d
DW
5557{
5558 int inst = a->info.container_member;
5559 struct intel_super *super = a->container->sb;
949c47a0 5560 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5561 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd
DW
5562 int failed = imsm_count_failed(super, dev);
5563 __u8 map_state = imsm_check_degraded(super, dev, failed);
1e5c6983 5564 __u32 blocks_per_unit;
a862209d 5565
1af97990
AK
5566 if (dev->vol.migr_state &&
5567 dev->vol.migr_type == MIGR_GEN_MIGR) {
5568 /* array state change is blocked due to reshape action
aad6f216
N
5569 * We might need to
 5570 * - abort the reshape (if last_checkpoint is 0 and action != reshape)
5571 * - finish the reshape (if last_checkpoint is big and action != reshape)
5572 * - update curr_migr_unit
1af97990 5573 */
aad6f216
N
5574 if (a->curr_action == reshape) {
5575 /* still reshaping, maybe update curr_migr_unit */
633b5610 5576 goto mark_checkpoint;
aad6f216
N
5577 } else {
5578 if (a->last_checkpoint == 0 && a->prev_action == reshape) {
5579 /* for some reason we aborted the reshape.
5580 * Better clean up
5581 */
5582 struct imsm_map *map2 = get_imsm_map(dev, 1);
5583 dev->vol.migr_state = 0;
5584 dev->vol.migr_type = 0;
5585 dev->vol.curr_migr_unit = 0;
5586 memcpy(map, map2, sizeof_imsm_map(map2));
5587 super->updates_pending++;
5588 }
5589 if (a->last_checkpoint >= a->info.component_size) {
5590 unsigned long long array_blocks;
5591 int used_disks;
e154ced3 5592 struct mdinfo *mdi;
aad6f216 5593
9653001d 5594 used_disks = imsm_num_data_members(dev, 0);
d55adef9
AK
5595 if (used_disks > 0) {
5596 array_blocks =
5597 map->blocks_per_member *
5598 used_disks;
5599 /* round array size down to closest MB
5600 */
5601 array_blocks = (array_blocks
5602 >> SECT_PER_MB_SHIFT)
5603 << SECT_PER_MB_SHIFT;
d55adef9
AK
5604 a->info.custom_array_size = array_blocks;
5605 /* encourage manager to update array
5606 * size
5607 */
e154ced3 5608
d55adef9 5609 a->check_reshape = 1;
633b5610 5610 }
e154ced3
AK
5611 /* finalize online capacity expansion/reshape */
5612 for (mdi = a->info.devs; mdi; mdi = mdi->next)
5613 imsm_set_disk(a,
5614 mdi->disk.raid_disk,
5615 mdi->curr_state);
5616
0e2d1a4e 5617 imsm_progress_container_reshape(super);
e154ced3 5618 }
aad6f216 5619 }
1af97990
AK
5620 }
5621
47ee5a45 5622 /* before we activate this array handle any missing disks */
33414a01
DW
5623 if (consistent == 2)
5624 handle_missing(super, dev);
1e5c6983 5625
0c046afd 5626 if (consistent == 2 &&
b7941fd6 5627 (!is_resync_complete(&a->info) ||
0c046afd
DW
5628 map_state != IMSM_T_STATE_NORMAL ||
5629 dev->vol.migr_state))
01f157d7 5630 consistent = 0;
272906ef 5631
b7941fd6 5632 if (is_resync_complete(&a->info)) {
0c046afd 5633 /* complete initialization / resync,
0556e1a2
DW
 5634 * recovery and interrupted recovery are completed in
5635 * ->set_disk
0c046afd
DW
5636 */
5637 if (is_resyncing(dev)) {
5638 dprintf("imsm: mark resync done\n");
f8f603f1 5639 end_migration(dev, map_state);
115c3803 5640 super->updates_pending++;
484240d8 5641 a->last_checkpoint = 0;
115c3803 5642 }
0c046afd
DW
5643 } else if (!is_resyncing(dev) && !failed) {
5644 /* mark the start of the init process if nothing is failed */
b7941fd6 5645 dprintf("imsm: mark resync start\n");
1484e727 5646 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
8e59f3d8 5647 migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
1484e727 5648 else
8e59f3d8 5649 migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
3393c6af 5650 super->updates_pending++;
115c3803 5651 }
a862209d 5652
633b5610 5653mark_checkpoint:
1e5c6983
DW
5654 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5655 blocks_per_unit = blocks_per_migr_unit(dev);
4f0a7acc 5656 if (blocks_per_unit) {
1e5c6983
DW
5657 __u32 units32;
5658 __u64 units;
5659
4f0a7acc 5660 units = a->last_checkpoint / blocks_per_unit;
1e5c6983
DW
5661 units32 = units;
5662
5663 /* check that we did not overflow 32-bits, and that
5664 * curr_migr_unit needs updating
5665 */
5666 if (units32 == units &&
bfd80a56 5667 units32 != 0 &&
1e5c6983
DW
5668 __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
5669 dprintf("imsm: mark checkpoint (%u)\n", units32);
5670 dev->vol.curr_migr_unit = __cpu_to_le32(units32);
5671 super->updates_pending++;
5672 }
5673 }
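	/* e.g. with a->last_checkpoint = 204800 sectors and
	 * blocks_per_unit = 2048, curr_migr_unit is advanced to 100; the
	 * units32 == units test above skips the update when the 64-bit
	 * quotient no longer fits the 32-bit on-disk field.
	 */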
f8f603f1 5674
3393c6af 5675 /* mark dirty / clean */
0c046afd 5676 if (dev->vol.dirty != !consistent) {
b7941fd6 5677 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
0c046afd
DW
5678 if (consistent)
5679 dev->vol.dirty = 0;
5680 else
5681 dev->vol.dirty = 1;
a862209d
DW
5682 super->updates_pending++;
5683 }
28bce06f 5684
01f157d7 5685 return consistent;
a862209d
DW
5686}
5687
8d45d196 5688static void imsm_set_disk(struct active_array *a, int n, int state)
845dea95 5689{
8d45d196
DW
5690 int inst = a->info.container_member;
5691 struct intel_super *super = a->container->sb;
949c47a0 5692 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5693 struct imsm_map *map = get_imsm_map(dev, 0);
8d45d196 5694 struct imsm_disk *disk;
0c046afd 5695 int failed;
b10b37b8 5696 __u32 ord;
0c046afd 5697 __u8 map_state;
8d45d196
DW
5698
 5699 if (n >= map->num_members)
5700 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5701 n, map->num_members - 1);
5702
5703 if (n < 0)
5704 return;
5705
4e6e574a 5706 dprintf("imsm: set_disk %d:%x\n", n, state);
8d45d196 5707
98130f40 5708 ord = get_imsm_ord_tbl_ent(dev, n, -1);
b10b37b8 5709 disk = get_imsm_disk(super, ord_to_idx(ord));
8d45d196 5710
5802a811 5711 /* check for new failures */
0556e1a2
DW
5712 if (state & DS_FAULTY) {
5713 if (mark_failure(dev, disk, ord_to_idx(ord)))
5714 super->updates_pending++;
8d45d196 5715 }
47ee5a45 5716
19859edc 5717 /* check if in_sync */
0556e1a2 5718 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
b10b37b8
DW
5719 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5720
5721 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
19859edc
DW
5722 super->updates_pending++;
5723 }
8d45d196 5724
0c046afd
DW
5725 failed = imsm_count_failed(super, dev);
5726 map_state = imsm_check_degraded(super, dev, failed);
5802a811 5727
0c046afd
DW
5728 /* check if recovery complete, newly degraded, or failed */
5729 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
f8f603f1 5730 end_migration(dev, map_state);
0556e1a2
DW
5731 map = get_imsm_map(dev, 0);
5732 map->failed_disk_num = ~0;
0c046afd 5733 super->updates_pending++;
484240d8 5734 a->last_checkpoint = 0;
0c046afd
DW
5735 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5736 map->map_state != map_state &&
5737 !dev->vol.migr_state) {
5738 dprintf("imsm: mark degraded\n");
5739 map->map_state = map_state;
5740 super->updates_pending++;
484240d8 5741 a->last_checkpoint = 0;
0c046afd
DW
5742 } else if (map_state == IMSM_T_STATE_FAILED &&
5743 map->map_state != map_state) {
5744 dprintf("imsm: mark failed\n");
f8f603f1 5745 end_migration(dev, map_state);
0c046afd 5746 super->updates_pending++;
484240d8 5747 a->last_checkpoint = 0;
28bce06f
AK
5748 } else if (is_gen_migration(dev)) {
5749 dprintf("imsm: Detected General Migration in state: ");
5750 if (map_state == IMSM_T_STATE_NORMAL) {
5751 end_migration(dev, map_state);
5752 map = get_imsm_map(dev, 0);
5753 map->failed_disk_num = ~0;
5754 dprintf("normal\n");
5755 } else {
5756 if (map_state == IMSM_T_STATE_DEGRADED) {
 5757 dprintf("degraded\n");
5758 end_migration(dev, map_state);
5759 } else {
5760 dprintf("failed\n");
5761 }
5762 map->map_state = map_state;
5763 }
5764 super->updates_pending++;
5802a811 5765 }
845dea95
NB
5766}
5767
f796af5d 5768static int store_imsm_mpb(int fd, struct imsm_super *mpb)
c2a1e7da 5769{
f796af5d 5770 void *buf = mpb;
c2a1e7da
DW
5771 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5772 unsigned long long dsize;
5773 unsigned long long sectors;
5774
5775 get_dev_size(fd, NULL, &dsize);
5776
272f648f
DW
5777 if (mpb_size > 512) {
5778 /* -1 to account for anchor */
5779 sectors = mpb_sectors(mpb) - 1;
c2a1e7da 5780
272f648f
DW
 5781 /* write the extended mpb to the sectors preceding the anchor */
5782 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5783 return 1;
c2a1e7da 5784
f21e18ca
N
5785 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5786 != 512 * sectors)
272f648f
DW
5787 return 1;
5788 }
c2a1e7da 5789
272f648f
DW
5790 /* first block is stored on second to last sector of the disk */
5791 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
c2a1e7da
DW
5792 return 1;
5793
f796af5d 5794 if (write(fd, buf, 512) != 512)
c2a1e7da
DW
5795 return 1;
5796
c2a1e7da
DW
5797 return 0;
5798}
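/* The resulting layout at the end of the member disk (512-byte
 * sectors) is therefore:
 *
 *   ... | extended mpb (mpb_sectors - 1 sectors) | anchor sector | last sector |
 *
 * i.e. the first 512 bytes of the mpb land in the second-to-last
 * sector and the remainder is written to the sectors immediately
 * preceding it, while the very last sector is left untouched.
 */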
5799
2e735d19 5800static void imsm_sync_metadata(struct supertype *container)
845dea95 5801{
2e735d19 5802 struct intel_super *super = container->sb;
c2a1e7da 5803
1a64be56 5804 dprintf("sync metadata: %d\n", super->updates_pending);
c2a1e7da
DW
5805 if (!super->updates_pending)
5806 return;
5807
36988a3d 5808 write_super_imsm(container, 0);
c2a1e7da
DW
5809
5810 super->updates_pending = 0;
845dea95
NB
5811}
5812
272906ef
DW
5813static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
5814{
5815 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 5816 int i = get_imsm_disk_idx(dev, idx, -1);
272906ef
DW
5817 struct dl *dl;
5818
5819 for (dl = super->disks; dl; dl = dl->next)
5820 if (dl->index == i)
5821 break;
5822
25ed7e59 5823 if (dl && is_failed(&dl->disk))
272906ef
DW
5824 dl = NULL;
5825
5826 if (dl)
5827 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
5828
5829 return dl;
5830}
5831
a20d2ba5 5832static struct dl *imsm_add_spare(struct intel_super *super, int slot,
8ba77d32
AK
5833 struct active_array *a, int activate_new,
5834 struct mdinfo *additional_test_list)
272906ef
DW
5835{
5836 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 5837 int idx = get_imsm_disk_idx(dev, slot, -1);
a20d2ba5
DW
5838 struct imsm_super *mpb = super->anchor;
5839 struct imsm_map *map;
272906ef
DW
5840 unsigned long long pos;
5841 struct mdinfo *d;
5842 struct extent *ex;
a20d2ba5 5843 int i, j;
272906ef 5844 int found;
569cc43f
DW
5845 __u32 array_start = 0;
5846 __u32 array_end = 0;
272906ef 5847 struct dl *dl;
6c932028 5848 struct mdinfo *test_list;
272906ef
DW
5849
5850 for (dl = super->disks; dl; dl = dl->next) {
5851 /* If in this array, skip */
5852 for (d = a->info.devs ; d ; d = d->next)
e553d2a4
DW
5853 if (d->state_fd >= 0 &&
5854 d->disk.major == dl->major &&
272906ef 5855 d->disk.minor == dl->minor) {
8ba77d32
AK
5856 dprintf("%x:%x already in array\n",
5857 dl->major, dl->minor);
272906ef
DW
5858 break;
5859 }
5860 if (d)
5861 continue;
6c932028
AK
5862 test_list = additional_test_list;
5863 while (test_list) {
5864 if (test_list->disk.major == dl->major &&
5865 test_list->disk.minor == dl->minor) {
8ba77d32
AK
5866 dprintf("%x:%x already in additional test list\n",
5867 dl->major, dl->minor);
5868 break;
5869 }
6c932028 5870 test_list = test_list->next;
8ba77d32 5871 }
6c932028 5872 if (test_list)
8ba77d32 5873 continue;
272906ef 5874
e553d2a4 5875 /* skip in use or failed drives */
25ed7e59 5876 if (is_failed(&dl->disk) || idx == dl->index ||
df474657
DW
5877 dl->index == -2) {
5878 dprintf("%x:%x status (failed: %d index: %d)\n",
25ed7e59 5879 dl->major, dl->minor, is_failed(&dl->disk), idx);
9a1608e5
DW
5880 continue;
5881 }
5882
a20d2ba5
DW
5883 /* skip pure spares when we are looking for partially
5884 * assimilated drives
5885 */
5886 if (dl->index == -1 && !activate_new)
5887 continue;
5888
272906ef 5889 /* Does this unused device have the requisite free space?
a20d2ba5 5890 * It needs to be able to cover all member volumes
272906ef
DW
5891 */
5892 ex = get_extents(super, dl);
5893 if (!ex) {
5894 dprintf("cannot get extents\n");
5895 continue;
5896 }
a20d2ba5
DW
5897 for (i = 0; i < mpb->num_raid_devs; i++) {
5898 dev = get_imsm_dev(super, i);
5899 map = get_imsm_map(dev, 0);
272906ef 5900
a20d2ba5
DW
5901 /* check if this disk is already a member of
5902 * this array
272906ef 5903 */
620b1713 5904 if (get_imsm_disk_slot(map, dl->index) >= 0)
a20d2ba5
DW
5905 continue;
5906
5907 found = 0;
5908 j = 0;
5909 pos = 0;
5910 array_start = __le32_to_cpu(map->pba_of_lba0);
329c8278
DW
5911 array_end = array_start +
5912 __le32_to_cpu(map->blocks_per_member) - 1;
a20d2ba5
DW
5913
5914 do {
5915 /* check that we can start at pba_of_lba0 with
5916 * blocks_per_member of space
5917 */
329c8278 5918 if (array_start >= pos && array_end < ex[j].start) {
a20d2ba5
DW
5919 found = 1;
5920 break;
5921 }
5922 pos = ex[j].start + ex[j].size;
5923 j++;
5924 } while (ex[j-1].size);
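			/* ex[] holds the regions of this disk that are already
			 * allocated (terminated by a zero-size entry) and 'pos'
			 * tracks the end of the previous region, so the test
			 * above succeeds only when the member range
			 * [array_start, array_end] fits entirely inside one of
			 * the free gaps between consecutive used extents.
			 */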
5925
5926 if (!found)
272906ef 5927 break;
a20d2ba5 5928 }
272906ef
DW
5929
5930 free(ex);
a20d2ba5 5931 if (i < mpb->num_raid_devs) {
329c8278
DW
5932 dprintf("%x:%x does not have %u to %u available\n",
5933 dl->major, dl->minor, array_start, array_end);
272906ef
DW
5934 /* No room */
5935 continue;
a20d2ba5
DW
5936 }
5937 return dl;
272906ef
DW
5938 }
5939
5940 return dl;
5941}
5942
95d07a2c
LM
5943
5944static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
5945{
5946 struct imsm_dev *dev2;
5947 struct imsm_map *map;
5948 struct dl *idisk;
5949 int slot;
5950 int idx;
5951 __u8 state;
5952
5953 dev2 = get_imsm_dev(cont->sb, dev_idx);
5954 if (dev2) {
5955 state = imsm_check_degraded(cont->sb, dev2, failed);
5956 if (state == IMSM_T_STATE_FAILED) {
5957 map = get_imsm_map(dev2, 0);
5958 if (!map)
5959 return 1;
5960 for (slot = 0; slot < map->num_members; slot++) {
5961 /*
5962 * Check if failed disks are deleted from intel
5963 * disk list or are marked to be deleted
5964 */
98130f40 5965 idx = get_imsm_disk_idx(dev2, slot, -1);
95d07a2c
LM
5966 idisk = get_imsm_dl_disk(cont->sb, idx);
5967 /*
5968 * Do not rebuild the array if failed disks
5969 * from failed sub-array are not removed from
5970 * container.
5971 */
5972 if (idisk &&
5973 is_failed(&idisk->disk) &&
5974 (idisk->action != DISK_REMOVE))
5975 return 0;
5976 }
5977 }
5978 }
5979 return 1;
5980}
5981
88758e9d
DW
5982static struct mdinfo *imsm_activate_spare(struct active_array *a,
5983 struct metadata_update **updates)
5984{
5985 /**
d23fe947
DW
5986 * Find a device with unused free space and use it to replace a
 5987 * failed/vacant region in an array. We replace failed regions one
 5988 * array at a time. The result is that a new spare disk will be added
5989 * to the first failed array and after the monitor has finished
5990 * propagating failures the remainder will be consumed.
88758e9d 5991 *
d23fe947
DW
5992 * FIXME add a capability for mdmon to request spares from another
5993 * container.
88758e9d
DW
5994 */
5995
5996 struct intel_super *super = a->container->sb;
88758e9d 5997 int inst = a->info.container_member;
949c47a0 5998 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5999 struct imsm_map *map = get_imsm_map(dev, 0);
88758e9d
DW
6000 int failed = a->info.array.raid_disks;
6001 struct mdinfo *rv = NULL;
6002 struct mdinfo *d;
6003 struct mdinfo *di;
6004 struct metadata_update *mu;
6005 struct dl *dl;
6006 struct imsm_update_activate_spare *u;
6007 int num_spares = 0;
6008 int i;
95d07a2c 6009 int allowed;
88758e9d
DW
6010
6011 for (d = a->info.devs ; d ; d = d->next) {
6012 if ((d->curr_state & DS_FAULTY) &&
6013 d->state_fd >= 0)
6014 /* wait for Removal to happen */
6015 return NULL;
6016 if (d->state_fd >= 0)
6017 failed--;
6018 }
6019
6020 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
6021 inst, failed, a->info.array.raid_disks, a->info.array.level);
1af97990
AK
6022
6023 if (dev->vol.migr_state &&
6024 dev->vol.migr_type == MIGR_GEN_MIGR)
6025 /* No repair during migration */
6026 return NULL;
6027
89c67882
AK
6028 if (a->info.array.level == 4)
 6029 /* No repair for a taken-over array:
 6030 * imsm doesn't support raid4
6031 */
6032 return NULL;
6033
fb49eef2 6034 if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
88758e9d
DW
6035 return NULL;
6036
95d07a2c
LM
6037 /*
 6038 * If there are any failed disks, check the state of the other volumes.
 6039 * Block the rebuild if another volume is failed, until its failed disks
 6040 * are removed from the container.
6041 */
6042 if (failed) {
 6043 dprintf("found failed disks in %s, check if there is another "
6044 "failed sub-array.\n",
6045 dev->volume);
6046 /* check if states of the other volumes allow for rebuild */
6047 for (i = 0; i < super->anchor->num_raid_devs; i++) {
6048 if (i != inst) {
6049 allowed = imsm_rebuild_allowed(a->container,
6050 i, failed);
6051 if (!allowed)
6052 return NULL;
6053 }
6054 }
6055 }
6056
88758e9d 6057 /* For each slot, if it is not working, find a spare */
88758e9d
DW
6058 for (i = 0; i < a->info.array.raid_disks; i++) {
6059 for (d = a->info.devs ; d ; d = d->next)
6060 if (d->disk.raid_disk == i)
6061 break;
6062 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
6063 if (d && (d->state_fd >= 0))
6064 continue;
6065
272906ef 6066 /*
a20d2ba5
DW
6067 * OK, this device needs recovery. Try to re-add the
6068 * previous occupant of this slot, if this fails see if
6069 * we can continue the assimilation of a spare that was
6070 * partially assimilated, finally try to activate a new
6071 * spare.
272906ef
DW
6072 */
6073 dl = imsm_readd(super, i, a);
6074 if (!dl)
8ba77d32 6075 dl = imsm_add_spare(super, i, a, 0, NULL);
a20d2ba5 6076 if (!dl)
8ba77d32 6077 dl = imsm_add_spare(super, i, a, 1, NULL);
272906ef
DW
6078 if (!dl)
6079 continue;
6080
6081 /* found a usable disk with enough space */
6082 di = malloc(sizeof(*di));
79244939
DW
6083 if (!di)
6084 continue;
272906ef
DW
6085 memset(di, 0, sizeof(*di));
6086
6087 /* dl->index will be -1 in the case we are activating a
6088 * pristine spare. imsm_process_update() will create a
6089 * new index in this case. Once a disk is found to be
6090 * failed in all member arrays it is kicked from the
6091 * metadata
6092 */
6093 di->disk.number = dl->index;
d23fe947 6094
272906ef
DW
6095 /* (ab)use di->devs to store a pointer to the device
6096 * we chose
6097 */
6098 di->devs = (struct mdinfo *) dl;
6099
6100 di->disk.raid_disk = i;
6101 di->disk.major = dl->major;
6102 di->disk.minor = dl->minor;
6103 di->disk.state = 0;
d23534e4 6104 di->recovery_start = 0;
272906ef
DW
6105 di->data_offset = __le32_to_cpu(map->pba_of_lba0);
6106 di->component_size = a->info.component_size;
6107 di->container_member = inst;
148acb7b 6108 super->random = random32();
272906ef
DW
6109 di->next = rv;
6110 rv = di;
6111 num_spares++;
6112 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
6113 i, di->data_offset);
88758e9d 6114
272906ef 6115 break;
88758e9d
DW
6116 }
6117
6118 if (!rv)
6119 /* No spares found */
6120 return rv;
6121 /* Now 'rv' has a list of devices to return.
6122 * Create a metadata_update record to update the
6123 * disk_ord_tbl for the array
6124 */
6125 mu = malloc(sizeof(*mu));
79244939
DW
6126 if (mu) {
6127 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
6128 if (mu->buf == NULL) {
6129 free(mu);
6130 mu = NULL;
6131 }
6132 }
6133 if (!mu) {
6134 while (rv) {
6135 struct mdinfo *n = rv->next;
6136
6137 free(rv);
6138 rv = n;
6139 }
6140 return NULL;
6141 }
6142
88758e9d 6143 mu->space = NULL;
cb23f1f4 6144 mu->space_list = NULL;
88758e9d
DW
6145 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
6146 mu->next = *updates;
6147 u = (struct imsm_update_activate_spare *) mu->buf;
6148
6149 for (di = rv ; di ; di = di->next) {
6150 u->type = update_activate_spare;
d23fe947
DW
6151 u->dl = (struct dl *) di->devs;
6152 di->devs = NULL;
88758e9d
DW
6153 u->slot = di->disk.raid_disk;
6154 u->array = inst;
6155 u->next = u + 1;
6156 u++;
6157 }
6158 (u-1)->next = NULL;
6159 *updates = mu;
6160
6161 return rv;
6162}
6163
54c2c1ea 6164static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
8273f55e 6165{
54c2c1ea
DW
6166 struct imsm_dev *dev = get_imsm_dev(super, idx);
6167 struct imsm_map *map = get_imsm_map(dev, 0);
6168 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
6169 struct disk_info *inf = get_disk_info(u);
6170 struct imsm_disk *disk;
8273f55e
DW
6171 int i;
6172 int j;
8273f55e 6173
54c2c1ea 6174 for (i = 0; i < map->num_members; i++) {
98130f40 6175 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
54c2c1ea
DW
6176 for (j = 0; j < new_map->num_members; j++)
6177 if (serialcmp(disk->serial, inf[j].serial) == 0)
8273f55e
DW
6178 return 1;
6179 }
6180
6181 return 0;
6182}
6183
1a64be56
LM
6184
6185static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6186{
6187 struct dl *dl = NULL;
6188 for (dl = super->disks; dl; dl = dl->next)
6189 if ((dl->major == major) && (dl->minor == minor))
6190 return dl;
6191 return NULL;
6192}
6193
6194static int remove_disk_super(struct intel_super *super, int major, int minor)
6195{
6196 struct dl *prev = NULL;
6197 struct dl *dl;
6198
6199 prev = NULL;
6200 for (dl = super->disks; dl; dl = dl->next) {
6201 if ((dl->major == major) && (dl->minor == minor)) {
6202 /* remove */
6203 if (prev)
6204 prev->next = dl->next;
6205 else
6206 super->disks = dl->next;
6207 dl->next = NULL;
6208 __free_imsm_disk(dl);
6209 dprintf("%s: removed %x:%x\n",
6210 __func__, major, minor);
6211 break;
6212 }
6213 prev = dl;
6214 }
6215 return 0;
6216}
6217
f21e18ca 6218static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
ae6aad82 6219
1a64be56
LM
6220static int add_remove_disk_update(struct intel_super *super)
6221{
6222 int check_degraded = 0;
6223 struct dl *disk = NULL;
 6224 /* add/remove some spares to/from the metadata/container */
6225 while (super->disk_mgmt_list) {
6226 struct dl *disk_cfg;
6227
6228 disk_cfg = super->disk_mgmt_list;
6229 super->disk_mgmt_list = disk_cfg->next;
6230 disk_cfg->next = NULL;
6231
6232 if (disk_cfg->action == DISK_ADD) {
6233 disk_cfg->next = super->disks;
6234 super->disks = disk_cfg;
6235 check_degraded = 1;
6236 dprintf("%s: added %x:%x\n",
6237 __func__, disk_cfg->major,
6238 disk_cfg->minor);
6239 } else if (disk_cfg->action == DISK_REMOVE) {
6240 dprintf("Disk remove action processed: %x.%x\n",
6241 disk_cfg->major, disk_cfg->minor);
6242 disk = get_disk_super(super,
6243 disk_cfg->major,
6244 disk_cfg->minor);
6245 if (disk) {
6246 /* store action status */
6247 disk->action = DISK_REMOVE;
6248 /* remove spare disks only */
6249 if (disk->index == -1) {
6250 remove_disk_super(super,
6251 disk_cfg->major,
6252 disk_cfg->minor);
6253 }
6254 }
6255 /* release allocate disk structure */
6256 __free_imsm_disk(disk_cfg);
6257 }
6258 }
6259 return check_degraded;
6260}
6261
a29911da
PC
6262
6263static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6264 struct intel_super *super,
6265 void ***space_list)
6266{
6267 struct intel_dev *id;
6268 void **tofree = NULL;
6269 int ret_val = 0;
6270
6271 dprintf("apply_reshape_migration_update()\n");
6272 if ((u->subdev < 0) ||
6273 (u->subdev > 1)) {
6274 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6275 return ret_val;
6276 }
6277 if ((space_list == NULL) || (*space_list == NULL)) {
6278 dprintf("imsm: Error: Memory is not allocated\n");
6279 return ret_val;
6280 }
6281
6282 for (id = super->devlist ; id; id = id->next) {
6283 if (id->index == (unsigned)u->subdev) {
6284 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6285 struct imsm_map *map;
6286 struct imsm_dev *new_dev =
6287 (struct imsm_dev *)*space_list;
6288 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6289 int to_state;
6290 struct dl *new_disk;
6291
6292 if (new_dev == NULL)
6293 return ret_val;
6294 *space_list = **space_list;
6295 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6296 map = get_imsm_map(new_dev, 0);
6297 if (migr_map) {
 6298 dprintf("imsm: Error: migration in progress\n");
6299 return ret_val;
6300 }
6301
6302 to_state = map->map_state;
6303 if ((u->new_level == 5) && (map->raid_level == 0)) {
6304 map->num_members++;
6305 /* this should not happen */
6306 if (u->new_disks[0] < 0) {
6307 map->failed_disk_num =
6308 map->num_members - 1;
6309 to_state = IMSM_T_STATE_DEGRADED;
6310 } else
6311 to_state = IMSM_T_STATE_NORMAL;
6312 }
8e59f3d8 6313 migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
a29911da
PC
6314 if (u->new_level > -1)
6315 map->raid_level = u->new_level;
6316 migr_map = get_imsm_map(new_dev, 1);
6317 if ((u->new_level == 5) &&
6318 (migr_map->raid_level == 0)) {
6319 int ord = map->num_members - 1;
6320 migr_map->num_members--;
6321 if (u->new_disks[0] < 0)
6322 ord |= IMSM_ORD_REBUILD;
6323 set_imsm_ord_tbl_ent(map,
6324 map->num_members - 1,
6325 ord);
6326 }
6327 id->dev = new_dev;
6328 tofree = (void **)dev;
6329
4bba0439
PC
6330 /* update chunk size
6331 */
6332 if (u->new_chunksize > 0)
6333 map->blocks_per_strip =
6334 __cpu_to_le16(u->new_chunksize * 2);
6335
a29911da
PC
6336 /* add disk
6337 */
6338 if ((u->new_level != 5) ||
6339 (migr_map->raid_level != 0) ||
6340 (migr_map->raid_level == map->raid_level))
6341 goto skip_disk_add;
6342
6343 if (u->new_disks[0] >= 0) {
 6344 /* use passed spare
6345 */
6346 new_disk = get_disk_super(super,
6347 major(u->new_disks[0]),
6348 minor(u->new_disks[0]));
6349 dprintf("imsm: new disk for reshape is: %i:%i "
6350 "(%p, index = %i)\n",
6351 major(u->new_disks[0]),
6352 minor(u->new_disks[0]),
6353 new_disk, new_disk->index);
6354 if (new_disk == NULL)
6355 goto error_disk_add;
6356
6357 new_disk->index = map->num_members - 1;
6358 /* slot to fill in autolayout
6359 */
6360 new_disk->raiddisk = new_disk->index;
6361 new_disk->disk.status |= CONFIGURED_DISK;
6362 new_disk->disk.status &= ~SPARE_DISK;
6363 } else
6364 goto error_disk_add;
6365
6366skip_disk_add:
6367 *tofree = *space_list;
6368 /* calculate new size
6369 */
6370 imsm_set_array_size(new_dev);
6371
6372 ret_val = 1;
6373 }
6374 }
6375
6376 if (tofree)
6377 *space_list = tofree;
6378 return ret_val;
6379
6380error_disk_add:
6381 dprintf("Error: imsm: Cannot find disk.\n");
6382 return ret_val;
6383}
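/* Example: migrating a three-disk raid0 to raid5 grows map[0] to four
 * members and fills the new slot with the spare passed in
 * u->new_disks[0], while map[1] keeps the original three-member raid0
 * layout for the duration of the general migration; the array size is
 * then recalculated for the new member count.
 */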
6384
6385
2e5dc010
N
6386static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6387 struct intel_super *super,
6388 void ***space_list)
6389{
6390 struct dl *new_disk;
6391 struct intel_dev *id;
6392 int i;
6393 int delta_disks = u->new_raid_disks - u->old_raid_disks;
ee4beede 6394 int disk_count = u->old_raid_disks;
2e5dc010
N
6395 void **tofree = NULL;
6396 int devices_to_reshape = 1;
6397 struct imsm_super *mpb = super->anchor;
6398 int ret_val = 0;
d098291a 6399 unsigned int dev_id;
2e5dc010 6400
ed7333bd 6401 dprintf("imsm: apply_reshape_container_disks_update()\n");
2e5dc010
N
6402
6403 /* enable spares to use in array */
6404 for (i = 0; i < delta_disks; i++) {
6405 new_disk = get_disk_super(super,
6406 major(u->new_disks[i]),
6407 minor(u->new_disks[i]));
ed7333bd
AK
6408 dprintf("imsm: new disk for reshape is: %i:%i "
6409 "(%p, index = %i)\n",
2e5dc010
N
6410 major(u->new_disks[i]), minor(u->new_disks[i]),
6411 new_disk, new_disk->index);
6412 if ((new_disk == NULL) ||
6413 ((new_disk->index >= 0) &&
6414 (new_disk->index < u->old_raid_disks)))
6415 goto update_reshape_exit;
ee4beede 6416 new_disk->index = disk_count++;
2e5dc010
N
6417 /* slot to fill in autolayout
6418 */
6419 new_disk->raiddisk = new_disk->index;
6420 new_disk->disk.status |=
6421 CONFIGURED_DISK;
6422 new_disk->disk.status &= ~SPARE_DISK;
6423 }
6424
ed7333bd
AK
6425 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6426 mpb->num_raid_devs);
2e5dc010
N
6427 /* manage changes in volume
6428 */
d098291a 6429 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
2e5dc010
N
6430 void **sp = *space_list;
6431 struct imsm_dev *newdev;
6432 struct imsm_map *newmap, *oldmap;
6433
d098291a
AK
6434 for (id = super->devlist ; id; id = id->next) {
6435 if (id->index == dev_id)
6436 break;
6437 }
6438 if (id == NULL)
6439 break;
2e5dc010
N
6440 if (!sp)
6441 continue;
6442 *space_list = *sp;
6443 newdev = (void*)sp;
6444 /* Copy the dev, but not (all of) the map */
6445 memcpy(newdev, id->dev, sizeof(*newdev));
6446 oldmap = get_imsm_map(id->dev, 0);
6447 newmap = get_imsm_map(newdev, 0);
6448 /* Copy the current map */
6449 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6450 /* update one device only
6451 */
6452 if (devices_to_reshape) {
ed7333bd
AK
6453 dprintf("imsm: modifying subdev: %i\n",
6454 id->index);
2e5dc010
N
6455 devices_to_reshape--;
6456 newdev->vol.migr_state = 1;
6457 newdev->vol.curr_migr_unit = 0;
6458 newdev->vol.migr_type = MIGR_GEN_MIGR;
6459 newmap->num_members = u->new_raid_disks;
6460 for (i = 0; i < delta_disks; i++) {
6461 set_imsm_ord_tbl_ent(newmap,
6462 u->old_raid_disks + i,
6463 u->old_raid_disks + i);
6464 }
6465 /* New map is correct, now need to save old map
6466 */
6467 newmap = get_imsm_map(newdev, 1);
6468 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6469
70bdf0dc 6470 imsm_set_array_size(newdev);
2e5dc010
N
6471 }
6472
6473 sp = (void **)id->dev;
6474 id->dev = newdev;
6475 *sp = tofree;
6476 tofree = sp;
8e59f3d8
AK
6477
6478 /* Clear migration record */
6479 memset(super->migr_rec, 0, sizeof(struct migr_record));
2e5dc010 6480 }
819bc634
AK
6481 if (tofree)
6482 *space_list = tofree;
2e5dc010
N
6483 ret_val = 1;
6484
6485update_reshape_exit:
6486
6487 return ret_val;
6488}
6489
bb025c2f 6490static int apply_takeover_update(struct imsm_update_takeover *u,
8ca6df95
KW
6491 struct intel_super *super,
6492 void ***space_list)
bb025c2f
KW
6493{
6494 struct imsm_dev *dev = NULL;
8ca6df95
KW
6495 struct intel_dev *dv;
6496 struct imsm_dev *dev_new;
bb025c2f
KW
6497 struct imsm_map *map;
6498 struct dl *dm, *du;
8ca6df95 6499 int i;
bb025c2f
KW
6500
6501 for (dv = super->devlist; dv; dv = dv->next)
6502 if (dv->index == (unsigned int)u->subarray) {
6503 dev = dv->dev;
6504 break;
6505 }
6506
6507 if (dev == NULL)
6508 return 0;
6509
6510 map = get_imsm_map(dev, 0);
6511
6512 if (u->direction == R10_TO_R0) {
43d5ec18
KW
6513 /* Number of failed disks must be half of initial disk number */
6514 if (imsm_count_failed(super, dev) != (map->num_members / 2))
6515 return 0;
6516
bb025c2f
KW
6517 /* iterate through devices to mark removed disks as spare */
6518 for (dm = super->disks; dm; dm = dm->next) {
6519 if (dm->disk.status & FAILED_DISK) {
6520 int idx = dm->index;
6521 /* update indexes on the disk list */
6522/* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6523 the index values will end up being correct.... NB */
6524 for (du = super->disks; du; du = du->next)
6525 if (du->index > idx)
6526 du->index--;
6527 /* mark as spare disk */
6528 dm->disk.status = SPARE_DISK;
6529 dm->index = -1;
6530 }
6531 }
bb025c2f
KW
6532 /* update map */
6533 map->num_members = map->num_members / 2;
6534 map->map_state = IMSM_T_STATE_NORMAL;
6535 map->num_domains = 1;
6536 map->raid_level = 0;
6537 map->failed_disk_num = -1;
6538 }
6539
8ca6df95
KW
6540 if (u->direction == R0_TO_R10) {
6541 void **space;
6542 /* update slots in current disk list */
6543 for (dm = super->disks; dm; dm = dm->next) {
6544 if (dm->index >= 0)
6545 dm->index *= 2;
6546 }
6547 /* create new *missing* disks */
6548 for (i = 0; i < map->num_members; i++) {
6549 space = *space_list;
6550 if (!space)
6551 continue;
6552 *space_list = *space;
6553 du = (void *)space;
6554 memcpy(du, super->disks, sizeof(*du));
8ca6df95
KW
6555 du->fd = -1;
6556 du->minor = 0;
6557 du->major = 0;
6558 du->index = (i * 2) + 1;
6559 sprintf((char *)du->disk.serial,
6560 " MISSING_%d", du->index);
6561 sprintf((char *)du->serial,
6562 "MISSING_%d", du->index);
6563 du->next = super->missing;
6564 super->missing = du;
6565 }
6566 /* create new dev and map */
6567 space = *space_list;
6568 if (!space)
6569 return 0;
6570 *space_list = *space;
6571 dev_new = (void *)space;
6572 memcpy(dev_new, dev, sizeof(*dev));
6573 /* update new map */
6574 map = get_imsm_map(dev_new, 0);
8ca6df95 6575 map->num_members = map->num_members * 2;
1a2487c2 6576 map->map_state = IMSM_T_STATE_DEGRADED;
8ca6df95
KW
6577 map->num_domains = 2;
6578 map->raid_level = 1;
6579 /* replace dev<->dev_new */
6580 dv->dev = dev_new;
6581 }
bb025c2f
KW
6582 /* update disk order table */
6583 for (du = super->disks; du; du = du->next)
6584 if (du->index >= 0)
6585 set_imsm_ord_tbl_ent(map, du->index, du->index);
8ca6df95 6586 for (du = super->missing; du; du = du->next)
1a2487c2
KW
6587 if (du->index >= 0) {
6588 set_imsm_ord_tbl_ent(map, du->index, du->index);
6589 mark_missing(dev_new, &du->disk, du->index);
6590 }
bb025c2f
KW
6591
6592 return 1;
6593}
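/* Example of the R0_TO_R10 remapping above: a two-disk raid0 with
 * member indexes 0 and 1 becomes a four-slot raid10 in which the
 * original disks move to indexes 0 and 2 (dm->index *= 2) and the
 * freshly created MISSING_1 and MISSING_3 placeholders occupy the odd
 * slots, so the volume comes up degraded until real mirrors are added.
 */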
6594
e8319a19
DW
6595static void imsm_process_update(struct supertype *st,
6596 struct metadata_update *update)
6597{
6598 /**
6599 * crack open the metadata_update envelope to find the update record
6600 * update can be one of:
d195167d
AK
6601 * update_reshape_container_disks - all the arrays in the container
6602 * are being reshaped to have more devices. We need to mark
6603 * the arrays for general migration and convert selected spares
6604 * into active devices.
6605 * update_activate_spare - a spare device has replaced a failed
e8319a19
DW
6606 * device in an array, update the disk_ord_tbl. If this disk is
6607 * present in all member arrays then also clear the SPARE_DISK
6608 * flag
d195167d
AK
6609 * update_create_array
6610 * update_kill_array
6611 * update_rename_array
6612 * update_add_remove_disk
e8319a19
DW
6613 */
6614 struct intel_super *super = st->sb;
4d7b1503 6615 struct imsm_super *mpb;
e8319a19
DW
6616 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6617
4d7b1503
DW
6618 /* update requires a larger buf but the allocation failed */
6619 if (super->next_len && !super->next_buf) {
6620 super->next_len = 0;
6621 return;
6622 }
6623
6624 if (super->next_buf) {
6625 memcpy(super->next_buf, super->buf, super->len);
6626 free(super->buf);
6627 super->len = super->next_len;
6628 super->buf = super->next_buf;
6629
6630 super->next_len = 0;
6631 super->next_buf = NULL;
6632 }
6633
6634 mpb = super->anchor;
6635
e8319a19 6636 switch (type) {
bb025c2f
KW
6637 case update_takeover: {
6638 struct imsm_update_takeover *u = (void *)update->buf;
1a2487c2
KW
6639 if (apply_takeover_update(u, super, &update->space_list)) {
6640 imsm_update_version_info(super);
bb025c2f 6641 super->updates_pending++;
1a2487c2 6642 }
bb025c2f
KW
6643 break;
6644 }
6645
78b10e66 6646 case update_reshape_container_disks: {
d195167d 6647 struct imsm_update_reshape *u = (void *)update->buf;
2e5dc010
N
6648 if (apply_reshape_container_disks_update(
6649 u, super, &update->space_list))
6650 super->updates_pending++;
78b10e66
N
6651 break;
6652 }
48c5303a 6653 case update_reshape_migration: {
a29911da
PC
6654 struct imsm_update_reshape_migration *u = (void *)update->buf;
6655 if (apply_reshape_migration_update(
6656 u, super, &update->space_list))
6657 super->updates_pending++;
48c5303a
PC
6658 break;
6659 }
e8319a19
DW
6660 case update_activate_spare: {
6661 struct imsm_update_activate_spare *u = (void *) update->buf;
949c47a0 6662 struct imsm_dev *dev = get_imsm_dev(super, u->array);
a965f303 6663 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd 6664 struct imsm_map *migr_map;
e8319a19
DW
6665 struct active_array *a;
6666 struct imsm_disk *disk;
0c046afd 6667 __u8 to_state;
e8319a19 6668 struct dl *dl;
e8319a19 6669 unsigned int found;
0c046afd 6670 int failed;
98130f40 6671 int victim = get_imsm_disk_idx(dev, u->slot, -1);
e8319a19
DW
6672 int i;
6673
6674 for (dl = super->disks; dl; dl = dl->next)
d23fe947 6675 if (dl == u->dl)
e8319a19
DW
6676 break;
6677
6678 if (!dl) {
6679 fprintf(stderr, "error: imsm_activate_spare passed "
1f24f035
DW
6680 "an unknown disk (index: %d)\n",
6681 u->dl->index);
e8319a19
DW
6682 return;
6683 }
6684
6685 super->updates_pending++;
0c046afd
DW
6686 /* count failures (excluding rebuilds and the victim)
6687 * to determine map[0] state
6688 */
6689 failed = 0;
6690 for (i = 0; i < map->num_members; i++) {
6691 if (i == u->slot)
6692 continue;
98130f40
AK
6693 disk = get_imsm_disk(super,
6694 get_imsm_disk_idx(dev, i, -1));
25ed7e59 6695 if (!disk || is_failed(disk))
0c046afd
DW
6696 failed++;
6697 }
6698
d23fe947
DW
6699 /* adding a pristine spare, assign a new index */
6700 if (dl->index < 0) {
6701 dl->index = super->anchor->num_disks;
6702 super->anchor->num_disks++;
6703 }
d23fe947 6704 disk = &dl->disk;
f2f27e63
DW
6705 disk->status |= CONFIGURED_DISK;
6706 disk->status &= ~SPARE_DISK;
e8319a19 6707
0c046afd
DW
6708 /* mark rebuild */
6709 to_state = imsm_check_degraded(super, dev, failed);
6710 map->map_state = IMSM_T_STATE_DEGRADED;
8e59f3d8 6711 migrate(dev, super, to_state, MIGR_REBUILD);
0c046afd
DW
6712 migr_map = get_imsm_map(dev, 1);
6713 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6714 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6715
148acb7b
DW
6716 /* update the family_num to mark a new container
6717 * generation, being careful to record the existing
6718 * family_num in orig_family_num to clean up after
6719 * earlier mdadm versions that neglected to set it.
6720 */
6721 if (mpb->orig_family_num == 0)
6722 mpb->orig_family_num = mpb->family_num;
6723 mpb->family_num += super->random;
6724
e8319a19
DW
6725 /* count arrays using the victim in the metadata */
6726 found = 0;
6727 for (a = st->arrays; a ; a = a->next) {
949c47a0 6728 dev = get_imsm_dev(super, a->info.container_member);
620b1713
DW
6729 map = get_imsm_map(dev, 0);
6730
6731 if (get_imsm_disk_slot(map, victim) >= 0)
6732 found++;
e8319a19
DW
6733 }
6734
24565c9a 6735 /* delete the victim if it is no longer being
e8319a19
DW
6736 * utilized anywhere
6737 */
e8319a19 6738 if (!found) {
ae6aad82 6739 struct dl **dlp;
24565c9a 6740
47ee5a45
DW
6741 /* We know that 'manager' isn't touching anything,
6742 * so it is safe to delete
6743 */
24565c9a 6744 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
ae6aad82
DW
6745 if ((*dlp)->index == victim)
6746 break;
47ee5a45
DW
6747
6748 /* victim may be on the missing list */
6749 if (!*dlp)
6750 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
6751 if ((*dlp)->index == victim)
6752 break;
24565c9a 6753 imsm_delete(super, dlp, victim);
e8319a19 6754 }
8273f55e
DW
6755 break;
6756 }
6757 case update_create_array: {
6758 /* someone wants to create a new array, we need to be aware of
6759 * a few races/collisions:
6760 * 1/ 'Create' called by two separate instances of mdadm
6761 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6762 * devices that have since been assimilated via
6763 * activate_spare.
6764 * In the event this update can not be carried out mdadm will
6765 * (FIX ME) notice that its update did not take hold.
6766 */
6767 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 6768 struct intel_dev *dv;
8273f55e
DW
6769 struct imsm_dev *dev;
6770 struct imsm_map *map, *new_map;
6771 unsigned long long start, end;
6772 unsigned long long new_start, new_end;
6773 int i;
54c2c1ea
DW
6774 struct disk_info *inf;
6775 struct dl *dl;
8273f55e
DW
6776
6777 /* handle racing creates: first come first serve */
6778 if (u->dev_idx < mpb->num_raid_devs) {
6779 dprintf("%s: subarray %d already defined\n",
6780 __func__, u->dev_idx);
ba2de7ba 6781 goto create_error;
8273f55e
DW
6782 }
6783
6784 /* check update is next in sequence */
6785 if (u->dev_idx != mpb->num_raid_devs) {
6a3e913e
DW
6786 dprintf("%s: can not create array %d expected index %d\n",
6787 __func__, u->dev_idx, mpb->num_raid_devs);
ba2de7ba 6788 goto create_error;
8273f55e
DW
6789 }
6790
a965f303 6791 new_map = get_imsm_map(&u->dev, 0);
8273f55e
DW
6792 new_start = __le32_to_cpu(new_map->pba_of_lba0);
6793 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
54c2c1ea 6794 inf = get_disk_info(u);
8273f55e
DW
6795
6796 /* handle activate_spare versus create race:
6797 * check to make sure that overlapping arrays do not include
 6798 * overlapping disks
6799 */
6800 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 6801 dev = get_imsm_dev(super, i);
a965f303 6802 map = get_imsm_map(dev, 0);
8273f55e
DW
6803 start = __le32_to_cpu(map->pba_of_lba0);
6804 end = start + __le32_to_cpu(map->blocks_per_member);
6805 if ((new_start >= start && new_start <= end) ||
6806 (start >= new_start && start <= new_end))
54c2c1ea
DW
6807 /* overlap */;
6808 else
6809 continue;
6810
6811 if (disks_overlap(super, i, u)) {
8273f55e 6812 dprintf("%s: arrays overlap\n", __func__);
ba2de7ba 6813 goto create_error;
8273f55e
DW
6814 }
6815 }
8273f55e 6816
949c47a0
DW
6817 /* check that prepare update was successful */
6818 if (!update->space) {
6819 dprintf("%s: prepare update failed\n", __func__);
ba2de7ba 6820 goto create_error;
949c47a0
DW
6821 }
6822
54c2c1ea
DW
6823 /* check that all disks are still active before committing
6824 * changes. FIXME: could we instead handle this by creating a
6825 * degraded array? That's probably not what the user expects,
6826 * so better to drop this update on the floor.
6827 */
6828 for (i = 0; i < new_map->num_members; i++) {
6829 dl = serial_to_dl(inf[i].serial, super);
6830 if (!dl) {
6831 dprintf("%s: disk disappeared\n", __func__);
ba2de7ba 6832 goto create_error;
54c2c1ea 6833 }
949c47a0
DW
6834 }
6835
8273f55e 6836 super->updates_pending++;
54c2c1ea
DW
6837
6838 /* convert spares to members and fixup ord_tbl */
6839 for (i = 0; i < new_map->num_members; i++) {
6840 dl = serial_to_dl(inf[i].serial, super);
6841 if (dl->index == -1) {
6842 dl->index = mpb->num_disks;
6843 mpb->num_disks++;
6844 dl->disk.status |= CONFIGURED_DISK;
6845 dl->disk.status &= ~SPARE_DISK;
6846 }
6847 set_imsm_ord_tbl_ent(new_map, i, dl->index);
6848 }
6849
ba2de7ba
DW
6850 dv = update->space;
6851 dev = dv->dev;
949c47a0
DW
6852 update->space = NULL;
6853 imsm_copy_dev(dev, &u->dev);
ba2de7ba
DW
6854 dv->index = u->dev_idx;
6855 dv->next = super->devlist;
6856 super->devlist = dv;
8273f55e 6857 mpb->num_raid_devs++;
8273f55e 6858
4d1313e9 6859 imsm_update_version_info(super);
8273f55e 6860 break;
ba2de7ba
DW
6861 create_error:
6862 /* mdmon knows how to release update->space, but not
6863 * ((struct intel_dev *) update->space)->dev
6864 */
6865 if (update->space) {
6866 dv = update->space;
6867 free(dv->dev);
6868 }
8273f55e 6869 break;
e8319a19 6870 }
33414a01
DW
6871 case update_kill_array: {
6872 struct imsm_update_kill_array *u = (void *) update->buf;
6873 int victim = u->dev_idx;
6874 struct active_array *a;
6875 struct intel_dev **dp;
6876 struct imsm_dev *dev;
6877
6878 /* sanity check that we are not affecting the uuid of
6879 * active arrays, or deleting an active array
6880 *
6881 * FIXME when immutable ids are available, but note that
6882 * we'll also need to fixup the invalidated/active
6883 * subarray indexes in mdstat
6884 */
6885 for (a = st->arrays; a; a = a->next)
6886 if (a->info.container_member >= victim)
6887 break;
6888 /* by definition if mdmon is running at least one array
6889 * is active in the container, so checking
6890 * mpb->num_raid_devs is just extra paranoia
6891 */
6892 dev = get_imsm_dev(super, victim);
6893 if (a || !dev || mpb->num_raid_devs == 1) {
6894 dprintf("failed to delete subarray-%d\n", victim);
6895 break;
6896 }
6897
6898 for (dp = &super->devlist; *dp;)
f21e18ca 6899 if ((*dp)->index == (unsigned)super->current_vol) {
33414a01
DW
6900 *dp = (*dp)->next;
6901 } else {
f21e18ca 6902 if ((*dp)->index > (unsigned)victim)
33414a01
DW
6903 (*dp)->index--;
6904 dp = &(*dp)->next;
6905 }
6906 mpb->num_raid_devs--;
6907 super->updates_pending++;
6908 break;
6909 }
aa534678
DW
6910 case update_rename_array: {
6911 struct imsm_update_rename_array *u = (void *) update->buf;
6912 char name[MAX_RAID_SERIAL_LEN+1];
6913 int target = u->dev_idx;
6914 struct active_array *a;
6915 struct imsm_dev *dev;
6916
6917 /* sanity check that we are not affecting the uuid of
6918 * an active array
6919 */
6920 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
6921 name[MAX_RAID_SERIAL_LEN] = '\0';
6922 for (a = st->arrays; a; a = a->next)
6923 if (a->info.container_member == target)
6924 break;
6925 dev = get_imsm_dev(super, u->dev_idx);
6926 if (a || !dev || !check_name(super, name, 1)) {
6927 dprintf("failed to rename subarray-%d\n", target);
6928 break;
6929 }
6930
cdbe98cd 6931 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
aa534678
DW
6932 super->updates_pending++;
6933 break;
6934 }
1a64be56 6935 case update_add_remove_disk: {
43dad3d6 6936 /* we may be able to repair some arrays if disks are
1a64be56
LM
 6937 * being added, check the status of add_remove_disk
 6938 * to see whether disks have been added.
6939 */
6940 if (add_remove_disk_update(super)) {
43dad3d6 6941 struct active_array *a;
072b727f
DW
6942
6943 super->updates_pending++;
1a64be56 6944 for (a = st->arrays; a; a = a->next)
43dad3d6
DW
6945 a->check_degraded = 1;
6946 }
43dad3d6 6947 break;
e8319a19 6948 }
1a64be56
LM
6949 default:
 6950 fprintf(stderr, "error: unsupported process update type "
 6951 "(type: %d)\n", type);
6952 }
e8319a19 6953}
88758e9d 6954
bc0b9d34
PC
6955static struct mdinfo *get_spares_for_grow(struct supertype *st);
6956
8273f55e
DW
6957static void imsm_prepare_update(struct supertype *st,
6958 struct metadata_update *update)
6959{
949c47a0 6960 /**
4d7b1503
DW
6961 * Allocate space to hold new disk entries, raid-device entries or a new
6962 * mpb if necessary. The manager synchronously waits for updates to
6963 * complete in the monitor, so new mpb buffers allocated here can be
6964 * integrated by the monitor thread without worrying about live pointers
6965 * in the manager thread.
8273f55e 6966 */
949c47a0 6967 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
4d7b1503
DW
6968 struct intel_super *super = st->sb;
6969 struct imsm_super *mpb = super->anchor;
6970 size_t buf_len;
6971 size_t len = 0;
949c47a0
DW
6972
6973 switch (type) {
abedf5fc
KW
6974 case update_takeover: {
6975 struct imsm_update_takeover *u = (void *)update->buf;
6976 if (u->direction == R0_TO_R10) {
6977 void **tail = (void **)&update->space_list;
6978 struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
6979 struct imsm_map *map = get_imsm_map(dev, 0);
6980 int num_members = map->num_members;
6981 void *space;
6982 int size, i;
6983 int err = 0;
6984 /* allocate memory for added disks */
6985 for (i = 0; i < num_members; i++) {
6986 size = sizeof(struct dl);
6987 space = malloc(size);
6988 if (!space) {
6989 err++;
6990 break;
6991 }
6992 *tail = space;
6993 tail = space;
6994 *tail = NULL;
6995 }
6996 /* allocate memory for new device */
6997 size = sizeof_imsm_dev(super->devlist->dev, 0) +
6998 (num_members * sizeof(__u32));
6999 space = malloc(size);
7000 if (!space)
7001 err++;
7002 else {
7003 *tail = space;
7004 tail = space;
7005 *tail = NULL;
7006 }
7007 if (!err) {
7008 len = disks_to_mpb_size(num_members * 2);
7009 } else {
 7010 /* if an allocation failed, free the buffers allocated so far */
7011 while (update->space_list) {
7012 void **sp = update->space_list;
7013 update->space_list = *sp;
7014 free(sp);
7015 }
7016 }
7017 }
7018
7019 break;
7020 }
78b10e66 7021 case update_reshape_container_disks: {
d195167d
AK
7022 /* Every raid device in the container is about to
7023 * gain some more devices, and we will enter a
7024 * reconfiguration.
7025 * So each 'imsm_map' will be bigger, and the imsm_vol
7026 * will now hold 2 of them.
7027 * Thus we need new 'struct imsm_dev' allocations sized
7028 * as sizeof_imsm_dev but with more devices in both maps.
7029 */
7030 struct imsm_update_reshape *u = (void *)update->buf;
7031 struct intel_dev *dl;
7032 void **space_tail = (void**)&update->space_list;
7033
7034 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7035
7036 for (dl = super->devlist; dl; dl = dl->next) {
7037 int size = sizeof_imsm_dev(dl->dev, 1);
7038 void *s;
d677e0b8
AK
7039 if (u->new_raid_disks > u->old_raid_disks)
7040 size += sizeof(__u32)*2*
7041 (u->new_raid_disks - u->old_raid_disks);
d195167d
AK
7042 s = malloc(size);
7043 if (!s)
7044 break;
7045 *space_tail = s;
7046 space_tail = s;
7047 *space_tail = NULL;
7048 }
7049
7050 len = disks_to_mpb_size(u->new_raid_disks);
7051 dprintf("New anchor length is %llu\n", (unsigned long long)len);
78b10e66
N
7052 break;
7053 }
48c5303a 7054 case update_reshape_migration: {
bc0b9d34
PC
 7055 /* for a level 0->5 migration we need to add disks, so, as with
 7056 * the container operation, we copy the device into a bigger
 7057 * allocation.
 7058 * The enlarged device and the new disk entry are prepared in
 7059 * memory here for later use in process_update
7060 */
7061 struct imsm_update_reshape_migration *u = (void *)update->buf;
7062 struct intel_dev *id;
7063 void **space_tail = (void **)&update->space_list;
7064 int size;
7065 void *s;
7066 int current_level = -1;
7067
 7068 dprintf("imsm: imsm_prepare_update() for update_reshape_migration\n");
7069
7070 /* add space for bigger array in update
7071 */
7072 for (id = super->devlist; id; id = id->next) {
7073 if (id->index == (unsigned)u->subdev) {
7074 size = sizeof_imsm_dev(id->dev, 1);
7075 if (u->new_raid_disks > u->old_raid_disks)
7076 size += sizeof(__u32)*2*
7077 (u->new_raid_disks - u->old_raid_disks);
7078 s = malloc(size);
7079 if (!s)
7080 break;
7081 *space_tail = s;
7082 space_tail = s;
7083 *space_tail = NULL;
7084 break;
7085 }
7086 }
7087 if (update->space_list == NULL)
7088 break;
7089
7090 /* add space for disk in update
7091 */
7092 size = sizeof(struct dl);
7093 s = malloc(size);
7094 if (!s) {
7095 free(update->space_list);
7096 update->space_list = NULL;
7097 break;
7098 }
7099 *space_tail = s;
7100 space_tail = s;
7101 *space_tail = NULL;
7102
7103 /* add spare device to update
7104 */
7105 for (id = super->devlist ; id; id = id->next)
7106 if (id->index == (unsigned)u->subdev) {
7107 struct imsm_dev *dev;
7108 struct imsm_map *map;
7109
7110 dev = get_imsm_dev(super, u->subdev);
7111 map = get_imsm_map(dev, 0);
7112 current_level = map->raid_level;
7113 break;
7114 }
7115 if ((u->new_level == 5) && (u->new_level != current_level)) {
7116 struct mdinfo *spares;
7117
7118 spares = get_spares_for_grow(st);
7119 if (spares) {
7120 struct dl *dl;
7121 struct mdinfo *dev;
7122
7123 dev = spares->devs;
7124 if (dev) {
7125 u->new_disks[0] =
7126 makedev(dev->disk.major,
7127 dev->disk.minor);
7128 dl = get_disk_super(super,
7129 dev->disk.major,
7130 dev->disk.minor);
7131 dl->index = u->old_raid_disks;
7132 dev = dev->next;
7133 }
7134 sysfs_free(spares);
7135 }
7136 }
7137 len = disks_to_mpb_size(u->new_raid_disks);
7138 dprintf("New anchor length is %llu\n", (unsigned long long)len);
48c5303a
PC
7139 break;
7140 }
949c47a0
DW
7141 case update_create_array: {
7142 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 7143 struct intel_dev *dv;
54c2c1ea
DW
7144 struct imsm_dev *dev = &u->dev;
7145 struct imsm_map *map = get_imsm_map(dev, 0);
7146 struct dl *dl;
7147 struct disk_info *inf;
7148 int i;
7149 int activate = 0;
949c47a0 7150
54c2c1ea
DW
7151 inf = get_disk_info(u);
7152 len = sizeof_imsm_dev(dev, 1);
ba2de7ba
DW
7153 /* allocate a new super->devlist entry */
7154 dv = malloc(sizeof(*dv));
7155 if (dv) {
7156 dv->dev = malloc(len);
7157 if (dv->dev)
7158 update->space = dv;
7159 else {
7160 free(dv);
7161 update->space = NULL;
7162 }
7163 }
949c47a0 7164
54c2c1ea
DW
7165 /* count how many spares will be converted to members */
7166 for (i = 0; i < map->num_members; i++) {
7167 dl = serial_to_dl(inf[i].serial, super);
7168 if (!dl) {
 7169 /* hmm, maybe it failed?  Nothing we can do about
 7170 * it here
7171 */
7172 continue;
7173 }
7174 if (count_memberships(dl, super) == 0)
7175 activate++;
7176 }
7177 len += activate * sizeof(struct imsm_disk);
949c47a0
DW
7178 break;
7179 default:
7180 break;
7181 }
7182 }
8273f55e 7183
4d7b1503
DW
7184 /* check if we need a larger metadata buffer */
7185 if (super->next_buf)
7186 buf_len = super->next_len;
7187 else
7188 buf_len = super->len;
7189
7190 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
7191 /* ok we need a larger buf than what is currently allocated
7192 * if this allocation fails process_update will notice that
7193 * ->next_len is set and ->next_buf is NULL
7194 */
7195 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
7196 if (super->next_buf)
7197 free(super->next_buf);
7198
7199 super->next_len = buf_len;
1f45a8ad
DW
7200 if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
7201 memset(super->next_buf, 0, buf_len);
7202 else
4d7b1503
DW
7203 super->next_buf = NULL;
7204 }
8273f55e
DW
7205}
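/* A minimal sketch (not part of this file) of how the failed pre-allocation
 * above can be detected later: ->next_len records the size that was wanted
 * while ->next_buf stays NULL when posix_memalign() fails.
 *
 *	if (super->next_len && super->next_buf == NULL) {
 *		// allocation failed in the prepare step; the update
 *		// cannot be applied until a buffer is available
 *	}
 */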
7206
ae6aad82 7207/* must be called while the manager is quiesced */
f21e18ca 7208static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
ae6aad82
DW
7209{
7210 struct imsm_super *mpb = super->anchor;
ae6aad82
DW
7211 struct dl *iter;
7212 struct imsm_dev *dev;
7213 struct imsm_map *map;
24565c9a
DW
7214 int i, j, num_members;
7215 __u32 ord;
ae6aad82 7216
24565c9a
DW
7217 dprintf("%s: deleting device[%d] from imsm_super\n",
7218 __func__, index);
ae6aad82
DW
7219
7220 /* shift all indexes down one */
7221 for (iter = super->disks; iter; iter = iter->next)
f21e18ca 7222 if (iter->index > (int)index)
ae6aad82 7223 iter->index--;
47ee5a45 7224 for (iter = super->missing; iter; iter = iter->next)
f21e18ca 7225 if (iter->index > (int)index)
47ee5a45 7226 iter->index--;
ae6aad82
DW
7227
7228 for (i = 0; i < mpb->num_raid_devs; i++) {
7229 dev = get_imsm_dev(super, i);
7230 map = get_imsm_map(dev, 0);
24565c9a
DW
7231 num_members = map->num_members;
7232 for (j = 0; j < num_members; j++) {
7233 /* update ord entries being careful not to propagate
7234 * ord-flags to the first map
7235 */
98130f40 7236 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ae6aad82 7237
24565c9a
DW
7238 if (ord_to_idx(ord) <= index)
7239 continue;
ae6aad82 7240
24565c9a
DW
7241 map = get_imsm_map(dev, 0);
7242 set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
7243 map = get_imsm_map(dev, 1);
7244 if (map)
7245 set_imsm_ord_tbl_ent(map, j, ord - 1);
ae6aad82
DW
7246 }
7247 }
7248
7249 mpb->num_disks--;
7250 super->updates_pending++;
24565c9a
DW
7251 if (*dlp) {
7252 struct dl *dl = *dlp;
7253
7254 *dlp = (*dlp)->next;
7255 __free_imsm_disk(dl);
7256 }
ae6aad82
DW
7257}
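/* Worked example of the ord-table shift above (assuming the usual IMSM
 * encoding where the low bits of an ord entry are the disk index and the
 * high bits are flags such as a rebuild marker): deleting index 2 turns an
 * entry of (FLAG | 5) into ord_to_idx(ord - 1) == 4 in the first map
 * (flags stripped) and (ord - 1) == (FLAG | 4) in the second map
 * (flags preserved).
 */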
7258
2cda7640
ML
7259static char disk_by_path[] = "/dev/disk/by-path/";
7260
7261static const char *imsm_get_disk_controller_domain(const char *path)
7262{
2cda7640 7263 char disk_path[PATH_MAX];
96234762
LM
7264 char *drv=NULL;
7265 struct stat st;
2cda7640 7266
96234762
LM
7267 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7268 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7269 if (stat(disk_path, &st) == 0) {
7270 struct sys_dev* hba;
7271 char *path=NULL;
7272
7273 path = devt_to_devpath(st.st_rdev);
7274 if (path == NULL)
7275 return "unknown";
7276 hba = find_disk_attached_hba(-1, path);
7277 if (hba && hba->type == SYS_DEV_SAS)
7278 drv = "isci";
7279 else if (hba && hba->type == SYS_DEV_SATA)
7280 drv = "ahci";
7281 else
7282 drv = "unknown";
7283 dprintf("path: %s hba: %s attached: %s\n",
7284 path, (hba) ? hba->path : "NULL", drv);
7285 free(path);
7286 if (hba)
7287 free_sys_dev(&hba);
2cda7640 7288 }
96234762 7289 return drv;
2cda7640
ML
7290}
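/* Usage sketch (hypothetical by-path name): a disk reached through a SATA
 * HBA resolves to the "ahci" domain, a SAS HBA to "isci":
 *
 *	const char *domain =
 *		imsm_get_disk_controller_domain("pci-0000:00:1f.2-ata-1.0");
 *	// "ahci" if find_disk_attached_hba() reports SYS_DEV_SATA
 */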
7291
78b10e66
N
7292static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7293{
7294 char subdev_name[20];
7295 struct mdstat_ent *mdstat;
7296
7297 sprintf(subdev_name, "%d", subdev);
7298 mdstat = mdstat_by_subdev(subdev_name, container);
7299 if (!mdstat)
7300 return -1;
7301
7302 *minor = mdstat->devnum;
7303 free_mdstat(mdstat);
7304 return 0;
7305}
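/* e.g. asking for subdev 0 of a container fills *minor with the md minor of
 * that member array as listed in /proc/mdstat and returns 0; -1 means no
 * matching entry was found there
 */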
7306
7307static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7308 struct geo_params *geo,
7309 int *old_raid_disks)
7310{
694575e7
KW
7311 /* currently we only support increasing the number of devices
 7312 * for a container. This increases the number of devices for each
7313 * member array. They must all be RAID0 or RAID5.
7314 */
78b10e66
N
7315 int ret_val = 0;
7316 struct mdinfo *info, *member;
7317 int devices_that_can_grow = 0;
7318
7319 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7320 "st->devnum = (%i)\n",
7321 st->devnum);
7322
7323 if (geo->size != -1 ||
7324 geo->level != UnSet ||
7325 geo->layout != UnSet ||
7326 geo->chunksize != 0 ||
7327 geo->raid_disks == UnSet) {
7328 dprintf("imsm: Container operation is allowed for "
7329 "raid disks number change only.\n");
7330 return ret_val;
7331 }
7332
7333 info = container_content_imsm(st, NULL);
7334 for (member = info; member; member = member->next) {
7335 int result;
7336 int minor;
7337
7338 dprintf("imsm: checking device_num: %i\n",
7339 member->container_member);
7340
d7d205bd 7341 if (geo->raid_disks <= member->array.raid_disks) {
78b10e66
N
7342 /* we work on container for Online Capacity Expansion
7343 * only so raid_disks has to grow
7344 */
7345 dprintf("imsm: for container operation raid disks "
7346 "increase is required\n");
7347 break;
7348 }
7349
7350 if ((info->array.level != 0) &&
7351 (info->array.level != 5)) {
 7352 /* we cannot use this container with any other raid level
 7353 */
690aae1a 7354 dprintf("imsm: for container operation wrong"
78b10e66
N
7355 " raid level (%i) detected\n",
7356 info->array.level);
7357 break;
7358 } else {
7359 /* check for platform support
7360 * for this raid level configuration
7361 */
7362 struct intel_super *super = st->sb;
7363 if (!is_raid_level_supported(super->orom,
7364 member->array.level,
7365 geo->raid_disks)) {
690aae1a 7366 dprintf("platform does not support raid%d with"
78b10e66
N
7367 " %d disk%s\n",
7368 info->array.level,
7369 geo->raid_disks,
7370 geo->raid_disks > 1 ? "s" : "");
7371 break;
7372 }
2a4a08e7
AK
7373 /* check if component size is aligned to chunk size
7374 */
7375 if (info->component_size %
7376 (info->array.chunk_size/512)) {
7377 dprintf("Component size is not aligned to "
7378 "chunk size\n");
7379 break;
7380 }
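 /* e.g. a 128 KiB chunk is 256 sectors, so the member size
  * (in 512-byte sectors) must be a multiple of 256
  */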
78b10e66
N
7381 }
7382
7383 if (*old_raid_disks &&
7384 info->array.raid_disks != *old_raid_disks)
7385 break;
7386 *old_raid_disks = info->array.raid_disks;
7387
7388 /* All raid5 and raid0 volumes in container
7389 * have to be ready for Online Capacity Expansion
7390 * so they need to be assembled. We have already
7391 * checked that no recovery etc is happening.
7392 */
7393 result = imsm_find_array_minor_by_subdev(member->container_member,
7394 st->container_dev,
7395 &minor);
7396 if (result < 0) {
7397 dprintf("imsm: cannot find array\n");
7398 break;
7399 }
7400 devices_that_can_grow++;
7401 }
7402 sysfs_free(info);
7403 if (!member && devices_that_can_grow)
7404 ret_val = 1;
7405
7406 if (ret_val)
7407 dprintf("\tContainer operation allowed\n");
7408 else
7409 dprintf("\tError: %i\n", ret_val);
7410
7411 return ret_val;
7412}
7413
7414/* Function: get_spares_for_grow
 7415 * Description: Allocates memory and creates a list of the spare devices
 7416 * available in the container. Checks if spare drive size is acceptable.
7417 * Parameters: Pointer to the supertype structure
7418 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
 7419 * NULL on failure
7420 */
7421static struct mdinfo *get_spares_for_grow(struct supertype *st)
7422{
78b10e66 7423 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
326727d9 7424 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
78b10e66
N
7425}
7426
7427/******************************************************************************
7428 * function: imsm_create_metadata_update_for_reshape
7429 * Function creates update for whole IMSM container.
7430 *
7431 ******************************************************************************/
7432static int imsm_create_metadata_update_for_reshape(
7433 struct supertype *st,
7434 struct geo_params *geo,
7435 int old_raid_disks,
7436 struct imsm_update_reshape **updatep)
7437{
7438 struct intel_super *super = st->sb;
7439 struct imsm_super *mpb = super->anchor;
7440 int update_memory_size = 0;
7441 struct imsm_update_reshape *u = NULL;
7442 struct mdinfo *spares = NULL;
7443 int i;
7444 int delta_disks = 0;
bbd24d86 7445 struct mdinfo *dev;
78b10e66
N
7446
7447 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7448 geo->raid_disks);
7449
7450 delta_disks = geo->raid_disks - old_raid_disks;
7451
7452 /* size of all update data without anchor */
7453 update_memory_size = sizeof(struct imsm_update_reshape);
7454
7455 /* now add space for spare disks that we need to add. */
7456 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
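 /* struct imsm_update_reshape already reserves room for one
  * new_disks entry, hence only delta_disks - 1 extra slots here
  */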
7457
7458 u = calloc(1, update_memory_size);
7459 if (u == NULL) {
7460 dprintf("error: "
7461 "cannot get memory for imsm_update_reshape update\n");
7462 return 0;
7463 }
7464 u->type = update_reshape_container_disks;
7465 u->old_raid_disks = old_raid_disks;
7466 u->new_raid_disks = geo->raid_disks;
7467
7468 /* now get spare disks list
7469 */
7470 spares = get_spares_for_grow(st);
7471
7472 if (spares == NULL
7473 || delta_disks > spares->array.spare_disks) {
e14e5960
KW
7474 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
7475 "for %s.\n", geo->dev_name);
78b10e66
N
7476 goto abort;
7477 }
7478
 7479 /* we have got spares
 7480 * update the disk list in the imsm_disk table in the anchor
 7481 */
7482 dprintf("imsm: %i spares are available.\n\n",
7483 spares->array.spare_disks);
7484
bbd24d86 7485 dev = spares->devs;
78b10e66 7486 for (i = 0; i < delta_disks; i++) {
78b10e66
N
7487 struct dl *dl;
7488
bbd24d86
AK
7489 if (dev == NULL)
7490 break;
78b10e66
N
7491 u->new_disks[i] = makedev(dev->disk.major,
7492 dev->disk.minor);
7493 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
ee4beede
AK
7494 dl->index = mpb->num_disks;
7495 mpb->num_disks++;
bbd24d86 7496 dev = dev->next;
78b10e66 7497 }
78b10e66
N
7498
7499abort:
7500 /* free spares
7501 */
7502 sysfs_free(spares);
7503
d677e0b8 7504 dprintf("imsm: reshape update preparation :");
78b10e66 7505 if (i == delta_disks) {
d677e0b8 7506 dprintf(" OK\n");
78b10e66
N
7507 *updatep = u;
7508 return update_memory_size;
7509 }
7510 free(u);
d677e0b8 7511 dprintf(" Error\n");
78b10e66
N
7512
7513 return 0;
7514}
7515
48c5303a
PC
7516/******************************************************************************
7517 * function: imsm_create_metadata_update_for_migration()
7518 * Creates update for IMSM array.
7519 *
7520 ******************************************************************************/
7521static int imsm_create_metadata_update_for_migration(
7522 struct supertype *st,
7523 struct geo_params *geo,
7524 struct imsm_update_reshape_migration **updatep)
7525{
7526 struct intel_super *super = st->sb;
7527 int update_memory_size = 0;
7528 struct imsm_update_reshape_migration *u = NULL;
7529 struct imsm_dev *dev;
7530 int previous_level = -1;
7531
7532 dprintf("imsm_create_metadata_update_for_migration(enter)"
7533 " New Level = %i\n", geo->level);
7534
7535 /* size of all update data without anchor */
7536 update_memory_size = sizeof(struct imsm_update_reshape_migration);
7537
7538 u = calloc(1, update_memory_size);
7539 if (u == NULL) {
7540 dprintf("error: cannot get memory for "
7541 "imsm_create_metadata_update_for_migration\n");
7542 return 0;
7543 }
7544 u->type = update_reshape_migration;
7545 u->subdev = super->current_vol;
7546 u->new_level = geo->level;
7547 u->new_layout = geo->layout;
7548 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
7549 u->new_disks[0] = -1;
4bba0439 7550 u->new_chunksize = -1;
48c5303a
PC
7551
7552 dev = get_imsm_dev(super, u->subdev);
7553 if (dev) {
7554 struct imsm_map *map;
7555
7556 map = get_imsm_map(dev, 0);
4bba0439
PC
7557 if (map) {
7558 int current_chunk_size =
7559 __le16_to_cpu(map->blocks_per_strip) / 2;
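 /* blocks_per_strip counts 512-byte sectors, so /2 yields KiB */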
7560
7561 if (geo->chunksize != current_chunk_size) {
7562 u->new_chunksize = geo->chunksize / 1024;
7563 dprintf("imsm: "
7564 "chunk size change from %i to %i\n",
7565 current_chunk_size, u->new_chunksize);
7566 }
48c5303a 7567 previous_level = map->raid_level;
4bba0439 7568 }
48c5303a
PC
7569 }
7570 if ((geo->level == 5) && (previous_level == 0)) {
7571 struct mdinfo *spares = NULL;
7572
7573 u->new_raid_disks++;
7574 spares = get_spares_for_grow(st);
7575 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
7576 free(u);
7577 sysfs_free(spares);
7578 update_memory_size = 0;
7579 dprintf("error: cannot get spare device "
7580 "for requested migration");
7581 return 0;
7582 }
7583 sysfs_free(spares);
7584 }
7585 dprintf("imsm: reshape update preparation : OK\n");
7586 *updatep = u;
7587
7588 return update_memory_size;
7589}
7590
8dd70bce
AK
7591static void imsm_update_metadata_locally(struct supertype *st,
7592 void *buf, int len)
7593{
7594 struct metadata_update mu;
7595
7596 mu.buf = buf;
7597 mu.len = len;
7598 mu.space = NULL;
7599 mu.space_list = NULL;
7600 mu.next = NULL;
7601 imsm_prepare_update(st, &mu);
7602 imsm_process_update(st, &mu);
7603
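 /* free whatever buffers imsm_prepare_update chained onto space_list
  * that imsm_process_update did not consume
  */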
7604 while (mu.space_list) {
7605 void **space = mu.space_list;
7606 mu.space_list = *space;
7607 free(space);
7608 }
7609}
78b10e66 7610
471bceb6 7611/***************************************************************************
694575e7 7612* Function: imsm_analyze_change
471bceb6
KW
 7613* Description: Analyzes the requested change for a single volume
 7614* and validates whether the transition is supported
694575e7
KW
7615* Parameters: Geometry parameters, supertype structure
 7616* Returns: Operation type code on success, -1 on failure
471bceb6
KW
7617****************************************************************************/
7618enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
7619 struct geo_params *geo)
694575e7 7620{
471bceb6
KW
7621 struct mdinfo info;
7622 int change = -1;
7623 int check_devs = 0;
c21e737b 7624 int chunk;
471bceb6
KW
7625
7626 getinfo_super_imsm_volume(st, &info, NULL);
7627
7628 if ((geo->level != info.array.level) &&
7629 (geo->level >= 0) &&
7630 (geo->level != UnSet)) {
7631 switch (info.array.level) {
7632 case 0:
7633 if (geo->level == 5) {
b5347799 7634 change = CH_MIGRATION;
471bceb6
KW
7635 check_devs = 1;
7636 }
7637 if (geo->level == 10) {
7638 change = CH_TAKEOVER;
7639 check_devs = 1;
7640 }
dfe77a9e
KW
7641 break;
7642 case 1:
7643 if (geo->level == 0) {
7644 change = CH_TAKEOVER;
7645 check_devs = 1;
7646 }
471bceb6 7647 break;
471bceb6
KW
7648 case 10:
7649 if (geo->level == 0) {
7650 change = CH_TAKEOVER;
7651 check_devs = 1;
7652 }
7653 break;
7654 }
7655 if (change == -1) {
7656 fprintf(stderr,
7657 Name " Error. Level Migration from %d to %d "
7658 "not supported!\n",
7659 info.array.level, geo->level);
7660 goto analyse_change_exit;
7661 }
7662 } else
7663 geo->level = info.array.level;
7664
7665 if ((geo->layout != info.array.layout)
7666 && ((geo->layout != UnSet) && (geo->layout != -1))) {
b5347799 7667 change = CH_MIGRATION;
471bceb6
KW
7668 if ((info.array.layout == 0)
7669 && (info.array.level == 5)
7670 && (geo->layout == 5)) {
7671 /* reshape 5 -> 4 */
7672 } else if ((info.array.layout == 5)
7673 && (info.array.level == 5)
7674 && (geo->layout == 0)) {
7675 /* reshape 4 -> 5 */
7676 geo->layout = 0;
7677 geo->level = 5;
7678 } else {
7679 fprintf(stderr,
7680 Name " Error. Layout Migration from %d to %d "
7681 "not supported!\n",
7682 info.array.layout, geo->layout);
7683 change = -1;
7684 goto analyse_change_exit;
7685 }
7686 } else
7687 geo->layout = info.array.layout;
7688
7689 if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
7690 && (geo->chunksize != info.array.chunk_size))
b5347799 7691 change = CH_MIGRATION;
471bceb6
KW
7692 else
7693 geo->chunksize = info.array.chunk_size;
7694
c21e737b 7695 chunk = geo->chunksize / 1024;
471bceb6
KW
7696 if (!validate_geometry_imsm(st,
7697 geo->level,
7698 geo->layout,
7699 geo->raid_disks,
c21e737b 7700 &chunk,
471bceb6
KW
7701 geo->size,
7702 0, 0, 1))
7703 change = -1;
7704
7705 if (check_devs) {
7706 struct intel_super *super = st->sb;
7707 struct imsm_super *mpb = super->anchor;
7708
7709 if (mpb->num_raid_devs > 1) {
7710 fprintf(stderr,
7711 Name " Error. Cannot perform operation on %s"
7712 "- for this operation it MUST be single "
7713 "array in container\n",
7714 geo->dev_name);
7715 change = -1;
7716 }
7717 }
7718
7719analyse_change_exit:
7720
7721 return change;
694575e7
KW
7722}
7723
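/* Summary of the transitions accepted above:
 *	raid0  -> raid5        CH_MIGRATION
 *	raid0 <-> raid10       CH_TAKEOVER
 *	raid1  -> raid0        CH_TAKEOVER
 *	layout or chunk size change on the same level -> CH_MIGRATION
 */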
bb025c2f
KW
7724int imsm_takeover(struct supertype *st, struct geo_params *geo)
7725{
7726 struct intel_super *super = st->sb;
7727 struct imsm_update_takeover *u;
7728
7729 u = malloc(sizeof(struct imsm_update_takeover));
7730 if (u == NULL)
7731 return 1;
7732
7733 u->type = update_takeover;
7734 u->subarray = super->current_vol;
7735
7736 /* 10->0 transition */
7737 if (geo->level == 0)
7738 u->direction = R10_TO_R0;
7739
0529c688
KW
7740 /* 0->10 transition */
7741 if (geo->level == 10)
7742 u->direction = R0_TO_R10;
7743
bb025c2f
KW
7744 /* update metadata locally */
7745 imsm_update_metadata_locally(st, u,
7746 sizeof(struct imsm_update_takeover));
7747 /* and possibly remotely */
7748 if (st->update_tail)
7749 append_metadata_update(st, u,
7750 sizeof(struct imsm_update_takeover));
7751 else
7752 free(u);
7753
7754 return 0;
7755}
7756
6dc0be30
AK
7757static int warn_user_about_risk(void)
7758{
7759 int rv = 0;
7760
7761 fprintf(stderr,
7762 "\nThis is an experimental feature. Data on the RAID volume(s) "
7763 "can be lost!!!\n\n"
7764 "To continue command execution please make sure that\n"
7765 "the grow process will not be interrupted. Use safe power\n"
7766 "supply to avoid unexpected system reboot. Make sure that\n"
7767 "reshaped container is not assembled automatically during\n"
7768 "system boot.\n"
7769 "If reshape is interrupted, assemble array manually\n"
7770 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
7771 "Assembly in scan mode is not possible in such case.\n"
7772 "Growing container with boot array is not possible.\n"
7773 "If boot array reshape is interrupted, whole file system\n"
7774 "can be lost.\n\n");
7775 rv = ask("Do you want to continue? ");
7776 fprintf(stderr, "\n");
7777
7778 return rv;
7779}
7780
78b10e66
N
7781static int imsm_reshape_super(struct supertype *st, long long size, int level,
7782 int layout, int chunksize, int raid_disks,
41784c88
AK
7783 int delta_disks, char *backup, char *dev,
7784 int verbose)
78b10e66 7785{
78b10e66
N
7786 int ret_val = 1;
7787 struct geo_params geo;
7788
7789 dprintf("imsm: reshape_super called.\n");
7790
71204a50 7791 memset(&geo, 0, sizeof(struct geo_params));
78b10e66
N
7792
7793 geo.dev_name = dev;
694575e7 7794 geo.dev_id = st->devnum;
78b10e66
N
7795 geo.size = size;
7796 geo.level = level;
7797 geo.layout = layout;
7798 geo.chunksize = chunksize;
7799 geo.raid_disks = raid_disks;
41784c88
AK
7800 if (delta_disks != UnSet)
7801 geo.raid_disks += delta_disks;
78b10e66
N
7802
7803 dprintf("\tfor level : %i\n", geo.level);
7804 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
7805
7806 if (experimental() == 0)
7807 return ret_val;
7808
78b10e66 7809 if (st->container_dev == st->devnum) {
694575e7
KW
 7810 /* On container level we can only increase the number of devices. */
7811 dprintf("imsm: info: Container operation\n");
78b10e66 7812 int old_raid_disks = 0;
6dc0be30
AK
7813
 7814 /* this warning will be removed when imsm checkpointing
 7815 * is implemented and restoring from a checkpoint
 7816 * becomes transparent across a reboot
7817 */
7818 if (warn_user_about_risk() == 0)
7819 return ret_val;
7820
78b10e66
N
7821 if (imsm_reshape_is_allowed_on_container(
7822 st, &geo, &old_raid_disks)) {
7823 struct imsm_update_reshape *u = NULL;
7824 int len;
7825
7826 len = imsm_create_metadata_update_for_reshape(
7827 st, &geo, old_raid_disks, &u);
7828
ed08d51c
AK
7829 if (len <= 0) {
7830 dprintf("imsm: Cannot prepare update\n");
7831 goto exit_imsm_reshape_super;
7832 }
7833
8dd70bce
AK
7834 ret_val = 0;
7835 /* update metadata locally */
7836 imsm_update_metadata_locally(st, u, len);
7837 /* and possibly remotely */
7838 if (st->update_tail)
7839 append_metadata_update(st, u, len);
7840 else
ed08d51c 7841 free(u);
8dd70bce 7842
694575e7 7843 } else {
e7ff7e40
AK
7844 fprintf(stderr, Name ": (imsm) Operation "
7845 "is not allowed on this container\n");
694575e7
KW
7846 }
7847 } else {
 7848 /* On volume level we support the following operations
471bceb6
KW
7849 * - takeover: raid10 -> raid0; raid0 -> raid10
7850 * - chunk size migration
7851 * - migration: raid5 -> raid0; raid0 -> raid5
7852 */
7853 struct intel_super *super = st->sb;
7854 struct intel_dev *dev = super->devlist;
7855 int change, devnum;
694575e7 7856 dprintf("imsm: info: Volume operation\n");
471bceb6
KW
7857 /* find requested device */
7858 while (dev) {
7859 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
7860 if (devnum == geo.dev_id)
7861 break;
7862 dev = dev->next;
7863 }
7864 if (dev == NULL) {
7865 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
7866 geo.dev_name, geo.dev_id);
7867 goto exit_imsm_reshape_super;
7868 }
7869 super->current_vol = dev->index;
694575e7
KW
7870 change = imsm_analyze_change(st, &geo);
7871 switch (change) {
471bceb6 7872 case CH_TAKEOVER:
bb025c2f 7873 ret_val = imsm_takeover(st, &geo);
694575e7 7874 break;
48c5303a
PC
7875 case CH_MIGRATION: {
7876 struct imsm_update_reshape_migration *u = NULL;
7877 int len =
7878 imsm_create_metadata_update_for_migration(
7879 st, &geo, &u);
7880 if (len < 1) {
7881 dprintf("imsm: "
7882 "Cannot prepare update\n");
7883 break;
7884 }
471bceb6 7885 ret_val = 0;
48c5303a
PC
7886 /* update metadata locally */
7887 imsm_update_metadata_locally(st, u, len);
7888 /* and possibly remotely */
7889 if (st->update_tail)
7890 append_metadata_update(st, u, len);
7891 else
7892 free(u);
7893 }
7894 break;
471bceb6
KW
7895 default:
7896 ret_val = 1;
694575e7 7897 }
694575e7 7898 }
78b10e66 7899
ed08d51c 7900exit_imsm_reshape_super:
78b10e66
N
7901 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
7902 return ret_val;
7903}
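/* note: ret_val follows the convention set at the top of the function: it
 * starts at 1 (failure) and is set to 0 only when the requested operation
 * was prepared and applied/queued successfully
 */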
2cda7640 7904
999b4972
N
7905static int imsm_manage_reshape(
7906 int afd, struct mdinfo *sra, struct reshape *reshape,
7907 struct supertype *st, unsigned long stripes,
7908 int *fds, unsigned long long *offsets,
7909 int dests, int *destfd, unsigned long long *destoffsets)
7910{
7911 /* Just use child_monitor for now */
7912 return child_monitor(
7913 afd, sra, reshape, st, stripes,
7914 fds, offsets, dests, destfd, destoffsets);
7915}
71204a50 7916#endif /* MDASSEMBLE */
999b4972 7917
cdddbdbc
DW
7918struct superswitch super_imsm = {
7919#ifndef MDASSEMBLE
7920 .examine_super = examine_super_imsm,
7921 .brief_examine_super = brief_examine_super_imsm,
4737ae25 7922 .brief_examine_subarrays = brief_examine_subarrays_imsm,
9d84c8ea 7923 .export_examine_super = export_examine_super_imsm,
cdddbdbc
DW
7924 .detail_super = detail_super_imsm,
7925 .brief_detail_super = brief_detail_super_imsm,
bf5a934a 7926 .write_init_super = write_init_super_imsm,
0e600426
N
7927 .validate_geometry = validate_geometry_imsm,
7928 .add_to_super = add_to_super_imsm,
1a64be56 7929 .remove_from_super = remove_from_super_imsm,
d665cc31 7930 .detail_platform = detail_platform_imsm,
33414a01 7931 .kill_subarray = kill_subarray_imsm,
aa534678 7932 .update_subarray = update_subarray_imsm,
2b959fbf 7933 .load_container = load_container_imsm,
71204a50
N
7934 .default_geometry = default_geometry_imsm,
7935 .get_disk_controller_domain = imsm_get_disk_controller_domain,
7936 .reshape_super = imsm_reshape_super,
7937 .manage_reshape = imsm_manage_reshape,
cdddbdbc
DW
7938#endif
7939 .match_home = match_home_imsm,
7940 .uuid_from_super= uuid_from_super_imsm,
7941 .getinfo_super = getinfo_super_imsm,
5c4cd5da 7942 .getinfo_super_disks = getinfo_super_disks_imsm,
cdddbdbc
DW
7943 .update_super = update_super_imsm,
7944
7945 .avail_size = avail_size_imsm,
80e7f8c3 7946 .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
cdddbdbc
DW
7947
7948 .compare_super = compare_super_imsm,
7949
7950 .load_super = load_super_imsm,
bf5a934a 7951 .init_super = init_super_imsm,
e683ca88 7952 .store_super = store_super_imsm,
cdddbdbc
DW
7953 .free_super = free_super_imsm,
7954 .match_metadata_desc = match_metadata_desc_imsm,
bf5a934a 7955 .container_content = container_content_imsm,
cdddbdbc 7956
cdddbdbc 7957 .external = 1,
4cce4069 7958 .name = "imsm",
845dea95 7959
0e600426 7960#ifndef MDASSEMBLE
845dea95
NB
7961/* for mdmon */
7962 .open_new = imsm_open_new,
ed9d66aa 7963 .set_array_state= imsm_set_array_state,
845dea95
NB
7964 .set_disk = imsm_set_disk,
7965 .sync_metadata = imsm_sync_metadata,
88758e9d 7966 .activate_spare = imsm_activate_spare,
e8319a19 7967 .process_update = imsm_process_update,
8273f55e 7968 .prepare_update = imsm_prepare_update,
0e600426 7969#endif /* MDASSEMBLE */
cdddbdbc 7970};