/*
 * mdadm - Intel(R) Matrix Storage Manager Support
 *
 * Copyright (C) 2002-2008 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define HAVE_STDINT_H 1
#include "mdadm.h"
#include "mdmon.h"
#include "sha1.h"
#include "platform-intel.h"
#include <values.h>
#include <scsi/sg.h>
#include <ctype.h>
#include <dirent.h>

/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH 32
#define MAX_RAID_SERIAL_LEN 16

#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM      __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB     __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0   __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1   __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10  __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E  __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5   __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)

#define MPB_SECTOR_CNT 418
#define IMSM_RESERVED_SECTORS 4096
#define SECT_PER_MB_SHIFT 11

/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
	__u32 status;			 /* 0xF0 - 0xF3 */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define IMSM_DISK_FILLERS 4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};

/* RAID map configuration infos. */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;	/* valid only when state is degraded */
	__u8  ddf;
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));

struct imsm_vol {
	__u32 curr_migr_unit;
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));

struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;
	__u32 size_high;
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;		/* Persistent RaidDev status */
	__u32 reserved_blocks;	/* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));

struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));

#define BBM_LOG_MAX_ENTRIES 254

struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

struct bbm_log {
	__u32 signature; /* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved; /* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));


#ifndef MDASSEMBLE
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif

static __u8 migr_type(struct imsm_dev *dev)
{
	if (dev->vol.migr_type == MIGR_VERIFY &&
	    dev->status & DEV_VERIFY_AND_FIX)
		return MIGR_REPAIR;
	else
		return dev->vol.migr_type;
}

static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
{
	/* for compatibility with older oroms convert MIGR_REPAIR into
	 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
	 */
	if (migr_type == MIGR_REPAIR) {
		dev->vol.migr_type = MIGR_VERIFY;
		dev->status |= DEV_VERIFY_AND_FIX;
	} else {
		dev->vol.migr_type = migr_type;
		dev->status &= ~DEV_VERIFY_AND_FIX;
	}
}

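/* Worked example (illustrative, not part of the metadata format): calling
 * set_migr_type(dev, MIGR_REPAIR) stores MIGR_VERIFY in dev->vol.migr_type
 * and raises DEV_VERIFY_AND_FIX in dev->status so that older oroms see a
 * plain verify pass, while migr_type(dev) folds that combination back into
 * MIGR_REPAIR for the rest of mdadm.
 */
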
static unsigned int sector_count(__u32 bytes)
{
	return ((bytes + (512-1)) & (~(512-1))) / 512;
}

static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}

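/* Worked example of the rounding above (illustrative values only): the byte
 * count is rounded up to the next 512-byte boundary before dividing, so
 * sector_count(1) == 1, sector_count(512) == 1 and sector_count(513) == 2.
 */
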
struct intel_dev {
	struct imsm_dev *dev;
	struct intel_dev *next;
	unsigned index;
};

struct intel_hba {
	enum sys_dev_type type;
	char *path;
	char *pci_id;
	struct intel_hba *next;
};

enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int current_vol; /* index of raid device undergoing creation */
	__u32 create_offset; /* common start for 'current_vol' */
	__u32 random; /* random data for seeding new family numbers */
	struct intel_dev *devlist;
	struct dl {
		struct dl *next;
		int index;
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
		int raiddisk; /* slot to fill in autolayout */
		enum action action;
	} *disks;
	struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
				      active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	struct intel_hba *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
	struct intel_super *next; /* (temp) list for disambiguating family_num */
};

struct intel_disk {
	struct imsm_disk disk;
	#define IMSM_UNKNOWN_OWNER (-1)
	int owner;
	struct intel_disk *next;
};

struct extent {
	unsigned long long start, size;
};

/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER,
	CH_CHUNK_MIGR,
	CH_LEVEL_MIGRATION
};

/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_takeover
};

struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};

struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};

enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10
};
struct imsm_update_takeover {
	enum imsm_update_type type;
	int subarray;
	enum takeover_direction direction;
};

struct imsm_update_reshape {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;
	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};

struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

struct imsm_update_kill_array {
	enum imsm_update_type type;
	int dev_idx;
};

struct imsm_update_rename_array {
	enum imsm_update_type type;
	__u8 name[MAX_RAID_SERIAL_LEN];
	int dev_idx;
};

struct imsm_update_add_remove_disk {
	enum imsm_update_type type;
};


static const char *_sys_dev_type[] = {
	[SYS_DEV_UNKNOWN] = "Unknown",
	[SYS_DEV_SAS] = "SAS",
	[SYS_DEV_SATA] = "SATA"
};

const char *get_sys_dev_type(enum sys_dev_type type)
{
	if (type >= SYS_DEV_MAX)
		type = SYS_DEV_UNKNOWN;

	return _sys_dev_type[type];
}

#ifndef MDASSEMBLE
static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
{
	struct intel_hba *result = malloc(sizeof(*result));
	if (result) {
		result->type = device->type;
		result->path = strdup(device->path);
		result->next = NULL;
		if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
			result->pci_id++;
	}
	return result;
}

static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
{
	struct intel_hba *result=NULL;
	for (result = hba; result; result = result->next) {
		if (result->type == device->type && strcmp(result->path, device->path) == 0)
			break;
	}
	return result;
}


static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device,
			       const char *devname)
{
	struct intel_hba *hba;

	/* check if disk attached to Intel HBA */
	hba = find_intel_hba(super->hba, device);
	if (hba != NULL)
		return 1;
	/* Check if HBA is already attached to super */
	if (super->hba == NULL) {
		super->hba = alloc_intel_hba(device);
		return 1;
	}

	hba = super->hba;
	/* Intel metadata allows for all disks attached to the same type HBA.
	 * Do not support mixing of different HBA types.
	 */
	if (device->type != hba->type)
		return 2;

	while (hba->next)
		hba = hba->next;

	hba->next = alloc_intel_hba(device);
	return 1;
}

static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
{
	struct sys_dev *list, *elem, *prev;
	char *disk_path;

	if ((list = find_intel_devices()) == NULL)
		return 0;

	if (fd < 0)
		disk_path = (char *) devname;
	else
		disk_path = diskfd_to_devpath(fd);

	if (!disk_path) {
		free_sys_dev(&list);
		return 0;
	}

	for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
		if (path_attached_to_hba(disk_path, elem->path)) {
			if (prev == NULL)
				list = list->next;
			else
				prev->next = elem->next;
			elem->next = NULL;
			if (disk_path != devname)
				free(disk_path);
			free_sys_dev(&list);
			return elem;
		}
	}
	if (disk_path != devname)
		free(disk_path);
	free_sys_dev(&list);

	return NULL;
}
#endif /* MDASSEMBLE */


static struct supertype *match_metadata_desc_imsm(char *arg)
{
	struct supertype *st;

	if (strcmp(arg, "imsm") != 0 &&
	    strcmp(arg, "default") != 0
		)
		return NULL;

	st = malloc(sizeof(*st));
	if (!st)
		return NULL;
	memset(st, 0, sizeof(*st));
	st->container_dev = NoMdDev;
	st->ss = &super_imsm;
	st->max_devs = IMSM_MAX_DEVICES;
	st->minor_version = 0;
	st->sb = NULL;
	return st;
}

#ifndef MDASSEMBLE
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif

/* retrieve a disk directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load time
 */
static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
{
	if (index >= mpb->num_disks)
		return NULL;
	return &mpb->disk[index];
}

/* retrieve the disk description based on an index of the disk
 * in the sub-array
 */
static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
{
	struct dl *d;

	for (d = super->disks; d; d = d->next)
		if (d->index == index)
			return d;

	return NULL;
}
/* retrieve a disk from the parsed metadata */
static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
{
	struct dl *dl;

	dl = get_imsm_dl_disk(super, index);
	if (dl)
		return &dl->disk;

	return NULL;
}

/* generate a checksum directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load or write_super after coalescing
 */
static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
{
	__u32 end = mpb->mpb_size / sizeof(end);
	__u32 *p = (__u32 *) mpb;
	__u32 sum = 0;

	while (end--) {
		sum += __le32_to_cpu(*p);
		p++;
	}

	return sum - __le32_to_cpu(mpb->check_sum);
}

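/* A minimal usage sketch (hypothetical helper, not called anywhere in this
 * file): because __gen_imsm_checksum() subtracts the stored check_sum field
 * back out of the running sum, a freshly read anchor is consistent exactly
 * when the recomputed value equals the stored little-endian checksum, which
 * is also how examine_super_imsm() reports "correct"/"incorrect" below.
 */
static __attribute__ ((unused))
int example_anchor_checksum_ok(struct imsm_super *mpb)
{
	return __gen_imsm_checksum(mpb) == __le32_to_cpu(mpb->check_sum);
}
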
static size_t sizeof_imsm_map(struct imsm_map *map)
{
	return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
}

struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
{
	/* A device can have 2 maps if it is in the middle of a migration.
	 * If second_map is:
	 *    0   - we return the first map
	 *    1   - we return the second map if it exists, else NULL
	 *   -1   - we return the second map if it exists, else the first
	 */
	struct imsm_map *map = &dev->vol.map[0];

	if (second_map == 1 && !dev->vol.migr_state)
		return NULL;
	else if (second_map == 1 ||
		 (second_map < 0 && dev->vol.migr_state)) {
		void *ptr = map;

		return ptr + sizeof_imsm_map(map);
	} else
		return map;

}

/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
		      sizeof_imsm_map(get_imsm_map(dev, 0));

	/* migrating means an additional map */
	if (dev->vol.migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 1));
	else if (migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 0));

	return size;
}

#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
#endif

static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
{
	int offset;
	int i;
	void *_mpb = mpb;

	if (index >= mpb->num_raid_devs)
		return NULL;

	/* devices start after all disks */
	offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;

	for (i = 0; i <= index; i++)
		if (i == index)
			return _mpb + offset;
		else
			offset += sizeof_imsm_dev(_mpb + offset, 0);

	return NULL;
}

static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
{
	struct intel_dev *dv;

	if (index >= super->anchor->num_raid_devs)
		return NULL;
	for (dv = super->devlist; dv; dv = dv->next)
		if (dv->index == index)
			return dv->dev;
	return NULL;
}

/*
 * for second_map:
 *  == 0 get first map
 *  == 1 get second map
 *  == -1 then get map according to the current migr_state
 */
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
				  int slot,
				  int second_map)
{
	struct imsm_map *map;

	map = get_imsm_map(dev, second_map);

	/* top byte identifies disk under rebuild */
	return __le32_to_cpu(map->disk_ord_tbl[slot]);
}

#define ord_to_idx(ord) (((ord) << 8) >> 8)
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);

	return ord_to_idx(ord);
}

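/* Encoding example (illustrative): IMSM_ORD_REBUILD lives in the top byte of
 * a disk_ord_tbl entry, so an ord value of (5 | IMSM_ORD_REBUILD) describes
 * disk index 5 while it is being rebuilt; ord_to_idx() shifts the flag byte
 * away and yields plain index 5.
 */
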
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}

static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
{
	int slot;
	__u32 ord;

	for (slot = 0; slot < map->num_members; slot++) {
		ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
		if (ord_to_idx(ord) == idx)
			return slot;
	}

	return -1;
}

static int get_imsm_raid_level(struct imsm_map *map)
{
	if (map->raid_level == 1) {
		if (map->num_members == 2)
			return 1;
		else
			return 10;
	}

	return map->raid_level;
}

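/* Worked example (illustrative): the metadata encodes RAID10 as raid_level 1
 * with more than two members, so a two-member map with raid_level 1 is
 * reported as level 1 and a four-member map with raid_level 1 as level 10;
 * levels 0 and 5 pass through unchanged.
 */
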
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *a = av;
	const struct extent *b = bv;
	if (a->start < b->start)
		return -1;
	if (a->start > b->start)
		return 1;
	return 0;
}

static int count_memberships(struct dl *dl, struct intel_super *super)
{
	int memberships = 0;
	int i;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		if (get_imsm_disk_slot(map, dl->index) >= 0)
			memberships++;
	}

	return memberships;
}

static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i;
	int memberships = count_memberships(dl, super);
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		if (get_imsm_disk_slot(map, dl->index) >= 0) {
			e->start = __le32_to_cpu(map->pba_of_lba0);
			e->size = __le32_to_cpu(map->blocks_per_member);
			e++;
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		/* round down to 1k block to satisfy precision of the kernel
		 * 'size' interface
		 */
		remainder &= ~1UL;
		/* make sure remainder is still sane */
		if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
			remainder = ROUND_UP(super->len, 512) >> 9;
		if (reservation > remainder)
			reservation = remainder;
	}
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}

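/* A usage sketch (hypothetical caller; imsm_reserved_sectors() and
 * min_acceptable_spare_size_imsm() below do the same walk): the returned
 * array ends with a sentinel entry whose size is 0 and whose start marks the
 * beginning of the reserved metadata area, so callers iterate until that
 * sentinel.
 */
static __attribute__ ((unused))
unsigned long long example_last_used_lba(struct intel_super *super,
					 struct dl *dl)
{
	struct extent *e = get_extents(super, dl);
	unsigned long long end = 0;
	int i;

	if (!e)
		return 0;
	for (i = 0; e[i].size; i++)
		end = e[i].start + e[i].size;
	free(e);
	return end;
}
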
/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry, otherwise fallback to the
 * default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
	struct extent *e;
	int i;
	__u32 rv;

	/* for spares just return a minimal reservation which will grow
	 * once the spare is picked up by an array
	 */
	if (dl->index == -1)
		return MPB_SECTOR_CNT;

	e = get_extents(super, dl);
	if (!e)
		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* scroll to last entry */
	for (i = 0; e[i].size; i++)
		continue;

	rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;

	free(e);

	return rv;
}

static int is_spare(struct imsm_disk *disk)
{
	return (disk->status & SPARE_DISK) == SPARE_DISK;
}

static int is_configured(struct imsm_disk *disk)
{
	return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
}

static int is_failed(struct imsm_disk *disk)
{
	return (disk->status & FAILED_DISK) == FAILED_DISK;
}

/* Return minimum size of a spare that can be used in this array */
static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
{
	struct intel_super *super = st->sb;
	struct dl *dl;
	struct extent *e;
	int i;
	unsigned long long rv = 0;

	if (!super)
		return rv;
	/* find first active disk in array */
	dl = super->disks;
	while (dl && (is_failed(&dl->disk) || dl->index == -1))
		dl = dl->next;
	if (!dl)
		return rv;
	/* find last lba used by subarrays */
	e = get_extents(super, dl);
	if (!e)
		return rv;
	for (i = 0; e[i].size; i++)
		continue;
	if (i > 0)
		rv = e[i-1].start + e[i-1].size;
	free(e);
	/* add the amount of space needed for metadata */
	rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
	return rv * 512;
}

#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct imsm_dev *dev);

static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
	__u64 sz;
	int slot, i;
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *map2 = get_imsm_map(dev, 1);
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf("           UUID : %s\n", uuid);
	printf("     RAID Level : %d", get_imsm_raid_level(map));
	if (map2)
		printf(" <-- %d", get_imsm_raid_level(map2));
	printf("\n");
	printf("        Members : %d", map->num_members);
	if (map2)
		printf(" <-- %d", map2->num_members);
	printf("\n");
	printf("          Slots : [");
	for (i = 0; i < map->num_members; i++) {
		ord = get_imsm_ord_tbl_ent(dev, i, 0);
		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
	}
	printf("]");
	if (map2) {
		printf(" <-- [");
		for (i = 0; i < map2->num_members; i++) {
			ord = get_imsm_ord_tbl_ent(dev, i, 1);
			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
		}
		printf("]");
	}
	printf("\n");
	slot = get_imsm_disk_slot(map, disk_idx);
	if (slot >= 0) {
		ord = get_imsm_ord_tbl_ent(dev, slot, -1);
		printf("      This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf("      This Slot : ?\n");
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf("     Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf("   Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf("  Sector Offset : %u\n",
	       __le32_to_cpu(map->pba_of_lba0));
	printf("    Num Stripes : %u\n",
	       __le32_to_cpu(map->num_data_stripes));
	printf("     Chunk Size : %u KiB",
	       __le16_to_cpu(map->blocks_per_strip) / 2);
	if (map2)
		printf(" <-- %u KiB",
		       __le16_to_cpu(map2->blocks_per_strip) / 2);
	printf("\n");
	printf("       Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf("  Migrate State : ");
	if (dev->vol.migr_state) {
		if (migr_type(dev) == MIGR_INIT)
			printf("initialize\n");
		else if (migr_type(dev) == MIGR_REBUILD)
			printf("rebuild\n");
		else if (migr_type(dev) == MIGR_VERIFY)
			printf("check\n");
		else if (migr_type(dev) == MIGR_GEN_MIGR)
			printf("general migration\n");
		else if (migr_type(dev) == MIGR_STATE_CHANGE)
			printf("state change\n");
		else if (migr_type(dev) == MIGR_REPAIR)
			printf("repair\n");
		else
			printf("<unknown:%d>\n", migr_type(dev));
	} else
		printf("idle\n");
	printf("      Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		struct imsm_map *map = get_imsm_map(dev, 1);

		printf(" <-- %s", map_state_str[map->map_state]);
		printf("\n     Checkpoint : %u (%llu)",
		       __le32_to_cpu(dev->vol.curr_migr_unit),
		       (unsigned long long)blocks_per_migr_unit(dev));
	}
	printf("\n");
	printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}

static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
{
	struct imsm_disk *disk = __get_imsm_disk(mpb, index);
	char str[MAX_RAID_SERIAL_LEN + 1];
	__u64 sz;

	if (index < 0 || !disk)
		return;

	printf("\n");
	snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
	printf("  Disk%02d Serial : %s\n", index, str);
	printf("          State :%s%s%s\n", is_spare(disk) ? " spare" : "",
					    is_configured(disk) ? " active" : "",
					    is_failed(disk) ? " failed" : "");
	printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
	sz = __le32_to_cpu(disk->total_blocks) - reserved;
	printf("    Usable Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
}

static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);

static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	struct mdinfo info;
	char nbuf[64];
	__u32 sum;
	__u32 reserved = imsm_reserved_sectors(super, super->disks);
	struct dl *dl;

	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf("          Magic : %s\n", str);
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf("        Version : %s\n", get_imsm_version(mpb));
	printf("    Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
	printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	printf("           UUID : %s\n", nbuf + 5);
	sum = __le32_to_cpu(mpb->check_sum);
	printf("       Checksum : %08x %s\n", sum,
	       __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf("    MPB Sectors : %d\n", mpb_sectors(mpb));
	printf("          Disks : %d\n", mpb->num_disks);
	printf("   RAID Devices : %d\n", mpb->num_raid_devs);
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf("       Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf("      Signature : %x\n", __le32_to_cpu(log->signature));
		printf("    Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf("   Spare Blocks : %d\n",  __le32_to_cpu(log->reserved_spare_block_count));
		printf("    First Spare : %llx\n",
		       (unsigned long long) __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct mdinfo info;
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		super->current_vol = i;
		getinfo_super_imsm(st, &info, NULL);
		fname_from_uuid(st, &info, nbuf, ':');
		print_imsm_dev(dev, nbuf + 5, super->disks->index);
	}
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue;
		print_imsm_disk(mpb, i, reserved);
	}
	for (dl = super->disks ; dl; dl = dl->next) {
		struct imsm_disk *disk;
		char str[MAX_RAID_SERIAL_LEN + 1];
		__u64 sz;

		if (dl->index >= 0)
			continue;

		disk = &dl->disk;
		printf("\n");
		snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
		printf("    Disk Serial : %s\n", str);
		printf("          State :%s%s%s\n", is_spare(disk) ? " spare" : "",
		       is_configured(disk) ? " active" : "",
		       is_failed(disk) ? " failed" : "");
		printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
		sz = __le32_to_cpu(disk->total_blocks) - reserved;
		printf("    Usable Size : %llu%s\n", (unsigned long long)sz,
		       human_size(sz * 512));
	}
}

static void brief_examine_super_imsm(struct supertype *st, int verbose)
{
	/* We just write a generic IMSM ARRAY entry */
	struct mdinfo info;
	char nbuf[64];
	struct intel_super *super = st->sb;

	if (!super->anchor->num_raid_devs) {
		printf("ARRAY metadata=imsm\n");
		return;
	}

	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
}

static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
{
	/* We just write a generic IMSM ARRAY entry */
	struct mdinfo info;
	char nbuf[64];
	char nbuf1[64];
	struct intel_super *super = st->sb;
	int i;

	if (!super->anchor->num_raid_devs)
		return;

	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);

		super->current_vol = i;
		getinfo_super_imsm(st, &info, NULL);
		fname_from_uuid(st, &info, nbuf1, ':');
		printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
		       dev->volume, nbuf + 5, i, nbuf1 + 5);
	}
}

static void export_examine_super_imsm(struct supertype *st)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct mdinfo info;
	char nbuf[64];

	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	printf("MD_METADATA=imsm\n");
	printf("MD_LEVEL=container\n");
	printf("MD_UUID=%s\n", nbuf+5);
	printf("MD_DEVICES=%u\n", mpb->num_disks);
}

static void detail_super_imsm(struct supertype *st, char *homehost)
{
	struct mdinfo info;
	char nbuf[64];

	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	printf("\n           UUID : %s\n", nbuf + 5);
}

static void brief_detail_super_imsm(struct supertype *st)
{
	struct mdinfo info;
	char nbuf[64];
	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	printf(" UUID=%s", nbuf + 5);
}

static int imsm_read_serial(int fd, char *devname, __u8 *serial);
static void fd2devname(int fd, char *name);

static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
{
	/* dump an unsorted list of devices attached to AHCI Intel storage
	 * controller, as well as non-connected ports
	 */
	int hba_len = strlen(hba_path) + 1;
	struct dirent *ent;
	DIR *dir;
	char *path = NULL;
	int err = 0;
	unsigned long port_mask = (1 << port_count) - 1;

	if (port_count > (int)sizeof(port_mask) * 8) {
		if (verbose)
			fprintf(stderr, Name ": port_count %d out of range\n", port_count);
		return 2;
	}

	/* scroll through /sys/dev/block looking for devices attached to
	 * this hba
	 */
	dir = opendir("/sys/dev/block");
	for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
		int fd;
		char model[64];
		char vendor[64];
		char buf[1024];
		int major, minor;
		char *device;
		char *c;
		int port;
		int type;

		if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
			continue;
		path = devt_to_devpath(makedev(major, minor));
		if (!path)
			continue;
		if (!path_attached_to_hba(path, hba_path)) {
			free(path);
			path = NULL;
			continue;
		}

		/* retrieve the scsi device type */
		if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
			if (verbose)
				fprintf(stderr, Name ": failed to allocate 'device'\n");
			err = 2;
			break;
		}
		sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
		if (load_sys(device, buf) != 0) {
			if (verbose)
				fprintf(stderr, Name ": failed to read device type for %s\n",
					path);
			err = 2;
			free(device);
			break;
		}
		type = strtoul(buf, NULL, 10);

		/* if it's not a disk print the vendor and model */
		if (!(type == 0 || type == 7 || type == 14)) {
			vendor[0] = '\0';
			model[0] = '\0';
			sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
			if (load_sys(device, buf) == 0) {
				strncpy(vendor, buf, sizeof(vendor));
				vendor[sizeof(vendor) - 1] = '\0';
				c = (char *) &vendor[sizeof(vendor) - 1];
				while (isspace(*c) || *c == '\0')
					*c-- = '\0';

			}
			sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
			if (load_sys(device, buf) == 0) {
				strncpy(model, buf, sizeof(model));
				model[sizeof(model) - 1] = '\0';
				c = (char *) &model[sizeof(model) - 1];
				while (isspace(*c) || *c == '\0')
					*c-- = '\0';
			}

			if (vendor[0] && model[0])
				sprintf(buf, "%.64s %.64s", vendor, model);
			else
				switch (type) { /* numbers from hald/linux/device.c */
				case 1: sprintf(buf, "tape"); break;
				case 2: sprintf(buf, "printer"); break;
				case 3: sprintf(buf, "processor"); break;
				case 4:
				case 5: sprintf(buf, "cdrom"); break;
				case 6: sprintf(buf, "scanner"); break;
				case 8: sprintf(buf, "media_changer"); break;
				case 9: sprintf(buf, "comm"); break;
				case 12: sprintf(buf, "raid"); break;
				default: sprintf(buf, "unknown");
				}
		} else
			buf[0] = '\0';
		free(device);

		/* chop device path to 'host%d' and calculate the port number */
		c = strchr(&path[hba_len], '/');
		if (!c) {
			if (verbose)
				fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
			err = 2;
			break;
		}
		*c = '\0';
		if (sscanf(&path[hba_len], "host%d", &port) == 1)
			port -= host_base;
		else {
			if (verbose) {
				*c = '/'; /* repair the full string */
				fprintf(stderr, Name ": failed to determine port number for %s\n",
					path);
			}
			err = 2;
			break;
		}

		/* mark this port as used */
		port_mask &= ~(1 << port);

		/* print out the device information */
		if (buf[0]) {
			printf("          Port%d : - non-disk device (%s) -\n", port, buf);
			continue;
		}

		fd = dev_open(ent->d_name, O_RDONLY);
		if (fd < 0)
			printf("          Port%d : - disk info unavailable -\n", port);
		else {
			fd2devname(fd, buf);
			printf("          Port%d : %s", port, buf);
			if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
				printf(" (%s)\n", buf);
			else
				printf("()\n");
		}
		close(fd);
		free(path);
		path = NULL;
	}
	if (path)
		free(path);
	if (dir)
		closedir(dir);
	if (err == 0) {
		int i;

		for (i = 0; i < port_count; i++)
			if (port_mask & (1 << i))
				printf("          Port%d : - no device attached -\n", i);
	}

	return err;
}

static void print_found_intel_controllers(struct sys_dev *elem)
{
	for (; elem; elem = elem->next) {
		fprintf(stderr, Name ": found Intel(R) ");
		if (elem->type == SYS_DEV_SATA)
			fprintf(stderr, "SATA ");
		else if (elem->type == SYS_DEV_SAS)
			fprintf(stderr, "SAS ");
		fprintf(stderr, "RAID controller");
		if (elem->pci_id)
			fprintf(stderr, " at %s", elem->pci_id);
		fprintf(stderr, ".\n");
	}
	fflush(stderr);
}

static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	struct dirent *ent;
	DIR *dir;
	int host_base = -1;

	*port_count = 0;
	if ((dir = opendir(hba_path)) == NULL)
		return -1;

	for (ent = readdir(dir); ent; ent = readdir(dir)) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		if (*port_count == 0)
			host_base = host;
		else if (host < host_base)
			host_base = host;

		if (host + 1 > *port_count + host_base)
			*port_count = host + 1 - host_base;
	}
	closedir(dir);
	return host_base;
}

static int detail_platform_imsm(int verbose, int enumerate_only)
{
	/* There are two components to imsm platform support, the ahci SATA
	 * controller and the option-rom.  To find the SATA controller we
	 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
	 * controller with the Intel vendor id is present.  This approach
	 * allows mdadm to leverage the kernel's ahci detection logic, with the
	 * caveat that if ahci.ko is not loaded mdadm will not be able to
	 * detect platform raid capabilities.  The option-rom resides in a
	 * platform "Adapter ROM".  We scan for its signature to retrieve the
	 * platform capabilities.  If raid support is disabled in the BIOS the
	 * option-rom capability structure will not be available.
	 */
	const struct imsm_orom *orom;
	struct sys_dev *list, *hba;
	int host_base = 0;
	int port_count = 0;
	int result=0;

	if (enumerate_only) {
		if (check_env("IMSM_NO_PLATFORM") || find_imsm_orom())
			return 0;
		return 2;
	}

	list = find_intel_devices();
	if (!list) {
		if (verbose)
			fprintf(stderr, Name ": no active Intel(R) RAID "
				"controller found.\n");
		free_sys_dev(&list);
		return 2;
	} else if (verbose)
		print_found_intel_controllers(list);

	orom = find_imsm_orom();
	if (!orom) {
		free_sys_dev(&list);
		if (verbose)
			fprintf(stderr, Name ": imsm option-rom not found\n");
		return 2;
	}

	printf("       Platform : Intel(R) Matrix Storage Manager\n");
	printf("        Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
	       orom->hotfix_ver, orom->build);
	printf("    RAID Levels :%s%s%s%s%s\n",
	       imsm_orom_has_raid0(orom) ? " raid0" : "",
	       imsm_orom_has_raid1(orom) ? " raid1" : "",
	       imsm_orom_has_raid1e(orom) ? " raid1e" : "",
	       imsm_orom_has_raid10(orom) ? " raid10" : "",
	       imsm_orom_has_raid5(orom) ? " raid5" : "");
	printf("    Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
	       imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
	       imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
	       imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
	       imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
	       imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
	       imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
	       imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
	       imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
	       imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
	       imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
	       imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
	       imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
	       imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
	       imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
	       imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
	       imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
	printf("      Max Disks : %d\n", orom->tds);
	printf("    Max Volumes : %d\n", orom->vpa);

	for (hba = list; hba; hba = hba->next) {
		printf(" I/O Controller : %s (%s)\n",
		       hba->path, get_sys_dev_type(hba->type));

		if (hba->type == SYS_DEV_SATA) {
			host_base = ahci_get_port_count(hba->path, &port_count);
			if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
				if (verbose)
					fprintf(stderr, Name ": failed to enumerate "
						"ports on SATA controller at %s.", hba->pci_id);
				result |= 2;
			}
		} else if (hba->type == SYS_DEV_SAS) {
			if (verbose)
				fprintf(stderr, Name ": failed to enumerate "
					"devices on SAS controller at %s.", hba->pci_id);
			result |= 2;
		}
	}

	free_sys_dev(&list);
	return result;
}
#endif

1443{
5115ca67
DW
1444 /* the imsm metadata format does not specify any host
1445 * identification information. We return -1 since we can never
1446 * confirm nor deny whether a given array is "meant" for this
148acb7b 1447 * host. We rely on compare_super and the 'family_num' fields to
5115ca67
DW
1448 * exclude member disks that do not belong, and we rely on
1449 * mdadm.conf to specify the arrays that should be assembled.
1450 * Auto-assembly may still pick up "foreign" arrays.
1451 */
cdddbdbc 1452
9362c1c8 1453 return -1;
cdddbdbc
DW
1454}
1455
1456static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1457{
51006d85
N
1458 /* The uuid returned here is used for:
1459 * uuid to put into bitmap file (Create, Grow)
1460 * uuid for backup header when saving critical section (Grow)
1461 * comparing uuids when re-adding a device into an array
1462 * In these cases the uuid required is that of the data-array,
1463 * not the device-set.
1464 * uuid to recognise same set when adding a missing device back
1465 * to an array. This is a uuid for the device-set.
1466 *
1467 * For each of these we can make do with a truncated
1468 * or hashed uuid rather than the original, as long as
1469 * everyone agrees.
1470 * In each case the uuid required is that of the data-array,
1471 * not the device-set.
43dad3d6 1472 */
	/* imsm does not track uuids so we synthesize one using sha1 on
	 * - The signature (which is constant for all imsm arrays, but no matter)
	 * - the orig_family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;
	__u32 family_num;

	/* some mdadm versions failed to set ->orig_family_num, in which
	 * case fall back to ->family_num.  orig_family_num will be
	 * fixed up with the first metadata update.
	 */
	family_num = super->anchor->orig_family_num;
	if (family_num == 0)
		family_num = super->anchor->family_num;
	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
	sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	memcpy(uuid, buf, 4*4);
}

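/* Worked example (illustrative): sha1_finish_ctx() leaves a 20-byte digest in
 * buf and only its first 16 bytes (4 * 4) are copied into uuid[], so the
 * reported UUID is a truncated SHA1 over the signature, the (orig) family
 * number and, when a volume is selected, its index and name.
 */
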
#if 0
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif

static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
{
	/* migr_strip_size when repairing or initializing parity */
	struct imsm_map *map = get_imsm_map(dev, 0);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);

	switch (get_imsm_raid_level(map)) {
	case 5:
	case 10:
		return chunk;
	default:
		return 128*1024 >> 9;
	}
}

static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
{
	/* migr_strip_size when rebuilding a degraded disk, no idea why
	 * this is different than migr_strip_size_resync(), but it's good
	 * to be compatible
	 */
	struct imsm_map *map = get_imsm_map(dev, 1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10:
		if (map->num_members % map->num_domains == 0)
			return 128*1024 >> 9;
		else
			return chunk;
	case 5:
		return max((__u32) 64*1024 >> 9, chunk);
	default:
		return 128*1024 >> 9;
	}
}

static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
{
	struct imsm_map *lo = get_imsm_map(dev, 0);
	struct imsm_map *hi = get_imsm_map(dev, 1);
	__u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
	__u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);

	return max((__u32) 1, hi_chunk / lo_chunk);
}

static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
{
	struct imsm_map *lo = get_imsm_map(dev, 0);
	int level = get_imsm_raid_level(lo);

	if (level == 1 || level == 10) {
		struct imsm_map *hi = get_imsm_map(dev, 1);

		return hi->num_domains;
	} else
		return num_stripes_per_unit_resync(dev);
}

static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
{
	/* named 'imsm_' because raid0, raid1 and raid10
	 * counter-intuitively have the same number of data disks
	 */
	struct imsm_map *map = get_imsm_map(dev, second_map);

	switch (get_imsm_raid_level(map)) {
	case 0:
	case 1:
	case 10:
		return map->num_members;
	case 5:
		return map->num_members - 1;
	default:
		dprintf("%s: unsupported raid level\n", __func__);
		return 0;
	}
}

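/* Worked example (illustrative): a four-member RAID5 map reports 3 data
 * members here, while RAID0, RAID1 and RAID10 maps all report num_members,
 * which is the "counter-intuitive" case the comment above refers to.
 */
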
static __u32 parity_segment_depth(struct imsm_dev *dev)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);

	switch(get_imsm_raid_level(map)) {
	case 1:
	case 10:
		return chunk * map->num_domains;
	case 5:
		return chunk * map->num_members;
	default:
		return chunk;
	}
}

static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	struct imsm_map *map = get_imsm_map(dev, 1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	} case 5: {
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		return 0;
	}
}

static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
{
	/* calculate the conversion factor between per member 'blocks'
	 * (md/{resync,rebuild}_start) and imsm migration units, return
	 * 0 for the 'not migrating' and 'unsupported migration' cases
	 */
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_GEN_MIGR:
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, 0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8 disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev, 0);
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}

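/* Usage note (illustrative): callers below treat the return value as member
 * blocks per migration unit, e.g. getinfo_super_imsm_volume() reconstructs
 * resync_start as blocks_per_migr_unit(dev) * curr_migr_unit, and
 * print_imsm_dev() prints this factor in parentheses next to the raw
 * checkpoint value.
 */
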
static int imsm_level_to_layout(int level)
{
	switch (level) {
	case 0:
	case 1:
		return 0;
	case 5:
	case 6:
		return ALGORITHM_LEFT_ASYMMETRIC;
	case 10:
		return 0x102;
	}
	return UnSet;
}

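/* Note (illustrative): 0x102 is the md raid10 layout word for two "near"
 * copies - the near-copy count in the low byte and the far-copy count in the
 * next byte - i.e. the layout mdadm normally spells "n2".
 */
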
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *prev_map = get_imsm_map(dev, 1);
	struct imsm_map *map_to_analyse = map;
	struct dl *dl;
	char *devname;
	int map_disks = info->array.raid_disks;

	if (prev_map)
		map_to_analyse = prev_map;

	for (dl = super->disks; dl; dl = dl->next)
		if (dl->raiddisk == info->disk.raid_disk)
			break;
	info->container_member = super->current_vol;
	info->array.raid_disks = map_to_analyse->num_members;
	info->array.level = get_imsm_raid_level(map_to_analyse);
	info->array.layout = imsm_level_to_layout(info->array.level);
	info->array.md_minor = -1;
	info->array.ctime = 0;
	info->array.utime = 0;
	info->array.chunk_size =
		__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
	info->array.state = !dev->vol.dirty;
	info->custom_array_size = __le32_to_cpu(dev->size_high);
	info->custom_array_size <<= 32;
	info->custom_array_size |= __le32_to_cpu(dev->size_low);
	if (prev_map) {
		info->new_level = get_imsm_raid_level(map);
		info->new_layout = imsm_level_to_layout(info->new_level);
		info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
	} else {
		info->new_level = UnSet;
		info->new_layout = UnSet;
		info->new_chunk = info->array.chunk_size;
	}
	info->disk.major = 0;
	info->disk.minor = 0;
	if (dl) {
		info->disk.major = dl->major;
		info->disk.minor = dl->minor;
	}

	info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
	info->component_size =
		__le32_to_cpu(map_to_analyse->blocks_per_member);
	memset(info->uuid, 0, sizeof(info->uuid));
	info->recovery_start = MaxSector;
	info->reshape_active = (prev_map != NULL) &&
			       (map->map_state == prev_map->map_state);
	if (info->reshape_active)
		info->delta_disks = map->num_members - prev_map->num_members;
	else
		info->delta_disks = 0;

	if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
	    dev->vol.dirty) {
		info->resync_start = 0;
	} else if (dev->vol.migr_state) {
		switch (migr_type(dev)) {
		case MIGR_REPAIR:
		case MIGR_INIT: {
			__u64 blocks_per_unit = blocks_per_migr_unit(dev);
			__u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);

			info->resync_start = blocks_per_unit * units;
			break;
		}
		case MIGR_VERIFY:
			/* we could emulate the checkpointing of
			 * 'sync_action=check' migrations, but for now
			 * we just immediately complete them
			 */
		case MIGR_REBUILD:
			/* this is handled by container_content_imsm() */
		case MIGR_GEN_MIGR:
		case MIGR_STATE_CHANGE:
			/* FIXME handle other migrations */
		default:
			/* we are not dirty, so... */
			info->resync_start = MaxSector;
		}
	} else
		info->resync_start = MaxSector;

	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	devname = devnum2devname(st->container_dev);
	*info->text_version = '\0';
	if (devname)
		sprintf(info->text_version, "/%s/%d", devname, info->container_member);
	free(devname);
	info->safe_mode_delay = 4000;  /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);

	if (dmap) {
		int i, j;
		for (i=0; i<map_disks; i++) {
			dmap[i] = 0;
			if (i < info->array.raid_disks) {
				struct imsm_disk *dsk;
				j = get_imsm_disk_idx(dev, i, -1);
				dsk = get_imsm_disk(super, j);
				if (dsk && (dsk->status & CONFIGURED_DISK))
1834 dmap[i] = 1;
1835 }
1836 }
1837 }
81ac8b4d 1838}
bf5a934a 1839
97b4d0e9
DW
1840static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
1841static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
1842
1843static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
1844{
1845 struct dl *d;
1846
1847 for (d = super->missing; d; d = d->next)
1848 if (d->index == index)
1849 return &d->disk;
1850 return NULL;
1851}
1852
a5d85af7 1853static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
4f5bc454
DW
1854{
1855 struct intel_super *super = st->sb;
4f5bc454 1856 struct imsm_disk *disk;
a5d85af7 1857 int map_disks = info->array.raid_disks;
ab3cb6b3
N
1858 int max_enough = -1;
1859 int i;
1860 struct imsm_super *mpb;
4f5bc454 1861
bf5a934a 1862 if (super->current_vol >= 0) {
a5d85af7 1863 getinfo_super_imsm_volume(st, info, map);
bf5a934a
DW
1864 return;
1865 }
d23fe947
DW
1866
1867 /* Set raid_disks to zero so that Assemble will always pull in valid
1868 * spares
1869 */
1870 info->array.raid_disks = 0;
cdddbdbc
DW
1871 info->array.level = LEVEL_CONTAINER;
1872 info->array.layout = 0;
1873 info->array.md_minor = -1;
c2c087e6 1874 info->array.ctime = 0; /* N/A for imsm */
cdddbdbc
DW
1875 info->array.utime = 0;
1876 info->array.chunk_size = 0;
1877
1878 info->disk.major = 0;
1879 info->disk.minor = 0;
cdddbdbc 1880 info->disk.raid_disk = -1;
c2c087e6 1881 info->reshape_active = 0;
f35f2525
N
1882 info->array.major_version = -1;
1883 info->array.minor_version = -2;
c2c087e6 1884 strcpy(info->text_version, "imsm");
a67dd8cc 1885 info->safe_mode_delay = 0;
c2c087e6
DW
1886 info->disk.number = -1;
1887 info->disk.state = 0;
c5afc314 1888 info->name[0] = 0;
921d9e16 1889 info->recovery_start = MaxSector;
c2c087e6 1890
97b4d0e9 1891 	/* do we have all the in-sync disks that we expect? */
ab3cb6b3 1892 mpb = super->anchor;
97b4d0e9 1893
ab3cb6b3
N
1894 for (i = 0; i < mpb->num_raid_devs; i++) {
1895 struct imsm_dev *dev = get_imsm_dev(super, i);
1896 int failed, enough, j, missing = 0;
1897 struct imsm_map *map;
1898 __u8 state;
97b4d0e9 1899
ab3cb6b3
N
1900 failed = imsm_count_failed(super, dev);
1901 state = imsm_check_degraded(super, dev, failed);
1902 map = get_imsm_map(dev, dev->vol.migr_state);
1903
1904 /* any newly missing disks?
1905 * (catches single-degraded vs double-degraded)
1906 */
1907 for (j = 0; j < map->num_members; j++) {
98130f40 1908 			__u32 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ab3cb6b3
N
1909 __u32 idx = ord_to_idx(ord);
1910
1911 if (!(ord & IMSM_ORD_REBUILD) &&
1912 get_imsm_missing(super, idx)) {
1913 missing = 1;
1914 break;
1915 }
97b4d0e9 1916 }
ab3cb6b3
N
1917
1918 if (state == IMSM_T_STATE_FAILED)
1919 enough = -1;
1920 else if (state == IMSM_T_STATE_DEGRADED &&
1921 (state != map->map_state || missing))
1922 enough = 0;
1923 else /* we're normal, or already degraded */
1924 enough = 1;
1925
1926 /* in the missing/failed disk case check to see
1927 * if at least one array is runnable
1928 */
1929 max_enough = max(max_enough, enough);
1930 }
1931 dprintf("%s: enough: %d\n", __func__, max_enough);
1932 info->container_enough = max_enough;
97b4d0e9 1933
4a04ec6c 1934 if (super->disks) {
14e8215b
DW
1935 __u32 reserved = imsm_reserved_sectors(super, super->disks);
1936
b9f594fe 1937 disk = &super->disks->disk;
14e8215b
DW
1938 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
1939 info->component_size = reserved;
25ed7e59 1940 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
df474657
DW
1941 /* we don't change info->disk.raid_disk here because
1942 * this state will be finalized in mdmon after we have
1943 * found the 'most fresh' version of the metadata
1944 */
25ed7e59
DW
1945 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
1946 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
cdddbdbc 1947 }
a575e2a7
DW
1948
1949 /* only call uuid_from_super_imsm when this disk is part of a populated container,
1950 * ->compare_super may have updated the 'num_raid_devs' field for spares
1951 */
1952 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
36ba7d48 1953 uuid_from_super_imsm(st, info->uuid);
22e263f6
AC
1954 else
1955 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
a5d85af7
N
1956
1957 /* I don't know how to compute 'map' on imsm, so use safe default */
1958 if (map) {
1959 int i;
1960 for (i = 0; i < map_disks; i++)
1961 map[i] = 1;
1962 }
1963
cdddbdbc
DW
1964}
1965
5c4cd5da
AC
1966/* allocates memory and fills in the disk member of an mdinfo structure
1967 * for each disk in the array */
1968struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
1969{
1970 struct mdinfo *mddev = NULL;
1971 struct intel_super *super = st->sb;
1972 struct imsm_disk *disk;
1973 int count = 0;
1974 struct dl *dl;
1975 if (!super || !super->disks)
1976 return NULL;
1977 dl = super->disks;
1978 mddev = malloc(sizeof(*mddev));
1979 if (!mddev) {
1980 fprintf(stderr, Name ": Failed to allocate memory.\n");
1981 return NULL;
1982 }
1983 memset(mddev, 0, sizeof(*mddev));
1984 while (dl) {
1985 struct mdinfo *tmp;
1986 disk = &dl->disk;
1987 tmp = malloc(sizeof(*tmp));
1988 if (!tmp) {
1989 fprintf(stderr, Name ": Failed to allocate memory.\n");
1990 if (mddev)
1991 sysfs_free(mddev);
1992 return NULL;
1993 }
1994 memset(tmp, 0, sizeof(*tmp));
1995 if (mddev->devs)
1996 tmp->next = mddev->devs;
1997 mddev->devs = tmp;
1998 tmp->disk.number = count++;
1999 tmp->disk.major = dl->major;
2000 tmp->disk.minor = dl->minor;
2001 tmp->disk.state = is_configured(disk) ?
2002 (1 << MD_DISK_ACTIVE) : 0;
2003 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2004 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2005 tmp->disk.raid_disk = -1;
2006 dl = dl->next;
2007 }
2008 return mddev;
2009}
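/* Usage sketch (illustrative only): a caller walks the returned list via
 * ->devs and releases it with sysfs_free(), e.g.
 *
 *	struct mdinfo *list = getinfo_super_disks_imsm(st);
 *	struct mdinfo *d;
 *
 *	for (d = list ? list->devs : NULL; d; d = d->next)
 *		printf("%d:%d state %#x\n",
 *		       d->disk.major, d->disk.minor, d->disk.state);
 *	if (list)
 *		sysfs_free(list);
 */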
2010
cdddbdbc
DW
2011static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2012 char *update, char *devname, int verbose,
2013 int uuid_set, char *homehost)
2014{
f352c545
DW
2015 /* For 'assemble' and 'force' we need to return non-zero if any
2016 * change was made. For others, the return value is ignored.
2017 * Update options are:
2018 * force-one : This device looks a bit old but needs to be included,
2019 * update age info appropriately.
2020 * assemble: clear any 'faulty' flag to allow this device to
2021 * be assembled.
2022 * force-array: Array is degraded but being forced, mark it clean
2023 * if that will be needed to assemble it.
2024 *
2025 * newdev: not used ????
2026 * grow: Array has gained a new device - this is currently for
2027 * linear only
2028 * resync: mark as dirty so a resync will happen.
2029 * name: update the name - preserving the homehost
6e46bf34 2030 	 * uuid: Change the uuid of the array to match what is given
f352c545
DW
2031 *
2032 * Following are not relevant for this imsm:
2033 	 * sparc2.2 : update from old dodgy metadata
2034 * super-minor: change the preferred_minor number
2035 * summaries: update redundant counters.
f352c545
DW
2036 * homehost: update the recorded homehost
2037 * _reshape_progress: record new reshape_progress position.
2038 */
6e46bf34
DW
2039 int rv = 1;
2040 struct intel_super *super = st->sb;
2041 struct imsm_super *mpb;
f352c545 2042
6e46bf34
DW
2043 /* we can only update container info */
2044 if (!super || super->current_vol >= 0 || !super->anchor)
2045 return 1;
2046
2047 mpb = super->anchor;
2048
2049 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
1e2b2765 2050 rv = -1;
6e46bf34
DW
2051 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2052 mpb->orig_family_num = *((__u32 *) info->update_private);
2053 rv = 0;
2054 } else if (strcmp(update, "uuid") == 0) {
2055 __u32 *new_family = malloc(sizeof(*new_family));
2056
2057 		/* update orig_family_num with the incoming random
2058 * data, report the new effective uuid, and store the
2059 * new orig_family_num for future updates.
2060 */
2061 if (new_family) {
2062 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2063 uuid_from_super_imsm(st, info->uuid);
2064 *new_family = mpb->orig_family_num;
2065 info->update_private = new_family;
2066 rv = 0;
2067 }
2068 } else if (strcmp(update, "assemble") == 0)
2069 rv = 0;
2070 else
1e2b2765 2071 rv = -1;
f352c545 2072
6e46bf34
DW
2073 /* successful update? recompute checksum */
2074 if (rv == 0)
2075 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
f352c545
DW
2076
2077 return rv;
cdddbdbc
DW
2078}
2079
c2c087e6 2080static size_t disks_to_mpb_size(int disks)
cdddbdbc 2081{
c2c087e6 2082 size_t size;
cdddbdbc 2083
c2c087e6
DW
2084 size = sizeof(struct imsm_super);
2085 size += (disks - 1) * sizeof(struct imsm_disk);
2086 size += 2 * sizeof(struct imsm_dev);
2087 	/* up to 2 maps per raid device (-2 for the imsm_maps embedded in imsm_dev) */
2088 size += (4 - 2) * sizeof(struct imsm_map);
2089 /* 4 possible disk_ord_tbl's */
2090 size += 4 * (disks - 1) * sizeof(__u32);
2091
2092 return size;
2093}
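/* Worked example (hypothetical disk count): disks_to_mpb_size(4) reserves
 * sizeof(struct imsm_super) + 3 * sizeof(struct imsm_disk)
 * + 2 * sizeof(struct imsm_dev) + 2 * sizeof(struct imsm_map)
 * + 4 * 3 * sizeof(__u32), i.e. room for two volumes, each with a current
 * and a migration map spanning all four members.
 */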
2094
2095static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2096{
2097 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2098 return 0;
2099
2100 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
cdddbdbc
DW
2101}
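/* e.g. (hypothetical 1TB member) avail_size_imsm(st, 1953525168) returns
 * 1953525168 - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS)
 * = 1953525168 - 4514 = 1953520654 sectors available for data.
 */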
2102
ba2de7ba
DW
2103static void free_devlist(struct intel_super *super)
2104{
2105 struct intel_dev *dv;
2106
2107 while (super->devlist) {
2108 dv = super->devlist->next;
2109 free(super->devlist->dev);
2110 free(super->devlist);
2111 super->devlist = dv;
2112 }
2113}
2114
2115static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
2116{
2117 memcpy(dest, src, sizeof_imsm_dev(src, 0));
2118}
2119
cdddbdbc
DW
2120static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2121{
2122 /*
2123 * return:
2124 * 0 same, or first was empty, and second was copied
2125 * 1 second had wrong number
2126 * 2 wrong uuid
2127 * 3 wrong other info
2128 */
2129 struct intel_super *first = st->sb;
2130 struct intel_super *sec = tst->sb;
2131
2132 if (!first) {
2133 st->sb = tst->sb;
2134 tst->sb = NULL;
2135 return 0;
2136 }
2137
d23fe947
DW
2138 /* if an anchor does not have num_raid_devs set then it is a free
2139 * floating spare
2140 */
2141 if (first->anchor->num_raid_devs > 0 &&
2142 sec->anchor->num_raid_devs > 0) {
a2b97981
DW
2143 /* Determine if these disks might ever have been
2144 * related. Further disambiguation can only take place
2145 * in load_super_imsm_all
2146 */
2147 __u32 first_family = first->anchor->orig_family_num;
2148 __u32 sec_family = sec->anchor->orig_family_num;
2149
f796af5d
DW
2150 if (memcmp(first->anchor->sig, sec->anchor->sig,
2151 MAX_SIGNATURE_LENGTH) != 0)
2152 return 3;
2153
a2b97981
DW
2154 if (first_family == 0)
2155 first_family = first->anchor->family_num;
2156 if (sec_family == 0)
2157 sec_family = sec->anchor->family_num;
2158
2159 if (first_family != sec_family)
d23fe947 2160 return 3;
f796af5d 2161
d23fe947 2162 }
cdddbdbc 2163
f796af5d 2164
3e372e5a
DW
2165 	/* if 'first' is a spare, promote it to a populated mpb with sec's
2166 * family number
2167 */
2168 if (first->anchor->num_raid_devs == 0 &&
2169 sec->anchor->num_raid_devs > 0) {
78d30f94 2170 int i;
ba2de7ba
DW
2171 struct intel_dev *dv;
2172 struct imsm_dev *dev;
78d30f94
DW
2173
2174 		/* we need to copy raid device info from sec; if an allocation
2175 		 * fails here we don't associate the spare
2176 */
2177 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
ba2de7ba
DW
2178 dv = malloc(sizeof(*dv));
2179 if (!dv)
2180 break;
2181 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2182 if (!dev) {
2183 free(dv);
2184 break;
78d30f94 2185 }
ba2de7ba
DW
2186 dv->dev = dev;
2187 dv->index = i;
2188 dv->next = first->devlist;
2189 first->devlist = dv;
78d30f94 2190 }
709743c5 2191 if (i < sec->anchor->num_raid_devs) {
ba2de7ba
DW
2192 /* allocation failure */
2193 free_devlist(first);
2194 fprintf(stderr, "imsm: failed to associate spare\n");
2195 return 3;
78d30f94 2196 }
3e372e5a 2197 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
148acb7b 2198 first->anchor->orig_family_num = sec->anchor->orig_family_num;
3e372e5a 2199 first->anchor->family_num = sec->anchor->family_num;
ac6449be 2200 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
709743c5
DW
2201 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2202 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
3e372e5a
DW
2203 }
2204
cdddbdbc
DW
2205 return 0;
2206}
2207
0030e8d6
DW
2208static void fd2devname(int fd, char *name)
2209{
2210 struct stat st;
2211 char path[256];
33a6535d 2212 char dname[PATH_MAX];
0030e8d6
DW
2213 char *nm;
2214 int rv;
2215
2216 name[0] = '\0';
2217 if (fstat(fd, &st) != 0)
2218 return;
2219 sprintf(path, "/sys/dev/block/%d:%d",
2220 major(st.st_rdev), minor(st.st_rdev));
2221
2222 rv = readlink(path, dname, sizeof(dname));
2223 if (rv <= 0)
2224 return;
2225
2226 dname[rv] = '\0';
2227 nm = strrchr(dname, '/');
2228 nm++;
2229 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2230}
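/* Illustrative example (hypothetical device): for an fd whose backing device
 * is major:minor 8:16, readlink("/sys/dev/block/8:16") typically yields a
 * path ending in ".../block/sdb", so the basename "sdb" is rewritten into
 * name as "/dev/sdb".
 */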
2231
cdddbdbc
DW
2232extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2233
2234static int imsm_read_serial(int fd, char *devname,
2235 __u8 serial[MAX_RAID_SERIAL_LEN])
2236{
2237 unsigned char scsi_serial[255];
cdddbdbc
DW
2238 int rv;
2239 int rsp_len;
1f24f035 2240 int len;
316e2bf4
DW
2241 char *dest;
2242 char *src;
2243 char *rsp_buf;
2244 int i;
cdddbdbc
DW
2245
2246 memset(scsi_serial, 0, sizeof(scsi_serial));
cdddbdbc 2247
f9ba0ff1
DW
2248 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
2249
40ebbb9c 2250 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
f9ba0ff1
DW
2251 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2252 fd2devname(fd, (char *) serial);
0030e8d6
DW
2253 return 0;
2254 }
2255
cdddbdbc
DW
2256 if (rv != 0) {
2257 if (devname)
2258 fprintf(stderr,
2259 Name ": Failed to retrieve serial for %s\n",
2260 devname);
2261 return rv;
2262 }
2263
2264 rsp_len = scsi_serial[3];
03cd4cc8
DW
2265 if (!rsp_len) {
2266 if (devname)
2267 fprintf(stderr,
2268 Name ": Failed to retrieve serial for %s\n",
2269 devname);
2270 return 2;
2271 }
1f24f035 2272 rsp_buf = (char *) &scsi_serial[4];
5c3db629 2273
316e2bf4
DW
2274 /* trim all whitespace and non-printable characters and convert
2275 * ':' to ';'
2276 */
2277 for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
2278 src = &rsp_buf[i];
2279 if (*src > 0x20) {
2280 /* ':' is reserved for use in placeholder serial
2281 * numbers for missing disks
2282 */
2283 if (*src == ':')
2284 *dest++ = ';';
2285 else
2286 *dest++ = *src;
2287 }
2288 }
2289 len = dest - rsp_buf;
2290 dest = rsp_buf;
2291
2292 /* truncate leading characters */
2293 if (len > MAX_RAID_SERIAL_LEN) {
2294 dest += len - MAX_RAID_SERIAL_LEN;
1f24f035 2295 len = MAX_RAID_SERIAL_LEN;
316e2bf4 2296 }
5c3db629 2297
5c3db629 2298 memset(serial, 0, MAX_RAID_SERIAL_LEN);
316e2bf4 2299 memcpy(serial, dest, len);
cdddbdbc
DW
2300
2301 return 0;
2302}
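/* Illustrative example (made-up serial): a raw payload of "  Z1D2:3456  "
 * returned by scsi_get_serial() is compacted to "Z1D2;3456" (whitespace and
 * non-printables dropped, ':' mapped to ';'); serials longer than
 * MAX_RAID_SERIAL_LEN keep only their trailing 16 characters.
 */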
2303
1f24f035
DW
2304static int serialcmp(__u8 *s1, __u8 *s2)
2305{
2306 return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
2307}
2308
2309static void serialcpy(__u8 *dest, __u8 *src)
2310{
2311 strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
2312}
2313
1799c9e8 2314#ifndef MDASSEMBLE
54c2c1ea
DW
2315static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2316{
2317 struct dl *dl;
2318
2319 for (dl = super->disks; dl; dl = dl->next)
2320 if (serialcmp(dl->serial, serial) == 0)
2321 break;
2322
2323 return dl;
2324}
1799c9e8 2325#endif
54c2c1ea 2326
a2b97981
DW
2327static struct imsm_disk *
2328__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2329{
2330 int i;
2331
2332 for (i = 0; i < mpb->num_disks; i++) {
2333 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2334
2335 if (serialcmp(disk->serial, serial) == 0) {
2336 if (idx)
2337 *idx = i;
2338 return disk;
2339 }
2340 }
2341
2342 return NULL;
2343}
2344
cdddbdbc
DW
2345static int
2346load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2347{
a2b97981 2348 struct imsm_disk *disk;
cdddbdbc
DW
2349 struct dl *dl;
2350 struct stat stb;
cdddbdbc 2351 int rv;
a2b97981 2352 char name[40];
d23fe947
DW
2353 __u8 serial[MAX_RAID_SERIAL_LEN];
2354
2355 rv = imsm_read_serial(fd, devname, serial);
2356
2357 if (rv != 0)
2358 return 2;
2359
a2b97981 2360 dl = calloc(1, sizeof(*dl));
b9f594fe 2361 if (!dl) {
cdddbdbc
DW
2362 if (devname)
2363 fprintf(stderr,
2364 Name ": failed to allocate disk buffer for %s\n",
2365 devname);
2366 return 2;
2367 }
cdddbdbc 2368
a2b97981
DW
2369 fstat(fd, &stb);
2370 dl->major = major(stb.st_rdev);
2371 dl->minor = minor(stb.st_rdev);
2372 dl->next = super->disks;
2373 dl->fd = keep_fd ? fd : -1;
2374 assert(super->disks == NULL);
2375 super->disks = dl;
2376 serialcpy(dl->serial, serial);
2377 dl->index = -2;
2378 dl->e = NULL;
2379 fd2devname(fd, name);
2380 if (devname)
2381 dl->devname = strdup(devname);
2382 else
2383 dl->devname = strdup(name);
cdddbdbc 2384
d23fe947 2385 /* look up this disk's index in the current anchor */
a2b97981
DW
2386 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2387 if (disk) {
2388 dl->disk = *disk;
2389 /* only set index on disks that are a member of a
2390 	 * populated container, i.e. one with raid_devs
2391 */
2392 if (is_failed(&dl->disk))
3f6efecc 2393 dl->index = -2;
a2b97981
DW
2394 else if (is_spare(&dl->disk))
2395 dl->index = -1;
3f6efecc
DW
2396 }
2397
949c47a0
DW
2398 return 0;
2399}
2400
0e600426 2401#ifndef MDASSEMBLE
0c046afd
DW
2402/* When migrating map0 contains the 'destination' state while map1
2403 * contains the current state. When not migrating map0 contains the
2404 * current state. This routine assumes that map[0].map_state is set to
2405 * the current array state before being called.
2406 *
2407 * Migration is indicated by one of the following states
2408 * 1/ Idle (migr_state=0 map0state=normal||uninitialized||degraded||failed)
e3bba0e0 2409 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
0c046afd 2410 *    map1state=uninitialized)
1484e727 2411 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
0c046afd 2412 * map1state=normal)
e3bba0e0 2413 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
0c046afd
DW
2414 * map1state=degraded)
2415 */
0556e1a2 2416static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type)
3393c6af 2417{
0c046afd 2418 struct imsm_map *dest;
3393c6af
DW
2419 struct imsm_map *src = get_imsm_map(dev, 0);
2420
0c046afd 2421 dev->vol.migr_state = 1;
1484e727 2422 set_migr_type(dev, migr_type);
f8f603f1 2423 dev->vol.curr_migr_unit = 0;
0c046afd
DW
2424 dest = get_imsm_map(dev, 1);
2425
0556e1a2 2426 /* duplicate and then set the target end state in map[0] */
3393c6af 2427 memcpy(dest, src, sizeof_imsm_map(src));
28bce06f
AK
2428 if ((migr_type == MIGR_REBUILD) ||
2429 (migr_type == MIGR_GEN_MIGR)) {
0556e1a2
DW
2430 __u32 ord;
2431 int i;
2432
2433 for (i = 0; i < src->num_members; i++) {
2434 ord = __le32_to_cpu(src->disk_ord_tbl[i]);
2435 set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
2436 }
2437 }
2438
0c046afd 2439 src->map_state = to_state;
949c47a0 2440}
f8f603f1
DW
2441
2442static void end_migration(struct imsm_dev *dev, __u8 map_state)
2443{
2444 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2 2445 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
28bce06f 2446 int i, j;
0556e1a2
DW
2447
2448 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2449 * completed in the last migration.
2450 *
28bce06f 2451 * FIXME add support for raid-level-migration
0556e1a2
DW
2452 */
2453 for (i = 0; i < prev->num_members; i++)
28bce06f
AK
2454 for (j = 0; j < map->num_members; j++)
2455 /* during online capacity expansion
2456 			 * disk positions can change if takeover is used
2457 */
2458 if (ord_to_idx(map->disk_ord_tbl[j]) ==
2459 ord_to_idx(prev->disk_ord_tbl[i])) {
2460 map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
2461 break;
2462 }
f8f603f1
DW
2463
2464 dev->vol.migr_state = 0;
28bce06f 2465 dev->vol.migr_type = 0;
f8f603f1
DW
2466 dev->vol.curr_migr_unit = 0;
2467 map->map_state = map_state;
2468}
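/* Illustrative walk-through (hypothetical volume, following the state table
 * above rather than any specific caller): a RAID1 that lost a member sits at
 * migr_state=0 with map[0] degraded; calling
 *
 *	migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REBUILD);
 *
 * copies the degraded map into map[1] and stamps map[0] with the target
 * state (case 4/ above), and once the rebuild checkpoints are done
 *
 *	end_migration(dev, IMSM_T_STATE_NORMAL);
 *
 * carries forward any IMSM_ORD_REBUILD bits that did not complete and
 * collapses back to a single, normal map.
 */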
0e600426 2469#endif
949c47a0
DW
2470
2471static int parse_raid_devices(struct intel_super *super)
2472{
2473 int i;
2474 struct imsm_dev *dev_new;
4d7b1503 2475 size_t len, len_migr;
401d313b 2476 size_t max_len = 0;
4d7b1503
DW
2477 size_t space_needed = 0;
2478 struct imsm_super *mpb = super->anchor;
949c47a0
DW
2479
2480 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2481 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
ba2de7ba 2482 struct intel_dev *dv;
949c47a0 2483
4d7b1503
DW
2484 len = sizeof_imsm_dev(dev_iter, 0);
2485 len_migr = sizeof_imsm_dev(dev_iter, 1);
2486 if (len_migr > len)
2487 space_needed += len_migr - len;
2488
ba2de7ba
DW
2489 dv = malloc(sizeof(*dv));
2490 if (!dv)
2491 return 1;
401d313b
AK
2492 if (max_len < len_migr)
2493 max_len = len_migr;
2494 if (max_len > len_migr)
2495 space_needed += max_len - len_migr;
2496 dev_new = malloc(max_len);
ba2de7ba
DW
2497 if (!dev_new) {
2498 free(dv);
949c47a0 2499 return 1;
ba2de7ba 2500 }
949c47a0 2501 imsm_copy_dev(dev_new, dev_iter);
ba2de7ba
DW
2502 dv->dev = dev_new;
2503 dv->index = i;
2504 dv->next = super->devlist;
2505 super->devlist = dv;
949c47a0 2506 }
cdddbdbc 2507
4d7b1503
DW
2508 /* ensure that super->buf is large enough when all raid devices
2509 * are migrating
2510 */
2511 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
2512 void *buf;
2513
2514 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
2515 if (posix_memalign(&buf, 512, len) != 0)
2516 return 1;
2517
1f45a8ad
DW
2518 memcpy(buf, super->buf, super->len);
2519 memset(buf + super->len, 0, len - super->len);
4d7b1503
DW
2520 free(super->buf);
2521 super->buf = buf;
2522 super->len = len;
2523 }
2524
cdddbdbc
DW
2525 return 0;
2526}
2527
604b746f
JD
2528/* retrieve a pointer to the bbm log which starts after all raid devices */
2529struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2530{
2531 void *ptr = NULL;
2532
2533 if (__le32_to_cpu(mpb->bbm_log_size)) {
2534 ptr = mpb;
2535 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2536 }
2537
2538 return ptr;
2539}
2540
d23fe947 2541static void __free_imsm(struct intel_super *super, int free_disks);
9ca2c81c 2542
cdddbdbc
DW
2543/* load_imsm_mpb - read matrix metadata
2544 * allocates super->mpb to be freed by free_super
2545 */
2546static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2547{
2548 unsigned long long dsize;
cdddbdbc
DW
2549 unsigned long long sectors;
2550 struct stat;
6416d527 2551 struct imsm_super *anchor;
cdddbdbc
DW
2552 __u32 check_sum;
2553
cdddbdbc 2554 get_dev_size(fd, NULL, &dsize);
64436f06
N
2555 if (dsize < 1024) {
2556 if (devname)
2557 fprintf(stderr,
2558 				Name ": %s: device too small for imsm\n",
2559 devname);
2560 return 1;
2561 }
cdddbdbc
DW
2562
2563 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2564 if (devname)
2565 fprintf(stderr,
2566 Name ": Cannot seek to anchor block on %s: %s\n",
2567 devname, strerror(errno));
2568 return 1;
2569 }
2570
949c47a0 2571 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
ad97895e
DW
2572 if (devname)
2573 fprintf(stderr,
2574 Name ": Failed to allocate imsm anchor buffer"
2575 " on %s\n", devname);
2576 return 1;
2577 }
949c47a0 2578 if (read(fd, anchor, 512) != 512) {
cdddbdbc
DW
2579 if (devname)
2580 fprintf(stderr,
2581 Name ": Cannot read anchor block on %s: %s\n",
2582 devname, strerror(errno));
6416d527 2583 free(anchor);
cdddbdbc
DW
2584 return 1;
2585 }
2586
6416d527 2587 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
cdddbdbc
DW
2588 if (devname)
2589 fprintf(stderr,
2590 Name ": no IMSM anchor on %s\n", devname);
6416d527 2591 free(anchor);
cdddbdbc
DW
2592 return 2;
2593 }
2594
d23fe947 2595 __free_imsm(super, 0);
949c47a0
DW
2596 super->len = ROUND_UP(anchor->mpb_size, 512);
2597 if (posix_memalign(&super->buf, 512, super->len) != 0) {
cdddbdbc
DW
2598 if (devname)
2599 fprintf(stderr,
2600 Name ": unable to allocate %zu byte mpb buffer\n",
949c47a0 2601 super->len);
6416d527 2602 free(anchor);
cdddbdbc
DW
2603 return 2;
2604 }
949c47a0 2605 memcpy(super->buf, anchor, 512);
cdddbdbc 2606
6416d527
NB
2607 sectors = mpb_sectors(anchor) - 1;
2608 free(anchor);
949c47a0 2609 if (!sectors) {
ecf45690
DW
2610 check_sum = __gen_imsm_checksum(super->anchor);
2611 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2612 if (devname)
2613 fprintf(stderr,
2614 Name ": IMSM checksum %x != %x on %s\n",
2615 check_sum,
2616 __le32_to_cpu(super->anchor->check_sum),
2617 devname);
2618 return 2;
2619 }
2620
a2b97981 2621 return 0;
949c47a0 2622 }
cdddbdbc
DW
2623
2624 /* read the extended mpb */
2625 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
2626 if (devname)
2627 fprintf(stderr,
2628 Name ": Cannot seek to extended mpb on %s: %s\n",
2629 devname, strerror(errno));
2630 return 1;
2631 }
2632
f21e18ca 2633 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
cdddbdbc
DW
2634 if (devname)
2635 fprintf(stderr,
2636 Name ": Cannot read extended mpb on %s: %s\n",
2637 devname, strerror(errno));
2638 return 2;
2639 }
2640
949c47a0
DW
2641 check_sum = __gen_imsm_checksum(super->anchor);
2642 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
cdddbdbc
DW
2643 if (devname)
2644 fprintf(stderr,
2645 Name ": IMSM checksum %x != %x on %s\n",
949c47a0 2646 check_sum, __le32_to_cpu(super->anchor->check_sum),
cdddbdbc 2647 devname);
db575f3b 2648 return 3;
cdddbdbc
DW
2649 }
2650
604b746f
JD
2651 /* FIXME the BBM log is disk specific so we cannot use this global
2652 * buffer for all disks. Ok for now since we only look at the global
2653 * bbm_log_size parameter to gate assembly
2654 */
2655 super->bbm_log = __get_imsm_bbm_log(super->anchor);
2656
a2b97981
DW
2657 return 0;
2658}
2659
2660static int
2661load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
2662{
2663 int err;
2664
2665 err = load_imsm_mpb(fd, super, devname);
2666 if (err)
2667 return err;
2668 err = load_imsm_disk(fd, super, devname, keep_fd);
2669 if (err)
2670 return err;
2671 err = parse_raid_devices(super);
4d7b1503 2672
a2b97981 2673 return err;
cdddbdbc
DW
2674}
2675
ae6aad82
DW
2676static void __free_imsm_disk(struct dl *d)
2677{
2678 if (d->fd >= 0)
2679 close(d->fd);
2680 if (d->devname)
2681 free(d->devname);
0dcecb2e
DW
2682 if (d->e)
2683 free(d->e);
ae6aad82
DW
2684 free(d);
2685
2686}
1a64be56 2687
cdddbdbc
DW
2688static void free_imsm_disks(struct intel_super *super)
2689{
47ee5a45 2690 struct dl *d;
cdddbdbc 2691
47ee5a45
DW
2692 while (super->disks) {
2693 d = super->disks;
cdddbdbc 2694 super->disks = d->next;
ae6aad82 2695 __free_imsm_disk(d);
cdddbdbc 2696 }
cb82edca
AK
2697 while (super->disk_mgmt_list) {
2698 d = super->disk_mgmt_list;
2699 super->disk_mgmt_list = d->next;
2700 __free_imsm_disk(d);
2701 }
47ee5a45
DW
2702 while (super->missing) {
2703 d = super->missing;
2704 super->missing = d->next;
2705 __free_imsm_disk(d);
2706 }
2707
cdddbdbc
DW
2708}
2709
9ca2c81c 2710/* free all the pieces hanging off of a super pointer */
d23fe947 2711static void __free_imsm(struct intel_super *super, int free_disks)
cdddbdbc 2712{
88654014
LM
2713 struct intel_hba *elem, *next;
2714
9ca2c81c 2715 if (super->buf) {
949c47a0 2716 free(super->buf);
9ca2c81c
DW
2717 super->buf = NULL;
2718 }
d23fe947
DW
2719 if (free_disks)
2720 free_imsm_disks(super);
ba2de7ba 2721 free_devlist(super);
88654014
LM
2722 elem = super->hba;
2723 while (elem) {
2724 if (elem->path)
2725 free((void *)elem->path);
2726 next = elem->next;
2727 free(elem);
2728 elem = next;
88c32bb1 2729 }
88654014 2730 super->hba = NULL;
cdddbdbc
DW
2731}
2732
9ca2c81c
DW
2733static void free_imsm(struct intel_super *super)
2734{
d23fe947 2735 __free_imsm(super, 1);
9ca2c81c
DW
2736 free(super);
2737}
cdddbdbc
DW
2738
2739static void free_super_imsm(struct supertype *st)
2740{
2741 struct intel_super *super = st->sb;
2742
2743 if (!super)
2744 return;
2745
2746 free_imsm(super);
2747 st->sb = NULL;
2748}
2749
49133e57 2750static struct intel_super *alloc_super(void)
c2c087e6
DW
2751{
2752 struct intel_super *super = malloc(sizeof(*super));
2753
2754 if (super) {
2755 memset(super, 0, sizeof(*super));
bf5a934a 2756 super->current_vol = -1;
0dcecb2e 2757 super->create_offset = ~((__u32 ) 0);
88c32bb1
DW
2758 if (!check_env("IMSM_NO_PLATFORM"))
2759 super->orom = find_imsm_orom();
c2c087e6
DW
2760 }
2761
2762 return super;
2763}
2764
cdddbdbc 2765#ifndef MDASSEMBLE
47ee5a45
DW
2766/* find_missing - helper routine for load_super_imsm_all that identifies
2767 * disks that have disappeared from the system. This routine relies on
2768 * the mpb being up to date, which it is at load time.
2769 */
2770static int find_missing(struct intel_super *super)
2771{
2772 int i;
2773 struct imsm_super *mpb = super->anchor;
2774 struct dl *dl;
2775 struct imsm_disk *disk;
47ee5a45
DW
2776
2777 for (i = 0; i < mpb->num_disks; i++) {
2778 disk = __get_imsm_disk(mpb, i);
54c2c1ea 2779 dl = serial_to_dl(disk->serial, super);
47ee5a45
DW
2780 if (dl)
2781 continue;
47ee5a45
DW
2782
2783 dl = malloc(sizeof(*dl));
2784 if (!dl)
2785 return 1;
2786 dl->major = 0;
2787 dl->minor = 0;
2788 dl->fd = -1;
2789 dl->devname = strdup("missing");
2790 dl->index = i;
2791 serialcpy(dl->serial, disk->serial);
2792 dl->disk = *disk;
689c9bf3 2793 dl->e = NULL;
47ee5a45
DW
2794 dl->next = super->missing;
2795 super->missing = dl;
2796 }
2797
2798 return 0;
2799}
2800
a2b97981
DW
2801static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
2802{
2803 struct intel_disk *idisk = disk_list;
2804
2805 while (idisk) {
2806 if (serialcmp(idisk->disk.serial, serial) == 0)
2807 break;
2808 idisk = idisk->next;
2809 }
2810
2811 return idisk;
2812}
2813
2814static int __prep_thunderdome(struct intel_super **table, int tbl_size,
2815 struct intel_super *super,
2816 struct intel_disk **disk_list)
2817{
2818 struct imsm_disk *d = &super->disks->disk;
2819 struct imsm_super *mpb = super->anchor;
2820 int i, j;
2821
2822 for (i = 0; i < tbl_size; i++) {
2823 struct imsm_super *tbl_mpb = table[i]->anchor;
2824 struct imsm_disk *tbl_d = &table[i]->disks->disk;
2825
2826 if (tbl_mpb->family_num == mpb->family_num) {
2827 if (tbl_mpb->check_sum == mpb->check_sum) {
2828 dprintf("%s: mpb from %d:%d matches %d:%d\n",
2829 __func__, super->disks->major,
2830 super->disks->minor,
2831 table[i]->disks->major,
2832 table[i]->disks->minor);
2833 break;
2834 }
2835
2836 if (((is_configured(d) && !is_configured(tbl_d)) ||
2837 is_configured(d) == is_configured(tbl_d)) &&
2838 tbl_mpb->generation_num < mpb->generation_num) {
2839 /* current version of the mpb is a
2840 * better candidate than the one in
2841 * super_table, but copy over "cross
2842 * generational" status
2843 */
2844 struct intel_disk *idisk;
2845
2846 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
2847 __func__, super->disks->major,
2848 super->disks->minor,
2849 table[i]->disks->major,
2850 table[i]->disks->minor);
2851
2852 idisk = disk_list_get(tbl_d->serial, *disk_list);
2853 if (idisk && is_failed(&idisk->disk))
2854 tbl_d->status |= FAILED_DISK;
2855 break;
2856 } else {
2857 struct intel_disk *idisk;
2858 struct imsm_disk *disk;
2859
2860 /* tbl_mpb is more up to date, but copy
2861 * over cross generational status before
2862 * returning
2863 */
2864 disk = __serial_to_disk(d->serial, mpb, NULL);
2865 if (disk && is_failed(disk))
2866 d->status |= FAILED_DISK;
2867
2868 idisk = disk_list_get(d->serial, *disk_list);
2869 if (idisk) {
2870 idisk->owner = i;
2871 if (disk && is_configured(disk))
2872 idisk->disk.status |= CONFIGURED_DISK;
2873 }
2874
2875 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
2876 __func__, super->disks->major,
2877 super->disks->minor,
2878 table[i]->disks->major,
2879 table[i]->disks->minor);
2880
2881 return tbl_size;
2882 }
2883 }
2884 }
2885
2886 if (i >= tbl_size)
2887 table[tbl_size++] = super;
2888 else
2889 table[i] = super;
2890
2891 /* update/extend the merged list of imsm_disk records */
2892 for (j = 0; j < mpb->num_disks; j++) {
2893 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
2894 struct intel_disk *idisk;
2895
2896 idisk = disk_list_get(disk->serial, *disk_list);
2897 if (idisk) {
2898 idisk->disk.status |= disk->status;
2899 if (is_configured(&idisk->disk) ||
2900 is_failed(&idisk->disk))
2901 idisk->disk.status &= ~(SPARE_DISK);
2902 } else {
2903 idisk = calloc(1, sizeof(*idisk));
2904 if (!idisk)
2905 return -1;
2906 idisk->owner = IMSM_UNKNOWN_OWNER;
2907 idisk->disk = *disk;
2908 idisk->next = *disk_list;
2909 *disk_list = idisk;
2910 }
2911
2912 if (serialcmp(idisk->disk.serial, d->serial) == 0)
2913 idisk->owner = i;
2914 }
2915
2916 return tbl_size;
2917}
2918
2919static struct intel_super *
2920validate_members(struct intel_super *super, struct intel_disk *disk_list,
2921 const int owner)
2922{
2923 struct imsm_super *mpb = super->anchor;
2924 int ok_count = 0;
2925 int i;
2926
2927 for (i = 0; i < mpb->num_disks; i++) {
2928 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2929 struct intel_disk *idisk;
2930
2931 idisk = disk_list_get(disk->serial, disk_list);
2932 if (idisk) {
2933 if (idisk->owner == owner ||
2934 idisk->owner == IMSM_UNKNOWN_OWNER)
2935 ok_count++;
2936 else
2937 dprintf("%s: '%.16s' owner %d != %d\n",
2938 __func__, disk->serial, idisk->owner,
2939 owner);
2940 } else {
2941 dprintf("%s: unknown disk %x [%d]: %.16s\n",
2942 __func__, __le32_to_cpu(mpb->family_num), i,
2943 disk->serial);
2944 break;
2945 }
2946 }
2947
2948 if (ok_count == mpb->num_disks)
2949 return super;
2950 return NULL;
2951}
2952
2953static void show_conflicts(__u32 family_num, struct intel_super *super_list)
2954{
2955 struct intel_super *s;
2956
2957 for (s = super_list; s; s = s->next) {
2958 if (family_num != s->anchor->family_num)
2959 continue;
2960 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
2961 __le32_to_cpu(family_num), s->disks->devname);
2962 }
2963}
2964
2965static struct intel_super *
2966imsm_thunderdome(struct intel_super **super_list, int len)
2967{
2968 struct intel_super *super_table[len];
2969 struct intel_disk *disk_list = NULL;
2970 struct intel_super *champion, *spare;
2971 struct intel_super *s, **del;
2972 int tbl_size = 0;
2973 int conflict;
2974 int i;
2975
2976 memset(super_table, 0, sizeof(super_table));
2977 for (s = *super_list; s; s = s->next)
2978 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
2979
2980 for (i = 0; i < tbl_size; i++) {
2981 struct imsm_disk *d;
2982 struct intel_disk *idisk;
2983 struct imsm_super *mpb = super_table[i]->anchor;
2984
2985 s = super_table[i];
2986 d = &s->disks->disk;
2987
2988 /* 'd' must appear in merged disk list for its
2989 * configuration to be valid
2990 */
2991 idisk = disk_list_get(d->serial, disk_list);
2992 if (idisk && idisk->owner == i)
2993 s = validate_members(s, disk_list, i);
2994 else
2995 s = NULL;
2996
2997 if (!s)
2998 dprintf("%s: marking family: %#x from %d:%d offline\n",
2999 __func__, mpb->family_num,
3000 super_table[i]->disks->major,
3001 super_table[i]->disks->minor);
3002 super_table[i] = s;
3003 }
3004
3005 /* This is where the mdadm implementation differs from the Windows
3006 * driver which has no strict concept of a container. We can only
3007 * assemble one family from a container, so when returning a prodigal
3008 * array member to this system the code will not be able to disambiguate
3009 * the container contents that should be assembled ("foreign" versus
3010 * "local"). It requires user intervention to set the orig_family_num
3011 * to a new value to establish a new container. The Windows driver in
3012 * this situation fixes up the volume name in place and manages the
3013 * foreign array as an independent entity.
3014 */
3015 s = NULL;
3016 spare = NULL;
3017 conflict = 0;
3018 for (i = 0; i < tbl_size; i++) {
3019 struct intel_super *tbl_ent = super_table[i];
3020 int is_spare = 0;
3021
3022 if (!tbl_ent)
3023 continue;
3024
3025 if (tbl_ent->anchor->num_raid_devs == 0) {
3026 spare = tbl_ent;
3027 is_spare = 1;
3028 }
3029
3030 if (s && !is_spare) {
3031 show_conflicts(tbl_ent->anchor->family_num, *super_list);
3032 conflict++;
3033 } else if (!s && !is_spare)
3034 s = tbl_ent;
3035 }
3036
3037 if (!s)
3038 s = spare;
3039 if (!s) {
3040 champion = NULL;
3041 goto out;
3042 }
3043 champion = s;
3044
3045 if (conflict)
3046 fprintf(stderr, "Chose family %#x on '%s', "
3047 "assemble conflicts to new container with '--update=uuid'\n",
3048 __le32_to_cpu(s->anchor->family_num), s->disks->devname);
3049
3050 /* collect all dl's onto 'champion', and update them to
3051 * champion's version of the status
3052 */
3053 for (s = *super_list; s; s = s->next) {
3054 struct imsm_super *mpb = champion->anchor;
3055 struct dl *dl = s->disks;
3056
3057 if (s == champion)
3058 continue;
3059
3060 for (i = 0; i < mpb->num_disks; i++) {
3061 struct imsm_disk *disk;
3062
3063 disk = __serial_to_disk(dl->serial, mpb, &dl->index);
3064 if (disk) {
3065 dl->disk = *disk;
3066 /* only set index on disks that are a member of
3067 			 * a populated container, i.e. one with
3068 * raid_devs
3069 */
3070 if (is_failed(&dl->disk))
3071 dl->index = -2;
3072 else if (is_spare(&dl->disk))
3073 dl->index = -1;
3074 break;
3075 }
3076 }
3077
3078 if (i >= mpb->num_disks) {
3079 struct intel_disk *idisk;
3080
3081 idisk = disk_list_get(dl->serial, disk_list);
ecf408e9 3082 if (idisk && is_spare(&idisk->disk) &&
a2b97981
DW
3083 !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
3084 dl->index = -1;
3085 else {
3086 dl->index = -2;
3087 continue;
3088 }
3089 }
3090
3091 dl->next = champion->disks;
3092 champion->disks = dl;
3093 s->disks = NULL;
3094 }
3095
3096 /* delete 'champion' from super_list */
3097 for (del = super_list; *del; ) {
3098 if (*del == champion) {
3099 *del = (*del)->next;
3100 break;
3101 } else
3102 del = &(*del)->next;
3103 }
3104 champion->next = NULL;
3105
3106 out:
3107 while (disk_list) {
3108 struct intel_disk *idisk = disk_list;
3109
3110 disk_list = disk_list->next;
3111 free(idisk);
3112 }
3113
3114 return champion;
3115}
3116
cdddbdbc 3117static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
e1902a7b 3118 char *devname)
cdddbdbc
DW
3119{
3120 struct mdinfo *sra;
a2b97981
DW
3121 struct intel_super *super_list = NULL;
3122 struct intel_super *super = NULL;
db575f3b 3123 int devnum = fd2devnum(fd);
a2b97981 3124 struct mdinfo *sd;
db575f3b 3125 int retry;
a2b97981
DW
3126 int err = 0;
3127 int i;
dab4a513
DW
3128
3129 	/* check if 'fd' is an opened container */
b526e52d 3130 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
cdddbdbc
DW
3131 if (!sra)
3132 return 1;
3133
3134 if (sra->array.major_version != -1 ||
3135 sra->array.minor_version != -2 ||
1602d52c
AW
3136 strcmp(sra->text_version, "imsm") != 0) {
3137 err = 1;
3138 goto error;
3139 }
a2b97981
DW
3140 /* load all mpbs */
3141 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
49133e57 3142 struct intel_super *s = alloc_super();
7a6ecd55 3143 char nm[32];
a2b97981
DW
3144 int dfd;
3145
3146 err = 1;
3147 if (!s)
3148 goto error;
3149 s->next = super_list;
3150 super_list = s;
cdddbdbc 3151
a2b97981 3152 err = 2;
cdddbdbc 3153 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3154 dfd = dev_open(nm, O_RDWR);
a2b97981
DW
3155 if (dfd < 0)
3156 goto error;
3157
e1902a7b 3158 err = load_and_parse_mpb(dfd, s, NULL, 1);
db575f3b
DW
3159
3160 /* retry the load if we might have raced against mdmon */
a2b97981 3161 if (err == 3 && mdmon_running(devnum))
db575f3b
DW
3162 for (retry = 0; retry < 3; retry++) {
3163 usleep(3000);
e1902a7b 3164 err = load_and_parse_mpb(dfd, s, NULL, 1);
a2b97981 3165 if (err != 3)
db575f3b
DW
3166 break;
3167 }
a2b97981
DW
3168 if (err)
3169 goto error;
cdddbdbc
DW
3170 }
3171
a2b97981
DW
3172 /* all mpbs enter, maybe one leaves */
3173 super = imsm_thunderdome(&super_list, i);
3174 if (!super) {
3175 err = 1;
3176 goto error;
cdddbdbc
DW
3177 }
3178
47ee5a45
DW
3179 if (find_missing(super) != 0) {
3180 free_imsm(super);
a2b97981
DW
3181 err = 2;
3182 goto error;
47ee5a45 3183 }
a2b97981
DW
3184 err = 0;
3185
3186 error:
3187 while (super_list) {
3188 struct intel_super *s = super_list;
3189
3190 super_list = super_list->next;
3191 free_imsm(s);
3192 }
1602d52c 3193 sysfs_free(sra);
a2b97981
DW
3194
3195 if (err)
3196 return err;
f7e7067b 3197
cdddbdbc 3198 *sbp = super;
db575f3b 3199 st->container_dev = devnum;
a2b97981 3200 if (err == 0 && st->ss == NULL) {
bf5a934a 3201 st->ss = &super_imsm;
cdddbdbc
DW
3202 st->minor_version = 0;
3203 st->max_devs = IMSM_MAX_DEVICES;
3204 }
cdddbdbc
DW
3205 return 0;
3206}
2b959fbf
N
3207
3208static int load_container_imsm(struct supertype *st, int fd, char *devname)
3209{
3210 return load_super_imsm_all(st, fd, &st->sb, devname);
3211}
cdddbdbc
DW
3212#endif
3213
3214static int load_super_imsm(struct supertype *st, int fd, char *devname)
3215{
3216 struct intel_super *super;
3217 int rv;
3218
691c6ee1
N
3219 if (test_partition(fd))
3220 /* IMSM not allowed on partitions */
3221 return 1;
3222
37424f13
DW
3223 free_super_imsm(st);
3224
49133e57 3225 super = alloc_super();
cdddbdbc
DW
3226 if (!super) {
3227 fprintf(stderr,
3228 Name ": malloc of %zu failed.\n",
3229 sizeof(*super));
3230 return 1;
3231 }
3232
a2b97981 3233 rv = load_and_parse_mpb(fd, super, devname, 0);
cdddbdbc
DW
3234
3235 if (rv) {
3236 if (devname)
3237 fprintf(stderr,
3238 Name ": Failed to load all information "
3239 "sections on %s\n", devname);
3240 free_imsm(super);
3241 return rv;
3242 }
3243
3244 st->sb = super;
3245 if (st->ss == NULL) {
3246 st->ss = &super_imsm;
3247 st->minor_version = 0;
3248 st->max_devs = IMSM_MAX_DEVICES;
3249 }
cdddbdbc
DW
3250 return 0;
3251}
3252
ef6ffade
DW
3253static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3254{
3255 if (info->level == 1)
3256 return 128;
3257 return info->chunk_size >> 9;
3258}
3259
ff596308 3260static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
ef6ffade
DW
3261{
3262 __u32 num_stripes;
3263
3264 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
ff596308 3265 num_stripes /= num_domains;
ef6ffade
DW
3266
3267 return num_stripes;
3268}
3269
fcfd9599
DW
3270static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3271{
4025c288
DW
3272 if (info->level == 1)
3273 return info->size * 2;
3274 else
3275 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
fcfd9599
DW
3276}
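/* Worked example (hypothetical create request): for chunk_size = 65536 and
 * size = 1048576 (KiB per member), info_to_blocks_per_strip() gives
 * 65536 >> 9 = 128 sectors, info_to_num_data_stripes() with num_domains = 1
 * gives (1048576 * 2) / 128 = 16384 stripes, and
 * info_to_blocks_per_member() rounds the member down to a whole number of
 * strips; level 1 instead fixes the strip at 128 sectors and keeps the raw
 * member size.
 */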
3277
4d1313e9
DW
3278static void imsm_update_version_info(struct intel_super *super)
3279{
3280 /* update the version and attributes */
3281 struct imsm_super *mpb = super->anchor;
3282 char *version;
3283 struct imsm_dev *dev;
3284 struct imsm_map *map;
3285 int i;
3286
3287 for (i = 0; i < mpb->num_raid_devs; i++) {
3288 dev = get_imsm_dev(super, i);
3289 map = get_imsm_map(dev, 0);
3290 if (__le32_to_cpu(dev->size_high) > 0)
3291 mpb->attributes |= MPB_ATTRIB_2TB;
3292
3293 /* FIXME detect when an array spans a port multiplier */
3294 #if 0
3295 mpb->attributes |= MPB_ATTRIB_PM;
3296 #endif
3297
3298 if (mpb->num_raid_devs > 1 ||
3299 mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
3300 version = MPB_VERSION_ATTRIBS;
3301 switch (get_imsm_raid_level(map)) {
3302 case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
3303 case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
3304 case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
3305 case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
3306 }
3307 } else {
3308 if (map->num_members >= 5)
3309 version = MPB_VERSION_5OR6_DISK_ARRAY;
3310 else if (dev->status == DEV_CLONE_N_GO)
3311 version = MPB_VERSION_CNG;
3312 else if (get_imsm_raid_level(map) == 5)
3313 version = MPB_VERSION_RAID5;
3314 else if (map->num_members >= 3)
3315 version = MPB_VERSION_3OR4_DISK_ARRAY;
3316 else if (get_imsm_raid_level(map) == 1)
3317 version = MPB_VERSION_RAID1;
3318 else
3319 version = MPB_VERSION_RAID0;
3320 }
3321 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
3322 }
3323}
3324
aa534678
DW
3325static int check_name(struct intel_super *super, char *name, int quiet)
3326{
3327 struct imsm_super *mpb = super->anchor;
3328 char *reason = NULL;
3329 int i;
3330
3331 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3332 reason = "must be 16 characters or less";
3333
3334 for (i = 0; i < mpb->num_raid_devs; i++) {
3335 struct imsm_dev *dev = get_imsm_dev(super, i);
3336
3337 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3338 reason = "already exists";
3339 break;
3340 }
3341 }
3342
3343 if (reason && !quiet)
3344 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3345
3346 return !reason;
3347}
3348
8b353278
DW
3349static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3350 unsigned long long size, char *name,
3351 char *homehost, int *uuid)
cdddbdbc 3352{
c2c087e6
DW
3353 	/* We are creating a volume inside a pre-existing container,
3354 * so st->sb is already set.
3355 */
3356 struct intel_super *super = st->sb;
949c47a0 3357 struct imsm_super *mpb = super->anchor;
ba2de7ba 3358 struct intel_dev *dv;
c2c087e6
DW
3359 struct imsm_dev *dev;
3360 struct imsm_vol *vol;
3361 struct imsm_map *map;
3362 int idx = mpb->num_raid_devs;
3363 int i;
3364 unsigned long long array_blocks;
2c092cad 3365 size_t size_old, size_new;
ff596308 3366 __u32 num_data_stripes;
cdddbdbc 3367
88c32bb1 3368 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
c2c087e6 3369 fprintf(stderr, Name": This imsm-container already has the "
88c32bb1 3370 "maximum of %d volumes\n", super->orom->vpa);
c2c087e6
DW
3371 return 0;
3372 }
3373
2c092cad
DW
3374 /* ensure the mpb is large enough for the new data */
3375 size_old = __le32_to_cpu(mpb->mpb_size);
3376 size_new = disks_to_mpb_size(info->nr_disks);
3377 if (size_new > size_old) {
3378 void *mpb_new;
3379 size_t size_round = ROUND_UP(size_new, 512);
3380
3381 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3382 fprintf(stderr, Name": could not allocate new mpb\n");
3383 return 0;
3384 }
3385 memcpy(mpb_new, mpb, size_old);
3386 free(mpb);
3387 mpb = mpb_new;
949c47a0 3388 super->anchor = mpb_new;
2c092cad
DW
3389 mpb->mpb_size = __cpu_to_le32(size_new);
3390 memset(mpb_new + size_old, 0, size_round - size_old);
3391 }
bf5a934a 3392 super->current_vol = idx;
d23fe947
DW
3393 /* when creating the first raid device in this container set num_disks
3394 * to zero, i.e. delete this spare and add raid member devices in
3395 * add_to_super_imsm_volume()
3396 */
3397 if (super->current_vol == 0)
3398 mpb->num_disks = 0;
5a038140 3399
aa534678
DW
3400 if (!check_name(super, name, 0))
3401 return 0;
ba2de7ba
DW
3402 dv = malloc(sizeof(*dv));
3403 if (!dv) {
3404 fprintf(stderr, Name ": failed to allocate device list entry\n");
3405 return 0;
3406 }
949c47a0
DW
3407 dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
3408 if (!dev) {
ba2de7ba 3409 free(dv);
949c47a0
DW
3410 fprintf(stderr, Name": could not allocate raid device\n");
3411 return 0;
3412 }
c2c087e6 3413 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
03bcbc65
DW
3414 if (info->level == 1)
3415 array_blocks = info_to_blocks_per_member(info);
3416 else
3417 array_blocks = calc_array_size(info->level, info->raid_disks,
3418 info->layout, info->chunk_size,
3419 info->size*2);
979d38be
DW
3420 /* round array size down to closest MB */
3421 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
3422
c2c087e6
DW
3423 dev->size_low = __cpu_to_le32((__u32) array_blocks);
3424 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
3425 dev->status = __cpu_to_le32(0);
3426 dev->reserved_blocks = __cpu_to_le32(0);
3427 vol = &dev->vol;
3428 vol->migr_state = 0;
1484e727 3429 set_migr_type(dev, MIGR_INIT);
c2c087e6 3430 vol->dirty = 0;
f8f603f1 3431 vol->curr_migr_unit = 0;
a965f303 3432 map = get_imsm_map(dev, 0);
0dcecb2e 3433 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
fcfd9599 3434 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
ef6ffade 3435 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
0556e1a2 3436 map->failed_disk_num = ~0;
c2c087e6
DW
3437 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
3438 IMSM_T_STATE_NORMAL;
252d23c0 3439 map->ddf = 1;
ef6ffade
DW
3440
3441 if (info->level == 1 && info->raid_disks > 2) {
38950822
AW
3442 free(dev);
3443 free(dv);
ef6ffade
DW
3444 fprintf(stderr, Name": imsm does not support more than 2 disks"
3445 "in a raid1 volume\n");
3446 return 0;
3447 }
81062a36
DW
3448
3449 map->raid_level = info->level;
4d1313e9 3450 if (info->level == 10) {
c2c087e6 3451 map->raid_level = 1;
4d1313e9 3452 map->num_domains = info->raid_disks / 2;
81062a36
DW
3453 } else if (info->level == 1)
3454 map->num_domains = info->raid_disks;
3455 else
ff596308 3456 map->num_domains = 1;
81062a36 3457
ff596308
DW
3458 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
3459 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
ef6ffade 3460
c2c087e6
DW
3461 map->num_members = info->raid_disks;
3462 for (i = 0; i < map->num_members; i++) {
3463 /* initialized in add_to_super */
4eb26970 3464 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
c2c087e6 3465 }
949c47a0 3466 mpb->num_raid_devs++;
ba2de7ba
DW
3467
3468 dv->dev = dev;
3469 dv->index = super->current_vol;
3470 dv->next = super->devlist;
3471 super->devlist = dv;
c2c087e6 3472
4d1313e9
DW
3473 imsm_update_version_info(super);
3474
c2c087e6 3475 return 1;
cdddbdbc
DW
3476}
3477
bf5a934a
DW
3478static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
3479 unsigned long long size, char *name,
3480 char *homehost, int *uuid)
3481{
3482 /* This is primarily called by Create when creating a new array.
3483 * We will then get add_to_super called for each component, and then
3484 * write_init_super called to write it out to each device.
3485 * For IMSM, Create can create on fresh devices or on a pre-existing
3486 * array.
3487 * To create on a pre-existing array a different method will be called.
3488 * This one is just for fresh drives.
3489 */
3490 struct intel_super *super;
3491 struct imsm_super *mpb;
3492 size_t mpb_size;
4d1313e9 3493 char *version;
bf5a934a 3494
bf5a934a 3495 if (st->sb)
e683ca88
DW
3496 return init_super_imsm_volume(st, info, size, name, homehost, uuid);
3497
3498 if (info)
3499 mpb_size = disks_to_mpb_size(info->nr_disks);
3500 else
3501 mpb_size = 512;
bf5a934a 3502
49133e57 3503 super = alloc_super();
e683ca88 3504 if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
bf5a934a 3505 free(super);
e683ca88
DW
3506 super = NULL;
3507 }
3508 if (!super) {
3509 fprintf(stderr, Name
3510 ": %s could not allocate superblock\n", __func__);
bf5a934a
DW
3511 return 0;
3512 }
e683ca88 3513 memset(super->buf, 0, mpb_size);
ef649044 3514 mpb = super->buf;
e683ca88
DW
3515 mpb->mpb_size = __cpu_to_le32(mpb_size);
3516 st->sb = super;
3517
3518 if (info == NULL) {
3519 /* zeroing superblock */
3520 return 0;
3521 }
bf5a934a 3522
4d1313e9
DW
3523 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
3524
3525 version = (char *) mpb->sig;
3526 strcpy(version, MPB_SIGNATURE);
3527 version += strlen(MPB_SIGNATURE);
3528 strcpy(version, MPB_VERSION_RAID0);
bf5a934a 3529
bf5a934a
DW
3530 return 1;
3531}
3532
0e600426 3533#ifndef MDASSEMBLE
f20c3968 3534static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
bf5a934a
DW
3535 int fd, char *devname)
3536{
3537 struct intel_super *super = st->sb;
d23fe947 3538 struct imsm_super *mpb = super->anchor;
bf5a934a
DW
3539 struct dl *dl;
3540 struct imsm_dev *dev;
3541 struct imsm_map *map;
4eb26970 3542 int slot;
bf5a934a 3543
949c47a0 3544 dev = get_imsm_dev(super, super->current_vol);
a965f303 3545 map = get_imsm_map(dev, 0);
bf5a934a 3546
208933a7
N
3547 if (! (dk->state & (1<<MD_DISK_SYNC))) {
3548 fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
3549 devname);
3550 return 1;
3551 }
3552
efb30e7f
DW
3553 if (fd == -1) {
3554 /* we're doing autolayout so grab the pre-marked (in
3555 * validate_geometry) raid_disk
3556 */
3557 for (dl = super->disks; dl; dl = dl->next)
3558 if (dl->raiddisk == dk->raid_disk)
3559 break;
3560 } else {
3561 for (dl = super->disks; dl ; dl = dl->next)
3562 if (dl->major == dk->major &&
3563 dl->minor == dk->minor)
3564 break;
3565 }
d23fe947 3566
208933a7
N
3567 if (!dl) {
3568 fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
f20c3968 3569 return 1;
208933a7 3570 }
bf5a934a 3571
d23fe947
DW
3572 /* add a pristine spare to the metadata */
3573 if (dl->index < 0) {
3574 dl->index = super->anchor->num_disks;
3575 super->anchor->num_disks++;
3576 }
4eb26970
DW
3577 /* Check the device has not already been added */
3578 slot = get_imsm_disk_slot(map, dl->index);
3579 if (slot >= 0 &&
98130f40 3580 (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
4eb26970
DW
3581 fprintf(stderr, Name ": %s has been included in this array twice\n",
3582 devname);
3583 return 1;
3584 }
be73972f 3585 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
ee5aad5a 3586 dl->disk.status = CONFIGURED_DISK;
d23fe947
DW
3587
3588 /* if we are creating the first raid device update the family number */
3589 if (super->current_vol == 0) {
3590 __u32 sum;
3591 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
3592 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
3593
791b666a
AW
3594 if (!_dev || !_disk) {
3595 fprintf(stderr, Name ": BUG mpb setup error\n");
3596 return 1;
3597 }
d23fe947
DW
3598 *_dev = *dev;
3599 *_disk = dl->disk;
148acb7b
DW
3600 sum = random32();
3601 sum += __gen_imsm_checksum(mpb);
d23fe947 3602 mpb->family_num = __cpu_to_le32(sum);
148acb7b 3603 mpb->orig_family_num = mpb->family_num;
d23fe947 3604 }
f20c3968
DW
3605
3606 return 0;
bf5a934a
DW
3607}
3608
88654014 3609
f20c3968 3610static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
88654014 3611 int fd, char *devname)
cdddbdbc 3612{
c2c087e6 3613 struct intel_super *super = st->sb;
c2c087e6
DW
3614 struct dl *dd;
3615 unsigned long long size;
f2f27e63 3616 __u32 id;
c2c087e6
DW
3617 int rv;
3618 struct stat stb;
3619
88654014
LM
 3620 /* If we are on a RAID-enabled platform, check that the disk is
 3621 * attached to the raid controller.
 3622 * We do not need to test disk attachment for container-based additions;
 3623 * those disks were already tested when the container was created/assembled.
88c32bb1 3624 */
88654014
LM
3625 if ((fd != -1) && !check_env("IMSM_NO_PLATFORM")) {
3626 struct sys_dev *hba_name;
3627 struct intel_hba *hba;
3628
3629 hba_name = find_disk_attached_hba(fd, NULL);
3630 if (!hba_name) {
3631 fprintf(stderr,
3632 Name ": %s is not attached to Intel(R) RAID controller.\n",
3633 devname ? : "disk");
3634 return 1;
3635 }
3636 rv = attach_hba_to_super(super, hba_name, devname);
3637 switch (rv) {
3638 case 2:
3639 fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
3640 "controller (%s),\n but the container is assigned to Intel(R) "
3641 "%s RAID controller (",
3642 devname,
3643 get_sys_dev_type(hba_name->type),
3644 hba_name->pci_id ? : "Err!",
3645 get_sys_dev_type(hba_name->type));
3646
3647 hba = super->hba;
3648 while (hba) {
3649 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
3650 if (hba->next)
3651 fprintf(stderr, ", ");
3652 hba = hba->next;
3653 }
3654
3655 fprintf(stderr, ").\n"
3656 " Mixing devices attached to different controllers "
3657 "is not allowed.\n");
3658 free_sys_dev(&hba_name);
3659 return 1;
3660 }
3661 free_sys_dev(&hba_name);
88c32bb1
DW
3662 }
3663
f20c3968
DW
3664 if (super->current_vol >= 0)
3665 return add_to_super_imsm_volume(st, dk, fd, devname);
bf5a934a 3666
c2c087e6
DW
3667 fstat(fd, &stb);
3668 dd = malloc(sizeof(*dd));
b9f594fe 3669 if (!dd) {
c2c087e6
DW
3670 fprintf(stderr,
3671 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
f20c3968 3672 return 1;
c2c087e6
DW
3673 }
3674 memset(dd, 0, sizeof(*dd));
3675 dd->major = major(stb.st_rdev);
3676 dd->minor = minor(stb.st_rdev);
b9f594fe 3677 dd->index = -1;
c2c087e6 3678 dd->devname = devname ? strdup(devname) : NULL;
c2c087e6 3679 dd->fd = fd;
689c9bf3 3680 dd->e = NULL;
1a64be56 3681 dd->action = DISK_ADD;
c2c087e6 3682 rv = imsm_read_serial(fd, devname, dd->serial);
32ba9157 3683 if (rv) {
c2c087e6 3684 fprintf(stderr,
0030e8d6 3685 Name ": failed to retrieve scsi serial, aborting\n");
949c47a0 3686 free(dd);
0030e8d6 3687 abort();
c2c087e6
DW
3688 }
3689
c2c087e6
DW
3690 get_dev_size(fd, NULL, &size);
3691 size /= 512;
1f24f035 3692 serialcpy(dd->disk.serial, dd->serial);
b9f594fe 3693 dd->disk.total_blocks = __cpu_to_le32(size);
ee5aad5a 3694 dd->disk.status = SPARE_DISK;
c2c087e6 3695 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
b9f594fe 3696 dd->disk.scsi_id = __cpu_to_le32(id);
c2c087e6 3697 else
b9f594fe 3698 dd->disk.scsi_id = __cpu_to_le32(0);
43dad3d6
DW
3699
3700 if (st->update_tail) {
1a64be56
LM
3701 dd->next = super->disk_mgmt_list;
3702 super->disk_mgmt_list = dd;
43dad3d6
DW
3703 } else {
3704 dd->next = super->disks;
3705 super->disks = dd;
3706 }
f20c3968
DW
3707
3708 return 0;
cdddbdbc
DW
3709}
3710
1a64be56
LM
3711
3712static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
3713{
3714 struct intel_super *super = st->sb;
3715 struct dl *dd;
3716
 3717 /* remove_from_super works only in the mdmon context - it is used for
 3718 * manager -> monitor communication. Check that the communication memory
 3719 * buffer is prepared.
3720 */
3721 if (!st->update_tail) {
3722 fprintf(stderr,
3723 Name ": %s shall be used in mdmon context only"
3724 "(line %d).\n", __func__, __LINE__);
3725 return 1;
3726 }
3727 dd = malloc(sizeof(*dd));
3728 if (!dd) {
3729 fprintf(stderr,
3730 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
3731 return 1;
3732 }
3733 memset(dd, 0, sizeof(*dd));
3734 dd->major = dk->major;
3735 dd->minor = dk->minor;
3736 dd->index = -1;
3737 dd->fd = -1;
3738 dd->disk.status = SPARE_DISK;
3739 dd->action = DISK_REMOVE;
3740
3741 dd->next = super->disk_mgmt_list;
3742 super->disk_mgmt_list = dd;
3743
3744
3745 return 0;
3746}
3747
f796af5d
DW
3748static int store_imsm_mpb(int fd, struct imsm_super *mpb);
3749
3750static union {
3751 char buf[512];
3752 struct imsm_super anchor;
3753} spare_record __attribute__ ((aligned(512)));
c2c087e6 3754
d23fe947
DW
3755/* spare records have their own family number and do not have any defined raid
3756 * devices
3757 */
3758static int write_super_imsm_spares(struct intel_super *super, int doclose)
3759{
d23fe947 3760 struct imsm_super *mpb = super->anchor;
f796af5d 3761 struct imsm_super *spare = &spare_record.anchor;
d23fe947
DW
3762 __u32 sum;
3763 struct dl *d;
3764
f796af5d
DW
 3765 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
 3766 spare->generation_num = __cpu_to_le32(1UL);
 3767 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
 3768 spare->num_disks = 1;
 3769 spare->num_raid_devs = 0;
 3770 spare->cache_size = mpb->cache_size;
 3771 spare->pwr_cycle_count = __cpu_to_le32(1);
3772
3773 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
3774 MPB_SIGNATURE MPB_VERSION_RAID0);
d23fe947
DW
3775
3776 for (d = super->disks; d; d = d->next) {
8796fdc4 3777 if (d->index != -1)
d23fe947
DW
3778 continue;
3779
f796af5d
DW
3780 spare->disk[0] = d->disk;
3781 sum = __gen_imsm_checksum(spare);
3782 spare->family_num = __cpu_to_le32(sum);
3783 spare->orig_family_num = 0;
3784 sum = __gen_imsm_checksum(spare);
3785 spare->check_sum = __cpu_to_le32(sum);
d23fe947 3786
f796af5d 3787 if (store_imsm_mpb(d->fd, spare)) {
d23fe947
DW
3788 fprintf(stderr, "%s: failed for device %d:%d %s\n",
3789 __func__, d->major, d->minor, strerror(errno));
e74255d9 3790 return 1;
d23fe947
DW
3791 }
3792 if (doclose) {
3793 close(d->fd);
3794 d->fd = -1;
3795 }
3796 }
3797
e74255d9 3798 return 0;
d23fe947
DW
3799}
3800
36988a3d 3801static int write_super_imsm(struct supertype *st, int doclose)
cdddbdbc 3802{
36988a3d 3803 struct intel_super *super = st->sb;
949c47a0 3804 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
3805 struct dl *d;
3806 __u32 generation;
3807 __u32 sum;
d23fe947 3808 int spares = 0;
949c47a0 3809 int i;
a48ac0a8 3810 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
36988a3d 3811 int num_disks = 0;
cdddbdbc 3812
c2c087e6
DW
 3813 /* 'generation' is incremented every time the metadata is written */
3814 generation = __le32_to_cpu(mpb->generation_num);
3815 generation++;
3816 mpb->generation_num = __cpu_to_le32(generation);
3817
148acb7b
DW
3818 /* fix up cases where previous mdadm releases failed to set
3819 * orig_family_num
3820 */
3821 if (mpb->orig_family_num == 0)
3822 mpb->orig_family_num = mpb->family_num;
3823
d23fe947 3824 for (d = super->disks; d; d = d->next) {
8796fdc4 3825 if (d->index == -1)
d23fe947 3826 spares++;
36988a3d 3827 else {
d23fe947 3828 mpb->disk[d->index] = d->disk;
36988a3d
AK
3829 num_disks++;
3830 }
d23fe947 3831 }
36988a3d 3832 for (d = super->missing; d; d = d->next) {
47ee5a45 3833 mpb->disk[d->index] = d->disk;
36988a3d
AK
3834 num_disks++;
3835 }
3836 mpb->num_disks = num_disks;
3837 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
b9f594fe 3838
949c47a0
DW
3839 for (i = 0; i < mpb->num_raid_devs; i++) {
3840 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
36988a3d
AK
3841 struct imsm_dev *dev2 = get_imsm_dev(super, i);
3842 if (dev && dev2) {
3843 imsm_copy_dev(dev, dev2);
3844 mpb_size += sizeof_imsm_dev(dev, 0);
3845 }
949c47a0 3846 }
a48ac0a8
DW
3847 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
3848 mpb->mpb_size = __cpu_to_le32(mpb_size);
949c47a0 3849
c2c087e6 3850 /* recalculate checksum */
949c47a0 3851 sum = __gen_imsm_checksum(mpb);
c2c087e6
DW
3852 mpb->check_sum = __cpu_to_le32(sum);
3853
d23fe947 3854 /* write the mpb for disks that compose raid devices */
c2c087e6 3855 for (d = super->disks; d ; d = d->next) {
d23fe947
DW
3856 if (d->index < 0)
3857 continue;
f796af5d 3858 if (store_imsm_mpb(d->fd, mpb))
c2c087e6
DW
3859 fprintf(stderr, "%s: failed for device %d:%d %s\n",
3860 __func__, d->major, d->minor, strerror(errno));
c2c087e6
DW
3861 if (doclose) {
3862 close(d->fd);
3863 d->fd = -1;
3864 }
3865 }
3866
d23fe947
DW
3867 if (spares)
3868 return write_super_imsm_spares(super, doclose);
3869
e74255d9 3870 return 0;
c2c087e6
DW
3871}
3872
0e600426 3873
9b1fb677 3874static int create_array(struct supertype *st, int dev_idx)
43dad3d6
DW
3875{
3876 size_t len;
3877 struct imsm_update_create_array *u;
3878 struct intel_super *super = st->sb;
9b1fb677 3879 struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
54c2c1ea
DW
3880 struct imsm_map *map = get_imsm_map(dev, 0);
3881 struct disk_info *inf;
3882 struct imsm_disk *disk;
3883 int i;
43dad3d6 3884
54c2c1ea
DW
3885 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
3886 sizeof(*inf) * map->num_members;
43dad3d6
DW
3887 u = malloc(len);
3888 if (!u) {
3889 fprintf(stderr, "%s: failed to allocate update buffer\n",
3890 __func__);
3891 return 1;
3892 }
3893
3894 u->type = update_create_array;
9b1fb677 3895 u->dev_idx = dev_idx;
43dad3d6 3896 imsm_copy_dev(&u->dev, dev);
54c2c1ea
DW
3897 inf = get_disk_info(u);
3898 for (i = 0; i < map->num_members; i++) {
98130f40 3899 int idx = get_imsm_disk_idx(dev, i, -1);
9b1fb677 3900
54c2c1ea
DW
3901 disk = get_imsm_disk(super, idx);
3902 serialcpy(inf[i].serial, disk->serial);
3903 }
43dad3d6
DW
3904 append_metadata_update(st, u, len);
3905
3906 return 0;
3907}
3908
1a64be56 3909static int mgmt_disk(struct supertype *st)
43dad3d6
DW
3910{
3911 struct intel_super *super = st->sb;
3912 size_t len;
1a64be56 3913 struct imsm_update_add_remove_disk *u;
43dad3d6 3914
1a64be56 3915 if (!super->disk_mgmt_list)
43dad3d6
DW
3916 return 0;
3917
3918 len = sizeof(*u);
3919 u = malloc(len);
3920 if (!u) {
3921 fprintf(stderr, "%s: failed to allocate update buffer\n",
3922 __func__);
3923 return 1;
3924 }
3925
1a64be56 3926 u->type = update_add_remove_disk;
43dad3d6
DW
3927 append_metadata_update(st, u, len);
3928
3929 return 0;
3930}
3931
c2c087e6
DW
3932static int write_init_super_imsm(struct supertype *st)
3933{
9b1fb677
DW
3934 struct intel_super *super = st->sb;
3935 int current_vol = super->current_vol;
3936
3937 /* we are done with current_vol reset it to point st at the container */
3938 super->current_vol = -1;
3939
8273f55e 3940 if (st->update_tail) {
43dad3d6
DW
3941 /* queue the recently created array / added disk
3942 * as a metadata update */
43dad3d6 3943 int rv;
8273f55e 3944
43dad3d6 3945 /* determine if we are creating a volume or adding a disk */
9b1fb677 3946 if (current_vol < 0) {
1a64be56
LM
3947 /* in the mgmt (add/remove) disk case we are running
3948 * in mdmon context, so don't close fd's
43dad3d6 3949 */
1a64be56 3950 return mgmt_disk(st);
43dad3d6 3951 } else
9b1fb677 3952 rv = create_array(st, current_vol);
8273f55e 3953
43dad3d6 3954 return rv;
d682f344
N
3955 } else {
3956 struct dl *d;
3957 for (d = super->disks; d; d = d->next)
3958 Kill(d->devname, NULL, 0, 1, 1);
36988a3d 3959 return write_super_imsm(st, 1);
d682f344 3960 }
cdddbdbc 3961}
0e600426 3962#endif
cdddbdbc 3963
e683ca88 3964static int store_super_imsm(struct supertype *st, int fd)
cdddbdbc 3965{
e683ca88
DW
3966 struct intel_super *super = st->sb;
3967 struct imsm_super *mpb = super ? super->anchor : NULL;
551c80c1 3968
e683ca88 3969 if (!mpb)
ad97895e
DW
3970 return 1;
3971
1799c9e8 3972#ifndef MDASSEMBLE
e683ca88 3973 return store_imsm_mpb(fd, mpb);
1799c9e8
N
3974#else
3975 return 1;
3976#endif
cdddbdbc
DW
3977}
3978
0e600426
N
3979static int imsm_bbm_log_size(struct imsm_super *mpb)
3980{
3981 return __le32_to_cpu(mpb->bbm_log_size);
3982}
3983
3984#ifndef MDASSEMBLE
cdddbdbc
DW
3985static int validate_geometry_imsm_container(struct supertype *st, int level,
3986 int layout, int raiddisks, int chunk,
c2c087e6 3987 unsigned long long size, char *dev,
2c514b71
NB
3988 unsigned long long *freesize,
3989 int verbose)
cdddbdbc 3990{
c2c087e6
DW
3991 int fd;
3992 unsigned long long ldsize;
88c32bb1 3993 const struct imsm_orom *orom;
cdddbdbc 3994
c2c087e6
DW
3995 if (level != LEVEL_CONTAINER)
3996 return 0;
3997 if (!dev)
3998 return 1;
3999
88c32bb1
DW
4000 if (check_env("IMSM_NO_PLATFORM"))
4001 orom = NULL;
4002 else
4003 orom = find_imsm_orom();
4004 if (orom && raiddisks > orom->tds) {
4005 if (verbose)
4006 fprintf(stderr, Name ": %d exceeds maximum number of"
4007 " platform supported disks: %d\n",
4008 raiddisks, orom->tds);
4009 return 0;
4010 }
4011
c2c087e6
DW
4012 fd = open(dev, O_RDONLY|O_EXCL, 0);
4013 if (fd < 0) {
2c514b71
NB
4014 if (verbose)
4015 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
4016 dev, strerror(errno));
c2c087e6
DW
4017 return 0;
4018 }
4019 if (!get_dev_size(fd, dev, &ldsize)) {
4020 close(fd);
4021 return 0;
4022 }
4023 close(fd);
4024
4025 *freesize = avail_size_imsm(st, ldsize >> 9);
4026
4027 return 1;
cdddbdbc
DW
4028}
4029
0dcecb2e
DW
4030static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4031{
4032 const unsigned long long base_start = e[*idx].start;
4033 unsigned long long end = base_start + e[*idx].size;
4034 int i;
4035
4036 if (base_start == end)
4037 return 0;
4038
4039 *idx = *idx + 1;
4040 for (i = *idx; i < num_extents; i++) {
4041 /* extend overlapping extents */
4042 if (e[i].start >= base_start &&
4043 e[i].start <= end) {
4044 if (e[i].size == 0)
4045 return 0;
4046 if (e[i].start + e[i].size > end)
4047 end = e[i].start + e[i].size;
4048 } else if (e[i].start > end) {
4049 *idx = i;
4050 break;
4051 }
4052 }
4053
4054 return end - base_start;
4055}
4056
4057static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
4058{
4059 /* build a composite disk with all known extents and generate a new
4060 * 'maxsize' given the "all disks in an array must share a common start
4061 * offset" constraint
4062 */
4063 struct extent *e = calloc(sum_extents, sizeof(*e));
4064 struct dl *dl;
4065 int i, j;
4066 int start_extent;
4067 unsigned long long pos;
b9d77223 4068 unsigned long long start = 0;
0dcecb2e
DW
4069 unsigned long long maxsize;
4070 unsigned long reserve;
4071
4072 if (!e)
a7dd165b 4073 return 0;
0dcecb2e
DW
4074
4075 /* coalesce and sort all extents. also, check to see if we need to
4076 * reserve space between member arrays
4077 */
4078 j = 0;
4079 for (dl = super->disks; dl; dl = dl->next) {
4080 if (!dl->e)
4081 continue;
4082 for (i = 0; i < dl->extent_cnt; i++)
4083 e[j++] = dl->e[i];
4084 }
4085 qsort(e, sum_extents, sizeof(*e), cmp_extent);
4086
4087 /* merge extents */
4088 i = 0;
4089 j = 0;
4090 while (i < sum_extents) {
4091 e[j].start = e[i].start;
4092 e[j].size = find_size(e, &i, sum_extents);
4093 j++;
4094 if (e[j-1].size == 0)
4095 break;
4096 }
4097
4098 pos = 0;
4099 maxsize = 0;
4100 start_extent = 0;
4101 i = 0;
4102 do {
4103 unsigned long long esize;
4104
4105 esize = e[i].start - pos;
4106 if (esize >= maxsize) {
4107 maxsize = esize;
4108 start = pos;
4109 start_extent = i;
4110 }
4111 pos = e[i].start + e[i].size;
4112 i++;
4113 } while (e[i-1].size);
4114 free(e);
4115
a7dd165b
DW
4116 if (maxsize == 0)
4117 return 0;
4118
4119 /* FIXME assumes volume at offset 0 is the first volume in a
4120 * container
4121 */
0dcecb2e
DW
4122 if (start_extent > 0)
4123 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
4124 else
4125 reserve = 0;
4126
4127 if (maxsize < reserve)
a7dd165b 4128 return 0;
0dcecb2e
DW
4129
4130 super->create_offset = ~((__u32) 0);
4131 if (start + reserve > super->create_offset)
a7dd165b 4132 return 0; /* start overflows create_offset */
0dcecb2e
DW
4133 super->create_offset = start + reserve;
4134
4135 return maxsize - reserve;
4136}
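/* Minimal sketch, added for illustration (not in the original source): the
 * gap-scan pattern used by merge_extents() above and again in
 * validate_geometry_imsm_volume() below.  'e' is a sorted list of used
 * extents terminated by an entry with size == 0; free space is the largest
 * gap in front of any used extent.  example_largest_gap is a hypothetical
 * helper, not an mdadm function.
 */
static unsigned long long example_largest_gap(struct extent *e)
{
	unsigned long long pos = 0;
	unsigned long long maxsize = 0;
	int i = 0;

	do {
		unsigned long long esize = e[i].start - pos; /* free run before extent i */

		if (esize >= maxsize)
			maxsize = esize;
		pos = e[i].start + e[i].size;
		i++;
	} while (e[i-1].size);

	return maxsize;
}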
4137
88c32bb1
DW
4138static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
4139{
4140 if (level < 0 || level == 6 || level == 4)
4141 return 0;
4142
4143 /* if we have an orom prevent invalid raid levels */
4144 if (orom)
4145 switch (level) {
4146 case 0: return imsm_orom_has_raid0(orom);
4147 case 1:
4148 if (raiddisks > 2)
4149 return imsm_orom_has_raid1e(orom);
1c556e92
DW
4150 return imsm_orom_has_raid1(orom) && raiddisks == 2;
4151 case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
4152 case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
88c32bb1
DW
4153 }
4154 else
4155 return 1; /* not on an Intel RAID platform so anything goes */
4156
4157 return 0;
4158}
4159
35f81cbb 4160#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
6592ce37
DW
4161static int
4162validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
4163 int raiddisks, int chunk, int verbose)
4164{
4165 if (!is_raid_level_supported(super->orom, level, raiddisks)) {
4166 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4167 level, raiddisks, raiddisks > 1 ? "s" : "");
4168 return 0;
4169 }
4170 if (super->orom && level != 1 &&
4171 !imsm_orom_has_chunk(super->orom, chunk)) {
4172 pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
4173 return 0;
4174 }
4175 if (layout != imsm_level_to_layout(level)) {
4176 if (level == 5)
4177 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4178 else if (level == 10)
4179 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4180 else
4181 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4182 layout, level);
4183 return 0;
4184 }
4185
4186 return 1;
4187}
4188
c2c087e6
DW
4189/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
 4190 * FIXME: add ahci details
4191 */
8b353278
DW
4192static int validate_geometry_imsm_volume(struct supertype *st, int level,
4193 int layout, int raiddisks, int chunk,
c2c087e6 4194 unsigned long long size, char *dev,
2c514b71
NB
4195 unsigned long long *freesize,
4196 int verbose)
cdddbdbc 4197{
c2c087e6
DW
4198 struct stat stb;
4199 struct intel_super *super = st->sb;
a20d2ba5 4200 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4201 struct dl *dl;
4202 unsigned long long pos = 0;
4203 unsigned long long maxsize;
4204 struct extent *e;
4205 int i;
cdddbdbc 4206
88c32bb1
DW
4207 /* We must have the container info already read in. */
4208 if (!super)
c2c087e6
DW
4209 return 0;
4210
6592ce37 4211 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose))
c2c087e6 4212 return 0;
c2c087e6
DW
4213
4214 if (!dev) {
4215 /* General test: make sure there is space for
2da8544a
DW
4216 * 'raiddisks' device extents of size 'size' at a given
4217 * offset
c2c087e6 4218 */
e46273eb 4219 unsigned long long minsize = size;
b7528a20 4220 unsigned long long start_offset = MaxSector;
c2c087e6
DW
4221 int dcnt = 0;
4222 if (minsize == 0)
4223 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4224 for (dl = super->disks; dl ; dl = dl->next) {
4225 int found = 0;
4226
bf5a934a 4227 pos = 0;
c2c087e6
DW
4228 i = 0;
4229 e = get_extents(super, dl);
4230 if (!e) continue;
4231 do {
4232 unsigned long long esize;
4233 esize = e[i].start - pos;
4234 if (esize >= minsize)
4235 found = 1;
b7528a20 4236 if (found && start_offset == MaxSector) {
2da8544a
DW
4237 start_offset = pos;
4238 break;
4239 } else if (found && pos != start_offset) {
4240 found = 0;
4241 break;
4242 }
c2c087e6
DW
4243 pos = e[i].start + e[i].size;
4244 i++;
4245 } while (e[i-1].size);
4246 if (found)
4247 dcnt++;
4248 free(e);
4249 }
4250 if (dcnt < raiddisks) {
2c514b71
NB
4251 if (verbose)
4252 fprintf(stderr, Name ": imsm: Not enough "
4253 "devices with space for this array "
4254 "(%d < %d)\n",
4255 dcnt, raiddisks);
c2c087e6
DW
4256 return 0;
4257 }
4258 return 1;
4259 }
0dcecb2e 4260
c2c087e6
DW
4261 /* This device must be a member of the set */
4262 if (stat(dev, &stb) < 0)
4263 return 0;
4264 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4265 return 0;
4266 for (dl = super->disks ; dl ; dl = dl->next) {
f21e18ca
N
4267 if (dl->major == (int)major(stb.st_rdev) &&
4268 dl->minor == (int)minor(stb.st_rdev))
c2c087e6
DW
4269 break;
4270 }
4271 if (!dl) {
2c514b71
NB
4272 if (verbose)
4273 fprintf(stderr, Name ": %s is not in the "
4274 "same imsm set\n", dev);
c2c087e6 4275 return 0;
a20d2ba5
DW
4276 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4277 /* If a volume is present then the current creation attempt
4278 * cannot incorporate new spares because the orom may not
4279 * understand this configuration (all member disks must be
4280 * members of each array in the container).
4281 */
4282 fprintf(stderr, Name ": %s is a spare and a volume"
4283 " is already defined for this container\n", dev);
4284 fprintf(stderr, Name ": The option-rom requires all member"
4285 " disks to be a member of all volumes\n");
4286 return 0;
c2c087e6 4287 }
0dcecb2e
DW
4288
4289 /* retrieve the largest free space block */
c2c087e6
DW
4290 e = get_extents(super, dl);
4291 maxsize = 0;
4292 i = 0;
0dcecb2e
DW
4293 if (e) {
4294 do {
4295 unsigned long long esize;
4296
4297 esize = e[i].start - pos;
4298 if (esize >= maxsize)
4299 maxsize = esize;
4300 pos = e[i].start + e[i].size;
4301 i++;
4302 } while (e[i-1].size);
4303 dl->e = e;
4304 dl->extent_cnt = i;
4305 } else {
4306 if (verbose)
4307 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4308 dev);
4309 return 0;
4310 }
4311 if (maxsize < size) {
4312 if (verbose)
4313 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4314 dev, maxsize, size);
4315 return 0;
4316 }
4317
4318 /* count total number of extents for merge */
4319 i = 0;
4320 for (dl = super->disks; dl; dl = dl->next)
4321 if (dl->e)
4322 i += dl->extent_cnt;
4323
4324 maxsize = merge_extents(super, i);
a7dd165b 4325 if (maxsize < size || maxsize == 0) {
0dcecb2e
DW
4326 if (verbose)
4327 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4328 maxsize, size);
4329 return 0;
0dcecb2e
DW
4330 }
4331
c2c087e6
DW
4332 *freesize = maxsize;
4333
4334 return 1;
cdddbdbc
DW
4335}
4336
efb30e7f
DW
4337static int reserve_space(struct supertype *st, int raiddisks,
4338 unsigned long long size, int chunk,
4339 unsigned long long *freesize)
4340{
4341 struct intel_super *super = st->sb;
4342 struct imsm_super *mpb = super->anchor;
4343 struct dl *dl;
4344 int i;
4345 int extent_cnt;
4346 struct extent *e;
4347 unsigned long long maxsize;
4348 unsigned long long minsize;
4349 int cnt;
4350 int used;
4351
 4352 /* find the largest free region with a common start across the candidate disks */
4353 used = 0;
4354 extent_cnt = 0;
4355 cnt = 0;
4356 for (dl = super->disks; dl; dl = dl->next) {
4357 dl->raiddisk = -1;
4358
4359 if (dl->index >= 0)
4360 used++;
4361
4362 /* don't activate new spares if we are orom constrained
4363 * and there is already a volume active in the container
4364 */
4365 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
4366 continue;
4367
4368 e = get_extents(super, dl);
4369 if (!e)
4370 continue;
4371 for (i = 1; e[i-1].size; i++)
4372 ;
4373 dl->e = e;
4374 dl->extent_cnt = i;
4375 extent_cnt += i;
4376 cnt++;
4377 }
4378
4379 maxsize = merge_extents(super, extent_cnt);
4380 minsize = size;
4381 if (size == 0)
4382 minsize = chunk;
4383
4384 if (cnt < raiddisks ||
4385 (super->orom && used && used != raiddisks) ||
a7dd165b
DW
4386 maxsize < minsize ||
4387 maxsize == 0) {
efb30e7f
DW
4388 fprintf(stderr, Name ": not enough devices with space to create array.\n");
 4389 return 0; /* no free region large enough */
4390 }
4391
4392 if (size == 0) {
4393 size = maxsize;
4394 if (chunk) {
4395 size /= chunk;
4396 size *= chunk;
4397 }
4398 }
4399
4400 cnt = 0;
4401 for (dl = super->disks; dl; dl = dl->next)
4402 if (dl->e)
4403 dl->raiddisk = cnt++;
4404
4405 *freesize = size;
4406
4407 return 1;
4408}
4409
bf5a934a
DW
4410static int validate_geometry_imsm(struct supertype *st, int level, int layout,
4411 int raiddisks, int chunk, unsigned long long size,
4412 char *dev, unsigned long long *freesize,
4413 int verbose)
4414{
4415 int fd, cfd;
4416 struct mdinfo *sra;
20cbe8d2 4417 int is_member = 0;
bf5a934a
DW
4418
4419 /* if given unused devices create a container
 4420 * if given devices in a container create a member volume
4421 */
4422 if (level == LEVEL_CONTAINER) {
4423 /* Must be a fresh device to add to a container */
4424 return validate_geometry_imsm_container(st, level, layout,
4425 raiddisks, chunk, size,
4426 dev, freesize,
4427 verbose);
4428 }
4429
8592f29d
N
4430 if (!dev) {
4431 if (st->sb && freesize) {
efb30e7f
DW
 4432 /* we are being asked to automatically lay out a
 4433 * new volume based on the current contents of
 4434 * the container. If the parameters can be
4435 * satisfied reserve_space will record the disks,
4436 * start offset, and size of the volume to be
4437 * created. add_to_super and getinfo_super
4438 * detect when autolayout is in progress.
4439 */
6592ce37
DW
4440 if (!validate_geometry_imsm_orom(st->sb, level, layout,
4441 raiddisks, chunk,
4442 verbose))
4443 return 0;
efb30e7f 4444 return reserve_space(st, raiddisks, size, chunk, freesize);
8592f29d
N
4445 }
4446 return 1;
4447 }
bf5a934a
DW
4448 if (st->sb) {
4449 /* creating in a given container */
4450 return validate_geometry_imsm_volume(st, level, layout,
4451 raiddisks, chunk, size,
4452 dev, freesize, verbose);
4453 }
4454
bf5a934a
DW
4455 /* This device needs to be a device in an 'imsm' container */
4456 fd = open(dev, O_RDONLY|O_EXCL, 0);
4457 if (fd >= 0) {
4458 if (verbose)
4459 fprintf(stderr,
4460 Name ": Cannot create this array on device %s\n",
4461 dev);
4462 close(fd);
4463 return 0;
4464 }
4465 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
4466 if (verbose)
4467 fprintf(stderr, Name ": Cannot open %s: %s\n",
4468 dev, strerror(errno));
4469 return 0;
4470 }
4471 /* Well, it is in use by someone, maybe an 'imsm' container. */
4472 cfd = open_container(fd);
20cbe8d2 4473 close(fd);
bf5a934a 4474 if (cfd < 0) {
bf5a934a
DW
4475 if (verbose)
4476 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
4477 dev);
4478 return 0;
4479 }
4480 sra = sysfs_read(cfd, 0, GET_VERSION);
bf5a934a 4481 if (sra && sra->array.major_version == -1 &&
20cbe8d2
AW
4482 strcmp(sra->text_version, "imsm") == 0)
4483 is_member = 1;
4484 sysfs_free(sra);
4485 if (is_member) {
bf5a934a
DW
 4486 /* This is a member of an imsm container. Load the container
4487 * and try to create a volume
4488 */
4489 struct intel_super *super;
4490
e1902a7b 4491 if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
bf5a934a
DW
4492 st->sb = super;
4493 st->container_dev = fd2devnum(cfd);
4494 close(cfd);
4495 return validate_geometry_imsm_volume(st, level, layout,
4496 raiddisks, chunk,
4497 size, dev,
4498 freesize, verbose);
4499 }
20cbe8d2 4500 }
bf5a934a 4501
20cbe8d2
AW
4502 if (verbose)
4503 fprintf(stderr, Name ": failed container membership check\n");
4504
4505 close(cfd);
4506 return 0;
bf5a934a 4507}
0bd16cf2 4508
30f58b22 4509static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
0bd16cf2
DJ
4510{
4511 struct intel_super *super = st->sb;
4512
30f58b22
DW
4513 if (level && *level == UnSet)
4514 *level = LEVEL_CONTAINER;
4515
4516 if (level && layout && *layout == UnSet)
4517 *layout = imsm_level_to_layout(*level);
0bd16cf2 4518
1d54f286
N
4519 if (chunk && (*chunk == UnSet || *chunk == 0) &&
4520 super && super->orom)
30f58b22 4521 *chunk = imsm_orom_default_chunk(super->orom);
0bd16cf2
DJ
4522}
4523
33414a01
DW
4524static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
4525
4526static int kill_subarray_imsm(struct supertype *st)
4527{
4528 /* remove the subarray currently referenced by ->current_vol */
4529 __u8 i;
4530 struct intel_dev **dp;
4531 struct intel_super *super = st->sb;
4532 __u8 current_vol = super->current_vol;
4533 struct imsm_super *mpb = super->anchor;
4534
4535 if (super->current_vol < 0)
4536 return 2;
4537 super->current_vol = -1; /* invalidate subarray cursor */
4538
4539 /* block deletions that would change the uuid of active subarrays
4540 *
4541 * FIXME when immutable ids are available, but note that we'll
4542 * also need to fixup the invalidated/active subarray indexes in
4543 * mdstat
4544 */
4545 for (i = 0; i < mpb->num_raid_devs; i++) {
4546 char subarray[4];
4547
4548 if (i < current_vol)
4549 continue;
4550 sprintf(subarray, "%u", i);
4551 if (is_subarray_active(subarray, st->devname)) {
4552 fprintf(stderr,
4553 Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
4554 current_vol, i);
4555
4556 return 2;
4557 }
4558 }
4559
4560 if (st->update_tail) {
4561 struct imsm_update_kill_array *u = malloc(sizeof(*u));
4562
4563 if (!u)
4564 return 2;
4565 u->type = update_kill_array;
4566 u->dev_idx = current_vol;
4567 append_metadata_update(st, u, sizeof(*u));
4568
4569 return 0;
4570 }
4571
4572 for (dp = &super->devlist; *dp;)
4573 if ((*dp)->index == current_vol) {
4574 *dp = (*dp)->next;
4575 } else {
4576 handle_missing(super, (*dp)->dev);
4577 if ((*dp)->index > current_vol)
4578 (*dp)->index--;
4579 dp = &(*dp)->next;
4580 }
4581
4582 /* no more raid devices, all active components are now spares,
4583 * but of course failed are still failed
4584 */
4585 if (--mpb->num_raid_devs == 0) {
4586 struct dl *d;
4587
4588 for (d = super->disks; d; d = d->next)
4589 if (d->index > -2) {
4590 d->index = -1;
4591 d->disk.status = SPARE_DISK;
4592 }
4593 }
4594
4595 super->updates_pending++;
4596
4597 return 0;
4598}
aa534678 4599
a951a4f7 4600static int update_subarray_imsm(struct supertype *st, char *subarray,
fa56eddb 4601 char *update, struct mddev_ident *ident)
aa534678
DW
4602{
4603 /* update the subarray currently referenced by ->current_vol */
4604 struct intel_super *super = st->sb;
4605 struct imsm_super *mpb = super->anchor;
4606
aa534678
DW
4607 if (strcmp(update, "name") == 0) {
4608 char *name = ident->name;
a951a4f7
N
4609 char *ep;
4610 int vol;
aa534678 4611
a951a4f7 4612 if (is_subarray_active(subarray, st->devname)) {
aa534678
DW
4613 fprintf(stderr,
4614 Name ": Unable to update name of active subarray\n");
4615 return 2;
4616 }
4617
4618 if (!check_name(super, name, 0))
4619 return 2;
4620
a951a4f7
N
4621 vol = strtoul(subarray, &ep, 10);
4622 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
4623 return 2;
4624
aa534678
DW
4625 if (st->update_tail) {
4626 struct imsm_update_rename_array *u = malloc(sizeof(*u));
4627
4628 if (!u)
4629 return 2;
4630 u->type = update_rename_array;
a951a4f7 4631 u->dev_idx = vol;
aa534678
DW
4632 snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
4633 append_metadata_update(st, u, sizeof(*u));
4634 } else {
4635 struct imsm_dev *dev;
4636 int i;
4637
a951a4f7 4638 dev = get_imsm_dev(super, vol);
aa534678
DW
4639 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
4640 for (i = 0; i < mpb->num_raid_devs; i++) {
4641 dev = get_imsm_dev(super, i);
4642 handle_missing(super, dev);
4643 }
4644 super->updates_pending++;
4645 }
4646 } else
4647 return 2;
4648
4649 return 0;
4650}
bf5a934a 4651
28bce06f
AK
4652static int is_gen_migration(struct imsm_dev *dev)
4653{
4654 if (!dev->vol.migr_state)
4655 return 0;
4656
4657 if (migr_type(dev) == MIGR_GEN_MIGR)
4658 return 1;
4659
4660 return 0;
4661}
71204a50 4662#endif /* MDASSEMBLE */
28bce06f 4663
1e5c6983
DW
4664static int is_rebuilding(struct imsm_dev *dev)
4665{
4666 struct imsm_map *migr_map;
4667
4668 if (!dev->vol.migr_state)
4669 return 0;
4670
4671 if (migr_type(dev) != MIGR_REBUILD)
4672 return 0;
4673
4674 migr_map = get_imsm_map(dev, 1);
4675
4676 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
4677 return 1;
4678 else
4679 return 0;
4680}
4681
4682static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
4683{
4684 struct mdinfo *rebuild = NULL;
4685 struct mdinfo *d;
4686 __u32 units;
4687
4688 if (!is_rebuilding(dev))
4689 return;
4690
4691 /* Find the rebuild target, but punt on the dual rebuild case */
4692 for (d = array->devs; d; d = d->next)
4693 if (d->recovery_start == 0) {
4694 if (rebuild)
4695 return;
4696 rebuild = d;
4697 }
4698
4363fd80
DW
4699 if (!rebuild) {
4700 /* (?) none of the disks are marked with
4701 * IMSM_ORD_REBUILD, so assume they are missing and the
4702 * disk_ord_tbl was not correctly updated
4703 */
4704 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
4705 return;
4706 }
4707
1e5c6983
DW
4708 units = __le32_to_cpu(dev->vol.curr_migr_unit);
4709 rebuild->recovery_start = units * blocks_per_migr_unit(dev);
4710}
4711
4712
00bbdbda 4713static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
cdddbdbc 4714{
4f5bc454
DW
4715 /* Given a container loaded by load_super_imsm_all,
4716 * extract information about all the arrays into
4717 * an mdinfo tree.
00bbdbda 4718 * If 'subarray' is given, just extract info about that array.
4f5bc454
DW
4719 *
4720 * For each imsm_dev create an mdinfo, fill it in,
4721 * then look for matching devices in super->disks
4722 * and create appropriate device mdinfo.
4723 */
4724 struct intel_super *super = st->sb;
949c47a0 4725 struct imsm_super *mpb = super->anchor;
4f5bc454 4726 struct mdinfo *rest = NULL;
00bbdbda 4727 unsigned int i;
a06d022d 4728 int bbm_errors = 0;
cdddbdbc 4729
a06d022d
KW
4730 /* check for bad blocks */
4731 if (imsm_bbm_log_size(super->anchor))
4732 bbm_errors = 1;
604b746f 4733
4f5bc454 4734 for (i = 0; i < mpb->num_raid_devs; i++) {
00bbdbda
N
4735 struct imsm_dev *dev;
4736 struct imsm_map *map;
86e3692b 4737 struct imsm_map *map2;
4f5bc454 4738 struct mdinfo *this;
4f5bc454 4739 int slot;
00bbdbda
N
4740 char *ep;
4741
4742 if (subarray &&
4743 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
4744 continue;
4745
4746 dev = get_imsm_dev(super, i);
4747 map = get_imsm_map(dev, 0);
86e3692b 4748 map2 = get_imsm_map(dev, 1);
4f5bc454 4749
1ce0101c
DW
4750 /* do not publish arrays that are in the middle of an
4751 * unsupported migration
4752 */
4753 if (dev->vol.migr_state &&
28bce06f 4754 (migr_type(dev) == MIGR_STATE_CHANGE)) {
1ce0101c
DW
4755 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
4756 " unsupported migration in progress\n",
4757 dev->volume);
4758 continue;
4759 }
4760
4f5bc454 4761 this = malloc(sizeof(*this));
0fbd635c 4762 if (!this) {
cf1be220 4763 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
0fbd635c
AW
4764 sizeof(*this));
4765 break;
4766 }
4f5bc454
DW
4767 memset(this, 0, sizeof(*this));
4768 this->next = rest;
4f5bc454 4769
301406c9 4770 super->current_vol = i;
a5d85af7 4771 getinfo_super_imsm_volume(st, this, NULL);
4f5bc454 4772 for (slot = 0 ; slot < map->num_members; slot++) {
1e5c6983 4773 unsigned long long recovery_start;
4f5bc454
DW
4774 struct mdinfo *info_d;
4775 struct dl *d;
4776 int idx;
9a1608e5 4777 int skip;
7eef0453 4778 __u32 ord;
4f5bc454 4779
9a1608e5 4780 skip = 0;
98130f40
AK
4781 idx = get_imsm_disk_idx(dev, slot, 0);
4782 ord = get_imsm_ord_tbl_ent(dev, slot, 0);
4f5bc454
DW
4783 for (d = super->disks; d ; d = d->next)
4784 if (d->index == idx)
0fbd635c 4785 break;
4f5bc454 4786
1e5c6983 4787 recovery_start = MaxSector;
4f5bc454 4788 if (d == NULL)
9a1608e5 4789 skip = 1;
25ed7e59 4790 if (d && is_failed(&d->disk))
9a1608e5 4791 skip = 1;
7eef0453 4792 if (ord & IMSM_ORD_REBUILD)
1e5c6983 4793 recovery_start = 0;
9a1608e5
DW
4794
4795 /*
 4796 * if we skip some disks the array will be assembled degraded;
1e5c6983
DW
4797 * reset resync start to avoid a dirty-degraded
 4798 * situation when performing the initial sync
9a1608e5
DW
4799 *
4800 * FIXME handle dirty degraded
4801 */
1e5c6983 4802 if ((skip || recovery_start == 0) && !dev->vol.dirty)
b7528a20 4803 this->resync_start = MaxSector;
9a1608e5
DW
4804 if (skip)
4805 continue;
4f5bc454 4806
1e5c6983 4807 info_d = calloc(1, sizeof(*info_d));
9a1608e5
DW
4808 if (!info_d) {
4809 fprintf(stderr, Name ": failed to allocate disk"
1ce0101c 4810 " for volume %.16s\n", dev->volume);
1e5c6983
DW
4811 info_d = this->devs;
4812 while (info_d) {
4813 struct mdinfo *d = info_d->next;
4814
4815 free(info_d);
4816 info_d = d;
4817 }
9a1608e5
DW
4818 free(this);
4819 this = rest;
4820 break;
4821 }
4f5bc454
DW
4822 info_d->next = this->devs;
4823 this->devs = info_d;
4824
4f5bc454
DW
4825 info_d->disk.number = d->index;
4826 info_d->disk.major = d->major;
4827 info_d->disk.minor = d->minor;
4828 info_d->disk.raid_disk = slot;
1e5c6983 4829 info_d->recovery_start = recovery_start;
86e3692b
AK
4830 if (map2) {
4831 if (slot < map2->num_members)
4832 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
4833 else
4834 this->array.spare_disks++;
86e3692b
AK
4835 } else {
4836 if (slot < map->num_members)
4837 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
4838 else
4839 this->array.spare_disks++;
86e3692b 4840 }
1e5c6983
DW
4841 if (info_d->recovery_start == MaxSector)
4842 this->array.working_disks++;
4f5bc454
DW
4843
4844 info_d->events = __le32_to_cpu(mpb->generation_num);
4845 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
4846 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
4f5bc454 4847 }
1e5c6983
DW
4848 /* now that the disk list is up-to-date fixup recovery_start */
4849 update_recovery_start(dev, this);
9a1608e5 4850 rest = this;
4f5bc454
DW
4851 }
4852
a06d022d
KW
4853 /* if array has bad blocks, set suitable bit in array status */
4854 if (bbm_errors)
4855 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
4856
4f5bc454 4857 return rest;
cdddbdbc
DW
4858}
4859
845dea95 4860
fb49eef2 4861static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
c2a1e7da 4862{
a965f303 4863 struct imsm_map *map = get_imsm_map(dev, 0);
c2a1e7da
DW
4864
4865 if (!failed)
3393c6af
DW
4866 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
4867 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
c2a1e7da
DW
4868
4869 switch (get_imsm_raid_level(map)) {
4870 case 0:
4871 return IMSM_T_STATE_FAILED;
4872 break;
4873 case 1:
4874 if (failed < map->num_members)
4875 return IMSM_T_STATE_DEGRADED;
4876 else
4877 return IMSM_T_STATE_FAILED;
4878 break;
4879 case 10:
4880 {
4881 /**
c92a2527
DW
 4882 * check to see if any mirror pair has failed completely; otherwise we
 4883 * are only degraded. Even-numbered slots are mirrored on
 4884 * slot+1
c2a1e7da 4885 */
c2a1e7da 4886 int i;
d9b420a5
N
4887 /* gcc -Os complains that this is unused */
4888 int insync = insync;
c2a1e7da
DW
4889
4890 for (i = 0; i < map->num_members; i++) {
98130f40 4891 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
c92a2527
DW
4892 int idx = ord_to_idx(ord);
4893 struct imsm_disk *disk;
c2a1e7da 4894
c92a2527
DW
4895 /* reset the potential in-sync count on even-numbered
4896 * slots. num_copies is always 2 for imsm raid10
4897 */
4898 if ((i & 1) == 0)
4899 insync = 2;
c2a1e7da 4900
c92a2527 4901 disk = get_imsm_disk(super, idx);
25ed7e59 4902 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
c92a2527 4903 insync--;
c2a1e7da 4904
c92a2527
DW
4905 /* no in-sync disks left in this mirror the
4906 * array has failed
4907 */
4908 if (insync == 0)
4909 return IMSM_T_STATE_FAILED;
c2a1e7da
DW
4910 }
4911
4912 return IMSM_T_STATE_DEGRADED;
4913 }
4914 case 5:
4915 if (failed < 2)
4916 return IMSM_T_STATE_DEGRADED;
4917 else
4918 return IMSM_T_STATE_FAILED;
4919 break;
4920 default:
4921 break;
4922 }
4923
4924 return map->map_state;
4925}
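/* Minimal sketch, added for illustration only: the raid10 branch above walks
 * mirror pairs (slots 0/1, 2/3, ...), resetting the in-sync count to 2 at
 * every even slot.  Stripped of the metadata lookups, the rule it implements
 * is the following; example_raid10_pair_failed and slot_failed are
 * hypothetical names, not part of mdadm.
 */
static int example_raid10_pair_failed(const int *slot_failed, int num_members)
{
	int i, insync = 2;

	for (i = 0; i < num_members; i++) {
		if ((i & 1) == 0)
			insync = 2;	/* start of a new mirror pair */
		if (slot_failed[i])
			insync--;
		if (insync == 0)
			return 1;	/* both copies lost: the array has failed */
	}
	return 0;			/* at worst degraded */
}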
4926
ff077194 4927static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
c2a1e7da
DW
4928{
4929 int i;
4930 int failed = 0;
4931 struct imsm_disk *disk;
ff077194 4932 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2
DW
4933 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
4934 __u32 ord;
4935 int idx;
c2a1e7da 4936
0556e1a2
DW
4937 /* at the beginning of migration we set IMSM_ORD_REBUILD on
4938 * disks that are being rebuilt. New failures are recorded to
4939 * map[0]. So we look through all the disks we started with and
4940 * see if any failures are still present, or if any new ones
4941 * have arrived
4942 *
4943 * FIXME add support for online capacity expansion and
4944 * raid-level-migration
4945 */
4946 for (i = 0; i < prev->num_members; i++) {
4947 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
4948 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
4949 idx = ord_to_idx(ord);
c2a1e7da 4950
949c47a0 4951 disk = get_imsm_disk(super, idx);
25ed7e59 4952 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
fcb84475 4953 failed++;
c2a1e7da
DW
4954 }
4955
4956 return failed;
845dea95
NB
4957}
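/* Illustrative sketch (not in the original): each disk_ord_tbl entry encodes
 * a disk index plus an optional IMSM_ORD_REBUILD flag, and ord_to_idx()
 * strips the flag.  Testing whether a slot is still being rebuilt therefore
 * reduces to the check below; example_slot_rebuilding is a hypothetical
 * helper name.
 */
static int example_slot_rebuilding(struct imsm_dev *dev, int slot)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, -1);

	return (ord & IMSM_ORD_REBUILD) != 0;
}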
4958
97b4d0e9
DW
4959#ifndef MDASSEMBLE
4960static int imsm_open_new(struct supertype *c, struct active_array *a,
4961 char *inst)
4962{
4963 struct intel_super *super = c->sb;
4964 struct imsm_super *mpb = super->anchor;
4965
4966 if (atoi(inst) >= mpb->num_raid_devs) {
4967 fprintf(stderr, "%s: subarry index %d, out of range\n",
4968 __func__, atoi(inst));
4969 return -ENODEV;
4970 }
4971
4972 dprintf("imsm: open_new %s\n", inst);
4973 a->info.container_member = atoi(inst);
4974 return 0;
4975}
4976
0c046afd
DW
4977static int is_resyncing(struct imsm_dev *dev)
4978{
4979 struct imsm_map *migr_map;
4980
4981 if (!dev->vol.migr_state)
4982 return 0;
4983
1484e727
DW
4984 if (migr_type(dev) == MIGR_INIT ||
4985 migr_type(dev) == MIGR_REPAIR)
0c046afd
DW
4986 return 1;
4987
4c9bc37b
AK
4988 if (migr_type(dev) == MIGR_GEN_MIGR)
4989 return 0;
4990
0c046afd
DW
4991 migr_map = get_imsm_map(dev, 1);
4992
4c9bc37b
AK
4993 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
4994 (dev->vol.migr_type != MIGR_GEN_MIGR))
0c046afd
DW
4995 return 1;
4996 else
4997 return 0;
4998}
4999
0556e1a2
DW
5000/* return true if we recorded new information */
5001static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
47ee5a45 5002{
0556e1a2
DW
5003 __u32 ord;
5004 int slot;
5005 struct imsm_map *map;
5006
5007 /* new failures are always set in map[0] */
5008 map = get_imsm_map(dev, 0);
5009
5010 slot = get_imsm_disk_slot(map, idx);
5011 if (slot < 0)
5012 return 0;
5013
5014 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
25ed7e59 5015 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
0556e1a2
DW
5016 return 0;
5017
f2f27e63 5018 disk->status |= FAILED_DISK;
cf53434e 5019 disk->status &= ~CONFIGURED_DISK;
0556e1a2 5020 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
f21e18ca 5021 if (map->failed_disk_num == 0xff)
0556e1a2
DW
5022 map->failed_disk_num = slot;
5023 return 1;
5024}
5025
5026static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5027{
5028 mark_failure(dev, disk, idx);
5029
5030 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5031 return;
5032
47ee5a45
DW
5033 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5034 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5035}
5036
33414a01
DW
5037static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5038{
5039 __u8 map_state;
5040 struct dl *dl;
5041 int failed;
5042
5043 if (!super->missing)
5044 return;
5045 failed = imsm_count_failed(super, dev);
5046 map_state = imsm_check_degraded(super, dev, failed);
5047
5048 dprintf("imsm: mark missing\n");
5049 end_migration(dev, map_state);
5050 for (dl = super->missing; dl; dl = dl->next)
5051 mark_missing(dev, &dl->disk, dl->index);
5052 super->updates_pending++;
5053}
5054
70bdf0dc
AK
5055static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5056{
5057 int used_disks = imsm_num_data_members(dev, 0);
5058 unsigned long long array_blocks;
5059 struct imsm_map *map;
5060
5061 if (used_disks == 0) {
 5062 /* when a problem occurs,
 5063 * return the current array_blocks value
5064 */
5065 array_blocks = __le32_to_cpu(dev->size_high);
5066 array_blocks = array_blocks << 32;
5067 array_blocks += __le32_to_cpu(dev->size_low);
5068
5069 return array_blocks;
5070 }
5071
5072 /* set array size in metadata
5073 */
5074 map = get_imsm_map(dev, 0);
5075 array_blocks = map->blocks_per_member * used_disks;
5076
5077 /* round array size down to closest MB
5078 */
5079 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5080 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5081 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5082
5083 return array_blocks;
5084}
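/* Worked example, hypothetical numbers (added for illustration): with
 * SECT_PER_MB_SHIFT == 11 a megabyte is 2048 sectors, so an array size of
 * 4196500 sectors is rounded down by the shift pair above to
 * (4196500 >> 11) << 11 == 4196352 sectors, i.e. exactly 2049 MB; at most
 * 2047 sectors (just under 1 MB) are discarded by the rounding.
 */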
5085
28bce06f
AK
5086static void imsm_set_disk(struct active_array *a, int n, int state);
5087
0e2d1a4e
AK
5088static void imsm_progress_container_reshape(struct intel_super *super)
5089{
5090 /* if no device has a migr_state, but some device has a
5091 * different number of members than the previous device, start
 5092 * changing the number of devices in this device to match the
 5093 * previous one.
5094 */
5095 struct imsm_super *mpb = super->anchor;
5096 int prev_disks = -1;
5097 int i;
1dfaa380 5098 int copy_map_size;
0e2d1a4e
AK
5099
5100 for (i = 0; i < mpb->num_raid_devs; i++) {
5101 struct imsm_dev *dev = get_imsm_dev(super, i);
5102 struct imsm_map *map = get_imsm_map(dev, 0);
5103 struct imsm_map *map2;
5104 int prev_num_members;
0e2d1a4e
AK
5105
5106 if (dev->vol.migr_state)
5107 return;
5108
5109 if (prev_disks == -1)
5110 prev_disks = map->num_members;
5111 if (prev_disks == map->num_members)
5112 continue;
5113
5114 /* OK, this array needs to enter reshape mode.
5115 * i.e it needs a migr_state
5116 */
5117
1dfaa380 5118 copy_map_size = sizeof_imsm_map(map);
0e2d1a4e
AK
5119 prev_num_members = map->num_members;
5120 map->num_members = prev_disks;
5121 dev->vol.migr_state = 1;
5122 dev->vol.curr_migr_unit = 0;
5123 dev->vol.migr_type = MIGR_GEN_MIGR;
5124 for (i = prev_num_members;
5125 i < map->num_members; i++)
5126 set_imsm_ord_tbl_ent(map, i, i);
5127 map2 = get_imsm_map(dev, 1);
5128 /* Copy the current map */
1dfaa380 5129 memcpy(map2, map, copy_map_size);
0e2d1a4e
AK
5130 map2->num_members = prev_num_members;
5131
70bdf0dc 5132 imsm_set_array_size(dev);
0e2d1a4e
AK
5133 super->updates_pending++;
5134 }
5135}
5136
aad6f216 5137/* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild
0c046afd
DW
 5138 * states are handled in imsm_set_disk() with one exception: when a
5139 * resync is stopped due to a new failure this routine will set the
5140 * 'degraded' state for the array.
5141 */
01f157d7 5142static int imsm_set_array_state(struct active_array *a, int consistent)
a862209d
DW
5143{
5144 int inst = a->info.container_member;
5145 struct intel_super *super = a->container->sb;
949c47a0 5146 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5147 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd
DW
5148 int failed = imsm_count_failed(super, dev);
5149 __u8 map_state = imsm_check_degraded(super, dev, failed);
1e5c6983 5150 __u32 blocks_per_unit;
a862209d 5151
1af97990
AK
5152 if (dev->vol.migr_state &&
5153 dev->vol.migr_type == MIGR_GEN_MIGR) {
5154 /* array state change is blocked due to reshape action
aad6f216
N
5155 * We might need to
 5156 * - abort the reshape (if last_checkpoint is 0 and action != reshape)
5157 * - finish the reshape (if last_checkpoint is big and action != reshape)
5158 * - update curr_migr_unit
1af97990 5159 */
aad6f216
N
5160 if (a->curr_action == reshape) {
5161 /* still reshaping, maybe update curr_migr_unit */
5162 long long blocks_per_unit = blocks_per_migr_unit(dev);
5163 long long unit = a->last_checkpoint;
6345120e
AK
5164 if (blocks_per_unit) {
5165 unit /= blocks_per_unit;
5166 if (unit >
5167 __le32_to_cpu(dev->vol.curr_migr_unit)) {
5168 dev->vol.curr_migr_unit =
5169 __cpu_to_le32(unit);
5170 super->updates_pending++;
5171 }
aad6f216 5172 }
a4546b61 5173 return 0;
aad6f216
N
5174 } else {
5175 if (a->last_checkpoint == 0 && a->prev_action == reshape) {
5176 /* for some reason we aborted the reshape.
5177 * Better clean up
5178 */
5179 struct imsm_map *map2 = get_imsm_map(dev, 1);
5180 dev->vol.migr_state = 0;
5181 dev->vol.migr_type = 0;
5182 dev->vol.curr_migr_unit = 0;
5183 memcpy(map, map2, sizeof_imsm_map(map2));
5184 super->updates_pending++;
5185 }
5186 if (a->last_checkpoint >= a->info.component_size) {
5187 unsigned long long array_blocks;
5188 int used_disks;
e154ced3 5189 struct mdinfo *mdi;
aad6f216 5190
9653001d 5191 used_disks = imsm_num_data_members(dev, 0);
d55adef9
AK
5192 if (used_disks > 0) {
5193 array_blocks =
5194 map->blocks_per_member *
5195 used_disks;
5196 /* round array size down to closest MB
5197 */
5198 array_blocks = (array_blocks
5199 >> SECT_PER_MB_SHIFT)
5200 << SECT_PER_MB_SHIFT;
d55adef9
AK
5201 a->info.custom_array_size = array_blocks;
5202 /* encourage manager to update array
5203 * size
5204 */
e154ced3 5205
d55adef9 5206 a->check_reshape = 1;
f49ee92d 5207}
e154ced3
AK
5208 /* finalize online capacity expansion/reshape */
5209 for (mdi = a->info.devs; mdi; mdi = mdi->next)
5210 imsm_set_disk(a,
5211 mdi->disk.raid_disk,
5212 mdi->curr_state);
5213
0e2d1a4e 5214 imsm_progress_container_reshape(super);
e154ced3 5215 }
aad6f216 5216 }
1af97990
AK
5217 }
5218
47ee5a45 5219 /* before we activate this array handle any missing disks */
33414a01
DW
5220 if (consistent == 2)
5221 handle_missing(super, dev);
1e5c6983 5222
0c046afd 5223 if (consistent == 2 &&
b7941fd6 5224 (!is_resync_complete(&a->info) ||
0c046afd
DW
5225 map_state != IMSM_T_STATE_NORMAL ||
5226 dev->vol.migr_state))
01f157d7 5227 consistent = 0;
272906ef 5228
b7941fd6 5229 if (is_resync_complete(&a->info)) {
0c046afd 5230 /* complete initialization / resync,
0556e1a2
DW
5231 * recovery and interrupted recovery is completed in
5232 * ->set_disk
0c046afd
DW
5233 */
5234 if (is_resyncing(dev)) {
5235 dprintf("imsm: mark resync done\n");
f8f603f1 5236 end_migration(dev, map_state);
115c3803 5237 super->updates_pending++;
484240d8 5238 a->last_checkpoint = 0;
115c3803 5239 }
0c046afd
DW
5240 } else if (!is_resyncing(dev) && !failed) {
5241 /* mark the start of the init process if nothing is failed */
b7941fd6 5242 dprintf("imsm: mark resync start\n");
1484e727 5243 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
e3bba0e0 5244 migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
1484e727
DW
5245 else
5246 migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
3393c6af 5247 super->updates_pending++;
115c3803 5248 }
a862209d 5249
1e5c6983
DW
5250 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5251 blocks_per_unit = blocks_per_migr_unit(dev);
4f0a7acc 5252 if (blocks_per_unit) {
1e5c6983
DW
5253 __u32 units32;
5254 __u64 units;
5255
4f0a7acc 5256 units = a->last_checkpoint / blocks_per_unit;
1e5c6983
DW
5257 units32 = units;
5258
5259 /* check that we did not overflow 32-bits, and that
5260 * curr_migr_unit needs updating
5261 */
5262 if (units32 == units &&
5263 __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
5264 dprintf("imsm: mark checkpoint (%u)\n", units32);
5265 dev->vol.curr_migr_unit = __cpu_to_le32(units32);
5266 super->updates_pending++;
5267 }
5268 }
f8f603f1 5269
3393c6af 5270 /* mark dirty / clean */
0c046afd 5271 if (dev->vol.dirty != !consistent) {
b7941fd6 5272 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
0c046afd
DW
5273 if (consistent)
5274 dev->vol.dirty = 0;
5275 else
5276 dev->vol.dirty = 1;
a862209d
DW
5277 super->updates_pending++;
5278 }
28bce06f 5279
01f157d7 5280 return consistent;
a862209d
DW
5281}
5282
8d45d196 5283static void imsm_set_disk(struct active_array *a, int n, int state)
845dea95 5284{
8d45d196
DW
5285 int inst = a->info.container_member;
5286 struct intel_super *super = a->container->sb;
949c47a0 5287 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5288 struct imsm_map *map = get_imsm_map(dev, 0);
8d45d196 5289 struct imsm_disk *disk;
0c046afd 5290 int failed;
b10b37b8 5291 __u32 ord;
0c046afd 5292 __u8 map_state;
8d45d196
DW
5293
 5294 if (n >= map->num_members)
5295 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5296 n, map->num_members - 1);
5297
5298 if (n < 0)
5299 return;
5300
4e6e574a 5301 dprintf("imsm: set_disk %d:%x\n", n, state);
8d45d196 5302
98130f40 5303 ord = get_imsm_ord_tbl_ent(dev, n, -1);
b10b37b8 5304 disk = get_imsm_disk(super, ord_to_idx(ord));
8d45d196 5305
5802a811 5306 /* check for new failures */
0556e1a2
DW
5307 if (state & DS_FAULTY) {
5308 if (mark_failure(dev, disk, ord_to_idx(ord)))
5309 super->updates_pending++;
8d45d196 5310 }
47ee5a45 5311
19859edc 5312 /* check if in_sync */
0556e1a2 5313 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
b10b37b8
DW
5314 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5315
5316 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
19859edc
DW
5317 super->updates_pending++;
5318 }
8d45d196 5319
0c046afd
DW
5320 failed = imsm_count_failed(super, dev);
5321 map_state = imsm_check_degraded(super, dev, failed);
5802a811 5322
0c046afd
DW
5323 /* check if recovery complete, newly degraded, or failed */
5324 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
f8f603f1 5325 end_migration(dev, map_state);
0556e1a2
DW
5326 map = get_imsm_map(dev, 0);
5327 map->failed_disk_num = ~0;
0c046afd 5328 super->updates_pending++;
484240d8 5329 a->last_checkpoint = 0;
0c046afd
DW
5330 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5331 map->map_state != map_state &&
5332 !dev->vol.migr_state) {
5333 dprintf("imsm: mark degraded\n");
5334 map->map_state = map_state;
5335 super->updates_pending++;
484240d8 5336 a->last_checkpoint = 0;
0c046afd
DW
5337 } else if (map_state == IMSM_T_STATE_FAILED &&
5338 map->map_state != map_state) {
5339 dprintf("imsm: mark failed\n");
f8f603f1 5340 end_migration(dev, map_state);
0c046afd 5341 super->updates_pending++;
484240d8 5342 a->last_checkpoint = 0;
28bce06f
AK
5343 } else if (is_gen_migration(dev)) {
5344 dprintf("imsm: Detected General Migration in state: ");
5345 if (map_state == IMSM_T_STATE_NORMAL) {
5346 end_migration(dev, map_state);
5347 map = get_imsm_map(dev, 0);
5348 map->failed_disk_num = ~0;
5349 dprintf("normal\n");
5350 } else {
5351 if (map_state == IMSM_T_STATE_DEGRADED) {
5352 printf("degraded\n");
5353 end_migration(dev, map_state);
5354 } else {
5355 dprintf("failed\n");
5356 }
5357 map->map_state = map_state;
5358 }
5359 super->updates_pending++;
5802a811 5360 }
845dea95
NB
5361}
5362
f796af5d 5363static int store_imsm_mpb(int fd, struct imsm_super *mpb)
c2a1e7da 5364{
f796af5d 5365 void *buf = mpb;
c2a1e7da
DW
5366 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5367 unsigned long long dsize;
5368 unsigned long long sectors;
5369
5370 get_dev_size(fd, NULL, &dsize);
5371
272f648f
DW
5372 if (mpb_size > 512) {
5373 /* -1 to account for anchor */
5374 sectors = mpb_sectors(mpb) - 1;
c2a1e7da 5375
272f648f
DW
 5376 /* write the extended mpb to the sectors preceding the anchor */
5377 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5378 return 1;
c2a1e7da 5379
f21e18ca
N
5380 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5381 != 512 * sectors)
272f648f
DW
5382 return 1;
5383 }
c2a1e7da 5384
272f648f
DW
5385 /* first block is stored on second to last sector of the disk */
5386 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
c2a1e7da
DW
5387 return 1;
5388
f796af5d 5389 if (write(fd, buf, 512) != 512)
c2a1e7da
DW
5390 return 1;
5391
c2a1e7da
DW
5392 return 0;
5393}
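/* Illustrative sketch (hypothetical helpers, not in the original file): the
 * layout written by store_imsm_mpb() above keeps the 512-byte anchor in the
 * second-to-last sector of the disk, with any extended mpb data in the
 * sectors immediately before it.
 */
static unsigned long long example_anchor_offset(unsigned long long dsize)
{
	return dsize - 512 * 2;			/* byte offset of the anchor sector */
}

static unsigned long long example_extended_mpb_offset(unsigned long long dsize,
						      unsigned long long sectors)
{
	return dsize - 512 * (2 + sectors);	/* 'sectors' = mpb_sectors(mpb) - 1 */
}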
5394
2e735d19 5395static void imsm_sync_metadata(struct supertype *container)
845dea95 5396{
2e735d19 5397 struct intel_super *super = container->sb;
c2a1e7da 5398
1a64be56 5399 dprintf("sync metadata: %d\n", super->updates_pending);
c2a1e7da
DW
5400 if (!super->updates_pending)
5401 return;
5402
36988a3d 5403 write_super_imsm(container, 0);
c2a1e7da
DW
5404
5405 super->updates_pending = 0;
845dea95
NB
5406}
5407
272906ef
DW
5408static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
5409{
5410 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 5411 int i = get_imsm_disk_idx(dev, idx, -1);
272906ef
DW
5412 struct dl *dl;
5413
5414 for (dl = super->disks; dl; dl = dl->next)
5415 if (dl->index == i)
5416 break;
5417
25ed7e59 5418 if (dl && is_failed(&dl->disk))
272906ef
DW
5419 dl = NULL;
5420
5421 if (dl)
5422 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
5423
5424 return dl;
5425}
5426
a20d2ba5 5427static struct dl *imsm_add_spare(struct intel_super *super, int slot,
8ba77d32
AK
5428 struct active_array *a, int activate_new,
5429 struct mdinfo *additional_test_list)
272906ef
DW
5430{
5431 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 5432 int idx = get_imsm_disk_idx(dev, slot, -1);
a20d2ba5
DW
5433 struct imsm_super *mpb = super->anchor;
5434 struct imsm_map *map;
272906ef
DW
5435 unsigned long long pos;
5436 struct mdinfo *d;
5437 struct extent *ex;
a20d2ba5 5438 int i, j;
272906ef 5439 int found;
569cc43f
DW
5440 __u32 array_start = 0;
5441 __u32 array_end = 0;
272906ef 5442 struct dl *dl;
6c932028 5443 struct mdinfo *test_list;
272906ef
DW
5444
5445 for (dl = super->disks; dl; dl = dl->next) {
5446 /* If in this array, skip */
5447 for (d = a->info.devs ; d ; d = d->next)
e553d2a4
DW
5448 if (d->state_fd >= 0 &&
5449 d->disk.major == dl->major &&
272906ef 5450 d->disk.minor == dl->minor) {
8ba77d32
AK
5451 dprintf("%x:%x already in array\n",
5452 dl->major, dl->minor);
272906ef
DW
5453 break;
5454 }
5455 if (d)
5456 continue;
6c932028
AK
5457 test_list = additional_test_list;
5458 while (test_list) {
5459 if (test_list->disk.major == dl->major &&
5460 test_list->disk.minor == dl->minor) {
8ba77d32
AK
5461 dprintf("%x:%x already in additional test list\n",
5462 dl->major, dl->minor);
5463 break;
5464 }
6c932028 5465 test_list = test_list->next;
8ba77d32 5466 }
6c932028 5467 if (test_list)
8ba77d32 5468 continue;
272906ef 5469
e553d2a4 5470 /* skip in-use or failed drives */
25ed7e59 5471 if (is_failed(&dl->disk) || idx == dl->index ||
df474657
DW
5472 dl->index == -2) {
5473 dprintf("%x:%x status (failed: %d index: %d)\n",
25ed7e59 5474 dl->major, dl->minor, is_failed(&dl->disk), idx);
9a1608e5
DW
5475 continue;
5476 }
5477
a20d2ba5
DW
5478 /* skip pure spares when we are looking for partially
5479 * assimilated drives
5480 */
5481 if (dl->index == -1 && !activate_new)
5482 continue;
5483
272906ef 5484 /* Does this unused device have the requisite free space?
a20d2ba5 5485 * It needs to be able to cover all member volumes
272906ef
DW
5486 */
5487 ex = get_extents(super, dl);
5488 if (!ex) {
5489 dprintf("cannot get extents\n");
5490 continue;
5491 }
a20d2ba5
DW
5492 for (i = 0; i < mpb->num_raid_devs; i++) {
5493 dev = get_imsm_dev(super, i);
5494 map = get_imsm_map(dev, 0);
272906ef 5495
a20d2ba5
DW
5496 /* check if this disk is already a member of
5497 * this array
272906ef 5498 */
620b1713 5499 if (get_imsm_disk_slot(map, dl->index) >= 0)
a20d2ba5
DW
5500 continue;
5501
5502 found = 0;
5503 j = 0;
5504 pos = 0;
5505 array_start = __le32_to_cpu(map->pba_of_lba0);
329c8278
DW
5506 array_end = array_start +
5507 __le32_to_cpu(map->blocks_per_member) - 1;
a20d2ba5
DW
5508
5509 do {
5510 /* check that we can start at pba_of_lba0 with
5511 * blocks_per_member of space
5512 */
329c8278 5513 if (array_start >= pos && array_end < ex[j].start) {
a20d2ba5
DW
5514 found = 1;
5515 break;
5516 }
5517 pos = ex[j].start + ex[j].size;
5518 j++;
5519 } while (ex[j-1].size);
5520
5521 if (!found)
272906ef 5522 break;
a20d2ba5 5523 }
272906ef
DW
5524
5525 free(ex);
a20d2ba5 5526 if (i < mpb->num_raid_devs) {
329c8278
DW
5527 dprintf("%x:%x does not have %u to %u available\n",
5528 dl->major, dl->minor, array_start, array_end);
272906ef
DW
5529 /* No room */
5530 continue;
a20d2ba5
DW
5531 }
5532 return dl;
272906ef
DW
5533 }
5534
5535 return dl;
5536}
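
/*
 * A standalone sketch of the gap scan performed in imsm_add_spare() above:
 * walk a zero-size-terminated list of used extents (sorted by start) and
 * test whether the candidate range [array_start, array_end] fits entirely
 * in the hole before the next used extent.  struct used_extent is a local
 * stand-in for mdadm's struct extent, under the same conventions.
 */
struct used_extent {
	unsigned long long start;
	unsigned long long size;	/* size == 0 terminates the list */
};

static int range_fits_in_gap(const struct used_extent *ex,
			     unsigned long long array_start,
			     unsigned long long array_end)
{
	unsigned long long pos = 0;
	int j = 0;

	do {
		if (array_start >= pos && array_end < ex[j].start)
			return 1;	/* the whole range fits in this gap */
		pos = ex[j].start + ex[j].size;
		j++;
	} while (ex[j - 1].size);

	return 0;
}
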
5537
95d07a2c
LM
5538
5539static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
5540{
5541 struct imsm_dev *dev2;
5542 struct imsm_map *map;
5543 struct dl *idisk;
5544 int slot;
5545 int idx;
5546 __u8 state;
5547
5548 dev2 = get_imsm_dev(cont->sb, dev_idx);
5549 if (dev2) {
5550 state = imsm_check_degraded(cont->sb, dev2, failed);
5551 if (state == IMSM_T_STATE_FAILED) {
5552 map = get_imsm_map(dev2, 0);
5553 if (!map)
5554 return 1;
5555 for (slot = 0; slot < map->num_members; slot++) {
5556 /*
5557 * Check if failed disks are deleted from intel
5558 * disk list or are marked to be deleted
5559 */
98130f40 5560 idx = get_imsm_disk_idx(dev2, slot, -1);
95d07a2c
LM
5561 idisk = get_imsm_dl_disk(cont->sb, idx);
5562 /*
5563 * Do not rebuild the array if failed disks
5564 * from failed sub-array are not removed from
5565 * container.
5566 */
5567 if (idisk &&
5568 is_failed(&idisk->disk) &&
5569 (idisk->action != DISK_REMOVE))
5570 return 0;
5571 }
5572 }
5573 }
5574 return 1;
5575}
5576
88758e9d
DW
5577static struct mdinfo *imsm_activate_spare(struct active_array *a,
5578 struct metadata_update **updates)
5579{
5580 /**
d23fe947
DW
5581 * Find a device with unused free space and use it to replace a
5582 * failed/vacant region in an array. We replace failed regions one
5583 * array at a time. The result is that a new spare disk will be added
5584 * to the first failed array and after the monitor has finished
5585 * propagating failures the remainder will be consumed.
88758e9d 5586 *
d23fe947
DW
5587 * FIXME add a capability for mdmon to request spares from another
5588 * container.
88758e9d
DW
5589 */
5590
5591 struct intel_super *super = a->container->sb;
88758e9d 5592 int inst = a->info.container_member;
949c47a0 5593 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5594 struct imsm_map *map = get_imsm_map(dev, 0);
88758e9d
DW
5595 int failed = a->info.array.raid_disks;
5596 struct mdinfo *rv = NULL;
5597 struct mdinfo *d;
5598 struct mdinfo *di;
5599 struct metadata_update *mu;
5600 struct dl *dl;
5601 struct imsm_update_activate_spare *u;
5602 int num_spares = 0;
5603 int i;
95d07a2c 5604 int allowed;
88758e9d
DW
5605
5606 for (d = a->info.devs ; d ; d = d->next) {
5607 if ((d->curr_state & DS_FAULTY) &&
5608 d->state_fd >= 0)
5609 /* wait for Removal to happen */
5610 return NULL;
5611 if (d->state_fd >= 0)
5612 failed--;
5613 }
5614
5615 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
5616 inst, failed, a->info.array.raid_disks, a->info.array.level);
1af97990
AK
5617
5618 if (dev->vol.migr_state &&
5619 dev->vol.migr_type == MIGR_GEN_MIGR)
5620 /* No repair during migration */
5621 return NULL;
5622
89c67882
AK
5623 if (a->info.array.level == 4)
5624 /* No repair for taken-over array
5625 * imsm doesn't support raid4
5626 */
5627 return NULL;
5628
fb49eef2 5629 if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
88758e9d
DW
5630 return NULL;
5631
95d07a2c
LM
5632 /*
5633 * If there are any failed disks, check the state of the other volumes.
5634 * Block rebuild if another volume has failed until the failed disks
5635 * are removed from the container.
5636 */
5637 if (failed) {
5638 dprintf("found failed disks in %s, check if there is another "
5639 "failed sub-array.\n",
5640 dev->volume);
5641 /* check if states of the other volumes allow for rebuild */
5642 for (i = 0; i < super->anchor->num_raid_devs; i++) {
5643 if (i != inst) {
5644 allowed = imsm_rebuild_allowed(a->container,
5645 i, failed);
5646 if (!allowed)
5647 return NULL;
5648 }
5649 }
5650 }
5651
88758e9d 5652 /* For each slot, if it is not working, find a spare */
88758e9d
DW
5653 for (i = 0; i < a->info.array.raid_disks; i++) {
5654 for (d = a->info.devs ; d ; d = d->next)
5655 if (d->disk.raid_disk == i)
5656 break;
5657 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
5658 if (d && (d->state_fd >= 0))
5659 continue;
5660
272906ef 5661 /*
a20d2ba5
DW
5662 * OK, this device needs recovery. Try to re-add the
5663 * previous occupant of this slot, if this fails see if
5664 * we can continue the assimilation of a spare that was
5665 * partially assimilated, finally try to activate a new
5666 * spare.
272906ef
DW
5667 */
5668 dl = imsm_readd(super, i, a);
5669 if (!dl)
8ba77d32 5670 dl = imsm_add_spare(super, i, a, 0, NULL);
a20d2ba5 5671 if (!dl)
8ba77d32 5672 dl = imsm_add_spare(super, i, a, 1, NULL);
272906ef
DW
5673 if (!dl)
5674 continue;
5675
5676 /* found a usable disk with enough space */
5677 di = malloc(sizeof(*di));
79244939
DW
5678 if (!di)
5679 continue;
272906ef
DW
5680 memset(di, 0, sizeof(*di));
5681
5682 /* dl->index will be -1 in the case we are activating a
5683 * pristine spare. imsm_process_update() will create a
5684 * new index in this case. Once a disk is found to be
5685 * failed in all member arrays it is kicked from the
5686 * metadata
5687 */
5688 di->disk.number = dl->index;
d23fe947 5689
272906ef
DW
5690 /* (ab)use di->devs to store a pointer to the device
5691 * we chose
5692 */
5693 di->devs = (struct mdinfo *) dl;
5694
5695 di->disk.raid_disk = i;
5696 di->disk.major = dl->major;
5697 di->disk.minor = dl->minor;
5698 di->disk.state = 0;
d23534e4 5699 di->recovery_start = 0;
272906ef
DW
5700 di->data_offset = __le32_to_cpu(map->pba_of_lba0);
5701 di->component_size = a->info.component_size;
5702 di->container_member = inst;
148acb7b 5703 super->random = random32();
272906ef
DW
5704 di->next = rv;
5705 rv = di;
5706 num_spares++;
5707 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
5708 i, di->data_offset);
88758e9d 5709
272906ef 5710 break;
88758e9d
DW
5711 }
5712
5713 if (!rv)
5714 /* No spares found */
5715 return rv;
5716 /* Now 'rv' has a list of devices to return.
5717 * Create a metadata_update record to update the
5718 * disk_ord_tbl for the array
5719 */
5720 mu = malloc(sizeof(*mu));
79244939
DW
5721 if (mu) {
5722 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
5723 if (mu->buf == NULL) {
5724 free(mu);
5725 mu = NULL;
5726 }
5727 }
5728 if (!mu) {
5729 while (rv) {
5730 struct mdinfo *n = rv->next;
5731
5732 free(rv);
5733 rv = n;
5734 }
5735 return NULL;
5736 }
5737
88758e9d 5738 mu->space = NULL;
cb23f1f4 5739 mu->space_list = NULL;
88758e9d
DW
5740 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
5741 mu->next = *updates;
5742 u = (struct imsm_update_activate_spare *) mu->buf;
5743
5744 for (di = rv ; di ; di = di->next) {
5745 u->type = update_activate_spare;
d23fe947
DW
5746 u->dl = (struct dl *) di->devs;
5747 di->devs = NULL;
88758e9d
DW
5748 u->slot = di->disk.raid_disk;
5749 u->array = inst;
5750 u->next = u + 1;
5751 u++;
5752 }
5753 (u-1)->next = NULL;
5754 *updates = mu;
5755
5756 return rv;
5757}
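
/*
 * A simplified sketch of how imsm_activate_spare() above packs several
 * fixed-size update records into one contiguous metadata_update buffer and
 * chains them with next = u + 1.  struct demo_update is a stand-in for
 * struct imsm_update_activate_spare, not the real definition.
 */
#include <stdlib.h>

struct demo_update {
	int slot;
	struct demo_update *next;
};

static struct demo_update *pack_updates(int num)
{
	struct demo_update *buf = malloc(sizeof(*buf) * num);
	struct demo_update *u = buf;
	int i;

	if (!buf || num < 1)
		return NULL;
	for (i = 0; i < num; i++) {
		u->slot = i;
		u->next = u + 1;	/* points into the same allocation */
		u++;
	}
	(u - 1)->next = NULL;		/* terminate the chain */
	return buf;			/* caller frees the whole buffer at once */
}
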
5758
54c2c1ea 5759static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
8273f55e 5760{
54c2c1ea
DW
5761 struct imsm_dev *dev = get_imsm_dev(super, idx);
5762 struct imsm_map *map = get_imsm_map(dev, 0);
5763 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
5764 struct disk_info *inf = get_disk_info(u);
5765 struct imsm_disk *disk;
8273f55e
DW
5766 int i;
5767 int j;
8273f55e 5768
54c2c1ea 5769 for (i = 0; i < map->num_members; i++) {
98130f40 5770 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
54c2c1ea
DW
5771 for (j = 0; j < new_map->num_members; j++)
5772 if (serialcmp(disk->serial, inf[j].serial) == 0)
8273f55e
DW
5773 return 1;
5774 }
5775
5776 return 0;
5777}
5778
1a64be56
LM
5779
5780static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
5781{
5782 struct dl *dl = NULL;
5783 for (dl = super->disks; dl; dl = dl->next)
5784 if ((dl->major == major) && (dl->minor == minor))
5785 return dl;
5786 return NULL;
5787}
5788
5789static int remove_disk_super(struct intel_super *super, int major, int minor)
5790{
5791 struct dl *prev = NULL;
5792 struct dl *dl;
5793
5794 prev = NULL;
5795 for (dl = super->disks; dl; dl = dl->next) {
5796 if ((dl->major == major) && (dl->minor == minor)) {
5797 /* remove */
5798 if (prev)
5799 prev->next = dl->next;
5800 else
5801 super->disks = dl->next;
5802 dl->next = NULL;
5803 __free_imsm_disk(dl);
5804 dprintf("%s: removed %x:%x\n",
5805 __func__, major, minor);
5806 break;
5807 }
5808 prev = dl;
5809 }
5810 return 0;
5811}
5812
f21e18ca 5813static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
ae6aad82 5814
1a64be56
LM
5815static int add_remove_disk_update(struct intel_super *super)
5816{
5817 int check_degraded = 0;
5818 struct dl *disk = NULL;
5819 /* add/remove some spares to/from the metadata/container */
5820 while (super->disk_mgmt_list) {
5821 struct dl *disk_cfg;
5822
5823 disk_cfg = super->disk_mgmt_list;
5824 super->disk_mgmt_list = disk_cfg->next;
5825 disk_cfg->next = NULL;
5826
5827 if (disk_cfg->action == DISK_ADD) {
5828 disk_cfg->next = super->disks;
5829 super->disks = disk_cfg;
5830 check_degraded = 1;
5831 dprintf("%s: added %x:%x\n",
5832 __func__, disk_cfg->major,
5833 disk_cfg->minor);
5834 } else if (disk_cfg->action == DISK_REMOVE) {
5835 dprintf("Disk remove action processed: %x.%x\n",
5836 disk_cfg->major, disk_cfg->minor);
5837 disk = get_disk_super(super,
5838 disk_cfg->major,
5839 disk_cfg->minor);
5840 if (disk) {
5841 /* store action status */
5842 disk->action = DISK_REMOVE;
5843 /* remove spare disks only */
5844 if (disk->index == -1) {
5845 remove_disk_super(super,
5846 disk_cfg->major,
5847 disk_cfg->minor);
5848 }
5849 }
5850 /* release the allocated disk structure */
5851 __free_imsm_disk(disk_cfg);
5852 }
5853 }
5854 return check_degraded;
5855}
5856
2e5dc010
N
5857static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
5858 struct intel_super *super,
5859 void ***space_list)
5860{
5861 struct dl *new_disk;
5862 struct intel_dev *id;
5863 int i;
5864 int delta_disks = u->new_raid_disks - u->old_raid_disks;
ee4beede 5865 int disk_count = u->old_raid_disks;
2e5dc010
N
5866 void **tofree = NULL;
5867 int devices_to_reshape = 1;
5868 struct imsm_super *mpb = super->anchor;
5869 int ret_val = 0;
d098291a 5870 unsigned int dev_id;
2e5dc010 5871
ed7333bd 5872 dprintf("imsm: apply_reshape_container_disks_update()\n");
2e5dc010
N
5873
5874 /* enable spares to use in array */
5875 for (i = 0; i < delta_disks; i++) {
5876 new_disk = get_disk_super(super,
5877 major(u->new_disks[i]),
5878 minor(u->new_disks[i]));
ed7333bd
AK
5879 dprintf("imsm: new disk for reshape is: %i:%i "
5880 "(%p, index = %i)\n",
2e5dc010
N
5881 major(u->new_disks[i]), minor(u->new_disks[i]),
5882 new_disk, new_disk->index);
5883 if ((new_disk == NULL) ||
5884 ((new_disk->index >= 0) &&
5885 (new_disk->index < u->old_raid_disks)))
5886 goto update_reshape_exit;
ee4beede 5887 new_disk->index = disk_count++;
2e5dc010
N
5888 /* slot to fill in autolayout
5889 */
5890 new_disk->raiddisk = new_disk->index;
5891 new_disk->disk.status |=
5892 CONFIGURED_DISK;
5893 new_disk->disk.status &= ~SPARE_DISK;
5894 }
5895
ed7333bd
AK
5896 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
5897 mpb->num_raid_devs);
2e5dc010
N
5898 /* manage changes in volume
5899 */
d098291a 5900 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
2e5dc010
N
5901 void **sp = *space_list;
5902 struct imsm_dev *newdev;
5903 struct imsm_map *newmap, *oldmap;
5904
d098291a
AK
5905 for (id = super->devlist ; id; id = id->next) {
5906 if (id->index == dev_id)
5907 break;
5908 }
5909 if (id == NULL)
5910 break;
2e5dc010
N
5911 if (!sp)
5912 continue;
5913 *space_list = *sp;
5914 newdev = (void*)sp;
5915 /* Copy the dev, but not (all of) the map */
5916 memcpy(newdev, id->dev, sizeof(*newdev));
5917 oldmap = get_imsm_map(id->dev, 0);
5918 newmap = get_imsm_map(newdev, 0);
5919 /* Copy the current map */
5920 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
5921 /* update one device only
5922 */
5923 if (devices_to_reshape) {
ed7333bd
AK
5924 dprintf("imsm: modifying subdev: %i\n",
5925 id->index);
2e5dc010
N
5926 devices_to_reshape--;
5927 newdev->vol.migr_state = 1;
5928 newdev->vol.curr_migr_unit = 0;
5929 newdev->vol.migr_type = MIGR_GEN_MIGR;
5930 newmap->num_members = u->new_raid_disks;
5931 for (i = 0; i < delta_disks; i++) {
5932 set_imsm_ord_tbl_ent(newmap,
5933 u->old_raid_disks + i,
5934 u->old_raid_disks + i);
5935 }
5936 /* New map is correct, now need to save old map
5937 */
5938 newmap = get_imsm_map(newdev, 1);
5939 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
5940
70bdf0dc 5941 imsm_set_array_size(newdev);
2e5dc010
N
5942 }
5943
5944 sp = (void **)id->dev;
5945 id->dev = newdev;
5946 *sp = tofree;
5947 tofree = sp;
5948 }
819bc634
AK
5949 if (tofree)
5950 *space_list = tofree;
2e5dc010
N
5951 ret_val = 1;
5952
5953update_reshape_exit:
5954
5955 return ret_val;
5956}
5957
bb025c2f 5958static int apply_takeover_update(struct imsm_update_takeover *u,
8ca6df95
KW
5959 struct intel_super *super,
5960 void ***space_list)
bb025c2f
KW
5961{
5962 struct imsm_dev *dev = NULL;
8ca6df95
KW
5963 struct intel_dev *dv;
5964 struct imsm_dev *dev_new;
bb025c2f
KW
5965 struct imsm_map *map;
5966 struct dl *dm, *du;
8ca6df95 5967 int i;
bb025c2f
KW
5968
5969 for (dv = super->devlist; dv; dv = dv->next)
5970 if (dv->index == (unsigned int)u->subarray) {
5971 dev = dv->dev;
5972 break;
5973 }
5974
5975 if (dev == NULL)
5976 return 0;
5977
5978 map = get_imsm_map(dev, 0);
5979
5980 if (u->direction == R10_TO_R0) {
43d5ec18
KW
5981 /* Number of failed disks must be half of initial disk number */
5982 if (imsm_count_failed(super, dev) != (map->num_members / 2))
5983 return 0;
5984
bb025c2f
KW
5985 /* iterate through devices to mark removed disks as spare */
5986 for (dm = super->disks; dm; dm = dm->next) {
5987 if (dm->disk.status & FAILED_DISK) {
5988 int idx = dm->index;
5989 /* update indexes on the disk list */
5990/* FIXME this loop-within-a-loop looks wrong, I'm not convinced
5991 the index values will end up being correct.... NB */
5992 for (du = super->disks; du; du = du->next)
5993 if (du->index > idx)
5994 du->index--;
5995 /* mark as spare disk */
5996 dm->disk.status = SPARE_DISK;
5997 dm->index = -1;
5998 }
5999 }
bb025c2f
KW
6000 /* update map */
6001 map->num_members = map->num_members / 2;
6002 map->map_state = IMSM_T_STATE_NORMAL;
6003 map->num_domains = 1;
6004 map->raid_level = 0;
6005 map->failed_disk_num = -1;
6006 }
6007
8ca6df95
KW
6008 if (u->direction == R0_TO_R10) {
6009 void **space;
6010 /* update slots in current disk list */
6011 for (dm = super->disks; dm; dm = dm->next) {
6012 if (dm->index >= 0)
6013 dm->index *= 2;
6014 }
6015 /* create new *missing* disks */
6016 for (i = 0; i < map->num_members; i++) {
6017 space = *space_list;
6018 if (!space)
6019 continue;
6020 *space_list = *space;
6021 du = (void *)space;
6022 memcpy(du, super->disks, sizeof(*du));
6023 du->disk.status = FAILED_DISK;
6024 du->disk.scsi_id = 0;
6025 du->fd = -1;
6026 du->minor = 0;
6027 du->major = 0;
6028 du->index = (i * 2) + 1;
6029 sprintf((char *)du->disk.serial,
6030 " MISSING_%d", du->index);
6031 sprintf((char *)du->serial,
6032 "MISSING_%d", du->index);
6033 du->next = super->missing;
6034 super->missing = du;
6035 }
6036 /* create new dev and map */
6037 space = *space_list;
6038 if (!space)
6039 return 0;
6040 *space_list = *space;
6041 dev_new = (void *)space;
6042 memcpy(dev_new, dev, sizeof(*dev));
6043 /* update new map */
6044 map = get_imsm_map(dev_new, 0);
6045 map->failed_disk_num = map->num_members;
6046 map->num_members = map->num_members * 2;
6047 map->map_state = IMSM_T_STATE_NORMAL;
6048 map->num_domains = 2;
6049 map->raid_level = 1;
6050 /* replace dev<->dev_new */
6051 dv->dev = dev_new;
6052 }
bb025c2f
KW
6053 /* update disk order table */
6054 for (du = super->disks; du; du = du->next)
6055 if (du->index >= 0)
6056 set_imsm_ord_tbl_ent(map, du->index, du->index);
8ca6df95
KW
6057 for (du = super->missing; du; du = du->next)
6058 if (du->index >= 0)
6059 set_imsm_ord_tbl_ent(map, du->index,
6060 du->index | IMSM_ORD_REBUILD);
bb025c2f
KW
6061
6062 return 1;
6063}
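
/*
 * A small sketch of the slot renumbering used for the R0 -> R10 takeover in
 * apply_takeover_update() above: every existing RAID0 member moves to an
 * even index (i * 2) and each newly created "missing" mirror takes the
 * following odd index (i * 2 + 1).  Illustrative only; it just prints the
 * mapping instead of touching any metadata.
 */
#include <stdio.h>

static void show_r0_to_r10_indexes(int num_members)
{
	int i;

	for (i = 0; i < num_members; i++)
		printf("member %d -> index %d, MISSING_%d -> index %d\n",
		       i, i * 2, i * 2 + 1, i * 2 + 1);
}
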
6064
e8319a19
DW
6065static void imsm_process_update(struct supertype *st,
6066 struct metadata_update *update)
6067{
6068 /**
6069 * crack open the metadata_update envelope to find the update record
6070 * update can be one of:
d195167d
AK
6071 * update_reshape_container_disks - all the arrays in the container
6072 * are being reshaped to have more devices. We need to mark
6073 * the arrays for general migration and convert selected spares
6074 * into active devices.
6075 * update_activate_spare - a spare device has replaced a failed
e8319a19
DW
6076 * device in an array, update the disk_ord_tbl. If this disk is
6077 * present in all member arrays then also clear the SPARE_DISK
6078 * flag
d195167d
AK
6079 * update_create_array
6080 * update_kill_array
6081 * update_rename_array
6082 * update_add_remove_disk
e8319a19
DW
6083 */
6084 struct intel_super *super = st->sb;
4d7b1503 6085 struct imsm_super *mpb;
e8319a19
DW
6086 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6087
4d7b1503
DW
6088 /* update requires a larger buf but the allocation failed */
6089 if (super->next_len && !super->next_buf) {
6090 super->next_len = 0;
6091 return;
6092 }
6093
6094 if (super->next_buf) {
6095 memcpy(super->next_buf, super->buf, super->len);
6096 free(super->buf);
6097 super->len = super->next_len;
6098 super->buf = super->next_buf;
6099
6100 super->next_len = 0;
6101 super->next_buf = NULL;
6102 }
6103
6104 mpb = super->anchor;
6105
e8319a19 6106 switch (type) {
bb025c2f
KW
6107 case update_takeover: {
6108 struct imsm_update_takeover *u = (void *)update->buf;
8ca6df95 6109 if (apply_takeover_update(u, super, &update->space_list))
bb025c2f
KW
6110 super->updates_pending++;
6111 break;
6112 }
6113
78b10e66 6114 case update_reshape_container_disks: {
d195167d 6115 struct imsm_update_reshape *u = (void *)update->buf;
2e5dc010
N
6116 if (apply_reshape_container_disks_update(
6117 u, super, &update->space_list))
6118 super->updates_pending++;
78b10e66
N
6119 break;
6120 }
e8319a19
DW
6121 case update_activate_spare: {
6122 struct imsm_update_activate_spare *u = (void *) update->buf;
949c47a0 6123 struct imsm_dev *dev = get_imsm_dev(super, u->array);
a965f303 6124 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd 6125 struct imsm_map *migr_map;
e8319a19
DW
6126 struct active_array *a;
6127 struct imsm_disk *disk;
0c046afd 6128 __u8 to_state;
e8319a19 6129 struct dl *dl;
e8319a19 6130 unsigned int found;
0c046afd 6131 int failed;
98130f40 6132 int victim = get_imsm_disk_idx(dev, u->slot, -1);
e8319a19
DW
6133 int i;
6134
6135 for (dl = super->disks; dl; dl = dl->next)
d23fe947 6136 if (dl == u->dl)
e8319a19
DW
6137 break;
6138
6139 if (!dl) {
6140 fprintf(stderr, "error: imsm_activate_spare passed "
1f24f035
DW
6141 "an unknown disk (index: %d)\n",
6142 u->dl->index);
e8319a19
DW
6143 return;
6144 }
6145
6146 super->updates_pending++;
6147
0c046afd
DW
6148 /* count failures (excluding rebuilds and the victim)
6149 * to determine map[0] state
6150 */
6151 failed = 0;
6152 for (i = 0; i < map->num_members; i++) {
6153 if (i == u->slot)
6154 continue;
98130f40
AK
6155 disk = get_imsm_disk(super,
6156 get_imsm_disk_idx(dev, i, -1));
25ed7e59 6157 if (!disk || is_failed(disk))
0c046afd
DW
6158 failed++;
6159 }
6160
d23fe947
DW
6161 /* adding a pristine spare, assign a new index */
6162 if (dl->index < 0) {
6163 dl->index = super->anchor->num_disks;
6164 super->anchor->num_disks++;
6165 }
d23fe947 6166 disk = &dl->disk;
f2f27e63
DW
6167 disk->status |= CONFIGURED_DISK;
6168 disk->status &= ~SPARE_DISK;
e8319a19 6169
0c046afd
DW
6170 /* mark rebuild */
6171 to_state = imsm_check_degraded(super, dev, failed);
6172 map->map_state = IMSM_T_STATE_DEGRADED;
e3bba0e0 6173 migrate(dev, to_state, MIGR_REBUILD);
0c046afd
DW
6174 migr_map = get_imsm_map(dev, 1);
6175 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6176 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6177
148acb7b
DW
6178 /* update the family_num to mark a new container
6179 * generation, being careful to record the existing
6180 * family_num in orig_family_num to clean up after
6181 * earlier mdadm versions that neglected to set it.
6182 */
6183 if (mpb->orig_family_num == 0)
6184 mpb->orig_family_num = mpb->family_num;
6185 mpb->family_num += super->random;
6186
e8319a19
DW
6187 /* count arrays using the victim in the metadata */
6188 found = 0;
6189 for (a = st->arrays; a ; a = a->next) {
949c47a0 6190 dev = get_imsm_dev(super, a->info.container_member);
620b1713
DW
6191 map = get_imsm_map(dev, 0);
6192
6193 if (get_imsm_disk_slot(map, victim) >= 0)
6194 found++;
e8319a19
DW
6195 }
6196
24565c9a 6197 /* delete the victim if it is no longer being
e8319a19
DW
6198 * utilized anywhere
6199 */
e8319a19 6200 if (!found) {
ae6aad82 6201 struct dl **dlp;
24565c9a 6202
47ee5a45
DW
6203 /* We know that 'manager' isn't touching anything,
6204 * so it is safe to delete
6205 */
24565c9a 6206 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
ae6aad82
DW
6207 if ((*dlp)->index == victim)
6208 break;
47ee5a45
DW
6209
6210 /* victim may be on the missing list */
6211 if (!*dlp)
6212 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
6213 if ((*dlp)->index == victim)
6214 break;
24565c9a 6215 imsm_delete(super, dlp, victim);
e8319a19 6216 }
8273f55e
DW
6217 break;
6218 }
6219 case update_create_array: {
6220 /* someone wants to create a new array, we need to be aware of
6221 * a few races/collisions:
6222 * 1/ 'Create' called by two separate instances of mdadm
6223 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6224 * devices that have since been assimilated via
6225 * activate_spare.
6226 * In the event this update can not be carried out mdadm will
6227 * (FIX ME) notice that its update did not take hold.
6228 */
6229 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 6230 struct intel_dev *dv;
8273f55e
DW
6231 struct imsm_dev *dev;
6232 struct imsm_map *map, *new_map;
6233 unsigned long long start, end;
6234 unsigned long long new_start, new_end;
6235 int i;
54c2c1ea
DW
6236 struct disk_info *inf;
6237 struct dl *dl;
8273f55e
DW
6238
6239 /* handle racing creates: first come first serve */
6240 if (u->dev_idx < mpb->num_raid_devs) {
6241 dprintf("%s: subarray %d already defined\n",
6242 __func__, u->dev_idx);
ba2de7ba 6243 goto create_error;
8273f55e
DW
6244 }
6245
6246 /* check update is next in sequence */
6247 if (u->dev_idx != mpb->num_raid_devs) {
6a3e913e
DW
6248 dprintf("%s: can not create array %d expected index %d\n",
6249 __func__, u->dev_idx, mpb->num_raid_devs);
ba2de7ba 6250 goto create_error;
8273f55e
DW
6251 }
6252
a965f303 6253 new_map = get_imsm_map(&u->dev, 0);
8273f55e
DW
6254 new_start = __le32_to_cpu(new_map->pba_of_lba0);
6255 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
54c2c1ea 6256 inf = get_disk_info(u);
8273f55e
DW
6257
6258 /* handle activate_spare versus create race:
6259 * check to make sure that overlapping arrays do not include
6260 * overlapping disks
6261 */
6262 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 6263 dev = get_imsm_dev(super, i);
a965f303 6264 map = get_imsm_map(dev, 0);
8273f55e
DW
6265 start = __le32_to_cpu(map->pba_of_lba0);
6266 end = start + __le32_to_cpu(map->blocks_per_member);
6267 if ((new_start >= start && new_start <= end) ||
6268 (start >= new_start && start <= new_end))
54c2c1ea
DW
6269 /* overlap */;
6270 else
6271 continue;
6272
6273 if (disks_overlap(super, i, u)) {
8273f55e 6274 dprintf("%s: arrays overlap\n", __func__);
ba2de7ba 6275 goto create_error;
8273f55e
DW
6276 }
6277 }
8273f55e 6278
949c47a0
DW
6279 /* check that prepare update was successful */
6280 if (!update->space) {
6281 dprintf("%s: prepare update failed\n", __func__);
ba2de7ba 6282 goto create_error;
949c47a0
DW
6283 }
6284
54c2c1ea
DW
6285 /* check that all disks are still active before committing
6286 * changes. FIXME: could we instead handle this by creating a
6287 * degraded array? That's probably not what the user expects,
6288 * so better to drop this update on the floor.
6289 */
6290 for (i = 0; i < new_map->num_members; i++) {
6291 dl = serial_to_dl(inf[i].serial, super);
6292 if (!dl) {
6293 dprintf("%s: disk disappeared\n", __func__);
ba2de7ba 6294 goto create_error;
54c2c1ea 6295 }
949c47a0
DW
6296 }
6297
8273f55e 6298 super->updates_pending++;
54c2c1ea
DW
6299
6300 /* convert spares to members and fixup ord_tbl */
6301 for (i = 0; i < new_map->num_members; i++) {
6302 dl = serial_to_dl(inf[i].serial, super);
6303 if (dl->index == -1) {
6304 dl->index = mpb->num_disks;
6305 mpb->num_disks++;
6306 dl->disk.status |= CONFIGURED_DISK;
6307 dl->disk.status &= ~SPARE_DISK;
6308 }
6309 set_imsm_ord_tbl_ent(new_map, i, dl->index);
6310 }
6311
ba2de7ba
DW
6312 dv = update->space;
6313 dev = dv->dev;
949c47a0
DW
6314 update->space = NULL;
6315 imsm_copy_dev(dev, &u->dev);
ba2de7ba
DW
6316 dv->index = u->dev_idx;
6317 dv->next = super->devlist;
6318 super->devlist = dv;
8273f55e 6319 mpb->num_raid_devs++;
8273f55e 6320
4d1313e9 6321 imsm_update_version_info(super);
8273f55e 6322 break;
ba2de7ba
DW
6323 create_error:
6324 /* mdmon knows how to release update->space, but not
6325 * ((struct intel_dev *) update->space)->dev
6326 */
6327 if (update->space) {
6328 dv = update->space;
6329 free(dv->dev);
6330 }
8273f55e 6331 break;
e8319a19 6332 }
33414a01
DW
6333 case update_kill_array: {
6334 struct imsm_update_kill_array *u = (void *) update->buf;
6335 int victim = u->dev_idx;
6336 struct active_array *a;
6337 struct intel_dev **dp;
6338 struct imsm_dev *dev;
6339
6340 /* sanity check that we are not affecting the uuid of
6341 * active arrays, or deleting an active array
6342 *
6343 * FIXME when immutable ids are available, but note that
6344 * we'll also need to fixup the invalidated/active
6345 * subarray indexes in mdstat
6346 */
6347 for (a = st->arrays; a; a = a->next)
6348 if (a->info.container_member >= victim)
6349 break;
6350 /* by definition if mdmon is running at least one array
6351 * is active in the container, so checking
6352 * mpb->num_raid_devs is just extra paranoia
6353 */
6354 dev = get_imsm_dev(super, victim);
6355 if (a || !dev || mpb->num_raid_devs == 1) {
6356 dprintf("failed to delete subarray-%d\n", victim);
6357 break;
6358 }
6359
6360 for (dp = &super->devlist; *dp;)
f21e18ca 6361 if ((*dp)->index == (unsigned)super->current_vol) {
33414a01
DW
6362 *dp = (*dp)->next;
6363 } else {
f21e18ca 6364 if ((*dp)->index > (unsigned)victim)
33414a01
DW
6365 (*dp)->index--;
6366 dp = &(*dp)->next;
6367 }
6368 mpb->num_raid_devs--;
6369 super->updates_pending++;
6370 break;
6371 }
aa534678
DW
6372 case update_rename_array: {
6373 struct imsm_update_rename_array *u = (void *) update->buf;
6374 char name[MAX_RAID_SERIAL_LEN+1];
6375 int target = u->dev_idx;
6376 struct active_array *a;
6377 struct imsm_dev *dev;
6378
6379 /* sanity check that we are not affecting the uuid of
6380 * an active array
6381 */
6382 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
6383 name[MAX_RAID_SERIAL_LEN] = '\0';
6384 for (a = st->arrays; a; a = a->next)
6385 if (a->info.container_member == target)
6386 break;
6387 dev = get_imsm_dev(super, u->dev_idx);
6388 if (a || !dev || !check_name(super, name, 1)) {
6389 dprintf("failed to rename subarray-%d\n", target);
6390 break;
6391 }
6392
cdbe98cd 6393 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
aa534678
DW
6394 super->updates_pending++;
6395 break;
6396 }
1a64be56 6397 case update_add_remove_disk: {
43dad3d6 6398 /* we may be able to repair some arrays if disks are
1a64be56
LM
6399 * being added, check the status of add_remove_disk
6400 * if disks have been added.
6401 */
6402 if (add_remove_disk_update(super)) {
43dad3d6 6403 struct active_array *a;
072b727f
DW
6404
6405 super->updates_pending++;
1a64be56 6406 for (a = st->arrays; a; a = a->next)
43dad3d6
DW
6407 a->check_degraded = 1;
6408 }
43dad3d6 6409 break;
e8319a19 6410 }
1a64be56
LM
6411 default:
6412 fprintf(stderr, "error: unsupported process update type: "
6413 "(type: %d)\n", type);
6414 }
e8319a19 6415}
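
/*
 * A minimal sketch of the dispatch pattern used by imsm_process_update()
 * above: every update record starts with an enum tag, so the monitor can
 * peek at the first field of the opaque buffer and cast to the matching
 * structure.  The enum and structs here are illustrative stand-ins, not the
 * real imsm update definitions.
 */
enum demo_update_type { demo_update_a, demo_update_b };

struct demo_update_a_rec { enum demo_update_type type; int payload; };
struct demo_update_b_rec { enum demo_update_type type; char name[16]; };

static int crack_update(void *buf)
{
	enum demo_update_type type = *(enum demo_update_type *) buf;

	switch (type) {
	case demo_update_a:
		return ((struct demo_update_a_rec *) buf)->payload;
	case demo_update_b:
		return ((struct demo_update_b_rec *) buf)->name[0];
	default:
		return -1;	/* unsupported update type */
	}
}
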
88758e9d 6416
8273f55e
DW
6417static void imsm_prepare_update(struct supertype *st,
6418 struct metadata_update *update)
6419{
949c47a0 6420 /**
4d7b1503
DW
6421 * Allocate space to hold new disk entries, raid-device entries or a new
6422 * mpb if necessary. The manager synchronously waits for updates to
6423 * complete in the monitor, so new mpb buffers allocated here can be
6424 * integrated by the monitor thread without worrying about live pointers
6425 * in the manager thread.
8273f55e 6426 */
949c47a0 6427 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
4d7b1503
DW
6428 struct intel_super *super = st->sb;
6429 struct imsm_super *mpb = super->anchor;
6430 size_t buf_len;
6431 size_t len = 0;
949c47a0
DW
6432
6433 switch (type) {
abedf5fc
KW
6434 case update_takeover: {
6435 struct imsm_update_takeover *u = (void *)update->buf;
6436 if (u->direction == R0_TO_R10) {
6437 void **tail = (void **)&update->space_list;
6438 struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
6439 struct imsm_map *map = get_imsm_map(dev, 0);
6440 int num_members = map->num_members;
6441 void *space;
6442 int size, i;
6443 int err = 0;
6444 /* allocate memory for added disks */
6445 for (i = 0; i < num_members; i++) {
6446 size = sizeof(struct dl);
6447 space = malloc(size);
6448 if (!space) {
6449 err++;
6450 break;
6451 }
6452 *tail = space;
6453 tail = space;
6454 *tail = NULL;
6455 }
6456 /* allocate memory for new device */
6457 size = sizeof_imsm_dev(super->devlist->dev, 0) +
6458 (num_members * sizeof(__u32));
6459 space = malloc(size);
6460 if (!space)
6461 err++;
6462 else {
6463 *tail = space;
6464 tail = space;
6465 *tail = NULL;
6466 }
6467 if (!err) {
6468 len = disks_to_mpb_size(num_members * 2);
6469 } else {
6470 /* if the allocation didn't succeed, free the buffer */
6471 while (update->space_list) {
6472 void **sp = update->space_list;
6473 update->space_list = *sp;
6474 free(sp);
6475 }
6476 }
6477 }
6478
6479 break;
6480 }
78b10e66 6481 case update_reshape_container_disks: {
d195167d
AK
6482 /* Every raid device in the container is about to
6483 * gain some more devices, and we will enter a
6484 * reconfiguration.
6485 * So each 'imsm_map' will be bigger, and the imsm_vol
6486 * will now hold 2 of them.
6487 * Thus we need new 'struct imsm_dev' allocations sized
6488 * as sizeof_imsm_dev but with more devices in both maps.
6489 */
6490 struct imsm_update_reshape *u = (void *)update->buf;
6491 struct intel_dev *dl;
6492 void **space_tail = (void**)&update->space_list;
6493
6494 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6495
6496 for (dl = super->devlist; dl; dl = dl->next) {
6497 int size = sizeof_imsm_dev(dl->dev, 1);
6498 void *s;
d677e0b8
AK
6499 if (u->new_raid_disks > u->old_raid_disks)
6500 size += sizeof(__u32)*2*
6501 (u->new_raid_disks - u->old_raid_disks);
d195167d
AK
6502 s = malloc(size);
6503 if (!s)
6504 break;
6505 *space_tail = s;
6506 space_tail = s;
6507 *space_tail = NULL;
6508 }
6509
6510 len = disks_to_mpb_size(u->new_raid_disks);
6511 dprintf("New anchor length is %llu\n", (unsigned long long)len);
78b10e66
N
6512 break;
6513 }
949c47a0
DW
6514 case update_create_array: {
6515 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 6516 struct intel_dev *dv;
54c2c1ea
DW
6517 struct imsm_dev *dev = &u->dev;
6518 struct imsm_map *map = get_imsm_map(dev, 0);
6519 struct dl *dl;
6520 struct disk_info *inf;
6521 int i;
6522 int activate = 0;
949c47a0 6523
54c2c1ea
DW
6524 inf = get_disk_info(u);
6525 len = sizeof_imsm_dev(dev, 1);
ba2de7ba
DW
6526 /* allocate a new super->devlist entry */
6527 dv = malloc(sizeof(*dv));
6528 if (dv) {
6529 dv->dev = malloc(len);
6530 if (dv->dev)
6531 update->space = dv;
6532 else {
6533 free(dv);
6534 update->space = NULL;
6535 }
6536 }
949c47a0 6537
54c2c1ea
DW
6538 /* count how many spares will be converted to members */
6539 for (i = 0; i < map->num_members; i++) {
6540 dl = serial_to_dl(inf[i].serial, super);
6541 if (!dl) {
6542 /* it may have failed; nothing we can do about
6543 * it here
6544 */
6545 continue;
6546 }
6547 if (count_memberships(dl, super) == 0)
6548 activate++;
6549 }
6550 len += activate * sizeof(struct imsm_disk);
949c47a0
DW
6551 break;
6552 default:
6553 break;
6554 }
6555 }
8273f55e 6556
4d7b1503
DW
6557 /* check if we need a larger metadata buffer */
6558 if (super->next_buf)
6559 buf_len = super->next_len;
6560 else
6561 buf_len = super->len;
6562
6563 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
6564 /* ok we need a larger buf than what is currently allocated
6565 * if this allocation fails process_update will notice that
6566 * ->next_len is set and ->next_buf is NULL
6567 */
6568 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
6569 if (super->next_buf)
6570 free(super->next_buf);
6571
6572 super->next_len = buf_len;
1f45a8ad
DW
6573 if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
6574 memset(super->next_buf, 0, buf_len);
6575 else
4d7b1503
DW
6576 super->next_buf = NULL;
6577 }
8273f55e
DW
6578}
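
/*
 * A standalone sketch of the anchor-buffer growth policy in
 * imsm_prepare_update() above: the required size is rounded up to a whole
 * number of 512-byte sectors and allocated 512-byte aligned, so a failed
 * allocation can later be detected as "length set, buffer NULL".
 * DEMO_ROUND_UP is a local stand-in for mdadm's ROUND_UP macro.
 */
#include <stdlib.h>
#include <string.h>

#define DEMO_ROUND_UP(x, base) ((((x) + (base) - 1) / (base)) * (base))

static void *alloc_mpb_buf(size_t needed, size_t *out_len)
{
	size_t buf_len = DEMO_ROUND_UP(needed, 512);
	void *buf = NULL;

	*out_len = buf_len;
	if (posix_memalign(&buf, 512, buf_len) == 0) {
		memset(buf, 0, buf_len);
		return buf;
	}
	return NULL;	/* caller sees *out_len set but a NULL buffer */
}
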
6579
ae6aad82 6580/* must be called while manager is quiesced */
f21e18ca 6581static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
ae6aad82
DW
6582{
6583 struct imsm_super *mpb = super->anchor;
ae6aad82
DW
6584 struct dl *iter;
6585 struct imsm_dev *dev;
6586 struct imsm_map *map;
24565c9a
DW
6587 int i, j, num_members;
6588 __u32 ord;
ae6aad82 6589
24565c9a
DW
6590 dprintf("%s: deleting device[%d] from imsm_super\n",
6591 __func__, index);
ae6aad82
DW
6592
6593 /* shift all indexes down one */
6594 for (iter = super->disks; iter; iter = iter->next)
f21e18ca 6595 if (iter->index > (int)index)
ae6aad82 6596 iter->index--;
47ee5a45 6597 for (iter = super->missing; iter; iter = iter->next)
f21e18ca 6598 if (iter->index > (int)index)
47ee5a45 6599 iter->index--;
ae6aad82
DW
6600
6601 for (i = 0; i < mpb->num_raid_devs; i++) {
6602 dev = get_imsm_dev(super, i);
6603 map = get_imsm_map(dev, 0);
24565c9a
DW
6604 num_members = map->num_members;
6605 for (j = 0; j < num_members; j++) {
6606 /* update ord entries being careful not to propagate
6607 * ord-flags to the first map
6608 */
98130f40 6609 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ae6aad82 6610
24565c9a
DW
6611 if (ord_to_idx(ord) <= index)
6612 continue;
ae6aad82 6613
24565c9a
DW
6614 map = get_imsm_map(dev, 0);
6615 set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
6616 map = get_imsm_map(dev, 1);
6617 if (map)
6618 set_imsm_ord_tbl_ent(map, j, ord - 1);
ae6aad82
DW
6619 }
6620 }
6621
6622 mpb->num_disks--;
6623 super->updates_pending++;
24565c9a
DW
6624 if (*dlp) {
6625 struct dl *dl = *dlp;
6626
6627 *dlp = (*dlp)->next;
6628 __free_imsm_disk(dl);
6629 }
ae6aad82
DW
6630}
6631
2cda7640
ML
6632static char disk_by_path[] = "/dev/disk/by-path/";
6633
6634static const char *imsm_get_disk_controller_domain(const char *path)
6635{
2cda7640 6636 char disk_path[PATH_MAX];
96234762
LM
6637 char *drv=NULL;
6638 struct stat st;
2cda7640 6639
96234762
LM
6640 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
6641 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
6642 if (stat(disk_path, &st) == 0) {
6643 struct sys_dev* hba;
6644 char *path=NULL;
6645
6646 path = devt_to_devpath(st.st_rdev);
6647 if (path == NULL)
6648 return "unknown";
6649 hba = find_disk_attached_hba(-1, path);
6650 if (hba && hba->type == SYS_DEV_SAS)
6651 drv = "isci";
6652 else if (hba && hba->type == SYS_DEV_SATA)
6653 drv = "ahci";
6654 else
6655 drv = "unknown";
6656 dprintf("path: %s hba: %s attached: %s\n",
6657 path, (hba) ? hba->path : "NULL", drv);
6658 free(path);
6659 if (hba)
6660 free_sys_dev(&hba);
2cda7640 6661 }
96234762 6662 return drv;
2cda7640
ML
6663}
6664
78b10e66
N
6665static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
6666{
6667 char subdev_name[20];
6668 struct mdstat_ent *mdstat;
6669
6670 sprintf(subdev_name, "%d", subdev);
6671 mdstat = mdstat_by_subdev(subdev_name, container);
6672 if (!mdstat)
6673 return -1;
6674
6675 *minor = mdstat->devnum;
6676 free_mdstat(mdstat);
6677 return 0;
6678}
6679
6680static int imsm_reshape_is_allowed_on_container(struct supertype *st,
6681 struct geo_params *geo,
6682 int *old_raid_disks)
6683{
694575e7
KW
6684 /* currently we only support increasing the number of devices
6685 * for a container. This increases the number of devices for each
6686 * member array. They must all be RAID0 or RAID5.
6687 */
78b10e66
N
6688 int ret_val = 0;
6689 struct mdinfo *info, *member;
6690 int devices_that_can_grow = 0;
6691
6692 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
6693 "st->devnum = (%i)\n",
6694 st->devnum);
6695
6696 if (geo->size != -1 ||
6697 geo->level != UnSet ||
6698 geo->layout != UnSet ||
6699 geo->chunksize != 0 ||
6700 geo->raid_disks == UnSet) {
6701 dprintf("imsm: Container operation is allowed for "
6702 "raid disks number change only.\n");
6703 return ret_val;
6704 }
6705
6706 info = container_content_imsm(st, NULL);
6707 for (member = info; member; member = member->next) {
6708 int result;
6709 int minor;
6710
6711 dprintf("imsm: checking device_num: %i\n",
6712 member->container_member);
6713
d7d205bd 6714 if (geo->raid_disks <= member->array.raid_disks) {
78b10e66
N
6715 /* we work on container for Online Capacity Expansion
6716 * only so raid_disks has to grow
6717 */
6718 dprintf("imsm: for container operation an increase "
6719 "in raid disks is required\n");
6720 break;
6721 }
6722
6723 if ((info->array.level != 0) &&
6724 (info->array.level != 5)) {
6725 /* we cannot use this container with other raid level
6726 */
690aae1a 6727 dprintf("imsm: for container operation wrong"
78b10e66
N
6728 " raid level (%i) detected\n",
6729 info->array.level);
6730 break;
6731 } else {
6732 /* check for platform support
6733 * for this raid level configuration
6734 */
6735 struct intel_super *super = st->sb;
6736 if (!is_raid_level_supported(super->orom,
6737 member->array.level,
6738 geo->raid_disks)) {
690aae1a 6739 dprintf("platform does not support raid%d with"
78b10e66
N
6740 " %d disk%s\n",
6741 info->array.level,
6742 geo->raid_disks,
6743 geo->raid_disks > 1 ? "s" : "");
6744 break;
6745 }
6746 }
6747
6748 if (*old_raid_disks &&
6749 info->array.raid_disks != *old_raid_disks)
6750 break;
6751 *old_raid_disks = info->array.raid_disks;
6752
6753 /* All raid5 and raid0 volumes in container
6754 * have to be ready for Online Capacity Expansion
6755 * so they need to be assembled. We have already
6756 * checked that no recovery etc is happening.
6757 */
6758 result = imsm_find_array_minor_by_subdev(member->container_member,
6759 st->container_dev,
6760 &minor);
6761 if (result < 0) {
6762 dprintf("imsm: cannot find array\n");
6763 break;
6764 }
6765 devices_that_can_grow++;
6766 }
6767 sysfs_free(info);
6768 if (!member && devices_that_can_grow)
6769 ret_val = 1;
6770
6771 if (ret_val)
6772 dprintf("\tContainer operation allowed\n");
6773 else
6774 dprintf("\tError: %i\n", ret_val);
6775
6776 return ret_val;
6777}
6778
6779/* Function: get_spares_for_grow
6780 * Description: Allocates memory and creates a list of the spare devices
6781 * available in the container. Checks if the spare drive size is acceptable.
6782 * Parameters: Pointer to the supertype structure
6783 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
6784 * NULL on failure
6785 */
6786static struct mdinfo *get_spares_for_grow(struct supertype *st)
6787{
78b10e66 6788 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
326727d9 6789 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
78b10e66
N
6790}
6791
6792/******************************************************************************
6793 * function: imsm_create_metadata_update_for_reshape
6794 * Function creates update for whole IMSM container.
6795 *
6796 ******************************************************************************/
6797static int imsm_create_metadata_update_for_reshape(
6798 struct supertype *st,
6799 struct geo_params *geo,
6800 int old_raid_disks,
6801 struct imsm_update_reshape **updatep)
6802{
6803 struct intel_super *super = st->sb;
6804 struct imsm_super *mpb = super->anchor;
6805 int update_memory_size = 0;
6806 struct imsm_update_reshape *u = NULL;
6807 struct mdinfo *spares = NULL;
6808 int i;
6809 int delta_disks = 0;
bbd24d86 6810 struct mdinfo *dev;
78b10e66
N
6811
6812 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
6813 geo->raid_disks);
6814
6815 delta_disks = geo->raid_disks - old_raid_disks;
6816
6817 /* size of all update data without anchor */
6818 update_memory_size = sizeof(struct imsm_update_reshape);
6819
6820 /* now add space for spare disks that we need to add. */
6821 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
6822
6823 u = calloc(1, update_memory_size);
6824 if (u == NULL) {
6825 dprintf("error: "
6826 "cannot get memory for imsm_update_reshape update\n");
6827 return 0;
6828 }
6829 u->type = update_reshape_container_disks;
6830 u->old_raid_disks = old_raid_disks;
6831 u->new_raid_disks = geo->raid_disks;
6832
6833 /* now get spare disks list
6834 */
6835 spares = get_spares_for_grow(st);
6836
6837 if (spares == NULL
6838 || delta_disks > spares->array.spare_disks) {
6839 dprintf("imsm: ERROR: Cannot get spare devices.\n");
6840 goto abort;
6841 }
6842
6843 /* we have got spares
6844 * update the disk list in the imsm_disk table in the anchor
6845 */
6846 dprintf("imsm: %i spares are available.\n\n",
6847 spares->array.spare_disks);
6848
bbd24d86 6849 dev = spares->devs;
78b10e66 6850 for (i = 0; i < delta_disks; i++) {
78b10e66
N
6851 struct dl *dl;
6852
bbd24d86
AK
6853 if (dev == NULL)
6854 break;
78b10e66
N
6855 u->new_disks[i] = makedev(dev->disk.major,
6856 dev->disk.minor);
6857 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
ee4beede
AK
6858 dl->index = mpb->num_disks;
6859 mpb->num_disks++;
bbd24d86 6860 dev = dev->next;
78b10e66 6861 }
78b10e66
N
6862
6863abort:
6864 /* free spares
6865 */
6866 sysfs_free(spares);
6867
d677e0b8 6868 dprintf("imsm: reshape update preparation :");
78b10e66 6869 if (i == delta_disks) {
d677e0b8 6870 dprintf(" OK\n");
78b10e66
N
6871 *updatep = u;
6872 return update_memory_size;
6873 }
6874 free(u);
d677e0b8 6875 dprintf(" Error\n");
78b10e66
N
6876
6877 return 0;
6878}
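
/*
 * A small sketch of how the reshape update above is sized: the structure
 * already contains room for one new_disks[] entry, so only delta_disks - 1
 * extra entries are added on top of sizeof(struct ...), and the whole
 * record is zeroed with calloc().  struct demo_reshape is a stand-in for
 * struct imsm_update_reshape; it assumes delta_disks >= 1, as in the
 * caller above.
 */
#include <stdlib.h>
#include <sys/types.h>

struct demo_reshape {
	int type;
	int old_raid_disks;
	int new_raid_disks;
	dev_t new_disks[1];	/* extended at allocation time */
};

static struct demo_reshape *alloc_reshape_update(int delta_disks)
{
	size_t len = sizeof(struct demo_reshape) +
		     sizeof(dev_t) * (delta_disks - 1);

	return calloc(1, len);	/* caller fills new_disks[0..delta_disks-1] */
}
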
6879
8dd70bce
AK
6880static void imsm_update_metadata_locally(struct supertype *st,
6881 void *buf, int len)
6882{
6883 struct metadata_update mu;
6884
6885 mu.buf = buf;
6886 mu.len = len;
6887 mu.space = NULL;
6888 mu.space_list = NULL;
6889 mu.next = NULL;
6890 imsm_prepare_update(st, &mu);
6891 imsm_process_update(st, &mu);
6892
6893 while (mu.space_list) {
6894 void **space = mu.space_list;
6895 mu.space_list = *space;
6896 free(space);
6897 }
6898}
78b10e66 6899
471bceb6 6900/***************************************************************************
694575e7 6901* Function: imsm_analyze_change
471bceb6
KW
6902* Description: Analyzes the change for a single volume
6903* and validates whether the transition is supported
694575e7
KW
6904* Parameters: Geometry parameters, supertype structure
6905* Returns: Operation type code on success, -1 on failure
471bceb6
KW
6906****************************************************************************/
6907enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
6908 struct geo_params *geo)
694575e7 6909{
471bceb6
KW
6910 struct mdinfo info;
6911 int change = -1;
6912 int check_devs = 0;
6913
6914 getinfo_super_imsm_volume(st, &info, NULL);
6915
6916 if ((geo->level != info.array.level) &&
6917 (geo->level >= 0) &&
6918 (geo->level != UnSet)) {
6919 switch (info.array.level) {
6920 case 0:
6921 if (geo->level == 5) {
6922 change = CH_LEVEL_MIGRATION;
6923 check_devs = 1;
6924 }
6925 if (geo->level == 10) {
6926 change = CH_TAKEOVER;
6927 check_devs = 1;
6928 }
dfe77a9e
KW
6929 break;
6930 case 1:
6931 if (geo->level == 0) {
6932 change = CH_TAKEOVER;
6933 check_devs = 1;
6934 }
471bceb6
KW
6935 break;
6936 case 5:
6937 if (geo->level != 0)
6938 change = CH_LEVEL_MIGRATION;
6939 break;
6940 case 10:
6941 if (geo->level == 0) {
6942 change = CH_TAKEOVER;
6943 check_devs = 1;
6944 }
6945 break;
6946 }
6947 if (change == -1) {
6948 fprintf(stderr,
6949 Name " Error. Level Migration from %d to %d "
6950 "not supported!\n",
6951 info.array.level, geo->level);
6952 goto analyse_change_exit;
6953 }
6954 } else
6955 geo->level = info.array.level;
6956
6957 if ((geo->layout != info.array.layout)
6958 && ((geo->layout != UnSet) && (geo->layout != -1))) {
6959 change = CH_LEVEL_MIGRATION;
6960 if ((info.array.layout == 0)
6961 && (info.array.level == 5)
6962 && (geo->layout == 5)) {
6963 /* reshape 5 -> 4 */
6964 } else if ((info.array.layout == 5)
6965 && (info.array.level == 5)
6966 && (geo->layout == 0)) {
6967 /* reshape 4 -> 5 */
6968 geo->layout = 0;
6969 geo->level = 5;
6970 } else {
6971 fprintf(stderr,
6972 Name " Error. Layout Migration from %d to %d "
6973 "not supported!\n",
6974 info.array.layout, geo->layout);
6975 change = -1;
6976 goto analyse_change_exit;
6977 }
6978 } else
6979 geo->layout = info.array.layout;
6980
6981 if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
6982 && (geo->chunksize != info.array.chunk_size))
6983 change = CH_CHUNK_MIGR;
6984 else
6985 geo->chunksize = info.array.chunk_size;
6986
6987 if (!validate_geometry_imsm(st,
6988 geo->level,
6989 geo->layout,
6990 geo->raid_disks,
6991 (geo->chunksize / 1024),
6992 geo->size,
6993 0, 0, 1))
6994 change = -1;
6995
6996 if (check_devs) {
6997 struct intel_super *super = st->sb;
6998 struct imsm_super *mpb = super->anchor;
6999
7000 if (mpb->num_raid_devs > 1) {
7001 fprintf(stderr,
7002 Name " Error. Cannot perform operation on %s"
7003 " - for this operation it MUST be the only "
7004 "array in the container\n",
7005 geo->dev_name);
7006 change = -1;
7007 }
7008 }
7009
7010analyse_change_exit:
7011
7012 return change;
694575e7
KW
7013}
7014
bb025c2f
KW
7015int imsm_takeover(struct supertype *st, struct geo_params *geo)
7016{
7017 struct intel_super *super = st->sb;
7018 struct imsm_update_takeover *u;
7019
7020 u = malloc(sizeof(struct imsm_update_takeover));
7021 if (u == NULL)
7022 return 1;
7023
7024 u->type = update_takeover;
7025 u->subarray = super->current_vol;
7026
7027 /* 10->0 transition */
7028 if (geo->level == 0)
7029 u->direction = R10_TO_R0;
7030
0529c688
KW
7031 /* 0->10 transition */
7032 if (geo->level == 10)
7033 u->direction = R0_TO_R10;
7034
bb025c2f
KW
7035 /* update metadata locally */
7036 imsm_update_metadata_locally(st, u,
7037 sizeof(struct imsm_update_takeover));
7038 /* and possibly remotely */
7039 if (st->update_tail)
7040 append_metadata_update(st, u,
7041 sizeof(struct imsm_update_takeover));
7042 else
7043 free(u);
7044
7045 return 0;
7046}
7047
78b10e66
N
7048static int imsm_reshape_super(struct supertype *st, long long size, int level,
7049 int layout, int chunksize, int raid_disks,
690aae1a 7050 char *backup, char *dev, int verbose)
78b10e66 7051{
78b10e66
N
7052 int ret_val = 1;
7053 struct geo_params geo;
7054
7055 dprintf("imsm: reshape_super called.\n");
7056
71204a50 7057 memset(&geo, 0, sizeof(struct geo_params));
78b10e66
N
7058
7059 geo.dev_name = dev;
694575e7 7060 geo.dev_id = st->devnum;
78b10e66
N
7061 geo.size = size;
7062 geo.level = level;
7063 geo.layout = layout;
7064 geo.chunksize = chunksize;
7065 geo.raid_disks = raid_disks;
7066
7067 dprintf("\tfor level : %i\n", geo.level);
7068 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
7069
7070 if (experimental() == 0)
7071 return ret_val;
7072
78b10e66 7073 if (st->container_dev == st->devnum) {
694575e7
KW
7074 /* On container level we can only increase the number of devices. */
7075 dprintf("imsm: info: Container operation\n");
78b10e66
N
7076 int old_raid_disks = 0;
7077 if (imsm_reshape_is_allowed_on_container(
7078 st, &geo, &old_raid_disks)) {
7079 struct imsm_update_reshape *u = NULL;
7080 int len;
7081
7082 len = imsm_create_metadata_update_for_reshape(
7083 st, &geo, old_raid_disks, &u);
7084
ed08d51c
AK
7085 if (len <= 0) {
7086 dprintf("imsm: Cannot prepare update\n");
7087 goto exit_imsm_reshape_super;
7088 }
7089
8dd70bce
AK
7090 ret_val = 0;
7091 /* update metadata locally */
7092 imsm_update_metadata_locally(st, u, len);
7093 /* and possibly remotely */
7094 if (st->update_tail)
7095 append_metadata_update(st, u, len);
7096 else
ed08d51c 7097 free(u);
8dd70bce 7098
694575e7 7099 } else {
690aae1a 7100 fprintf(stderr, Name "imsm: Operation is not allowed "
78b10e66 7101 "on this container\n");
694575e7
KW
7102 }
7103 } else {
7104 /* On volume level we support the following operations
471bceb6
KW
7105 * - takeover: raid10 -> raid0; raid0 -> raid10
7106 * - chunk size migration
7107 * - migration: raid5 -> raid0; raid0 -> raid5
7108 */
7109 struct intel_super *super = st->sb;
7110 struct intel_dev *dev = super->devlist;
7111 int change, devnum;
694575e7 7112 dprintf("imsm: info: Volume operation\n");
471bceb6
KW
7113 /* find requested device */
7114 while (dev) {
7115 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
7116 if (devnum == geo.dev_id)
7117 break;
7118 dev = dev->next;
7119 }
7120 if (dev == NULL) {
7121 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
7122 geo.dev_name, geo.dev_id);
7123 goto exit_imsm_reshape_super;
7124 }
7125 super->current_vol = dev->index;
694575e7
KW
7126 change = imsm_analyze_change(st, &geo);
7127 switch (change) {
471bceb6 7128 case CH_TAKEOVER:
bb025c2f 7129 ret_val = imsm_takeover(st, &geo);
694575e7 7130 break;
471bceb6
KW
7131 case CH_CHUNK_MIGR:
7132 ret_val = 0;
694575e7 7133 break;
471bceb6
KW
7134 case CH_LEVEL_MIGRATION:
7135 ret_val = 0;
694575e7 7136 break;
471bceb6
KW
7137 default:
7138 ret_val = 1;
694575e7 7139 }
694575e7 7140 }
78b10e66 7141
ed08d51c 7142exit_imsm_reshape_super:
78b10e66
N
7143 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
7144 return ret_val;
7145}
2cda7640 7146
999b4972
N
7147static int imsm_manage_reshape(
7148 int afd, struct mdinfo *sra, struct reshape *reshape,
7149 struct supertype *st, unsigned long stripes,
7150 int *fds, unsigned long long *offsets,
7151 int dests, int *destfd, unsigned long long *destoffsets)
7152{
7153 /* Just use child_monitor for now */
7154 return child_monitor(
7155 afd, sra, reshape, st, stripes,
7156 fds, offsets, dests, destfd, destoffsets);
7157}
71204a50 7158#endif /* MDASSEMBLE */
999b4972 7159
cdddbdbc
DW
7160struct superswitch super_imsm = {
7161#ifndef MDASSEMBLE
7162 .examine_super = examine_super_imsm,
7163 .brief_examine_super = brief_examine_super_imsm,
4737ae25 7164 .brief_examine_subarrays = brief_examine_subarrays_imsm,
9d84c8ea 7165 .export_examine_super = export_examine_super_imsm,
cdddbdbc
DW
7166 .detail_super = detail_super_imsm,
7167 .brief_detail_super = brief_detail_super_imsm,
bf5a934a 7168 .write_init_super = write_init_super_imsm,
0e600426
N
7169 .validate_geometry = validate_geometry_imsm,
7170 .add_to_super = add_to_super_imsm,
1a64be56 7171 .remove_from_super = remove_from_super_imsm,
d665cc31 7172 .detail_platform = detail_platform_imsm,
33414a01 7173 .kill_subarray = kill_subarray_imsm,
aa534678 7174 .update_subarray = update_subarray_imsm,
2b959fbf 7175 .load_container = load_container_imsm,
71204a50
N
7176 .default_geometry = default_geometry_imsm,
7177 .get_disk_controller_domain = imsm_get_disk_controller_domain,
7178 .reshape_super = imsm_reshape_super,
7179 .manage_reshape = imsm_manage_reshape,
cdddbdbc
DW
7180#endif
7181 .match_home = match_home_imsm,
7182 .uuid_from_super= uuid_from_super_imsm,
7183 .getinfo_super = getinfo_super_imsm,
5c4cd5da 7184 .getinfo_super_disks = getinfo_super_disks_imsm,
cdddbdbc
DW
7185 .update_super = update_super_imsm,
7186
7187 .avail_size = avail_size_imsm,
80e7f8c3 7188 .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
cdddbdbc
DW
7189
7190 .compare_super = compare_super_imsm,
7191
7192 .load_super = load_super_imsm,
bf5a934a 7193 .init_super = init_super_imsm,
e683ca88 7194 .store_super = store_super_imsm,
cdddbdbc
DW
7195 .free_super = free_super_imsm,
7196 .match_metadata_desc = match_metadata_desc_imsm,
bf5a934a 7197 .container_content = container_content_imsm,
cdddbdbc 7198
cdddbdbc 7199 .external = 1,
4cce4069 7200 .name = "imsm",
845dea95 7201
0e600426 7202#ifndef MDASSEMBLE
845dea95
NB
7203/* for mdmon */
7204 .open_new = imsm_open_new,
ed9d66aa 7205 .set_array_state= imsm_set_array_state,
845dea95
NB
7206 .set_disk = imsm_set_disk,
7207 .sync_metadata = imsm_sync_metadata,
88758e9d 7208 .activate_spare = imsm_activate_spare,
e8319a19 7209 .process_update = imsm_process_update,
8273f55e 7210 .prepare_update = imsm_prepare_update,
0e600426 7211#endif /* MDASSEMBLE */
cdddbdbc 7212};