/*
 * mdadm - Intel(R) Matrix Storage Manager Support
 *
 * Copyright (C) 2002-2008 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define HAVE_STDINT_H 1
#include "mdadm.h"
#include "mdmon.h"
#include "sha1.h"
#include "platform-intel.h"
#include <values.h>
#include <scsi/sg.h>
#include <ctype.h>
#include <dirent.h>

/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH  32
#define MAX_RAID_SERIAL_LEN   16

#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM      __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB     __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0   __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1   __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10  __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E  __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5   __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)

#define MPB_SECTOR_CNT 418
#define IMSM_RESERVED_SECTORS 4096
#define SECT_PER_MB_SHIFT 11

/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
	__u32 status;			 /* 0xF0 - 0xF3 */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define	IMSM_DISK_FILLERS	4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};

/* RAID map configuration infos. */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;  /* valid only when state is degraded */
	__u8  ddf;
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));

struct imsm_vol {
	__u32 curr_migr_unit;
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));

struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;
	__u32 size_high;
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;		/* Persistent RaidDev status */
	__u32 reserved_blocks;	/* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));

struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));

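/* Sketch of the on-disk anchor layout implied by the definitions above:
 * a struct imsm_super header, then disk[num_disks] entries starting at
 * offset 0xD8, then one variable-sized struct imsm_dev per volume (see
 * sizeof_imsm_dev() below), and finally the optional BBM log whose size
 * is given by bbm_log_size.
 */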
#define BBM_LOG_MAX_ENTRIES 254

struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

struct bbm_log {
	__u32 signature; /* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved; /* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));


#ifndef MDASSEMBLE
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif

static __u8 migr_type(struct imsm_dev *dev)
{
	if (dev->vol.migr_type == MIGR_VERIFY &&
	    dev->status & DEV_VERIFY_AND_FIX)
		return MIGR_REPAIR;
	else
		return dev->vol.migr_type;
}

static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
{
	/* for compatibility with older oroms convert MIGR_REPAIR into
	 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
	 */
	if (migr_type == MIGR_REPAIR) {
		dev->vol.migr_type = MIGR_VERIFY;
		dev->status |= DEV_VERIFY_AND_FIX;
	} else {
		dev->vol.migr_type = migr_type;
		dev->status &= ~DEV_VERIFY_AND_FIX;
	}
}
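/* Illustration: set_migr_type(dev, MIGR_REPAIR) records MIGR_VERIFY plus
 * the DEV_VERIFY_AND_FIX status bit, so older oroms still see a type they
 * understand, while migr_type(dev) translates that pair back to
 * MIGR_REPAIR for callers in this file.
 */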

static unsigned int sector_count(__u32 bytes)
{
	return ((bytes + (512-1)) & (~(512-1))) / 512;
}

static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}
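/* Example: an mpb_size of 1724 bytes rounds up to 2048, so mpb_sectors()
 * reports 4 512-byte sectors for the anchor.
 */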
229
ba2de7ba
DW
230struct intel_dev {
231 struct imsm_dev *dev;
232 struct intel_dev *next;
f21e18ca 233 unsigned index;
ba2de7ba
DW
234};
235
88654014
LM
236struct intel_hba {
237 enum sys_dev_type type;
238 char *path;
239 char *pci_id;
240 struct intel_hba *next;
241};
242
1a64be56
LM
243enum action {
244 DISK_REMOVE = 1,
245 DISK_ADD
246};
cdddbdbc
DW
247/* internal representation of IMSM metadata */
248struct intel_super {
249 union {
949c47a0
DW
250 void *buf; /* O_DIRECT buffer for reading/writing metadata */
251 struct imsm_super *anchor; /* immovable parameters */
cdddbdbc 252 };
949c47a0 253 size_t len; /* size of the 'buf' allocation */
4d7b1503
DW
254 void *next_buf; /* for realloc'ing buf from the manager */
255 size_t next_len;
c2c087e6 256 int updates_pending; /* count of pending updates for mdmon */
bf5a934a 257 int current_vol; /* index of raid device undergoing creation */
0dcecb2e 258 __u32 create_offset; /* common start for 'current_vol' */
148acb7b 259 __u32 random; /* random data for seeding new family numbers */
ba2de7ba 260 struct intel_dev *devlist;
cdddbdbc
DW
261 struct dl {
262 struct dl *next;
263 int index;
264 __u8 serial[MAX_RAID_SERIAL_LEN];
265 int major, minor;
266 char *devname;
b9f594fe 267 struct imsm_disk disk;
cdddbdbc 268 int fd;
0dcecb2e
DW
269 int extent_cnt;
270 struct extent *e; /* for determining freespace @ create */
efb30e7f 271 int raiddisk; /* slot to fill in autolayout */
1a64be56 272 enum action action;
cdddbdbc 273 } *disks;
1a64be56
LM
274 struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
275 active */
47ee5a45 276 struct dl *missing; /* disks removed while we weren't looking */
43dad3d6 277 struct bbm_log *bbm_log;
88654014 278 struct intel_hba *hba; /* device path of the raid controller for this metadata */
88c32bb1 279 const struct imsm_orom *orom; /* platform firmware support */
a2b97981
DW
280 struct intel_super *next; /* (temp) list for disambiguating family_num */
281};
282
283struct intel_disk {
284 struct imsm_disk disk;
285 #define IMSM_UNKNOWN_OWNER (-1)
286 int owner;
287 struct intel_disk *next;
cdddbdbc
DW
288};
289
c2c087e6
DW
290struct extent {
291 unsigned long long start, size;
292};
293
694575e7
KW
294/* definitions of reshape process types */
295enum imsm_reshape_type {
296 CH_TAKEOVER,
b5347799 297 CH_MIGRATION,
694575e7
KW
298};
299
88758e9d
DW
300/* definition of messages passed to imsm_process_update */
301enum imsm_update_type {
302 update_activate_spare,
8273f55e 303 update_create_array,
33414a01 304 update_kill_array,
aa534678 305 update_rename_array,
1a64be56 306 update_add_remove_disk,
78b10e66 307 update_reshape_container_disks,
48c5303a 308 update_reshape_migration,
bb025c2f 309 update_takeover
88758e9d
DW
310};
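/* Sketch of the assumed flow: mdadm fills in one of the imsm_update_*
 * structs defined below and queues it as a metadata update; the monitor
 * side picks it up and applies it to the anchor in imsm_process_update().
 */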
311
312struct imsm_update_activate_spare {
313 enum imsm_update_type type;
d23fe947 314 struct dl *dl;
88758e9d
DW
315 int slot;
316 int array;
317 struct imsm_update_activate_spare *next;
318};
319
78b10e66
N
320struct geo_params {
321 int dev_id;
322 char *dev_name;
323 long long size;
324 int level;
325 int layout;
326 int chunksize;
327 int raid_disks;
328};
329
bb025c2f
KW
330enum takeover_direction {
331 R10_TO_R0,
332 R0_TO_R10
333};
334struct imsm_update_takeover {
335 enum imsm_update_type type;
336 int subarray;
337 enum takeover_direction direction;
338};
78b10e66
N
339
340struct imsm_update_reshape {
341 enum imsm_update_type type;
342 int old_raid_disks;
343 int new_raid_disks;
48c5303a
PC
344
345 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
346};
347
348struct imsm_update_reshape_migration {
349 enum imsm_update_type type;
350 int old_raid_disks;
351 int new_raid_disks;
352 /* fields for array migration changes
353 */
354 int subdev;
355 int new_level;
356 int new_layout;
357
d195167d 358 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
78b10e66
N
359};
360
54c2c1ea
DW
361struct disk_info {
362 __u8 serial[MAX_RAID_SERIAL_LEN];
363};
364
8273f55e
DW
365struct imsm_update_create_array {
366 enum imsm_update_type type;
8273f55e 367 int dev_idx;
6a3e913e 368 struct imsm_dev dev;
8273f55e
DW
369};
370
33414a01
DW
371struct imsm_update_kill_array {
372 enum imsm_update_type type;
373 int dev_idx;
374};
375
aa534678
DW
376struct imsm_update_rename_array {
377 enum imsm_update_type type;
378 __u8 name[MAX_RAID_SERIAL_LEN];
379 int dev_idx;
380};
381
1a64be56 382struct imsm_update_add_remove_disk {
43dad3d6
DW
383 enum imsm_update_type type;
384};
385
88654014
LM
386
387static const char *_sys_dev_type[] = {
388 [SYS_DEV_UNKNOWN] = "Unknown",
389 [SYS_DEV_SAS] = "SAS",
390 [SYS_DEV_SATA] = "SATA"
391};
392
393const char *get_sys_dev_type(enum sys_dev_type type)
394{
395 if (type >= SYS_DEV_MAX)
396 type = SYS_DEV_UNKNOWN;
397
398 return _sys_dev_type[type];
399}
400
401static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
402{
403 struct intel_hba *result = malloc(sizeof(*result));
404 if (result) {
405 result->type = device->type;
406 result->path = strdup(device->path);
407 result->next = NULL;
408 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
409 result->pci_id++;
410 }
411 return result;
412}
413
414static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
415{
416 struct intel_hba *result=NULL;
417 for (result = hba; result; result = result->next) {
418 if (result->type == device->type && strcmp(result->path, device->path) == 0)
419 break;
420 }
421 return result;
422}
423
b4cf4cba 424static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
88654014
LM
425{
426 struct intel_hba *hba;
427
428 /* check if disk attached to Intel HBA */
429 hba = find_intel_hba(super->hba, device);
430 if (hba != NULL)
431 return 1;
432 /* Check if HBA is already attached to super */
433 if (super->hba == NULL) {
434 super->hba = alloc_intel_hba(device);
435 return 1;
436 }
437
438 hba = super->hba;
	/* Intel metadata allows for all disks to be attached to the same type of HBA.
	 * Do not support mixing of HBA types.
	 */
442 if (device->type != hba->type)
443 return 2;
444
445 while (hba->next)
446 hba = hba->next;
447
448 hba->next = alloc_intel_hba(device);
449 return 1;
450}
451
452static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
453{
454 struct sys_dev *list, *elem, *prev;
455 char *disk_path;
456
457 if ((list = find_intel_devices()) == NULL)
458 return 0;
459
460 if (fd < 0)
461 disk_path = (char *) devname;
462 else
463 disk_path = diskfd_to_devpath(fd);
464
465 if (!disk_path) {
466 free_sys_dev(&list);
467 return 0;
468 }
469
470 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
471 if (path_attached_to_hba(disk_path, elem->path)) {
472 if (prev == NULL)
473 list = list->next;
474 else
475 prev->next = elem->next;
476 elem->next = NULL;
477 if (disk_path != devname)
478 free(disk_path);
479 free_sys_dev(&list);
480 return elem;
481 }
482 }
483 if (disk_path != devname)
484 free(disk_path);
485 free_sys_dev(&list);
486
487 return NULL;
488}
489
490
d424212e
N
491static int find_intel_hba_capability(int fd, struct intel_super *super,
492 char *devname);
f2f5c343 493
cdddbdbc
DW
494static struct supertype *match_metadata_desc_imsm(char *arg)
495{
496 struct supertype *st;
497
498 if (strcmp(arg, "imsm") != 0 &&
499 strcmp(arg, "default") != 0
500 )
501 return NULL;
502
503 st = malloc(sizeof(*st));
4e9d2186
AW
504 if (!st)
505 return NULL;
ef609477 506 memset(st, 0, sizeof(*st));
d1d599ea 507 st->container_dev = NoMdDev;
cdddbdbc
DW
508 st->ss = &super_imsm;
509 st->max_devs = IMSM_MAX_DEVICES;
510 st->minor_version = 0;
511 st->sb = NULL;
512 return st;
513}
514
0e600426 515#ifndef MDASSEMBLE
cdddbdbc
DW
516static __u8 *get_imsm_version(struct imsm_super *mpb)
517{
518 return &mpb->sig[MPB_SIG_LEN];
519}
0e600426 520#endif
cdddbdbc 521
949c47a0
DW
522/* retrieve a disk directly from the anchor when the anchor is known to be
523 * up-to-date, currently only at load time
524 */
525static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
cdddbdbc 526{
949c47a0 527 if (index >= mpb->num_disks)
cdddbdbc
DW
528 return NULL;
529 return &mpb->disk[index];
530}
531
/* retrieve the disk description based on the index of the disk
 * in the sub-array
 */
535static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
949c47a0 536{
b9f594fe
DW
537 struct dl *d;
538
539 for (d = super->disks; d; d = d->next)
540 if (d->index == index)
95d07a2c
LM
541 return d;
542
543 return NULL;
544}
545/* retrieve a disk from the parsed metadata */
546static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
547{
548 struct dl *dl;
549
550 dl = get_imsm_dl_disk(super, index);
551 if (dl)
552 return &dl->disk;
553
b9f594fe 554 return NULL;
949c47a0
DW
555}
556
557/* generate a checksum directly from the anchor when the anchor is known to be
558 * up-to-date, currently only at load or write_super after coalescing
559 */
560static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
cdddbdbc
DW
561{
562 __u32 end = mpb->mpb_size / sizeof(end);
563 __u32 *p = (__u32 *) mpb;
564 __u32 sum = 0;
565
97f734fd
N
566 while (end--) {
567 sum += __le32_to_cpu(*p);
568 p++;
569 }
cdddbdbc
DW
570
571 return sum - __le32_to_cpu(mpb->check_sum);
572}
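/* Typical use (as in examine_super_imsm() below): recompute the sum and
 * compare it with the stored field, e.g.
 *	__u32 sum = __le32_to_cpu(mpb->check_sum);
 *	int ok = (__gen_imsm_checksum(mpb) == sum);
 * The stored check_sum is subtracted from the running total above so the
 * field itself does not influence the result.
 */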
573
a965f303
DW
574static size_t sizeof_imsm_map(struct imsm_map *map)
575{
576 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
577}
578
579struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
cdddbdbc 580{
5e7b0330
AK
581 /* A device can have 2 maps if it is in the middle of a migration.
582 * If second_map is:
583 * 0 - we return the first map
584 * 1 - we return the second map if it exists, else NULL
585 * -1 - we return the second map if it exists, else the first
586 */
a965f303
DW
587 struct imsm_map *map = &dev->vol.map[0];
588
5e7b0330 589 if (second_map == 1 && !dev->vol.migr_state)
a965f303 590 return NULL;
5e7b0330
AK
591 else if (second_map == 1 ||
592 (second_map < 0 && dev->vol.migr_state)) {
a965f303
DW
593 void *ptr = map;
594
595 return ptr + sizeof_imsm_map(map);
596 } else
597 return map;
5e7b0330 598
a965f303 599}
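/* Usage sketch (mirrors getinfo_super_imsm_volume() below):
 *	struct imsm_map *map      = get_imsm_map(dev, 0);  current map
 *	struct imsm_map *prev_map = get_imsm_map(dev, 1);  NULL unless migrating
 *	struct imsm_map *any_map  = get_imsm_map(dev, -1); second map if present,
 *	                                                   otherwise the first
 */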
cdddbdbc 600
3393c6af
DW
601/* return the size of the device.
602 * migr_state increases the returned size if map[0] were to be duplicated
603 */
604static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
a965f303
DW
605{
606 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
607 sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
608
609 /* migrating means an additional map */
a965f303
DW
610 if (dev->vol.migr_state)
611 size += sizeof_imsm_map(get_imsm_map(dev, 1));
3393c6af
DW
612 else if (migr_state)
613 size += sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
614
615 return size;
616}
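/* Example: for a four-member map that is not migrating, sizeof_imsm_map()
 * adds 3 * sizeof(__u32) of extra disk_ord_tbl entries, and
 * sizeof_imsm_dev(dev, 0) grows by the same amount over
 * sizeof(struct imsm_dev); passing migr_state != 0 reserves room for a
 * duplicate of map[0] as well.
 */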
617
54c2c1ea
DW
618#ifndef MDASSEMBLE
619/* retrieve disk serial number list from a metadata update */
620static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
621{
622 void *u = update;
623 struct disk_info *inf;
624
625 inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
626 sizeof_imsm_dev(&update->dev, 0);
627
628 return inf;
629}
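/* The update buffer is assumed to be laid out as the fixed part of
 * struct imsm_update_create_array, then the variable tail of its embedded
 * imsm_dev (extra maps / disk_ord_tbl entries), then the disk_info
 * records appended by the code that built the update; the pointer
 * arithmetic above simply skips past the variable-sized dev.
 */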
630#endif
631
949c47a0 632static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
cdddbdbc
DW
633{
634 int offset;
635 int i;
636 void *_mpb = mpb;
637
949c47a0 638 if (index >= mpb->num_raid_devs)
cdddbdbc
DW
639 return NULL;
640
641 /* devices start after all disks */
642 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
643
644 for (i = 0; i <= index; i++)
645 if (i == index)
646 return _mpb + offset;
647 else
3393c6af 648 offset += sizeof_imsm_dev(_mpb + offset, 0);
cdddbdbc
DW
649
650 return NULL;
651}
652
949c47a0
DW
653static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
654{
ba2de7ba
DW
655 struct intel_dev *dv;
656
949c47a0
DW
657 if (index >= super->anchor->num_raid_devs)
658 return NULL;
ba2de7ba
DW
659 for (dv = super->devlist; dv; dv = dv->next)
660 if (dv->index == index)
661 return dv->dev;
662 return NULL;
949c47a0
DW
663}
664
/*
 * for second_map:
 *  == 0 get first map
 *  == 1 get second map
 *  == -1 then get the map according to the current migr_state
 */
671static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
672 int slot,
673 int second_map)
7eef0453
DW
674{
675 struct imsm_map *map;
676
5e7b0330 677 map = get_imsm_map(dev, second_map);
7eef0453 678
ff077194
DW
679 /* top byte identifies disk under rebuild */
680 return __le32_to_cpu(map->disk_ord_tbl[slot]);
681}
682
683#define ord_to_idx(ord) (((ord) << 8) >> 8)
98130f40 684static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
ff077194 685{
98130f40 686 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
ff077194
DW
687
688 return ord_to_idx(ord);
7eef0453
DW
689}
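/* Example: an ord value of 0x01000002 in disk_ord_tbl decodes to disk
 * index 2 (ord_to_idx() drops the top byte) with IMSM_ORD_REBUILD set,
 * i.e. the member in that slot is being rebuilt.
 */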
690
be73972f
DW
691static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
692{
693 map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
694}
695
f21e18ca 696static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
620b1713
DW
697{
698 int slot;
699 __u32 ord;
700
701 for (slot = 0; slot < map->num_members; slot++) {
702 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
703 if (ord_to_idx(ord) == idx)
704 return slot;
705 }
706
707 return -1;
708}
709
cdddbdbc
DW
710static int get_imsm_raid_level(struct imsm_map *map)
711{
712 if (map->raid_level == 1) {
713 if (map->num_members == 2)
714 return 1;
715 else
716 return 10;
717 }
718
719 return map->raid_level;
720}
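/* Example: a two-member map with raid_level == 1 is reported as RAID1,
 * while a map with raid_level == 1 and more than two members is reported
 * as RAID10, since IMSM expresses RAID10 as a wider RAID1 map.
 */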
721
c2c087e6
DW
722static int cmp_extent(const void *av, const void *bv)
723{
724 const struct extent *a = av;
725 const struct extent *b = bv;
726 if (a->start < b->start)
727 return -1;
728 if (a->start > b->start)
729 return 1;
730 return 0;
731}
732
0dcecb2e 733static int count_memberships(struct dl *dl, struct intel_super *super)
c2c087e6 734{
c2c087e6 735 int memberships = 0;
620b1713 736 int i;
c2c087e6 737
949c47a0
DW
738 for (i = 0; i < super->anchor->num_raid_devs; i++) {
739 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 740 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 741
620b1713
DW
742 if (get_imsm_disk_slot(map, dl->index) >= 0)
743 memberships++;
c2c087e6 744 }
0dcecb2e
DW
745
746 return memberships;
747}
748
749static struct extent *get_extents(struct intel_super *super, struct dl *dl)
750{
751 /* find a list of used extents on the given physical device */
752 struct extent *rv, *e;
620b1713 753 int i;
0dcecb2e
DW
754 int memberships = count_memberships(dl, super);
755 __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
756
c2c087e6
DW
757 rv = malloc(sizeof(struct extent) * (memberships + 1));
758 if (!rv)
759 return NULL;
760 e = rv;
761
949c47a0
DW
762 for (i = 0; i < super->anchor->num_raid_devs; i++) {
763 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 764 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 765
620b1713
DW
766 if (get_imsm_disk_slot(map, dl->index) >= 0) {
767 e->start = __le32_to_cpu(map->pba_of_lba0);
768 e->size = __le32_to_cpu(map->blocks_per_member);
769 e++;
c2c087e6
DW
770 }
771 }
772 qsort(rv, memberships, sizeof(*rv), cmp_extent);
773
	/* determine the start of the metadata:
	 * when no raid devices are defined, use the default,
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
779 if (memberships) {
780 struct extent *last = &rv[memberships - 1];
781 __u32 remainder;
782
783 remainder = __le32_to_cpu(dl->disk.total_blocks) -
784 (last->start + last->size);
dda5855f
DW
785 /* round down to 1k block to satisfy precision of the kernel
786 * 'size' interface
787 */
788 remainder &= ~1UL;
789 /* make sure remainder is still sane */
f21e18ca 790 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
dda5855f 791 remainder = ROUND_UP(super->len, 512) >> 9;
14e8215b
DW
792 if (reservation > remainder)
793 reservation = remainder;
794 }
795 e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
c2c087e6
DW
796 e->size = 0;
797 return rv;
798}
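/* Note: the returned list is terminated by a sentinel entry with
 * size == 0 whose start marks the beginning of the reserved metadata
 * area; callers such as imsm_reserved_sectors() iterate until they reach
 * that sentinel.
 */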
799
14e8215b
DW
800/* try to determine how much space is reserved for metadata from
801 * the last get_extents() entry, otherwise fallback to the
802 * default
803 */
804static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
805{
806 struct extent *e;
807 int i;
808 __u32 rv;
809
810 /* for spares just return a minimal reservation which will grow
811 * once the spare is picked up by an array
812 */
813 if (dl->index == -1)
814 return MPB_SECTOR_CNT;
815
816 e = get_extents(super, dl);
817 if (!e)
818 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
819
820 /* scroll to last entry */
821 for (i = 0; e[i].size; i++)
822 continue;
823
824 rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;
825
826 free(e);
827
828 return rv;
829}
830
25ed7e59
DW
831static int is_spare(struct imsm_disk *disk)
832{
833 return (disk->status & SPARE_DISK) == SPARE_DISK;
834}
835
836static int is_configured(struct imsm_disk *disk)
837{
838 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
839}
840
841static int is_failed(struct imsm_disk *disk)
842{
843 return (disk->status & FAILED_DISK) == FAILED_DISK;
844}
845
80e7f8c3
AC
846/* Return minimum size of a spare that can be used in this array*/
847static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
848{
849 struct intel_super *super = st->sb;
850 struct dl *dl;
851 struct extent *e;
852 int i;
853 unsigned long long rv = 0;
854
855 if (!super)
856 return rv;
857 /* find first active disk in array */
858 dl = super->disks;
859 while (dl && (is_failed(&dl->disk) || dl->index == -1))
860 dl = dl->next;
861 if (!dl)
862 return rv;
863 /* find last lba used by subarrays */
864 e = get_extents(super, dl);
865 if (!e)
866 return rv;
867 for (i = 0; e[i].size; i++)
868 continue;
869 if (i > 0)
870 rv = e[i-1].start + e[i-1].size;
871 free(e);
872 /* add the amount of space needed for metadata */
873 rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
874 return rv * 512;
875}
876
1799c9e8 877#ifndef MDASSEMBLE
1e5c6983
DW
878static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
879
44470971 880static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
cdddbdbc
DW
881{
882 __u64 sz;
0d80bb2f 883 int slot, i;
a965f303 884 struct imsm_map *map = get_imsm_map(dev, 0);
dd8bcb3b 885 struct imsm_map *map2 = get_imsm_map(dev, 1);
b10b37b8 886 __u32 ord;
cdddbdbc
DW
887
888 printf("\n");
1e7bc0ed 889 printf("[%.16s]:\n", dev->volume);
44470971 890 printf(" UUID : %s\n", uuid);
dd8bcb3b
AK
891 printf(" RAID Level : %d", get_imsm_raid_level(map));
892 if (map2)
893 printf(" <-- %d", get_imsm_raid_level(map2));
894 printf("\n");
895 printf(" Members : %d", map->num_members);
896 if (map2)
897 printf(" <-- %d", map2->num_members);
898 printf("\n");
0d80bb2f
DW
899 printf(" Slots : [");
900 for (i = 0; i < map->num_members; i++) {
dd8bcb3b 901 ord = get_imsm_ord_tbl_ent(dev, i, 0);
0d80bb2f
DW
902 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
903 }
dd8bcb3b
AK
904 printf("]");
905 if (map2) {
906 printf(" <-- [");
907 for (i = 0; i < map2->num_members; i++) {
908 ord = get_imsm_ord_tbl_ent(dev, i, 1);
909 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
910 }
911 printf("]");
912 }
913 printf("\n");
7095bccb
AK
914 printf(" Failed disk : ");
915 if (map->failed_disk_num == 0xff)
916 printf("none");
917 else
918 printf("%i", map->failed_disk_num);
919 printf("\n");
620b1713
DW
920 slot = get_imsm_disk_slot(map, disk_idx);
921 if (slot >= 0) {
98130f40 922 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
b10b37b8
DW
923 printf(" This Slot : %d%s\n", slot,
924 ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
925 } else
cdddbdbc
DW
926 printf(" This Slot : ?\n");
927 sz = __le32_to_cpu(dev->size_high);
928 sz <<= 32;
929 sz += __le32_to_cpu(dev->size_low);
930 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
931 human_size(sz * 512));
932 sz = __le32_to_cpu(map->blocks_per_member);
933 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
934 human_size(sz * 512));
935 printf(" Sector Offset : %u\n",
936 __le32_to_cpu(map->pba_of_lba0));
937 printf(" Num Stripes : %u\n",
938 __le32_to_cpu(map->num_data_stripes));
dd8bcb3b 939 printf(" Chunk Size : %u KiB",
cdddbdbc 940 __le16_to_cpu(map->blocks_per_strip) / 2);
dd8bcb3b
AK
941 if (map2)
942 printf(" <-- %u KiB",
943 __le16_to_cpu(map2->blocks_per_strip) / 2);
944 printf("\n");
cdddbdbc 945 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
8655a7b1 946 printf(" Migrate State : ");
1484e727
DW
947 if (dev->vol.migr_state) {
948 if (migr_type(dev) == MIGR_INIT)
8655a7b1 949 printf("initialize\n");
1484e727 950 else if (migr_type(dev) == MIGR_REBUILD)
8655a7b1 951 printf("rebuild\n");
1484e727 952 else if (migr_type(dev) == MIGR_VERIFY)
8655a7b1 953 printf("check\n");
1484e727 954 else if (migr_type(dev) == MIGR_GEN_MIGR)
8655a7b1 955 printf("general migration\n");
1484e727 956 else if (migr_type(dev) == MIGR_STATE_CHANGE)
8655a7b1 957 printf("state change\n");
1484e727 958 else if (migr_type(dev) == MIGR_REPAIR)
8655a7b1 959 printf("repair\n");
1484e727 960 else
8655a7b1
DW
961 printf("<unknown:%d>\n", migr_type(dev));
962 } else
963 printf("idle\n");
3393c6af
DW
964 printf(" Map State : %s", map_state_str[map->map_state]);
965 if (dev->vol.migr_state) {
966 struct imsm_map *map = get_imsm_map(dev, 1);
1e5c6983 967
b10b37b8 968 printf(" <-- %s", map_state_str[map->map_state]);
1e5c6983
DW
969 printf("\n Checkpoint : %u (%llu)",
970 __le32_to_cpu(dev->vol.curr_migr_unit),
94fcb80a 971 (unsigned long long)blocks_per_migr_unit(dev));
3393c6af
DW
972 }
973 printf("\n");
cdddbdbc 974 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
cdddbdbc
DW
975}
976
14e8215b 977static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
cdddbdbc 978{
949c47a0 979 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
1f24f035 980 char str[MAX_RAID_SERIAL_LEN + 1];
cdddbdbc
DW
981 __u64 sz;
982
d362da3d 983 if (index < 0 || !disk)
e9d82038
DW
984 return;
985
cdddbdbc 986 printf("\n");
1f24f035 987 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
cdddbdbc 988 printf(" Disk%02d Serial : %s\n", index, str);
25ed7e59
DW
989 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
990 is_configured(disk) ? " active" : "",
991 is_failed(disk) ? " failed" : "");
cdddbdbc 992 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
14e8215b 993 sz = __le32_to_cpu(disk->total_blocks) - reserved;
cdddbdbc
DW
994 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
995 human_size(sz * 512));
996}
997
a5d85af7 998static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
44470971 999
cdddbdbc
DW
1000static void examine_super_imsm(struct supertype *st, char *homehost)
1001{
1002 struct intel_super *super = st->sb;
949c47a0 1003 struct imsm_super *mpb = super->anchor;
cdddbdbc
DW
1004 char str[MAX_SIGNATURE_LENGTH];
1005 int i;
27fd6274
DW
1006 struct mdinfo info;
1007 char nbuf[64];
cdddbdbc 1008 __u32 sum;
14e8215b 1009 __u32 reserved = imsm_reserved_sectors(super, super->disks);
94827db3 1010 struct dl *dl;
27fd6274 1011
cdddbdbc
DW
1012 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
1013 printf(" Magic : %s\n", str);
1014 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1015 printf(" Version : %s\n", get_imsm_version(mpb));
148acb7b 1016 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
cdddbdbc
DW
1017 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1018 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
a5d85af7 1019 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1020 fname_from_uuid(st, &info, nbuf, ':');
27fd6274 1021 printf(" UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1022 sum = __le32_to_cpu(mpb->check_sum);
1023 printf(" Checksum : %08x %s\n", sum,
949c47a0 1024 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
87eb16df 1025 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
cdddbdbc
DW
1026 printf(" Disks : %d\n", mpb->num_disks);
1027 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
14e8215b 1028 print_imsm_disk(mpb, super->disks->index, reserved);
604b746f
JD
1029 if (super->bbm_log) {
1030 struct bbm_log *log = super->bbm_log;
1031
1032 printf("\n");
1033 printf("Bad Block Management Log:\n");
1034 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1035 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1036 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1037 printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
13a3b65d
N
1038 printf(" First Spare : %llx\n",
1039 (unsigned long long) __le64_to_cpu(log->first_spare_lba));
604b746f 1040 }
44470971
DW
1041 for (i = 0; i < mpb->num_raid_devs; i++) {
1042 struct mdinfo info;
1043 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1044
1045 super->current_vol = i;
a5d85af7 1046 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1047 fname_from_uuid(st, &info, nbuf, ':');
44470971
DW
1048 print_imsm_dev(dev, nbuf + 5, super->disks->index);
1049 }
cdddbdbc
DW
1050 for (i = 0; i < mpb->num_disks; i++) {
1051 if (i == super->disks->index)
1052 continue;
14e8215b 1053 print_imsm_disk(mpb, i, reserved);
cdddbdbc 1054 }
94827db3
N
1055 for (dl = super->disks ; dl; dl = dl->next) {
1056 struct imsm_disk *disk;
1057 char str[MAX_RAID_SERIAL_LEN + 1];
1058 __u64 sz;
1059
1060 if (dl->index >= 0)
1061 continue;
1062
1063 disk = &dl->disk;
1064 printf("\n");
1065 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1066 printf(" Disk Serial : %s\n", str);
1067 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1068 is_configured(disk) ? " active" : "",
1069 is_failed(disk) ? " failed" : "");
1070 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1071 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1072 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1073 human_size(sz * 512));
1074 }
cdddbdbc
DW
1075}
1076
061f2c6a 1077static void brief_examine_super_imsm(struct supertype *st, int verbose)
cdddbdbc 1078{
27fd6274 1079 /* We just write a generic IMSM ARRAY entry */
ff54de6e
N
1080 struct mdinfo info;
1081 char nbuf[64];
1e7bc0ed 1082 struct intel_super *super = st->sb;
1e7bc0ed 1083
0d5a423f
DW
1084 if (!super->anchor->num_raid_devs) {
1085 printf("ARRAY metadata=imsm\n");
1e7bc0ed 1086 return;
0d5a423f 1087 }
ff54de6e 1088
a5d85af7 1089 getinfo_super_imsm(st, &info, NULL);
4737ae25
N
1090 fname_from_uuid(st, &info, nbuf, ':');
1091 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1092}
1093
1094static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1095{
1096 /* We just write a generic IMSM ARRAY entry */
1097 struct mdinfo info;
1098 char nbuf[64];
1099 char nbuf1[64];
1100 struct intel_super *super = st->sb;
1101 int i;
1102
1103 if (!super->anchor->num_raid_devs)
1104 return;
1105
a5d85af7 1106 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1107 fname_from_uuid(st, &info, nbuf, ':');
1e7bc0ed
DW
1108 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1109 struct imsm_dev *dev = get_imsm_dev(super, i);
1110
1111 super->current_vol = i;
a5d85af7 1112 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1113 fname_from_uuid(st, &info, nbuf1, ':');
1124b3cf 1114 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
cf8de691 1115 dev->volume, nbuf + 5, i, nbuf1 + 5);
1e7bc0ed 1116 }
cdddbdbc
DW
1117}
1118
9d84c8ea
DW
1119static void export_examine_super_imsm(struct supertype *st)
1120{
1121 struct intel_super *super = st->sb;
1122 struct imsm_super *mpb = super->anchor;
1123 struct mdinfo info;
1124 char nbuf[64];
1125
a5d85af7 1126 getinfo_super_imsm(st, &info, NULL);
9d84c8ea
DW
1127 fname_from_uuid(st, &info, nbuf, ':');
1128 printf("MD_METADATA=imsm\n");
1129 printf("MD_LEVEL=container\n");
1130 printf("MD_UUID=%s\n", nbuf+5);
1131 printf("MD_DEVICES=%u\n", mpb->num_disks);
1132}
1133
cdddbdbc
DW
1134static void detail_super_imsm(struct supertype *st, char *homehost)
1135{
3ebe00a1
DW
1136 struct mdinfo info;
1137 char nbuf[64];
1138
a5d85af7 1139 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1140 fname_from_uuid(st, &info, nbuf, ':');
3ebe00a1 1141 printf("\n UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1142}
1143
1144static void brief_detail_super_imsm(struct supertype *st)
1145{
ff54de6e
N
1146 struct mdinfo info;
1147 char nbuf[64];
a5d85af7 1148 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1149 fname_from_uuid(st, &info, nbuf, ':');
ff54de6e 1150 printf(" UUID=%s", nbuf + 5);
cdddbdbc 1151}
d665cc31
DW
1152
1153static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1154static void fd2devname(int fd, char *name);
1155
120dc887 1156static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
d665cc31 1157{
	/* dump an unsorted list of devices attached to the AHCI Intel storage
	 * controller, as well as non-connected ports
	 */
1161 int hba_len = strlen(hba_path) + 1;
1162 struct dirent *ent;
1163 DIR *dir;
1164 char *path = NULL;
1165 int err = 0;
1166 unsigned long port_mask = (1 << port_count) - 1;
1167
f21e18ca 1168 if (port_count > (int)sizeof(port_mask) * 8) {
d665cc31
DW
1169 if (verbose)
1170 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1171 return 2;
1172 }
1173
1174 /* scroll through /sys/dev/block looking for devices attached to
1175 * this hba
1176 */
1177 dir = opendir("/sys/dev/block");
1178 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1179 int fd;
1180 char model[64];
1181 char vendor[64];
1182 char buf[1024];
1183 int major, minor;
1184 char *device;
1185 char *c;
1186 int port;
1187 int type;
1188
1189 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1190 continue;
1191 path = devt_to_devpath(makedev(major, minor));
1192 if (!path)
1193 continue;
1194 if (!path_attached_to_hba(path, hba_path)) {
1195 free(path);
1196 path = NULL;
1197 continue;
1198 }
1199
1200 /* retrieve the scsi device type */
1201 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1202 if (verbose)
1203 fprintf(stderr, Name ": failed to allocate 'device'\n");
1204 err = 2;
1205 break;
1206 }
1207 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1208 if (load_sys(device, buf) != 0) {
1209 if (verbose)
1210 fprintf(stderr, Name ": failed to read device type for %s\n",
1211 path);
1212 err = 2;
1213 free(device);
1214 break;
1215 }
1216 type = strtoul(buf, NULL, 10);
1217
1218 /* if it's not a disk print the vendor and model */
1219 if (!(type == 0 || type == 7 || type == 14)) {
1220 vendor[0] = '\0';
1221 model[0] = '\0';
1222 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1223 if (load_sys(device, buf) == 0) {
1224 strncpy(vendor, buf, sizeof(vendor));
1225 vendor[sizeof(vendor) - 1] = '\0';
1226 c = (char *) &vendor[sizeof(vendor) - 1];
1227 while (isspace(*c) || *c == '\0')
1228 *c-- = '\0';
1229
1230 }
1231 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1232 if (load_sys(device, buf) == 0) {
1233 strncpy(model, buf, sizeof(model));
1234 model[sizeof(model) - 1] = '\0';
1235 c = (char *) &model[sizeof(model) - 1];
1236 while (isspace(*c) || *c == '\0')
1237 *c-- = '\0';
1238 }
1239
1240 if (vendor[0] && model[0])
1241 sprintf(buf, "%.64s %.64s", vendor, model);
1242 else
1243 switch (type) { /* numbers from hald/linux/device.c */
1244 case 1: sprintf(buf, "tape"); break;
1245 case 2: sprintf(buf, "printer"); break;
1246 case 3: sprintf(buf, "processor"); break;
1247 case 4:
1248 case 5: sprintf(buf, "cdrom"); break;
1249 case 6: sprintf(buf, "scanner"); break;
1250 case 8: sprintf(buf, "media_changer"); break;
1251 case 9: sprintf(buf, "comm"); break;
1252 case 12: sprintf(buf, "raid"); break;
1253 default: sprintf(buf, "unknown");
1254 }
1255 } else
1256 buf[0] = '\0';
1257 free(device);
1258
1259 /* chop device path to 'host%d' and calculate the port number */
1260 c = strchr(&path[hba_len], '/');
4e5e717d
AW
1261 if (!c) {
1262 if (verbose)
1263 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1264 err = 2;
1265 break;
1266 }
d665cc31
DW
1267 *c = '\0';
1268 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1269 port -= host_base;
1270 else {
1271 if (verbose) {
1272 *c = '/'; /* repair the full string */
1273 fprintf(stderr, Name ": failed to determine port number for %s\n",
1274 path);
1275 }
1276 err = 2;
1277 break;
1278 }
1279
1280 /* mark this port as used */
1281 port_mask &= ~(1 << port);
1282
1283 /* print out the device information */
1284 if (buf[0]) {
1285 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1286 continue;
1287 }
1288
1289 fd = dev_open(ent->d_name, O_RDONLY);
1290 if (fd < 0)
1291 printf(" Port%d : - disk info unavailable -\n", port);
1292 else {
1293 fd2devname(fd, buf);
1294 printf(" Port%d : %s", port, buf);
1295 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1296 printf(" (%s)\n", buf);
1297 else
1298 printf("()\n");
1299 }
1300 close(fd);
1301 free(path);
1302 path = NULL;
1303 }
1304 if (path)
1305 free(path);
1306 if (dir)
1307 closedir(dir);
1308 if (err == 0) {
1309 int i;
1310
1311 for (i = 0; i < port_count; i++)
1312 if (port_mask & (1 << i))
1313 printf(" Port%d : - no device attached -\n", i);
1314 }
1315
1316 return err;
1317}
1318
120dc887 1319
155cbb4c 1320
120dc887
LM
1321static void print_found_intel_controllers(struct sys_dev *elem)
1322{
1323 for (; elem; elem = elem->next) {
1324 fprintf(stderr, Name ": found Intel(R) ");
1325 if (elem->type == SYS_DEV_SATA)
1326 fprintf(stderr, "SATA ");
155cbb4c
LM
1327 else if (elem->type == SYS_DEV_SAS)
1328 fprintf(stderr, "SAS ");
120dc887
LM
1329 fprintf(stderr, "RAID controller");
1330 if (elem->pci_id)
1331 fprintf(stderr, " at %s", elem->pci_id);
1332 fprintf(stderr, ".\n");
1333 }
1334 fflush(stderr);
1335}
1336
120dc887
LM
1337static int ahci_get_port_count(const char *hba_path, int *port_count)
1338{
1339 struct dirent *ent;
1340 DIR *dir;
1341 int host_base = -1;
1342
1343 *port_count = 0;
1344 if ((dir = opendir(hba_path)) == NULL)
1345 return -1;
1346
1347 for (ent = readdir(dir); ent; ent = readdir(dir)) {
1348 int host;
1349
1350 if (sscanf(ent->d_name, "host%d", &host) != 1)
1351 continue;
1352 if (*port_count == 0)
1353 host_base = host;
1354 else if (host < host_base)
1355 host_base = host;
1356
1357 if (host + 1 > *port_count + host_base)
1358 *port_count = host + 1 - host_base;
1359 }
1360 closedir(dir);
1361 return host_base;
1362}
1363
a891a3c2
LM
1364static void print_imsm_capability(const struct imsm_orom *orom)
1365{
1366 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1367 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1368 orom->hotfix_ver, orom->build);
1369 printf(" RAID Levels :%s%s%s%s%s\n",
1370 imsm_orom_has_raid0(orom) ? " raid0" : "",
1371 imsm_orom_has_raid1(orom) ? " raid1" : "",
1372 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1373 imsm_orom_has_raid10(orom) ? " raid10" : "",
1374 imsm_orom_has_raid5(orom) ? " raid5" : "");
1375 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1376 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1377 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1378 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1379 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1380 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1381 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1382 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1383 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1384 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1385 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1386 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1387 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1388 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1389 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1390 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1391 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1392 printf(" Max Disks : %d\n", orom->tds);
1393 printf(" Max Volumes : %d\n", orom->vpa);
1394 return;
1395}
1396
5615172f 1397static int detail_platform_imsm(int verbose, int enumerate_only)
d665cc31
DW
1398{
1399 /* There are two components to imsm platform support, the ahci SATA
1400 * controller and the option-rom. To find the SATA controller we
1401 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1402 * controller with the Intel vendor id is present. This approach
1403 * allows mdadm to leverage the kernel's ahci detection logic, with the
1404 * caveat that if ahci.ko is not loaded mdadm will not be able to
1405 * detect platform raid capabilities. The option-rom resides in a
1406 * platform "Adapter ROM". We scan for its signature to retrieve the
1407 * platform capabilities. If raid support is disabled in the BIOS the
1408 * option-rom capability structure will not be available.
1409 */
1410 const struct imsm_orom *orom;
1411 struct sys_dev *list, *hba;
d665cc31
DW
1412 int host_base = 0;
1413 int port_count = 0;
120dc887 1414 int result=0;
d665cc31 1415
5615172f 1416 if (enumerate_only) {
a891a3c2 1417 if (check_env("IMSM_NO_PLATFORM"))
5615172f 1418 return 0;
a891a3c2
LM
1419 list = find_intel_devices();
1420 if (!list)
1421 return 2;
1422 for (hba = list; hba; hba = hba->next) {
1423 orom = find_imsm_capability(hba->type);
1424 if (!orom) {
1425 result = 2;
1426 break;
1427 }
1428 }
1429 free_sys_dev(&list);
1430 return result;
5615172f
DW
1431 }
1432
155cbb4c
LM
1433 list = find_intel_devices();
1434 if (!list) {
d665cc31 1435 if (verbose)
155cbb4c
LM
1436 fprintf(stderr, Name ": no active Intel(R) RAID "
1437 "controller found.\n");
d665cc31
DW
1438 free_sys_dev(&list);
1439 return 2;
1440 } else if (verbose)
155cbb4c 1441 print_found_intel_controllers(list);
d665cc31 1442
a891a3c2
LM
1443 for (hba = list; hba; hba = hba->next) {
1444 orom = find_imsm_capability(hba->type);
1445 if (!orom)
1446 fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
1447 hba->path, get_sys_dev_type(hba->type));
1448 else
1449 print_imsm_capability(orom);
d665cc31
DW
1450 }
1451
120dc887
LM
1452 for (hba = list; hba; hba = hba->next) {
1453 printf(" I/O Controller : %s (%s)\n",
1454 hba->path, get_sys_dev_type(hba->type));
d665cc31 1455
120dc887
LM
1456 if (hba->type == SYS_DEV_SATA) {
1457 host_base = ahci_get_port_count(hba->path, &port_count);
1458 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
1459 if (verbose)
1460 fprintf(stderr, Name ": failed to enumerate "
1461 "ports on SATA controller at %s.", hba->pci_id);
1462 result |= 2;
1463 }
1464 }
d665cc31 1465 }
155cbb4c 1466
120dc887
LM
1467 free_sys_dev(&list);
1468 return result;
d665cc31 1469}
cdddbdbc
DW
1470#endif
1471
1472static int match_home_imsm(struct supertype *st, char *homehost)
1473{
5115ca67
DW
1474 /* the imsm metadata format does not specify any host
1475 * identification information. We return -1 since we can never
1476 * confirm nor deny whether a given array is "meant" for this
148acb7b 1477 * host. We rely on compare_super and the 'family_num' fields to
5115ca67
DW
1478 * exclude member disks that do not belong, and we rely on
1479 * mdadm.conf to specify the arrays that should be assembled.
1480 * Auto-assembly may still pick up "foreign" arrays.
1481 */
cdddbdbc 1482
9362c1c8 1483 return -1;
cdddbdbc
DW
1484}
1485
1486static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1487{
51006d85
N
1488 /* The uuid returned here is used for:
1489 * uuid to put into bitmap file (Create, Grow)
1490 * uuid for backup header when saving critical section (Grow)
1491 * comparing uuids when re-adding a device into an array
1492 * In these cases the uuid required is that of the data-array,
1493 * not the device-set.
1494 * uuid to recognise same set when adding a missing device back
1495 * to an array. This is a uuid for the device-set.
1496 *
1497 * For each of these we can make do with a truncated
1498 * or hashed uuid rather than the original, as long as
1499 * everyone agrees.
1500 * In each case the uuid required is that of the data-array,
1501 * not the device-set.
43dad3d6 1502 */
	/* imsm does not track uuids, so we synthesize one using sha1 on
	 * - the signature (which is constant for all imsm arrays, but no matter)
	 * - the orig_family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
1510 struct intel_super *super = st->sb;
43dad3d6 1511
51006d85
N
1512 char buf[20];
1513 struct sha1_ctx ctx;
1514 struct imsm_dev *dev = NULL;
148acb7b 1515 __u32 family_num;
51006d85 1516
148acb7b
DW
1517 /* some mdadm versions failed to set ->orig_family_num, in which
1518 * case fall back to ->family_num. orig_family_num will be
1519 * fixed up with the first metadata update.
1520 */
1521 family_num = super->anchor->orig_family_num;
1522 if (family_num == 0)
1523 family_num = super->anchor->family_num;
51006d85 1524 sha1_init_ctx(&ctx);
92bd8f8d 1525 sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
148acb7b 1526 sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
51006d85
N
1527 if (super->current_vol >= 0)
1528 dev = get_imsm_dev(super, super->current_vol);
1529 if (dev) {
1530 __u32 vol = super->current_vol;
1531 sha1_process_bytes(&vol, sizeof(vol), &ctx);
1532 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
1533 }
1534 sha1_finish_ctx(&ctx, buf);
1535 memcpy(uuid, buf, 4*4);
cdddbdbc
DW
1536}
1537
0d481d37 1538#if 0
4f5bc454
DW
1539static void
1540get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
cdddbdbc 1541{
cdddbdbc
DW
1542 __u8 *v = get_imsm_version(mpb);
1543 __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
1544 char major[] = { 0, 0, 0 };
1545 char minor[] = { 0 ,0, 0 };
1546 char patch[] = { 0, 0, 0 };
1547 char *ver_parse[] = { major, minor, patch };
1548 int i, j;
1549
1550 i = j = 0;
1551 while (*v != '\0' && v < end) {
1552 if (*v != '.' && j < 2)
1553 ver_parse[i][j++] = *v;
1554 else {
1555 i++;
1556 j = 0;
1557 }
1558 v++;
1559 }
1560
4f5bc454
DW
1561 *m = strtol(minor, NULL, 0);
1562 *p = strtol(patch, NULL, 0);
1563}
0d481d37 1564#endif
4f5bc454 1565
1e5c6983
DW
1566static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1567{
1568 /* migr_strip_size when repairing or initializing parity */
1569 struct imsm_map *map = get_imsm_map(dev, 0);
1570 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1571
1572 switch (get_imsm_raid_level(map)) {
1573 case 5:
1574 case 10:
1575 return chunk;
1576 default:
1577 return 128*1024 >> 9;
1578 }
1579}
1580
1581static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1582{
	/* migr_strip_size when rebuilding a degraded disk; no idea why
	 * this is different from migr_strip_blocks_resync(), but it's good
	 * to be compatible
	 */
1587 struct imsm_map *map = get_imsm_map(dev, 1);
1588 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1589
1590 switch (get_imsm_raid_level(map)) {
1591 case 1:
1592 case 10:
1593 if (map->num_members % map->num_domains == 0)
1594 return 128*1024 >> 9;
1595 else
1596 return chunk;
1597 case 5:
1598 return max((__u32) 64*1024 >> 9, chunk);
1599 default:
1600 return 128*1024 >> 9;
1601 }
1602}
1603
1604static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1605{
1606 struct imsm_map *lo = get_imsm_map(dev, 0);
1607 struct imsm_map *hi = get_imsm_map(dev, 1);
1608 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1609 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1610
1611 return max((__u32) 1, hi_chunk / lo_chunk);
1612}
1613
1614static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1615{
1616 struct imsm_map *lo = get_imsm_map(dev, 0);
1617 int level = get_imsm_raid_level(lo);
1618
1619 if (level == 1 || level == 10) {
1620 struct imsm_map *hi = get_imsm_map(dev, 1);
1621
1622 return hi->num_domains;
1623 } else
1624 return num_stripes_per_unit_resync(dev);
1625}
1626
98130f40 1627static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1e5c6983
DW
1628{
1629 /* named 'imsm_' because raid0, raid1 and raid10
1630 * counter-intuitively have the same number of data disks
1631 */
98130f40 1632 struct imsm_map *map = get_imsm_map(dev, second_map);
1e5c6983
DW
1633
1634 switch (get_imsm_raid_level(map)) {
1635 case 0:
1636 case 1:
1637 case 10:
1638 return map->num_members;
1639 case 5:
1640 return map->num_members - 1;
1641 default:
1642 dprintf("%s: unsupported raid level\n", __func__);
1643 return 0;
1644 }
1645}
1646
1647static __u32 parity_segment_depth(struct imsm_dev *dev)
1648{
1649 struct imsm_map *map = get_imsm_map(dev, 0);
1650 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1651
1652 switch(get_imsm_raid_level(map)) {
1653 case 1:
1654 case 10:
1655 return chunk * map->num_domains;
1656 case 5:
1657 return chunk * map->num_members;
1658 default:
1659 return chunk;
1660 }
1661}
1662
1663static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
1664{
1665 struct imsm_map *map = get_imsm_map(dev, 1);
1666 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1667 __u32 strip = block / chunk;
1668
1669 switch (get_imsm_raid_level(map)) {
1670 case 1:
1671 case 10: {
1672 __u32 vol_strip = (strip * map->num_domains) + 1;
1673 __u32 vol_stripe = vol_strip / map->num_members;
1674
1675 return vol_stripe * chunk + block % chunk;
1676 } case 5: {
1677 __u32 stripe = strip / (map->num_members - 1);
1678
1679 return stripe * chunk + block % chunk;
1680 }
1681 default:
1682 return 0;
1683 }
1684}
1685
1686static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
1687{
1688 /* calculate the conversion factor between per member 'blocks'
1689 * (md/{resync,rebuild}_start) and imsm migration units, return
1690 * 0 for the 'not migrating' and 'unsupported migration' cases
1691 */
1692 if (!dev->vol.migr_state)
1693 return 0;
1694
1695 switch (migr_type(dev)) {
6345120e 1696 case MIGR_GEN_MIGR:
1e5c6983
DW
1697 case MIGR_VERIFY:
1698 case MIGR_REPAIR:
1699 case MIGR_INIT: {
1700 struct imsm_map *map = get_imsm_map(dev, 0);
1701 __u32 stripes_per_unit;
1702 __u32 blocks_per_unit;
1703 __u32 parity_depth;
1704 __u32 migr_chunk;
1705 __u32 block_map;
1706 __u32 block_rel;
1707 __u32 segment;
1708 __u32 stripe;
1709 __u8 disks;
1710
1711 /* yes, this is really the translation of migr_units to
1712 * per-member blocks in the 'resync' case
1713 */
1714 stripes_per_unit = num_stripes_per_unit_resync(dev);
1715 migr_chunk = migr_strip_blocks_resync(dev);
98130f40 1716 disks = imsm_num_data_members(dev, 0);
1e5c6983
DW
1717 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
1718 stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
1719 segment = blocks_per_unit / stripe;
1720 block_rel = blocks_per_unit - segment * stripe;
1721 parity_depth = parity_segment_depth(dev);
1722 block_map = map_migr_block(dev, block_rel);
1723 return block_map + parity_depth * segment;
1724 }
1725 case MIGR_REBUILD: {
1726 __u32 stripes_per_unit;
1727 __u32 migr_chunk;
1728
1729 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
1730 migr_chunk = migr_strip_blocks_rebuild(dev);
1731 return migr_chunk * stripes_per_unit;
1732 }
1e5c6983
DW
1733 case MIGR_STATE_CHANGE:
1734 default:
1735 return 0;
1736 }
1737}
1738
c2c087e6
DW
1739static int imsm_level_to_layout(int level)
1740{
1741 switch (level) {
1742 case 0:
1743 case 1:
1744 return 0;
1745 case 5:
1746 case 6:
a380c027 1747 return ALGORITHM_LEFT_ASYMMETRIC;
c2c087e6 1748 case 10:
c92a2527 1749 return 0x102;
c2c087e6 1750 }
a18a888e 1751 return UnSet;
c2c087e6
DW
1752}
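/* Note on the values above: 0x102 is md's raid10 layout encoding for
 * two "near" copies (near copies in the low byte, far copies in the
 * next byte), and raid5/raid6 volumes are always reported with the
 * left-asymmetric parity layout here.
 */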
1753
a5d85af7 1754static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
bf5a934a
DW
1755{
1756 struct intel_super *super = st->sb;
949c47a0 1757 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
a965f303 1758 struct imsm_map *map = get_imsm_map(dev, 0);
81ac8b4d 1759 struct imsm_map *prev_map = get_imsm_map(dev, 1);
b335e593 1760 struct imsm_map *map_to_analyse = map;
efb30e7f 1761 struct dl *dl;
e207da2f 1762 char *devname;
139dae11 1763 unsigned int component_size_alligment;
a5d85af7 1764 int map_disks = info->array.raid_disks;
bf5a934a 1765
b335e593
AK
1766 if (prev_map)
1767 map_to_analyse = prev_map;
1768
efb30e7f
DW
1769 for (dl = super->disks; dl; dl = dl->next)
1770 if (dl->raiddisk == info->disk.raid_disk)
1771 break;
bf5a934a 1772 info->container_member = super->current_vol;
cd0430a1 1773 info->array.raid_disks = map->num_members;
b335e593 1774 info->array.level = get_imsm_raid_level(map_to_analyse);
bf5a934a
DW
1775 info->array.layout = imsm_level_to_layout(info->array.level);
1776 info->array.md_minor = -1;
1777 info->array.ctime = 0;
1778 info->array.utime = 0;
b335e593
AK
1779 info->array.chunk_size =
1780 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
301406c9 1781 info->array.state = !dev->vol.dirty;
da9b4a62
DW
1782 info->custom_array_size = __le32_to_cpu(dev->size_high);
1783 info->custom_array_size <<= 32;
1784 info->custom_array_size |= __le32_to_cpu(dev->size_low);
3f83228a
N
1785 if (prev_map && map->map_state == prev_map->map_state) {
1786 info->reshape_active = 1;
b335e593
AK
1787 info->new_level = get_imsm_raid_level(map);
1788 info->new_layout = imsm_level_to_layout(info->new_level);
1789 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
3f83228a 1790 info->delta_disks = map->num_members - prev_map->num_members;
493f5dd6
N
1791 if (info->delta_disks) {
1792 /* this needs to be applied to every array
1793 * in the container.
1794 */
1795 info->reshape_active = 2;
1796 }
3f83228a
N
1797 /* The shape information that we give to md might have to be
1798 * modified to cope with md's requirements for reshaping arrays.
1799 * For example, when reshaping a RAID0, md requires it to be
1800 * presented as a degraded RAID4.
1801 * Also if a RAID0 is migrating to a RAID5 we need to specify
1802 * the array as already being RAID5, but the 'before' layout
1803 * is a RAID4-like layout.
1804 */
1805 switch (info->array.level) {
1806 case 0:
1807 switch(info->new_level) {
1808 case 0:
1809 /* conversion is happening as RAID4 */
1810 info->array.level = 4;
1811 info->array.raid_disks += 1;
1812 break;
1813 case 5:
1814 /* conversion is happening as RAID5 */
1815 info->array.level = 5;
1816 info->array.layout = ALGORITHM_PARITY_N;
1817 info->array.raid_disks += 1;
1818 info->delta_disks -= 1;
1819 break;
1820 default:
1821 /* FIXME error message */
1822 info->array.level = UnSet;
1823 break;
1824 }
1825 break;
1826 }
b335e593
AK
1827 } else {
1828 info->new_level = UnSet;
1829 info->new_layout = UnSet;
1830 info->new_chunk = info->array.chunk_size;
3f83228a 1831 info->delta_disks = 0;
b335e593 1832 }
301406c9
DW
1833 info->disk.major = 0;
1834 info->disk.minor = 0;
efb30e7f
DW
1835 if (dl) {
1836 info->disk.major = dl->major;
1837 info->disk.minor = dl->minor;
1838 }
bf5a934a 1839
b335e593
AK
1840 info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
1841 info->component_size =
1842 __le32_to_cpu(map_to_analyse->blocks_per_member);
139dae11
AK
1843
1844 /* check component size alignment
1845 */
1846 component_size_alligment =
1847 info->component_size % (info->array.chunk_size/512);
1848
1849 if (component_size_alligment &&
1850 (info->array.level != 1) && (info->array.level != UnSet)) {
1851 dprintf("imsm: reported component size aligned from %llu ",
1852 info->component_size);
1853 info->component_size -= component_size_alligment;
1854 dprintf("to %llu (%i).\n",
1855 info->component_size, component_size_alligment);
1856 }
1857
301406c9 1858 memset(info->uuid, 0, sizeof(info->uuid));
921d9e16 1859 info->recovery_start = MaxSector;
bf5a934a 1860
d2e6d5d6 1861 info->reshape_progress = 0;
b6796ce1 1862 info->resync_start = MaxSector;
b335e593
AK
1863 if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
1864 dev->vol.dirty) {
301406c9 1865 info->resync_start = 0;
b6796ce1
AK
1866 }
1867 if (dev->vol.migr_state) {
1e5c6983
DW
1868 switch (migr_type(dev)) {
1869 case MIGR_REPAIR:
1870 case MIGR_INIT: {
1871 __u64 blocks_per_unit = blocks_per_migr_unit(dev);
1872 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
1873
1874 info->resync_start = blocks_per_unit * units;
1875 break;
1876 }
d2e6d5d6
AK
1877 case MIGR_GEN_MIGR: {
1878 __u64 blocks_per_unit = blocks_per_migr_unit(dev);
1879 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
04fa9523
AK
1880 unsigned long long array_blocks;
1881 int used_disks;
d2e6d5d6
AK
1882
1883 info->reshape_progress = blocks_per_unit * units;
6289d1e0
AK
1884
1885 /* the checkpoint is written in per-disk units;
1886 * recalculate it as a reshape position
1887 */
1888 used_disks = imsm_num_data_members(dev, 0);
1889 info->reshape_progress *= used_disks;
d2e6d5d6
AK
1890 dprintf("IMSM: General Migration checkpoint : %llu "
1891 "(%llu) -> read reshape progress : %llu\n",
1892 units, blocks_per_unit, info->reshape_progress);
75156c46
AK
1893
1894 used_disks = imsm_num_data_members(dev, 1);
1895 if (used_disks > 0) {
1896 array_blocks = map->blocks_per_member *
1897 used_disks;
1898 /* round array size down to closest MB
1899 */
1900 info->custom_array_size = (array_blocks
1901 >> SECT_PER_MB_SHIFT)
1902 << SECT_PER_MB_SHIFT;
1903 }
d2e6d5d6 1904 }
1e5c6983
DW
1905 case MIGR_VERIFY:
1906 /* we could emulate the checkpointing of
1907 * 'sync_action=check' migrations, but for now
1908 * we just immediately complete them
1909 */
1910 case MIGR_REBUILD:
1911 /* this is handled by container_content_imsm() */
1e5c6983
DW
1912 case MIGR_STATE_CHANGE:
1913 /* FIXME handle other migrations */
1914 default:
1915 /* we are not dirty, so... */
1916 info->resync_start = MaxSector;
1917 }
b6796ce1 1918 }
301406c9
DW
1919
1920 strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
1921 info->name[MAX_RAID_SERIAL_LEN] = 0;
bf5a934a 1922
f35f2525
N
1923 info->array.major_version = -1;
1924 info->array.minor_version = -2;
e207da2f
AW
1925 devname = devnum2devname(st->container_dev);
1926 *info->text_version = '\0';
1927 if (devname)
1928 sprintf(info->text_version, "/%s/%d", devname, info->container_member);
1929 free(devname);
a67dd8cc 1930 info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
51006d85 1931 uuid_from_super_imsm(st, info->uuid);
a5d85af7
N
1932
1933 if (dmap) {
1934 int i, j;
1935 for (i=0; i<map_disks; i++) {
1936 dmap[i] = 0;
1937 if (i < info->array.raid_disks) {
1938 struct imsm_disk *dsk;
98130f40 1939 j = get_imsm_disk_idx(dev, i, -1);
a5d85af7
N
1940 dsk = get_imsm_disk(super, j);
1941 if (dsk && (dsk->status & CONFIGURED_DISK))
1942 dmap[i] = 1;
1943 }
1944 }
1945 }
81ac8b4d 1946}
bf5a934a 1947
97b4d0e9
DW
1948static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
1949static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
1950
1951static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
1952{
1953 struct dl *d;
1954
1955 for (d = super->missing; d; d = d->next)
1956 if (d->index == index)
1957 return &d->disk;
1958 return NULL;
1959}
1960
a5d85af7 1961static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
4f5bc454
DW
1962{
1963 struct intel_super *super = st->sb;
4f5bc454 1964 struct imsm_disk *disk;
a5d85af7 1965 int map_disks = info->array.raid_disks;
ab3cb6b3
N
1966 int max_enough = -1;
1967 int i;
1968 struct imsm_super *mpb;
4f5bc454 1969
bf5a934a 1970 if (super->current_vol >= 0) {
a5d85af7 1971 getinfo_super_imsm_volume(st, info, map);
bf5a934a
DW
1972 return;
1973 }
d23fe947
DW
1974
1975 /* Set raid_disks to zero so that Assemble will always pull in valid
1976 * spares
1977 */
1978 info->array.raid_disks = 0;
cdddbdbc
DW
1979 info->array.level = LEVEL_CONTAINER;
1980 info->array.layout = 0;
1981 info->array.md_minor = -1;
c2c087e6 1982 info->array.ctime = 0; /* N/A for imsm */
cdddbdbc
DW
1983 info->array.utime = 0;
1984 info->array.chunk_size = 0;
1985
1986 info->disk.major = 0;
1987 info->disk.minor = 0;
cdddbdbc 1988 info->disk.raid_disk = -1;
c2c087e6 1989 info->reshape_active = 0;
f35f2525
N
1990 info->array.major_version = -1;
1991 info->array.minor_version = -2;
c2c087e6 1992 strcpy(info->text_version, "imsm");
a67dd8cc 1993 info->safe_mode_delay = 0;
c2c087e6
DW
1994 info->disk.number = -1;
1995 info->disk.state = 0;
c5afc314 1996 info->name[0] = 0;
921d9e16 1997 info->recovery_start = MaxSector;
c2c087e6 1998
97b4d0e9 1999 /* do we have all the in-sync disks that we expect? */
ab3cb6b3 2000 mpb = super->anchor;
97b4d0e9 2001
ab3cb6b3
N
2002 for (i = 0; i < mpb->num_raid_devs; i++) {
2003 struct imsm_dev *dev = get_imsm_dev(super, i);
2004 int failed, enough, j, missing = 0;
2005 struct imsm_map *map;
2006 __u8 state;
97b4d0e9 2007
ab3cb6b3
N
2008 failed = imsm_count_failed(super, dev);
2009 state = imsm_check_degraded(super, dev, failed);
2010 map = get_imsm_map(dev, dev->vol.migr_state);
2011
2012 /* any newly missing disks?
2013 * (catches single-degraded vs double-degraded)
2014 */
2015 for (j = 0; j < map->num_members; j++) {
98130f40 2016 __u32 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ab3cb6b3
N
2017 __u32 idx = ord_to_idx(ord);
2018
2019 if (!(ord & IMSM_ORD_REBUILD) &&
2020 get_imsm_missing(super, idx)) {
2021 missing = 1;
2022 break;
2023 }
97b4d0e9 2024 }
ab3cb6b3
N
2025
2026 if (state == IMSM_T_STATE_FAILED)
2027 enough = -1;
2028 else if (state == IMSM_T_STATE_DEGRADED &&
2029 (state != map->map_state || missing))
2030 enough = 0;
2031 else /* we're normal, or already degraded */
2032 enough = 1;
2033
2034 /* in the missing/failed disk case check to see
2035 * if at least one array is runnable
2036 */
2037 max_enough = max(max_enough, enough);
2038 }
2039 dprintf("%s: enough: %d\n", __func__, max_enough);
2040 info->container_enough = max_enough;
97b4d0e9 2041
4a04ec6c 2042 if (super->disks) {
14e8215b
DW
2043 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2044
b9f594fe 2045 disk = &super->disks->disk;
14e8215b
DW
2046 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2047 info->component_size = reserved;
25ed7e59 2048 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
df474657
DW
2049 /* we don't change info->disk.raid_disk here because
2050 * this state will be finalized in mdmon after we have
2051 * found the 'most fresh' version of the metadata
2052 */
25ed7e59
DW
2053 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2054 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
cdddbdbc 2055 }
a575e2a7
DW
2056
2057 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2058 * ->compare_super may have updated the 'num_raid_devs' field for spares
2059 */
2060 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
36ba7d48 2061 uuid_from_super_imsm(st, info->uuid);
22e263f6
AC
2062 else
2063 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
a5d85af7
N
2064
2065 /* I don't know how to compute 'map' on imsm, so use safe default */
2066 if (map) {
2067 int i;
2068 for (i = 0; i < map_disks; i++)
2069 map[i] = 1;
2070 }
2071
cdddbdbc
DW
2072}
2073
5c4cd5da
AC
2074/* allocates memory and fills in one mdinfo disk entry
2075 * for each disk in the array */
2076struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2077{
2078 struct mdinfo *mddev = NULL;
2079 struct intel_super *super = st->sb;
2080 struct imsm_disk *disk;
2081 int count = 0;
2082 struct dl *dl;
2083 if (!super || !super->disks)
2084 return NULL;
2085 dl = super->disks;
2086 mddev = malloc(sizeof(*mddev));
2087 if (!mddev) {
2088 fprintf(stderr, Name ": Failed to allocate memory.\n");
2089 return NULL;
2090 }
2091 memset(mddev, 0, sizeof(*mddev));
2092 while (dl) {
2093 struct mdinfo *tmp;
2094 disk = &dl->disk;
2095 tmp = malloc(sizeof(*tmp));
2096 if (!tmp) {
2097 fprintf(stderr, Name ": Failed to allocate memory.\n");
2098 if (mddev)
2099 sysfs_free(mddev);
2100 return NULL;
2101 }
2102 memset(tmp, 0, sizeof(*tmp));
2103 if (mddev->devs)
2104 tmp->next = mddev->devs;
2105 mddev->devs = tmp;
2106 tmp->disk.number = count++;
2107 tmp->disk.major = dl->major;
2108 tmp->disk.minor = dl->minor;
2109 tmp->disk.state = is_configured(disk) ?
2110 (1 << MD_DISK_ACTIVE) : 0;
2111 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2112 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2113 tmp->disk.raid_disk = -1;
2114 dl = dl->next;
2115 }
2116 return mddev;
2117}
2118
cdddbdbc
DW
2119static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2120 char *update, char *devname, int verbose,
2121 int uuid_set, char *homehost)
2122{
f352c545
DW
2123 /* For 'assemble' and 'force' we need to return non-zero if any
2124 * change was made. For others, the return value is ignored.
2125 * Update options are:
2126 * force-one : This device looks a bit old but needs to be included,
2127 * update age info appropriately.
2128 * assemble: clear any 'faulty' flag to allow this device to
2129 * be assembled.
2130 * force-array: Array is degraded but being forced, mark it clean
2131 * if that will be needed to assemble it.
2132 *
2133 * newdev: not used ????
2134 * grow: Array has gained a new device - this is currently for
2135 * linear only
2136 * resync: mark as dirty so a resync will happen.
2137 * name: update the name - preserving the homehost
6e46bf34 2138 * uuid: Change the uuid of the array to match what is given
f352c545
DW
2139 *
2140 * Following are not relevant for this imsm:
2141 * sparc2.2 : update from old dodgy metadata
2142 * super-minor: change the preferred_minor number
2143 * summaries: update redundant counters.
f352c545
DW
2144 * homehost: update the recorded homehost
2145 * _reshape_progress: record new reshape_progress position.
2146 */
6e46bf34
DW
2147 int rv = 1;
2148 struct intel_super *super = st->sb;
2149 struct imsm_super *mpb;
f352c545 2150
6e46bf34
DW
2151 /* we can only update container info */
2152 if (!super || super->current_vol >= 0 || !super->anchor)
2153 return 1;
2154
2155 mpb = super->anchor;
2156
2157 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
1e2b2765 2158 rv = -1;
6e46bf34
DW
2159 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2160 mpb->orig_family_num = *((__u32 *) info->update_private);
2161 rv = 0;
2162 } else if (strcmp(update, "uuid") == 0) {
2163 __u32 *new_family = malloc(sizeof(*new_family));
2164
2165 /* update orig_family_number with the incoming random
2166 * data, report the new effective uuid, and store the
2167 * new orig_family_num for future updates.
2168 */
2169 if (new_family) {
2170 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2171 uuid_from_super_imsm(st, info->uuid);
2172 *new_family = mpb->orig_family_num;
2173 info->update_private = new_family;
2174 rv = 0;
2175 }
2176 } else if (strcmp(update, "assemble") == 0)
2177 rv = 0;
2178 else
1e2b2765 2179 rv = -1;
f352c545 2180
6e46bf34
DW
2181 /* successful update? recompute checksum */
2182 if (rv == 0)
2183 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
f352c545
DW
2184
2185 return rv;
cdddbdbc
DW
2186}
2187
c2c087e6 2188static size_t disks_to_mpb_size(int disks)
cdddbdbc 2189{
c2c087e6 2190 size_t size;
cdddbdbc 2191
c2c087e6
DW
2192 size = sizeof(struct imsm_super);
2193 size += (disks - 1) * sizeof(struct imsm_disk);
2194 size += 2 * sizeof(struct imsm_dev);
2195 /* up to 2 maps per raid device (-2 for the imsm_maps already in imsm_dev) */
2196 size += (4 - 2) * sizeof(struct imsm_map);
2197 /* 4 possible disk_ord_tbl's */
2198 size += 4 * (disks - 1) * sizeof(__u32);
2199
2200 return size;
2201}
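/* The worst case budgeted above is two raid devices, each carrying two
 * maps (the extra map only exists while a device is migrating), plus
 * four disk_ord_tbl copies; the "- 1" and "- 2" adjustments account for
 * the single imsm_disk, imsm_map and ord-table entry already embedded
 * in the base structures.
 */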
2202
2203static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2204{
2205 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2206 return 0;
2207
2208 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
cdddbdbc
DW
2209}
2210
ba2de7ba
DW
2211static void free_devlist(struct intel_super *super)
2212{
2213 struct intel_dev *dv;
2214
2215 while (super->devlist) {
2216 dv = super->devlist->next;
2217 free(super->devlist->dev);
2218 free(super->devlist);
2219 super->devlist = dv;
2220 }
2221}
2222
2223static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
2224{
2225 memcpy(dest, src, sizeof_imsm_dev(src, 0));
2226}
2227
cdddbdbc
DW
2228static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2229{
2230 /*
2231 * return:
2232 * 0 same, or first was empty, and second was copied
2233 * 1 second had wrong number
2234 * 2 wrong uuid
2235 * 3 wrong other info
2236 */
2237 struct intel_super *first = st->sb;
2238 struct intel_super *sec = tst->sb;
2239
2240 if (!first) {
2241 st->sb = tst->sb;
2242 tst->sb = NULL;
2243 return 0;
2244 }
8603ea6f
LM
2245 /* in platform dependent environment test if the disks
2246 * use the same Intel hba
2247 */
2248 if (!check_env("IMSM_NO_PLATFORM")) {
ea2bc72b
LM
2249 if (!first->hba || !sec->hba ||
2250 (first->hba->type != sec->hba->type)) {
8603ea6f
LM
2251 fprintf(stderr,
2252 "HBAs of devices does not match %s != %s\n",
ea2bc72b
LM
2253 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2254 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
8603ea6f
LM
2255 return 3;
2256 }
2257 }
cdddbdbc 2258
d23fe947
DW
2259 /* if an anchor does not have num_raid_devs set then it is a free
2260 * floating spare
2261 */
2262 if (first->anchor->num_raid_devs > 0 &&
2263 sec->anchor->num_raid_devs > 0) {
a2b97981
DW
2264 /* Determine if these disks might ever have been
2265 * related. Further disambiguation can only take place
2266 * in load_super_imsm_all
2267 */
2268 __u32 first_family = first->anchor->orig_family_num;
2269 __u32 sec_family = sec->anchor->orig_family_num;
2270
f796af5d
DW
2271 if (memcmp(first->anchor->sig, sec->anchor->sig,
2272 MAX_SIGNATURE_LENGTH) != 0)
2273 return 3;
2274
a2b97981
DW
2275 if (first_family == 0)
2276 first_family = first->anchor->family_num;
2277 if (sec_family == 0)
2278 sec_family = sec->anchor->family_num;
2279
2280 if (first_family != sec_family)
d23fe947 2281 return 3;
f796af5d 2282
d23fe947 2283 }
cdddbdbc 2284
f796af5d 2285
3e372e5a
DW
2286 /* if 'first' is a spare promote it to a populated mpb with sec's
2287 * family number
2288 */
2289 if (first->anchor->num_raid_devs == 0 &&
2290 sec->anchor->num_raid_devs > 0) {
78d30f94 2291 int i;
ba2de7ba
DW
2292 struct intel_dev *dv;
2293 struct imsm_dev *dev;
78d30f94
DW
2294
2295 /* we need to copy raid device info from sec if an allocation
2296 * fails here we don't associate the spare
2297 */
2298 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
ba2de7ba
DW
2299 dv = malloc(sizeof(*dv));
2300 if (!dv)
2301 break;
2302 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2303 if (!dev) {
2304 free(dv);
2305 break;
78d30f94 2306 }
ba2de7ba
DW
2307 dv->dev = dev;
2308 dv->index = i;
2309 dv->next = first->devlist;
2310 first->devlist = dv;
78d30f94 2311 }
709743c5 2312 if (i < sec->anchor->num_raid_devs) {
ba2de7ba
DW
2313 /* allocation failure */
2314 free_devlist(first);
2315 fprintf(stderr, "imsm: failed to associate spare\n");
2316 return 3;
78d30f94 2317 }
3e372e5a 2318 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
148acb7b 2319 first->anchor->orig_family_num = sec->anchor->orig_family_num;
3e372e5a 2320 first->anchor->family_num = sec->anchor->family_num;
ac6449be 2321 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
709743c5
DW
2322 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2323 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
3e372e5a
DW
2324 }
2325
cdddbdbc
DW
2326 return 0;
2327}
2328
0030e8d6
DW
2329static void fd2devname(int fd, char *name)
2330{
2331 struct stat st;
2332 char path[256];
33a6535d 2333 char dname[PATH_MAX];
0030e8d6
DW
2334 char *nm;
2335 int rv;
2336
2337 name[0] = '\0';
2338 if (fstat(fd, &st) != 0)
2339 return;
2340 sprintf(path, "/sys/dev/block/%d:%d",
2341 major(st.st_rdev), minor(st.st_rdev));
2342
2343 rv = readlink(path, dname, sizeof(dname));
2344 if (rv <= 0)
2345 return;
2346
2347 dname[rv] = '\0';
2348 nm = strrchr(dname, '/');
2349 nm++;
2350 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2351}
2352
cdddbdbc
DW
2353extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2354
2355static int imsm_read_serial(int fd, char *devname,
2356 __u8 serial[MAX_RAID_SERIAL_LEN])
2357{
2358 unsigned char scsi_serial[255];
cdddbdbc
DW
2359 int rv;
2360 int rsp_len;
1f24f035 2361 int len;
316e2bf4
DW
2362 char *dest;
2363 char *src;
2364 char *rsp_buf;
2365 int i;
cdddbdbc
DW
2366
2367 memset(scsi_serial, 0, sizeof(scsi_serial));
cdddbdbc 2368
f9ba0ff1
DW
2369 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
2370
40ebbb9c 2371 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
f9ba0ff1
DW
2372 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2373 fd2devname(fd, (char *) serial);
0030e8d6
DW
2374 return 0;
2375 }
2376
cdddbdbc
DW
2377 if (rv != 0) {
2378 if (devname)
2379 fprintf(stderr,
2380 Name ": Failed to retrieve serial for %s\n",
2381 devname);
2382 return rv;
2383 }
2384
2385 rsp_len = scsi_serial[3];
03cd4cc8
DW
2386 if (!rsp_len) {
2387 if (devname)
2388 fprintf(stderr,
2389 Name ": Failed to retrieve serial for %s\n",
2390 devname);
2391 return 2;
2392 }
1f24f035 2393 rsp_buf = (char *) &scsi_serial[4];
5c3db629 2394
316e2bf4
DW
2395 /* trim all whitespace and non-printable characters and convert
2396 * ':' to ';'
2397 */
2398 for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
2399 src = &rsp_buf[i];
2400 if (*src > 0x20) {
2401 /* ':' is reserved for use in placeholder serial
2402 * numbers for missing disks
2403 */
2404 if (*src == ':')
2405 *dest++ = ';';
2406 else
2407 *dest++ = *src;
2408 }
2409 }
2410 len = dest - rsp_buf;
2411 dest = rsp_buf;
2412
2413 /* truncate leading characters */
2414 if (len > MAX_RAID_SERIAL_LEN) {
2415 dest += len - MAX_RAID_SERIAL_LEN;
1f24f035 2416 len = MAX_RAID_SERIAL_LEN;
316e2bf4 2417 }
5c3db629 2418
5c3db629 2419 memset(serial, 0, MAX_RAID_SERIAL_LEN);
316e2bf4 2420 memcpy(serial, dest, len);
cdddbdbc
DW
2421
2422 return 0;
2423}
2424
1f24f035
DW
2425static int serialcmp(__u8 *s1, __u8 *s2)
2426{
2427 return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
2428}
2429
2430static void serialcpy(__u8 *dest, __u8 *src)
2431{
2432 strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
2433}
2434
1799c9e8 2435#ifndef MDASSEMBLE
54c2c1ea
DW
2436static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2437{
2438 struct dl *dl;
2439
2440 for (dl = super->disks; dl; dl = dl->next)
2441 if (serialcmp(dl->serial, serial) == 0)
2442 break;
2443
2444 return dl;
2445}
1799c9e8 2446#endif
54c2c1ea 2447
a2b97981
DW
2448static struct imsm_disk *
2449__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2450{
2451 int i;
2452
2453 for (i = 0; i < mpb->num_disks; i++) {
2454 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2455
2456 if (serialcmp(disk->serial, serial) == 0) {
2457 if (idx)
2458 *idx = i;
2459 return disk;
2460 }
2461 }
2462
2463 return NULL;
2464}
2465
cdddbdbc
DW
2466static int
2467load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2468{
a2b97981 2469 struct imsm_disk *disk;
cdddbdbc
DW
2470 struct dl *dl;
2471 struct stat stb;
cdddbdbc 2472 int rv;
a2b97981 2473 char name[40];
d23fe947
DW
2474 __u8 serial[MAX_RAID_SERIAL_LEN];
2475
2476 rv = imsm_read_serial(fd, devname, serial);
2477
2478 if (rv != 0)
2479 return 2;
2480
a2b97981 2481 dl = calloc(1, sizeof(*dl));
b9f594fe 2482 if (!dl) {
cdddbdbc
DW
2483 if (devname)
2484 fprintf(stderr,
2485 Name ": failed to allocate disk buffer for %s\n",
2486 devname);
2487 return 2;
2488 }
cdddbdbc 2489
a2b97981
DW
2490 fstat(fd, &stb);
2491 dl->major = major(stb.st_rdev);
2492 dl->minor = minor(stb.st_rdev);
2493 dl->next = super->disks;
2494 dl->fd = keep_fd ? fd : -1;
2495 assert(super->disks == NULL);
2496 super->disks = dl;
2497 serialcpy(dl->serial, serial);
2498 dl->index = -2;
2499 dl->e = NULL;
2500 fd2devname(fd, name);
2501 if (devname)
2502 dl->devname = strdup(devname);
2503 else
2504 dl->devname = strdup(name);
cdddbdbc 2505
d23fe947 2506 /* look up this disk's index in the current anchor */
a2b97981
DW
2507 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2508 if (disk) {
2509 dl->disk = *disk;
2510 /* only set index on disks that are a member of a
2511 * populated contianer, i.e. one with raid_devs
2512 */
2513 if (is_failed(&dl->disk))
3f6efecc 2514 dl->index = -2;
a2b97981
DW
2515 else if (is_spare(&dl->disk))
2516 dl->index = -1;
3f6efecc
DW
2517 }
2518
949c47a0
DW
2519 return 0;
2520}
2521
0e600426 2522#ifndef MDASSEMBLE
0c046afd
DW
2523/* When migrating map0 contains the 'destination' state while map1
2524 * contains the current state. When not migrating map0 contains the
2525 * current state. This routine assumes that map[0].map_state is set to
2526 * the current array state before being called.
2527 *
2528 * Migration is indicated by one of the following states
2529 * 1/ Idle (migr_state=0 map0state=normal||uninitialized||degraded||failed)
e3bba0e0 2530 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
0c046afd 2531 * map1state=uninitialized)
1484e727 2532 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
0c046afd 2533 * map1state=normal)
e3bba0e0 2534 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
0c046afd
DW
2535 * map1state=degraded)
2536 */
0556e1a2 2537static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type)
3393c6af 2538{
0c046afd 2539 struct imsm_map *dest;
3393c6af
DW
2540 struct imsm_map *src = get_imsm_map(dev, 0);
2541
0c046afd 2542 dev->vol.migr_state = 1;
1484e727 2543 set_migr_type(dev, migr_type);
f8f603f1 2544 dev->vol.curr_migr_unit = 0;
0c046afd
DW
2545 dest = get_imsm_map(dev, 1);
2546
0556e1a2 2547 /* duplicate and then set the target end state in map[0] */
3393c6af 2548 memcpy(dest, src, sizeof_imsm_map(src));
28bce06f
AK
2549 if ((migr_type == MIGR_REBUILD) ||
2550 (migr_type == MIGR_GEN_MIGR)) {
0556e1a2
DW
2551 __u32 ord;
2552 int i;
2553
2554 for (i = 0; i < src->num_members; i++) {
2555 ord = __le32_to_cpu(src->disk_ord_tbl[i]);
2556 set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
2557 }
2558 }
2559
0c046afd 2560 src->map_state = to_state;
949c47a0 2561}
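/* In short: migrate() sets vol.migr_state and the migration type,
 * resets curr_migr_unit, copies the live map into map[1], clears the
 * ord-table flags in map[0] for rebuild and general migrations, and
 * finally writes the requested end state into map[0].map_state.
 * end_migration() below merges any leftover IMSM_ORD_REBUILD bits back
 * and clears the migration fields again.
 */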
f8f603f1
DW
2562
2563static void end_migration(struct imsm_dev *dev, __u8 map_state)
2564{
2565 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2 2566 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
28bce06f 2567 int i, j;
0556e1a2
DW
2568
2569 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2570 * completed in the last migration.
2571 *
28bce06f 2572 * FIXME add support for raid-level-migration
0556e1a2
DW
2573 */
2574 for (i = 0; i < prev->num_members; i++)
28bce06f
AK
2575 for (j = 0; j < map->num_members; j++)
2576 /* during online capacity expansion
2577 * disks position can be changed if takeover is used
2578 */
2579 if (ord_to_idx(map->disk_ord_tbl[j]) ==
2580 ord_to_idx(prev->disk_ord_tbl[i])) {
2581 map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
2582 break;
2583 }
f8f603f1
DW
2584
2585 dev->vol.migr_state = 0;
28bce06f 2586 dev->vol.migr_type = 0;
f8f603f1
DW
2587 dev->vol.curr_migr_unit = 0;
2588 map->map_state = map_state;
2589}
0e600426 2590#endif
949c47a0
DW
2591
2592static int parse_raid_devices(struct intel_super *super)
2593{
2594 int i;
2595 struct imsm_dev *dev_new;
4d7b1503 2596 size_t len, len_migr;
401d313b 2597 size_t max_len = 0;
4d7b1503
DW
2598 size_t space_needed = 0;
2599 struct imsm_super *mpb = super->anchor;
949c47a0
DW
2600
2601 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2602 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
ba2de7ba 2603 struct intel_dev *dv;
949c47a0 2604
4d7b1503
DW
2605 len = sizeof_imsm_dev(dev_iter, 0);
2606 len_migr = sizeof_imsm_dev(dev_iter, 1);
2607 if (len_migr > len)
2608 space_needed += len_migr - len;
2609
ba2de7ba
DW
2610 dv = malloc(sizeof(*dv));
2611 if (!dv)
2612 return 1;
401d313b
AK
2613 if (max_len < len_migr)
2614 max_len = len_migr;
2615 if (max_len > len_migr)
2616 space_needed += max_len - len_migr;
2617 dev_new = malloc(max_len);
ba2de7ba
DW
2618 if (!dev_new) {
2619 free(dv);
949c47a0 2620 return 1;
ba2de7ba 2621 }
949c47a0 2622 imsm_copy_dev(dev_new, dev_iter);
ba2de7ba
DW
2623 dv->dev = dev_new;
2624 dv->index = i;
2625 dv->next = super->devlist;
2626 super->devlist = dv;
949c47a0 2627 }
cdddbdbc 2628
4d7b1503
DW
2629 /* ensure that super->buf is large enough when all raid devices
2630 * are migrating
2631 */
2632 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
2633 void *buf;
2634
2635 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
2636 if (posix_memalign(&buf, 512, len) != 0)
2637 return 1;
2638
1f45a8ad
DW
2639 memcpy(buf, super->buf, super->len);
2640 memset(buf + super->len, 0, len - super->len);
4d7b1503
DW
2641 free(super->buf);
2642 super->buf = buf;
2643 super->len = len;
2644 }
2645
cdddbdbc
DW
2646 return 0;
2647}
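/* Rationale for the resize above: a migrating raid device carries two
 * maps instead of one, so sizeof_imsm_dev(dev, 1) can exceed what the
 * on-disk mpb_size accounts for; growing super->buf up front leaves
 * room even if every raid device later enters a migration.
 */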
2648
604b746f
JD
2649/* retrieve a pointer to the bbm log which starts after all raid devices */
2650struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2651{
2652 void *ptr = NULL;
2653
2654 if (__le32_to_cpu(mpb->bbm_log_size)) {
2655 ptr = mpb;
2656 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2657 }
2658
2659 return ptr;
2660}
2661
d23fe947 2662static void __free_imsm(struct intel_super *super, int free_disks);
9ca2c81c 2663
cdddbdbc 2664/* load_imsm_mpb - read matrix metadata
f2f5c343 2665 * allocates super->mpb to be freed by free_imsm
cdddbdbc
DW
2666 */
2667static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2668{
2669 unsigned long long dsize;
cdddbdbc
DW
2670 unsigned long long sectors;
2671 struct stat;
6416d527 2672 struct imsm_super *anchor;
cdddbdbc
DW
2673 __u32 check_sum;
2674
cdddbdbc 2675 get_dev_size(fd, NULL, &dsize);
64436f06
N
2676 if (dsize < 1024) {
2677 if (devname)
2678 fprintf(stderr,
2679 Name ": %s: device too small for imsm\n",
2680 devname);
2681 return 1;
2682 }
cdddbdbc
DW
2683
2684 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2685 if (devname)
2686 fprintf(stderr,
2687 Name ": Cannot seek to anchor block on %s: %s\n",
2688 devname, strerror(errno));
2689 return 1;
2690 }
2691
949c47a0 2692 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
ad97895e
DW
2693 if (devname)
2694 fprintf(stderr,
2695 Name ": Failed to allocate imsm anchor buffer"
2696 " on %s\n", devname);
2697 return 1;
2698 }
949c47a0 2699 if (read(fd, anchor, 512) != 512) {
cdddbdbc
DW
2700 if (devname)
2701 fprintf(stderr,
2702 Name ": Cannot read anchor block on %s: %s\n",
2703 devname, strerror(errno));
6416d527 2704 free(anchor);
cdddbdbc
DW
2705 return 1;
2706 }
2707
6416d527 2708 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
cdddbdbc
DW
2709 if (devname)
2710 fprintf(stderr,
2711 Name ": no IMSM anchor on %s\n", devname);
6416d527 2712 free(anchor);
cdddbdbc
DW
2713 return 2;
2714 }
2715
d23fe947 2716 __free_imsm(super, 0);
f2f5c343
LM
2717 /* reload capability and hba */
2718
2719 /* capability and hba must be updated with new super allocation */
d424212e 2720 find_intel_hba_capability(fd, super, devname);
949c47a0
DW
2721 super->len = ROUND_UP(anchor->mpb_size, 512);
2722 if (posix_memalign(&super->buf, 512, super->len) != 0) {
cdddbdbc
DW
2723 if (devname)
2724 fprintf(stderr,
2725 Name ": unable to allocate %zu byte mpb buffer\n",
949c47a0 2726 super->len);
6416d527 2727 free(anchor);
cdddbdbc
DW
2728 return 2;
2729 }
949c47a0 2730 memcpy(super->buf, anchor, 512);
cdddbdbc 2731
6416d527
NB
2732 sectors = mpb_sectors(anchor) - 1;
2733 free(anchor);
949c47a0 2734 if (!sectors) {
ecf45690
DW
2735 check_sum = __gen_imsm_checksum(super->anchor);
2736 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2737 if (devname)
2738 fprintf(stderr,
2739 Name ": IMSM checksum %x != %x on %s\n",
2740 check_sum,
2741 __le32_to_cpu(super->anchor->check_sum),
2742 devname);
2743 return 2;
2744 }
2745
a2b97981 2746 return 0;
949c47a0 2747 }
cdddbdbc
DW
2748
2749 /* read the extended mpb */
2750 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
2751 if (devname)
2752 fprintf(stderr,
2753 Name ": Cannot seek to extended mpb on %s: %s\n",
2754 devname, strerror(errno));
2755 return 1;
2756 }
2757
f21e18ca 2758 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
cdddbdbc
DW
2759 if (devname)
2760 fprintf(stderr,
2761 Name ": Cannot read extended mpb on %s: %s\n",
2762 devname, strerror(errno));
2763 return 2;
2764 }
2765
949c47a0
DW
2766 check_sum = __gen_imsm_checksum(super->anchor);
2767 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
cdddbdbc
DW
2768 if (devname)
2769 fprintf(stderr,
2770 Name ": IMSM checksum %x != %x on %s\n",
949c47a0 2771 check_sum, __le32_to_cpu(super->anchor->check_sum),
cdddbdbc 2772 devname);
db575f3b 2773 return 3;
cdddbdbc
DW
2774 }
2775
604b746f
JD
2776 /* FIXME the BBM log is disk specific so we cannot use this global
2777 * buffer for all disks. Ok for now since we only look at the global
2778 * bbm_log_size parameter to gate assembly
2779 */
2780 super->bbm_log = __get_imsm_bbm_log(super->anchor);
2781
a2b97981
DW
2782 return 0;
2783}
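/* On-disk layout as read above: the 512-byte anchor sits in the
 * second-to-last sector of the device (dsize - 1024); when mpb_size
 * spans more than one sector the remaining "extended" portion is read
 * from the sectors immediately preceding the anchor, starting at
 * dsize - 512 * (2 + sectors).
 */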
2784
2785static int
2786load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
2787{
2788 int err;
2789
2790 err = load_imsm_mpb(fd, super, devname);
2791 if (err)
2792 return err;
2793 err = load_imsm_disk(fd, super, devname, keep_fd);
2794 if (err)
2795 return err;
2796 err = parse_raid_devices(super);
4d7b1503 2797
a2b97981 2798 return err;
cdddbdbc
DW
2799}
2800
ae6aad82
DW
2801static void __free_imsm_disk(struct dl *d)
2802{
2803 if (d->fd >= 0)
2804 close(d->fd);
2805 if (d->devname)
2806 free(d->devname);
0dcecb2e
DW
2807 if (d->e)
2808 free(d->e);
ae6aad82
DW
2809 free(d);
2810
2811}
1a64be56 2812
cdddbdbc
DW
2813static void free_imsm_disks(struct intel_super *super)
2814{
47ee5a45 2815 struct dl *d;
cdddbdbc 2816
47ee5a45
DW
2817 while (super->disks) {
2818 d = super->disks;
cdddbdbc 2819 super->disks = d->next;
ae6aad82 2820 __free_imsm_disk(d);
cdddbdbc 2821 }
cb82edca
AK
2822 while (super->disk_mgmt_list) {
2823 d = super->disk_mgmt_list;
2824 super->disk_mgmt_list = d->next;
2825 __free_imsm_disk(d);
2826 }
47ee5a45
DW
2827 while (super->missing) {
2828 d = super->missing;
2829 super->missing = d->next;
2830 __free_imsm_disk(d);
2831 }
2832
cdddbdbc
DW
2833}
2834
9ca2c81c 2835/* free all the pieces hanging off of a super pointer */
d23fe947 2836static void __free_imsm(struct intel_super *super, int free_disks)
cdddbdbc 2837{
88654014
LM
2838 struct intel_hba *elem, *next;
2839
9ca2c81c 2840 if (super->buf) {
949c47a0 2841 free(super->buf);
9ca2c81c
DW
2842 super->buf = NULL;
2843 }
f2f5c343
LM
2844 /* unlink capability description */
2845 super->orom = NULL;
d23fe947
DW
2846 if (free_disks)
2847 free_imsm_disks(super);
ba2de7ba 2848 free_devlist(super);
88654014
LM
2849 elem = super->hba;
2850 while (elem) {
2851 if (elem->path)
2852 free((void *)elem->path);
2853 next = elem->next;
2854 free(elem);
2855 elem = next;
88c32bb1 2856 }
88654014 2857 super->hba = NULL;
cdddbdbc
DW
2858}
2859
9ca2c81c
DW
2860static void free_imsm(struct intel_super *super)
2861{
d23fe947 2862 __free_imsm(super, 1);
9ca2c81c
DW
2863 free(super);
2864}
cdddbdbc
DW
2865
2866static void free_super_imsm(struct supertype *st)
2867{
2868 struct intel_super *super = st->sb;
2869
2870 if (!super)
2871 return;
2872
2873 free_imsm(super);
2874 st->sb = NULL;
2875}
2876
49133e57 2877static struct intel_super *alloc_super(void)
c2c087e6
DW
2878{
2879 struct intel_super *super = malloc(sizeof(*super));
2880
2881 if (super) {
2882 memset(super, 0, sizeof(*super));
bf5a934a 2883 super->current_vol = -1;
0dcecb2e 2884 super->create_offset = ~((__u32 ) 0);
c2c087e6 2885 }
c2c087e6
DW
2886 return super;
2887}
2888
f0f5a016
LM
2889/*
2890 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
2891 */
d424212e 2892static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
f0f5a016
LM
2893{
2894 struct sys_dev *hba_name;
2895 int rv = 0;
2896
2897 if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
f2f5c343 2898 super->orom = NULL;
f0f5a016
LM
2899 super->hba = NULL;
2900 return 0;
2901 }
2902 hba_name = find_disk_attached_hba(fd, NULL);
2903 if (!hba_name) {
d424212e 2904 if (devname)
f0f5a016
LM
2905 fprintf(stderr,
2906 Name ": %s is not attached to an Intel(R) RAID controller.\n",
d424212e 2907 devname);
f0f5a016
LM
2908 return 1;
2909 }
2910 rv = attach_hba_to_super(super, hba_name);
2911 if (rv == 2) {
d424212e
N
2912 if (devname) {
2913 struct intel_hba *hba = super->hba;
f0f5a016 2914
f0f5a016
LM
2915 fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
2916 "controller (%s),\n"
2917 " but the container is assigned to Intel(R) "
2918 "%s RAID controller (",
d424212e 2919 devname,
f0f5a016
LM
2920 hba_name->path,
2921 hba_name->pci_id ? : "Err!",
2922 get_sys_dev_type(hba_name->type));
2923
f0f5a016
LM
2924 while (hba) {
2925 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
2926 if (hba->next)
2927 fprintf(stderr, ", ");
2928 hba = hba->next;
2929 }
2930
2931 fprintf(stderr, ").\n"
2932 " Mixing devices attached to different controllers "
2933 "is not allowed.\n");
2934 }
2935 free_sys_dev(&hba_name);
2936 return 2;
2937 }
f2f5c343 2938 super->orom = find_imsm_capability(hba_name->type);
f0f5a016 2939 free_sys_dev(&hba_name);
f2f5c343
LM
2940 if (!super->orom)
2941 return 3;
f0f5a016
LM
2942 return 0;
2943}
2944
cdddbdbc 2945#ifndef MDASSEMBLE
47ee5a45
DW
2946/* find_missing - helper routine for load_super_imsm_all that identifies
2947 * disks that have disappeared from the system. This routine relies on
2948 * the mpb being up to date, which it is at load time.
2949 */
2950static int find_missing(struct intel_super *super)
2951{
2952 int i;
2953 struct imsm_super *mpb = super->anchor;
2954 struct dl *dl;
2955 struct imsm_disk *disk;
47ee5a45
DW
2956
2957 for (i = 0; i < mpb->num_disks; i++) {
2958 disk = __get_imsm_disk(mpb, i);
54c2c1ea 2959 dl = serial_to_dl(disk->serial, super);
47ee5a45
DW
2960 if (dl)
2961 continue;
47ee5a45
DW
2962
2963 dl = malloc(sizeof(*dl));
2964 if (!dl)
2965 return 1;
2966 dl->major = 0;
2967 dl->minor = 0;
2968 dl->fd = -1;
2969 dl->devname = strdup("missing");
2970 dl->index = i;
2971 serialcpy(dl->serial, disk->serial);
2972 dl->disk = *disk;
689c9bf3 2973 dl->e = NULL;
47ee5a45
DW
2974 dl->next = super->missing;
2975 super->missing = dl;
2976 }
2977
2978 return 0;
2979}
2980
a2b97981
DW
2981static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
2982{
2983 struct intel_disk *idisk = disk_list;
2984
2985 while (idisk) {
2986 if (serialcmp(idisk->disk.serial, serial) == 0)
2987 break;
2988 idisk = idisk->next;
2989 }
2990
2991 return idisk;
2992}
2993
2994static int __prep_thunderdome(struct intel_super **table, int tbl_size,
2995 struct intel_super *super,
2996 struct intel_disk **disk_list)
2997{
2998 struct imsm_disk *d = &super->disks->disk;
2999 struct imsm_super *mpb = super->anchor;
3000 int i, j;
3001
3002 for (i = 0; i < tbl_size; i++) {
3003 struct imsm_super *tbl_mpb = table[i]->anchor;
3004 struct imsm_disk *tbl_d = &table[i]->disks->disk;
3005
3006 if (tbl_mpb->family_num == mpb->family_num) {
3007 if (tbl_mpb->check_sum == mpb->check_sum) {
3008 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3009 __func__, super->disks->major,
3010 super->disks->minor,
3011 table[i]->disks->major,
3012 table[i]->disks->minor);
3013 break;
3014 }
3015
3016 if (((is_configured(d) && !is_configured(tbl_d)) ||
3017 is_configured(d) == is_configured(tbl_d)) &&
3018 tbl_mpb->generation_num < mpb->generation_num) {
3019 /* current version of the mpb is a
3020 * better candidate than the one in
3021 * super_table, but copy over "cross
3022 * generational" status
3023 */
3024 struct intel_disk *idisk;
3025
3026 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3027 __func__, super->disks->major,
3028 super->disks->minor,
3029 table[i]->disks->major,
3030 table[i]->disks->minor);
3031
3032 idisk = disk_list_get(tbl_d->serial, *disk_list);
3033 if (idisk && is_failed(&idisk->disk))
3034 tbl_d->status |= FAILED_DISK;
3035 break;
3036 } else {
3037 struct intel_disk *idisk;
3038 struct imsm_disk *disk;
3039
3040 /* tbl_mpb is more up to date, but copy
3041 * over cross generational status before
3042 * returning
3043 */
3044 disk = __serial_to_disk(d->serial, mpb, NULL);
3045 if (disk && is_failed(disk))
3046 d->status |= FAILED_DISK;
3047
3048 idisk = disk_list_get(d->serial, *disk_list);
3049 if (idisk) {
3050 idisk->owner = i;
3051 if (disk && is_configured(disk))
3052 idisk->disk.status |= CONFIGURED_DISK;
3053 }
3054
3055 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3056 __func__, super->disks->major,
3057 super->disks->minor,
3058 table[i]->disks->major,
3059 table[i]->disks->minor);
3060
3061 return tbl_size;
3062 }
3063 }
3064 }
3065
3066 if (i >= tbl_size)
3067 table[tbl_size++] = super;
3068 else
3069 table[i] = super;
3070
3071 /* update/extend the merged list of imsm_disk records */
3072 for (j = 0; j < mpb->num_disks; j++) {
3073 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
3074 struct intel_disk *idisk;
3075
3076 idisk = disk_list_get(disk->serial, *disk_list);
3077 if (idisk) {
3078 idisk->disk.status |= disk->status;
3079 if (is_configured(&idisk->disk) ||
3080 is_failed(&idisk->disk))
3081 idisk->disk.status &= ~(SPARE_DISK);
3082 } else {
3083 idisk = calloc(1, sizeof(*idisk));
3084 if (!idisk)
3085 return -1;
3086 idisk->owner = IMSM_UNKNOWN_OWNER;
3087 idisk->disk = *disk;
3088 idisk->next = *disk_list;
3089 *disk_list = idisk;
3090 }
3091
3092 if (serialcmp(idisk->disk.serial, d->serial) == 0)
3093 idisk->owner = i;
3094 }
3095
3096 return tbl_size;
3097}
3098
3099static struct intel_super *
3100validate_members(struct intel_super *super, struct intel_disk *disk_list,
3101 const int owner)
3102{
3103 struct imsm_super *mpb = super->anchor;
3104 int ok_count = 0;
3105 int i;
3106
3107 for (i = 0; i < mpb->num_disks; i++) {
3108 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3109 struct intel_disk *idisk;
3110
3111 idisk = disk_list_get(disk->serial, disk_list);
3112 if (idisk) {
3113 if (idisk->owner == owner ||
3114 idisk->owner == IMSM_UNKNOWN_OWNER)
3115 ok_count++;
3116 else
3117 dprintf("%s: '%.16s' owner %d != %d\n",
3118 __func__, disk->serial, idisk->owner,
3119 owner);
3120 } else {
3121 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3122 __func__, __le32_to_cpu(mpb->family_num), i,
3123 disk->serial);
3124 break;
3125 }
3126 }
3127
3128 if (ok_count == mpb->num_disks)
3129 return super;
3130 return NULL;
3131}
3132
3133static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3134{
3135 struct intel_super *s;
3136
3137 for (s = super_list; s; s = s->next) {
3138 if (family_num != s->anchor->family_num)
3139 continue;
3140 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3141 __le32_to_cpu(family_num), s->disks->devname);
3142 }
3143}
3144
3145static struct intel_super *
3146imsm_thunderdome(struct intel_super **super_list, int len)
3147{
3148 struct intel_super *super_table[len];
3149 struct intel_disk *disk_list = NULL;
3150 struct intel_super *champion, *spare;
3151 struct intel_super *s, **del;
3152 int tbl_size = 0;
3153 int conflict;
3154 int i;
3155
3156 memset(super_table, 0, sizeof(super_table));
3157 for (s = *super_list; s; s = s->next)
3158 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
3159
3160 for (i = 0; i < tbl_size; i++) {
3161 struct imsm_disk *d;
3162 struct intel_disk *idisk;
3163 struct imsm_super *mpb = super_table[i]->anchor;
3164
3165 s = super_table[i];
3166 d = &s->disks->disk;
3167
3168 /* 'd' must appear in merged disk list for its
3169 * configuration to be valid
3170 */
3171 idisk = disk_list_get(d->serial, disk_list);
3172 if (idisk && idisk->owner == i)
3173 s = validate_members(s, disk_list, i);
3174 else
3175 s = NULL;
3176
3177 if (!s)
3178 dprintf("%s: marking family: %#x from %d:%d offline\n",
3179 __func__, mpb->family_num,
3180 super_table[i]->disks->major,
3181 super_table[i]->disks->minor);
3182 super_table[i] = s;
3183 }
3184
3185 /* This is where the mdadm implementation differs from the Windows
3186 * driver which has no strict concept of a container. We can only
3187 * assemble one family from a container, so when returning a prodigal
3188 * array member to this system the code will not be able to disambiguate
3189 * the container contents that should be assembled ("foreign" versus
3190 * "local"). It requires user intervention to set the orig_family_num
3191 * to a new value to establish a new container. The Windows driver in
3192 * this situation fixes up the volume name in place and manages the
3193 * foreign array as an independent entity.
3194 */
3195 s = NULL;
3196 spare = NULL;
3197 conflict = 0;
3198 for (i = 0; i < tbl_size; i++) {
3199 struct intel_super *tbl_ent = super_table[i];
3200 int is_spare = 0;
3201
3202 if (!tbl_ent)
3203 continue;
3204
3205 if (tbl_ent->anchor->num_raid_devs == 0) {
3206 spare = tbl_ent;
3207 is_spare = 1;
3208 }
3209
3210 if (s && !is_spare) {
3211 show_conflicts(tbl_ent->anchor->family_num, *super_list);
3212 conflict++;
3213 } else if (!s && !is_spare)
3214 s = tbl_ent;
3215 }
3216
3217 if (!s)
3218 s = spare;
3219 if (!s) {
3220 champion = NULL;
3221 goto out;
3222 }
3223 champion = s;
3224
3225 if (conflict)
3226 fprintf(stderr, "Chose family %#x on '%s', "
3227 "assemble conflicts to new container with '--update=uuid'\n",
3228 __le32_to_cpu(s->anchor->family_num), s->disks->devname);
3229
3230 /* collect all dl's onto 'champion', and update them to
3231 * champion's version of the status
3232 */
3233 for (s = *super_list; s; s = s->next) {
3234 struct imsm_super *mpb = champion->anchor;
3235 struct dl *dl = s->disks;
3236
3237 if (s == champion)
3238 continue;
3239
3240 for (i = 0; i < mpb->num_disks; i++) {
3241 struct imsm_disk *disk;
3242
3243 disk = __serial_to_disk(dl->serial, mpb, &dl->index);
3244 if (disk) {
3245 dl->disk = *disk;
3246 /* only set index on disks that are a member of
3247 * a populated container, i.e. one with
3248 * raid_devs
3249 */
3250 if (is_failed(&dl->disk))
3251 dl->index = -2;
3252 else if (is_spare(&dl->disk))
3253 dl->index = -1;
3254 break;
3255 }
3256 }
3257
3258 if (i >= mpb->num_disks) {
3259 struct intel_disk *idisk;
3260
3261 idisk = disk_list_get(dl->serial, disk_list);
ecf408e9 3262 if (idisk && is_spare(&idisk->disk) &&
a2b97981
DW
3263 !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
3264 dl->index = -1;
3265 else {
3266 dl->index = -2;
3267 continue;
3268 }
3269 }
3270
3271 dl->next = champion->disks;
3272 champion->disks = dl;
3273 s->disks = NULL;
3274 }
3275
3276 /* delete 'champion' from super_list */
3277 for (del = super_list; *del; ) {
3278 if (*del == champion) {
3279 *del = (*del)->next;
3280 break;
3281 } else
3282 del = &(*del)->next;
3283 }
3284 champion->next = NULL;
3285
3286 out:
3287 while (disk_list) {
3288 struct intel_disk *idisk = disk_list;
3289
3290 disk_list = disk_list->next;
3291 free(idisk);
3292 }
3293
3294 return champion;
3295}
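/* Net effect of the thunderdome: at most one non-spare family (or,
 * failing that, a bare spare) survives as the returned super; each
 * remaining disk is re-parented onto the champion's disk list with its
 * index taken from the champion's metadata, and the merged intel_disk
 * list used for cross-checking is freed before returning.
 */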
3296
cdddbdbc 3297static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
e1902a7b 3298 char *devname)
cdddbdbc
DW
3299{
3300 struct mdinfo *sra;
a2b97981
DW
3301 struct intel_super *super_list = NULL;
3302 struct intel_super *super = NULL;
db575f3b 3303 int devnum = fd2devnum(fd);
a2b97981 3304 struct mdinfo *sd;
db575f3b 3305 int retry;
a2b97981
DW
3306 int err = 0;
3307 int i;
dab4a513
DW
3308
3309 /* check if 'fd' an opened container */
b526e52d 3310 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
cdddbdbc
DW
3311 if (!sra)
3312 return 1;
3313
3314 if (sra->array.major_version != -1 ||
3315 sra->array.minor_version != -2 ||
1602d52c
AW
3316 strcmp(sra->text_version, "imsm") != 0) {
3317 err = 1;
3318 goto error;
3319 }
a2b97981
DW
3320 /* load all mpbs */
3321 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
49133e57 3322 struct intel_super *s = alloc_super();
7a6ecd55 3323 char nm[32];
a2b97981 3324 int dfd;
f2f5c343 3325 int rv;
a2b97981
DW
3326
3327 err = 1;
3328 if (!s)
3329 goto error;
3330 s->next = super_list;
3331 super_list = s;
cdddbdbc 3332
a2b97981 3333 err = 2;
cdddbdbc 3334 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3335 dfd = dev_open(nm, O_RDWR);
a2b97981
DW
3336 if (dfd < 0)
3337 goto error;
3338
d424212e 3339 rv = find_intel_hba_capability(dfd, s, devname);
f2f5c343
LM
3340 /* no orom/efi capability, or the disk is not attached to an intel hba */
3341 if (rv != 0)
3342 goto error;
3343
e1902a7b 3344 err = load_and_parse_mpb(dfd, s, NULL, 1);
db575f3b
DW
3345
3346 /* retry the load if we might have raced against mdmon */
a2b97981 3347 if (err == 3 && mdmon_running(devnum))
db575f3b
DW
3348 for (retry = 0; retry < 3; retry++) {
3349 usleep(3000);
e1902a7b 3350 err = load_and_parse_mpb(dfd, s, NULL, 1);
a2b97981 3351 if (err != 3)
db575f3b
DW
3352 break;
3353 }
a2b97981
DW
3354 if (err)
3355 goto error;
cdddbdbc
DW
3356 }
3357
a2b97981
DW
3358 /* all mpbs enter, maybe one leaves */
3359 super = imsm_thunderdome(&super_list, i);
3360 if (!super) {
3361 err = 1;
3362 goto error;
cdddbdbc
DW
3363 }
3364
47ee5a45
DW
3365 if (find_missing(super) != 0) {
3366 free_imsm(super);
a2b97981
DW
3367 err = 2;
3368 goto error;
47ee5a45 3369 }
a2b97981
DW
3370 err = 0;
3371
3372 error:
3373 while (super_list) {
3374 struct intel_super *s = super_list;
3375
3376 super_list = super_list->next;
3377 free_imsm(s);
3378 }
1602d52c 3379 sysfs_free(sra);
a2b97981
DW
3380
3381 if (err)
3382 return err;
f7e7067b 3383
cdddbdbc 3384 *sbp = super;
db575f3b 3385 st->container_dev = devnum;
a2b97981 3386 if (err == 0 && st->ss == NULL) {
bf5a934a 3387 st->ss = &super_imsm;
cdddbdbc
DW
3388 st->minor_version = 0;
3389 st->max_devs = IMSM_MAX_DEVICES;
3390 }
cdddbdbc
DW
3391 return 0;
3392}
2b959fbf
N
3393
3394static int load_container_imsm(struct supertype *st, int fd, char *devname)
3395{
3396 return load_super_imsm_all(st, fd, &st->sb, devname);
3397}
cdddbdbc
DW
3398#endif
3399
3400static int load_super_imsm(struct supertype *st, int fd, char *devname)
3401{
3402 struct intel_super *super;
3403 int rv;
3404
691c6ee1
N
3405 if (test_partition(fd))
3406 /* IMSM not allowed on partitions */
3407 return 1;
3408
37424f13
DW
3409 free_super_imsm(st);
3410
49133e57 3411 super = alloc_super();
cdddbdbc
DW
3412 if (!super) {
3413 fprintf(stderr,
3414 Name ": malloc of %zu failed.\n",
3415 sizeof(*super));
3416 return 1;
3417 }
ea2bc72b
LM
3418 /* Load hba and capabilities if they exist.
3419 * But do not preclude loading metadata in case capabilities or hba are
3420 * non-compliant and ignore_hw_compat is set.
3421 */
d424212e 3422 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 3423 /* no orom/efi capability, or the disk is not attached to an intel hba */
ea2bc72b 3424 if ((rv != 0) && (st->ignore_hw_compat == 0)) {
f2f5c343
LM
3425 if (devname)
3426 fprintf(stderr,
3427 Name ": No OROM/EFI properties for %s\n", devname);
3428 free_imsm(super);
3429 return 2;
3430 }
a2b97981 3431 rv = load_and_parse_mpb(fd, super, devname, 0);
cdddbdbc
DW
3432
3433 if (rv) {
3434 if (devname)
3435 fprintf(stderr,
3436 Name ": Failed to load all information "
3437 "sections on %s\n", devname);
3438 free_imsm(super);
3439 return rv;
3440 }
3441
3442 st->sb = super;
3443 if (st->ss == NULL) {
3444 st->ss = &super_imsm;
3445 st->minor_version = 0;
3446 st->max_devs = IMSM_MAX_DEVICES;
3447 }
cdddbdbc
DW
3448 return 0;
3449}
3450
ef6ffade
DW
3451static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3452{
3453 if (info->level == 1)
3454 return 128;
3455 return info->chunk_size >> 9;
3456}
3457
ff596308 3458static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
ef6ffade
DW
3459{
3460 __u32 num_stripes;
3461
3462 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
ff596308 3463 num_stripes /= num_domains;
ef6ffade
DW
3464
3465 return num_stripes;
3466}
3467
fcfd9599
DW
3468static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3469{
4025c288
DW
3470 if (info->level == 1)
3471 return info->size * 2;
3472 else
3473 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
fcfd9599
DW
3474}
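/* Worked example for the three helpers above (illustrative values,
 * assuming info->size is the per-member size in KiB as mdadm passes
 * it): a RAID0 with a 64 KiB chunk and info->size = 1048576 gives
 * blocks_per_strip = 65536 >> 9 = 128 sectors, num_data_stripes =
 * (1048576 * 2) / 128 / 1 = 16384 (num_domains is 1 for raid0, see
 * init_super_imsm_volume below), and blocks_per_member = 2097152,
 * which is already strip aligned.
 */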
3475
4d1313e9
DW
3476static void imsm_update_version_info(struct intel_super *super)
3477{
3478 /* update the version and attributes */
3479 struct imsm_super *mpb = super->anchor;
3480 char *version;
3481 struct imsm_dev *dev;
3482 struct imsm_map *map;
3483 int i;
3484
3485 for (i = 0; i < mpb->num_raid_devs; i++) {
3486 dev = get_imsm_dev(super, i);
3487 map = get_imsm_map(dev, 0);
3488 if (__le32_to_cpu(dev->size_high) > 0)
3489 mpb->attributes |= MPB_ATTRIB_2TB;
3490
3491 /* FIXME detect when an array spans a port multiplier */
3492 #if 0
3493 mpb->attributes |= MPB_ATTRIB_PM;
3494 #endif
3495
3496 if (mpb->num_raid_devs > 1 ||
3497 mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
3498 version = MPB_VERSION_ATTRIBS;
3499 switch (get_imsm_raid_level(map)) {
3500 case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
3501 case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
3502 case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
3503 case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
3504 }
3505 } else {
3506 if (map->num_members >= 5)
3507 version = MPB_VERSION_5OR6_DISK_ARRAY;
3508 else if (dev->status == DEV_CLONE_N_GO)
3509 version = MPB_VERSION_CNG;
3510 else if (get_imsm_raid_level(map) == 5)
3511 version = MPB_VERSION_RAID5;
3512 else if (map->num_members >= 3)
3513 version = MPB_VERSION_3OR4_DISK_ARRAY;
3514 else if (get_imsm_raid_level(map) == 1)
3515 version = MPB_VERSION_RAID1;
3516 else
3517 version = MPB_VERSION_RAID0;
3518 }
3519 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
3520 }
3521}
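/* Version selection above, in order of precedence: more than one raid
 * device, or any attribute beyond the default checksum-verify flag
 * (e.g. the 2TB flag set just before), forces the MPB_VERSION_ATTRIBS
 * signature plus a per-level attribute bit; otherwise the version
 * string is chosen from the member count, clone-and-go status and raid
 * level of the single device.
 */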
3522
aa534678
DW
3523static int check_name(struct intel_super *super, char *name, int quiet)
3524{
3525 struct imsm_super *mpb = super->anchor;
3526 char *reason = NULL;
3527 int i;
3528
3529 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3530 reason = "must be 16 characters or less";
3531
3532 for (i = 0; i < mpb->num_raid_devs; i++) {
3533 struct imsm_dev *dev = get_imsm_dev(super, i);
3534
3535 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3536 reason = "already exists";
3537 break;
3538 }
3539 }
3540
3541 if (reason && !quiet)
3542 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3543
3544 return !reason;
3545}
3546
8b353278
DW
3547static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3548 unsigned long long size, char *name,
3549 char *homehost, int *uuid)
cdddbdbc 3550{
c2c087e6
DW
3551 /* We are creating a volume inside a pre-existing container.
3552 * so st->sb is already set.
3553 */
3554 struct intel_super *super = st->sb;
949c47a0 3555 struct imsm_super *mpb = super->anchor;
ba2de7ba 3556 struct intel_dev *dv;
c2c087e6
DW
3557 struct imsm_dev *dev;
3558 struct imsm_vol *vol;
3559 struct imsm_map *map;
3560 int idx = mpb->num_raid_devs;
3561 int i;
3562 unsigned long long array_blocks;
2c092cad 3563 size_t size_old, size_new;
ff596308 3564 __u32 num_data_stripes;
cdddbdbc 3565
88c32bb1 3566 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
c2c087e6 3567 fprintf(stderr, Name": This imsm-container already has the "
88c32bb1 3568 "maximum of %d volumes\n", super->orom->vpa);
c2c087e6
DW
3569 return 0;
3570 }
3571
2c092cad
DW
3572 /* ensure the mpb is large enough for the new data */
3573 size_old = __le32_to_cpu(mpb->mpb_size);
3574 size_new = disks_to_mpb_size(info->nr_disks);
3575 if (size_new > size_old) {
3576 void *mpb_new;
3577 size_t size_round = ROUND_UP(size_new, 512);
3578
3579 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3580 fprintf(stderr, Name": could not allocate new mpb\n");
3581 return 0;
3582 }
3583 memcpy(mpb_new, mpb, size_old);
3584 free(mpb);
3585 mpb = mpb_new;
949c47a0 3586 super->anchor = mpb_new;
2c092cad
DW
3587 mpb->mpb_size = __cpu_to_le32(size_new);
3588 memset(mpb_new + size_old, 0, size_round - size_old);
3589 }
bf5a934a 3590 super->current_vol = idx;
d23fe947
DW
3591 /* when creating the first raid device in this container set num_disks
3592 * to zero, i.e. delete this spare and add raid member devices in
3593 * add_to_super_imsm_volume()
3594 */
3595 if (super->current_vol == 0)
3596 mpb->num_disks = 0;
5a038140 3597
aa534678
DW
3598 if (!check_name(super, name, 0))
3599 return 0;
ba2de7ba
DW
3600 dv = malloc(sizeof(*dv));
3601 if (!dv) {
3602 fprintf(stderr, Name ": failed to allocate device list entry\n");
3603 return 0;
3604 }
1a2487c2 3605 dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
949c47a0 3606 if (!dev) {
ba2de7ba 3607 free(dv);
949c47a0
DW
3608 fprintf(stderr, Name": could not allocate raid device\n");
3609 return 0;
3610 }
1a2487c2 3611
c2c087e6 3612 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
03bcbc65
DW
3613 if (info->level == 1)
3614 array_blocks = info_to_blocks_per_member(info);
3615 else
3616 array_blocks = calc_array_size(info->level, info->raid_disks,
3617 info->layout, info->chunk_size,
3618 info->size*2);
979d38be
DW
3619 /* round array size down to closest MB */
3620 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
3621
c2c087e6
DW
3622 dev->size_low = __cpu_to_le32((__u32) array_blocks);
3623 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1a2487c2 3624 dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
c2c087e6
DW
3625 vol = &dev->vol;
3626 vol->migr_state = 0;
1484e727 3627 set_migr_type(dev, MIGR_INIT);
c2c087e6 3628 vol->dirty = 0;
f8f603f1 3629 vol->curr_migr_unit = 0;
a965f303 3630 map = get_imsm_map(dev, 0);
0dcecb2e 3631 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
fcfd9599 3632 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
ef6ffade 3633 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
0556e1a2 3634 map->failed_disk_num = ~0;
c2c087e6
DW
3635 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
3636 IMSM_T_STATE_NORMAL;
252d23c0 3637 map->ddf = 1;
ef6ffade
DW
3638
3639 if (info->level == 1 && info->raid_disks > 2) {
38950822
AW
3640 free(dev);
3641 free(dv);
ef6ffade
DW
3642 fprintf(stderr, Name": imsm does not support more than 2 disks "
3643 "in a raid1 volume\n");
3644 return 0;
3645 }
81062a36
DW
3646
3647 map->raid_level = info->level;
4d1313e9 3648 if (info->level == 10) {
c2c087e6 3649 map->raid_level = 1;
4d1313e9 3650 map->num_domains = info->raid_disks / 2;
81062a36
DW
3651 } else if (info->level == 1)
3652 map->num_domains = info->raid_disks;
3653 else
ff596308 3654 map->num_domains = 1;
81062a36 3655
ff596308
DW
3656 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
3657 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
ef6ffade 3658
c2c087e6
DW
3659 map->num_members = info->raid_disks;
3660 for (i = 0; i < map->num_members; i++) {
3661 /* initialized in add_to_super */
4eb26970 3662 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
c2c087e6 3663 }
949c47a0 3664 mpb->num_raid_devs++;
ba2de7ba
DW
3665
3666 dv->dev = dev;
3667 dv->index = super->current_vol;
3668 dv->next = super->devlist;
3669 super->devlist = dv;
c2c087e6 3670
4d1313e9
DW
3671 imsm_update_version_info(super);
3672
c2c087e6 3673 return 1;
cdddbdbc
DW
3674}
3675
bf5a934a
DW
3676static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
3677 unsigned long long size, char *name,
3678 char *homehost, int *uuid)
3679{
3680 /* This is primarily called by Create when creating a new array.
3681 * We will then get add_to_super called for each component, and then
3682 * write_init_super called to write it out to each device.
3683 * For IMSM, Create can create on fresh devices or on a pre-existing
3684 * array.
3685 * To create on a pre-existing array a different method will be called.
3686 * This one is just for fresh drives.
3687 */
3688 struct intel_super *super;
3689 struct imsm_super *mpb;
3690 size_t mpb_size;
4d1313e9 3691 char *version;
bf5a934a 3692
bf5a934a 3693 if (st->sb)
e683ca88
DW
3694 return init_super_imsm_volume(st, info, size, name, homehost, uuid);
3695
3696 if (info)
3697 mpb_size = disks_to_mpb_size(info->nr_disks);
3698 else
3699 mpb_size = 512;
bf5a934a 3700
49133e57 3701 super = alloc_super();
e683ca88 3702 if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
bf5a934a 3703 free(super);
e683ca88
DW
3704 super = NULL;
3705 }
3706 if (!super) {
3707 fprintf(stderr, Name
3708 ": %s could not allocate superblock\n", __func__);
bf5a934a
DW
3709 return 0;
3710 }
e683ca88 3711 memset(super->buf, 0, mpb_size);
ef649044 3712 mpb = super->buf;
e683ca88
DW
3713 mpb->mpb_size = __cpu_to_le32(mpb_size);
3714 st->sb = super;
3715
3716 if (info == NULL) {
3717 /* zeroing superblock */
3718 return 0;
3719 }
bf5a934a 3720
4d1313e9
DW
3721 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
3722
3723 version = (char *) mpb->sig;
3724 strcpy(version, MPB_SIGNATURE);
3725 version += strlen(MPB_SIGNATURE);
3726 strcpy(version, MPB_VERSION_RAID0);
bf5a934a 3727
bf5a934a
DW
3728 return 1;
3729}
3730
0e600426 3731#ifndef MDASSEMBLE
f20c3968 3732static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
bf5a934a
DW
3733 int fd, char *devname)
3734{
3735 struct intel_super *super = st->sb;
d23fe947 3736 struct imsm_super *mpb = super->anchor;
bf5a934a
DW
3737 struct dl *dl;
3738 struct imsm_dev *dev;
3739 struct imsm_map *map;
4eb26970 3740 int slot;
bf5a934a 3741
949c47a0 3742 dev = get_imsm_dev(super, super->current_vol);
a965f303 3743 map = get_imsm_map(dev, 0);
bf5a934a 3744
208933a7
N
3745 if (! (dk->state & (1<<MD_DISK_SYNC))) {
3746 fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
3747 devname);
3748 return 1;
3749 }
3750
efb30e7f
DW
3751 if (fd == -1) {
3752 /* we're doing autolayout so grab the pre-marked (in
3753 * validate_geometry) raid_disk
3754 */
3755 for (dl = super->disks; dl; dl = dl->next)
3756 if (dl->raiddisk == dk->raid_disk)
3757 break;
3758 } else {
3759 for (dl = super->disks; dl ; dl = dl->next)
3760 if (dl->major == dk->major &&
3761 dl->minor == dk->minor)
3762 break;
3763 }
d23fe947 3764
208933a7
N
3765 if (!dl) {
3766 fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
f20c3968 3767 return 1;
208933a7 3768 }
bf5a934a 3769
d23fe947
DW
3770 /* add a pristine spare to the metadata */
3771 if (dl->index < 0) {
3772 dl->index = super->anchor->num_disks;
3773 super->anchor->num_disks++;
3774 }
4eb26970
DW
3775 /* Check the device has not already been added */
3776 slot = get_imsm_disk_slot(map, dl->index);
3777 if (slot >= 0 &&
98130f40 3778 (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
4eb26970
DW
3779 fprintf(stderr, Name ": %s has been included in this array twice\n",
3780 devname);
3781 return 1;
3782 }
be73972f 3783 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
ee5aad5a 3784 dl->disk.status = CONFIGURED_DISK;
d23fe947
DW
3785
3786 /* if we are creating the first raid device update the family number */
3787 if (super->current_vol == 0) {
3788 __u32 sum;
3789 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
3790 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
3791
791b666a
AW
3792 if (!_dev || !_disk) {
3793 fprintf(stderr, Name ": BUG mpb setup error\n");
3794 return 1;
3795 }
d23fe947
DW
3796 *_dev = *dev;
3797 *_disk = dl->disk;
148acb7b
DW
3798 sum = random32();
3799 sum += __gen_imsm_checksum(mpb);
d23fe947 3800 mpb->family_num = __cpu_to_le32(sum);
148acb7b 3801 mpb->orig_family_num = mpb->family_num;
d23fe947 3802 }
f20c3968
DW
3803
3804 return 0;
bf5a934a
DW
3805}
3806
88654014 3807
f20c3968 3808static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
88654014 3809 int fd, char *devname)
cdddbdbc 3810{
c2c087e6 3811 struct intel_super *super = st->sb;
c2c087e6
DW
3812 struct dl *dd;
3813 unsigned long long size;
f2f27e63 3814 __u32 id;
c2c087e6
DW
3815 int rv;
3816 struct stat stb;
3817
88654014
LM
3818 /* If we are on a RAID enabled platform check that the disk is
3819 * attached to the raid controller.
3820 * We do not need to test disk attachment for container based additions;
3821 * they were already tested when the container was created/assembled.
88c32bb1 3822 */
d424212e 3823 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 3824 /* no orom/efi support, or the disk is attached to a non-intel hba */
f0f5a016
LM
3825 if (rv != 0) {
3826 dprintf("capability: %p fd: %d ret: %d\n",
3827 super->orom, fd, rv);
3828 return 1;
88c32bb1
DW
3829 }
3830
f20c3968
DW
3831 if (super->current_vol >= 0)
3832 return add_to_super_imsm_volume(st, dk, fd, devname);
bf5a934a 3833
c2c087e6
DW
3834 fstat(fd, &stb);
3835 dd = malloc(sizeof(*dd));
b9f594fe 3836 if (!dd) {
c2c087e6
DW
3837 fprintf(stderr,
3838 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
f20c3968 3839 return 1;
c2c087e6
DW
3840 }
3841 memset(dd, 0, sizeof(*dd));
3842 dd->major = major(stb.st_rdev);
3843 dd->minor = minor(stb.st_rdev);
b9f594fe 3844 dd->index = -1;
c2c087e6 3845 dd->devname = devname ? strdup(devname) : NULL;
c2c087e6 3846 dd->fd = fd;
689c9bf3 3847 dd->e = NULL;
1a64be56 3848 dd->action = DISK_ADD;
c2c087e6 3849 rv = imsm_read_serial(fd, devname, dd->serial);
32ba9157 3850 if (rv) {
c2c087e6 3851 fprintf(stderr,
0030e8d6 3852 Name ": failed to retrieve scsi serial, aborting\n");
949c47a0 3853 free(dd);
0030e8d6 3854 abort();
c2c087e6
DW
3855 }
3856
c2c087e6
DW
3857 get_dev_size(fd, NULL, &size);
3858 size /= 512;
1f24f035 3859 serialcpy(dd->disk.serial, dd->serial);
b9f594fe 3860 dd->disk.total_blocks = __cpu_to_le32(size);
ee5aad5a 3861 dd->disk.status = SPARE_DISK;
c2c087e6 3862 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
b9f594fe 3863 dd->disk.scsi_id = __cpu_to_le32(id);
c2c087e6 3864 else
b9f594fe 3865 dd->disk.scsi_id = __cpu_to_le32(0);
43dad3d6
DW
3866
3867 if (st->update_tail) {
1a64be56
LM
3868 dd->next = super->disk_mgmt_list;
3869 super->disk_mgmt_list = dd;
43dad3d6
DW
3870 } else {
3871 dd->next = super->disks;
3872 super->disks = dd;
ceaf0ee1 3873 super->updates_pending++;
43dad3d6 3874 }
f20c3968
DW
3875
3876 return 0;
cdddbdbc
DW
3877}
3878
1a64be56
LM
3879
3880static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
3881{
3882 struct intel_super *super = st->sb;
3883 struct dl *dd;
3884
3885 /* remove from super works only in mdmon context - updates travel
3886 * from the manager to the monitor. Check that the communication
3887 * memory buffer (update_tail) is prepared.
3888 */
3889 if (!st->update_tail) {
3890 fprintf(stderr,
3891 Name ": %s shall be used in mdmon context only "
3892 "(line %d).\n", __func__, __LINE__);
3893 return 1;
3894 }
3895 dd = malloc(sizeof(*dd));
3896 if (!dd) {
3897 fprintf(stderr,
3898 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
3899 return 1;
3900 }
3901 memset(dd, 0, sizeof(*dd));
3902 dd->major = dk->major;
3903 dd->minor = dk->minor;
3904 dd->index = -1;
3905 dd->fd = -1;
3906 dd->disk.status = SPARE_DISK;
3907 dd->action = DISK_REMOVE;
3908
3909 dd->next = super->disk_mgmt_list;
3910 super->disk_mgmt_list = dd;
3911
3912
3913 return 0;
3914}
3915
f796af5d
DW
3916static int store_imsm_mpb(int fd, struct imsm_super *mpb);
3917
3918static union {
3919 char buf[512];
3920 struct imsm_super anchor;
3921} spare_record __attribute__ ((aligned(512)));
c2c087e6 3922
d23fe947
DW
3923/* spare records have their own family number and do not have any defined raid
3924 * devices
3925 */
3926static int write_super_imsm_spares(struct intel_super *super, int doclose)
3927{
d23fe947 3928 struct imsm_super *mpb = super->anchor;
f796af5d 3929 struct imsm_super *spare = &spare_record.anchor;
d23fe947
DW
3930 __u32 sum;
3931 struct dl *d;
3932
f796af5d
DW
3933 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
3934 spare->generation_num = __cpu_to_le32(1UL);
3935 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
3936 spare->num_disks = 1;
3937 spare->num_raid_devs = 0;
3938 spare->cache_size = mpb->cache_size;
3939 spare->pwr_cycle_count = __cpu_to_le32(1);
3940
3941 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
3942 MPB_SIGNATURE MPB_VERSION_RAID0);
d23fe947
DW
3943
3944 for (d = super->disks; d; d = d->next) {
8796fdc4 3945 if (d->index != -1)
d23fe947
DW
3946 continue;
3947
f796af5d
DW
3948 spare->disk[0] = d->disk;
3949 sum = __gen_imsm_checksum(spare);
3950 spare->family_num = __cpu_to_le32(sum);
3951 spare->orig_family_num = 0;
3952 sum = __gen_imsm_checksum(spare);
3953 spare->check_sum = __cpu_to_le32(sum);
d23fe947 3954
f796af5d 3955 if (store_imsm_mpb(d->fd, spare)) {
d23fe947
DW
3956 fprintf(stderr, "%s: failed for device %d:%d %s\n",
3957 __func__, d->major, d->minor, strerror(errno));
e74255d9 3958 return 1;
d23fe947
DW
3959 }
3960 if (doclose) {
3961 close(d->fd);
3962 d->fd = -1;
3963 }
3964 }
3965
e74255d9 3966 return 0;
d23fe947
DW
3967}
3968
36988a3d 3969static int write_super_imsm(struct supertype *st, int doclose)
cdddbdbc 3970{
36988a3d 3971 struct intel_super *super = st->sb;
949c47a0 3972 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
3973 struct dl *d;
3974 __u32 generation;
3975 __u32 sum;
d23fe947 3976 int spares = 0;
949c47a0 3977 int i;
a48ac0a8 3978 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
36988a3d 3979 int num_disks = 0;
cdddbdbc 3980
c2c087e6
DW
3981 /* 'generation' is incremented every time the metadata is written */
3982 generation = __le32_to_cpu(mpb->generation_num);
3983 generation++;
3984 mpb->generation_num = __cpu_to_le32(generation);
3985
148acb7b
DW
3986 /* fix up cases where previous mdadm releases failed to set
3987 * orig_family_num
3988 */
3989 if (mpb->orig_family_num == 0)
3990 mpb->orig_family_num = mpb->family_num;
3991
d23fe947 3992 for (d = super->disks; d; d = d->next) {
8796fdc4 3993 if (d->index == -1)
d23fe947 3994 spares++;
36988a3d 3995 else {
d23fe947 3996 mpb->disk[d->index] = d->disk;
36988a3d
AK
3997 num_disks++;
3998 }
d23fe947 3999 }
36988a3d 4000 for (d = super->missing; d; d = d->next) {
47ee5a45 4001 mpb->disk[d->index] = d->disk;
36988a3d
AK
4002 num_disks++;
4003 }
4004 mpb->num_disks = num_disks;
4005 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
b9f594fe 4006
949c47a0
DW
4007 for (i = 0; i < mpb->num_raid_devs; i++) {
4008 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
36988a3d
AK
4009 struct imsm_dev *dev2 = get_imsm_dev(super, i);
4010 if (dev && dev2) {
4011 imsm_copy_dev(dev, dev2);
4012 mpb_size += sizeof_imsm_dev(dev, 0);
4013 }
949c47a0 4014 }
a48ac0a8
DW
4015 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
4016 mpb->mpb_size = __cpu_to_le32(mpb_size);
949c47a0 4017
c2c087e6 4018 /* recalculate checksum */
949c47a0 4019 sum = __gen_imsm_checksum(mpb);
c2c087e6
DW
4020 mpb->check_sum = __cpu_to_le32(sum);
4021
d23fe947 4022 /* write the mpb for disks that compose raid devices */
c2c087e6 4023 for (d = super->disks; d ; d = d->next) {
d23fe947
DW
4024 if (d->index < 0)
4025 continue;
f796af5d 4026 if (store_imsm_mpb(d->fd, mpb))
c2c087e6
DW
4027 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4028 __func__, d->major, d->minor, strerror(errno));
c2c087e6
DW
4029 if (doclose) {
4030 close(d->fd);
4031 d->fd = -1;
4032 }
4033 }
4034
d23fe947
DW
4035 if (spares)
4036 return write_super_imsm_spares(super, doclose);
4037
e74255d9 4038 return 0;
c2c087e6
DW
4039}
4040
0e600426 4041
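/* create_array() - package the newly created volume (dev_idx) together
 * with the serial numbers of its member disks into an
 * imsm_update_create_array record and queue it as a metadata update for
 * mdmon to apply.
 */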
9b1fb677 4042static int create_array(struct supertype *st, int dev_idx)
43dad3d6
DW
4043{
4044 size_t len;
4045 struct imsm_update_create_array *u;
4046 struct intel_super *super = st->sb;
9b1fb677 4047 struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
54c2c1ea
DW
4048 struct imsm_map *map = get_imsm_map(dev, 0);
4049 struct disk_info *inf;
4050 struct imsm_disk *disk;
4051 int i;
43dad3d6 4052
54c2c1ea
DW
4053 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
4054 sizeof(*inf) * map->num_members;
43dad3d6
DW
4055 u = malloc(len);
4056 if (!u) {
4057 fprintf(stderr, "%s: failed to allocate update buffer\n",
4058 __func__);
4059 return 1;
4060 }
4061
4062 u->type = update_create_array;
9b1fb677 4063 u->dev_idx = dev_idx;
43dad3d6 4064 imsm_copy_dev(&u->dev, dev);
54c2c1ea
DW
4065 inf = get_disk_info(u);
4066 for (i = 0; i < map->num_members; i++) {
98130f40 4067 int idx = get_imsm_disk_idx(dev, i, -1);
9b1fb677 4068
54c2c1ea
DW
4069 disk = get_imsm_disk(super, idx);
4070 serialcpy(inf[i].serial, disk->serial);
4071 }
43dad3d6
DW
4072 append_metadata_update(st, u, len);
4073
4074 return 0;
4075}
4076
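/* mgmt_disk() - if any disk add/remove requests are pending on
 * disk_mgmt_list, queue a single update_add_remove_disk metadata update
 * so mdmon processes the whole list.
 */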
1a64be56 4077static int mgmt_disk(struct supertype *st)
43dad3d6
DW
4078{
4079 struct intel_super *super = st->sb;
4080 size_t len;
1a64be56 4081 struct imsm_update_add_remove_disk *u;
43dad3d6 4082
1a64be56 4083 if (!super->disk_mgmt_list)
43dad3d6
DW
4084 return 0;
4085
4086 len = sizeof(*u);
4087 u = malloc(len);
4088 if (!u) {
4089 fprintf(stderr, "%s: failed to allocate update buffer\n",
4090 __func__);
4091 return 1;
4092 }
4093
1a64be56 4094 u->type = update_add_remove_disk;
43dad3d6
DW
4095 append_metadata_update(st, u, len);
4096
4097 return 0;
4098}
4099
c2c087e6
DW
4100static int write_init_super_imsm(struct supertype *st)
4101{
9b1fb677
DW
4102 struct intel_super *super = st->sb;
4103 int current_vol = super->current_vol;
4104
4105 /* we are done with current_vol; reset it to point st at the container */
4106 super->current_vol = -1;
4107
8273f55e 4108 if (st->update_tail) {
43dad3d6
DW
4109 /* queue the recently created array / added disk
4110 * as a metadata update */
43dad3d6 4111 int rv;
8273f55e 4112
43dad3d6 4113 /* determine if we are creating a volume or adding a disk */
9b1fb677 4114 if (current_vol < 0) {
1a64be56
LM
4115 /* in the mgmt (add/remove) disk case we are running
4116 * in mdmon context, so don't close fd's
43dad3d6 4117 */
1a64be56 4118 return mgmt_disk(st);
43dad3d6 4119 } else
9b1fb677 4120 rv = create_array(st, current_vol);
8273f55e 4121
43dad3d6 4122 return rv;
d682f344
N
4123 } else {
4124 struct dl *d;
4125 for (d = super->disks; d; d = d->next)
4126 Kill(d->devname, NULL, 0, 1, 1);
36988a3d 4127 return write_super_imsm(st, 1);
d682f344 4128 }
cdddbdbc 4129}
0e600426 4130#endif
cdddbdbc 4131
e683ca88 4132static int store_super_imsm(struct supertype *st, int fd)
cdddbdbc 4133{
e683ca88
DW
4134 struct intel_super *super = st->sb;
4135 struct imsm_super *mpb = super ? super->anchor : NULL;
551c80c1 4136
e683ca88 4137 if (!mpb)
ad97895e
DW
4138 return 1;
4139
1799c9e8 4140#ifndef MDASSEMBLE
e683ca88 4141 return store_imsm_mpb(fd, mpb);
1799c9e8
N
4142#else
4143 return 1;
4144#endif
cdddbdbc
DW
4145}
4146
0e600426
N
4147static int imsm_bbm_log_size(struct imsm_super *mpb)
4148{
4149 return __le32_to_cpu(mpb->bbm_log_size);
4150}
4151
4152#ifndef MDASSEMBLE
cdddbdbc
DW
4153static int validate_geometry_imsm_container(struct supertype *st, int level,
4154 int layout, int raiddisks, int chunk,
c2c087e6 4155 unsigned long long size, char *dev,
2c514b71
NB
4156 unsigned long long *freesize,
4157 int verbose)
cdddbdbc 4158{
c2c087e6
DW
4159 int fd;
4160 unsigned long long ldsize;
f2f5c343
LM
4161 struct intel_super *super=NULL;
4162 int rv = 0;
cdddbdbc 4163
c2c087e6
DW
4164 if (level != LEVEL_CONTAINER)
4165 return 0;
4166 if (!dev)
4167 return 1;
4168
4169 fd = open(dev, O_RDONLY|O_EXCL, 0);
4170 if (fd < 0) {
2c514b71
NB
4171 if (verbose)
4172 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
4173 dev, strerror(errno));
c2c087e6
DW
4174 return 0;
4175 }
4176 if (!get_dev_size(fd, dev, &ldsize)) {
4177 close(fd);
4178 return 0;
4179 }
f2f5c343
LM
4180
4181 /* capabilities retrieve could be possible
4182 * note that there is no fd for the disks in array.
4183 */
4184 super = alloc_super();
4185 if (!super) {
4186 fprintf(stderr,
4187 Name ": malloc of %zu failed.\n",
4188 sizeof(*super));
4189 close(fd);
4190 return 0;
4191 }
4192
d424212e 4193 rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
f2f5c343
LM
4194 if (rv != 0) {
4195#if DEBUG
4196 char str[256];
4197 fd2devname(fd, str);
4198 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4199 fd, str, super->orom, rv, raiddisks);
4200#endif
4201 /* no orom/efi support, or the disk is attached to a non-intel hba */
4202 close(fd);
4203 free_imsm(super);
4204 return 0;
4205 }
c2c087e6 4206 close(fd);
f2f5c343
LM
4207 if (super->orom && raiddisks > super->orom->tds) {
4208 if (verbose)
4209 fprintf(stderr, Name ": %d exceeds maximum number of"
4210 " platform supported disks: %d\n",
4211 raiddisks, super->orom->tds);
4212
4213 free_imsm(super);
4214 return 0;
4215 }
c2c087e6
DW
4216
4217 *freesize = avail_size_imsm(st, ldsize >> 9);
f2f5c343 4218 free_imsm(super);
c2c087e6
DW
4219
4220 return 1;
cdddbdbc
DW
4221}
4222
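/* find_size() - starting at e[*idx], merge any following extents that
 * overlap the current one and return the size of the combined region;
 * *idx is advanced so the caller can continue with the next
 * non-overlapping extent. A zero-size (terminator) extent yields 0.
 */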
0dcecb2e
DW
4223static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4224{
4225 const unsigned long long base_start = e[*idx].start;
4226 unsigned long long end = base_start + e[*idx].size;
4227 int i;
4228
4229 if (base_start == end)
4230 return 0;
4231
4232 *idx = *idx + 1;
4233 for (i = *idx; i < num_extents; i++) {
4234 /* extend overlapping extents */
4235 if (e[i].start >= base_start &&
4236 e[i].start <= end) {
4237 if (e[i].size == 0)
4238 return 0;
4239 if (e[i].start + e[i].size > end)
4240 end = e[i].start + e[i].size;
4241 } else if (e[i].start > end) {
4242 *idx = i;
4243 break;
4244 }
4245 }
4246
4247 return end - base_start;
4248}
4249
4250static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
4251{
4252 /* build a composite disk with all known extents and generate a new
4253 * 'maxsize' given the "all disks in an array must share a common start
4254 * offset" constraint
4255 */
4256 struct extent *e = calloc(sum_extents, sizeof(*e));
4257 struct dl *dl;
4258 int i, j;
4259 int start_extent;
4260 unsigned long long pos;
b9d77223 4261 unsigned long long start = 0;
0dcecb2e
DW
4262 unsigned long long maxsize;
4263 unsigned long reserve;
4264
4265 if (!e)
a7dd165b 4266 return 0;
0dcecb2e
DW
4267
4268 /* coalesce and sort all extents. also, check to see if we need to
4269 * reserve space between member arrays
4270 */
4271 j = 0;
4272 for (dl = super->disks; dl; dl = dl->next) {
4273 if (!dl->e)
4274 continue;
4275 for (i = 0; i < dl->extent_cnt; i++)
4276 e[j++] = dl->e[i];
4277 }
4278 qsort(e, sum_extents, sizeof(*e), cmp_extent);
4279
4280 /* merge extents */
4281 i = 0;
4282 j = 0;
4283 while (i < sum_extents) {
4284 e[j].start = e[i].start;
4285 e[j].size = find_size(e, &i, sum_extents);
4286 j++;
4287 if (e[j-1].size == 0)
4288 break;
4289 }
4290
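/* walk the merged extent list and locate the largest gap between used
 * extents; 'start' records where that gap begins so that all members can
 * share a common start offset (stored in super->create_offset below).
 */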
4291 pos = 0;
4292 maxsize = 0;
4293 start_extent = 0;
4294 i = 0;
4295 do {
4296 unsigned long long esize;
4297
4298 esize = e[i].start - pos;
4299 if (esize >= maxsize) {
4300 maxsize = esize;
4301 start = pos;
4302 start_extent = i;
4303 }
4304 pos = e[i].start + e[i].size;
4305 i++;
4306 } while (e[i-1].size);
4307 free(e);
4308
a7dd165b
DW
4309 if (maxsize == 0)
4310 return 0;
4311
4312 /* FIXME assumes volume at offset 0 is the first volume in a
4313 * container
4314 */
0dcecb2e
DW
4315 if (start_extent > 0)
4316 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
4317 else
4318 reserve = 0;
4319
4320 if (maxsize < reserve)
a7dd165b 4321 return 0;
0dcecb2e
DW
4322
4323 super->create_offset = ~((__u32) 0);
4324 if (start + reserve > super->create_offset)
a7dd165b 4325 return 0; /* start overflows create_offset */
0dcecb2e
DW
4326 super->create_offset = start + reserve;
4327
4328 return maxsize - reserve;
4329}
4330
88c32bb1
DW
4331static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
4332{
4333 if (level < 0 || level == 6 || level == 4)
4334 return 0;
4335
4336 /* if we have an orom prevent invalid raid levels */
4337 if (orom)
4338 switch (level) {
4339 case 0: return imsm_orom_has_raid0(orom);
4340 case 1:
4341 if (raiddisks > 2)
4342 return imsm_orom_has_raid1e(orom);
1c556e92
DW
4343 return imsm_orom_has_raid1(orom) && raiddisks == 2;
4344 case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
4345 case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
88c32bb1
DW
4346 }
4347 else
4348 return 1; /* not on an Intel RAID platform so anything goes */
4349
4350 return 0;
4351}
4352
73408129 4353
35f81cbb 4354#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
73408129
LM
4355/*
4356 * validate volume parameters with OROM/EFI capabilities
4357 */
6592ce37
DW
4358static int
4359validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
c21e737b 4360 int raiddisks, int *chunk, int verbose)
6592ce37 4361{
73408129
LM
4362#if DEBUG
4363 verbose = 1;
4364#endif
4365 /* validate container capabilities */
4366 if (super->orom && raiddisks > super->orom->tds) {
4367 if (verbose)
4368 fprintf(stderr, Name ": %d exceeds maximum number of"
4369 " platform supported disks: %d\n",
4370 raiddisks, super->orom->tds);
4371 return 0;
4372 }
4373
4374 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4375 if (super->orom && (!is_raid_level_supported(super->orom, level,
4376 raiddisks))) {
6592ce37
DW
4377 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4378 level, raiddisks, raiddisks > 1 ? "s" : "");
4379 return 0;
4380 }
c21e737b
CA
4381 if (super->orom && level != 1) {
4382 if (chunk && (*chunk == 0 || *chunk == UnSet))
4383 *chunk = imsm_orom_default_chunk(super->orom);
4384 else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
4385 pr_vrb(": platform does not support a chunk size of: "
4386 "%d\n", *chunk);
4387 return 0;
4388 }
6592ce37
DW
4389 }
4390 if (layout != imsm_level_to_layout(level)) {
4391 if (level == 5)
4392 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4393 else if (level == 10)
4394 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4395 else
4396 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4397 layout, level);
4398 return 0;
4399 }
6592ce37
DW
4400 return 1;
4401}
4402
c2c087e6
DW
4403/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4404 * FIX ME add ahci details
4405 */
8b353278 4406static int validate_geometry_imsm_volume(struct supertype *st, int level,
c21e737b 4407 int layout, int raiddisks, int *chunk,
c2c087e6 4408 unsigned long long size, char *dev,
2c514b71
NB
4409 unsigned long long *freesize,
4410 int verbose)
cdddbdbc 4411{
c2c087e6
DW
4412 struct stat stb;
4413 struct intel_super *super = st->sb;
a20d2ba5 4414 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4415 struct dl *dl;
4416 unsigned long long pos = 0;
4417 unsigned long long maxsize;
4418 struct extent *e;
4419 int i;
cdddbdbc 4420
88c32bb1
DW
4421 /* We must have the container info already read in. */
4422 if (!super)
c2c087e6
DW
4423 return 0;
4424
d54559f0
LM
4425 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4426 fprintf(stderr, Name ": RAID geometry validation failed. "
4427 "Cannot proceed with the action(s).\n");
c2c087e6 4428 return 0;
d54559f0 4429 }
c2c087e6
DW
4430 if (!dev) {
4431 /* General test: make sure there is space for
2da8544a
DW
4432 * 'raiddisks' device extents of size 'size' at a given
4433 * offset
c2c087e6 4434 */
e46273eb 4435 unsigned long long minsize = size;
b7528a20 4436 unsigned long long start_offset = MaxSector;
c2c087e6
DW
4437 int dcnt = 0;
4438 if (minsize == 0)
4439 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4440 for (dl = super->disks; dl ; dl = dl->next) {
4441 int found = 0;
4442
bf5a934a 4443 pos = 0;
c2c087e6
DW
4444 i = 0;
4445 e = get_extents(super, dl);
4446 if (!e) continue;
4447 do {
4448 unsigned long long esize;
4449 esize = e[i].start - pos;
4450 if (esize >= minsize)
4451 found = 1;
b7528a20 4452 if (found && start_offset == MaxSector) {
2da8544a
DW
4453 start_offset = pos;
4454 break;
4455 } else if (found && pos != start_offset) {
4456 found = 0;
4457 break;
4458 }
c2c087e6
DW
4459 pos = e[i].start + e[i].size;
4460 i++;
4461 } while (e[i-1].size);
4462 if (found)
4463 dcnt++;
4464 free(e);
4465 }
4466 if (dcnt < raiddisks) {
2c514b71
NB
4467 if (verbose)
4468 fprintf(stderr, Name ": imsm: Not enough "
4469 "devices with space for this array "
4470 "(%d < %d)\n",
4471 dcnt, raiddisks);
c2c087e6
DW
4472 return 0;
4473 }
4474 return 1;
4475 }
0dcecb2e 4476
c2c087e6
DW
4477 /* This device must be a member of the set */
4478 if (stat(dev, &stb) < 0)
4479 return 0;
4480 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4481 return 0;
4482 for (dl = super->disks ; dl ; dl = dl->next) {
f21e18ca
N
4483 if (dl->major == (int)major(stb.st_rdev) &&
4484 dl->minor == (int)minor(stb.st_rdev))
c2c087e6
DW
4485 break;
4486 }
4487 if (!dl) {
2c514b71
NB
4488 if (verbose)
4489 fprintf(stderr, Name ": %s is not in the "
4490 "same imsm set\n", dev);
c2c087e6 4491 return 0;
a20d2ba5
DW
4492 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4493 /* If a volume is present then the current creation attempt
4494 * cannot incorporate new spares because the orom may not
4495 * understand this configuration (all member disks must be
4496 * members of each array in the container).
4497 */
4498 fprintf(stderr, Name ": %s is a spare and a volume"
4499 " is already defined for this container\n", dev);
4500 fprintf(stderr, Name ": The option-rom requires all member"
4501 " disks to be a member of all volumes\n");
4502 return 0;
c2c087e6 4503 }
0dcecb2e
DW
4504
4505 /* retrieve the largest free space block */
c2c087e6
DW
4506 e = get_extents(super, dl);
4507 maxsize = 0;
4508 i = 0;
0dcecb2e
DW
4509 if (e) {
4510 do {
4511 unsigned long long esize;
4512
4513 esize = e[i].start - pos;
4514 if (esize >= maxsize)
4515 maxsize = esize;
4516 pos = e[i].start + e[i].size;
4517 i++;
4518 } while (e[i-1].size);
4519 dl->e = e;
4520 dl->extent_cnt = i;
4521 } else {
4522 if (verbose)
4523 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4524 dev);
4525 return 0;
4526 }
4527 if (maxsize < size) {
4528 if (verbose)
4529 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4530 dev, maxsize, size);
4531 return 0;
4532 }
4533
4534 /* count total number of extents for merge */
4535 i = 0;
4536 for (dl = super->disks; dl; dl = dl->next)
4537 if (dl->e)
4538 i += dl->extent_cnt;
4539
4540 maxsize = merge_extents(super, i);
a7dd165b 4541 if (maxsize < size || maxsize == 0) {
0dcecb2e
DW
4542 if (verbose)
4543 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4544 maxsize, size);
4545 return 0;
0dcecb2e
DW
4546 }
4547
c2c087e6
DW
4548 *freesize = maxsize;
4549
4550 return 1;
cdddbdbc
DW
4551}
4552
efb30e7f
DW
4553static int reserve_space(struct supertype *st, int raiddisks,
4554 unsigned long long size, int chunk,
4555 unsigned long long *freesize)
4556{
4557 struct intel_super *super = st->sb;
4558 struct imsm_super *mpb = super->anchor;
4559 struct dl *dl;
4560 int i;
4561 int extent_cnt;
4562 struct extent *e;
4563 unsigned long long maxsize;
4564 unsigned long long minsize;
4565 int cnt;
4566 int used;
4567
4568 /* find the largest common start free region of the possible disks */
4569 used = 0;
4570 extent_cnt = 0;
4571 cnt = 0;
4572 for (dl = super->disks; dl; dl = dl->next) {
4573 dl->raiddisk = -1;
4574
4575 if (dl->index >= 0)
4576 used++;
4577
4578 /* don't activate new spares if we are orom constrained
4579 * and there is already a volume active in the container
4580 */
4581 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
4582 continue;
4583
4584 e = get_extents(super, dl);
4585 if (!e)
4586 continue;
4587 for (i = 1; e[i-1].size; i++)
4588 ;
4589 dl->e = e;
4590 dl->extent_cnt = i;
4591 extent_cnt += i;
4592 cnt++;
4593 }
4594
4595 maxsize = merge_extents(super, extent_cnt);
4596 minsize = size;
4597 if (size == 0)
612e59d8
CA
4598 /* chunk is in K */
4599 minsize = chunk * 2;
efb30e7f
DW
4600
4601 if (cnt < raiddisks ||
4602 (super->orom && used && used != raiddisks) ||
a7dd165b
DW
4603 maxsize < minsize ||
4604 maxsize == 0) {
efb30e7f
DW
4605 fprintf(stderr, Name ": not enough devices with space to create array.\n");
4606 return 0; /* not enough disks, or no common free region large enough */
4607 }
4608
4609 if (size == 0) {
4610 size = maxsize;
4611 if (chunk) {
612e59d8
CA
4612 size /= 2 * chunk;
4613 size *= 2 * chunk;
efb30e7f
DW
4614 }
4615 }
4616
4617 cnt = 0;
4618 for (dl = super->disks; dl; dl = dl->next)
4619 if (dl->e)
4620 dl->raiddisk = cnt++;
4621
4622 *freesize = size;
4623
4624 return 1;
4625}
4626
bf5a934a 4627static int validate_geometry_imsm(struct supertype *st, int level, int layout,
c21e737b 4628 int raiddisks, int *chunk, unsigned long long size,
bf5a934a
DW
4629 char *dev, unsigned long long *freesize,
4630 int verbose)
4631{
4632 int fd, cfd;
4633 struct mdinfo *sra;
20cbe8d2 4634 int is_member = 0;
bf5a934a 4635
d54559f0
LM
4636 /* load capability
4637 * if given unused devices create a container
bf5a934a
DW
4638 * if given devices in a container create a member volume
4639 */
4640 if (level == LEVEL_CONTAINER) {
4641 /* Must be a fresh device to add to a container */
4642 return validate_geometry_imsm_container(st, level, layout,
c21e737b
CA
4643 raiddisks,
4644 chunk?*chunk:0, size,
bf5a934a
DW
4645 dev, freesize,
4646 verbose);
4647 }
4648
8592f29d
N
4649 if (!dev) {
4650 if (st->sb && freesize) {
efb30e7f
DW
4651 /* we are being asked to automatically layout a
4652 * new volume based on the current contents of
4653 * the container. If the parameters can be
4654 * satisfied reserve_space will record the disks,
4655 * start offset, and size of the volume to be
4656 * created. add_to_super and getinfo_super
4657 * detect when autolayout is in progress.
4658 */
6592ce37
DW
4659 if (!validate_geometry_imsm_orom(st->sb, level, layout,
4660 raiddisks, chunk,
4661 verbose))
4662 return 0;
c21e737b
CA
4663 return reserve_space(st, raiddisks, size,
4664 chunk?*chunk:0, freesize);
8592f29d
N
4665 }
4666 return 1;
4667 }
bf5a934a
DW
4668 if (st->sb) {
4669 /* creating in a given container */
4670 return validate_geometry_imsm_volume(st, level, layout,
4671 raiddisks, chunk, size,
4672 dev, freesize, verbose);
4673 }
4674
bf5a934a
DW
4675 /* This device needs to be a device in an 'imsm' container */
4676 fd = open(dev, O_RDONLY|O_EXCL, 0);
4677 if (fd >= 0) {
4678 if (verbose)
4679 fprintf(stderr,
4680 Name ": Cannot create this array on device %s\n",
4681 dev);
4682 close(fd);
4683 return 0;
4684 }
4685 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
4686 if (verbose)
4687 fprintf(stderr, Name ": Cannot open %s: %s\n",
4688 dev, strerror(errno));
4689 return 0;
4690 }
4691 /* Well, it is in use by someone, maybe an 'imsm' container. */
4692 cfd = open_container(fd);
20cbe8d2 4693 close(fd);
bf5a934a 4694 if (cfd < 0) {
bf5a934a
DW
4695 if (verbose)
4696 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
4697 dev);
4698 return 0;
4699 }
4700 sra = sysfs_read(cfd, 0, GET_VERSION);
bf5a934a 4701 if (sra && sra->array.major_version == -1 &&
20cbe8d2
AW
4702 strcmp(sra->text_version, "imsm") == 0)
4703 is_member = 1;
4704 sysfs_free(sra);
4705 if (is_member) {
bf5a934a
DW
4706 /* This is a member of a imsm container. Load the container
4707 * and try to create a volume
4708 */
4709 struct intel_super *super;
4710
e1902a7b 4711 if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
bf5a934a
DW
4712 st->sb = super;
4713 st->container_dev = fd2devnum(cfd);
4714 close(cfd);
4715 return validate_geometry_imsm_volume(st, level, layout,
4716 raiddisks, chunk,
4717 size, dev,
4718 freesize, verbose);
4719 }
20cbe8d2 4720 }
bf5a934a 4721
20cbe8d2
AW
4722 if (verbose)
4723 fprintf(stderr, Name ": failed container membership check\n");
4724
4725 close(cfd);
4726 return 0;
bf5a934a 4727}
0bd16cf2 4728
30f58b22 4729static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
0bd16cf2
DJ
4730{
4731 struct intel_super *super = st->sb;
4732
30f58b22
DW
4733 if (level && *level == UnSet)
4734 *level = LEVEL_CONTAINER;
4735
4736 if (level && layout && *layout == UnSet)
4737 *layout = imsm_level_to_layout(*level);
0bd16cf2 4738
1d54f286
N
4739 if (chunk && (*chunk == UnSet || *chunk == 0) &&
4740 super && super->orom)
30f58b22 4741 *chunk = imsm_orom_default_chunk(super->orom);
0bd16cf2
DJ
4742}
4743
33414a01
DW
4744static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
4745
4746static int kill_subarray_imsm(struct supertype *st)
4747{
4748 /* remove the subarray currently referenced by ->current_vol */
4749 __u8 i;
4750 struct intel_dev **dp;
4751 struct intel_super *super = st->sb;
4752 __u8 current_vol = super->current_vol;
4753 struct imsm_super *mpb = super->anchor;
4754
4755 if (super->current_vol < 0)
4756 return 2;
4757 super->current_vol = -1; /* invalidate subarray cursor */
4758
4759 /* block deletions that would change the uuid of active subarrays
4760 *
4761 * FIXME when immutable ids are available, but note that we'll
4762 * also need to fixup the invalidated/active subarray indexes in
4763 * mdstat
4764 */
4765 for (i = 0; i < mpb->num_raid_devs; i++) {
4766 char subarray[4];
4767
4768 if (i < current_vol)
4769 continue;
4770 sprintf(subarray, "%u", i);
4771 if (is_subarray_active(subarray, st->devname)) {
4772 fprintf(stderr,
4773 Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
4774 current_vol, i);
4775
4776 return 2;
4777 }
4778 }
4779
4780 if (st->update_tail) {
4781 struct imsm_update_kill_array *u = malloc(sizeof(*u));
4782
4783 if (!u)
4784 return 2;
4785 u->type = update_kill_array;
4786 u->dev_idx = current_vol;
4787 append_metadata_update(st, u, sizeof(*u));
4788
4789 return 0;
4790 }
4791
4792 for (dp = &super->devlist; *dp;)
4793 if ((*dp)->index == current_vol) {
4794 *dp = (*dp)->next;
4795 } else {
4796 handle_missing(super, (*dp)->dev);
4797 if ((*dp)->index > current_vol)
4798 (*dp)->index--;
4799 dp = &(*dp)->next;
4800 }
4801
4802 /* no more raid devices, all active components are now spares,
4803 * but of course failed are still failed
4804 */
4805 if (--mpb->num_raid_devs == 0) {
4806 struct dl *d;
4807
4808 for (d = super->disks; d; d = d->next)
4809 if (d->index > -2) {
4810 d->index = -1;
4811 d->disk.status = SPARE_DISK;
4812 }
4813 }
4814
4815 super->updates_pending++;
4816
4817 return 0;
4818}
aa534678 4819
a951a4f7 4820static int update_subarray_imsm(struct supertype *st, char *subarray,
fa56eddb 4821 char *update, struct mddev_ident *ident)
aa534678
DW
4822{
4823 /* update the subarray currently referenced by ->current_vol */
4824 struct intel_super *super = st->sb;
4825 struct imsm_super *mpb = super->anchor;
4826
aa534678
DW
4827 if (strcmp(update, "name") == 0) {
4828 char *name = ident->name;
a951a4f7
N
4829 char *ep;
4830 int vol;
aa534678 4831
a951a4f7 4832 if (is_subarray_active(subarray, st->devname)) {
aa534678
DW
4833 fprintf(stderr,
4834 Name ": Unable to update name of active subarray\n");
4835 return 2;
4836 }
4837
4838 if (!check_name(super, name, 0))
4839 return 2;
4840
a951a4f7
N
4841 vol = strtoul(subarray, &ep, 10);
4842 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
4843 return 2;
4844
aa534678
DW
4845 if (st->update_tail) {
4846 struct imsm_update_rename_array *u = malloc(sizeof(*u));
4847
4848 if (!u)
4849 return 2;
4850 u->type = update_rename_array;
a951a4f7 4851 u->dev_idx = vol;
aa534678
DW
4852 snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
4853 append_metadata_update(st, u, sizeof(*u));
4854 } else {
4855 struct imsm_dev *dev;
4856 int i;
4857
a951a4f7 4858 dev = get_imsm_dev(super, vol);
aa534678
DW
4859 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
4860 for (i = 0; i < mpb->num_raid_devs; i++) {
4861 dev = get_imsm_dev(super, i);
4862 handle_missing(super, dev);
4863 }
4864 super->updates_pending++;
4865 }
4866 } else
4867 return 2;
4868
4869 return 0;
4870}
bf5a934a 4871
28bce06f
AK
4872static int is_gen_migration(struct imsm_dev *dev)
4873{
4874 if (!dev->vol.migr_state)
4875 return 0;
4876
4877 if (migr_type(dev) == MIGR_GEN_MIGR)
4878 return 1;
4879
4880 return 0;
4881}
71204a50 4882#endif /* MDASSEMBLE */
28bce06f 4883
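/* a volume is rebuilding when a MIGR_REBUILD migration is in progress and
 * the source (second) map is still marked degraded.
 */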
1e5c6983
DW
4884static int is_rebuilding(struct imsm_dev *dev)
4885{
4886 struct imsm_map *migr_map;
4887
4888 if (!dev->vol.migr_state)
4889 return 0;
4890
4891 if (migr_type(dev) != MIGR_REBUILD)
4892 return 0;
4893
4894 migr_map = get_imsm_map(dev, 1);
4895
4896 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
4897 return 1;
4898 else
4899 return 0;
4900}
4901
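/* update_recovery_start() - translate the checkpoint recorded in the
 * metadata (vol.curr_migr_unit) into a block-based recovery_start for the
 * single disk being rebuilt; punt if zero or more than one rebuild target
 * is found.
 */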
4902static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
4903{
4904 struct mdinfo *rebuild = NULL;
4905 struct mdinfo *d;
4906 __u32 units;
4907
4908 if (!is_rebuilding(dev))
4909 return;
4910
4911 /* Find the rebuild target, but punt on the dual rebuild case */
4912 for (d = array->devs; d; d = d->next)
4913 if (d->recovery_start == 0) {
4914 if (rebuild)
4915 return;
4916 rebuild = d;
4917 }
4918
4363fd80
DW
4919 if (!rebuild) {
4920 /* (?) none of the disks are marked with
4921 * IMSM_ORD_REBUILD, so assume they are missing and the
4922 * disk_ord_tbl was not correctly updated
4923 */
4924 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
4925 return;
4926 }
4927
1e5c6983
DW
4928 units = __le32_to_cpu(dev->vol.curr_migr_unit);
4929 rebuild->recovery_start = units * blocks_per_migr_unit(dev);
4930}
4931
4932
00bbdbda 4933static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
cdddbdbc 4934{
4f5bc454
DW
4935 /* Given a container loaded by load_super_imsm_all,
4936 * extract information about all the arrays into
4937 * an mdinfo tree.
00bbdbda 4938 * If 'subarray' is given, just extract info about that array.
4f5bc454
DW
4939 *
4940 * For each imsm_dev create an mdinfo, fill it in,
4941 * then look for matching devices in super->disks
4942 * and create appropriate device mdinfo.
4943 */
4944 struct intel_super *super = st->sb;
949c47a0 4945 struct imsm_super *mpb = super->anchor;
4f5bc454 4946 struct mdinfo *rest = NULL;
00bbdbda 4947 unsigned int i;
a06d022d 4948 int bbm_errors = 0;
abef11a3
AK
4949 struct dl *d;
4950 int spare_disks = 0;
cdddbdbc 4951
a06d022d
KW
4952 /* check for bad blocks */
4953 if (imsm_bbm_log_size(super->anchor))
4954 bbm_errors = 1;
604b746f 4955
abef11a3
AK
4956 /* count spare devices, not used in maps
4957 */
4958 for (d = super->disks; d; d = d->next)
4959 if (d->index == -1)
4960 spare_disks++;
4961
4f5bc454 4962 for (i = 0; i < mpb->num_raid_devs; i++) {
00bbdbda
N
4963 struct imsm_dev *dev;
4964 struct imsm_map *map;
86e3692b 4965 struct imsm_map *map2;
4f5bc454 4966 struct mdinfo *this;
2db86302 4967 int slot, chunk;
00bbdbda
N
4968 char *ep;
4969
4970 if (subarray &&
4971 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
4972 continue;
4973
4974 dev = get_imsm_dev(super, i);
4975 map = get_imsm_map(dev, 0);
86e3692b 4976 map2 = get_imsm_map(dev, 1);
4f5bc454 4977
1ce0101c
DW
4978 /* do not publish arrays that are in the middle of an
4979 * unsupported migration
4980 */
4981 if (dev->vol.migr_state &&
28bce06f 4982 (migr_type(dev) == MIGR_STATE_CHANGE)) {
1ce0101c
DW
4983 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
4984 " unsupported migration in progress\n",
4985 dev->volume);
4986 continue;
4987 }
2db86302
LM
4988 /* do not publish arrays that are not supported by the controller's
4989 * OROM/EFI
4990 */
1ce0101c 4991
2db86302 4992 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
7b0bbd0f 4993#ifndef MDASSEMBLE
2db86302
LM
4994 if (!validate_geometry_imsm_orom(super,
4995 get_imsm_raid_level(map), /* RAID level */
4996 imsm_level_to_layout(get_imsm_raid_level(map)),
4997 map->num_members, /* raid disks */
4998 &chunk,
4999 1 /* verbose */)) {
5000 fprintf(stderr, Name ": RAID geometry validation failed. "
5001 "Cannot proceed with the action(s).\n");
5002 continue;
5003 }
7b0bbd0f 5004#endif /* MDASSEMBLE */
4f5bc454 5005 this = malloc(sizeof(*this));
0fbd635c 5006 if (!this) {
cf1be220 5007 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
0fbd635c
AW
5008 sizeof(*this));
5009 break;
5010 }
4f5bc454
DW
5011 memset(this, 0, sizeof(*this));
5012 this->next = rest;
4f5bc454 5013
301406c9 5014 super->current_vol = i;
a5d85af7 5015 getinfo_super_imsm_volume(st, this, NULL);
4f5bc454 5016 for (slot = 0 ; slot < map->num_members; slot++) {
1e5c6983 5017 unsigned long long recovery_start;
4f5bc454
DW
5018 struct mdinfo *info_d;
5019 struct dl *d;
5020 int idx;
9a1608e5 5021 int skip;
7eef0453 5022 __u32 ord;
4f5bc454 5023
9a1608e5 5024 skip = 0;
98130f40 5025 idx = get_imsm_disk_idx(dev, slot, 0);
196b0d44 5026 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
4f5bc454
DW
5027 for (d = super->disks; d ; d = d->next)
5028 if (d->index == idx)
0fbd635c 5029 break;
4f5bc454 5030
1e5c6983 5031 recovery_start = MaxSector;
4f5bc454 5032 if (d == NULL)
9a1608e5 5033 skip = 1;
25ed7e59 5034 if (d && is_failed(&d->disk))
9a1608e5 5035 skip = 1;
7eef0453 5036 if (ord & IMSM_ORD_REBUILD)
1e5c6983 5037 recovery_start = 0;
9a1608e5
DW
5038
5039 /*
5040 * if we skip some disks the array will be assembled degraded;
1e5c6983
DW
5041 * reset resync start to avoid a dirty-degraded
5042 * situation when performing the initial sync
9a1608e5
DW
5043 *
5044 * FIXME handle dirty degraded
5045 */
1e5c6983 5046 if ((skip || recovery_start == 0) && !dev->vol.dirty)
b7528a20 5047 this->resync_start = MaxSector;
9a1608e5
DW
5048 if (skip)
5049 continue;
4f5bc454 5050
1e5c6983 5051 info_d = calloc(1, sizeof(*info_d));
9a1608e5
DW
5052 if (!info_d) {
5053 fprintf(stderr, Name ": failed to allocate disk"
1ce0101c 5054 " for volume %.16s\n", dev->volume);
1e5c6983
DW
5055 info_d = this->devs;
5056 while (info_d) {
5057 struct mdinfo *d = info_d->next;
5058
5059 free(info_d);
5060 info_d = d;
5061 }
9a1608e5
DW
5062 free(this);
5063 this = rest;
5064 break;
5065 }
4f5bc454
DW
5066 info_d->next = this->devs;
5067 this->devs = info_d;
5068
4f5bc454
DW
5069 info_d->disk.number = d->index;
5070 info_d->disk.major = d->major;
5071 info_d->disk.minor = d->minor;
5072 info_d->disk.raid_disk = slot;
1e5c6983 5073 info_d->recovery_start = recovery_start;
86e3692b
AK
5074 if (map2) {
5075 if (slot < map2->num_members)
5076 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5077 else
5078 this->array.spare_disks++;
86e3692b
AK
5079 } else {
5080 if (slot < map->num_members)
5081 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5082 else
5083 this->array.spare_disks++;
86e3692b 5084 }
1e5c6983
DW
5085 if (info_d->recovery_start == MaxSector)
5086 this->array.working_disks++;
4f5bc454
DW
5087
5088 info_d->events = __le32_to_cpu(mpb->generation_num);
5089 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5090 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
4f5bc454 5091 }
1e5c6983
DW
5092 /* now that the disk list is up-to-date fixup recovery_start */
5093 update_recovery_start(dev, this);
abef11a3 5094 this->array.spare_disks += spare_disks;
9a1608e5 5095 rest = this;
4f5bc454
DW
5096 }
5097
a06d022d
KW
5098 /* if array has bad blocks, set suitable bit in array status */
5099 if (bbm_errors)
5100 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5101
4f5bc454 5102 return rest;
cdddbdbc
DW
5103}
5104
845dea95 5105
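/* imsm_check_degraded() - map a count of failed members to the state the
 * volume should report: raid0 fails on any loss, raid1/raid10 fail only
 * when a complete mirror is gone, raid5 tolerates a single failure.
 */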
fb49eef2 5106static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
c2a1e7da 5107{
a965f303 5108 struct imsm_map *map = get_imsm_map(dev, 0);
c2a1e7da
DW
5109
5110 if (!failed)
3393c6af
DW
5111 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
5112 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
c2a1e7da
DW
5113
5114 switch (get_imsm_raid_level(map)) {
5115 case 0:
5116 return IMSM_T_STATE_FAILED;
5117 break;
5118 case 1:
5119 if (failed < map->num_members)
5120 return IMSM_T_STATE_DEGRADED;
5121 else
5122 return IMSM_T_STATE_FAILED;
5123 break;
5124 case 10:
5125 {
5126 /**
c92a2527
DW
5127 * check whether any mirror pair has failed completely - if so the
5128 * array has failed, otherwise it is only degraded. Even numbered
5129 * slots are mirrored on slot+1
c2a1e7da 5130 */
c2a1e7da 5131 int i;
d9b420a5
N
5132 /* gcc -Os warns this may be used uninitialized; self-init silences it */
5133 int insync = insync;
c2a1e7da
DW
5134
5135 for (i = 0; i < map->num_members; i++) {
98130f40 5136 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
c92a2527
DW
5137 int idx = ord_to_idx(ord);
5138 struct imsm_disk *disk;
c2a1e7da 5139
c92a2527
DW
5140 /* reset the potential in-sync count on even-numbered
5141 * slots. num_copies is always 2 for imsm raid10
5142 */
5143 if ((i & 1) == 0)
5144 insync = 2;
c2a1e7da 5145
c92a2527 5146 disk = get_imsm_disk(super, idx);
25ed7e59 5147 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
c92a2527 5148 insync--;
c2a1e7da 5149
c92a2527
DW
5150 /* no in-sync disks left in this mirror the
5151 * array has failed
5152 */
5153 if (insync == 0)
5154 return IMSM_T_STATE_FAILED;
c2a1e7da
DW
5155 }
5156
5157 return IMSM_T_STATE_DEGRADED;
5158 }
5159 case 5:
5160 if (failed < 2)
5161 return IMSM_T_STATE_DEGRADED;
5162 else
5163 return IMSM_T_STATE_FAILED;
5164 break;
5165 default:
5166 break;
5167 }
5168
5169 return map->map_state;
5170}
5171
ff077194 5172static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
c2a1e7da
DW
5173{
5174 int i;
5175 int failed = 0;
5176 struct imsm_disk *disk;
ff077194 5177 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2
DW
5178 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5179 __u32 ord;
5180 int idx;
c2a1e7da 5181
0556e1a2
DW
5182 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5183 * disks that are being rebuilt. New failures are recorded to
5184 * map[0]. So we look through all the disks we started with and
5185 * see if any failures are still present, or if any new ones
5186 * have arrived
5187 *
5188 * FIXME add support for online capacity expansion and
5189 * raid-level-migration
5190 */
5191 for (i = 0; i < prev->num_members; i++) {
5192 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5193 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5194 idx = ord_to_idx(ord);
c2a1e7da 5195
949c47a0 5196 disk = get_imsm_disk(super, idx);
25ed7e59 5197 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
fcb84475 5198 failed++;
c2a1e7da
DW
5199 }
5200
5201 return failed;
845dea95
NB
5202}
5203
97b4d0e9
DW
5204#ifndef MDASSEMBLE
5205static int imsm_open_new(struct supertype *c, struct active_array *a,
5206 char *inst)
5207{
5208 struct intel_super *super = c->sb;
5209 struct imsm_super *mpb = super->anchor;
5210
5211 if (atoi(inst) >= mpb->num_raid_devs) {
5212 fprintf(stderr, "%s: subarray index %d, out of range\n",
5213 __func__, atoi(inst));
5214 return -ENODEV;
5215 }
5216
5217 dprintf("imsm: open_new %s\n", inst);
5218 a->info.container_member = atoi(inst);
5219 return 0;
5220}
5221
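/* a volume is resyncing when an initialization (MIGR_INIT) or repair
 * (MIGR_REPAIR) migration is active, or when a non-general migration's
 * source map is still in the NORMAL state.
 */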
0c046afd
DW
5222static int is_resyncing(struct imsm_dev *dev)
5223{
5224 struct imsm_map *migr_map;
5225
5226 if (!dev->vol.migr_state)
5227 return 0;
5228
1484e727
DW
5229 if (migr_type(dev) == MIGR_INIT ||
5230 migr_type(dev) == MIGR_REPAIR)
0c046afd
DW
5231 return 1;
5232
4c9bc37b
AK
5233 if (migr_type(dev) == MIGR_GEN_MIGR)
5234 return 0;
5235
0c046afd
DW
5236 migr_map = get_imsm_map(dev, 1);
5237
4c9bc37b
AK
5238 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5239 (dev->vol.migr_type != MIGR_GEN_MIGR))
0c046afd
DW
5240 return 1;
5241 else
5242 return 0;
5243}
5244
0556e1a2
DW
5245/* return true if we recorded new information */
5246static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
47ee5a45 5247{
0556e1a2
DW
5248 __u32 ord;
5249 int slot;
5250 struct imsm_map *map;
5251
5252 /* new failures are always set in map[0] */
5253 map = get_imsm_map(dev, 0);
5254
5255 slot = get_imsm_disk_slot(map, idx);
5256 if (slot < 0)
5257 return 0;
5258
5259 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
25ed7e59 5260 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
0556e1a2
DW
5261 return 0;
5262
f2f27e63 5263 disk->status |= FAILED_DISK;
0556e1a2 5264 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
f21e18ca 5265 if (map->failed_disk_num == 0xff)
0556e1a2
DW
5266 map->failed_disk_num = slot;
5267 return 1;
5268}
5269
5270static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5271{
5272 mark_failure(dev, disk, idx);
5273
5274 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5275 return;
5276
47ee5a45
DW
5277 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5278 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5279}
5280
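/* handle_missing() - if the container has missing disks, finish any
 * in-progress migration at the computed degraded/failed state and record
 * each missing disk as failed in this volume's metadata.
 */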
33414a01
DW
5281static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5282{
5283 __u8 map_state;
5284 struct dl *dl;
5285 int failed;
5286
5287 if (!super->missing)
5288 return;
5289 failed = imsm_count_failed(super, dev);
5290 map_state = imsm_check_degraded(super, dev, failed);
5291
5292 dprintf("imsm: mark missing\n");
5293 end_migration(dev, map_state);
5294 for (dl = super->missing; dl; dl = dl->next)
5295 mark_missing(dev, &dl->disk, dl->index);
5296 super->updates_pending++;
5297}
5298
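/* imsm_set_array_size() - recompute the volume size as blocks_per_member
 * times the number of data members, round it down to a whole MB, store it
 * in size_low/size_high and return the new size in blocks.
 */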
70bdf0dc
AK
5299static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5300{
5301 int used_disks = imsm_num_data_members(dev, 0);
5302 unsigned long long array_blocks;
5303 struct imsm_map *map;
5304
5305 if (used_disks == 0) {
5306 /* if there are no data members (error case),
5307 * return the current array_blocks value
5308 */
5309 array_blocks = __le32_to_cpu(dev->size_high);
5310 array_blocks = array_blocks << 32;
5311 array_blocks += __le32_to_cpu(dev->size_low);
5312
5313 return array_blocks;
5314 }
5315
5316 /* set array size in metadata
5317 */
5318 map = get_imsm_map(dev, 0);
5319 array_blocks = map->blocks_per_member * used_disks;
5320
5321 /* round array size down to closest MB
5322 */
5323 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5324 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5325 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5326
5327 return array_blocks;
5328}
5329
28bce06f
AK
5330static void imsm_set_disk(struct active_array *a, int n, int state);
5331
0e2d1a4e
AK
5332static void imsm_progress_container_reshape(struct intel_super *super)
5333{
5334 /* if no device has a migr_state, but some device has a
5335 * different number of members than the previous device, start
5336 * changing the number of devices in this device to match
5337 * previous.
5338 */
5339 struct imsm_super *mpb = super->anchor;
5340 int prev_disks = -1;
5341 int i;
1dfaa380 5342 int copy_map_size;
0e2d1a4e
AK
5343
5344 for (i = 0; i < mpb->num_raid_devs; i++) {
5345 struct imsm_dev *dev = get_imsm_dev(super, i);
5346 struct imsm_map *map = get_imsm_map(dev, 0);
5347 struct imsm_map *map2;
5348 int prev_num_members;
0e2d1a4e
AK
5349
5350 if (dev->vol.migr_state)
5351 return;
5352
5353 if (prev_disks == -1)
5354 prev_disks = map->num_members;
5355 if (prev_disks == map->num_members)
5356 continue;
5357
5358 /* OK, this array needs to enter reshape mode.
5359 * i.e it needs a migr_state
5360 */
5361
1dfaa380 5362 copy_map_size = sizeof_imsm_map(map);
0e2d1a4e
AK
5363 prev_num_members = map->num_members;
5364 map->num_members = prev_disks;
5365 dev->vol.migr_state = 1;
5366 dev->vol.curr_migr_unit = 0;
5367 dev->vol.migr_type = MIGR_GEN_MIGR;
5368 for (i = prev_num_members;
5369 i < map->num_members; i++)
5370 set_imsm_ord_tbl_ent(map, i, i);
5371 map2 = get_imsm_map(dev, 1);
5372 /* Copy the current map */
1dfaa380 5373 memcpy(map2, map, copy_map_size);
0e2d1a4e
AK
5374 map2->num_members = prev_num_members;
5375
70bdf0dc 5376 imsm_set_array_size(dev);
0e2d1a4e
AK
5377 super->updates_pending++;
5378 }
5379}
5380
aad6f216 5381 /* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
0c046afd
DW
5382 * states are handled in imsm_set_disk() with one exception, when a
5383 * resync is stopped due to a new failure this routine will set the
5384 * 'degraded' state for the array.
5385 */
01f157d7 5386static int imsm_set_array_state(struct active_array *a, int consistent)
a862209d
DW
5387{
5388 int inst = a->info.container_member;
5389 struct intel_super *super = a->container->sb;
949c47a0 5390 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5391 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd
DW
5392 int failed = imsm_count_failed(super, dev);
5393 __u8 map_state = imsm_check_degraded(super, dev, failed);
1e5c6983 5394 __u32 blocks_per_unit;
a862209d 5395
1af97990
AK
5396 if (dev->vol.migr_state &&
5397 dev->vol.migr_type == MIGR_GEN_MIGR) {
5398 /* array state change is blocked due to reshape action
aad6f216
N
5399 * We might need to
5400 * - abort the reshape (if last_checkpoint is 0 and action != reshape)
5401 * - finish the reshape (if last_checkpoint is big and action != reshape)
5402 * - update curr_migr_unit
1af97990 5403 */
aad6f216
N
5404 if (a->curr_action == reshape) {
5405 /* still reshaping, maybe update curr_migr_unit */
633b5610 5406 goto mark_checkpoint;
aad6f216
N
5407 } else {
5408 if (a->last_checkpoint == 0 && a->prev_action == reshape) {
5409 /* for some reason we aborted the reshape.
5410 * Better clean up
5411 */
5412 struct imsm_map *map2 = get_imsm_map(dev, 1);
5413 dev->vol.migr_state = 0;
5414 dev->vol.migr_type = 0;
5415 dev->vol.curr_migr_unit = 0;
5416 memcpy(map, map2, sizeof_imsm_map(map2));
5417 super->updates_pending++;
5418 }
5419 if (a->last_checkpoint >= a->info.component_size) {
5420 unsigned long long array_blocks;
5421 int used_disks;
e154ced3 5422 struct mdinfo *mdi;
aad6f216 5423
9653001d 5424 used_disks = imsm_num_data_members(dev, 0);
d55adef9
AK
5425 if (used_disks > 0) {
5426 array_blocks =
5427 map->blocks_per_member *
5428 used_disks;
5429 /* round array size down to closest MB
5430 */
5431 array_blocks = (array_blocks
5432 >> SECT_PER_MB_SHIFT)
5433 << SECT_PER_MB_SHIFT;
d55adef9
AK
5434 a->info.custom_array_size = array_blocks;
5435 /* encourage manager to update array
5436 * size
5437 */
e154ced3 5438
d55adef9 5439 a->check_reshape = 1;
633b5610 5440 }
e154ced3
AK
5441 /* finalize online capacity expansion/reshape */
5442 for (mdi = a->info.devs; mdi; mdi = mdi->next)
5443 imsm_set_disk(a,
5444 mdi->disk.raid_disk,
5445 mdi->curr_state);
5446
0e2d1a4e 5447 imsm_progress_container_reshape(super);
e154ced3 5448 }
aad6f216 5449 }
1af97990
AK
5450 }
5451
47ee5a45 5452 /* before we activate this array handle any missing disks */
33414a01
DW
5453 if (consistent == 2)
5454 handle_missing(super, dev);
1e5c6983 5455
0c046afd 5456 if (consistent == 2 &&
b7941fd6 5457 (!is_resync_complete(&a->info) ||
0c046afd
DW
5458 map_state != IMSM_T_STATE_NORMAL ||
5459 dev->vol.migr_state))
01f157d7 5460 consistent = 0;
272906ef 5461
b7941fd6 5462 if (is_resync_complete(&a->info)) {
0c046afd 5463		/* complete initialization / resync,
0556e1a2
DW
5464		 * recovery and interrupted recovery are completed in
5465 * ->set_disk
0c046afd
DW
5466 */
5467 if (is_resyncing(dev)) {
5468 dprintf("imsm: mark resync done\n");
f8f603f1 5469 end_migration(dev, map_state);
115c3803 5470 super->updates_pending++;
484240d8 5471 a->last_checkpoint = 0;
115c3803 5472 }
0c046afd
DW
5473 } else if (!is_resyncing(dev) && !failed) {
5474 /* mark the start of the init process if nothing is failed */
b7941fd6 5475 dprintf("imsm: mark resync start\n");
1484e727 5476 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
e3bba0e0 5477 migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
1484e727
DW
5478 else
5479 migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
3393c6af 5480 super->updates_pending++;
115c3803 5481 }
a862209d 5482
633b5610 5483mark_checkpoint:
1e5c6983
DW
5484 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5485 blocks_per_unit = blocks_per_migr_unit(dev);
4f0a7acc 5486 if (blocks_per_unit) {
1e5c6983
DW
5487 __u32 units32;
5488 __u64 units;
5489
4f0a7acc 5490 units = a->last_checkpoint / blocks_per_unit;
1e5c6983
DW
5491 units32 = units;
5492
5493 /* check that we did not overflow 32-bits, and that
5494 * curr_migr_unit needs updating
5495 */
5496 if (units32 == units &&
bfd80a56 5497 units32 != 0 &&
1e5c6983
DW
5498 __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
5499 dprintf("imsm: mark checkpoint (%u)\n", units32);
5500 dev->vol.curr_migr_unit = __cpu_to_le32(units32);
5501 super->updates_pending++;
5502 }
5503 }
f8f603f1 5504
3393c6af 5505 /* mark dirty / clean */
0c046afd 5506 if (dev->vol.dirty != !consistent) {
b7941fd6 5507 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
0c046afd
DW
5508 if (consistent)
5509 dev->vol.dirty = 0;
5510 else
5511 dev->vol.dirty = 1;
a862209d
DW
5512 super->updates_pending++;
5513 }
28bce06f 5514
01f157d7 5515 return consistent;
a862209d
DW
5516}
5517
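/* Reflect a per-slot state change reported by the monitor (DS_FAULTY,
 * DS_INSYNC, ...) for member 'n' of array 'a' in the imsm metadata:
 * record new failures, note rebuild completion in the migration map,
 * and move the map between normal/degraded/failed as appropriate.
 */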
8d45d196 5518static void imsm_set_disk(struct active_array *a, int n, int state)
845dea95 5519{
8d45d196
DW
5520 int inst = a->info.container_member;
5521 struct intel_super *super = a->container->sb;
949c47a0 5522 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5523 struct imsm_map *map = get_imsm_map(dev, 0);
8d45d196 5524 struct imsm_disk *disk;
0c046afd 5525 int failed;
b10b37b8 5526 __u32 ord;
0c046afd 5527 __u8 map_state;
8d45d196
DW
5528
5529	if (n >= map->num_members)
5530 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5531 n, map->num_members - 1);
5532
5533 if (n < 0)
5534 return;
5535
4e6e574a 5536 dprintf("imsm: set_disk %d:%x\n", n, state);
8d45d196 5537
98130f40 5538 ord = get_imsm_ord_tbl_ent(dev, n, -1);
b10b37b8 5539 disk = get_imsm_disk(super, ord_to_idx(ord));
8d45d196 5540
5802a811 5541 /* check for new failures */
0556e1a2
DW
5542 if (state & DS_FAULTY) {
5543 if (mark_failure(dev, disk, ord_to_idx(ord)))
5544 super->updates_pending++;
8d45d196 5545 }
47ee5a45 5546
19859edc 5547 /* check if in_sync */
0556e1a2 5548 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
b10b37b8
DW
5549 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5550
5551 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
19859edc
DW
5552 super->updates_pending++;
5553 }
8d45d196 5554
0c046afd
DW
5555 failed = imsm_count_failed(super, dev);
5556 map_state = imsm_check_degraded(super, dev, failed);
5802a811 5557
0c046afd
DW
5558 /* check if recovery complete, newly degraded, or failed */
5559 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
f8f603f1 5560 end_migration(dev, map_state);
0556e1a2
DW
5561 map = get_imsm_map(dev, 0);
5562 map->failed_disk_num = ~0;
0c046afd 5563 super->updates_pending++;
484240d8 5564 a->last_checkpoint = 0;
0c046afd
DW
5565 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5566 map->map_state != map_state &&
5567 !dev->vol.migr_state) {
5568 dprintf("imsm: mark degraded\n");
5569 map->map_state = map_state;
5570 super->updates_pending++;
484240d8 5571 a->last_checkpoint = 0;
0c046afd
DW
5572 } else if (map_state == IMSM_T_STATE_FAILED &&
5573 map->map_state != map_state) {
5574 dprintf("imsm: mark failed\n");
f8f603f1 5575 end_migration(dev, map_state);
0c046afd 5576 super->updates_pending++;
484240d8 5577 a->last_checkpoint = 0;
28bce06f
AK
5578 } else if (is_gen_migration(dev)) {
5579 dprintf("imsm: Detected General Migration in state: ");
5580 if (map_state == IMSM_T_STATE_NORMAL) {
5581 end_migration(dev, map_state);
5582 map = get_imsm_map(dev, 0);
5583 map->failed_disk_num = ~0;
5584 dprintf("normal\n");
5585 } else {
5586 if (map_state == IMSM_T_STATE_DEGRADED) {
5587 printf("degraded\n");
5588 end_migration(dev, map_state);
5589 } else {
5590 dprintf("failed\n");
5591 }
5592 map->map_state = map_state;
5593 }
5594 super->updates_pending++;
5802a811 5595 }
845dea95
NB
5596}
5597
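/* Write the mpb image to 'fd': the 512-byte anchor lands in the second
 * to last sector of the device and any extended mpb data is written to
 * the sectors immediately preceding it.
 */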
f796af5d 5598static int store_imsm_mpb(int fd, struct imsm_super *mpb)
c2a1e7da 5599{
f796af5d 5600 void *buf = mpb;
c2a1e7da
DW
5601 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5602 unsigned long long dsize;
5603 unsigned long long sectors;
5604
5605 get_dev_size(fd, NULL, &dsize);
5606
272f648f
DW
5607 if (mpb_size > 512) {
5608 /* -1 to account for anchor */
5609 sectors = mpb_sectors(mpb) - 1;
c2a1e7da 5610
272f648f
DW
5611		/* write the extended mpb to the sectors preceding the anchor */
5612 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5613 return 1;
c2a1e7da 5614
f21e18ca
N
5615 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5616 != 512 * sectors)
272f648f
DW
5617 return 1;
5618 }
c2a1e7da 5619
272f648f
DW
5620 /* first block is stored on second to last sector of the disk */
5621 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
c2a1e7da
DW
5622 return 1;
5623
f796af5d 5624 if (write(fd, buf, 512) != 512)
c2a1e7da
DW
5625 return 1;
5626
c2a1e7da
DW
5627 return 0;
5628}
5629
2e735d19 5630static void imsm_sync_metadata(struct supertype *container)
845dea95 5631{
2e735d19 5632 struct intel_super *super = container->sb;
c2a1e7da 5633
1a64be56 5634 dprintf("sync metadata: %d\n", super->updates_pending);
c2a1e7da
DW
5635 if (!super->updates_pending)
5636 return;
5637
36988a3d 5638 write_super_imsm(container, 0);
c2a1e7da
DW
5639
5640 super->updates_pending = 0;
845dea95
NB
5641}
5642
272906ef
DW
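/* Try to find the disk that previously occupied slot 'idx' of this
 * array so that it can simply be re-added; returns NULL when that disk
 * is gone or has failed.
 */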
5643static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
5644{
5645 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 5646 int i = get_imsm_disk_idx(dev, idx, -1);
272906ef
DW
5647 struct dl *dl;
5648
5649 for (dl = super->disks; dl; dl = dl->next)
5650 if (dl->index == i)
5651 break;
5652
25ed7e59 5653 if (dl && is_failed(&dl->disk))
272906ef
DW
5654 dl = NULL;
5655
5656 if (dl)
5657 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
5658
5659 return dl;
5660}
5661
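/* Pick a disk from the container that can replace the member in array
 * slot 'slot': skip disks already used by this array or listed in
 * 'additional_test_list', skip failed and in-use disks, and check that
 * the candidate has a free extent large enough to cover every member
 * volume. Pristine spares (index == -1) are only considered when
 * 'activate_new' is set.
 */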
a20d2ba5 5662static struct dl *imsm_add_spare(struct intel_super *super, int slot,
8ba77d32
AK
5663 struct active_array *a, int activate_new,
5664 struct mdinfo *additional_test_list)
272906ef
DW
5665{
5666 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 5667 int idx = get_imsm_disk_idx(dev, slot, -1);
a20d2ba5
DW
5668 struct imsm_super *mpb = super->anchor;
5669 struct imsm_map *map;
272906ef
DW
5670 unsigned long long pos;
5671 struct mdinfo *d;
5672 struct extent *ex;
a20d2ba5 5673 int i, j;
272906ef 5674 int found;
569cc43f
DW
5675 __u32 array_start = 0;
5676 __u32 array_end = 0;
272906ef 5677 struct dl *dl;
6c932028 5678 struct mdinfo *test_list;
272906ef
DW
5679
5680 for (dl = super->disks; dl; dl = dl->next) {
5681 /* If in this array, skip */
5682 for (d = a->info.devs ; d ; d = d->next)
e553d2a4
DW
5683 if (d->state_fd >= 0 &&
5684 d->disk.major == dl->major &&
272906ef 5685 d->disk.minor == dl->minor) {
8ba77d32
AK
5686 dprintf("%x:%x already in array\n",
5687 dl->major, dl->minor);
272906ef
DW
5688 break;
5689 }
5690 if (d)
5691 continue;
6c932028
AK
5692 test_list = additional_test_list;
5693 while (test_list) {
5694 if (test_list->disk.major == dl->major &&
5695 test_list->disk.minor == dl->minor) {
8ba77d32
AK
5696 dprintf("%x:%x already in additional test list\n",
5697 dl->major, dl->minor);
5698 break;
5699 }
6c932028 5700 test_list = test_list->next;
8ba77d32 5701 }
6c932028 5702 if (test_list)
8ba77d32 5703 continue;
272906ef 5704
e553d2a4 5705 /* skip in use or failed drives */
25ed7e59 5706 if (is_failed(&dl->disk) || idx == dl->index ||
df474657
DW
5707 dl->index == -2) {
5708 dprintf("%x:%x status (failed: %d index: %d)\n",
25ed7e59 5709 dl->major, dl->minor, is_failed(&dl->disk), idx);
9a1608e5
DW
5710 continue;
5711 }
5712
a20d2ba5
DW
5713 /* skip pure spares when we are looking for partially
5714 * assimilated drives
5715 */
5716 if (dl->index == -1 && !activate_new)
5717 continue;
5718
272906ef 5719 /* Does this unused device have the requisite free space?
a20d2ba5 5720 * It needs to be able to cover all member volumes
272906ef
DW
5721 */
5722 ex = get_extents(super, dl);
5723 if (!ex) {
5724 dprintf("cannot get extents\n");
5725 continue;
5726 }
a20d2ba5
DW
5727 for (i = 0; i < mpb->num_raid_devs; i++) {
5728 dev = get_imsm_dev(super, i);
5729 map = get_imsm_map(dev, 0);
272906ef 5730
a20d2ba5
DW
5731 /* check if this disk is already a member of
5732 * this array
272906ef 5733 */
620b1713 5734 if (get_imsm_disk_slot(map, dl->index) >= 0)
a20d2ba5
DW
5735 continue;
5736
5737 found = 0;
5738 j = 0;
5739 pos = 0;
5740 array_start = __le32_to_cpu(map->pba_of_lba0);
329c8278
DW
5741 array_end = array_start +
5742 __le32_to_cpu(map->blocks_per_member) - 1;
a20d2ba5
DW
5743
5744 do {
5745 /* check that we can start at pba_of_lba0 with
5746 * blocks_per_member of space
5747 */
329c8278 5748 if (array_start >= pos && array_end < ex[j].start) {
a20d2ba5
DW
5749 found = 1;
5750 break;
5751 }
5752 pos = ex[j].start + ex[j].size;
5753 j++;
5754 } while (ex[j-1].size);
5755
5756 if (!found)
272906ef 5757 break;
a20d2ba5 5758 }
272906ef
DW
5759
5760 free(ex);
a20d2ba5 5761 if (i < mpb->num_raid_devs) {
329c8278
DW
5762 dprintf("%x:%x does not have %u to %u available\n",
5763 dl->major, dl->minor, array_start, array_end);
272906ef
DW
5764 /* No room */
5765 continue;
a20d2ba5
DW
5766 }
5767 return dl;
272906ef
DW
5768 }
5769
5770 return dl;
5771}
5772
95d07a2c
LM
5773
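/* Returns 0 (rebuild not allowed) when sub-array 'dev_idx' is failed
 * and any of its failed disks are still present in the container and
 * not scheduled for removal; returns 1 otherwise.
 */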
5774static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
5775{
5776 struct imsm_dev *dev2;
5777 struct imsm_map *map;
5778 struct dl *idisk;
5779 int slot;
5780 int idx;
5781 __u8 state;
5782
5783 dev2 = get_imsm_dev(cont->sb, dev_idx);
5784 if (dev2) {
5785 state = imsm_check_degraded(cont->sb, dev2, failed);
5786 if (state == IMSM_T_STATE_FAILED) {
5787 map = get_imsm_map(dev2, 0);
5788 if (!map)
5789 return 1;
5790 for (slot = 0; slot < map->num_members; slot++) {
5791 /*
5792 * Check if failed disks are deleted from intel
5793 * disk list or are marked to be deleted
5794 */
98130f40 5795 idx = get_imsm_disk_idx(dev2, slot, -1);
95d07a2c
LM
5796 idisk = get_imsm_dl_disk(cont->sb, idx);
5797 /*
5798 * Do not rebuild the array if failed disks
5799 * from failed sub-array are not removed from
5800 * container.
5801 */
5802 if (idisk &&
5803 is_failed(&idisk->disk) &&
5804 (idisk->action != DISK_REMOVE))
5805 return 0;
5806 }
5807 }
5808 }
5809 return 1;
5810}
5811
88758e9d
DW
5812static struct mdinfo *imsm_activate_spare(struct active_array *a,
5813 struct metadata_update **updates)
5814{
5815 /**
d23fe947
DW
5816 * Find a device with unused free space and use it to replace a
5817	 * failed/vacant region in an array. We replace failed regions one
5818	 * array at a time. The result is that a new spare disk will be added
5819 * to the first failed array and after the monitor has finished
5820 * propagating failures the remainder will be consumed.
88758e9d 5821 *
d23fe947
DW
5822 * FIXME add a capability for mdmon to request spares from another
5823 * container.
88758e9d
DW
5824 */
5825
5826 struct intel_super *super = a->container->sb;
88758e9d 5827 int inst = a->info.container_member;
949c47a0 5828 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5829 struct imsm_map *map = get_imsm_map(dev, 0);
88758e9d
DW
5830 int failed = a->info.array.raid_disks;
5831 struct mdinfo *rv = NULL;
5832 struct mdinfo *d;
5833 struct mdinfo *di;
5834 struct metadata_update *mu;
5835 struct dl *dl;
5836 struct imsm_update_activate_spare *u;
5837 int num_spares = 0;
5838 int i;
95d07a2c 5839 int allowed;
88758e9d
DW
5840
5841 for (d = a->info.devs ; d ; d = d->next) {
5842 if ((d->curr_state & DS_FAULTY) &&
5843 d->state_fd >= 0)
5844 /* wait for Removal to happen */
5845 return NULL;
5846 if (d->state_fd >= 0)
5847 failed--;
5848 }
5849
5850 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
5851 inst, failed, a->info.array.raid_disks, a->info.array.level);
1af97990
AK
5852
5853 if (dev->vol.migr_state &&
5854 dev->vol.migr_type == MIGR_GEN_MIGR)
5855 /* No repair during migration */
5856 return NULL;
5857
89c67882
AK
5858 if (a->info.array.level == 4)
5859		/* No repair for a taken-over array;
5860 * imsm doesn't support raid4
5861 */
5862 return NULL;
5863
fb49eef2 5864 if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
88758e9d
DW
5865 return NULL;
5866
95d07a2c
LM
5867 /*
5868	 * If there are any failed disks, check the state of the other volume.
5869	 * Block rebuild if another one has failed, until the failed disks
5870	 * are removed from the container.
5871 */
5872 if (failed) {
5873		dprintf("found failed disks in %s, check if there is another "
5874			"failed sub-array.\n",
5875 dev->volume);
5876 /* check if states of the other volumes allow for rebuild */
5877 for (i = 0; i < super->anchor->num_raid_devs; i++) {
5878 if (i != inst) {
5879 allowed = imsm_rebuild_allowed(a->container,
5880 i, failed);
5881 if (!allowed)
5882 return NULL;
5883 }
5884 }
5885 }
5886
88758e9d 5887 /* For each slot, if it is not working, find a spare */
88758e9d
DW
5888 for (i = 0; i < a->info.array.raid_disks; i++) {
5889 for (d = a->info.devs ; d ; d = d->next)
5890 if (d->disk.raid_disk == i)
5891 break;
5892 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
5893 if (d && (d->state_fd >= 0))
5894 continue;
5895
272906ef 5896 /*
a20d2ba5
DW
5897 * OK, this device needs recovery. Try to re-add the
5898		 * previous occupant of this slot; if this fails, see if
5899		 * we can continue the assimilation of a spare that was
5900		 * partially assimilated; finally, try to activate a new
5901 * spare.
272906ef
DW
5902 */
5903 dl = imsm_readd(super, i, a);
5904 if (!dl)
8ba77d32 5905 dl = imsm_add_spare(super, i, a, 0, NULL);
a20d2ba5 5906 if (!dl)
8ba77d32 5907 dl = imsm_add_spare(super, i, a, 1, NULL);
272906ef
DW
5908 if (!dl)
5909 continue;
5910
5911 /* found a usable disk with enough space */
5912 di = malloc(sizeof(*di));
79244939
DW
5913 if (!di)
5914 continue;
272906ef
DW
5915 memset(di, 0, sizeof(*di));
5916
5917 /* dl->index will be -1 in the case we are activating a
5918 * pristine spare. imsm_process_update() will create a
5919 * new index in this case. Once a disk is found to be
5920 * failed in all member arrays it is kicked from the
5921 * metadata
5922 */
5923 di->disk.number = dl->index;
d23fe947 5924
272906ef
DW
5925 /* (ab)use di->devs to store a pointer to the device
5926 * we chose
5927 */
5928 di->devs = (struct mdinfo *) dl;
5929
5930 di->disk.raid_disk = i;
5931 di->disk.major = dl->major;
5932 di->disk.minor = dl->minor;
5933 di->disk.state = 0;
d23534e4 5934 di->recovery_start = 0;
272906ef
DW
5935 di->data_offset = __le32_to_cpu(map->pba_of_lba0);
5936 di->component_size = a->info.component_size;
5937 di->container_member = inst;
148acb7b 5938 super->random = random32();
272906ef
DW
5939 di->next = rv;
5940 rv = di;
5941 num_spares++;
5942 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
5943 i, di->data_offset);
88758e9d 5944
272906ef 5945 break;
88758e9d
DW
5946 }
5947
5948 if (!rv)
5949 /* No spares found */
5950 return rv;
5951 /* Now 'rv' has a list of devices to return.
5952 * Create a metadata_update record to update the
5953 * disk_ord_tbl for the array
5954 */
5955 mu = malloc(sizeof(*mu));
79244939
DW
5956 if (mu) {
5957 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
5958 if (mu->buf == NULL) {
5959 free(mu);
5960 mu = NULL;
5961 }
5962 }
5963 if (!mu) {
5964 while (rv) {
5965 struct mdinfo *n = rv->next;
5966
5967 free(rv);
5968 rv = n;
5969 }
5970 return NULL;
5971 }
5972
88758e9d 5973 mu->space = NULL;
cb23f1f4 5974 mu->space_list = NULL;
88758e9d
DW
5975 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
5976 mu->next = *updates;
5977 u = (struct imsm_update_activate_spare *) mu->buf;
5978
5979 for (di = rv ; di ; di = di->next) {
5980 u->type = update_activate_spare;
d23fe947
DW
5981 u->dl = (struct dl *) di->devs;
5982 di->devs = NULL;
88758e9d
DW
5983 u->slot = di->disk.raid_disk;
5984 u->array = inst;
5985 u->next = u + 1;
5986 u++;
5987 }
5988 (u-1)->next = NULL;
5989 *updates = mu;
5990
5991 return rv;
5992}
5993
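/* Return 1 if any disk serial referenced by the proposed array in 'u'
 * is already a member of the existing array 'idx'.
 */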
54c2c1ea 5994static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
8273f55e 5995{
54c2c1ea
DW
5996 struct imsm_dev *dev = get_imsm_dev(super, idx);
5997 struct imsm_map *map = get_imsm_map(dev, 0);
5998 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
5999 struct disk_info *inf = get_disk_info(u);
6000 struct imsm_disk *disk;
8273f55e
DW
6001 int i;
6002 int j;
8273f55e 6003
54c2c1ea 6004 for (i = 0; i < map->num_members; i++) {
98130f40 6005 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
54c2c1ea
DW
6006 for (j = 0; j < new_map->num_members; j++)
6007 if (serialcmp(disk->serial, inf[j].serial) == 0)
8273f55e
DW
6008 return 1;
6009 }
6010
6011 return 0;
6012}
6013
1a64be56
LM
6014
6015static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6016{
6017 struct dl *dl = NULL;
6018 for (dl = super->disks; dl; dl = dl->next)
6019 if ((dl->major == major) && (dl->minor == minor))
6020 return dl;
6021 return NULL;
6022}
6023
6024static int remove_disk_super(struct intel_super *super, int major, int minor)
6025{
6026 struct dl *prev = NULL;
6027 struct dl *dl;
6028
6029 prev = NULL;
6030 for (dl = super->disks; dl; dl = dl->next) {
6031 if ((dl->major == major) && (dl->minor == minor)) {
6032 /* remove */
6033 if (prev)
6034 prev->next = dl->next;
6035 else
6036 super->disks = dl->next;
6037 dl->next = NULL;
6038 __free_imsm_disk(dl);
6039 dprintf("%s: removed %x:%x\n",
6040 __func__, major, minor);
6041 break;
6042 }
6043 prev = dl;
6044 }
6045 return 0;
6046}
6047
f21e18ca 6048static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
ae6aad82 6049
1a64be56
LM
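/* Process the queued disk management list: link disks flagged DISK_ADD
 * into the container's disk list and drop removed spares from it.
 * Returns 1 when an addition warrants re-checking degraded arrays.
 */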
6050static int add_remove_disk_update(struct intel_super *super)
6051{
6052 int check_degraded = 0;
6053 struct dl *disk = NULL;
6054	/* add/remove some spares to/from the metadata/container */
6055 while (super->disk_mgmt_list) {
6056 struct dl *disk_cfg;
6057
6058 disk_cfg = super->disk_mgmt_list;
6059 super->disk_mgmt_list = disk_cfg->next;
6060 disk_cfg->next = NULL;
6061
6062 if (disk_cfg->action == DISK_ADD) {
6063 disk_cfg->next = super->disks;
6064 super->disks = disk_cfg;
6065 check_degraded = 1;
6066 dprintf("%s: added %x:%x\n",
6067 __func__, disk_cfg->major,
6068 disk_cfg->minor);
6069 } else if (disk_cfg->action == DISK_REMOVE) {
6070 dprintf("Disk remove action processed: %x.%x\n",
6071 disk_cfg->major, disk_cfg->minor);
6072 disk = get_disk_super(super,
6073 disk_cfg->major,
6074 disk_cfg->minor);
6075 if (disk) {
6076 /* store action status */
6077 disk->action = DISK_REMOVE;
6078 /* remove spare disks only */
6079 if (disk->index == -1) {
6080 remove_disk_super(super,
6081 disk_cfg->major,
6082 disk_cfg->minor);
6083 }
6084 }
6085			/* release allocated disk structure */
6086 __free_imsm_disk(disk_cfg);
6087 }
6088 }
6089 return check_degraded;
6090}
6091
a29911da
PC
6092
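/* Apply an update_reshape_migration record to sub-array u->subdev: the
 * device is copied into a buffer taken from *space_list, a general
 * migration towards the new raid level is started and, for a 0 -> 5
 * change, the designated spare is pulled into the added slot.
 * Returns 1 on success, 0 otherwise.
 */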
6093static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6094 struct intel_super *super,
6095 void ***space_list)
6096{
6097 struct intel_dev *id;
6098 void **tofree = NULL;
6099 int ret_val = 0;
6100
6101 dprintf("apply_reshape_migration_update()\n");
6102 if ((u->subdev < 0) ||
6103 (u->subdev > 1)) {
6104 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6105 return ret_val;
6106 }
6107 if ((space_list == NULL) || (*space_list == NULL)) {
6108 dprintf("imsm: Error: Memory is not allocated\n");
6109 return ret_val;
6110 }
6111
6112 for (id = super->devlist ; id; id = id->next) {
6113 if (id->index == (unsigned)u->subdev) {
6114 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6115 struct imsm_map *map;
6116 struct imsm_dev *new_dev =
6117 (struct imsm_dev *)*space_list;
6118 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6119 int to_state;
6120 struct dl *new_disk;
6121
6122 if (new_dev == NULL)
6123 return ret_val;
6124 *space_list = **space_list;
6125 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6126 map = get_imsm_map(new_dev, 0);
6127 if (migr_map) {
6128				dprintf("imsm: Error: migration in progress\n");
6129 return ret_val;
6130 }
6131
6132 to_state = map->map_state;
6133 if ((u->new_level == 5) && (map->raid_level == 0)) {
6134 map->num_members++;
6135 /* this should not happen */
6136 if (u->new_disks[0] < 0) {
6137 map->failed_disk_num =
6138 map->num_members - 1;
6139 to_state = IMSM_T_STATE_DEGRADED;
6140 } else
6141 to_state = IMSM_T_STATE_NORMAL;
6142 }
6143 migrate(new_dev, to_state, MIGR_GEN_MIGR);
6144 if (u->new_level > -1)
6145 map->raid_level = u->new_level;
6146 migr_map = get_imsm_map(new_dev, 1);
6147 if ((u->new_level == 5) &&
6148 (migr_map->raid_level == 0)) {
6149 int ord = map->num_members - 1;
6150 migr_map->num_members--;
6151 if (u->new_disks[0] < 0)
6152 ord |= IMSM_ORD_REBUILD;
6153 set_imsm_ord_tbl_ent(map,
6154 map->num_members - 1,
6155 ord);
6156 }
6157 id->dev = new_dev;
6158 tofree = (void **)dev;
6159
6160 /* add disk
6161 */
6162 if ((u->new_level != 5) ||
6163 (migr_map->raid_level != 0) ||
6164 (migr_map->raid_level == map->raid_level))
6165 goto skip_disk_add;
6166
6167 if (u->new_disks[0] >= 0) {
6168				/* use passed spare
6169				 */
6170				new_disk = get_disk_super(super,
6171							major(u->new_disks[0]),
6172							minor(u->new_disks[0]));
6173				if (new_disk == NULL)
6174					goto error_disk_add;
6175				dprintf("imsm: new disk for reshape is: %i:%i "
6176					"(%p, index = %i)\n",
6177					major(u->new_disks[0]),
6178					minor(u->new_disks[0]),
6179					new_disk, new_disk->index);
6180
6181 new_disk->index = map->num_members - 1;
6182 /* slot to fill in autolayout
6183 */
6184 new_disk->raiddisk = new_disk->index;
6185 new_disk->disk.status |= CONFIGURED_DISK;
6186 new_disk->disk.status &= ~SPARE_DISK;
6187 } else
6188 goto error_disk_add;
6189
6190skip_disk_add:
6191 *tofree = *space_list;
6192 /* calculate new size
6193 */
6194 imsm_set_array_size(new_dev);
6195
6196 ret_val = 1;
6197 }
6198 }
6199
6200 if (tofree)
6201 *space_list = tofree;
6202 return ret_val;
6203
6204error_disk_add:
6205 dprintf("Error: imsm: Cannot find disk.\n");
6206 return ret_val;
6207}
6208
6209
2e5dc010
N
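/* Apply an update_reshape_container_disks record: give the added
 * spares container indexes and CONFIGURED status, then start a general
 * migration to the larger member count on one member array, keeping
 * the old map as the migration source. Returns 1 on success.
 */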
6210static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6211 struct intel_super *super,
6212 void ***space_list)
6213{
6214 struct dl *new_disk;
6215 struct intel_dev *id;
6216 int i;
6217 int delta_disks = u->new_raid_disks - u->old_raid_disks;
ee4beede 6218 int disk_count = u->old_raid_disks;
2e5dc010
N
6219 void **tofree = NULL;
6220 int devices_to_reshape = 1;
6221 struct imsm_super *mpb = super->anchor;
6222 int ret_val = 0;
d098291a 6223 unsigned int dev_id;
2e5dc010 6224
ed7333bd 6225 dprintf("imsm: apply_reshape_container_disks_update()\n");
2e5dc010
N
6226
6227 /* enable spares to use in array */
6228 for (i = 0; i < delta_disks; i++) {
6229 new_disk = get_disk_super(super,
6230 major(u->new_disks[i]),
6231 minor(u->new_disks[i]));
ed7333bd
AK
6232 dprintf("imsm: new disk for reshape is: %i:%i "
6233 "(%p, index = %i)\n",
2e5dc010
N
6234 major(u->new_disks[i]), minor(u->new_disks[i]),
6235 new_disk, new_disk->index);
6236 if ((new_disk == NULL) ||
6237 ((new_disk->index >= 0) &&
6238 (new_disk->index < u->old_raid_disks)))
6239 goto update_reshape_exit;
ee4beede 6240 new_disk->index = disk_count++;
2e5dc010
N
6241 /* slot to fill in autolayout
6242 */
6243 new_disk->raiddisk = new_disk->index;
6244 new_disk->disk.status |=
6245 CONFIGURED_DISK;
6246 new_disk->disk.status &= ~SPARE_DISK;
6247 }
6248
ed7333bd
AK
6249 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6250 mpb->num_raid_devs);
2e5dc010
N
6251 /* manage changes in volume
6252 */
d098291a 6253 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
2e5dc010
N
6254 void **sp = *space_list;
6255 struct imsm_dev *newdev;
6256 struct imsm_map *newmap, *oldmap;
6257
d098291a
AK
6258 for (id = super->devlist ; id; id = id->next) {
6259 if (id->index == dev_id)
6260 break;
6261 }
6262 if (id == NULL)
6263 break;
2e5dc010
N
6264 if (!sp)
6265 continue;
6266 *space_list = *sp;
6267 newdev = (void*)sp;
6268 /* Copy the dev, but not (all of) the map */
6269 memcpy(newdev, id->dev, sizeof(*newdev));
6270 oldmap = get_imsm_map(id->dev, 0);
6271 newmap = get_imsm_map(newdev, 0);
6272 /* Copy the current map */
6273 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6274 /* update one device only
6275 */
6276 if (devices_to_reshape) {
ed7333bd
AK
6277 dprintf("imsm: modifying subdev: %i\n",
6278 id->index);
2e5dc010
N
6279 devices_to_reshape--;
6280 newdev->vol.migr_state = 1;
6281 newdev->vol.curr_migr_unit = 0;
6282 newdev->vol.migr_type = MIGR_GEN_MIGR;
6283 newmap->num_members = u->new_raid_disks;
6284 for (i = 0; i < delta_disks; i++) {
6285 set_imsm_ord_tbl_ent(newmap,
6286 u->old_raid_disks + i,
6287 u->old_raid_disks + i);
6288 }
6289 /* New map is correct, now need to save old map
6290 */
6291 newmap = get_imsm_map(newdev, 1);
6292 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6293
70bdf0dc 6294 imsm_set_array_size(newdev);
2e5dc010
N
6295 }
6296
6297 sp = (void **)id->dev;
6298 id->dev = newdev;
6299 *sp = tofree;
6300 tofree = sp;
6301 }
819bc634
AK
6302 if (tofree)
6303 *space_list = tofree;
2e5dc010
N
6304 ret_val = 1;
6305
6306update_reshape_exit:
6307
6308 return ret_val;
6309}
6310
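/* Apply an update_takeover record to sub-array u->subarray.
 * R10_TO_R0: demote the failed half of the disks to spares and shrink
 * the map to a RAID0 of the survivors.
 * R0_TO_R10: double the member slots, add MISSING placeholder disks
 * for the absent mirrors and install a new degraded two-domain RAID1
 * map. Returns 1 on success, 0 when nothing could be done.
 */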
bb025c2f 6311static int apply_takeover_update(struct imsm_update_takeover *u,
8ca6df95
KW
6312 struct intel_super *super,
6313 void ***space_list)
bb025c2f
KW
6314{
6315 struct imsm_dev *dev = NULL;
8ca6df95
KW
6316 struct intel_dev *dv;
6317 struct imsm_dev *dev_new;
bb025c2f
KW
6318 struct imsm_map *map;
6319 struct dl *dm, *du;
8ca6df95 6320 int i;
bb025c2f
KW
6321
6322 for (dv = super->devlist; dv; dv = dv->next)
6323 if (dv->index == (unsigned int)u->subarray) {
6324 dev = dv->dev;
6325 break;
6326 }
6327
6328 if (dev == NULL)
6329 return 0;
6330
6331 map = get_imsm_map(dev, 0);
6332
6333 if (u->direction == R10_TO_R0) {
43d5ec18
KW
6334 /* Number of failed disks must be half of initial disk number */
6335 if (imsm_count_failed(super, dev) != (map->num_members / 2))
6336 return 0;
6337
bb025c2f
KW
6338 /* iterate through devices to mark removed disks as spare */
6339 for (dm = super->disks; dm; dm = dm->next) {
6340 if (dm->disk.status & FAILED_DISK) {
6341 int idx = dm->index;
6342 /* update indexes on the disk list */
6343/* FIXME this loop-within-the-loop looks wrong, I'm not convinced
6344 the index values will end up being correct.... NB */
6345 for (du = super->disks; du; du = du->next)
6346 if (du->index > idx)
6347 du->index--;
6348 /* mark as spare disk */
6349 dm->disk.status = SPARE_DISK;
6350 dm->index = -1;
6351 }
6352 }
bb025c2f
KW
6353 /* update map */
6354 map->num_members = map->num_members / 2;
6355 map->map_state = IMSM_T_STATE_NORMAL;
6356 map->num_domains = 1;
6357 map->raid_level = 0;
6358 map->failed_disk_num = -1;
6359 }
6360
8ca6df95
KW
6361 if (u->direction == R0_TO_R10) {
6362 void **space;
6363 /* update slots in current disk list */
6364 for (dm = super->disks; dm; dm = dm->next) {
6365 if (dm->index >= 0)
6366 dm->index *= 2;
6367 }
6368 /* create new *missing* disks */
6369 for (i = 0; i < map->num_members; i++) {
6370 space = *space_list;
6371 if (!space)
6372 continue;
6373 *space_list = *space;
6374 du = (void *)space;
6375 memcpy(du, super->disks, sizeof(*du));
8ca6df95
KW
6376 du->fd = -1;
6377 du->minor = 0;
6378 du->major = 0;
6379 du->index = (i * 2) + 1;
6380 sprintf((char *)du->disk.serial,
6381 " MISSING_%d", du->index);
6382 sprintf((char *)du->serial,
6383 "MISSING_%d", du->index);
6384 du->next = super->missing;
6385 super->missing = du;
6386 }
6387 /* create new dev and map */
6388 space = *space_list;
6389 if (!space)
6390 return 0;
6391 *space_list = *space;
6392 dev_new = (void *)space;
6393 memcpy(dev_new, dev, sizeof(*dev));
6394 /* update new map */
6395 map = get_imsm_map(dev_new, 0);
8ca6df95 6396 map->num_members = map->num_members * 2;
1a2487c2 6397 map->map_state = IMSM_T_STATE_DEGRADED;
8ca6df95
KW
6398 map->num_domains = 2;
6399 map->raid_level = 1;
6400 /* replace dev<->dev_new */
6401 dv->dev = dev_new;
6402 }
bb025c2f
KW
6403 /* update disk order table */
6404 for (du = super->disks; du; du = du->next)
6405 if (du->index >= 0)
6406 set_imsm_ord_tbl_ent(map, du->index, du->index);
8ca6df95 6407 for (du = super->missing; du; du = du->next)
1a2487c2
KW
6408 if (du->index >= 0) {
6409 set_imsm_ord_tbl_ent(map, du->index, du->index);
6410 mark_missing(dev_new, &du->disk, du->index);
6411 }
bb025c2f
KW
6412
6413 return 1;
6414}
6415
e8319a19
DW
6416static void imsm_process_update(struct supertype *st,
6417 struct metadata_update *update)
6418{
6419 /**
6420 * crack open the metadata_update envelope to find the update record
6421 * update can be one of:
d195167d
AK
6422 * update_reshape_container_disks - all the arrays in the container
6423 * are being reshaped to have more devices. We need to mark
6424 * the arrays for general migration and convert selected spares
6425 * into active devices.
6426 * update_activate_spare - a spare device has replaced a failed
e8319a19
DW
6427 * device in an array, update the disk_ord_tbl. If this disk is
6428 * present in all member arrays then also clear the SPARE_DISK
6429 * flag
d195167d
AK
6430 * update_create_array
6431 * update_kill_array
6432 * update_rename_array
6433 * update_add_remove_disk
e8319a19
DW
6434 */
6435 struct intel_super *super = st->sb;
4d7b1503 6436 struct imsm_super *mpb;
e8319a19
DW
6437 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6438
4d7b1503
DW
6439 /* update requires a larger buf but the allocation failed */
6440 if (super->next_len && !super->next_buf) {
6441 super->next_len = 0;
6442 return;
6443 }
6444
6445 if (super->next_buf) {
6446 memcpy(super->next_buf, super->buf, super->len);
6447 free(super->buf);
6448 super->len = super->next_len;
6449 super->buf = super->next_buf;
6450
6451 super->next_len = 0;
6452 super->next_buf = NULL;
6453 }
6454
6455 mpb = super->anchor;
6456
e8319a19 6457 switch (type) {
bb025c2f
KW
6458 case update_takeover: {
6459 struct imsm_update_takeover *u = (void *)update->buf;
1a2487c2
KW
6460 if (apply_takeover_update(u, super, &update->space_list)) {
6461 imsm_update_version_info(super);
bb025c2f 6462 super->updates_pending++;
1a2487c2 6463 }
bb025c2f
KW
6464 break;
6465 }
6466
78b10e66 6467 case update_reshape_container_disks: {
d195167d 6468 struct imsm_update_reshape *u = (void *)update->buf;
2e5dc010
N
6469 if (apply_reshape_container_disks_update(
6470 u, super, &update->space_list))
6471 super->updates_pending++;
78b10e66
N
6472 break;
6473 }
48c5303a 6474 case update_reshape_migration: {
a29911da
PC
6475 struct imsm_update_reshape_migration *u = (void *)update->buf;
6476 if (apply_reshape_migration_update(
6477 u, super, &update->space_list))
6478 super->updates_pending++;
48c5303a
PC
6479 break;
6480 }
e8319a19
DW
6481 case update_activate_spare: {
6482 struct imsm_update_activate_spare *u = (void *) update->buf;
949c47a0 6483 struct imsm_dev *dev = get_imsm_dev(super, u->array);
a965f303 6484 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd 6485 struct imsm_map *migr_map;
e8319a19
DW
6486 struct active_array *a;
6487 struct imsm_disk *disk;
0c046afd 6488 __u8 to_state;
e8319a19 6489 struct dl *dl;
e8319a19 6490 unsigned int found;
0c046afd 6491 int failed;
98130f40 6492 int victim = get_imsm_disk_idx(dev, u->slot, -1);
e8319a19
DW
6493 int i;
6494
6495 for (dl = super->disks; dl; dl = dl->next)
d23fe947 6496 if (dl == u->dl)
e8319a19
DW
6497 break;
6498
6499 if (!dl) {
6500 fprintf(stderr, "error: imsm_activate_spare passed "
1f24f035
DW
6501 "an unknown disk (index: %d)\n",
6502 u->dl->index);
e8319a19
DW
6503 return;
6504 }
6505
6506 super->updates_pending++;
0c046afd
DW
6507 /* count failures (excluding rebuilds and the victim)
6508 * to determine map[0] state
6509 */
6510 failed = 0;
6511 for (i = 0; i < map->num_members; i++) {
6512 if (i == u->slot)
6513 continue;
98130f40
AK
6514 disk = get_imsm_disk(super,
6515 get_imsm_disk_idx(dev, i, -1));
25ed7e59 6516 if (!disk || is_failed(disk))
0c046afd
DW
6517 failed++;
6518 }
6519
d23fe947
DW
6520 /* adding a pristine spare, assign a new index */
6521 if (dl->index < 0) {
6522 dl->index = super->anchor->num_disks;
6523 super->anchor->num_disks++;
6524 }
d23fe947 6525 disk = &dl->disk;
f2f27e63
DW
6526 disk->status |= CONFIGURED_DISK;
6527 disk->status &= ~SPARE_DISK;
e8319a19 6528
0c046afd
DW
6529 /* mark rebuild */
6530 to_state = imsm_check_degraded(super, dev, failed);
6531 map->map_state = IMSM_T_STATE_DEGRADED;
e3bba0e0 6532 migrate(dev, to_state, MIGR_REBUILD);
0c046afd
DW
6533 migr_map = get_imsm_map(dev, 1);
6534 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6535 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6536
148acb7b
DW
6537 /* update the family_num to mark a new container
6538 * generation, being careful to record the existing
6539 * family_num in orig_family_num to clean up after
6540 * earlier mdadm versions that neglected to set it.
6541 */
6542 if (mpb->orig_family_num == 0)
6543 mpb->orig_family_num = mpb->family_num;
6544 mpb->family_num += super->random;
6545
e8319a19
DW
6546 /* count arrays using the victim in the metadata */
6547 found = 0;
6548 for (a = st->arrays; a ; a = a->next) {
949c47a0 6549 dev = get_imsm_dev(super, a->info.container_member);
620b1713
DW
6550 map = get_imsm_map(dev, 0);
6551
6552 if (get_imsm_disk_slot(map, victim) >= 0)
6553 found++;
e8319a19
DW
6554 }
6555
24565c9a 6556 /* delete the victim if it is no longer being
e8319a19
DW
6557 * utilized anywhere
6558 */
e8319a19 6559 if (!found) {
ae6aad82 6560 struct dl **dlp;
24565c9a 6561
47ee5a45
DW
6562 /* We know that 'manager' isn't touching anything,
6563 * so it is safe to delete
6564 */
24565c9a 6565 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
ae6aad82
DW
6566 if ((*dlp)->index == victim)
6567 break;
47ee5a45
DW
6568
6569 /* victim may be on the missing list */
6570 if (!*dlp)
6571 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
6572 if ((*dlp)->index == victim)
6573 break;
24565c9a 6574 imsm_delete(super, dlp, victim);
e8319a19 6575 }
8273f55e
DW
6576 break;
6577 }
6578 case update_create_array: {
6579 /* someone wants to create a new array, we need to be aware of
6580 * a few races/collisions:
6581 * 1/ 'Create' called by two separate instances of mdadm
6582 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6583 * devices that have since been assimilated via
6584 * activate_spare.
6585 * In the event this update can not be carried out mdadm will
6586 * (FIX ME) notice that its update did not take hold.
6587 */
6588 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 6589 struct intel_dev *dv;
8273f55e
DW
6590 struct imsm_dev *dev;
6591 struct imsm_map *map, *new_map;
6592 unsigned long long start, end;
6593 unsigned long long new_start, new_end;
6594 int i;
54c2c1ea
DW
6595 struct disk_info *inf;
6596 struct dl *dl;
8273f55e
DW
6597
6598 /* handle racing creates: first come first serve */
6599 if (u->dev_idx < mpb->num_raid_devs) {
6600 dprintf("%s: subarray %d already defined\n",
6601 __func__, u->dev_idx);
ba2de7ba 6602 goto create_error;
8273f55e
DW
6603 }
6604
6605 /* check update is next in sequence */
6606 if (u->dev_idx != mpb->num_raid_devs) {
6a3e913e
DW
6607 dprintf("%s: can not create array %d expected index %d\n",
6608 __func__, u->dev_idx, mpb->num_raid_devs);
ba2de7ba 6609 goto create_error;
8273f55e
DW
6610 }
6611
a965f303 6612 new_map = get_imsm_map(&u->dev, 0);
8273f55e
DW
6613 new_start = __le32_to_cpu(new_map->pba_of_lba0);
6614 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
54c2c1ea 6615 inf = get_disk_info(u);
8273f55e
DW
6616
6617 /* handle activate_spare versus create race:
6618 * check to make sure that overlapping arrays do not include
6619 * overalpping disks
6620 */
6621 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 6622 dev = get_imsm_dev(super, i);
a965f303 6623 map = get_imsm_map(dev, 0);
8273f55e
DW
6624 start = __le32_to_cpu(map->pba_of_lba0);
6625 end = start + __le32_to_cpu(map->blocks_per_member);
6626 if ((new_start >= start && new_start <= end) ||
6627 (start >= new_start && start <= new_end))
54c2c1ea
DW
6628 /* overlap */;
6629 else
6630 continue;
6631
6632 if (disks_overlap(super, i, u)) {
8273f55e 6633 dprintf("%s: arrays overlap\n", __func__);
ba2de7ba 6634 goto create_error;
8273f55e
DW
6635 }
6636 }
8273f55e 6637
949c47a0
DW
6638 /* check that prepare update was successful */
6639 if (!update->space) {
6640 dprintf("%s: prepare update failed\n", __func__);
ba2de7ba 6641 goto create_error;
949c47a0
DW
6642 }
6643
54c2c1ea
DW
6644 /* check that all disks are still active before committing
6645 * changes. FIXME: could we instead handle this by creating a
6646 * degraded array? That's probably not what the user expects,
6647 * so better to drop this update on the floor.
6648 */
6649 for (i = 0; i < new_map->num_members; i++) {
6650 dl = serial_to_dl(inf[i].serial, super);
6651 if (!dl) {
6652 dprintf("%s: disk disappeared\n", __func__);
ba2de7ba 6653 goto create_error;
54c2c1ea 6654 }
949c47a0
DW
6655 }
6656
8273f55e 6657 super->updates_pending++;
54c2c1ea
DW
6658
6659 /* convert spares to members and fixup ord_tbl */
6660 for (i = 0; i < new_map->num_members; i++) {
6661 dl = serial_to_dl(inf[i].serial, super);
6662 if (dl->index == -1) {
6663 dl->index = mpb->num_disks;
6664 mpb->num_disks++;
6665 dl->disk.status |= CONFIGURED_DISK;
6666 dl->disk.status &= ~SPARE_DISK;
6667 }
6668 set_imsm_ord_tbl_ent(new_map, i, dl->index);
6669 }
6670
ba2de7ba
DW
6671 dv = update->space;
6672 dev = dv->dev;
949c47a0
DW
6673 update->space = NULL;
6674 imsm_copy_dev(dev, &u->dev);
ba2de7ba
DW
6675 dv->index = u->dev_idx;
6676 dv->next = super->devlist;
6677 super->devlist = dv;
8273f55e 6678 mpb->num_raid_devs++;
8273f55e 6679
4d1313e9 6680 imsm_update_version_info(super);
8273f55e 6681 break;
ba2de7ba
DW
6682 create_error:
6683 /* mdmon knows how to release update->space, but not
6684 * ((struct intel_dev *) update->space)->dev
6685 */
6686 if (update->space) {
6687 dv = update->space;
6688 free(dv->dev);
6689 }
8273f55e 6690 break;
e8319a19 6691 }
33414a01
DW
6692 case update_kill_array: {
6693 struct imsm_update_kill_array *u = (void *) update->buf;
6694 int victim = u->dev_idx;
6695 struct active_array *a;
6696 struct intel_dev **dp;
6697 struct imsm_dev *dev;
6698
6699 /* sanity check that we are not affecting the uuid of
6700 * active arrays, or deleting an active array
6701 *
6702 * FIXME when immutable ids are available, but note that
6703 * we'll also need to fixup the invalidated/active
6704 * subarray indexes in mdstat
6705 */
6706 for (a = st->arrays; a; a = a->next)
6707 if (a->info.container_member >= victim)
6708 break;
6709 /* by definition if mdmon is running at least one array
6710 * is active in the container, so checking
6711 * mpb->num_raid_devs is just extra paranoia
6712 */
6713 dev = get_imsm_dev(super, victim);
6714 if (a || !dev || mpb->num_raid_devs == 1) {
6715 dprintf("failed to delete subarray-%d\n", victim);
6716 break;
6717 }
6718
6719 for (dp = &super->devlist; *dp;)
f21e18ca 6720 if ((*dp)->index == (unsigned)super->current_vol) {
33414a01
DW
6721 *dp = (*dp)->next;
6722 } else {
f21e18ca 6723 if ((*dp)->index > (unsigned)victim)
33414a01
DW
6724 (*dp)->index--;
6725 dp = &(*dp)->next;
6726 }
6727 mpb->num_raid_devs--;
6728 super->updates_pending++;
6729 break;
6730 }
aa534678
DW
6731 case update_rename_array: {
6732 struct imsm_update_rename_array *u = (void *) update->buf;
6733 char name[MAX_RAID_SERIAL_LEN+1];
6734 int target = u->dev_idx;
6735 struct active_array *a;
6736 struct imsm_dev *dev;
6737
6738 /* sanity check that we are not affecting the uuid of
6739 * an active array
6740 */
6741 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
6742 name[MAX_RAID_SERIAL_LEN] = '\0';
6743 for (a = st->arrays; a; a = a->next)
6744 if (a->info.container_member == target)
6745 break;
6746 dev = get_imsm_dev(super, u->dev_idx);
6747 if (a || !dev || !check_name(super, name, 1)) {
6748 dprintf("failed to rename subarray-%d\n", target);
6749 break;
6750 }
6751
cdbe98cd 6752 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
aa534678
DW
6753 super->updates_pending++;
6754 break;
6755 }
1a64be56 6756 case update_add_remove_disk: {
43dad3d6 6757 /* we may be able to repair some arrays if disks are
1a64be56
LM
6758		 * being added, check the status of add_remove_disk
6759		 * if disks have been added.
6760 */
6761 if (add_remove_disk_update(super)) {
43dad3d6 6762 struct active_array *a;
072b727f
DW
6763
6764 super->updates_pending++;
1a64be56 6765 for (a = st->arrays; a; a = a->next)
43dad3d6
DW
6766 a->check_degraded = 1;
6767 }
43dad3d6 6768 break;
e8319a19 6769 }
1a64be56
LM
6770 default:
6771		fprintf(stderr, "error: unsupported process update type: "
6772			"(type: %d)\n", type);
6773 }
e8319a19 6774}
88758e9d 6775
bc0b9d34
PC
6776static struct mdinfo *get_spares_for_grow(struct supertype *st);
6777
8273f55e
DW
6778static void imsm_prepare_update(struct supertype *st,
6779 struct metadata_update *update)
6780{
949c47a0 6781 /**
4d7b1503
DW
6782 * Allocate space to hold new disk entries, raid-device entries or a new
6783 * mpb if necessary. The manager synchronously waits for updates to
6784 * complete in the monitor, so new mpb buffers allocated here can be
6785 * integrated by the monitor thread without worrying about live pointers
6786 * in the manager thread.
8273f55e 6787 */
949c47a0 6788 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
4d7b1503
DW
6789 struct intel_super *super = st->sb;
6790 struct imsm_super *mpb = super->anchor;
6791 size_t buf_len;
6792 size_t len = 0;
949c47a0
DW
6793
6794 switch (type) {
abedf5fc
KW
6795 case update_takeover: {
6796 struct imsm_update_takeover *u = (void *)update->buf;
6797 if (u->direction == R0_TO_R10) {
6798 void **tail = (void **)&update->space_list;
6799 struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
6800 struct imsm_map *map = get_imsm_map(dev, 0);
6801 int num_members = map->num_members;
6802 void *space;
6803 int size, i;
6804 int err = 0;
6805 /* allocate memory for added disks */
6806 for (i = 0; i < num_members; i++) {
6807 size = sizeof(struct dl);
6808 space = malloc(size);
6809 if (!space) {
6810 err++;
6811 break;
6812 }
6813 *tail = space;
6814 tail = space;
6815 *tail = NULL;
6816 }
6817 /* allocate memory for new device */
6818 size = sizeof_imsm_dev(super->devlist->dev, 0) +
6819 (num_members * sizeof(__u32));
6820 space = malloc(size);
6821 if (!space)
6822 err++;
6823 else {
6824 *tail = space;
6825 tail = space;
6826 *tail = NULL;
6827 }
6828 if (!err) {
6829 len = disks_to_mpb_size(num_members * 2);
6830 } else {
6831				/* if allocation didn't succeed, free buffer */
6832 while (update->space_list) {
6833 void **sp = update->space_list;
6834 update->space_list = *sp;
6835 free(sp);
6836 }
6837 }
6838 }
6839
6840 break;
6841 }
78b10e66 6842 case update_reshape_container_disks: {
d195167d
AK
6843 /* Every raid device in the container is about to
6844 * gain some more devices, and we will enter a
6845 * reconfiguration.
6846 * So each 'imsm_map' will be bigger, and the imsm_vol
6847 * will now hold 2 of them.
6848 * Thus we need new 'struct imsm_dev' allocations sized
6849 * as sizeof_imsm_dev but with more devices in both maps.
6850 */
6851 struct imsm_update_reshape *u = (void *)update->buf;
6852 struct intel_dev *dl;
6853 void **space_tail = (void**)&update->space_list;
6854
6855 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6856
6857 for (dl = super->devlist; dl; dl = dl->next) {
6858 int size = sizeof_imsm_dev(dl->dev, 1);
6859 void *s;
d677e0b8
AK
6860 if (u->new_raid_disks > u->old_raid_disks)
6861 size += sizeof(__u32)*2*
6862 (u->new_raid_disks - u->old_raid_disks);
d195167d
AK
6863 s = malloc(size);
6864 if (!s)
6865 break;
6866 *space_tail = s;
6867 space_tail = s;
6868 *space_tail = NULL;
6869 }
6870
6871 len = disks_to_mpb_size(u->new_raid_disks);
6872 dprintf("New anchor length is %llu\n", (unsigned long long)len);
78b10e66
N
6873 break;
6874 }
48c5303a 6875 case update_reshape_migration: {
bc0b9d34
PC
6876		/* for a level 0->5 migration we need to add disks,
6877		 * so, as for the container operation, we copy the
6878		 * device to a bigger location.
6879		 * The in-memory device and new disk area prepared here
6880		 * are used later by process_update.
6881		 */
6882 struct imsm_update_reshape_migration *u = (void *)update->buf;
6883 struct intel_dev *id;
6884 void **space_tail = (void **)&update->space_list;
6885 int size;
6886 void *s;
6887 int current_level = -1;
6888
6889 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
6890
6891 /* add space for bigger array in update
6892 */
6893 for (id = super->devlist; id; id = id->next) {
6894 if (id->index == (unsigned)u->subdev) {
6895 size = sizeof_imsm_dev(id->dev, 1);
6896 if (u->new_raid_disks > u->old_raid_disks)
6897 size += sizeof(__u32)*2*
6898 (u->new_raid_disks - u->old_raid_disks);
6899 s = malloc(size);
6900 if (!s)
6901 break;
6902 *space_tail = s;
6903 space_tail = s;
6904 *space_tail = NULL;
6905 break;
6906 }
6907 }
6908 if (update->space_list == NULL)
6909 break;
6910
6911 /* add space for disk in update
6912 */
6913 size = sizeof(struct dl);
6914 s = malloc(size);
6915 if (!s) {
6916 free(update->space_list);
6917 update->space_list = NULL;
6918 break;
6919 }
6920 *space_tail = s;
6921 space_tail = s;
6922 *space_tail = NULL;
6923
6924 /* add spare device to update
6925 */
6926 for (id = super->devlist ; id; id = id->next)
6927 if (id->index == (unsigned)u->subdev) {
6928 struct imsm_dev *dev;
6929 struct imsm_map *map;
6930
6931 dev = get_imsm_dev(super, u->subdev);
6932 map = get_imsm_map(dev, 0);
6933 current_level = map->raid_level;
6934 break;
6935 }
6936 if ((u->new_level == 5) && (u->new_level != current_level)) {
6937 struct mdinfo *spares;
6938
6939 spares = get_spares_for_grow(st);
6940 if (spares) {
6941 struct dl *dl;
6942 struct mdinfo *dev;
6943
6944 dev = spares->devs;
6945 if (dev) {
6946 u->new_disks[0] =
6947 makedev(dev->disk.major,
6948 dev->disk.minor);
6949 dl = get_disk_super(super,
6950 dev->disk.major,
6951 dev->disk.minor);
6952 dl->index = u->old_raid_disks;
6953 dev = dev->next;
6954 }
6955 sysfs_free(spares);
6956 }
6957 }
6958 len = disks_to_mpb_size(u->new_raid_disks);
6959 dprintf("New anchor length is %llu\n", (unsigned long long)len);
48c5303a
PC
6960 break;
6961 }
949c47a0
DW
6962 case update_create_array: {
6963 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 6964 struct intel_dev *dv;
54c2c1ea
DW
6965 struct imsm_dev *dev = &u->dev;
6966 struct imsm_map *map = get_imsm_map(dev, 0);
6967 struct dl *dl;
6968 struct disk_info *inf;
6969 int i;
6970 int activate = 0;
949c47a0 6971
54c2c1ea
DW
6972 inf = get_disk_info(u);
6973 len = sizeof_imsm_dev(dev, 1);
ba2de7ba
DW
6974 /* allocate a new super->devlist entry */
6975 dv = malloc(sizeof(*dv));
6976 if (dv) {
6977 dv->dev = malloc(len);
6978 if (dv->dev)
6979 update->space = dv;
6980 else {
6981 free(dv);
6982 update->space = NULL;
6983 }
6984 }
949c47a0 6985
54c2c1ea
DW
6986 /* count how many spares will be converted to members */
6987 for (i = 0; i < map->num_members; i++) {
6988 dl = serial_to_dl(inf[i].serial, super);
6989 if (!dl) {
6990 /* hmm maybe it failed?, nothing we can do about
6991 * it here
6992 */
6993 continue;
6994 }
6995 if (count_memberships(dl, super) == 0)
6996 activate++;
6997 }
6998 len += activate * sizeof(struct imsm_disk);
949c47a0
DW
6999 break;
7000 default:
7001 break;
7002 }
7003 }
8273f55e 7004
4d7b1503
DW
7005 /* check if we need a larger metadata buffer */
7006 if (super->next_buf)
7007 buf_len = super->next_len;
7008 else
7009 buf_len = super->len;
7010
7011 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
7012 /* ok we need a larger buf than what is currently allocated
7013 * if this allocation fails process_update will notice that
7014 * ->next_len is set and ->next_buf is NULL
7015 */
7016 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
7017 if (super->next_buf)
7018 free(super->next_buf);
7019
7020 super->next_len = buf_len;
1f45a8ad
DW
7021 if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
7022 memset(super->next_buf, 0, buf_len);
7023 else
4d7b1503
DW
7024 super->next_buf = NULL;
7025 }
8273f55e
DW
7026}
7027
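/* Remove the disk at container index 'index' from the metadata: shift
 * the indexes of all later disks (and missing disks) down by one, fix
 * up every array's disk_ord_tbl to match, decrement num_disks and free
 * the dl entry referenced by 'dlp'.
 */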
ae6aad82 7028/* must be called while manager is quiesced */
f21e18ca 7029static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
ae6aad82
DW
7030{
7031 struct imsm_super *mpb = super->anchor;
ae6aad82
DW
7032 struct dl *iter;
7033 struct imsm_dev *dev;
7034 struct imsm_map *map;
24565c9a
DW
7035 int i, j, num_members;
7036 __u32 ord;
ae6aad82 7037
24565c9a
DW
7038 dprintf("%s: deleting device[%d] from imsm_super\n",
7039 __func__, index);
ae6aad82
DW
7040
7041 /* shift all indexes down one */
7042 for (iter = super->disks; iter; iter = iter->next)
f21e18ca 7043 if (iter->index > (int)index)
ae6aad82 7044 iter->index--;
47ee5a45 7045 for (iter = super->missing; iter; iter = iter->next)
f21e18ca 7046 if (iter->index > (int)index)
47ee5a45 7047 iter->index--;
ae6aad82
DW
7048
7049 for (i = 0; i < mpb->num_raid_devs; i++) {
7050 dev = get_imsm_dev(super, i);
7051 map = get_imsm_map(dev, 0);
24565c9a
DW
7052 num_members = map->num_members;
7053 for (j = 0; j < num_members; j++) {
7054 /* update ord entries being careful not to propagate
7055 * ord-flags to the first map
7056 */
98130f40 7057 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ae6aad82 7058
24565c9a
DW
7059 if (ord_to_idx(ord) <= index)
7060 continue;
ae6aad82 7061
24565c9a
DW
7062 map = get_imsm_map(dev, 0);
7063 set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
7064 map = get_imsm_map(dev, 1);
7065 if (map)
7066 set_imsm_ord_tbl_ent(map, j, ord - 1);
ae6aad82
DW
7067 }
7068 }
7069
7070 mpb->num_disks--;
7071 super->updates_pending++;
24565c9a
DW
7072 if (*dlp) {
7073 struct dl *dl = *dlp;
7074
7075 *dlp = (*dlp)->next;
7076 __free_imsm_disk(dl);
7077 }
ae6aad82
DW
7078}
7079
2cda7640
ML
7080static char disk_by_path[] = "/dev/disk/by-path/";
7081
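/* Map a /dev/disk/by-path/ name to the driver owning its controller:
 * "isci" for SAS HBAs, "ahci" for SATA, "unknown" otherwise.
 */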
7082static const char *imsm_get_disk_controller_domain(const char *path)
7083{
2cda7640 7084 char disk_path[PATH_MAX];
96234762
LM
7085 char *drv=NULL;
7086 struct stat st;
2cda7640 7087
96234762
LM
7088 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7089 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7090 if (stat(disk_path, &st) == 0) {
7091 struct sys_dev* hba;
7092 char *path=NULL;
7093
7094 path = devt_to_devpath(st.st_rdev);
7095 if (path == NULL)
7096 return "unknown";
7097 hba = find_disk_attached_hba(-1, path);
7098 if (hba && hba->type == SYS_DEV_SAS)
7099 drv = "isci";
7100 else if (hba && hba->type == SYS_DEV_SATA)
7101 drv = "ahci";
7102 else
7103 drv = "unknown";
7104 dprintf("path: %s hba: %s attached: %s\n",
7105 path, (hba) ? hba->path : "NULL", drv);
7106 free(path);
7107 if (hba)
7108 free_sys_dev(&hba);
2cda7640 7109 }
96234762 7110 return drv;
2cda7640
ML
7111}
7112
78b10e66
N
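/* Look up the md minor number of sub-array 'subdev' within container
 * 'container' via mdstat; fills *minor and returns 0 on success, -1 if
 * the sub-array cannot be found.
 */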
7113static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7114{
7115 char subdev_name[20];
7116 struct mdstat_ent *mdstat;
7117
7118 sprintf(subdev_name, "%d", subdev);
7119 mdstat = mdstat_by_subdev(subdev_name, container);
7120 if (!mdstat)
7121 return -1;
7122
7123 *minor = mdstat->devnum;
7124 free_mdstat(mdstat);
7125 return 0;
7126}
7127
7128static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7129 struct geo_params *geo,
7130 int *old_raid_disks)
7131{
694575e7
KW
7132 /* currently we only support increasing the number of devices
7133	 * for a container. This increases the number of devices for each
7134 * member array. They must all be RAID0 or RAID5.
7135 */
78b10e66
N
7136 int ret_val = 0;
7137 struct mdinfo *info, *member;
7138 int devices_that_can_grow = 0;
7139
7140 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7141 "st->devnum = (%i)\n",
7142 st->devnum);
7143
7144 if (geo->size != -1 ||
7145 geo->level != UnSet ||
7146 geo->layout != UnSet ||
7147 geo->chunksize != 0 ||
7148 geo->raid_disks == UnSet) {
7149		dprintf("imsm: Container operation is allowed only for "
7150			"a change in the number of raid disks.\n");
7151 return ret_val;
7152 }
7153
7154 info = container_content_imsm(st, NULL);
7155 for (member = info; member; member = member->next) {
7156 int result;
7157 int minor;
7158
7159 dprintf("imsm: checking device_num: %i\n",
7160 member->container_member);
7161
d7d205bd 7162 if (geo->raid_disks <= member->array.raid_disks) {
78b10e66
N
7163 /* we work on container for Online Capacity Expansion
7164 * only so raid_disks has to grow
7165 */
7166 dprintf("imsm: for container operation raid disks "
7167 "increase is required\n");
7168 break;
7169 }
7170
7171 if ((info->array.level != 0) &&
7172 (info->array.level != 5)) {
7173 /* we cannot use this container with other raid level
7174 */
690aae1a 7175 dprintf("imsm: for container operation wrong"
78b10e66
N
7176 " raid level (%i) detected\n",
7177 info->array.level);
7178 break;
7179 } else {
7180 /* check for platform support
7181 * for this raid level configuration
7182 */
7183 struct intel_super *super = st->sb;
7184 if (!is_raid_level_supported(super->orom,
7185 member->array.level,
7186 geo->raid_disks)) {
690aae1a 7187 dprintf("platform does not support raid%d with"
78b10e66
N
7188 " %d disk%s\n",
7189 info->array.level,
7190 geo->raid_disks,
7191 geo->raid_disks > 1 ? "s" : "");
7192 break;
7193 }
2a4a08e7
AK
7194 /* check if component size is aligned to chunk size
7195 */
7196 if (info->component_size %
7197 (info->array.chunk_size/512)) {
7198 dprintf("Component size is not aligned to "
7199 "chunk size\n");
7200 break;
7201 }
78b10e66
N
7202 }
7203
7204 if (*old_raid_disks &&
7205 info->array.raid_disks != *old_raid_disks)
7206 break;
7207 *old_raid_disks = info->array.raid_disks;
7208
7209 /* All raid5 and raid0 volumes in container
7210 * have to be ready for Online Capacity Expansion
7211 * so they need to be assembled. We have already
7212 * checked that no recovery etc is happening.
7213 */
7214 result = imsm_find_array_minor_by_subdev(member->container_member,
7215 st->container_dev,
7216 &minor);
7217 if (result < 0) {
7218 dprintf("imsm: cannot find array\n");
7219 break;
7220 }
7221 devices_that_can_grow++;
7222 }
7223 sysfs_free(info);
7224 if (!member && devices_that_can_grow)
7225 ret_val = 1;
7226
7227 if (ret_val)
7228 dprintf("\tContainer operation allowed\n");
7229 else
7230 dprintf("\tError: %i\n", ret_val);
7231
7232 return ret_val;
7233}
7234
7235/* Function: get_spares_for_grow
7236 * Description: Allocates memory and creates a list of the spare devices
7237 * available in the container. Checks if the spare drive size is acceptable.
7238 * Parameters: Pointer to the supertype structure
7239 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7240 * NULL on failure
7241 */
7242static struct mdinfo *get_spares_for_grow(struct supertype *st)
7243{
78b10e66 7244 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
326727d9 7245 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
78b10e66
N
7246}
7247
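/*
 * The list returned above is consumed later in this file roughly as in the
 * sketch below: spares->array.spare_disks holds the count and spares->devs
 * is the linked list of candidate disks (minimal sketch, error handling
 * omitted):
 *
 *	struct mdinfo *spares = get_spares_for_grow(st);
 *	struct mdinfo *dev;
 *
 *	if (spares)
 *		for (dev = spares->devs; dev; dev = dev->next)
 *			dprintf("spare %d:%d\n", dev->disk.major, dev->disk.minor);
 *	sysfs_free(spares);
 */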
7248/******************************************************************************
7249 * function: imsm_create_metadata_update_for_reshape
7250 * Creates an update for the whole IMSM container.
7251 *
7252 ******************************************************************************/
7253static int imsm_create_metadata_update_for_reshape(
7254 struct supertype *st,
7255 struct geo_params *geo,
7256 int old_raid_disks,
7257 struct imsm_update_reshape **updatep)
7258{
7259 struct intel_super *super = st->sb;
7260 struct imsm_super *mpb = super->anchor;
7261 int update_memory_size = 0;
7262 struct imsm_update_reshape *u = NULL;
7263 struct mdinfo *spares = NULL;
7264 int i = 0; /* initialized: 'i' is tested after the abort: label even when the copy loop is never entered */
7265 int delta_disks = 0;
bbd24d86 7266 struct mdinfo *dev;
78b10e66
N
7267
7268 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7269 geo->raid_disks);
7270
7271 delta_disks = geo->raid_disks - old_raid_disks;
7272
7273 /* size of all update data without anchor */
7274 update_memory_size = sizeof(struct imsm_update_reshape);
7275
7276 /* now add space for spare disks that we need to add. */
7277 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
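/*
 * Worked example of the calculation above, assuming new_disks[] is declared
 * in struct imsm_update_reshape as a one-element array used as a flexible
 * tail: for delta_disks == 3 the allocation is the fixed structure (which
 * already contains new_disks[0]) plus room for two more entries, i.e. space
 * for exactly three makedev() values.
 */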
7278
7279 u = calloc(1, update_memory_size);
7280 if (u == NULL) {
7281 dprintf("error: "
7282 "cannot get memory for imsm_update_reshape update\n");
7283 return 0;
7284 }
7285 u->type = update_reshape_container_disks;
7286 u->old_raid_disks = old_raid_disks;
7287 u->new_raid_disks = geo->raid_disks;
7288
7289 /* now get spare disks list
7290 */
7291 spares = get_spares_for_grow(st);
7292
7293 if (spares == NULL
7294 || delta_disks > spares->array.spare_disks) {
e14e5960
KW
7295 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
7296 "for %s.\n", geo->dev_name);
78b10e66
N
7297 goto abort;
7298 }
7299
7300 /* we have got spares;
7301 * update the disk list in the imsm_disk table in the anchor
7302 */
7303 dprintf("imsm: %i spares are available.\n\n",
7304 spares->array.spare_disks);
7305
bbd24d86 7306 dev = spares->devs;
78b10e66 7307 for (i = 0; i < delta_disks; i++) {
78b10e66
N
7308 struct dl *dl;
7309
bbd24d86
AK
7310 if (dev == NULL)
7311 break;
78b10e66
N
7312 u->new_disks[i] = makedev(dev->disk.major,
7313 dev->disk.minor);
7314 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
ee4beede
AK
7315 dl->index = mpb->num_disks;
7316 mpb->num_disks++;
bbd24d86 7317 dev = dev->next;
78b10e66 7318 }
78b10e66
N
7319
7320abort:
7321 /* free spares
7322 */
7323 sysfs_free(spares);
7324
d677e0b8 7325 dprintf("imsm: reshape update preparation :");
78b10e66 7326 if (i == delta_disks) {
d677e0b8 7327 dprintf(" OK\n");
78b10e66
N
7328 *updatep = u;
7329 return update_memory_size;
7330 }
7331 free(u);
d677e0b8 7332 dprintf(" Error\n");
78b10e66
N
7333
7334 return 0;
7335}
7336
48c5303a
PC
7337/******************************************************************************
7338 * function: imsm_create_metadata_update_for_migration()
7340 * Creates an update for an IMSM array.
7340 *
7341 ******************************************************************************/
7342static int imsm_create_metadata_update_for_migration(
7343 struct supertype *st,
7344 struct geo_params *geo,
7345 struct imsm_update_reshape_migration **updatep)
7346{
7347 struct intel_super *super = st->sb;
7348 int update_memory_size = 0;
7349 struct imsm_update_reshape_migration *u = NULL;
7350 struct imsm_dev *dev;
7351 int previous_level = -1;
7352
7353 dprintf("imsm_create_metadata_update_for_migration(enter)"
7354 " New Level = %i\n", geo->level);
7355
7356 /* size of all update data without anchor */
7357 update_memory_size = sizeof(struct imsm_update_reshape_migration);
7358
7359 u = calloc(1, update_memory_size);
7360 if (u == NULL) {
7361 dprintf("error: cannot get memory for "
7362 "imsm_create_metadata_update_for_migration\n");
7363 return 0;
7364 }
7365 u->type = update_reshape_migration;
7366 u->subdev = super->current_vol;
7367 u->new_level = geo->level;
7368 u->new_layout = geo->layout;
7369 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
7370 u->new_disks[0] = -1;
7371
7372 dev = get_imsm_dev(super, u->subdev);
7373 if (dev) {
7374 struct imsm_map *map;
7375
7376 map = get_imsm_map(dev, 0);
7377 if (map)
7378 previous_level = map->raid_level;
7379 }
7380 if ((geo->level == 5) && (previous_level == 0)) {
7381 struct mdinfo *spares = NULL;
7382
7383 u->new_raid_disks++;
7384 spares = get_spares_for_grow(st);
7385 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
7386 free(u);
7387 sysfs_free(spares);
7388 update_memory_size = 0;
7389 dprintf("error: cannot get spare device "
7390 "for requested migration");
7391 return 0;
7392 }
7393 sysfs_free(spares);
7394 }
7395 dprintf("imsm: reshape update preparation : OK\n");
7396 *updatep = u;
7397
7398 return update_memory_size;
7399}
7400
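/*
 * Worked example for the raid0 -> raid5 branch above: a 3-disk raid0 volume
 * becomes a 4-disk raid5 volume, so new_raid_disks is bumped by one and at
 * least one spare must already be present in the container to supply the
 * extra member disk; without such a spare the update is refused.
 */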
8dd70bce
AK
7401static void imsm_update_metadata_locally(struct supertype *st,
7402 void *buf, int len)
7403{
7404 struct metadata_update mu;
7405
7406 mu.buf = buf;
7407 mu.len = len;
7408 mu.space = NULL;
7409 mu.space_list = NULL;
7410 mu.next = NULL;
7411 imsm_prepare_update(st, &mu);
7412 imsm_process_update(st, &mu);
7413
7414 while (mu.space_list) {
7415 void **space = mu.space_list;
7416 mu.space_list = *space;
7417 free(space);
7418 }
7419}
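/*
 * The helper above applies an update to the local copy of the metadata only.
 * The callers below pair it with append_metadata_update() so that mdmon, if
 * it is running, sees the same buffer (minimal sketch; 'u' and 'len' come
 * from one of the imsm_create_metadata_update_for_* helpers):
 *
 *	imsm_update_metadata_locally(st, u, len);
 *	if (st->update_tail)
 *		append_metadata_update(st, u, len);
 *	else
 *		free(u);
 */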
78b10e66 7420
471bceb6 7421/***************************************************************************
694575e7 7422* Function: imsm_analyze_change
471bceb6
KW
7423* Description: Analyzes the requested change for a single volume
7424* and validates whether the transition is supported
694575e7
KW
7425* Parameters: Geometry parameters, supertype structure
7426* Returns: Operation type code on success, -1 on failure
471bceb6
KW
7427****************************************************************************/
7428enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
7429 struct geo_params *geo)
694575e7 7430{
471bceb6
KW
7431 struct mdinfo info;
7432 int change = -1;
7433 int check_devs = 0;
c21e737b 7434 int chunk;
471bceb6
KW
7435
7436 getinfo_super_imsm_volume(st, &info, NULL);
7437
7438 if ((geo->level != info.array.level) &&
7439 (geo->level >= 0) &&
7440 (geo->level != UnSet)) {
7441 switch (info.array.level) {
7442 case 0:
7443 if (geo->level == 5) {
b5347799 7444 change = CH_MIGRATION;
471bceb6
KW
7445 check_devs = 1;
7446 }
7447 if (geo->level == 10) {
7448 change = CH_TAKEOVER;
7449 check_devs = 1;
7450 }
dfe77a9e
KW
7451 break;
7452 case 1:
7453 if (geo->level == 0) {
7454 change = CH_TAKEOVER;
7455 check_devs = 1;
7456 }
471bceb6 7457 break;
471bceb6
KW
7458 case 10:
7459 if (geo->level == 0) {
7460 change = CH_TAKEOVER;
7461 check_devs = 1;
7462 }
7463 break;
7464 }
7465 if (change == -1) {
7466 fprintf(stderr,
7467 Name " Error. Level Migration from %d to %d "
7468 "not supported!\n",
7469 info.array.level, geo->level);
7470 goto analyse_change_exit;
7471 }
7472 } else
7473 geo->level = info.array.level;
7474
7475 if ((geo->layout != info.array.layout)
7476 && ((geo->layout != UnSet) && (geo->layout != -1))) {
b5347799 7477 change = CH_MIGRATION;
471bceb6
KW
7478 if ((info.array.layout == 0)
7479 && (info.array.level == 5)
7480 && (geo->layout == 5)) {
7481 /* reshape 5 -> 4 */
7482 } else if ((info.array.layout == 5)
7483 && (info.array.level == 5)
7484 && (geo->layout == 0)) {
7485 /* reshape 4 -> 5 */
7486 geo->layout = 0;
7487 geo->level = 5;
7488 } else {
7489 fprintf(stderr,
7490 Name " Error. Layout Migration from %d to %d "
7491 "not supported!\n",
7492 info.array.layout, geo->layout);
7493 change = -1;
7494 goto analyse_change_exit;
7495 }
7496 } else
7497 geo->layout = info.array.layout;
7498
7499 if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
7500 && (geo->chunksize != info.array.chunk_size))
b5347799 7501 change = CH_MIGRATION;
471bceb6
KW
7502 else
7503 geo->chunksize = info.array.chunk_size;
7504
c21e737b 7505 chunk = geo->chunksize / 1024;
471bceb6
KW
7506 if (!validate_geometry_imsm(st,
7507 geo->level,
7508 geo->layout,
7509 geo->raid_disks,
c21e737b 7510 &chunk,
471bceb6
KW
7511 geo->size,
7512 0, 0, 1))
7513 change = -1;
7514
7515 if (check_devs) {
7516 struct intel_super *super = st->sb;
7517 struct imsm_super *mpb = super->anchor;
7518
7519 if (mpb->num_raid_devs > 1) {
7520 fprintf(stderr,
7521 Name " Error. Cannot perform operation on %s"
7522 "- for this operation it MUST be single "
7523 "array in container\n",
7524 geo->dev_name);
7525 change = -1;
7526 }
7527 }
7528
7529analyse_change_exit:
7530
7531 return change;
694575e7
KW
7532}
7533
bb025c2f
KW
7534int imsm_takeover(struct supertype *st, struct geo_params *geo)
7535{
7536 struct intel_super *super = st->sb;
7537 struct imsm_update_takeover *u;
7538
7539 u = malloc(sizeof(struct imsm_update_takeover));
7540 if (u == NULL)
7541 return 1;
7542
7543 u->type = update_takeover;
7544 u->subarray = super->current_vol;
7545
7546 /* 10->0 transition */
7547 if (geo->level == 0)
7548 u->direction = R10_TO_R0;
7549
0529c688
KW
7550 /* 0->10 transition */
7551 if (geo->level == 10)
7552 u->direction = R0_TO_R10;
7553
bb025c2f
KW
7554 /* update metadata locally */
7555 imsm_update_metadata_locally(st, u,
7556 sizeof(struct imsm_update_takeover));
7557 /* and possibly remotely */
7558 if (st->update_tail)
7559 append_metadata_update(st, u,
7560 sizeof(struct imsm_update_takeover));
7561 else
7562 free(u);
7563
7564 return 0;
7565}
7566
6dc0be30
AK
7567static int warn_user_about_risk(void)
7568{
7569 int rv = 0;
7570
7571 fprintf(stderr,
7572 "\nThis is an experimental feature. Data on the RAID volume(s) "
7573 "can be lost!!!\n\n"
7574 "To continue command execution please make sure that\n"
7575 "the grow process will not be interrupted. Use safe power\n"
7576 "supply to avoid unexpected system reboot. Make sure that\n"
7577 "reshaped container is not assembled automatically during\n"
7578 "system boot.\n"
7579 "If reshape is interrupted, assemble array manually\n"
7580 "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
7581 "Assembly in scan mode is not possible in such case.\n"
7582 "Growing container with boot array is not possible.\n"
7583 "If boot array reshape is interrupted, whole file system\n"
7584 "can be lost.\n\n");
7585 rv = ask("Do you want to continue? ");
7586 fprintf(stderr, "\n");
7587
7588 return rv;
7589}
7590
78b10e66
N
7591static int imsm_reshape_super(struct supertype *st, long long size, int level,
7592 int layout, int chunksize, int raid_disks,
41784c88
AK
7593 int delta_disks, char *backup, char *dev,
7594 int verbose)
78b10e66 7595{
78b10e66
N
7596 int ret_val = 1;
7597 struct geo_params geo;
7598
7599 dprintf("imsm: reshape_super called.\n");
7600
71204a50 7601 memset(&geo, 0, sizeof(struct geo_params));
78b10e66
N
7602
7603 geo.dev_name = dev;
694575e7 7604 geo.dev_id = st->devnum;
78b10e66
N
7605 geo.size = size;
7606 geo.level = level;
7607 geo.layout = layout;
7608 geo.chunksize = chunksize;
7609 geo.raid_disks = raid_disks;
41784c88
AK
7610 if (delta_disks != UnSet)
7611 geo.raid_disks += delta_disks;
78b10e66
N
7612
7613 dprintf("\tfor level : %i\n", geo.level);
7614 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
7615
7616 if (experimental() == 0)
7617 return ret_val;
7618
78b10e66 7619 if (st->container_dev == st->devnum) {
694575e7
KW
7620 /* On the container level we can only increase the number of devices. */
7621 dprintf("imsm: info: Container operation\n");
78b10e66 7622 int old_raid_disks = 0;
6dc0be30
AK
7623
7624 /* this warning will be removed when imsm checkpointing
7625 * is implemented, and restoring from a checkpoint
7626 * becomes transparent across a reboot
7627 */
7628 if (warn_user_about_risk() == 0)
7629 return ret_val;
7630
78b10e66
N
7631 if (imsm_reshape_is_allowed_on_container(
7632 st, &geo, &old_raid_disks)) {
7633 struct imsm_update_reshape *u = NULL;
7634 int len;
7635
7636 len = imsm_create_metadata_update_for_reshape(
7637 st, &geo, old_raid_disks, &u);
7638
ed08d51c
AK
7639 if (len <= 0) {
7640 dprintf("imsm: Cannot prepare update\n");
7641 goto exit_imsm_reshape_super;
7642 }
7643
8dd70bce
AK
7644 ret_val = 0;
7645 /* update metadata locally */
7646 imsm_update_metadata_locally(st, u, len);
7647 /* and possibly remotely */
7648 if (st->update_tail)
7649 append_metadata_update(st, u, len);
7650 else
ed08d51c 7651 free(u);
8dd70bce 7652
694575e7 7653 } else {
e7ff7e40
AK
7654 fprintf(stderr, Name ": (imsm) Operation "
7655 "is not allowed on this container\n");
694575e7
KW
7656 }
7657 } else {
7658 /* On the volume level we support the following operations
471bceb6
KW
7659 * - takeover: raid10 -> raid0; raid0 -> raid10
7660 * - chunk size migration
7661 * - migration: raid5 -> raid0; raid0 -> raid5
7662 */
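/* Illustrative command lines that reach this branch (device names are
 * hypothetical):
 *	mdadm --grow /dev/md/volume0 --level=5     (raid0 -> raid5 migration)
 *	mdadm --grow /dev/md/volume0 --chunk=128   (chunk size migration)
 * while growing the number of member disks is done on the container, e.g.
 *	mdadm --grow /dev/md/imsm0 --raid-devices=5
 */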
7663 struct intel_super *super = st->sb;
7664 struct intel_dev *dev = super->devlist;
7665 int change, devnum = -1; /* stays -1 if the subarray lookup below fails */
694575e7 7666 dprintf("imsm: info: Volume operation\n");
471bceb6
KW
7667 /* find requested device */
7668 while (dev) {
7669 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
7670 if (devnum == geo.dev_id)
7671 break;
7672 dev = dev->next;
7673 }
7674 if (dev == NULL) {
7675 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
7676 geo.dev_name, geo.dev_id);
7677 goto exit_imsm_reshape_super;
7678 }
7679 super->current_vol = dev->index;
694575e7
KW
7680 change = imsm_analyze_change(st, &geo);
7681 switch (change) {
471bceb6 7682 case CH_TAKEOVER:
bb025c2f 7683 ret_val = imsm_takeover(st, &geo);
694575e7 7684 break;
48c5303a
PC
7685 case CH_MIGRATION: {
7686 struct imsm_update_reshape_migration *u = NULL;
7687 int len =
7688 imsm_create_metadata_update_for_migration(
7689 st, &geo, &u);
7690 if (len < 1) {
7691 dprintf("imsm: "
7692 "Cannot prepare update\n");
7693 break;
7694 }
471bceb6 7695 ret_val = 0;
48c5303a
PC
7696 /* update metadata locally */
7697 imsm_update_metadata_locally(st, u, len);
7698 /* and possibly remotely */
7699 if (st->update_tail)
7700 append_metadata_update(st, u, len);
7701 else
7702 free(u);
7703 }
7704 break;
471bceb6
KW
7705 default:
7706 ret_val = 1;
694575e7 7707 }
694575e7 7708 }
78b10e66 7709
ed08d51c 7710exit_imsm_reshape_super:
78b10e66
N
7711 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
7712 return ret_val;
7713}
2cda7640 7714
999b4972
N
7715static int imsm_manage_reshape(
7716 int afd, struct mdinfo *sra, struct reshape *reshape,
7717 struct supertype *st, unsigned long stripes,
7718 int *fds, unsigned long long *offsets,
7719 int dests, int *destfd, unsigned long long *destoffsets)
7720{
7721 /* Just use child_monitor for now */
7722 return child_monitor(
7723 afd, sra, reshape, st, stripes,
7724 fds, offsets, dests, destfd, destoffsets);
7725}
71204a50 7726#endif /* MDASSEMBLE */
999b4972 7727
cdddbdbc
DW
7728struct superswitch super_imsm = {
7729#ifndef MDASSEMBLE
7730 .examine_super = examine_super_imsm,
7731 .brief_examine_super = brief_examine_super_imsm,
4737ae25 7732 .brief_examine_subarrays = brief_examine_subarrays_imsm,
9d84c8ea 7733 .export_examine_super = export_examine_super_imsm,
cdddbdbc
DW
7734 .detail_super = detail_super_imsm,
7735 .brief_detail_super = brief_detail_super_imsm,
bf5a934a 7736 .write_init_super = write_init_super_imsm,
0e600426
N
7737 .validate_geometry = validate_geometry_imsm,
7738 .add_to_super = add_to_super_imsm,
1a64be56 7739 .remove_from_super = remove_from_super_imsm,
d665cc31 7740 .detail_platform = detail_platform_imsm,
33414a01 7741 .kill_subarray = kill_subarray_imsm,
aa534678 7742 .update_subarray = update_subarray_imsm,
2b959fbf 7743 .load_container = load_container_imsm,
71204a50
N
7744 .default_geometry = default_geometry_imsm,
7745 .get_disk_controller_domain = imsm_get_disk_controller_domain,
7746 .reshape_super = imsm_reshape_super,
7747 .manage_reshape = imsm_manage_reshape,
cdddbdbc
DW
7748#endif
7749 .match_home = match_home_imsm,
7750 .uuid_from_super= uuid_from_super_imsm,
7751 .getinfo_super = getinfo_super_imsm,
5c4cd5da 7752 .getinfo_super_disks = getinfo_super_disks_imsm,
cdddbdbc
DW
7753 .update_super = update_super_imsm,
7754
7755 .avail_size = avail_size_imsm,
80e7f8c3 7756 .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
cdddbdbc
DW
7757
7758 .compare_super = compare_super_imsm,
7759
7760 .load_super = load_super_imsm,
bf5a934a 7761 .init_super = init_super_imsm,
e683ca88 7762 .store_super = store_super_imsm,
cdddbdbc
DW
7763 .free_super = free_super_imsm,
7764 .match_metadata_desc = match_metadata_desc_imsm,
bf5a934a 7765 .container_content = container_content_imsm,
cdddbdbc 7766
cdddbdbc 7767 .external = 1,
4cce4069 7768 .name = "imsm",
845dea95 7769
0e600426 7770#ifndef MDASSEMBLE
845dea95
NB
7771/* for mdmon */
7772 .open_new = imsm_open_new,
ed9d66aa 7773 .set_array_state= imsm_set_array_state,
845dea95
NB
7774 .set_disk = imsm_set_disk,
7775 .sync_metadata = imsm_sync_metadata,
88758e9d 7776 .activate_spare = imsm_activate_spare,
e8319a19 7777 .process_update = imsm_process_update,
8273f55e 7778 .prepare_update = imsm_prepare_update,
0e600426 7779#endif /* MDASSEMBLE */
cdddbdbc 7780};