]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-intel.c
imsm: Add metadata update type for general migration check-pointing
[thirdparty/mdadm.git] / super-intel.c
CommitLineData
cdddbdbc
DW
1/*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
a54d5262 4 * Copyright (C) 2002-2008 Intel Corporation
cdddbdbc
DW
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
51006d85 20#define HAVE_STDINT_H 1
cdddbdbc 21#include "mdadm.h"
c2a1e7da 22#include "mdmon.h"
51006d85 23#include "sha1.h"
88c32bb1 24#include "platform-intel.h"
cdddbdbc
DW
25#include <values.h>
26#include <scsi/sg.h>
27#include <ctype.h>
d665cc31 28#include <dirent.h>
cdddbdbc
DW
29
/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
/* version string stored after the signature selects the feature set */
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH  32
#define MAX_RAID_SERIAL_LEN   16

/* anchor 'attributes' feature bits (stored little-endian on disk) */
#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM      __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB     __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0   __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1   __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10  __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E  __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5   __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)

/* sectors reserved at the end of a disk for the anchor + logs */
#define MPB_SECTOR_CNT 2210
#define IMSM_RESERVED_SECTORS 4096
#define SECT_PER_MB_SHIFT 11
cdddbdbc
DW
57
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
/* on-disk per-disk record in the anchor's disk table (packed layout,
 * multi-byte fields little-endian) */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
/* bits of the 'status' word below */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
	__u32 status;			 /* 0xF0 - 0xF3 */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define	IMSM_DISK_FILLERS	4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};
72
/* RAID map configuration infos. */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;	/* valid only when state is degraded */
	__u8  ddf;
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 * (IMSM_ORD_REBUILD marks a disk being rebuilt)
				 */
} __attribute__ ((packed));
98
/* per-volume state; immediately followed by one imsm_map in memory and on
 * disk, plus a second map while migr_state is set */
struct imsm_vol {
	__u32 curr_migr_unit;
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));
118
/* on-disk description of one raid volume; variable-sized because the
 * trailing imsm_vol carries one or two variable-sized maps */
struct imsm_dev {
	__u8 volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;
	__u32 size_high;
/* bits of the 'status' word below */
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;	/* Persistent RaidDev status */
	__u32 reserved_blocks; /* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
149
/* on-disk anchor of the IMSM metadata; multi-byte fields little-endian */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));
172
604b746f
JD
#define BBM_LOG_MAX_ENTRIES 254

/* one remapped bad-block range in the BBM log */
struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

/* Bad Block Management log (size given by anchor's bbm_log_size) */
struct bbm_log {
	__u32 signature; /* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved; /* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
191
192
cdddbdbc
DW
#ifndef MDASSEMBLE
/* printable names indexed by IMSM_T_STATE_* map_state values */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif
196
8e59f3d8
AK
#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209

#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */

#define UNIT_SRC_NORMAL     0   /* Source data for curr_migr_unit must
				 *  be recovered using srcMap */
#define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
				 *  already been migrated and must
				 *  be recovered from checkpoint area */
/* on-disk checkpoint record for general migration (packed layout) */
struct migr_record {
	__u32 rec_status;	    /* Status used to determine how to restart
				     * migration in case it aborts
				     * in some fashion */
	__u32 curr_migr_unit;	    /* 0..numMigrUnits-1 */
	__u32 family_num;	    /* Family number of MPB
				     * containing the RaidDev
				     * that is migrating */
	__u32 ascending_migr;	    /* True if migrating in increasing
				     * order of lbas */
	__u32 blocks_per_unit;	    /* Num disk blocks per unit of operation */
	__u32 dest_depth_per_unit;  /* Num member blocks each destMap
				     * member disk
				     * advances per unit-of-operation */
	__u32 ckpt_area_pba;	    /* Pba of first block of ckpt copy area */
	__u32 dest_1st_member_lba;  /* First member lba on first
				     * stripe of destination */
	__u32 num_migr_units;	    /* Total num migration units-of-op */
	__u32 post_migr_vol_cap;    /* Size of volume after
				     * migration completes */
	__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
	__u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
				     * migration ckpt record was read from
				     * (for recovered migrations) */
} __attribute__ ((__packed__));
231
1484e727
DW
232static __u8 migr_type(struct imsm_dev *dev)
233{
234 if (dev->vol.migr_type == MIGR_VERIFY &&
235 dev->status & DEV_VERIFY_AND_FIX)
236 return MIGR_REPAIR;
237 else
238 return dev->vol.migr_type;
239}
240
241static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
242{
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
245 */
246 if (migr_type == MIGR_REPAIR) {
247 dev->vol.migr_type = MIGR_VERIFY;
248 dev->status |= DEV_VERIFY_AND_FIX;
249 } else {
250 dev->vol.migr_type = migr_type;
251 dev->status &= ~DEV_VERIFY_AND_FIX;
252 }
253}
254
87eb16df 255static unsigned int sector_count(__u32 bytes)
cdddbdbc 256{
87eb16df
DW
257 return ((bytes + (512-1)) & (~(512-1))) / 512;
258}
cdddbdbc 259
87eb16df
DW
260static unsigned int mpb_sectors(struct imsm_super *mpb)
261{
262 return sector_count(__le32_to_cpu(mpb->mpb_size));
cdddbdbc
DW
263}
264
ba2de7ba
DW
/* parsed copy of one imsm_dev, linked into intel_super->devlist */
struct intel_dev {
	struct imsm_dev *dev;
	struct intel_dev *next;
	unsigned index;
};

/* one raid controller hosting disks of this container */
struct intel_hba {
	enum sys_dev_type type;
	char *path;
	char *pci_id;	/* points into 'path', past the last '/' */
	struct intel_hba *next;
};

/* pending disk management operation (see intel_super->disk_mgmt_list) */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
cdddbdbc
DW
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	union {
		void *migr_rec_buf; /* buffer for I/O operations */
		struct migr_record *migr_rec; /* migration record */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int current_vol; /* index of raid device undergoing creation */
	__u32 create_offset; /* common start for 'current_vol' */
	__u32 random; /* random data for seeding new family numbers */
	struct intel_dev *devlist;
	/* one entry per physical disk known to this container */
	struct dl {
		struct dl *next;
		int index;
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
		int raiddisk; /* slot to fill in autolayout */
		enum action action;
	} *disks;
	struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
				      active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	struct intel_hba *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
	struct intel_super *next; /* (temp) list for disambiguating family_num */
};

struct intel_disk {
	struct imsm_disk disk;
	#define IMSM_UNKNOWN_OWNER (-1)
	int owner;
	struct intel_disk *next;
};

/* a used region of a member disk: [start, start + size) in sectors */
struct extent {
	unsigned long long start, size;
};
332
694575e7
KW
/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER,
	CH_MIGRATION,
};

/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_reshape_migration,
	update_takeover,
	update_general_migration_checkpoint,
};

/* request to promote spare 'dl' into 'slot' of subarray 'array' */
struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};
359
78b10e66
N
/* target geometry requested for a reshape */
struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};

enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10
};
struct imsm_update_takeover {
	enum imsm_update_type type;
	int subarray;
	enum takeover_direction direction;
};

struct imsm_update_reshape {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct imsm_update_reshape_migration {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;
	/* fields for array migration changes
	 */
	int subdev;
	int new_level;
	int new_layout;
	int new_chunksize;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

/* records migration progress (curr_migr_unit) for check-pointing */
struct imsm_update_general_migration_checkpoint {
	enum imsm_update_type type;
	__u32 curr_migr_unit;
};

struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};
410
8273f55e
DW
/* create request: the message is trailed by the variable-sized imsm_dev
 * and a disk_info list (see get_disk_info()) */
struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

struct imsm_update_kill_array {
	enum imsm_update_type type;
	int dev_idx;
};

struct imsm_update_rename_array {
	enum imsm_update_type type;
	__u8 name[MAX_RAID_SERIAL_LEN];
	int dev_idx;
};

/* no payload: mdmon rescans super->disk_mgmt_list for the details */
struct imsm_update_add_remove_disk {
	enum imsm_update_type type;
};
431
88654014
LM
432
/* printable names for enum sys_dev_type values */
static const char *_sys_dev_type[] = {
	[SYS_DEV_UNKNOWN] = "Unknown",
	[SYS_DEV_SAS] = "SAS",
	[SYS_DEV_SATA] = "SATA"
};

/* Map a sys_dev_type to a printable name; out-of-range values
 * (>= SYS_DEV_MAX) fall back to "Unknown".
 */
const char *get_sys_dev_type(enum sys_dev_type type)
{
	if (type >= SYS_DEV_MAX)
		type = SYS_DEV_UNKNOWN;

	return _sys_dev_type[type];
}
446
447static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
448{
449 struct intel_hba *result = malloc(sizeof(*result));
450 if (result) {
451 result->type = device->type;
452 result->path = strdup(device->path);
453 result->next = NULL;
454 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
455 result->pci_id++;
456 }
457 return result;
458}
459
460static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
461{
462 struct intel_hba *result=NULL;
463 for (result = hba; result; result = result->next) {
464 if (result->type == device->type && strcmp(result->path, device->path) == 0)
465 break;
466 }
467 return result;
468}
469
/* Attach the HBA hosting 'device' to this container's metadata.
 *
 * Returns 1 when the HBA is (already or now) attached, 2 when the device
 * sits on an HBA of a different type than the one already in use.
 */
static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
{
	struct intel_hba *hba;

	/* check if disk attached to Intel HBA */
	hba = find_intel_hba(super->hba, device);
	if (hba != NULL)
		return 1;
	/* Check if HBA is already attached to super */
	if (super->hba == NULL) {
		super->hba = alloc_intel_hba(device);
		return 1;
	}

	hba = super->hba;
	/* Intel metadata allows for all disks attached to the same type HBA.
	 * Do not support mixing of different HBA types.
	 */
	if (device->type != hba->type)
		return 2;

	/* append the new HBA to the end of the list */
	while (hba->next)
		hba = hba->next;

	hba->next = alloc_intel_hba(device);
	return 1;
}
497
498static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
499{
500 struct sys_dev *list, *elem, *prev;
501 char *disk_path;
502
503 if ((list = find_intel_devices()) == NULL)
504 return 0;
505
506 if (fd < 0)
507 disk_path = (char *) devname;
508 else
509 disk_path = diskfd_to_devpath(fd);
510
511 if (!disk_path) {
512 free_sys_dev(&list);
513 return 0;
514 }
515
516 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
517 if (path_attached_to_hba(disk_path, elem->path)) {
518 if (prev == NULL)
519 list = list->next;
520 else
521 prev->next = elem->next;
522 elem->next = NULL;
523 if (disk_path != devname)
524 free(disk_path);
525 free_sys_dev(&list);
526 return elem;
527 }
528 }
529 if (disk_path != devname)
530 free(disk_path);
531 free_sys_dev(&list);
532
533 return NULL;
534}
535
536
d424212e
N
537static int find_intel_hba_capability(int fd, struct intel_super *super,
538 char *devname);
f2f5c343 539
cdddbdbc
DW
540static struct supertype *match_metadata_desc_imsm(char *arg)
541{
542 struct supertype *st;
543
544 if (strcmp(arg, "imsm") != 0 &&
545 strcmp(arg, "default") != 0
546 )
547 return NULL;
548
549 st = malloc(sizeof(*st));
4e9d2186
AW
550 if (!st)
551 return NULL;
ef609477 552 memset(st, 0, sizeof(*st));
d1d599ea 553 st->container_dev = NoMdDev;
cdddbdbc
DW
554 st->ss = &super_imsm;
555 st->max_devs = IMSM_MAX_DEVICES;
556 st->minor_version = 0;
557 st->sb = NULL;
558 return st;
559}
560
#ifndef MDASSEMBLE
/* the version string is stored in sig[] directly after the signature text */
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif
cdddbdbc 567
949c47a0
DW
568/* retrieve a disk directly from the anchor when the anchor is known to be
569 * up-to-date, currently only at load time
570 */
571static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
cdddbdbc 572{
949c47a0 573 if (index >= mpb->num_disks)
cdddbdbc
DW
574 return NULL;
575 return &mpb->disk[index];
576}
577
95d07a2c
LM
578/* retrieve the disk description based on a index of the disk
579 * in the sub-array
580 */
581static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
949c47a0 582{
b9f594fe
DW
583 struct dl *d;
584
585 for (d = super->disks; d; d = d->next)
586 if (d->index == index)
95d07a2c
LM
587 return d;
588
589 return NULL;
590}
591/* retrieve a disk from the parsed metadata */
592static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
593{
594 struct dl *dl;
595
596 dl = get_imsm_dl_disk(super, index);
597 if (dl)
598 return &dl->disk;
599
b9f594fe 600 return NULL;
949c47a0
DW
601}
602
/* generate a checksum directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load or write_super after coalescing
 */
static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
{
	/* number of 32-bit words covered by the checksum */
	__u32 end = mpb->mpb_size / sizeof(end);
	__u32 *p = (__u32 *) mpb;
	__u32 sum = 0;

	while (end--) {
		sum += __le32_to_cpu(*p);
		p++;
	}

	/* exclude the stored check_sum field from its own checksum */
	return sum - __le32_to_cpu(mpb->check_sum);
}
619
a965f303
DW
620static size_t sizeof_imsm_map(struct imsm_map *map)
621{
622 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
623}
624
struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
{
	/* A device can have 2 maps if it is in the middle of a migration.
	 * If second_map is:
	 *    0   - we return the first map
	 *    1   - we return the second map if it exists, else NULL
	 *   -1   - we return the second map if it exists, else the first
	 */
	struct imsm_map *map = &dev->vol.map[0];

	if (second_map == 1 && !dev->vol.migr_state)
		return NULL;
	else if (second_map == 1 ||
		 (second_map < 0 && dev->vol.migr_state)) {
		void *ptr = map;

		/* map[1] starts directly behind map[0], whose size varies
		 * with num_members (see sizeof_imsm_map) */
		return ptr + sizeof_imsm_map(map);
	} else
		return map;

}
cdddbdbc 646
3393c6af
DW
647/* return the size of the device.
648 * migr_state increases the returned size if map[0] were to be duplicated
649 */
650static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
a965f303
DW
651{
652 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
653 sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
654
655 /* migrating means an additional map */
a965f303
DW
656 if (dev->vol.migr_state)
657 size += sizeof_imsm_map(get_imsm_map(dev, 1));
3393c6af
DW
658 else if (migr_state)
659 size += sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
660
661 return size;
662}
663
54c2c1ea
DW
#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	/* the disk_info array lives directly behind the variable-sized
	 * imsm_dev embedded at the tail of the update message */
	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
#endif
677
949c47a0 678static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
cdddbdbc
DW
679{
680 int offset;
681 int i;
682 void *_mpb = mpb;
683
949c47a0 684 if (index >= mpb->num_raid_devs)
cdddbdbc
DW
685 return NULL;
686
687 /* devices start after all disks */
688 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
689
690 for (i = 0; i <= index; i++)
691 if (i == index)
692 return _mpb + offset;
693 else
3393c6af 694 offset += sizeof_imsm_dev(_mpb + offset, 0);
cdddbdbc
DW
695
696 return NULL;
697}
698
949c47a0
DW
699static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
700{
ba2de7ba
DW
701 struct intel_dev *dv;
702
949c47a0
DW
703 if (index >= super->anchor->num_raid_devs)
704 return NULL;
ba2de7ba
DW
705 for (dv = super->devlist; dv; dv = dv->next)
706 if (dv->index == index)
707 return dv->dev;
708 return NULL;
949c47a0
DW
709}
710
98130f40
AK
/*
 * for second_map:
 *  == 0 get first map
 *  == 1 get second map
 *  == -1 than get map according to the current migr_state
 */
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
				  int slot,
				  int second_map)
{
	struct imsm_map *map;

	/* NOTE(review): no NULL check — callers must not pass second_map==1
	 * for a non-migrating device (get_imsm_map returns NULL then) */
	map = get_imsm_map(dev, second_map);

	/* top byte identifies disk under rebuild */
	return __le32_to_cpu(map->disk_ord_tbl[slot]);
}
728
729#define ord_to_idx(ord) (((ord) << 8) >> 8)
98130f40 730static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
ff077194 731{
98130f40 732 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
ff077194
DW
733
734 return ord_to_idx(ord);
7eef0453
DW
735}
736
be73972f
DW
/* store 'ord' (disk index plus flag byte) at 'slot', little-endian on disk */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
741
f21e18ca 742static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
620b1713
DW
743{
744 int slot;
745 __u32 ord;
746
747 for (slot = 0; slot < map->num_members; slot++) {
748 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
749 if (ord_to_idx(ord) == idx)
750 return slot;
751 }
752
753 return -1;
754}
755
cdddbdbc
DW
756static int get_imsm_raid_level(struct imsm_map *map)
757{
758 if (map->raid_level == 1) {
759 if (map->num_members == 2)
760 return 1;
761 else
762 return 10;
763 }
764
765 return map->raid_level;
766}
767
c2c087e6
DW
768static int cmp_extent(const void *av, const void *bv)
769{
770 const struct extent *a = av;
771 const struct extent *b = bv;
772 if (a->start < b->start)
773 return -1;
774 if (a->start > b->start)
775 return 1;
776 return 0;
777}
778
0dcecb2e 779static int count_memberships(struct dl *dl, struct intel_super *super)
c2c087e6 780{
c2c087e6 781 int memberships = 0;
620b1713 782 int i;
c2c087e6 783
949c47a0
DW
784 for (i = 0; i < super->anchor->num_raid_devs; i++) {
785 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 786 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 787
620b1713
DW
788 if (get_imsm_disk_slot(map, dl->index) >= 0)
789 memberships++;
c2c087e6 790 }
0dcecb2e
DW
791
792 return memberships;
793}
794
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i;
	int memberships = count_memberships(dl, super);
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* one extent per membership plus a size-0 terminator entry */
	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		if (get_imsm_disk_slot(map, dl->index) >= 0) {
			e->start = __le32_to_cpu(map->pba_of_lba0);
			e->size = __le32_to_cpu(map->blocks_per_member);
			e++;
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		/* round down to 1k block to satisfy precision of the kernel
		 * 'size' interface
		 */
		remainder &= ~1UL;
		/* make sure remainder is still sane */
		if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
			remainder = ROUND_UP(super->len, 512) >> 9;
		if (reservation > remainder)
			reservation = remainder;
	}
	/* terminator: start of the reserved metadata area, size 0 */
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
845
14e8215b
DW
846/* try to determine how much space is reserved for metadata from
847 * the last get_extents() entry, otherwise fallback to the
848 * default
849 */
850static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
851{
852 struct extent *e;
853 int i;
854 __u32 rv;
855
856 /* for spares just return a minimal reservation which will grow
857 * once the spare is picked up by an array
858 */
859 if (dl->index == -1)
860 return MPB_SECTOR_CNT;
861
862 e = get_extents(super, dl);
863 if (!e)
864 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
865
866 /* scroll to last entry */
867 for (i = 0; e[i].size; i++)
868 continue;
869
870 rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;
871
872 free(e);
873
874 return rv;
875}
876
25ed7e59
DW
877static int is_spare(struct imsm_disk *disk)
878{
879 return (disk->status & SPARE_DISK) == SPARE_DISK;
880}
881
882static int is_configured(struct imsm_disk *disk)
883{
884 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
885}
886
887static int is_failed(struct imsm_disk *disk)
888{
889 return (disk->status & FAILED_DISK) == FAILED_DISK;
890}
891
80e7f8c3
AC
/* Return minimum size of a spare that can be used in this array*/
/* Result is in bytes; 0 when the container has no usable member disk. */
static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
{
	struct intel_super *super = st->sb;
	struct dl *dl;
	struct extent *e;
	int i;
	unsigned long long rv = 0;

	if (!super)
		return rv;
	/* find first active disk in array */
	dl = super->disks;
	while (dl && (is_failed(&dl->disk) || dl->index == -1))
		dl = dl->next;
	if (!dl)
		return rv;
	/* find last lba used by subarrays */
	e = get_extents(super, dl);
	if (!e)
		return rv;
	/* scroll to the size-0 terminator entry */
	for (i = 0; e[i].size; i++)
		continue;
	if (i > 0)
		rv = e[i-1].start + e[i-1].size;
	free(e);
	/* add the amount of space needed for metadata */
	rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
	/* sectors -> bytes */
	return rv * 512;
}
922
#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct intel_super *super,
				  struct imsm_dev *dev);

/* Pretty-print one raid volume for --examine, from the point of view of
 * the disk with anchor index 'disk_idx'.  When the volume is migrating a
 * second map exists and both old/new values are shown ("a <-- b").
 */
static void print_imsm_dev(struct intel_super *super,
			   struct imsm_dev *dev,
			   char *uuid,
			   int disk_idx)
{
	__u64 sz;
	int slot, i;
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *map2 = get_imsm_map(dev, 1);	/* NULL unless migrating */
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf(" UUID : %s\n", uuid);
	printf(" RAID Level : %d", get_imsm_raid_level(map));
	if (map2)
		printf(" <-- %d", get_imsm_raid_level(map2));
	printf("\n");
	printf(" Members : %d", map->num_members);
	if (map2)
		printf(" <-- %d", map2->num_members);
	printf("\n");
	/* one char per slot: U = in sync, _ = under rebuild */
	printf(" Slots : [");
	for (i = 0; i < map->num_members; i++) {
		ord = get_imsm_ord_tbl_ent(dev, i, 0);
		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
	}
	printf("]");
	if (map2) {
		printf(" <-- [");
		for (i = 0; i < map2->num_members; i++) {
			ord = get_imsm_ord_tbl_ent(dev, i, 1);
			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
		}
		printf("]");
	}
	printf("\n");
	printf(" Failed disk : ");
	if (map->failed_disk_num == 0xff)
		printf("none");
	else
		printf("%i", map->failed_disk_num);
	printf("\n");
	slot = get_imsm_disk_slot(map, disk_idx);
	if (slot >= 0) {
		ord = get_imsm_ord_tbl_ent(dev, slot, -1);
		printf(" This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf(" This Slot : ?\n");
	/* 64-bit volume size assembled from the two 32-bit halves */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf(" Sector Offset : %u\n",
		__le32_to_cpu(map->pba_of_lba0));
	printf(" Num Stripes : %u\n",
		__le32_to_cpu(map->num_data_stripes));
	/* blocks_per_strip is in 512-byte sectors, hence /2 for KiB */
	printf(" Chunk Size : %u KiB",
		__le16_to_cpu(map->blocks_per_strip) / 2);
	if (map2)
		printf(" <-- %u KiB",
			__le16_to_cpu(map2->blocks_per_strip) / 2);
	printf("\n");
	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf(" Migrate State : ");
	if (dev->vol.migr_state) {
		if (migr_type(dev) == MIGR_INIT)
			printf("initialize\n");
		else if (migr_type(dev) == MIGR_REBUILD)
			printf("rebuild\n");
		else if (migr_type(dev) == MIGR_VERIFY)
			printf("check\n");
		else if (migr_type(dev) == MIGR_GEN_MIGR)
			printf("general migration\n");
		else if (migr_type(dev) == MIGR_STATE_CHANGE)
			printf("state change\n");
		else if (migr_type(dev) == MIGR_REPAIR)
			printf("repair\n");
		else
			printf("<unknown:%d>\n", migr_type(dev));
	} else
		printf("idle\n");
	printf(" Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		struct imsm_map *map = get_imsm_map(dev, 1);

		printf(" <-- %s", map_state_str[map->map_state]);
		printf("\n Checkpoint : %u (%llu)",
		       __le32_to_cpu(dev->vol.curr_migr_unit),
		       (unsigned long long)blocks_per_migr_unit(super, dev));
	}
	printf("\n");
	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
1026
14e8215b 1027static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
cdddbdbc 1028{
949c47a0 1029 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
1f24f035 1030 char str[MAX_RAID_SERIAL_LEN + 1];
cdddbdbc
DW
1031 __u64 sz;
1032
d362da3d 1033 if (index < 0 || !disk)
e9d82038
DW
1034 return;
1035
cdddbdbc 1036 printf("\n");
1f24f035 1037 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
cdddbdbc 1038 printf(" Disk%02d Serial : %s\n", index, str);
25ed7e59
DW
1039 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1040 is_configured(disk) ? " active" : "",
1041 is_failed(disk) ? " failed" : "");
cdddbdbc 1042 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
14e8215b 1043 sz = __le32_to_cpu(disk->total_blocks) - reserved;
cdddbdbc
DW
1044 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1045 human_size(sz * 512));
1046}
1047
520e69e2
AK
static int is_gen_migration(struct imsm_dev *dev);

/* Dump the on-disk migration checkpoint record for the first volume that
 * is undergoing general migration.  The record is only meaningful when it
 * was read from one of the first two disks of the container; for any other
 * disk we report it as empty and point the user at the right disks.
 */
void examine_migr_rec_imsm(struct intel_super *super)
{
	struct migr_record *migr_rec = super->migr_rec;
	struct imsm_super *mpb = super->anchor;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		/* only general migration maintains a checkpoint record */
		if (is_gen_migration(dev) == 0)
			continue;

		printf("\nMigration Record Information:");
		/* the record is stored on the first two disks only */
		if (super->disks->index > 1) {
			printf(" Empty\n ");
			printf("Examine one of first two disks in array\n");
			break;
		}
		printf("\n Status : ");
		if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
			printf("Normal\n");
		else
			printf("Contains Data\n");
		printf(" Current Unit : %u\n",
		       __le32_to_cpu(migr_rec->curr_migr_unit));
		printf(" Family : %u\n",
		       __le32_to_cpu(migr_rec->family_num));
		printf(" Ascending : %u\n",
		       __le32_to_cpu(migr_rec->ascending_migr));
		printf(" Blocks Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->blocks_per_unit));
		printf(" Dest. Depth Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->dest_depth_per_unit));
		printf(" Checkpoint Area pba : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_area_pba));
		printf(" First member lba : %u\n",
		       __le32_to_cpu(migr_rec->dest_1st_member_lba));
		printf(" Total Number of Units : %u\n",
		       __le32_to_cpu(migr_rec->num_migr_units));
		printf(" Size of volume : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap));
		printf(" Expansion space for LBA64 : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
		printf(" Record was read from : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_read_disk_num));

		/* only one volume can be under general migration; stop */
		break;
	}
}
1098
static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);

/* Implement --examine for an imsm container: dump the anchor mpb header
 * fields, the local disk, the bad-block log (if present), every defined
 * volume, the remaining container disks, any disks with a negative index
 * (spares / not yet in the metadata), and finally the migration record.
 */
static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	struct mdinfo info;
	char nbuf[64];
	__u32 sum;
	__u32 reserved = imsm_reserved_sectors(super, super->disks);
	struct dl *dl;

	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf(" Magic : %s\n", str);
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf(" Version : %s\n", get_imsm_version(mpb));
	printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	/* nbuf is "UUID-xxxx..."; skip the 5-char prefix */
	printf(" UUID : %s\n", nbuf + 5);
	sum = __le32_to_cpu(mpb->check_sum);
	printf(" Checksum : %08x %s\n", sum,
	       __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
	printf(" Disks : %d\n", mpb->num_disks);
	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
	/* the disk this superblock was read from comes first */
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf(" Signature : %x\n", __le32_to_cpu(log->signature));
		printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
		printf(" First Spare : %llx\n",
		       (unsigned long long) __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct mdinfo info;
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		/* getinfo_super_imsm reports on super->current_vol */
		super->current_vol = i;
		getinfo_super_imsm(st, &info, NULL);
		fname_from_uuid(st, &info, nbuf, ':');
		print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
	}
	/* all other disks recorded in the metadata (local one already shown) */
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue;
		print_imsm_disk(mpb, i, reserved);
	}
	/* disks with a negative index are not part of the metadata
	 * (e.g. spares); report them separately
	 */
	for (dl = super->disks ; dl; dl = dl->next) {
		struct imsm_disk *disk;
		char str[MAX_RAID_SERIAL_LEN + 1];
		__u64 sz;

		if (dl->index >= 0)
			continue;

		disk = &dl->disk;
		printf("\n");
		snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
		printf(" Disk Serial : %s\n", str);
		printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
		       is_configured(disk) ? " active" : "",
		       is_failed(disk) ? " failed" : "");
		printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
		sz = __le32_to_cpu(disk->total_blocks) - reserved;
		printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
		       human_size(sz * 512));
	}

	examine_migr_rec_imsm(super);
}
1179
061f2c6a 1180static void brief_examine_super_imsm(struct supertype *st, int verbose)
cdddbdbc 1181{
27fd6274 1182 /* We just write a generic IMSM ARRAY entry */
ff54de6e
N
1183 struct mdinfo info;
1184 char nbuf[64];
1e7bc0ed 1185 struct intel_super *super = st->sb;
1e7bc0ed 1186
0d5a423f
DW
1187 if (!super->anchor->num_raid_devs) {
1188 printf("ARRAY metadata=imsm\n");
1e7bc0ed 1189 return;
0d5a423f 1190 }
ff54de6e 1191
a5d85af7 1192 getinfo_super_imsm(st, &info, NULL);
4737ae25
N
1193 fname_from_uuid(st, &info, nbuf, ':');
1194 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1195}
1196
1197static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1198{
1199 /* We just write a generic IMSM ARRAY entry */
1200 struct mdinfo info;
1201 char nbuf[64];
1202 char nbuf1[64];
1203 struct intel_super *super = st->sb;
1204 int i;
1205
1206 if (!super->anchor->num_raid_devs)
1207 return;
1208
a5d85af7 1209 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1210 fname_from_uuid(st, &info, nbuf, ':');
1e7bc0ed
DW
1211 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1212 struct imsm_dev *dev = get_imsm_dev(super, i);
1213
1214 super->current_vol = i;
a5d85af7 1215 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1216 fname_from_uuid(st, &info, nbuf1, ':');
1124b3cf 1217 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
cf8de691 1218 dev->volume, nbuf + 5, i, nbuf1 + 5);
1e7bc0ed 1219 }
cdddbdbc
DW
1220}
1221
9d84c8ea
DW
1222static void export_examine_super_imsm(struct supertype *st)
1223{
1224 struct intel_super *super = st->sb;
1225 struct imsm_super *mpb = super->anchor;
1226 struct mdinfo info;
1227 char nbuf[64];
1228
a5d85af7 1229 getinfo_super_imsm(st, &info, NULL);
9d84c8ea
DW
1230 fname_from_uuid(st, &info, nbuf, ':');
1231 printf("MD_METADATA=imsm\n");
1232 printf("MD_LEVEL=container\n");
1233 printf("MD_UUID=%s\n", nbuf+5);
1234 printf("MD_DEVICES=%u\n", mpb->num_disks);
1235}
1236
cdddbdbc
DW
1237static void detail_super_imsm(struct supertype *st, char *homehost)
1238{
3ebe00a1
DW
1239 struct mdinfo info;
1240 char nbuf[64];
1241
a5d85af7 1242 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1243 fname_from_uuid(st, &info, nbuf, ':');
3ebe00a1 1244 printf("\n UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1245}
1246
1247static void brief_detail_super_imsm(struct supertype *st)
1248{
ff54de6e
N
1249 struct mdinfo info;
1250 char nbuf[64];
a5d85af7 1251 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1252 fname_from_uuid(st, &info, nbuf, ':');
ff54de6e 1253 printf(" UUID=%s", nbuf + 5);
cdddbdbc 1254}
d665cc31
DW
1255
1256static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1257static void fd2devname(int fd, char *name);
1258
120dc887 1259static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
d665cc31 1260{
120dc887
LM
1261 /* dump an unsorted list of devices attached to AHCI Intel storage
1262 * controller, as well as non-connected ports
d665cc31
DW
1263 */
1264 int hba_len = strlen(hba_path) + 1;
1265 struct dirent *ent;
1266 DIR *dir;
1267 char *path = NULL;
1268 int err = 0;
1269 unsigned long port_mask = (1 << port_count) - 1;
1270
f21e18ca 1271 if (port_count > (int)sizeof(port_mask) * 8) {
d665cc31
DW
1272 if (verbose)
1273 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1274 return 2;
1275 }
1276
1277 /* scroll through /sys/dev/block looking for devices attached to
1278 * this hba
1279 */
1280 dir = opendir("/sys/dev/block");
1281 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1282 int fd;
1283 char model[64];
1284 char vendor[64];
1285 char buf[1024];
1286 int major, minor;
1287 char *device;
1288 char *c;
1289 int port;
1290 int type;
1291
1292 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1293 continue;
1294 path = devt_to_devpath(makedev(major, minor));
1295 if (!path)
1296 continue;
1297 if (!path_attached_to_hba(path, hba_path)) {
1298 free(path);
1299 path = NULL;
1300 continue;
1301 }
1302
1303 /* retrieve the scsi device type */
1304 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1305 if (verbose)
1306 fprintf(stderr, Name ": failed to allocate 'device'\n");
1307 err = 2;
1308 break;
1309 }
1310 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1311 if (load_sys(device, buf) != 0) {
1312 if (verbose)
1313 fprintf(stderr, Name ": failed to read device type for %s\n",
1314 path);
1315 err = 2;
1316 free(device);
1317 break;
1318 }
1319 type = strtoul(buf, NULL, 10);
1320
1321 /* if it's not a disk print the vendor and model */
1322 if (!(type == 0 || type == 7 || type == 14)) {
1323 vendor[0] = '\0';
1324 model[0] = '\0';
1325 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1326 if (load_sys(device, buf) == 0) {
1327 strncpy(vendor, buf, sizeof(vendor));
1328 vendor[sizeof(vendor) - 1] = '\0';
1329 c = (char *) &vendor[sizeof(vendor) - 1];
1330 while (isspace(*c) || *c == '\0')
1331 *c-- = '\0';
1332
1333 }
1334 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1335 if (load_sys(device, buf) == 0) {
1336 strncpy(model, buf, sizeof(model));
1337 model[sizeof(model) - 1] = '\0';
1338 c = (char *) &model[sizeof(model) - 1];
1339 while (isspace(*c) || *c == '\0')
1340 *c-- = '\0';
1341 }
1342
1343 if (vendor[0] && model[0])
1344 sprintf(buf, "%.64s %.64s", vendor, model);
1345 else
1346 switch (type) { /* numbers from hald/linux/device.c */
1347 case 1: sprintf(buf, "tape"); break;
1348 case 2: sprintf(buf, "printer"); break;
1349 case 3: sprintf(buf, "processor"); break;
1350 case 4:
1351 case 5: sprintf(buf, "cdrom"); break;
1352 case 6: sprintf(buf, "scanner"); break;
1353 case 8: sprintf(buf, "media_changer"); break;
1354 case 9: sprintf(buf, "comm"); break;
1355 case 12: sprintf(buf, "raid"); break;
1356 default: sprintf(buf, "unknown");
1357 }
1358 } else
1359 buf[0] = '\0';
1360 free(device);
1361
1362 /* chop device path to 'host%d' and calculate the port number */
1363 c = strchr(&path[hba_len], '/');
4e5e717d
AW
1364 if (!c) {
1365 if (verbose)
1366 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1367 err = 2;
1368 break;
1369 }
d665cc31
DW
1370 *c = '\0';
1371 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1372 port -= host_base;
1373 else {
1374 if (verbose) {
1375 *c = '/'; /* repair the full string */
1376 fprintf(stderr, Name ": failed to determine port number for %s\n",
1377 path);
1378 }
1379 err = 2;
1380 break;
1381 }
1382
1383 /* mark this port as used */
1384 port_mask &= ~(1 << port);
1385
1386 /* print out the device information */
1387 if (buf[0]) {
1388 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1389 continue;
1390 }
1391
1392 fd = dev_open(ent->d_name, O_RDONLY);
1393 if (fd < 0)
1394 printf(" Port%d : - disk info unavailable -\n", port);
1395 else {
1396 fd2devname(fd, buf);
1397 printf(" Port%d : %s", port, buf);
1398 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1399 printf(" (%s)\n", buf);
1400 else
1401 printf("()\n");
1402 }
1403 close(fd);
1404 free(path);
1405 path = NULL;
1406 }
1407 if (path)
1408 free(path);
1409 if (dir)
1410 closedir(dir);
1411 if (err == 0) {
1412 int i;
1413
1414 for (i = 0; i < port_count; i++)
1415 if (port_mask & (1 << i))
1416 printf(" Port%d : - no device attached -\n", i);
1417 }
1418
1419 return err;
1420}
1421
120dc887 1422
155cbb4c 1423
120dc887
LM
1424static void print_found_intel_controllers(struct sys_dev *elem)
1425{
1426 for (; elem; elem = elem->next) {
1427 fprintf(stderr, Name ": found Intel(R) ");
1428 if (elem->type == SYS_DEV_SATA)
1429 fprintf(stderr, "SATA ");
155cbb4c
LM
1430 else if (elem->type == SYS_DEV_SAS)
1431 fprintf(stderr, "SAS ");
120dc887
LM
1432 fprintf(stderr, "RAID controller");
1433 if (elem->pci_id)
1434 fprintf(stderr, " at %s", elem->pci_id);
1435 fprintf(stderr, ".\n");
1436 }
1437 fflush(stderr);
1438}
1439
120dc887
LM
/* Count the scsi "host%d" entries under @hba_path.  *port_count receives
 * the number of ports; the return value is the lowest host number (the
 * host base), or -1 when the directory cannot be opened.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	DIR *dir = opendir(hba_path);
	struct dirent *ent;
	int host_base = -1;

	*port_count = 0;
	if (dir == NULL)
		return -1;

	while ((ent = readdir(dir)) != NULL) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		/* track the smallest host number seen so far */
		if (*port_count == 0 || host < host_base)
			host_base = host;
		if (host + 1 > *port_count + host_base)
			*port_count = host + 1 - host_base;
	}
	closedir(dir);
	return host_base;
}
1466
a891a3c2
LM
/* Print the platform RAID capabilities advertised by the option-rom:
 * version, supported raid levels, supported chunk sizes, and the maximum
 * number of disks/volumes.
 */
static void print_imsm_capability(const struct imsm_orom *orom)
{
	printf(" Platform : Intel(R) Matrix Storage Manager\n");
	printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
	       orom->hotfix_ver, orom->build);
	printf(" RAID Levels :%s%s%s%s%s\n",
	       imsm_orom_has_raid0(orom) ? " raid0" : "",
	       imsm_orom_has_raid1(orom) ? " raid1" : "",
	       imsm_orom_has_raid1e(orom) ? " raid1e" : "",
	       imsm_orom_has_raid10(orom) ? " raid10" : "",
	       imsm_orom_has_raid5(orom) ? " raid5" : "");
	/* chunk sizes are probed individually; unsupported ones print empty */
	printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
	       imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
	       imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
	       imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
	       imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
	       imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
	       imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
	       imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
	       imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
	       imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
	       imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
	       imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
	       imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
	       imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
	       imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
	       imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
	       imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
	printf(" Max Disks : %d\n", orom->tds);
	printf(" Max Volumes : %d\n", orom->vpa);
	return;
}
1499
/* Implement --detail-platform for imsm.
 *
 * verbose:        emit diagnostics on stderr
 * enumerate_only: only check whether imsm capability exists; print nothing
 *
 * Returns 0 on success, 2 on failure (ORed in per-controller when
 * enumerating ports).
 */
static int detail_platform_imsm(int verbose, int enumerate_only)
{
	/* There are two components to imsm platform support, the ahci SATA
	 * controller and the option-rom. To find the SATA controller we
	 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
	 * controller with the Intel vendor id is present. This approach
	 * allows mdadm to leverage the kernel's ahci detection logic, with the
	 * caveat that if ahci.ko is not loaded mdadm will not be able to
	 * detect platform raid capabilities. The option-rom resides in a
	 * platform "Adapter ROM". We scan for its signature to retrieve the
	 * platform capabilities. If raid support is disabled in the BIOS the
	 * option-rom capability structure will not be available.
	 */
	const struct imsm_orom *orom;
	struct sys_dev *list, *hba;
	int host_base = 0;
	int port_count = 0;
	int result=0;

	if (enumerate_only) {
		/* IMSM_NO_PLATFORM pretends platform support exists */
		if (check_env("IMSM_NO_PLATFORM"))
			return 0;
		list = find_intel_devices();
		if (!list)
			return 2;
		for (hba = list; hba; hba = hba->next) {
			orom = find_imsm_capability(hba->type);
			if (!orom) {
				result = 2;
				break;
			}
		}
		free_sys_dev(&list);
		return result;
	}

	list = find_intel_devices();
	if (!list) {
		if (verbose)
			fprintf(stderr, Name ": no active Intel(R) RAID "
				"controller found.\n");
		free_sys_dev(&list);
		return 2;
	} else if (verbose)
		print_found_intel_controllers(list);

	/* first pass: report option-rom capabilities per controller */
	for (hba = list; hba; hba = hba->next) {
		orom = find_imsm_capability(hba->type);
		if (!orom)
			fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
				hba->path, get_sys_dev_type(hba->type));
		else
			print_imsm_capability(orom);
	}

	/* second pass: enumerate ports on each SATA controller */
	for (hba = list; hba; hba = hba->next) {
		printf(" I/O Controller : %s (%s)\n",
		       hba->path, get_sys_dev_type(hba->type));

		if (hba->type == SYS_DEV_SATA) {
			host_base = ahci_get_port_count(hba->path, &port_count);
			if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
				if (verbose)
					fprintf(stderr, Name ": failed to enumerate "
						"ports on SATA controller at %s.", hba->pci_id);
				result |= 2;
			}
		}
	}

	free_sys_dev(&list);
	return result;
}
cdddbdbc
DW
1573#endif
1574
/* The imsm metadata format does not carry any host identification, so we
 * can neither confirm nor deny that an array is "meant" for this host —
 * always answer "unknown" (-1).  compare_super and the 'family_num'
 * fields exclude member disks that do not belong, and mdadm.conf names
 * the arrays that should be assembled; auto-assembly may still pick up
 * "foreign" arrays.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	return -1;
}
1588
/* Synthesize a stable 16-byte uuid for the current volume (or the
 * container when no volume is selected) and store it into uuid[4].
 */
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesis one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the orig_family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];	/* sha1 digest is 20 bytes; only the first 16 are used */
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;
	__u32 family_num;

	/* some mdadm versions failed to set ->orig_family_num, in which
	 * case fall back to ->family_num.  orig_family_num will be
	 * fixed up with the first metadata update.
	 */
	family_num = super->anchor->orig_family_num;
	if (family_num == 0)
		family_num = super->anchor->family_num;
	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
	sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		/* mix in the volume index and name so each volume gets a
		 * distinct uuid within the container
		 */
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	memcpy(uuid, buf, 4*4);
}
1640
#if 0
/* Parse the dotted "a.b.c" version string from the mpb signature into
 * numeric minor (*m) and patch (*p) components.
 * NOTE(review): the major component is collected into major[] but never
 * returned to the caller; this function is currently compiled out.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	while (*v != '\0' && v < end) {
		/* at most two digits per component; '.' advances to the next */
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
4f5bc454 1668
1e5c6983
DW
1669static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1670{
1671 /* migr_strip_size when repairing or initializing parity */
1672 struct imsm_map *map = get_imsm_map(dev, 0);
1673 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1674
1675 switch (get_imsm_raid_level(map)) {
1676 case 5:
1677 case 10:
1678 return chunk;
1679 default:
1680 return 128*1024 >> 9;
1681 }
1682}
1683
1684static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1685{
1686 /* migr_strip_size when rebuilding a degraded disk, no idea why
1687 * this is different than migr_strip_size_resync(), but it's good
1688 * to be compatible
1689 */
1690 struct imsm_map *map = get_imsm_map(dev, 1);
1691 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1692
1693 switch (get_imsm_raid_level(map)) {
1694 case 1:
1695 case 10:
1696 if (map->num_members % map->num_domains == 0)
1697 return 128*1024 >> 9;
1698 else
1699 return chunk;
1700 case 5:
1701 return max((__u32) 64*1024 >> 9, chunk);
1702 default:
1703 return 128*1024 >> 9;
1704 }
1705}
1706
1707static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1708{
1709 struct imsm_map *lo = get_imsm_map(dev, 0);
1710 struct imsm_map *hi = get_imsm_map(dev, 1);
1711 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1712 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1713
1714 return max((__u32) 1, hi_chunk / lo_chunk);
1715}
1716
1717static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1718{
1719 struct imsm_map *lo = get_imsm_map(dev, 0);
1720 int level = get_imsm_raid_level(lo);
1721
1722 if (level == 1 || level == 10) {
1723 struct imsm_map *hi = get_imsm_map(dev, 1);
1724
1725 return hi->num_domains;
1726 } else
1727 return num_stripes_per_unit_resync(dev);
1728}
1729
98130f40 1730static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1e5c6983
DW
1731{
1732 /* named 'imsm_' because raid0, raid1 and raid10
1733 * counter-intuitively have the same number of data disks
1734 */
98130f40 1735 struct imsm_map *map = get_imsm_map(dev, second_map);
1e5c6983
DW
1736
1737 switch (get_imsm_raid_level(map)) {
1738 case 0:
1739 case 1:
1740 case 10:
1741 return map->num_members;
1742 case 5:
1743 return map->num_members - 1;
1744 default:
1745 dprintf("%s: unsupported raid level\n", __func__);
1746 return 0;
1747 }
1748}
1749
1750static __u32 parity_segment_depth(struct imsm_dev *dev)
1751{
1752 struct imsm_map *map = get_imsm_map(dev, 0);
1753 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1754
1755 switch(get_imsm_raid_level(map)) {
1756 case 1:
1757 case 10:
1758 return chunk * map->num_domains;
1759 case 5:
1760 return chunk * map->num_members;
1761 default:
1762 return chunk;
1763 }
1764}
1765
/* Translate a per-volume migration block offset into a per-member block
 * offset for the destination map.  Returns 0 for unsupported levels.
 */
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	struct imsm_map *map = get_imsm_map(dev, 1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		/* NOTE(review): the '+ 1' presumably rounds the strip index
		 * into the right mirror domain — TODO confirm against the
		 * firmware's layout definition
		 */
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	} case 5: {
		/* raid5: num_members - 1 data strips per stripe */
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		return 0;
	}
}
1788
static __u64 blocks_per_migr_unit(struct intel_super *super,
				  struct imsm_dev *dev)
{
	/* calculate the conversion factor between per member 'blocks'
	 * (md/{resync,rebuild}_start) and imsm migration units, return
	 * 0 for the 'not migrating' and 'unsupported migration' cases
	 */
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_GEN_MIGR: {
		/* general migration records its unit size in the
		 * migration record itself
		 */
		struct migr_record *migr_rec = super->migr_rec;
		return __le32_to_cpu(migr_rec->blocks_per_unit);
	}
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, 0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8 disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev, 0);
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
		/* split the unit into whole stripes plus a remainder,
		 * then map the remainder through the destination layout
		 */
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}
1845
c2c087e6
DW
1846static int imsm_level_to_layout(int level)
1847{
1848 switch (level) {
1849 case 0:
1850 case 1:
1851 return 0;
1852 case 5:
1853 case 6:
a380c027 1854 return ALGORITHM_LEFT_ASYMMETRIC;
c2c087e6 1855 case 10:
c92a2527 1856 return 0x102;
c2c087e6 1857 }
a18a888e 1858 return UnSet;
c2c087e6
DW
1859}
1860
8e59f3d8
AK
/*******************************************************************************
 * Function:	read_imsm_migr_rec
 * Description: Function reads imsm migration record from last sector of disk
 * Parameters:
 *	fd	: disk descriptor
 *	super	: metadata info
 * Returns:
 *	 0 : success,
 *	-1 : fail
 ******************************************************************************/
static int read_imsm_migr_rec(int fd, struct intel_super *super)
{
	int ret_val = -1;
	unsigned long long dsize;

	/* the migration record occupies the very last 512-byte sector */
	get_dev_size(fd, NULL, &dsize);
	if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
		fprintf(stderr,
			Name ": Cannot seek to anchor block: %s\n",
			strerror(errno));
		goto out;
	}
	if (read(fd, super->migr_rec_buf, 512) != 512) {
		fprintf(stderr,
			Name ": Cannot read migr record block: %s\n",
			strerror(errno));
		goto out;
	}
	ret_val = 0;

out:
	return ret_val;
}
1894
/*******************************************************************************
 * Function:	load_imsm_migr_rec
 * Description: Function reads imsm migration record (it is stored at the last
 *		sector of disk)
 * Parameters:
 *	super	: imsm internal array info
 *	info	: general array info
 * Returns:
 *	 0 : success
 *	-1 : fail
 ******************************************************************************/
static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
{
	struct mdinfo *sd;
	struct dl *dl = NULL;
	char nm[30];
	int retval = -1;
	int fd = -1;

	/* prefer a device from the runtime array info when available */
	if (info) {
		for (sd = info->devs ; sd ; sd = sd->next) {
			/* read only from one of the first two slots */
			if ((sd->disk.raid_disk > 1) ||
			    (sd->disk.raid_disk < 0))
				continue;
			sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
			fd = dev_open(nm, O_RDONLY);
			if (fd >= 0)
				break;
		}
	}
	/* fall back to the container's own disk list */
	if (fd < 0) {
		for (dl = super->disks; dl; dl = dl->next) {
			/* read only from one of the first two slots */
			if (dl->index > 1)
				continue;
			sprintf(nm, "%d:%d", dl->major, dl->minor);
			fd = dev_open(nm, O_RDONLY);
			if (fd >= 0)
				break;
		}
	}
	if (fd < 0)
		goto out;
	retval = read_imsm_migr_rec(fd, super);

out:
	if (fd >= 0)
		close(fd);
	return retval;
}
1946
687629c2
AK
/*******************************************************************************
 * Function:	write_imsm_migr_rec
 * Description:	Function writes imsm migration record
 *		(at the last sector of disk)
 * Parameters:
 *	super	: imsm internal array info
 * Returns:
 *	 0 : success
 *	-1 : if fail
 ******************************************************************************/
static int write_imsm_migr_rec(struct supertype *st)
{
	struct intel_super *super = st->sb;
	unsigned long long dsize;
	char nm[30];
	int fd = -1;
	int retval = -1;
	struct dl *sd;

	for (sd = super->disks ; sd ; sd = sd->next) {
		/* write to 2 first slots only */
		if ((sd->index < 0) || (sd->index > 1))
			continue;
		sprintf(nm, "%d:%d", sd->major, sd->minor);
		fd = dev_open(nm, O_RDWR);
		/* best effort: a slot that cannot be opened is skipped */
		if (fd < 0)
			continue;
		get_dev_size(fd, NULL, &dsize);
		/* the record occupies the very last 512-byte sector */
		if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
			fprintf(stderr,
				Name ": Cannot seek to anchor block: %s\n",
				strerror(errno));
			goto out;
		}
		if (write(fd, super->migr_rec_buf, 512) != 512) {
			fprintf(stderr,
				Name ": Cannot write migr record block: %s\n",
				strerror(errno));
			goto out;
		}
		close(fd);
		fd = -1;
	}

	retval = 0;
 out:
	if (fd >= 0)
		close(fd);
	return retval;
}
1997
/* Describe the currently selected volume (super->current_vol) in md's
 * mdinfo terms: geometry, reshape/migration state, checkpoint progress,
 * and (optionally) a per-slot presence map in @dmap.
 * @dmap, when non-NULL, must hold at least info->array.raid_disks (as
 * passed in) entries; each is set to 1 if the slot's disk is CONFIGURED. */
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
	struct intel_super *super = st->sb;
	struct migr_record *migr_rec = super->migr_rec;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *prev_map = get_imsm_map(dev, 1);
	struct imsm_map *map_to_analyse = map;
	struct dl *dl;
	char *devname;
	unsigned int component_size_alligment;
	/* saved before the memset below wipes the caller-provided value */
	int map_disks = info->array.raid_disks;

	memset(info, 0, sizeof(*info));
	/* while migrating, map[1] holds the pre-migration ("current") state */
	if (prev_map)
		map_to_analyse = prev_map;

	/* NOTE(review): info->disk.raid_disk was zeroed by the memset above,
	 * so this always matches the member at raid slot 0 — confirm intent */
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->raiddisk == info->disk.raid_disk)
			break;
	info->container_member = super->current_vol;
	info->array.raid_disks = map->num_members;
	info->array.level = get_imsm_raid_level(map_to_analyse);
	info->array.layout = imsm_level_to_layout(info->array.level);
	info->array.md_minor = -1;
	info->array.ctime = 0;
	info->array.utime = 0;
	/* blocks_per_strip is in 512-byte sectors; md wants bytes */
	info->array.chunk_size =
		__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
	info->array.state = !dev->vol.dirty;
	/* 64-bit volume size is split across two little-endian 32-bit words */
	info->custom_array_size = __le32_to_cpu(dev->size_high);
	info->custom_array_size <<= 32;
	info->custom_array_size |= __le32_to_cpu(dev->size_low);
	if (prev_map && map->map_state == prev_map->map_state) {
		info->reshape_active = 1;
		info->new_level = get_imsm_raid_level(map);
		info->new_layout = imsm_level_to_layout(info->new_level);
		info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
		info->delta_disks = map->num_members - prev_map->num_members;
		if (info->delta_disks) {
			/* this needs to be applied to every array
			 * in the container.
			 */
			info->reshape_active = 2;
		}
		/* We shape information that we give to md might have to be
		 * modify to cope with md's requirement for reshaping arrays.
		 * For example, when reshaping a RAID0, md requires it to be
		 * presented as a degraded RAID4.
		 * Also if a RAID0 is migrating to a RAID5 we need to specify
		 * the array as already being RAID5, but the 'before' layout
		 * is a RAID4-like layout.
		 */
		switch (info->array.level) {
		case 0:
			switch(info->new_level) {
			case 0:
				/* conversion is happening as RAID4 */
				info->array.level = 4;
				info->array.raid_disks += 1;
				break;
			case 5:
				/* conversion is happening as RAID5 */
				info->array.level = 5;
				info->array.layout = ALGORITHM_PARITY_N;
				info->array.raid_disks += 1;
				info->delta_disks -= 1;
				break;
			default:
				/* FIXME error message */
				info->array.level = UnSet;
				break;
			}
			break;
		}
	} else {
		/* not reshaping: report "no change" values */
		info->new_level = UnSet;
		info->new_layout = UnSet;
		info->new_chunk = info->array.chunk_size;
		info->delta_disks = 0;
	}
	info->disk.major = 0;
	info->disk.minor = 0;
	if (dl) {
		info->disk.major = dl->major;
		info->disk.minor = dl->minor;
	}

	info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
	info->component_size =
		__le32_to_cpu(map_to_analyse->blocks_per_member);

	/* check component size aligment
	 */
	component_size_alligment =
		info->component_size % (info->array.chunk_size/512);

	/* round the component size down to a whole number of chunks
	 * (not meaningful for RAID1, which has no striping) */
	if (component_size_alligment &&
	    (info->array.level != 1) && (info->array.level != UnSet)) {
		dprintf("imsm: reported component size alligned from %llu ",
			info->component_size);
		info->component_size -= component_size_alligment;
		dprintf("to %llu (%i).\n",
			info->component_size, component_size_alligment);
	}

	memset(info->uuid, 0, sizeof(info->uuid));
	info->recovery_start = MaxSector;

	info->reshape_progress = 0;
	info->resync_start = MaxSector;
	if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
	    dev->vol.dirty) {
		/* dirty or never-initialized volumes need a full resync */
		info->resync_start = 0;
	}
	if (dev->vol.migr_state) {
		switch (migr_type(dev)) {
		case MIGR_REPAIR:
		case MIGR_INIT: {
			/* resync/init checkpoint lives in the volume record */
			__u64 blocks_per_unit = blocks_per_migr_unit(super,
								     dev);
			__u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);

			info->resync_start = blocks_per_unit * units;
			break;
		}
		case MIGR_GEN_MIGR: {
			/* general-migration checkpoint lives in the separate
			 * migration record read from the last disk sector */
			__u64 blocks_per_unit = blocks_per_migr_unit(super,
								     dev);
			__u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
			unsigned long long array_blocks;
			int used_disks;

			info->reshape_progress = blocks_per_unit * units;

			dprintf("IMSM: General Migration checkpoint : %llu "
			       "(%llu) -> read reshape progress : %llu\n",
				units, blocks_per_unit, info->reshape_progress);

			used_disks = imsm_num_data_members(dev, 1);
			if (used_disks > 0) {
				array_blocks = map->blocks_per_member *
					used_disks;
				/* round array size down to closest MB
				 */
				info->custom_array_size = (array_blocks
						>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
			}
		}
		/* NOTE(review): no break above — MIGR_GEN_MIGR falls through
		 * to the cases below, which leave resync_start at MaxSector;
		 * appears deliberate, confirm before adding a break */
		case MIGR_VERIFY:
			/* we could emulate the checkpointing of
			 * 'sync_action=check' migrations, but for now
			 * we just immediately complete them
			 */
		case MIGR_REBUILD:
			/* this is handled by container_content_imsm() */
		case MIGR_STATE_CHANGE:
			/* FIXME handle other migrations */
		default:
			/* we are not dirty, so... */
			info->resync_start = MaxSector;
		}
	}

	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	/* text_version is "/<container devname>/<member index>" */
	devname = devnum2devname(st->container_dev);
	*info->text_version = '\0';
	if (devname)
		sprintf(info->text_version, "/%s/%d", devname, info->container_member);
	free(devname);
	info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);

	if (dmap) {
		int i, j;
		for (i=0; i<map_disks; i++) {
			dmap[i] = 0;
			if (i < info->array.raid_disks) {
				struct imsm_disk *dsk;
				j = get_imsm_disk_idx(dev, i, -1);
				dsk = get_imsm_disk(super, j);
				if (dsk && (dsk->status & CONFIGURED_DISK))
					dmap[i] = 1;
			}
		}
	}
}
bf5a934a 2190
97b4d0e9
DW
2191static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
2192static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
2193
2194static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2195{
2196 struct dl *d;
2197
2198 for (d = super->missing; d; d = d->next)
2199 if (d->index == index)
2200 return &d->disk;
2201 return NULL;
2202}
2203
a5d85af7 2204static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
4f5bc454
DW
2205{
2206 struct intel_super *super = st->sb;
4f5bc454 2207 struct imsm_disk *disk;
a5d85af7 2208 int map_disks = info->array.raid_disks;
ab3cb6b3
N
2209 int max_enough = -1;
2210 int i;
2211 struct imsm_super *mpb;
4f5bc454 2212
bf5a934a 2213 if (super->current_vol >= 0) {
a5d85af7 2214 getinfo_super_imsm_volume(st, info, map);
bf5a934a
DW
2215 return;
2216 }
95eeceeb 2217 memset(info, 0, sizeof(*info));
d23fe947
DW
2218
2219 /* Set raid_disks to zero so that Assemble will always pull in valid
2220 * spares
2221 */
2222 info->array.raid_disks = 0;
cdddbdbc
DW
2223 info->array.level = LEVEL_CONTAINER;
2224 info->array.layout = 0;
2225 info->array.md_minor = -1;
c2c087e6 2226 info->array.ctime = 0; /* N/A for imsm */
cdddbdbc
DW
2227 info->array.utime = 0;
2228 info->array.chunk_size = 0;
2229
2230 info->disk.major = 0;
2231 info->disk.minor = 0;
cdddbdbc 2232 info->disk.raid_disk = -1;
c2c087e6 2233 info->reshape_active = 0;
f35f2525
N
2234 info->array.major_version = -1;
2235 info->array.minor_version = -2;
c2c087e6 2236 strcpy(info->text_version, "imsm");
a67dd8cc 2237 info->safe_mode_delay = 0;
c2c087e6
DW
2238 info->disk.number = -1;
2239 info->disk.state = 0;
c5afc314 2240 info->name[0] = 0;
921d9e16 2241 info->recovery_start = MaxSector;
c2c087e6 2242
97b4d0e9 2243 /* do we have the all the insync disks that we expect? */
ab3cb6b3 2244 mpb = super->anchor;
97b4d0e9 2245
ab3cb6b3
N
2246 for (i = 0; i < mpb->num_raid_devs; i++) {
2247 struct imsm_dev *dev = get_imsm_dev(super, i);
2248 int failed, enough, j, missing = 0;
2249 struct imsm_map *map;
2250 __u8 state;
97b4d0e9 2251
ab3cb6b3
N
2252 failed = imsm_count_failed(super, dev);
2253 state = imsm_check_degraded(super, dev, failed);
2254 map = get_imsm_map(dev, dev->vol.migr_state);
2255
2256 /* any newly missing disks?
2257 * (catches single-degraded vs double-degraded)
2258 */
2259 for (j = 0; j < map->num_members; j++) {
98130f40 2260 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
ab3cb6b3
N
2261 __u32 idx = ord_to_idx(ord);
2262
2263 if (!(ord & IMSM_ORD_REBUILD) &&
2264 get_imsm_missing(super, idx)) {
2265 missing = 1;
2266 break;
2267 }
97b4d0e9 2268 }
ab3cb6b3
N
2269
2270 if (state == IMSM_T_STATE_FAILED)
2271 enough = -1;
2272 else if (state == IMSM_T_STATE_DEGRADED &&
2273 (state != map->map_state || missing))
2274 enough = 0;
2275 else /* we're normal, or already degraded */
2276 enough = 1;
2277
2278 /* in the missing/failed disk case check to see
2279 * if at least one array is runnable
2280 */
2281 max_enough = max(max_enough, enough);
2282 }
2283 dprintf("%s: enough: %d\n", __func__, max_enough);
2284 info->container_enough = max_enough;
97b4d0e9 2285
4a04ec6c 2286 if (super->disks) {
14e8215b
DW
2287 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2288
b9f594fe 2289 disk = &super->disks->disk;
14e8215b
DW
2290 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2291 info->component_size = reserved;
25ed7e59 2292 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
df474657
DW
2293 /* we don't change info->disk.raid_disk here because
2294 * this state will be finalized in mdmon after we have
2295 * found the 'most fresh' version of the metadata
2296 */
25ed7e59
DW
2297 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2298 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
cdddbdbc 2299 }
a575e2a7
DW
2300
2301 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2302 * ->compare_super may have updated the 'num_raid_devs' field for spares
2303 */
2304 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
36ba7d48 2305 uuid_from_super_imsm(st, info->uuid);
22e263f6
AC
2306 else
2307 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
a5d85af7
N
2308
2309 /* I don't know how to compute 'map' on imsm, so use safe default */
2310 if (map) {
2311 int i;
2312 for (i = 0; i < map_disks; i++)
2313 map[i] = 1;
2314 }
2315
cdddbdbc
DW
2316}
2317
5c4cd5da
AC
2318/* allocates memory and fills disk in mdinfo structure
2319 * for each disk in array */
2320struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2321{
2322 struct mdinfo *mddev = NULL;
2323 struct intel_super *super = st->sb;
2324 struct imsm_disk *disk;
2325 int count = 0;
2326 struct dl *dl;
2327 if (!super || !super->disks)
2328 return NULL;
2329 dl = super->disks;
2330 mddev = malloc(sizeof(*mddev));
2331 if (!mddev) {
2332 fprintf(stderr, Name ": Failed to allocate memory.\n");
2333 return NULL;
2334 }
2335 memset(mddev, 0, sizeof(*mddev));
2336 while (dl) {
2337 struct mdinfo *tmp;
2338 disk = &dl->disk;
2339 tmp = malloc(sizeof(*tmp));
2340 if (!tmp) {
2341 fprintf(stderr, Name ": Failed to allocate memory.\n");
2342 if (mddev)
2343 sysfs_free(mddev);
2344 return NULL;
2345 }
2346 memset(tmp, 0, sizeof(*tmp));
2347 if (mddev->devs)
2348 tmp->next = mddev->devs;
2349 mddev->devs = tmp;
2350 tmp->disk.number = count++;
2351 tmp->disk.major = dl->major;
2352 tmp->disk.minor = dl->minor;
2353 tmp->disk.state = is_configured(disk) ?
2354 (1 << MD_DISK_ACTIVE) : 0;
2355 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2356 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2357 tmp->disk.raid_disk = -1;
2358 dl = dl->next;
2359 }
2360 return mddev;
2361}
2362
/* Apply a textual 'update' directive to the container metadata.
 * Returns 0 on a successful update (checksum is then recomputed),
 * non-zero otherwise; only container-level updates are supported. */
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
			     char *update, char *devname, int verbose,
			     int uuid_set, char *homehost)
{
	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *		be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *	   if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *		linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  name:  update the name - preserving the homehost
	 *  uuid:  Change the uuid of the array to match what is given
	 *
	 * Following are not relevant for this imsm:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 *  homehost:  update the recorded homehost
	 *  _reshape_progress: record new reshape_progress position.
	 */
	int rv = 1;
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;

	/* we can only update container info */
	if (!super || super->current_vol >= 0 || !super->anchor)
		return 1;

	mpb = super->anchor;

	if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
		rv = -1;
	else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
		/* second pass: the value stashed in update_private on the
		 * first pass is committed to the metadata */
		mpb->orig_family_num = *((__u32 *) info->update_private);
		rv = 0;
	} else if (strcmp(update, "uuid") == 0) {
		__u32 *new_family = malloc(sizeof(*new_family));

		/* update orig_family_number with the incoming random
		 * data, report the new effective uuid, and store the
		 * new orig_family_num for future updates.
		 */
		if (new_family) {
			memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
			uuid_from_super_imsm(st, info->uuid);
			*new_family = mpb->orig_family_num;
			info->update_private = new_family;
			rv = 0;
		}
	} else if (strcmp(update, "assemble") == 0)
		rv = 0;
	else
		rv = -1;

	/* successful update? recompute checksum */
	if (rv == 0)
		mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));

	return rv;
}
2431
c2c087e6 2432static size_t disks_to_mpb_size(int disks)
cdddbdbc 2433{
c2c087e6 2434 size_t size;
cdddbdbc 2435
c2c087e6
DW
2436 size = sizeof(struct imsm_super);
2437 size += (disks - 1) * sizeof(struct imsm_disk);
2438 size += 2 * sizeof(struct imsm_dev);
2439 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2440 size += (4 - 2) * sizeof(struct imsm_map);
2441 /* 4 possible disk_ord_tbl's */
2442 size += 4 * (disks - 1) * sizeof(__u32);
2443
2444 return size;
2445}
2446
2447static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2448{
2449 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2450 return 0;
2451
2452 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
cdddbdbc
DW
2453}
2454
ba2de7ba
DW
2455static void free_devlist(struct intel_super *super)
2456{
2457 struct intel_dev *dv;
2458
2459 while (super->devlist) {
2460 dv = super->devlist->next;
2461 free(super->devlist->dev);
2462 free(super->devlist);
2463 super->devlist = dv;
2464 }
2465}
2466
/* Copy @src into @dest.  NOTE(review): sizeof_imsm_dev(src, 0) appears
 * to size the device record with only the first map included — confirm
 * the second-map (migration) data is intentionally not copied here. */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2471
static int compare_super_imsm(struct supertype *st, struct supertype *tst)
{
	/*
	 * return:
	 *  0 same, or first was empty, and second was copied
	 *  1 second had wrong number
	 *  2 wrong uuid
	 *  3 wrong other info
	 */
	struct intel_super *first = st->sb;
	struct intel_super *sec = tst->sb;

	/* first superblock empty: adopt the second one wholesale */
	if (!first) {
		st->sb = tst->sb;
		tst->sb = NULL;
		return 0;
	}
	/* in platform dependent environment test if the disks
	 * use the same Intel hba
	 */
	if (!check_env("IMSM_NO_PLATFORM")) {
		if (!first->hba || !sec->hba ||
		    (first->hba->type != sec->hba->type)) {
			fprintf(stderr,
				"HBAs of devices does not match %s != %s\n",
				first->hba ? get_sys_dev_type(first->hba->type) : NULL,
				sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
			return 3;
		}
	}

	/* if an anchor does not have num_raid_devs set then it is a free
	 * floating spare
	 */
	if (first->anchor->num_raid_devs > 0 &&
	    sec->anchor->num_raid_devs > 0) {
		/* Determine if these disks might ever have been
		 * related.  Further disambiguation can only take place
		 * in load_super_imsm_all
		 */
		__u32 first_family = first->anchor->orig_family_num;
		__u32 sec_family = sec->anchor->orig_family_num;

		if (memcmp(first->anchor->sig, sec->anchor->sig,
			   MAX_SIGNATURE_LENGTH) != 0)
			return 3;

		/* orig_family_num of 0 means "unset": fall back to the
		 * current family number for the comparison */
		if (first_family == 0)
			first_family = first->anchor->family_num;
		if (sec_family == 0)
			sec_family = sec->anchor->family_num;

		if (first_family != sec_family)
			return 3;

	}


	/* if 'first' is a spare promote it to a populated mpb with sec's
	 * family number
	 */
	if (first->anchor->num_raid_devs == 0 &&
	    sec->anchor->num_raid_devs > 0) {
		int i;
		struct intel_dev *dv;
		struct imsm_dev *dev;

		/* we need to copy raid device info from sec if an allocation
		 * fails here we don't associate the spare
		 */
		for (i = 0; i < sec->anchor->num_raid_devs; i++) {
			dv = malloc(sizeof(*dv));
			if (!dv)
				break;
			/* dev contents stay uninitialized until the
			 * imsm_copy_dev() loop at the end of this branch */
			dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
			if (!dev) {
				free(dv);
				break;
			}
			dv->dev = dev;
			dv->index = i;
			dv->next = first->devlist;
			first->devlist = dv;
		}
		if (i < sec->anchor->num_raid_devs) {
			/* allocation failure */
			free_devlist(first);
			fprintf(stderr, "imsm: failed to associate spare\n");
			return 3;
		}
		first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
		first->anchor->orig_family_num = sec->anchor->orig_family_num;
		first->anchor->family_num = sec->anchor->family_num;
		memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
		for (i = 0; i < sec->anchor->num_raid_devs; i++)
			imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
	}

	return 0;
}
2572
0030e8d6
DW
2573static void fd2devname(int fd, char *name)
2574{
2575 struct stat st;
2576 char path[256];
33a6535d 2577 char dname[PATH_MAX];
0030e8d6
DW
2578 char *nm;
2579 int rv;
2580
2581 name[0] = '\0';
2582 if (fstat(fd, &st) != 0)
2583 return;
2584 sprintf(path, "/sys/dev/block/%d:%d",
2585 major(st.st_rdev), minor(st.st_rdev));
2586
2587 rv = readlink(path, dname, sizeof(dname));
2588 if (rv <= 0)
2589 return;
2590
2591 dname[rv] = '\0';
2592 nm = strrchr(dname, '/');
2593 nm++;
2594 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2595}
2596
cdddbdbc
DW
2597extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2598
/* Read the device's SCSI unit serial number into @serial (not
 * NUL-terminated; exactly MAX_RAID_SERIAL_LEN bytes, zero padded).
 * Whitespace/non-printables are stripped and ':' becomes ';'.
 * With IMSM_DEVNAME_AS_SERIAL set, a failed SCSI query falls back to
 * the device name.  Returns 0 on success, non-zero on failure. */
static int imsm_read_serial(int fd, char *devname,
			    __u8 serial[MAX_RAID_SERIAL_LEN])
{
	unsigned char scsi_serial[255];
	int rv;
	int rsp_len;
	int len;
	char *dest;
	char *src;
	char *rsp_buf;
	int i;

	memset(scsi_serial, 0, sizeof(scsi_serial));

	rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));

	/* test/debug escape hatch: use the device name as the serial */
	if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
		memset(serial, 0, MAX_RAID_SERIAL_LEN);
		fd2devname(fd, (char *) serial);
		return 0;
	}

	if (rv != 0) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to retrieve serial for %s\n",
				devname);
		return rv;
	}

	/* VPD page: byte 3 is the page length, data starts at byte 4 */
	rsp_len = scsi_serial[3];
	if (!rsp_len) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to retrieve serial for %s\n",
				devname);
		return 2;
	}
	rsp_buf = (char *) &scsi_serial[4];

	/* trim all whitespace and non-printable characters and convert
	 * ':' to ';'
	 * NOTE(review): with signed char, bytes >= 0x80 compare negative
	 * and are stripped too — presumably acceptable for serials */
	for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
		src = &rsp_buf[i];
		if (*src > 0x20) {
			/* ':' is reserved for use in placeholder serial
			 * numbers for missing disks
			 */
			if (*src == ':')
				*dest++ = ';';
			else
				*dest++ = *src;
		}
	}
	len = dest - rsp_buf;
	dest = rsp_buf;

	/* truncate leading characters */
	if (len > MAX_RAID_SERIAL_LEN) {
		dest += len - MAX_RAID_SERIAL_LEN;
		len = MAX_RAID_SERIAL_LEN;
	}

	memset(serial, 0, MAX_RAID_SERIAL_LEN);
	memcpy(serial, dest, len);

	return 0;
}
2668
/* Compare two serial numbers over at most MAX_RAID_SERIAL_LEN bytes;
 * strcmp-style return value. */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}
2673
/* Copy a serial number.  Note strncpy semantics: a full-length source
 * leaves @dest without a NUL terminator — serial fields are fixed-width
 * byte arrays, compared with serialcmp(), so that is fine here. */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
2678
1799c9e8 2679#ifndef MDASSEMBLE
54c2c1ea
DW
2680static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2681{
2682 struct dl *dl;
2683
2684 for (dl = super->disks; dl; dl = dl->next)
2685 if (serialcmp(dl->serial, serial) == 0)
2686 break;
2687
2688 return dl;
2689}
1799c9e8 2690#endif
54c2c1ea 2691
a2b97981
DW
2692static struct imsm_disk *
2693__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2694{
2695 int i;
2696
2697 for (i = 0; i < mpb->num_disks; i++) {
2698 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2699
2700 if (serialcmp(disk->serial, serial) == 0) {
2701 if (idx)
2702 *idx = i;
2703 return disk;
2704 }
2705 }
2706
2707 return NULL;
2708}
2709
/* Read the serial of the disk open on @fd and attach a corresponding
 * struct dl to super->disks (which must currently be empty, see the
 * assert).  @keep_fd controls whether the fd is retained in the dl.
 * Returns 0 on success, 2 on serial-read or allocation failure. */
static int
load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	struct imsm_disk *disk;
	struct dl *dl;
	struct stat stb;
	int rv;
	char name[40];
	__u8 serial[MAX_RAID_SERIAL_LEN];

	rv = imsm_read_serial(fd, devname, serial);

	if (rv != 0)
		return 2;

	dl = calloc(1, sizeof(*dl));
	if (!dl) {
		if (devname)
			fprintf(stderr,
				Name ": failed to allocate disk buffer for %s\n",
				devname);
		return 2;
	}

	fstat(fd, &stb);
	dl->major = major(stb.st_rdev);
	dl->minor = minor(stb.st_rdev);
	dl->next = super->disks;
	dl->fd = keep_fd ? fd : -1;
	/* this loader only ever installs the first disk of a super */
	assert(super->disks == NULL);
	super->disks = dl;
	serialcpy(dl->serial, serial);
	dl->index = -2; /* -2 == not (yet) a container member */
	dl->e = NULL;
	fd2devname(fd, name);
	if (devname)
		dl->devname = strdup(devname);
	else
		dl->devname = strdup(name);

	/* look up this disk's index in the current anchor */
	disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
	if (disk) {
		dl->disk = *disk;
		/* only set index on disks that are a member of a
		 * populated container, i.e. one with raid_devs
		 */
		if (is_failed(&dl->disk))
			dl->index = -2;
		else if (is_spare(&dl->disk))
			dl->index = -1;
	}

	return 0;
}
2765
0e600426 2766#ifndef MDASSEMBLE
/* When migrating map0 contains the 'destination' state while map1
 * contains the current state.  When not migrating map0 contains the
 * current state.  This routine assumes that map[0].map_state is set to
 * the current array state before being called.
 *
 * Migration is indicated by one of the following states
 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
 *    map1state=unitialized)
 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR  map0state=normal
 *    map1state=normal)
 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
 *    map1state=degraded)
 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
 *    map1state=normal)
 */
static void migrate(struct imsm_dev *dev, struct intel_super *super,
		    __u8 to_state, int migr_type)
{
	struct imsm_map *dest;
	struct imsm_map *src = get_imsm_map(dev, 0);

	dev->vol.migr_state = 1;
	set_migr_type(dev, migr_type);
	dev->vol.curr_migr_unit = 0;
	dest = get_imsm_map(dev, 1);

	/* duplicate and then set the target end state in map[0] */
	memcpy(dest, src, sizeof_imsm_map(src));
	if ((migr_type == MIGR_REBUILD) ||
	    (migr_type == MIGR_GEN_MIGR)) {
		__u32 ord;
		int i;

		/* strip the IMSM_ORD_REBUILD flag from map[0]'s entries:
		 * the destination map describes the finished state */
		for (i = 0; i < src->num_members; i++) {
			ord = __le32_to_cpu(src->disk_ord_tbl[i]);
			set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
		}
	}

	if (migr_type == MIGR_GEN_MIGR)
		/* Clear migration record */
		memset(super->migr_rec, 0, sizeof(struct migr_record));

	src->map_state = to_state;
}
/* Conclude a migration: fold leftover rebuild markers from the previous
 * map into the current one, clear all migration state, and set the
 * final @map_state on map[0]. */
static void end_migration(struct imsm_dev *dev, __u8 map_state)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
	int i, j;

	/* merge any IMSM_ORD_REBUILD bits that were not successfully
	 * completed in the last migration.
	 *
	 * FIXME add support for raid-level-migration
	 */
	for (i = 0; i < prev->num_members; i++)
		for (j = 0; j < map->num_members; j++)
			/* during online capacity expansion
			 * disks position can be changed if takeover is used
			 */
			if (ord_to_idx(map->disk_ord_tbl[j]) ==
			    ord_to_idx(prev->disk_ord_tbl[i])) {
				map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
				break;
			}

	dev->vol.migr_state = 0;
	dev->vol.migr_type = 0;
	dev->vol.curr_migr_unit = 0;
	map->map_state = map_state;
}
0e600426 2841#endif
/* Copy every raid device record out of the anchor onto super->devlist,
 * over-allocating each copy to the largest migration-time size seen so
 * far, and grow super->buf so it can hold the mpb if every device were
 * migrating at once.  Returns 0 on success, 1 on allocation failure. */
static int parse_raid_devices(struct intel_super *super)
{
	int i;
	struct imsm_dev *dev_new;
	size_t len, len_migr;
	size_t max_len = 0;
	size_t space_needed = 0;
	struct imsm_super *mpb = super->anchor;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
		struct intel_dev *dv;

		/* size without vs. with the second (migration) map */
		len = sizeof_imsm_dev(dev_iter, 0);
		len_migr = sizeof_imsm_dev(dev_iter, 1);
		if (len_migr > len)
			space_needed += len_migr - len;

		dv = malloc(sizeof(*dv));
		if (!dv)
			return 1;
		/* track the largest record so every copy can be reused as
		 * scratch space for any device's migration form */
		if (max_len < len_migr)
			max_len = len_migr;
		if (max_len > len_migr)
			space_needed += max_len - len_migr;
		dev_new = malloc(max_len);
		if (!dev_new) {
			free(dv);
			return 1;
		}
		imsm_copy_dev(dev_new, dev_iter);
		dv->dev = dev_new;
		dv->index = i;
		dv->next = super->devlist;
		super->devlist = dv;
	}

	/* ensure that super->buf is large enough when all raid devices
	 * are migrating
	 */
	if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
		void *buf;

		len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
		if (posix_memalign(&buf, 512, len) != 0)
			return 1;

		memcpy(buf, super->buf, super->len);
		memset(buf + super->len, 0, len - super->len);
		free(super->buf);
		super->buf = buf;
		super->len = len;
	}

	return 0;
}
2899
604b746f
JD
2900/* retrieve a pointer to the bbm log which starts after all raid devices */
2901struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2902{
2903 void *ptr = NULL;
2904
2905 if (__le32_to_cpu(mpb->bbm_log_size)) {
2906 ptr = mpb;
2907 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2908 }
2909
2910 return ptr;
2911}
2912
/*******************************************************************************
 * Function: check_mpb_migr_compatibility
 * Description: Function checks for unsupported migration features:
 *		- migration optimization area (pba_of_lba0)
 *		- descending reshape (ascending_migr)
 * Parameters:
 *	super	: imsm metadata information
 * Returns:
 *	 0 : migration is compatible
 *	-1 : migration is not compatible
 ******************************************************************************/
int check_mpb_migr_compatibility(struct intel_super *super)
{
	struct imsm_map *map0, *map1;
	struct migr_record *migr_rec = super->migr_rec;
	int i;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);

		if (dev_iter &&
		    dev_iter->vol.migr_state == 1 &&
		    dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
			/* This device is migrating */
			map0 = get_imsm_map(dev_iter, 0);
			map1 = get_imsm_map(dev_iter, 1);
			/* differing start LBAs mean the optimization area
			 * was used, which this code cannot resume */
			if (map0->pba_of_lba0 != map1->pba_of_lba0)
				/* migration optimization area was used */
				return -1;
			if (migr_rec->ascending_migr == 0
			    && migr_rec->dest_depth_per_unit > 0)
				/* descending reshape not supported yet */
				return -1;
		}
	}
	return 0;
}
2950
d23fe947 2951static void __free_imsm(struct intel_super *super, int free_disks);
9ca2c81c 2952
cdddbdbc 2953/* load_imsm_mpb - read matrix metadata
f2f5c343 2954 * allocates super->mpb to be freed by free_imsm
cdddbdbc
DW
2955 */
2956static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2957{
2958 unsigned long long dsize;
cdddbdbc
DW
2959 unsigned long long sectors;
2960 struct stat;
6416d527 2961 struct imsm_super *anchor;
cdddbdbc
DW
2962 __u32 check_sum;
2963
cdddbdbc 2964 get_dev_size(fd, NULL, &dsize);
64436f06
N
2965 if (dsize < 1024) {
2966 if (devname)
2967 fprintf(stderr,
2968 Name ": %s: device to small for imsm\n",
2969 devname);
2970 return 1;
2971 }
cdddbdbc
DW
2972
2973 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2974 if (devname)
2975 fprintf(stderr,
2976 Name ": Cannot seek to anchor block on %s: %s\n",
2977 devname, strerror(errno));
2978 return 1;
2979 }
2980
949c47a0 2981 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
ad97895e
DW
2982 if (devname)
2983 fprintf(stderr,
2984 Name ": Failed to allocate imsm anchor buffer"
2985 " on %s\n", devname);
2986 return 1;
2987 }
949c47a0 2988 if (read(fd, anchor, 512) != 512) {
cdddbdbc
DW
2989 if (devname)
2990 fprintf(stderr,
2991 Name ": Cannot read anchor block on %s: %s\n",
2992 devname, strerror(errno));
6416d527 2993 free(anchor);
cdddbdbc
DW
2994 return 1;
2995 }
2996
6416d527 2997 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
cdddbdbc
DW
2998 if (devname)
2999 fprintf(stderr,
3000 Name ": no IMSM anchor on %s\n", devname);
6416d527 3001 free(anchor);
cdddbdbc
DW
3002 return 2;
3003 }
3004
d23fe947 3005 __free_imsm(super, 0);
f2f5c343
LM
3006 /* reload capability and hba */
3007
3008 /* capability and hba must be updated with new super allocation */
d424212e 3009 find_intel_hba_capability(fd, super, devname);
949c47a0
DW
3010 super->len = ROUND_UP(anchor->mpb_size, 512);
3011 if (posix_memalign(&super->buf, 512, super->len) != 0) {
cdddbdbc
DW
3012 if (devname)
3013 fprintf(stderr,
3014 Name ": unable to allocate %zu byte mpb buffer\n",
949c47a0 3015 super->len);
6416d527 3016 free(anchor);
cdddbdbc
DW
3017 return 2;
3018 }
949c47a0 3019 memcpy(super->buf, anchor, 512);
cdddbdbc 3020
6416d527
NB
3021 sectors = mpb_sectors(anchor) - 1;
3022 free(anchor);
8e59f3d8
AK
3023
3024 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3025 fprintf(stderr, Name
3026 ": %s could not allocate migr_rec buffer\n", __func__);
3027 free(super->buf);
3028 return 2;
3029 }
3030
949c47a0 3031 if (!sectors) {
ecf45690
DW
3032 check_sum = __gen_imsm_checksum(super->anchor);
3033 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3034 if (devname)
3035 fprintf(stderr,
3036 Name ": IMSM checksum %x != %x on %s\n",
3037 check_sum,
3038 __le32_to_cpu(super->anchor->check_sum),
3039 devname);
3040 return 2;
3041 }
3042
a2b97981 3043 return 0;
949c47a0 3044 }
cdddbdbc
DW
3045
3046 /* read the extended mpb */
3047 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
3048 if (devname)
3049 fprintf(stderr,
3050 Name ": Cannot seek to extended mpb on %s: %s\n",
3051 devname, strerror(errno));
3052 return 1;
3053 }
3054
f21e18ca 3055 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
cdddbdbc
DW
3056 if (devname)
3057 fprintf(stderr,
3058 Name ": Cannot read extended mpb on %s: %s\n",
3059 devname, strerror(errno));
3060 return 2;
3061 }
3062
949c47a0
DW
3063 check_sum = __gen_imsm_checksum(super->anchor);
3064 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
cdddbdbc
DW
3065 if (devname)
3066 fprintf(stderr,
3067 Name ": IMSM checksum %x != %x on %s\n",
949c47a0 3068 check_sum, __le32_to_cpu(super->anchor->check_sum),
cdddbdbc 3069 devname);
db575f3b 3070 return 3;
cdddbdbc
DW
3071 }
3072
604b746f
JD
3073 /* FIXME the BBM log is disk specific so we cannot use this global
3074 * buffer for all disks. Ok for now since we only look at the global
3075 * bbm_log_size parameter to gate assembly
3076 */
3077 super->bbm_log = __get_imsm_bbm_log(super->anchor);
3078
a2b97981
DW
3079 return 0;
3080}
3081
8e59f3d8
AK
3082static int read_imsm_migr_rec(int fd, struct intel_super *super);
3083
/* load the mpb, then the per-disk metadata, then parse out the raid
 * devices; stop at the first stage that fails and propagate its error
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (!err)
		err = load_imsm_disk(fd, super, devname, keep_fd);
	if (!err)
		err = parse_raid_devices(super);

	return err;
}
3099
ae6aad82
DW
3100static void __free_imsm_disk(struct dl *d)
3101{
3102 if (d->fd >= 0)
3103 close(d->fd);
3104 if (d->devname)
3105 free(d->devname);
0dcecb2e
DW
3106 if (d->e)
3107 free(d->e);
ae6aad82
DW
3108 free(d);
3109
3110}
1a64be56 3111
cdddbdbc
DW
3112static void free_imsm_disks(struct intel_super *super)
3113{
47ee5a45 3114 struct dl *d;
cdddbdbc 3115
47ee5a45
DW
3116 while (super->disks) {
3117 d = super->disks;
cdddbdbc 3118 super->disks = d->next;
ae6aad82 3119 __free_imsm_disk(d);
cdddbdbc 3120 }
cb82edca
AK
3121 while (super->disk_mgmt_list) {
3122 d = super->disk_mgmt_list;
3123 super->disk_mgmt_list = d->next;
3124 __free_imsm_disk(d);
3125 }
47ee5a45
DW
3126 while (super->missing) {
3127 d = super->missing;
3128 super->missing = d->next;
3129 __free_imsm_disk(d);
3130 }
3131
cdddbdbc
DW
3132}
3133
9ca2c81c 3134/* free all the pieces hanging off of a super pointer */
d23fe947 3135static void __free_imsm(struct intel_super *super, int free_disks)
cdddbdbc 3136{
88654014
LM
3137 struct intel_hba *elem, *next;
3138
9ca2c81c 3139 if (super->buf) {
949c47a0 3140 free(super->buf);
9ca2c81c
DW
3141 super->buf = NULL;
3142 }
f2f5c343
LM
3143 /* unlink capability description */
3144 super->orom = NULL;
8e59f3d8
AK
3145 if (super->migr_rec_buf) {
3146 free(super->migr_rec_buf);
3147 super->migr_rec_buf = NULL;
3148 }
d23fe947
DW
3149 if (free_disks)
3150 free_imsm_disks(super);
ba2de7ba 3151 free_devlist(super);
88654014
LM
3152 elem = super->hba;
3153 while (elem) {
3154 if (elem->path)
3155 free((void *)elem->path);
3156 next = elem->next;
3157 free(elem);
3158 elem = next;
88c32bb1 3159 }
88654014 3160 super->hba = NULL;
cdddbdbc
DW
3161}
3162
/* free everything hanging off 'super' (including its disk lists) and
 * then the container structure itself
 */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
cdddbdbc
DW
3168
3169static void free_super_imsm(struct supertype *st)
3170{
3171 struct intel_super *super = st->sb;
3172
3173 if (!super)
3174 return;
3175
3176 free_imsm(super);
3177 st->sb = NULL;
3178}
3179
49133e57 3180static struct intel_super *alloc_super(void)
c2c087e6
DW
3181{
3182 struct intel_super *super = malloc(sizeof(*super));
3183
3184 if (super) {
3185 memset(super, 0, sizeof(*super));
bf5a934a 3186 super->current_vol = -1;
0dcecb2e 3187 super->create_offset = ~((__u32 ) 0);
c2c087e6 3188 }
c2c087e6
DW
3189 return super;
3190}
3191
/*
 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
 *
 * Returns:
 *   0 - success, or platform checks disabled (fd < 0 or IMSM_NO_PLATFORM set)
 *   1 - the disk is not attached to an Intel(R) RAID controller
 *   2 - the disk's controller conflicts with the container's controller
 *   3 - no OROM/EFI capability found for the controller
 */
static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
{
	struct sys_dev *hba_name;
	int rv = 0;

	/* with no usable fd, or with platform enforcement disabled, leave
	 * the container without orom/hba information
	 */
	if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
		super->orom = NULL;
		super->hba = NULL;
		return 0;
	}
	hba_name = find_disk_attached_hba(fd, NULL);
	if (!hba_name) {
		if (devname)
			fprintf(stderr,
				Name ": %s is not attached to Intel(R) RAID controller.\n",
				devname);
		return 1;
	}
	rv = attach_hba_to_super(super, hba_name);
	if (rv == 2) {
		/* rv == 2: the disk's controller differs from the one(s)
		 * already recorded on the container; report the mismatch
		 */
		if (devname) {
			struct intel_hba *hba = super->hba;

			fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
				"controller (%s),\n"
				"    but the container is assigned to Intel(R) "
				"%s RAID controller (",
				devname,
				hba_name->path,
				hba_name->pci_id ? : "Err!",
				get_sys_dev_type(hba_name->type));

			/* list every controller already attached to the
			 * container
			 */
			while (hba) {
				fprintf(stderr, "%s", hba->pci_id ? : "Err!");
				if (hba->next)
					fprintf(stderr, ", ");
				hba = hba->next;
			}

			fprintf(stderr, ").\n"
				"    Mixing devices attached to different controllers "
				"is not allowed.\n");
		}
		free_sys_dev(&hba_name);
		return 2;
	}
	super->orom = find_imsm_capability(hba_name->type);
	free_sys_dev(&hba_name);
	if (!super->orom)
		return 3;
	return 0;
}
3247
cdddbdbc 3248#ifndef MDASSEMBLE
47ee5a45
DW
3249/* find_missing - helper routine for load_super_imsm_all that identifies
3250 * disks that have disappeared from the system. This routine relies on
3251 * the mpb being uptodate, which it is at load time.
3252 */
3253static int find_missing(struct intel_super *super)
3254{
3255 int i;
3256 struct imsm_super *mpb = super->anchor;
3257 struct dl *dl;
3258 struct imsm_disk *disk;
47ee5a45
DW
3259
3260 for (i = 0; i < mpb->num_disks; i++) {
3261 disk = __get_imsm_disk(mpb, i);
54c2c1ea 3262 dl = serial_to_dl(disk->serial, super);
47ee5a45
DW
3263 if (dl)
3264 continue;
47ee5a45
DW
3265
3266 dl = malloc(sizeof(*dl));
3267 if (!dl)
3268 return 1;
3269 dl->major = 0;
3270 dl->minor = 0;
3271 dl->fd = -1;
3272 dl->devname = strdup("missing");
3273 dl->index = i;
3274 serialcpy(dl->serial, disk->serial);
3275 dl->disk = *disk;
689c9bf3 3276 dl->e = NULL;
47ee5a45
DW
3277 dl->next = super->missing;
3278 super->missing = dl;
3279 }
3280
3281 return 0;
3282}
3283
a2b97981
DW
3284static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3285{
3286 struct intel_disk *idisk = disk_list;
3287
3288 while (idisk) {
3289 if (serialcmp(idisk->disk.serial, serial) == 0)
3290 break;
3291 idisk = idisk->next;
3292 }
3293
3294 return idisk;
3295}
3296
/* __prep_thunderdome - merge one super into the per-family candidate table
 * and into the merged list of imsm_disk records.
 *
 * table/tbl_size: one slot per distinct family; a slot holds the current
 *                 best candidate mpb for that family.
 * super:          the incoming super to place.
 * disk_list:      merged imsm_disk records across all supers, used later to
 *                 validate membership and spare status.
 *
 * Returns the (possibly grown) table size, or -1 on allocation failure.
 */
static int __prep_thunderdome(struct intel_super **table, int tbl_size,
			      struct intel_super *super,
			      struct intel_disk **disk_list)
{
	struct imsm_disk *d = &super->disks->disk;
	struct imsm_super *mpb = super->anchor;
	int i, j;

	for (i = 0; i < tbl_size; i++) {
		struct imsm_super *tbl_mpb = table[i]->anchor;
		struct imsm_disk *tbl_d = &table[i]->disks->disk;

		if (tbl_mpb->family_num == mpb->family_num) {
			if (tbl_mpb->check_sum == mpb->check_sum) {
				/* identical mpb contents: keep the incoming
				 * super in this slot (falls through to the
				 * table[i] = super assignment below)
				 */
				dprintf("%s: mpb from %d:%d matches %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);
				break;
			}

			if (((is_configured(d) && !is_configured(tbl_d)) ||
			     is_configured(d) == is_configured(tbl_d)) &&
			    tbl_mpb->generation_num < mpb->generation_num) {
				/* current version of the mpb is a
				 * better candidate than the one in
				 * super_table, but copy over "cross
				 * generational" status
				 */
				struct intel_disk *idisk;

				dprintf("%s: mpb from %d:%d replaces %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				idisk = disk_list_get(tbl_d->serial, *disk_list);
				if (idisk && is_failed(&idisk->disk))
					tbl_d->status |= FAILED_DISK;
				break;
			} else {
				struct intel_disk *idisk;
				struct imsm_disk *disk;

				/* tbl_mpb is more up to date, but copy
				 * over cross generational status before
				 * returning
				 */
				disk = __serial_to_disk(d->serial, mpb, NULL);
				if (disk && is_failed(disk))
					d->status |= FAILED_DISK;

				idisk = disk_list_get(d->serial, *disk_list);
				if (idisk) {
					idisk->owner = i;
					if (disk && is_configured(disk))
						idisk->disk.status |= CONFIGURED_DISK;
				}

				dprintf("%s: mpb from %d:%d prefer %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				/* incoming super loses; table unchanged */
				return tbl_size;
			}
		}
	}

	/* either a brand-new family (append) or this super won its slot */
	if (i >= tbl_size)
		table[tbl_size++] = super;
	else
		table[i] = super;

	/* update/extend the merged list of imsm_disk records */
	for (j = 0; j < mpb->num_disks; j++) {
		struct imsm_disk *disk = __get_imsm_disk(mpb, j);
		struct intel_disk *idisk;

		idisk = disk_list_get(disk->serial, *disk_list);
		if (idisk) {
			idisk->disk.status |= disk->status;
			/* a configured or failed disk is no longer a spare */
			if (is_configured(&idisk->disk) ||
			    is_failed(&idisk->disk))
				idisk->disk.status &= ~(SPARE_DISK);
		} else {
			idisk = calloc(1, sizeof(*idisk));
			if (!idisk)
				return -1;
			idisk->owner = IMSM_UNKNOWN_OWNER;
			idisk->disk = *disk;
			idisk->next = *disk_list;
			*disk_list = idisk;
		}

		/* the disk this super was loaded from is owned by slot i */
		if (serialcmp(idisk->disk.serial, d->serial) == 0)
			idisk->owner = i;
	}

	return tbl_size;
}
3401
3402static struct intel_super *
3403validate_members(struct intel_super *super, struct intel_disk *disk_list,
3404 const int owner)
3405{
3406 struct imsm_super *mpb = super->anchor;
3407 int ok_count = 0;
3408 int i;
3409
3410 for (i = 0; i < mpb->num_disks; i++) {
3411 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3412 struct intel_disk *idisk;
3413
3414 idisk = disk_list_get(disk->serial, disk_list);
3415 if (idisk) {
3416 if (idisk->owner == owner ||
3417 idisk->owner == IMSM_UNKNOWN_OWNER)
3418 ok_count++;
3419 else
3420 dprintf("%s: '%.16s' owner %d != %d\n",
3421 __func__, disk->serial, idisk->owner,
3422 owner);
3423 } else {
3424 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3425 __func__, __le32_to_cpu(mpb->family_num), i,
3426 disk->serial);
3427 break;
3428 }
3429 }
3430
3431 if (ok_count == mpb->num_disks)
3432 return super;
3433 return NULL;
3434}
3435
3436static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3437{
3438 struct intel_super *s;
3439
3440 for (s = super_list; s; s = s->next) {
3441 if (family_num != s->anchor->family_num)
3442 continue;
3443 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3444 __le32_to_cpu(family_num), s->disks->devname);
3445 }
3446}
3447
/* imsm_thunderdome - from the supers loaded off each container member,
 * elect a single "champion" metadata view.  All mpbs enter, at most one
 * leaves: families are reduced to their best candidate, conflicts are
 * reported, member dl's are collected onto the champion, and the champion
 * is unlinked from *super_list (caller owns the returned pointer; the
 * remaining supers stay on the list for the caller to free).
 */
static struct intel_super *
imsm_thunderdome(struct intel_super **super_list, int len)
{
	struct intel_super *super_table[len];
	struct intel_disk *disk_list = NULL;
	struct intel_super *champion, *spare;
	struct intel_super *s, **del;
	int tbl_size = 0;
	int conflict;
	int i;

	/* round 1: one best candidate per family, plus a merged disk list */
	memset(super_table, 0, sizeof(super_table));
	for (s = *super_list; s; s = s->next)
		tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);

	/* round 2: drop candidates whose membership does not validate */
	for (i = 0; i < tbl_size; i++) {
		struct imsm_disk *d;
		struct intel_disk *idisk;
		struct imsm_super *mpb = super_table[i]->anchor;

		s = super_table[i];
		d = &s->disks->disk;

		/* 'd' must appear in merged disk list for its
		 * configuration to be valid
		 */
		idisk = disk_list_get(d->serial, disk_list);
		if (idisk && idisk->owner == i)
			s = validate_members(s, disk_list, i);
		else
			s = NULL;

		if (!s)
			dprintf("%s: marking family: %#x from %d:%d offline\n",
				__func__, mpb->family_num,
				super_table[i]->disks->major,
				super_table[i]->disks->minor);
		super_table[i] = s;
	}

	/* This is where the mdadm implementation differs from the Windows
	 * driver which has no strict concept of a container.  We can only
	 * assemble one family from a container, so when returning a prodigal
	 * array member to this system the code will not be able to disambiguate
	 * the container contents that should be assembled ("foreign" versus
	 * "local").  It requires user intervention to set the orig_family_num
	 * to a new value to establish a new container.  The Windows driver in
	 * this situation fixes up the volume name in place and manages the
	 * foreign array as an independent entity.
	 */
	s = NULL;
	spare = NULL;
	conflict = 0;
	for (i = 0; i < tbl_size; i++) {
		struct intel_super *tbl_ent = super_table[i];
		int is_spare = 0;

		if (!tbl_ent)
			continue;

		/* an mpb with no raid devices is a pure spare */
		if (tbl_ent->anchor->num_raid_devs == 0) {
			spare = tbl_ent;
			is_spare = 1;
		}

		/* first populated family wins; later ones are conflicts */
		if (s && !is_spare) {
			show_conflicts(tbl_ent->anchor->family_num, *super_list);
			conflict++;
		} else if (!s && !is_spare)
			s = tbl_ent;
	}

	/* fall back to a spare-only view when no populated family survived */
	if (!s)
		s = spare;
	if (!s) {
		champion = NULL;
		goto out;
	}
	champion = s;

	if (conflict)
		fprintf(stderr, "Chose family %#x on '%s', "
			"assemble conflicts to new container with '--update=uuid'\n",
			__le32_to_cpu(s->anchor->family_num), s->disks->devname);

	/* collect all dl's onto 'champion', and update them to
	 * champion's version of the status
	 */
	for (s = *super_list; s; s = s->next) {
		struct imsm_super *mpb = champion->anchor;
		struct dl *dl = s->disks;

		if (s == champion)
			continue;

		for (i = 0; i < mpb->num_disks; i++) {
			struct imsm_disk *disk;

			disk = __serial_to_disk(dl->serial, mpb, &dl->index);
			if (disk) {
				dl->disk = *disk;
				/* only set index on disks that are a member of
				 * a populated container, i.e. one with
				 * raid_devs
				 */
				if (is_failed(&dl->disk))
					dl->index = -2;
				else if (is_spare(&dl->disk))
					dl->index = -1;
				break;
			}
		}

		if (i >= mpb->num_disks) {
			struct intel_disk *idisk;

			/* unknown to the champion's mpb: keep as a spare only
			 * if the merged list shows a healthy, unconfigured
			 * spare; otherwise mark failed and leave it behind
			 */
			idisk = disk_list_get(dl->serial, disk_list);
			if (idisk && is_spare(&idisk->disk) &&
			    !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
				dl->index = -1;
			else {
				dl->index = -2;
				continue;
			}
		}

		dl->next = champion->disks;
		champion->disks = dl;
		s->disks = NULL;
	}

	/* delete 'champion' from super_list */
	for (del = super_list; *del; ) {
		if (*del == champion) {
			*del = (*del)->next;
			break;
		} else
			del = &(*del)->next;
	}
	champion->next = NULL;

 out:
	/* the merged disk list is only needed during the election */
	while (disk_list) {
		struct intel_disk *idisk = disk_list;

		disk_list = disk_list->next;
		free(idisk);
	}

	return champion;
}
3599
/* load_super_imsm_all - load imsm metadata from every member of an opened
 * container fd, elect a champion view via imsm_thunderdome(), and return
 * it through *sbp.  Member fds are kept open (keep_fd=1) on the retained
 * dl entries.  Returns 0 on success, a positive error code otherwise.
 */
static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
			       char *devname)
{
	struct mdinfo *sra;
	struct intel_super *super_list = NULL;
	struct intel_super *super = NULL;
	int devnum = fd2devnum(fd);
	struct mdinfo *sd;
	int retry;
	int err = 0;
	int i;

	/* check if 'fd' an opened container */
	sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
	if (!sra)
		return 1;

	if (sra->array.major_version != -1 ||
	    sra->array.minor_version != -2 ||
	    strcmp(sra->text_version, "imsm") != 0) {
		err = 1;
		goto error;
	}
	/* load all mpbs */
	for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
		struct intel_super *s = alloc_super();
		char nm[32];
		int dfd;
		int rv;

		/* link s onto super_list immediately so the error path
		 * frees it
		 */
		err = 1;
		if (!s)
			goto error;
		s->next = super_list;
		super_list = s;

		err = 2;
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		dfd = dev_open(nm, O_RDWR);
		if (dfd < 0)
			goto error;

		rv = find_intel_hba_capability(dfd, s, devname);
		/* no orom/efi or non-intel hba of the disk */
		if (rv != 0)
			goto error;
		/* NOTE(review): dfd is not closed on the error paths below;
		 * on success load_and_parse_mpb(..., keep_fd=1) retains it
		 * on the dl.  Verify whether failures leak the descriptor.
		 */

		err = load_and_parse_mpb(dfd, s, NULL, 1);

		/* retry the load if we might have raced against mdmon */
		if (err == 3 && mdmon_running(devnum))
			for (retry = 0; retry < 3; retry++) {
				usleep(3000);
				err = load_and_parse_mpb(dfd, s, NULL, 1);
				if (err != 3)
					break;
			}
		if (err)
			goto error;
	}

	/* all mpbs enter, maybe one leaves */
	super = imsm_thunderdome(&super_list, i);
	if (!super) {
		err = 1;
		goto error;
	}

	if (find_missing(super) != 0) {
		free_imsm(super);
		err = 2;
		goto error;
	}

	/* load migration record */
	err = load_imsm_migr_rec(super, NULL);
	if (err) {
		err = 4;
		goto error;
	}

	/* Check migration compatibility */
	if (check_mpb_migr_compatibility(super) != 0) {
		fprintf(stderr, Name ": Unsupported migration detected");
		if (devname)
			fprintf(stderr, " on %s\n", devname);
		else
			fprintf(stderr, " (IMSM).\n");

		err = 5;
		goto error;
	}

	err = 0;

 error:
	/* free every super that did not become the champion */
	while (super_list) {
		struct intel_super *s = super_list;

		super_list = super_list->next;
		free_imsm(s);
	}
	sysfs_free(sra);

	if (err)
		return err;

	/* success: hand the champion to the caller */
	*sbp = super;
	st->container_dev = devnum;
	/* err is always 0 here; the check is redundant but harmless */
	if (err == 0 && st->ss == NULL) {
		st->ss = &super_imsm;
		st->minor_version = 0;
		st->max_devs = IMSM_MAX_DEVICES;
	}
	return 0;
}
/* supertype entry point: load all member metadata from an opened
 * container fd into st->sb
 */
static int load_container_imsm(struct supertype *st, int fd, char *devname)
{
	return load_super_imsm_all(st, fd, &st->sb, devname);
}
cdddbdbc
DW
3721#endif
3722
/* load_super_imsm - load imsm metadata from a single component device.
 * Returns 0 on success; 1 for partitions/allocation failure, 2 when
 * platform capability checks fail, 3 on unsupported migration, or the
 * error from load_and_parse_mpb().
 */
static int load_super_imsm(struct supertype *st, int fd, char *devname)
{
	struct intel_super *super;
	int rv;

	if (test_partition(fd))
		/* IMSM not allowed on partitions */
		return 1;

	/* drop any previously-loaded superblock first */
	free_super_imsm(st);

	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		return 1;
	}
	/* Load hba and capabilities if they exist.
	 * But do not preclude loading metadata in case capabilities or hba are
	 * non-compliant and ignore_hw_compat is set.
	 */
	rv = find_intel_hba_capability(fd, super, devname);
	/* no orom/efi or non-intel hba of the disk */
	if ((rv != 0) && (st->ignore_hw_compat == 0)) {
		if (devname)
			fprintf(stderr,
				Name ": No OROM/EFI properties for %s\n", devname);
		free_imsm(super);
		return 2;
	}
	rv = load_and_parse_mpb(fd, super, devname, 0);

	if (rv) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to load all information "
				"sections on %s\n", devname);
		free_imsm(super);
		return rv;
	}

	/* ownership of super transfers to st here; later error paths leave
	 * st->sb set so the caller can release it via free_super_imsm()
	 */
	st->sb = super;
	if (st->ss == NULL) {
		st->ss = &super_imsm;
		st->minor_version = 0;
		st->max_devs = IMSM_MAX_DEVICES;
	}

	/* load migration record */
	/* NOTE(review): the return value is deliberately ignored here,
	 * unlike in load_super_imsm_all() — presumably a missing migration
	 * record is acceptable for a single device; confirm.
	 */
	load_imsm_migr_rec(super, NULL);

	/* Check for unsupported migration features */
	if (check_mpb_migr_compatibility(super) != 0) {
		fprintf(stderr, Name ": Unsupported migration detected");
		if (devname)
			fprintf(stderr, " on %s\n", devname);
		else
			fprintf(stderr, " (IMSM).\n");
		return 3;
	}

	return 0;
}
3787
ef6ffade
DW
3788static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3789{
3790 if (info->level == 1)
3791 return 128;
3792 return info->chunk_size >> 9;
3793}
3794
ff596308 3795static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
ef6ffade
DW
3796{
3797 __u32 num_stripes;
3798
3799 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
ff596308 3800 num_stripes /= num_domains;
ef6ffade
DW
3801
3802 return num_stripes;
3803}
3804
fcfd9599
DW
3805static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3806{
4025c288
DW
3807 if (info->level == 1)
3808 return info->size * 2;
3809 else
3810 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
fcfd9599
DW
3811}
3812
/* imsm_update_version_info - recompute the mpb version suffix and
 * attribute flags from the current set of raid devices.  The version
 * string is written into mpb->sig just past the fixed signature.
 */
static void imsm_update_version_info(struct intel_super *super)
{
	/* update the version and attributes */
	struct imsm_super *mpb = super->anchor;
	char *version;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		/* any device over 2^32 sectors forces the 2TB attribute */
		if (__le32_to_cpu(dev->size_high) > 0)
			mpb->attributes |= MPB_ATTRIB_2TB;

		/* FIXME detect when an array spans a port multiplier */
#if 0
		mpb->attributes |= MPB_ATTRIB_PM;
#endif

		if (mpb->num_raid_devs > 1 ||
		    mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
			/* multi-volume or extra attributes: use the
			 * attribute-capable version and set the per-level
			 * capability bit
			 */
			version = MPB_VERSION_ATTRIBS;
			switch (get_imsm_raid_level(map)) {
			case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
			case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
			case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
			case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
			}
		} else {
			/* single plain volume: pick the minimal version
			 * string that covers its member count and level
			 */
			if (map->num_members >= 5)
				version = MPB_VERSION_5OR6_DISK_ARRAY;
			else if (dev->status == DEV_CLONE_N_GO)
				version = MPB_VERSION_CNG;
			else if (get_imsm_raid_level(map) == 5)
				version = MPB_VERSION_RAID5;
			else if (map->num_members >= 3)
				version = MPB_VERSION_3OR4_DISK_ARRAY;
			else if (get_imsm_raid_level(map) == 1)
				version = MPB_VERSION_RAID1;
			else
				version = MPB_VERSION_RAID0;
		}
		strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
	}
}
3859
aa534678
DW
3860static int check_name(struct intel_super *super, char *name, int quiet)
3861{
3862 struct imsm_super *mpb = super->anchor;
3863 char *reason = NULL;
3864 int i;
3865
3866 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3867 reason = "must be 16 characters or less";
3868
3869 for (i = 0; i < mpb->num_raid_devs; i++) {
3870 struct imsm_dev *dev = get_imsm_dev(super, i);
3871
3872 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3873 reason = "already exists";
3874 break;
3875 }
3876 }
3877
3878 if (reason && !quiet)
3879 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3880
3881 return !reason;
3882}
3883
/* init_super_imsm_volume - create a new volume (imsm_dev) inside an
 * existing container.  Grows the mpb buffer if needed, validates the
 * name, builds the dev/vol/map structures, and links the new device onto
 * super->devlist.  Returns 1 on success, 0 on failure.
 */
static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
				  unsigned long long size, char *name,
				  char *homehost, int *uuid)
{
	/* We are creating a volume inside a pre-existing container.
	 * so st->sb is already set.
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct intel_dev *dv;
	struct imsm_dev *dev;
	struct imsm_vol *vol;
	struct imsm_map *map;
	int idx = mpb->num_raid_devs;
	int i;
	unsigned long long array_blocks;
	size_t size_old, size_new;
	__u32 num_data_stripes;

	/* enforce the platform's volumes-per-array limit when known */
	if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
		fprintf(stderr, Name": This imsm-container already has the "
			"maximum of %d volumes\n", super->orom->vpa);
		return 0;
	}

	/* ensure the mpb is large enough for the new data */
	size_old = __le32_to_cpu(mpb->mpb_size);
	size_new = disks_to_mpb_size(info->nr_disks);
	if (size_new > size_old) {
		void *mpb_new;
		size_t size_round = ROUND_UP(size_new, 512);

		if (posix_memalign(&mpb_new, 512, size_round) != 0) {
			fprintf(stderr, Name": could not allocate new mpb\n");
			return 0;
		}
		/* NOTE(review): on this failure path the live container
		 * (super, still referenced by st->sb) is freed and mpb_new
		 * leaks — verify callers never touch st->sb afterwards.
		 */
		if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
			fprintf(stderr, Name
				": %s could not allocate migr_rec buffer\n",
				__func__);
			free(super->buf);
			free(super);
			return 0;
		}
		memcpy(mpb_new, mpb, size_old);
		/* NOTE(review): mpb aliases super->buf here; after this
		 * free only super->anchor is repointed — confirm buf and
		 * anchor share storage so no stale pointer survives.
		 */
		free(mpb);
		mpb = mpb_new;
		super->anchor = mpb_new;
		mpb->mpb_size = __cpu_to_le32(size_new);
		memset(mpb_new + size_old, 0, size_round - size_old);
	}
	super->current_vol = idx;
	/* when creating the first raid device in this container set num_disks
	 * to zero, i.e. delete this spare and add raid member devices in
	 * add_to_super_imsm_volume()
	 */
	if (super->current_vol == 0)
		mpb->num_disks = 0;

	if (!check_name(super, name, 0))
		return 0;
	dv = malloc(sizeof(*dv));
	if (!dv) {
		fprintf(stderr, Name ": failed to allocate device list entry\n");
		return 0;
	}
	/* the ord table has one inline slot; allocate the rest */
	dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
	if (!dev) {
		free(dv);
		fprintf(stderr, Name": could not allocate raid device\n");
		return 0;
	}

	strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
	if (info->level == 1)
		array_blocks = info_to_blocks_per_member(info);
	else
		array_blocks = calc_array_size(info->level, info->raid_disks,
					       info->layout, info->chunk_size,
					       info->size*2);
	/* round array size down to closest MB */
	array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;

	/* store the 64-bit size as two little-endian 32-bit halves */
	dev->size_low = __cpu_to_le32((__u32) array_blocks);
	dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
	dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
	vol = &dev->vol;
	vol->migr_state = 0;
	set_migr_type(dev, MIGR_INIT);
	vol->dirty = 0;
	vol->curr_migr_unit = 0;
	map = get_imsm_map(dev, 0);
	map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
	map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
	map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
	map->failed_disk_num = ~0;
	/* level 0 containers start "normal"; real levels start
	 * uninitialized until a resync completes
	 */
	map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
				       IMSM_T_STATE_NORMAL;
	map->ddf = 1;

	if (info->level == 1 && info->raid_disks > 2) {
		free(dev);
		free(dv);
		fprintf(stderr, Name": imsm does not support more than 2 disks"
			"in a raid1 volume\n");
		return 0;
	}

	map->raid_level = info->level;
	if (info->level == 10) {
		/* raid10 is represented as mirrored domains of raid1 */
		map->raid_level = 1;
		map->num_domains = info->raid_disks / 2;
	} else if (info->level == 1)
		map->num_domains = info->raid_disks;
	else
		map->num_domains = 1;

	num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
	map->num_data_stripes = __cpu_to_le32(num_data_stripes);

	map->num_members = info->raid_disks;
	for (i = 0; i < map->num_members; i++) {
		/* initialized in add_to_super */
		set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
	}
	mpb->num_raid_devs++;

	/* link the new volume onto the container's device list */
	dv->dev = dev;
	dv->index = super->current_vol;
	dv->next = super->devlist;
	super->devlist = dv;

	imsm_update_version_info(super);

	return 1;
}
4020
bf5a934a
DW
static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
			   unsigned long long size, char *name,
			   char *homehost, int *uuid)
{
	/* This is primarily called by Create when creating a new array.
	 * We will then get add_to_super called for each component, and then
	 * write_init_super called to write it out to each device.
	 * For IMSM, Create can create on fresh devices or on a pre-existing
	 * array.
	 * To create on a pre-existing array a different method will be called.
	 * This one is just for fresh drives.
	 *
	 * Returns 1 on success, 0 on allocation failure.  On success st->sb
	 * owns the newly allocated intel_super (with 512-aligned anchor and
	 * migration-record buffers).
	 */
	struct intel_super *super;
	struct imsm_super *mpb;
	size_t mpb_size;
	char *version;

	/* a container superblock already exists: delegate to the
	 * volume-creation path instead
	 */
	if (st->sb)
		return init_super_imsm_volume(st, info, size, name, homehost, uuid);

	/* size the anchor for the requested disk count; with no info we
	 * only need one sector (used for zeroing the superblock)
	 */
	if (info)
		mpb_size = disks_to_mpb_size(info->nr_disks);
	else
		mpb_size = 512;

	super = alloc_super();
	/* anchor buffer must be sector aligned for O_DIRECT style writes */
	if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
		free(super);
		super = NULL;
	}
	if (!super) {
		fprintf(stderr, Name
			": %s could not allocate superblock\n", __func__);
		return 0;
	}
	/* one-sector scratch buffer for the general-migration record */
	if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
		fprintf(stderr, Name
			": %s could not allocate migr_rec buffer\n", __func__);
		free(super->buf);
		free(super);
		return 0;
	}
	memset(super->buf, 0, mpb_size);
	mpb = super->buf;
	mpb->mpb_size = __cpu_to_le32(mpb_size);
	st->sb = super;

	if (info == NULL) {
		/* zeroing superblock */
		return 0;
	}

	mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;

	/* signature is "Intel Raid ISM Cfg Sig. " followed by the lowest
	 * version string; imsm_update_version_info raises it later as
	 * features demand
	 */
	version = (char *) mpb->sig;
	strcpy(version, MPB_SIGNATURE);
	version += strlen(MPB_SIGNATURE);
	strcpy(version, MPB_VERSION_RAID0);

	return 1;
}
4082
0e600426 4083#ifndef MDASSEMBLE
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
				     int fd, char *devname)
{
	/* Attach disk 'dk' to the volume currently being created
	 * (super->current_vol): locate the matching container member, mark it
	 * configured, record it in the volume's ordinal table, and on the
	 * first volume seed the anchor's family number.
	 *
	 * Returns 0 on success, 1 on error (messages go to stderr).
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int slot;

	dev = get_imsm_dev(super, super->current_vol);
	map = get_imsm_map(dev, 0);

	/* only in-sync members may be placed in a new volume */
	if (! (dk->state & (1<<MD_DISK_SYNC))) {
		fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
			devname);
		return 1;
	}

	if (fd == -1) {
		/* we're doing autolayout so grab the pre-marked (in
		 * validate_geometry) raid_disk
		 */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		/* explicit device: match by major:minor */
		for (dl = super->disks; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}

	if (!dl) {
		fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
		return 1;
	}

	/* add a pristine spare to the metadata */
	if (dl->index < 0) {
		dl->index = super->anchor->num_disks;
		super->anchor->num_disks++;
	}
	/* Check the device has not already been added */
	slot = get_imsm_disk_slot(map, dl->index);
	if (slot >= 0 &&
	    (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
		fprintf(stderr, Name ": %s has been included in this array twice\n",
			devname);
		return 1;
	}
	/* NOTE(review): slot comes from dk->number here, while the duplicate
	 * check above used the slot derived from dl->index — assumes the two
	 * agree for freshly created volumes; confirm against callers
	 */
	set_imsm_ord_tbl_ent(map, dk->number, dl->index);
	dl->disk.status = CONFIGURED_DISK;

	/* if we are creating the first raid device update the family number */
	if (super->current_vol == 0) {
		__u32 sum;
		struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
		struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);

		if (!_dev || !_disk) {
			fprintf(stderr, Name ": BUG mpb setup error\n");
			return 1;
		}
		*_dev = *dev;
		*_disk = dl->disk;
		/* randomize the family number so re-created arrays get a
		 * fresh identity; checksum folds in the current contents
		 */
		sum = random32();
		sum += __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
		mpb->orig_family_num = mpb->family_num;
	}

	return 0;
}
4158
88654014 4159
f20c3968 4160static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
88654014 4161 int fd, char *devname)
cdddbdbc 4162{
c2c087e6 4163 struct intel_super *super = st->sb;
c2c087e6
DW
4164 struct dl *dd;
4165 unsigned long long size;
f2f27e63 4166 __u32 id;
c2c087e6
DW
4167 int rv;
4168 struct stat stb;
4169
88654014
LM
4170 /* If we are on an RAID enabled platform check that the disk is
4171 * attached to the raid controller.
4172 * We do not need to test disks attachment for container based additions,
4173 * they shall be already tested when container was created/assembled.
88c32bb1 4174 */
d424212e 4175 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 4176 /* no orom/efi or non-intel hba of the disk */
f0f5a016
LM
4177 if (rv != 0) {
4178 dprintf("capability: %p fd: %d ret: %d\n",
4179 super->orom, fd, rv);
4180 return 1;
88c32bb1
DW
4181 }
4182
f20c3968
DW
4183 if (super->current_vol >= 0)
4184 return add_to_super_imsm_volume(st, dk, fd, devname);
bf5a934a 4185
c2c087e6
DW
4186 fstat(fd, &stb);
4187 dd = malloc(sizeof(*dd));
b9f594fe 4188 if (!dd) {
c2c087e6
DW
4189 fprintf(stderr,
4190 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
f20c3968 4191 return 1;
c2c087e6
DW
4192 }
4193 memset(dd, 0, sizeof(*dd));
4194 dd->major = major(stb.st_rdev);
4195 dd->minor = minor(stb.st_rdev);
b9f594fe 4196 dd->index = -1;
c2c087e6 4197 dd->devname = devname ? strdup(devname) : NULL;
c2c087e6 4198 dd->fd = fd;
689c9bf3 4199 dd->e = NULL;
1a64be56 4200 dd->action = DISK_ADD;
c2c087e6 4201 rv = imsm_read_serial(fd, devname, dd->serial);
32ba9157 4202 if (rv) {
c2c087e6 4203 fprintf(stderr,
0030e8d6 4204 Name ": failed to retrieve scsi serial, aborting\n");
949c47a0 4205 free(dd);
0030e8d6 4206 abort();
c2c087e6
DW
4207 }
4208
c2c087e6
DW
4209 get_dev_size(fd, NULL, &size);
4210 size /= 512;
1f24f035 4211 serialcpy(dd->disk.serial, dd->serial);
b9f594fe 4212 dd->disk.total_blocks = __cpu_to_le32(size);
ee5aad5a 4213 dd->disk.status = SPARE_DISK;
c2c087e6 4214 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
b9f594fe 4215 dd->disk.scsi_id = __cpu_to_le32(id);
c2c087e6 4216 else
b9f594fe 4217 dd->disk.scsi_id = __cpu_to_le32(0);
43dad3d6
DW
4218
4219 if (st->update_tail) {
1a64be56
LM
4220 dd->next = super->disk_mgmt_list;
4221 super->disk_mgmt_list = dd;
43dad3d6
DW
4222 } else {
4223 dd->next = super->disks;
4224 super->disks = dd;
ceaf0ee1 4225 super->updates_pending++;
43dad3d6 4226 }
f20c3968
DW
4227
4228 return 0;
cdddbdbc
DW
4229}
4230
1a64be56
LM
4231
4232static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
4233{
4234 struct intel_super *super = st->sb;
4235 struct dl *dd;
4236
4237 /* remove from super works only in mdmon - for communication
4238 * manager - monitor. Check if communication memory buffer
4239 * is prepared.
4240 */
4241 if (!st->update_tail) {
4242 fprintf(stderr,
4243 Name ": %s shall be used in mdmon context only"
4244 "(line %d).\n", __func__, __LINE__);
4245 return 1;
4246 }
4247 dd = malloc(sizeof(*dd));
4248 if (!dd) {
4249 fprintf(stderr,
4250 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4251 return 1;
4252 }
4253 memset(dd, 0, sizeof(*dd));
4254 dd->major = dk->major;
4255 dd->minor = dk->minor;
4256 dd->index = -1;
4257 dd->fd = -1;
4258 dd->disk.status = SPARE_DISK;
4259 dd->action = DISK_REMOVE;
4260
4261 dd->next = super->disk_mgmt_list;
4262 super->disk_mgmt_list = dd;
4263
4264
4265 return 0;
4266}
4267
f796af5d
DW
4268static int store_imsm_mpb(int fd, struct imsm_super *mpb);
4269
4270static union {
4271 char buf[512];
4272 struct imsm_super anchor;
4273} spare_record __attribute__ ((aligned(512)));
c2c087e6 4274
d23fe947
DW
4275/* spare records have their own family number and do not have any defined raid
4276 * devices
4277 */
4278static int write_super_imsm_spares(struct intel_super *super, int doclose)
4279{
d23fe947 4280 struct imsm_super *mpb = super->anchor;
f796af5d 4281 struct imsm_super *spare = &spare_record.anchor;
d23fe947
DW
4282 __u32 sum;
4283 struct dl *d;
4284
f796af5d
DW
4285 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
4286 spare->generation_num = __cpu_to_le32(1UL),
4287 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4288 spare->num_disks = 1,
4289 spare->num_raid_devs = 0,
4290 spare->cache_size = mpb->cache_size,
4291 spare->pwr_cycle_count = __cpu_to_le32(1),
4292
4293 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
4294 MPB_SIGNATURE MPB_VERSION_RAID0);
d23fe947
DW
4295
4296 for (d = super->disks; d; d = d->next) {
8796fdc4 4297 if (d->index != -1)
d23fe947
DW
4298 continue;
4299
f796af5d
DW
4300 spare->disk[0] = d->disk;
4301 sum = __gen_imsm_checksum(spare);
4302 spare->family_num = __cpu_to_le32(sum);
4303 spare->orig_family_num = 0;
4304 sum = __gen_imsm_checksum(spare);
4305 spare->check_sum = __cpu_to_le32(sum);
d23fe947 4306
f796af5d 4307 if (store_imsm_mpb(d->fd, spare)) {
d23fe947
DW
4308 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4309 __func__, d->major, d->minor, strerror(errno));
e74255d9 4310 return 1;
d23fe947
DW
4311 }
4312 if (doclose) {
4313 close(d->fd);
4314 d->fd = -1;
4315 }
4316 }
4317
e74255d9 4318 return 0;
d23fe947
DW
4319}
4320
146c6260
AK
4321static int is_gen_migration(struct imsm_dev *dev);
4322
36988a3d 4323static int write_super_imsm(struct supertype *st, int doclose)
cdddbdbc 4324{
36988a3d 4325 struct intel_super *super = st->sb;
949c47a0 4326 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4327 struct dl *d;
4328 __u32 generation;
4329 __u32 sum;
d23fe947 4330 int spares = 0;
949c47a0 4331 int i;
a48ac0a8 4332 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
36988a3d 4333 int num_disks = 0;
146c6260 4334 int clear_migration_record = 1;
cdddbdbc 4335
c2c087e6
DW
4336 /* 'generation' is incremented everytime the metadata is written */
4337 generation = __le32_to_cpu(mpb->generation_num);
4338 generation++;
4339 mpb->generation_num = __cpu_to_le32(generation);
4340
148acb7b
DW
4341 /* fix up cases where previous mdadm releases failed to set
4342 * orig_family_num
4343 */
4344 if (mpb->orig_family_num == 0)
4345 mpb->orig_family_num = mpb->family_num;
4346
d23fe947 4347 for (d = super->disks; d; d = d->next) {
8796fdc4 4348 if (d->index == -1)
d23fe947 4349 spares++;
36988a3d 4350 else {
d23fe947 4351 mpb->disk[d->index] = d->disk;
36988a3d
AK
4352 num_disks++;
4353 }
d23fe947 4354 }
36988a3d 4355 for (d = super->missing; d; d = d->next) {
47ee5a45 4356 mpb->disk[d->index] = d->disk;
36988a3d
AK
4357 num_disks++;
4358 }
4359 mpb->num_disks = num_disks;
4360 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
b9f594fe 4361
949c47a0
DW
4362 for (i = 0; i < mpb->num_raid_devs; i++) {
4363 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
36988a3d
AK
4364 struct imsm_dev *dev2 = get_imsm_dev(super, i);
4365 if (dev && dev2) {
4366 imsm_copy_dev(dev, dev2);
4367 mpb_size += sizeof_imsm_dev(dev, 0);
4368 }
146c6260
AK
4369 if (is_gen_migration(dev2))
4370 clear_migration_record = 0;
949c47a0 4371 }
a48ac0a8
DW
4372 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
4373 mpb->mpb_size = __cpu_to_le32(mpb_size);
949c47a0 4374
c2c087e6 4375 /* recalculate checksum */
949c47a0 4376 sum = __gen_imsm_checksum(mpb);
c2c087e6
DW
4377 mpb->check_sum = __cpu_to_le32(sum);
4378
146c6260
AK
4379 if (clear_migration_record)
4380 memset(super->migr_rec_buf, 0, 512);
4381
d23fe947 4382 /* write the mpb for disks that compose raid devices */
c2c087e6 4383 for (d = super->disks; d ; d = d->next) {
d23fe947
DW
4384 if (d->index < 0)
4385 continue;
f796af5d 4386 if (store_imsm_mpb(d->fd, mpb))
c2c087e6
DW
4387 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4388 __func__, d->major, d->minor, strerror(errno));
146c6260
AK
4389 if (clear_migration_record) {
4390 unsigned long long dsize;
4391
4392 get_dev_size(d->fd, NULL, &dsize);
4393 if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
4394 write(d->fd, super->migr_rec_buf, 512);
4395 }
4396 }
c2c087e6
DW
4397 if (doclose) {
4398 close(d->fd);
4399 d->fd = -1;
4400 }
4401 }
4402
d23fe947
DW
4403 if (spares)
4404 return write_super_imsm_spares(super, doclose);
4405
e74255d9 4406 return 0;
c2c087e6
DW
4407}
4408
0e600426 4409
static int create_array(struct supertype *st, int dev_idx)
{
	/* Build and queue an update_create_array metadata update describing
	 * volume dev_idx: the imsm_dev record followed by one disk_info
	 * (serial) per member, so the monitor can match members by serial.
	 *
	 * Returns 0 on success, 1 on allocation failure.
	 */
	size_t len;
	struct imsm_update_create_array *u;
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct disk_info *inf;
	struct imsm_disk *disk;
	int i;

	/* u embeds a fixed imsm_dev; swap it for the real (variable-length)
	 * device size and append one disk_info per member
	 */
	len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
	      sizeof(*inf) * map->num_members;
	u = malloc(len);
	if (!u) {
		fprintf(stderr, "%s: failed to allocate update buffer\n",
			__func__);
		return 1;
	}

	u->type = update_create_array;
	u->dev_idx = dev_idx;
	imsm_copy_dev(&u->dev, dev);
	inf = get_disk_info(u);
	for (i = 0; i < map->num_members; i++) {
		int idx = get_imsm_disk_idx(dev, i, -1);

		disk = get_imsm_disk(super, idx);
		serialcpy(inf[i].serial, disk->serial);
	}
	/* ownership of u passes to the update queue */
	append_metadata_update(st, u, len);

	return 0;
}
4444
1a64be56 4445static int mgmt_disk(struct supertype *st)
43dad3d6
DW
4446{
4447 struct intel_super *super = st->sb;
4448 size_t len;
1a64be56 4449 struct imsm_update_add_remove_disk *u;
43dad3d6 4450
1a64be56 4451 if (!super->disk_mgmt_list)
43dad3d6
DW
4452 return 0;
4453
4454 len = sizeof(*u);
4455 u = malloc(len);
4456 if (!u) {
4457 fprintf(stderr, "%s: failed to allocate update buffer\n",
4458 __func__);
4459 return 1;
4460 }
4461
1a64be56 4462 u->type = update_add_remove_disk;
43dad3d6
DW
4463 append_metadata_update(st, u, len);
4464
4465 return 0;
4466}
4467
c2c087e6
DW
4468static int write_init_super_imsm(struct supertype *st)
4469{
9b1fb677
DW
4470 struct intel_super *super = st->sb;
4471 int current_vol = super->current_vol;
4472
4473 /* we are done with current_vol reset it to point st at the container */
4474 super->current_vol = -1;
4475
8273f55e 4476 if (st->update_tail) {
43dad3d6
DW
4477 /* queue the recently created array / added disk
4478 * as a metadata update */
43dad3d6 4479 int rv;
8273f55e 4480
43dad3d6 4481 /* determine if we are creating a volume or adding a disk */
9b1fb677 4482 if (current_vol < 0) {
1a64be56
LM
4483 /* in the mgmt (add/remove) disk case we are running
4484 * in mdmon context, so don't close fd's
43dad3d6 4485 */
1a64be56 4486 return mgmt_disk(st);
43dad3d6 4487 } else
9b1fb677 4488 rv = create_array(st, current_vol);
8273f55e 4489
43dad3d6 4490 return rv;
d682f344
N
4491 } else {
4492 struct dl *d;
4493 for (d = super->disks; d; d = d->next)
4494 Kill(d->devname, NULL, 0, 1, 1);
36988a3d 4495 return write_super_imsm(st, 1);
d682f344 4496 }
cdddbdbc 4497}
0e600426 4498#endif
cdddbdbc 4499
static int store_super_imsm(struct supertype *st, int fd)
{
	/* Write the current anchor (if any) to the device open on fd.
	 * Returns 0 on success, 1 when there is no anchor to write or when
	 * built MDASSEMBLE-only (store_imsm_mpb is not compiled in).
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super ? super->anchor : NULL;

	if (!mpb)
		return 1;

#ifndef MDASSEMBLE
	return store_imsm_mpb(fd, mpb);
#else
	return 1;
#endif
}
4514
0e600426
N
4515static int imsm_bbm_log_size(struct imsm_super *mpb)
4516{
4517 return __le32_to_cpu(mpb->bbm_log_size);
4518}
4519
4520#ifndef MDASSEMBLE
cdddbdbc
DW
static int validate_geometry_imsm_container(struct supertype *st, int level,
					    int layout, int raiddisks, int chunk,
					    unsigned long long size, char *dev,
					    unsigned long long *freesize,
					    int verbose)
{
	/* Check whether 'dev' can be used to build an imsm container with
	 * 'raiddisks' members; on success store the usable size (sectors)
	 * in *freesize.
	 *
	 * Returns 1 when the device is acceptable (or when !dev, meaning
	 * "containers are possible in principle"), 0 otherwise.
	 */
	int fd;
	unsigned long long ldsize;
	struct intel_super *super=NULL;
	int rv = 0;

	if (level != LEVEL_CONTAINER)
		return 0;
	if (!dev)
		return 1;

	/* O_EXCL: the device must not be in use elsewhere */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd < 0) {
		if (verbose)
			fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	if (!get_dev_size(fd, dev, &ldsize)) {
		close(fd);
		return 0;
	}

	/* capabilities retrieve could be possible
	 * note that there is no fd for the disks in array.
	 */
	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		close(fd);
		return 0;
	}

	rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
	if (rv != 0) {
#if DEBUG
		char str[256];
		fd2devname(fd, str);
		dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
			fd, str, super->orom, rv, raiddisks);
#endif
		/* no orom/efi or non-intel hba of the disk */
		close(fd);
		free_imsm(super);
		return 0;
	}
	close(fd);
	/* enforce the platform's (option-rom's) disk-count limit */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);

		free_imsm(super);
		return 0;
	}

	/* report usable size in sectors (ldsize is in bytes) */
	*freesize = avail_size_imsm(st, ldsize >> 9);
	free_imsm(super);

	return 1;
}
4590
0dcecb2e
DW
4591static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4592{
4593 const unsigned long long base_start = e[*idx].start;
4594 unsigned long long end = base_start + e[*idx].size;
4595 int i;
4596
4597 if (base_start == end)
4598 return 0;
4599
4600 *idx = *idx + 1;
4601 for (i = *idx; i < num_extents; i++) {
4602 /* extend overlapping extents */
4603 if (e[i].start >= base_start &&
4604 e[i].start <= end) {
4605 if (e[i].size == 0)
4606 return 0;
4607 if (e[i].start + e[i].size > end)
4608 end = e[i].start + e[i].size;
4609 } else if (e[i].start > end) {
4610 *idx = i;
4611 break;
4612 }
4613 }
4614
4615 return end - base_start;
4616}
4617
static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
{
	/* build a composite disk with all known extents and generate a new
	 * 'maxsize' given the "all disks in an array must share a common start
	 * offset" constraint
	 *
	 * On success super->create_offset is set to the chosen common start
	 * and the largest usable size (minus any reserved gap) is returned;
	 * 0 means no usable region was found (or allocation failed).
	 */
	struct extent *e = calloc(sum_extents, sizeof(*e));
	struct dl *dl;
	int i, j;
	int start_extent;
	unsigned long long pos;
	unsigned long long start = 0;
	unsigned long long maxsize;
	unsigned long reserve;

	if (!e)
		return 0;

	/* coalesce and sort all extents. also, check to see if we need to
	 * reserve space between member arrays
	 */
	j = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		if (!dl->e)
			continue;
		for (i = 0; i < dl->extent_cnt; i++)
			e[j++] = dl->e[i];
	}
	qsort(e, sum_extents, sizeof(*e), cmp_extent);

	/* merge extents: compact overlapping runs in place; find_size()
	 * advances i past each merged run, and a zero size terminates
	 */
	i = 0;
	j = 0;
	while (i < sum_extents) {
		e[j].start = e[i].start;
		e[j].size = find_size(e, &i, sum_extents);
		j++;
		if (e[j-1].size == 0)
			break;
	}

	/* scan the gaps between merged extents for the largest free region;
	 * ties go to the later (higher-offset) gap because of >=
	 */
	pos = 0;
	maxsize = 0;
	start_extent = 0;
	i = 0;
	do {
		unsigned long long esize;

		esize = e[i].start - pos;
		if (esize >= maxsize) {
			maxsize = esize;
			start = pos;
			start_extent = i;
		}
		pos = e[i].start + e[i].size;
		i++;
	} while (e[i-1].size);
	free(e);

	if (maxsize == 0)
		return 0;

	/* FIXME assumes volume at offset 0 is the first volume in a
	 * container
	 */
	if (start_extent > 0)
		reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
	else
		reserve = 0;

	if (maxsize < reserve)
		return 0;

	/* create_offset is 32-bit on disk; reject starts that overflow it */
	super->create_offset = ~((__u32) 0);
	if (start + reserve > super->create_offset)
		return 0; /* start overflows create_offset */
	super->create_offset = start + reserve;

	return maxsize - reserve;
}
4698
88c32bb1
DW
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	/* Decide whether this raid level / disk count combination is valid
	 * for imsm, additionally constrained by the platform option-rom's
	 * advertised capabilities when one is present.  Returns 1 if
	 * supported, 0 otherwise.
	 */

	/* raid4, raid6 and negative levels are never supported by imsm */
	if (level < 0 || level == 6 || level == 4)
		return 0;

	/* no option-rom: not an Intel RAID platform, anything goes */
	if (!orom)
		return 1;

	switch (level) {
	case 0:
		return imsm_orom_has_raid0(orom);
	case 1:
		/* more than two mirrors is expressed as raid1e */
		if (raiddisks > 2)
			return imsm_orom_has_raid1e(orom);
		return imsm_orom_has_raid1(orom) && raiddisks == 2;
	case 5:
		return imsm_orom_has_raid5(orom) && raiddisks > 2;
	case 10:
		return imsm_orom_has_raid10(orom) && raiddisks == 4;
	}

	return 0;
}
4720
73408129 4721
35f81cbb 4722#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
73408129
LM
/*
 * validate volume parameters with OROM/EFI capabilities
 */
static int
validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
			    int raiddisks, int *chunk, int verbose)
{
	/* Returns 1 when the requested geometry is acceptable, 0 otherwise.
	 * May write a platform default back through *chunk when the caller
	 * left the chunk size unset.
	 */
#if DEBUG
	verbose = 1;
#endif
	/* validate container capabilities */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);
		return 0;
	}

	/* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
	if (super->orom && (!is_raid_level_supported(super->orom, level,
						     raiddisks))) {
		pr_vrb(": platform does not support raid%d with %d disk%s\n",
			level, raiddisks, raiddisks > 1 ? "s" : "");
		return 0;
	}
	/* raid1 has no chunk size; otherwise default or validate it */
	if (super->orom && level != 1) {
		if (chunk && (*chunk == 0 || *chunk == UnSet))
			*chunk = imsm_orom_default_chunk(super->orom);
		else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
			pr_vrb(": platform does not support a chunk size of: "
			       "%d\n", *chunk);
			return 0;
		}
	}
	/* imsm supports exactly one layout per level */
	if (layout != imsm_level_to_layout(level)) {
		if (level == 5)
			pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
		else if (level == 10)
			pr_vrb(": imsm raid 10 only supports the n2 layout\n");
		else
			pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
			       layout, level);
		return 0;
	}
	return 1;
}
4770
c2c087e6
DW
4771/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4772 * FIX ME add ahci details
4773 */
8b353278 4774static int validate_geometry_imsm_volume(struct supertype *st, int level,
c21e737b 4775 int layout, int raiddisks, int *chunk,
c2c087e6 4776 unsigned long long size, char *dev,
2c514b71
NB
4777 unsigned long long *freesize,
4778 int verbose)
cdddbdbc 4779{
c2c087e6
DW
4780 struct stat stb;
4781 struct intel_super *super = st->sb;
a20d2ba5 4782 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4783 struct dl *dl;
4784 unsigned long long pos = 0;
4785 unsigned long long maxsize;
4786 struct extent *e;
4787 int i;
cdddbdbc 4788
88c32bb1
DW
4789 /* We must have the container info already read in. */
4790 if (!super)
c2c087e6
DW
4791 return 0;
4792
d54559f0
LM
4793 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4794 fprintf(stderr, Name ": RAID gemetry validation failed. "
4795 "Cannot proceed with the action(s).\n");
c2c087e6 4796 return 0;
d54559f0 4797 }
c2c087e6
DW
4798 if (!dev) {
4799 /* General test: make sure there is space for
2da8544a
DW
4800 * 'raiddisks' device extents of size 'size' at a given
4801 * offset
c2c087e6 4802 */
e46273eb 4803 unsigned long long minsize = size;
b7528a20 4804 unsigned long long start_offset = MaxSector;
c2c087e6
DW
4805 int dcnt = 0;
4806 if (minsize == 0)
4807 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4808 for (dl = super->disks; dl ; dl = dl->next) {
4809 int found = 0;
4810
bf5a934a 4811 pos = 0;
c2c087e6
DW
4812 i = 0;
4813 e = get_extents(super, dl);
4814 if (!e) continue;
4815 do {
4816 unsigned long long esize;
4817 esize = e[i].start - pos;
4818 if (esize >= minsize)
4819 found = 1;
b7528a20 4820 if (found && start_offset == MaxSector) {
2da8544a
DW
4821 start_offset = pos;
4822 break;
4823 } else if (found && pos != start_offset) {
4824 found = 0;
4825 break;
4826 }
c2c087e6
DW
4827 pos = e[i].start + e[i].size;
4828 i++;
4829 } while (e[i-1].size);
4830 if (found)
4831 dcnt++;
4832 free(e);
4833 }
4834 if (dcnt < raiddisks) {
2c514b71
NB
4835 if (verbose)
4836 fprintf(stderr, Name ": imsm: Not enough "
4837 "devices with space for this array "
4838 "(%d < %d)\n",
4839 dcnt, raiddisks);
c2c087e6
DW
4840 return 0;
4841 }
4842 return 1;
4843 }
0dcecb2e 4844
c2c087e6
DW
4845 /* This device must be a member of the set */
4846 if (stat(dev, &stb) < 0)
4847 return 0;
4848 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4849 return 0;
4850 for (dl = super->disks ; dl ; dl = dl->next) {
f21e18ca
N
4851 if (dl->major == (int)major(stb.st_rdev) &&
4852 dl->minor == (int)minor(stb.st_rdev))
c2c087e6
DW
4853 break;
4854 }
4855 if (!dl) {
2c514b71
NB
4856 if (verbose)
4857 fprintf(stderr, Name ": %s is not in the "
4858 "same imsm set\n", dev);
c2c087e6 4859 return 0;
a20d2ba5
DW
4860 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4861 /* If a volume is present then the current creation attempt
4862 * cannot incorporate new spares because the orom may not
4863 * understand this configuration (all member disks must be
4864 * members of each array in the container).
4865 */
4866 fprintf(stderr, Name ": %s is a spare and a volume"
4867 " is already defined for this container\n", dev);
4868 fprintf(stderr, Name ": The option-rom requires all member"
4869 " disks to be a member of all volumes\n");
4870 return 0;
c2c087e6 4871 }
0dcecb2e
DW
4872
4873 /* retrieve the largest free space block */
c2c087e6
DW
4874 e = get_extents(super, dl);
4875 maxsize = 0;
4876 i = 0;
0dcecb2e
DW
4877 if (e) {
4878 do {
4879 unsigned long long esize;
4880
4881 esize = e[i].start - pos;
4882 if (esize >= maxsize)
4883 maxsize = esize;
4884 pos = e[i].start + e[i].size;
4885 i++;
4886 } while (e[i-1].size);
4887 dl->e = e;
4888 dl->extent_cnt = i;
4889 } else {
4890 if (verbose)
4891 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4892 dev);
4893 return 0;
4894 }
4895 if (maxsize < size) {
4896 if (verbose)
4897 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4898 dev, maxsize, size);
4899 return 0;
4900 }
4901
4902 /* count total number of extents for merge */
4903 i = 0;
4904 for (dl = super->disks; dl; dl = dl->next)
4905 if (dl->e)
4906 i += dl->extent_cnt;
4907
4908 maxsize = merge_extents(super, i);
a7dd165b 4909 if (maxsize < size || maxsize == 0) {
0dcecb2e
DW
4910 if (verbose)
4911 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4912 maxsize, size);
4913 return 0;
0dcecb2e
DW
4914 }
4915
c2c087e6
DW
4916 *freesize = maxsize;
4917
4918 return 1;
cdddbdbc
DW
4919}
4920
efb30e7f
DW
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	/* Autolayout helper: find the largest free region shared (at a
	 * common start offset) by raiddisks container members, record the
	 * chosen members in dl->raiddisk, and report the volume size in
	 * *freesize.  Returns 1 on success, 0 when no suitable space exists.
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	int i;
	int extent_cnt;
	struct extent *e;
	unsigned long long maxsize;
	unsigned long long minsize;
	int cnt;
	int used;

	/* find the largest common start free region of the possible disks */
	used = 0;
	extent_cnt = 0;
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		dl->raiddisk = -1;

		if (dl->index >= 0)
			used++;

		/* don't activate new spares if we are orom constrained
		 * and there is already a volume active in the container
		 */
		if (super->orom && dl->index < 0 && mpb->num_raid_devs)
			continue;

		e = get_extents(super, dl);
		if (!e)
			continue;
		/* count extents up to (and including) the terminator */
		for (i = 1; e[i-1].size; i++)
			;
		dl->e = e;
		dl->extent_cnt = i;
		extent_cnt += i;
		cnt++;
	}

	maxsize = merge_extents(super, extent_cnt);
	minsize = size;
	if (size == 0)
		/* chunk is in K */
		minsize = chunk * 2;

	/* all candidate disks must fit; an orom-constrained container with
	 * any used disks must use exactly the used set
	 */
	if (cnt < raiddisks ||
	    (super->orom && used && used != raiddisks) ||
	    maxsize < minsize ||
	    maxsize == 0) {
		fprintf(stderr, Name ": not enough devices with space to create array.\n");
		return 0; /* not enough usable space on enough disks */
	}

	if (size == 0) {
		size = maxsize;
		/* round down to a whole number of stripes */
		if (chunk) {
			size /= 2 * chunk;
			size *= 2 * chunk;
		}
	}

	/* pre-mark the chosen disks for add_to_super's autolayout path */
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->e)
			dl->raiddisk = cnt++;

	*freesize = size;

	return 1;
}
4994
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int *chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	/* Top-level geometry validation dispatcher.
	 * - LEVEL_CONTAINER: fresh-device container check.
	 * - no dev, loaded container: autolayout probe via reserve_space().
	 * - dev + loaded container: member volume check.
	 * - dev, no container: probe whether dev belongs to an imsm
	 *   container, load it, then retry as a volume.
	 * Returns 1 when the requested geometry is possible, 0 otherwise.
	 */
	int fd, cfd;
	struct mdinfo *sra;
	int is_member = 0;

	/* load capability
	 * if given unused devices create a container
	 * if given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks,
							chunk?*chunk:0, size,
							dev, freesize,
							verbose);
	}

	if (!dev) {
		if (st->sb && freesize) {
			/* we are being asked to automatically layout a
			 * new volume based on the current contents of
			 * the container. If the parameters can be
			 * satisfied reserve_space will record the disks,
			 * start offset, and size of the volume to be
			 * created. add_to_super and getinfo_super
			 * detect when autolayout is in progress.
			 */
			if (!validate_geometry_imsm_orom(st->sb, level, layout,
							 raiddisks, chunk,
							 verbose))
				return 0;
			return reserve_space(st, raiddisks, size,
					     chunk?*chunk:0, freesize);
		}
		return 1;
	}
	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* opening exclusively succeeded: the device is unused, so a
		 * member volume cannot be created directly on it
		 */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	close(fd);
	if (cfd < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0)
		is_member = 1;
	sysfs_free(sra);
	if (is_member) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
	}

	if (verbose)
		fprintf(stderr, Name ": failed container membership check\n");

	close(cfd);
	return 0;
}
0bd16cf2 5096
30f58b22 5097static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
0bd16cf2
DJ
5098{
5099 struct intel_super *super = st->sb;
5100
30f58b22
DW
5101 if (level && *level == UnSet)
5102 *level = LEVEL_CONTAINER;
5103
5104 if (level && layout && *layout == UnSet)
5105 *layout = imsm_level_to_layout(*level);
0bd16cf2 5106
1d54f286
N
5107 if (chunk && (*chunk == UnSet || *chunk == 0) &&
5108 super && super->orom)
30f58b22 5109 *chunk = imsm_orom_default_chunk(super->orom);
0bd16cf2
DJ
5110}
5111
static void handle_missing(struct intel_super *super, struct imsm_dev *dev);

/* Delete the subarray referenced by super->current_vol from the
 * container metadata.  Returns 0 on success (or when the deletion has
 * been queued to mdmon), 2 on any failure.
 */
static int kill_subarray_imsm(struct supertype *st)
{
	/* remove the subarray currently referenced by ->current_vol */
	__u8 i;
	struct intel_dev **dp;
	struct intel_super *super = st->sb;
	__u8 current_vol = super->current_vol;
	struct imsm_super *mpb = super->anchor;

	if (super->current_vol < 0)
		return 2;
	super->current_vol = -1; /* invalidate subarray cursor */

	/* block deletions that would change the uuid of active subarrays
	 *
	 * FIXME when immutable ids are available, but note that we'll
	 * also need to fixup the invalidated/active subarray indexes in
	 * mdstat
	 */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		char subarray[4];

		/* only subarrays at or above the victim shift index */
		if (i < current_vol)
			continue;
		sprintf(subarray, "%u", i);
		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
				current_vol, i);

			return 2;
		}
	}

	if (st->update_tail) {
		/* mdmon is managing this container: queue the deletion
		 * as a metadata update rather than writing directly */
		struct imsm_update_kill_array *u = malloc(sizeof(*u));

		if (!u)
			return 2;
		u->type = update_kill_array;
		u->dev_idx = current_vol;
		append_metadata_update(st, u, sizeof(*u));

		return 0;
	}

	/* unlink the victim; survivors above it slide down one index and
	 * get their missing-disk state refreshed.
	 * NOTE(review): the unlinked intel_dev is not freed here -
	 * possible leak; confirm ownership before changing. */
	for (dp = &super->devlist; *dp;)
		if ((*dp)->index == current_vol) {
			*dp = (*dp)->next;
		} else {
			handle_missing(super, (*dp)->dev);
			if ((*dp)->index > current_vol)
				(*dp)->index--;
			dp = &(*dp)->next;
		}

	/* no more raid devices, all active components are now spares,
	 * but of course failed are still failed
	 */
	if (--mpb->num_raid_devs == 0) {
		struct dl *d;

		for (d = super->disks; d; d = d->next)
			if (d->index > -2) {
				d->index = -1;
				d->disk.status = SPARE_DISK;
			}
	}

	super->updates_pending++;

	return 0;
}
aa534678 5187
/* Apply a "--update" request to the named subarray.  Only the "name"
 * update is supported; anything else returns 2 (error), 0 on success.
 */
static int update_subarray_imsm(struct supertype *st, char *subarray,
				char *update, struct mddev_ident *ident)
{
	/* update the subarray currently referenced by ->current_vol */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;

	if (strcmp(update, "name") == 0) {
		char *name = ident->name;
		char *ep;
		int vol;

		/* renaming an assembled (active) subarray is not allowed */
		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": Unable to update name of active subarray\n");
			return 2;
		}

		if (!check_name(super, name, 0))
			return 2;

		/* 'subarray' must be a plain decimal volume index */
		vol = strtoul(subarray, &ep, 10);
		if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
			return 2;

		if (st->update_tail) {
			/* mdmon is running: queue a rename update */
			struct imsm_update_rename_array *u = malloc(sizeof(*u));

			if (!u)
				return 2;
			u->type = update_rename_array;
			u->dev_idx = vol;
			snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
			append_metadata_update(st, u, sizeof(*u));
		} else {
			struct imsm_dev *dev;
			int i;

			/* apply the rename directly, then refresh
			 * missing-disk state on every volume */
			dev = get_imsm_dev(super, vol);
			snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
			for (i = 0; i < mpb->num_raid_devs; i++) {
				dev = get_imsm_dev(super, i);
				handle_missing(super, dev);
			}
			super->updates_pending++;
		}
	} else
		return 2;

	return 0;
}
bf5a934a 5239
28bce06f
AK
5240static int is_gen_migration(struct imsm_dev *dev)
5241{
5242 if (!dev->vol.migr_state)
5243 return 0;
5244
5245 if (migr_type(dev) == MIGR_GEN_MIGR)
5246 return 1;
5247
5248 return 0;
5249}
71204a50 5250#endif /* MDASSEMBLE */
28bce06f 5251
1e5c6983
DW
5252static int is_rebuilding(struct imsm_dev *dev)
5253{
5254 struct imsm_map *migr_map;
5255
5256 if (!dev->vol.migr_state)
5257 return 0;
5258
5259 if (migr_type(dev) != MIGR_REBUILD)
5260 return 0;
5261
5262 migr_map = get_imsm_map(dev, 1);
5263
5264 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
5265 return 1;
5266 else
5267 return 0;
5268}
5269
/* If @dev is rebuilding, translate the checkpoint stored in the imsm
 * metadata (in migration units) into a sector-based recovery_start on
 * the single out-of-sync member in @array.  Punts (leaves everything
 * untouched) when zero or more than one member is out of sync.
 */
static void update_recovery_start(struct intel_super *super,
				  struct imsm_dev *dev,
				  struct mdinfo *array)
{
	struct mdinfo *rebuild = NULL;
	struct mdinfo *d;
	__u32 units;

	if (!is_rebuilding(dev))
		return;

	/* Find the rebuild target, but punt on the dual rebuild case */
	for (d = array->devs; d; d = d->next)
		if (d->recovery_start == 0) {
			if (rebuild)
				return;
			rebuild = d;
		}

	if (!rebuild) {
		/* (?) none of the disks are marked with
		 * IMSM_ORD_REBUILD, so assume they are missing and the
		 * disk_ord_tbl was not correctly updated
		 */
		dprintf("%s: failed to locate out-of-sync disk\n", __func__);
		return;
	}

	/* NOTE(review): this multiply may be performed in 32 bits if
	 * blocks_per_migr_unit() returns a 32-bit type - confirm for
	 * very large checkpoints */
	units = __le32_to_cpu(dev->vol.curr_migr_unit);
	rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
}
5301
276d77db 5302static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
1e5c6983 5303
00bbdbda 5304static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
cdddbdbc 5305{
4f5bc454
DW
5306 /* Given a container loaded by load_super_imsm_all,
5307 * extract information about all the arrays into
5308 * an mdinfo tree.
00bbdbda 5309 * If 'subarray' is given, just extract info about that array.
4f5bc454
DW
5310 *
5311 * For each imsm_dev create an mdinfo, fill it in,
5312 * then look for matching devices in super->disks
5313 * and create appropriate device mdinfo.
5314 */
5315 struct intel_super *super = st->sb;
949c47a0 5316 struct imsm_super *mpb = super->anchor;
4f5bc454 5317 struct mdinfo *rest = NULL;
00bbdbda 5318 unsigned int i;
a06d022d 5319 int bbm_errors = 0;
abef11a3
AK
5320 struct dl *d;
5321 int spare_disks = 0;
cdddbdbc 5322
a06d022d
KW
5323 /* check for bad blocks */
5324 if (imsm_bbm_log_size(super->anchor))
5325 bbm_errors = 1;
604b746f 5326
abef11a3
AK
5327 /* count spare devices, not used in maps
5328 */
5329 for (d = super->disks; d; d = d->next)
5330 if (d->index == -1)
5331 spare_disks++;
5332
4f5bc454 5333 for (i = 0; i < mpb->num_raid_devs; i++) {
00bbdbda
N
5334 struct imsm_dev *dev;
5335 struct imsm_map *map;
86e3692b 5336 struct imsm_map *map2;
4f5bc454 5337 struct mdinfo *this;
2db86302 5338 int slot, chunk;
00bbdbda
N
5339 char *ep;
5340
5341 if (subarray &&
5342 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
5343 continue;
5344
5345 dev = get_imsm_dev(super, i);
5346 map = get_imsm_map(dev, 0);
86e3692b 5347 map2 = get_imsm_map(dev, 1);
4f5bc454 5348
1ce0101c
DW
5349 /* do not publish arrays that are in the middle of an
5350 * unsupported migration
5351 */
5352 if (dev->vol.migr_state &&
28bce06f 5353 (migr_type(dev) == MIGR_STATE_CHANGE)) {
1ce0101c
DW
5354 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
5355 " unsupported migration in progress\n",
5356 dev->volume);
5357 continue;
5358 }
2db86302
LM
5359 /* do not publish arrays that are not support by controller's
5360 * OROM/EFI
5361 */
1ce0101c 5362
2db86302 5363 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
7b0bbd0f 5364#ifndef MDASSEMBLE
2db86302
LM
5365 if (!validate_geometry_imsm_orom(super,
5366 get_imsm_raid_level(map), /* RAID level */
5367 imsm_level_to_layout(get_imsm_raid_level(map)),
5368 map->num_members, /* raid disks */
5369 &chunk,
5370 1 /* verbose */)) {
5371 fprintf(stderr, Name ": RAID gemetry validation failed. "
5372 "Cannot proceed with the action(s).\n");
5373 continue;
5374 }
7b0bbd0f 5375#endif /* MDASSEMBLE */
4f5bc454 5376 this = malloc(sizeof(*this));
0fbd635c 5377 if (!this) {
cf1be220 5378 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
0fbd635c
AW
5379 sizeof(*this));
5380 break;
5381 }
4f5bc454
DW
5382 memset(this, 0, sizeof(*this));
5383 this->next = rest;
4f5bc454 5384
301406c9 5385 super->current_vol = i;
a5d85af7 5386 getinfo_super_imsm_volume(st, this, NULL);
4f5bc454 5387 for (slot = 0 ; slot < map->num_members; slot++) {
1e5c6983 5388 unsigned long long recovery_start;
4f5bc454
DW
5389 struct mdinfo *info_d;
5390 struct dl *d;
5391 int idx;
9a1608e5 5392 int skip;
7eef0453 5393 __u32 ord;
4f5bc454 5394
9a1608e5 5395 skip = 0;
98130f40 5396 idx = get_imsm_disk_idx(dev, slot, 0);
196b0d44 5397 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
4f5bc454
DW
5398 for (d = super->disks; d ; d = d->next)
5399 if (d->index == idx)
0fbd635c 5400 break;
4f5bc454 5401
1e5c6983 5402 recovery_start = MaxSector;
4f5bc454 5403 if (d == NULL)
9a1608e5 5404 skip = 1;
25ed7e59 5405 if (d && is_failed(&d->disk))
9a1608e5 5406 skip = 1;
7eef0453 5407 if (ord & IMSM_ORD_REBUILD)
1e5c6983 5408 recovery_start = 0;
9a1608e5
DW
5409
5410 /*
5411 * if we skip some disks the array will be assmebled degraded;
1e5c6983
DW
5412 * reset resync start to avoid a dirty-degraded
5413 * situation when performing the intial sync
9a1608e5
DW
5414 *
5415 * FIXME handle dirty degraded
5416 */
1e5c6983 5417 if ((skip || recovery_start == 0) && !dev->vol.dirty)
b7528a20 5418 this->resync_start = MaxSector;
9a1608e5
DW
5419 if (skip)
5420 continue;
4f5bc454 5421
1e5c6983 5422 info_d = calloc(1, sizeof(*info_d));
9a1608e5
DW
5423 if (!info_d) {
5424 fprintf(stderr, Name ": failed to allocate disk"
1ce0101c 5425 " for volume %.16s\n", dev->volume);
1e5c6983
DW
5426 info_d = this->devs;
5427 while (info_d) {
5428 struct mdinfo *d = info_d->next;
5429
5430 free(info_d);
5431 info_d = d;
5432 }
9a1608e5
DW
5433 free(this);
5434 this = rest;
5435 break;
5436 }
4f5bc454
DW
5437 info_d->next = this->devs;
5438 this->devs = info_d;
5439
4f5bc454
DW
5440 info_d->disk.number = d->index;
5441 info_d->disk.major = d->major;
5442 info_d->disk.minor = d->minor;
5443 info_d->disk.raid_disk = slot;
1e5c6983 5444 info_d->recovery_start = recovery_start;
86e3692b
AK
5445 if (map2) {
5446 if (slot < map2->num_members)
5447 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5448 else
5449 this->array.spare_disks++;
86e3692b
AK
5450 } else {
5451 if (slot < map->num_members)
5452 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5453 else
5454 this->array.spare_disks++;
86e3692b 5455 }
1e5c6983
DW
5456 if (info_d->recovery_start == MaxSector)
5457 this->array.working_disks++;
4f5bc454
DW
5458
5459 info_d->events = __le32_to_cpu(mpb->generation_num);
5460 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5461 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
4f5bc454 5462 }
1e5c6983 5463 /* now that the disk list is up-to-date fixup recovery_start */
c47b0ff6 5464 update_recovery_start(super, dev, this);
abef11a3 5465 this->array.spare_disks += spare_disks;
276d77db
AK
5466
5467 /* check for reshape */
5468 if (this->reshape_active == 1)
5469 recover_backup_imsm(st, this);
5470
9a1608e5 5471 rest = this;
4f5bc454
DW
5472 }
5473
a06d022d
KW
5474 /* if array has bad blocks, set suitable bit in array status */
5475 if (bbm_errors)
5476 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5477
4f5bc454 5478 return rest;
cdddbdbc
DW
5479}
5480
845dea95 5481
/* Map a member-failure count onto the imsm "troubled" state of @dev.
 * @failed: number of failed members, as counted by imsm_count_failed().
 * Returns one of IMSM_T_STATE_{UNINITIALIZED,NORMAL,DEGRADED,FAILED};
 * unknown raid levels fall through to the map's current state.
 */
static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
{
	struct imsm_map *map = get_imsm_map(dev, 0);

	if (!failed)
		return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
			IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;

	switch (get_imsm_raid_level(map)) {
	case 0:
		/* raid0 has no redundancy: any failure is fatal */
		return IMSM_T_STATE_FAILED;
		break;
	case 1:
		/* raid1 survives until every member has failed */
		if (failed < map->num_members)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	case 10:
	{
		/**
		 * check to see if any mirrors have failed, otherwise we
		 * are degraded.  Even numbered slots are mirrored on
		 * slot+1
		 */
		int i;
		/* gcc -Os complains that this is unused */
		int insync = insync;

		for (i = 0; i < map->num_members; i++) {
			__u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
			int idx = ord_to_idx(ord);
			struct imsm_disk *disk;

			/* reset the potential in-sync count on even-numbered
			 * slots.  num_copies is always 2 for imsm raid10
			 */
			if ((i & 1) == 0)
				insync = 2;

			disk = get_imsm_disk(super, idx);
			if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
				insync--;

			/* no in-sync disks left in this mirror the
			 * array has failed
			 */
			if (insync == 0)
				return IMSM_T_STATE_FAILED;
		}

		return IMSM_T_STATE_DEGRADED;
	}
	case 5:
		/* raid5 tolerates exactly one failure */
		if (failed < 2)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	default:
		break;
	}

	return map->map_state;
}
5547
ff077194 5548static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
c2a1e7da
DW
5549{
5550 int i;
5551 int failed = 0;
5552 struct imsm_disk *disk;
ff077194 5553 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2
DW
5554 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5555 __u32 ord;
5556 int idx;
c2a1e7da 5557
0556e1a2
DW
5558 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5559 * disks that are being rebuilt. New failures are recorded to
5560 * map[0]. So we look through all the disks we started with and
5561 * see if any failures are still present, or if any new ones
5562 * have arrived
5563 *
5564 * FIXME add support for online capacity expansion and
5565 * raid-level-migration
5566 */
5567 for (i = 0; i < prev->num_members; i++) {
5568 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5569 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5570 idx = ord_to_idx(ord);
c2a1e7da 5571
949c47a0 5572 disk = get_imsm_disk(super, idx);
25ed7e59 5573 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
fcb84475 5574 failed++;
c2a1e7da
DW
5575 }
5576
5577 return failed;
845dea95
NB
5578}
5579
97b4d0e9
DW
5580#ifndef MDASSEMBLE
5581static int imsm_open_new(struct supertype *c, struct active_array *a,
5582 char *inst)
5583{
5584 struct intel_super *super = c->sb;
5585 struct imsm_super *mpb = super->anchor;
5586
5587 if (atoi(inst) >= mpb->num_raid_devs) {
5588 fprintf(stderr, "%s: subarry index %d, out of range\n",
5589 __func__, atoi(inst));
5590 return -ENODEV;
5591 }
5592
5593 dprintf("imsm: open_new %s\n", inst);
5594 a->info.container_member = atoi(inst);
5595 return 0;
5596}
5597
0c046afd
DW
5598static int is_resyncing(struct imsm_dev *dev)
5599{
5600 struct imsm_map *migr_map;
5601
5602 if (!dev->vol.migr_state)
5603 return 0;
5604
1484e727
DW
5605 if (migr_type(dev) == MIGR_INIT ||
5606 migr_type(dev) == MIGR_REPAIR)
0c046afd
DW
5607 return 1;
5608
4c9bc37b
AK
5609 if (migr_type(dev) == MIGR_GEN_MIGR)
5610 return 0;
5611
0c046afd
DW
5612 migr_map = get_imsm_map(dev, 1);
5613
4c9bc37b
AK
5614 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5615 (dev->vol.migr_type != MIGR_GEN_MIGR))
0c046afd
DW
5616 return 1;
5617 else
5618 return 0;
5619}
5620
/* return true if we recorded new information */
static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
	__u32 ord;
	int slot;
	struct imsm_map *map;

	/* new failures are always set in map[0] */
	map = get_imsm_map(dev, 0);

	/* the disk must actually be a member of this volume */
	slot = get_imsm_disk_slot(map, idx);
	if (slot < 0)
		return 0;

	/* already recorded as failed-and-rebuilding? nothing new then */
	ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
	if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
		return 0;

	disk->status |= FAILED_DISK;
	set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
	/* remember the first failed slot (0xff means "none yet") */
	if (map->failed_disk_num == 0xff)
		map->failed_disk_num = slot;
	return 1;
}
5645
5646static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5647{
5648 mark_failure(dev, disk, idx);
5649
5650 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5651 return;
5652
47ee5a45
DW
5653 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5654 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5655}
5656
/* Record every disk on super->missing as failed/missing in @dev and
 * close out any in-flight migration at the resulting map state.
 * No-op when the container has no missing disks.
 */
static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
{
	__u8 map_state;
	struct dl *dl;
	int failed;

	if (!super->missing)
		return;
	failed = imsm_count_failed(super, dev);
	map_state = imsm_check_degraded(super, dev, failed);

	dprintf("imsm: mark missing\n");
	end_migration(dev, map_state);
	for (dl = super->missing; dl; dl = dl->next)
		mark_missing(dev, &dl->disk, dl->index);
	super->updates_pending++;
}
5674
70bdf0dc
AK
5675static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5676{
5677 int used_disks = imsm_num_data_members(dev, 0);
5678 unsigned long long array_blocks;
5679 struct imsm_map *map;
5680
5681 if (used_disks == 0) {
5682 /* when problems occures
5683 * return current array_blocks value
5684 */
5685 array_blocks = __le32_to_cpu(dev->size_high);
5686 array_blocks = array_blocks << 32;
5687 array_blocks += __le32_to_cpu(dev->size_low);
5688
5689 return array_blocks;
5690 }
5691
5692 /* set array size in metadata
5693 */
5694 map = get_imsm_map(dev, 0);
5695 array_blocks = map->blocks_per_member * used_disks;
5696
5697 /* round array size down to closest MB
5698 */
5699 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5700 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5701 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5702
5703 return array_blocks;
5704}
5705
28bce06f
AK
5706static void imsm_set_disk(struct active_array *a, int n, int state);
5707
0e2d1a4e
AK
5708static void imsm_progress_container_reshape(struct intel_super *super)
5709{
5710 /* if no device has a migr_state, but some device has a
5711 * different number of members than the previous device, start
5712 * changing the number of devices in this device to match
5713 * previous.
5714 */
5715 struct imsm_super *mpb = super->anchor;
5716 int prev_disks = -1;
5717 int i;
1dfaa380 5718 int copy_map_size;
0e2d1a4e
AK
5719
5720 for (i = 0; i < mpb->num_raid_devs; i++) {
5721 struct imsm_dev *dev = get_imsm_dev(super, i);
5722 struct imsm_map *map = get_imsm_map(dev, 0);
5723 struct imsm_map *map2;
5724 int prev_num_members;
0e2d1a4e
AK
5725
5726 if (dev->vol.migr_state)
5727 return;
5728
5729 if (prev_disks == -1)
5730 prev_disks = map->num_members;
5731 if (prev_disks == map->num_members)
5732 continue;
5733
5734 /* OK, this array needs to enter reshape mode.
5735 * i.e it needs a migr_state
5736 */
5737
1dfaa380 5738 copy_map_size = sizeof_imsm_map(map);
0e2d1a4e
AK
5739 prev_num_members = map->num_members;
5740 map->num_members = prev_disks;
5741 dev->vol.migr_state = 1;
5742 dev->vol.curr_migr_unit = 0;
5743 dev->vol.migr_type = MIGR_GEN_MIGR;
5744 for (i = prev_num_members;
5745 i < map->num_members; i++)
5746 set_imsm_ord_tbl_ent(map, i, i);
5747 map2 = get_imsm_map(dev, 1);
5748 /* Copy the current map */
1dfaa380 5749 memcpy(map2, map, copy_map_size);
0e2d1a4e
AK
5750 map2->num_members = prev_num_members;
5751
70bdf0dc 5752 imsm_set_array_size(dev);
0e2d1a4e
AK
5753 super->updates_pending++;
5754 }
5755}
5756
/* Handle dirty -> clean transititions, resync and reshape.  Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception, when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 *
 * @consistent: 2 requests a consistency re-evaluation; 1/0 mark the
 * array clean/dirty.  Returns the consistency value actually applied.
 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);
	__u32 blocks_per_unit;

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR) {
		/* array state change is blocked due to reshape action
		 * We might need to
		 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
		 * - finish the reshape (if last_checkpoint is big and action != reshape)
		 * - update curr_migr_unit
		 */
		if (a->curr_action == reshape) {
			/* still reshaping, maybe update curr_migr_unit */
			goto mark_checkpoint;
		} else {
			if (a->last_checkpoint == 0 && a->prev_action == reshape) {
				/* for some reason we aborted the reshape.
				 * Better clean up
				 */
				struct imsm_map *map2 = get_imsm_map(dev, 1);
				dev->vol.migr_state = 0;
				dev->vol.migr_type = 0;
				dev->vol.curr_migr_unit = 0;
				memcpy(map, map2, sizeof_imsm_map(map2));
				super->updates_pending++;
			}
			if (a->last_checkpoint >= a->info.component_size) {
				unsigned long long array_blocks;
				int used_disks;
				struct mdinfo *mdi;

				used_disks = imsm_num_data_members(dev, 0);
				if (used_disks > 0) {
					/* NOTE(review): this multiply is
					 * performed in 32 bits (__u32 * int)
					 * and may overflow for >2TB volumes -
					 * confirm */
					array_blocks =
						map->blocks_per_member *
						used_disks;
					/* round array size down to closest MB
					 */
					array_blocks = (array_blocks
							>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
					a->info.custom_array_size = array_blocks;
					/* encourage manager to update array
					 * size
					 */

					a->check_reshape = 1;
				}
				/* finalize online capacity expansion/reshape */
				for (mdi = a->info.devs; mdi; mdi = mdi->next)
					imsm_set_disk(a,
						      mdi->disk.raid_disk,
						      mdi->curr_state);

				imsm_progress_container_reshape(super);
			}
		}
	}

	/* before we activate this array handle any missing disks */
	if (consistent == 2)
		handle_missing(super, dev);

	/* a consistency query only reports "clean" when resync is done,
	 * the map is NORMAL and no migration is in flight */
	if (consistent == 2 &&
	    (!is_resync_complete(&a->info) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(&a->info)) {
		/* complete intialization / resync,
		 * recovery and interrupted recovery is completed in
		 * ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
			a->last_checkpoint = 0;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start\n");
		if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
			migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
		else
			migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
		super->updates_pending++;
	}

mark_checkpoint:
	/* skip checkpointing for general migration,
	 * it is controlled in mdadm
	 */
	if (is_gen_migration(dev))
		goto skip_mark_checkpoint;

	/* check if we can update curr_migr_unit from resync_start, recovery_start */
	blocks_per_unit = blocks_per_migr_unit(super, dev);
	if (blocks_per_unit) {
		__u32 units32;
		__u64 units;

		units = a->last_checkpoint / blocks_per_unit;
		units32 = units;

		/* check that we did not overflow 32-bits, and that
		 * curr_migr_unit needs updating
		 */
		if (units32 == units &&
		    units32 != 0 &&
		    __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
			dprintf("imsm: mark checkpoint (%u)\n", units32);
			dev->vol.curr_migr_unit = __cpu_to_le32(units32);
			super->updates_pending++;
		}
	}

skip_mark_checkpoint:
	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}

	return consistent;
}
5900
8d45d196 5901static void imsm_set_disk(struct active_array *a, int n, int state)
845dea95 5902{
8d45d196
DW
5903 int inst = a->info.container_member;
5904 struct intel_super *super = a->container->sb;
949c47a0 5905 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5906 struct imsm_map *map = get_imsm_map(dev, 0);
8d45d196 5907 struct imsm_disk *disk;
0c046afd 5908 int failed;
b10b37b8 5909 __u32 ord;
0c046afd 5910 __u8 map_state;
8d45d196
DW
5911
5912 if (n > map->num_members)
5913 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5914 n, map->num_members - 1);
5915
5916 if (n < 0)
5917 return;
5918
4e6e574a 5919 dprintf("imsm: set_disk %d:%x\n", n, state);
8d45d196 5920
98130f40 5921 ord = get_imsm_ord_tbl_ent(dev, n, -1);
b10b37b8 5922 disk = get_imsm_disk(super, ord_to_idx(ord));
8d45d196 5923
5802a811 5924 /* check for new failures */
0556e1a2
DW
5925 if (state & DS_FAULTY) {
5926 if (mark_failure(dev, disk, ord_to_idx(ord)))
5927 super->updates_pending++;
8d45d196 5928 }
47ee5a45 5929
19859edc 5930 /* check if in_sync */
0556e1a2 5931 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
b10b37b8
DW
5932 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5933
5934 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
19859edc
DW
5935 super->updates_pending++;
5936 }
8d45d196 5937
0c046afd
DW
5938 failed = imsm_count_failed(super, dev);
5939 map_state = imsm_check_degraded(super, dev, failed);
5802a811 5940
0c046afd
DW
5941 /* check if recovery complete, newly degraded, or failed */
5942 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
f8f603f1 5943 end_migration(dev, map_state);
0556e1a2
DW
5944 map = get_imsm_map(dev, 0);
5945 map->failed_disk_num = ~0;
0c046afd 5946 super->updates_pending++;
484240d8 5947 a->last_checkpoint = 0;
0c046afd
DW
5948 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5949 map->map_state != map_state &&
5950 !dev->vol.migr_state) {
5951 dprintf("imsm: mark degraded\n");
5952 map->map_state = map_state;
5953 super->updates_pending++;
484240d8 5954 a->last_checkpoint = 0;
0c046afd
DW
5955 } else if (map_state == IMSM_T_STATE_FAILED &&
5956 map->map_state != map_state) {
5957 dprintf("imsm: mark failed\n");
f8f603f1 5958 end_migration(dev, map_state);
0c046afd 5959 super->updates_pending++;
484240d8 5960 a->last_checkpoint = 0;
28bce06f
AK
5961 } else if (is_gen_migration(dev)) {
5962 dprintf("imsm: Detected General Migration in state: ");
5963 if (map_state == IMSM_T_STATE_NORMAL) {
5964 end_migration(dev, map_state);
5965 map = get_imsm_map(dev, 0);
5966 map->failed_disk_num = ~0;
5967 dprintf("normal\n");
5968 } else {
5969 if (map_state == IMSM_T_STATE_DEGRADED) {
5970 printf("degraded\n");
5971 end_migration(dev, map_state);
5972 } else {
5973 dprintf("failed\n");
5974 }
5975 map->map_state = map_state;
5976 }
5977 super->updates_pending++;
5802a811 5978 }
845dea95
NB
5979}
5980
f796af5d 5981static int store_imsm_mpb(int fd, struct imsm_super *mpb)
c2a1e7da 5982{
f796af5d 5983 void *buf = mpb;
c2a1e7da
DW
5984 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5985 unsigned long long dsize;
5986 unsigned long long sectors;
5987
5988 get_dev_size(fd, NULL, &dsize);
5989
272f648f
DW
5990 if (mpb_size > 512) {
5991 /* -1 to account for anchor */
5992 sectors = mpb_sectors(mpb) - 1;
c2a1e7da 5993
272f648f
DW
5994 /* write the extended mpb to the sectors preceeding the anchor */
5995 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5996 return 1;
c2a1e7da 5997
f21e18ca
N
5998 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5999 != 512 * sectors)
272f648f
DW
6000 return 1;
6001 }
c2a1e7da 6002
272f648f
DW
6003 /* first block is stored on second to last sector of the disk */
6004 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
c2a1e7da
DW
6005 return 1;
6006
f796af5d 6007 if (write(fd, buf, 512) != 512)
c2a1e7da
DW
6008 return 1;
6009
c2a1e7da
DW
6010 return 0;
6011}
6012
2e735d19 6013static void imsm_sync_metadata(struct supertype *container)
845dea95 6014{
2e735d19 6015 struct intel_super *super = container->sb;
c2a1e7da 6016
1a64be56 6017 dprintf("sync metadata: %d\n", super->updates_pending);
c2a1e7da
DW
6018 if (!super->updates_pending)
6019 return;
6020
36988a3d 6021 write_super_imsm(container, 0);
c2a1e7da
DW
6022
6023 super->updates_pending = 0;
845dea95
NB
6024}
6025
272906ef
DW
6026static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
6027{
6028 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 6029 int i = get_imsm_disk_idx(dev, idx, -1);
272906ef
DW
6030 struct dl *dl;
6031
6032 for (dl = super->disks; dl; dl = dl->next)
6033 if (dl->index == i)
6034 break;
6035
25ed7e59 6036 if (dl && is_failed(&dl->disk))
272906ef
DW
6037 dl = NULL;
6038
6039 if (dl)
6040 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
6041
6042 return dl;
6043}
6044
/* Select a container disk that can act as a spare for slot 'slot' of
 * subarray 'a'.
 *
 * A candidate is rejected when it is already a member of 'a', appears on
 * 'additional_test_list', is failed/in-use, or (when 'activate_new' is 0)
 * is a pristine spare (index == -1).  A surviving candidate must have a
 * free extent covering every member volume's [pba_of_lba0, +blocks_per_member)
 * range, checked against the extent list from get_extents().
 *
 * Returns the chosen disk, or NULL when no disk qualifies.
 */
static struct dl *imsm_add_spare(struct intel_super *super, int slot,
				 struct active_array *a, int activate_new,
				 struct mdinfo *additional_test_list)
{
	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
	int idx = get_imsm_disk_idx(dev, slot, -1);
	struct imsm_super *mpb = super->anchor;
	struct imsm_map *map;
	unsigned long long pos;
	struct mdinfo *d;
	struct extent *ex;
	int i, j;
	int found;
	__u32 array_start = 0;
	__u32 array_end = 0;
	struct dl *dl;
	struct mdinfo *test_list;

	for (dl = super->disks; dl; dl = dl->next) {
		/* If in this array, skip */
		for (d = a->info.devs ; d ; d = d->next)
			if (d->state_fd >= 0 &&
			    d->disk.major == dl->major &&
			    d->disk.minor == dl->minor) {
				dprintf("%x:%x already in array\n",
					dl->major, dl->minor);
				break;
			}
		if (d)
			continue;
		/* skip disks the caller has explicitly excluded */
		test_list = additional_test_list;
		while (test_list) {
			if (test_list->disk.major == dl->major &&
			    test_list->disk.minor == dl->minor) {
				dprintf("%x:%x already in additional test list\n",
					dl->major, dl->minor);
				break;
			}
			test_list = test_list->next;
		}
		if (test_list)
			continue;

		/* skip in use or failed drives */
		if (is_failed(&dl->disk) || idx == dl->index ||
		    dl->index == -2) {
			dprintf("%x:%x status (failed: %d index: %d)\n",
				dl->major, dl->minor, is_failed(&dl->disk), idx);
			continue;
		}

		/* skip pure spares when we are looking for partially
		 * assimilated drives
		 */
		if (dl->index == -1 && !activate_new)
			continue;

		/* Does this unused device have the requisite free space?
		 * It needs to be able to cover all member volumes
		 */
		ex = get_extents(super, dl);
		if (!ex) {
			dprintf("cannot get extents\n");
			continue;
		}
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);

			/* check if this disk is already a member of
			 * this array
			 */
			if (get_imsm_disk_slot(map, dl->index) >= 0)
				continue;

			found = 0;
			j = 0;
			pos = 0;
			array_start = __le32_to_cpu(map->pba_of_lba0);
			array_end = array_start +
				    __le32_to_cpu(map->blocks_per_member) - 1;

			do {
				/* check that we can start at pba_of_lba0 with
				 * blocks_per_member of space
				 */
				if (array_start >= pos && array_end < ex[j].start) {
					found = 1;
					break;
				}
				pos = ex[j].start + ex[j].size;
				j++;
				/* extent list is terminated by a zero-size entry;
				 * ex[j-1] is the entry just consumed
				 */
			} while (ex[j-1].size);

			if (!found)
				break;
		}

		free(ex);
		if (i < mpb->num_raid_devs) {
			dprintf("%x:%x does not have %u to %u available\n",
				dl->major, dl->minor, array_start, array_end);
			/* No room */
			continue;
		}
		/* candidate covers every member volume - use it */
		return dl;
	}

	/* loop exhausted: dl is NULL here */
	return dl;
}
6155
95d07a2c
LM
6156
6157static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
6158{
6159 struct imsm_dev *dev2;
6160 struct imsm_map *map;
6161 struct dl *idisk;
6162 int slot;
6163 int idx;
6164 __u8 state;
6165
6166 dev2 = get_imsm_dev(cont->sb, dev_idx);
6167 if (dev2) {
6168 state = imsm_check_degraded(cont->sb, dev2, failed);
6169 if (state == IMSM_T_STATE_FAILED) {
6170 map = get_imsm_map(dev2, 0);
6171 if (!map)
6172 return 1;
6173 for (slot = 0; slot < map->num_members; slot++) {
6174 /*
6175 * Check if failed disks are deleted from intel
6176 * disk list or are marked to be deleted
6177 */
98130f40 6178 idx = get_imsm_disk_idx(dev2, slot, -1);
95d07a2c
LM
6179 idisk = get_imsm_dl_disk(cont->sb, idx);
6180 /*
6181 * Do not rebuild the array if failed disks
6182 * from failed sub-array are not removed from
6183 * container.
6184 */
6185 if (idisk &&
6186 is_failed(&idisk->disk) &&
6187 (idisk->action != DISK_REMOVE))
6188 return 0;
6189 }
6190 }
6191 }
6192 return 1;
6193}
6194
88758e9d
DW
/* mdmon activate_spare hook for imsm containers.
 *
 * Returns a list of mdinfo records describing spare disks to insert into
 * degraded slots of array 'a', and queues a matching metadata_update
 * (imsm_update_activate_spare records) on '*updates'.  Returns NULL when
 * no action should be taken.
 */
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array. We replace failed regions one a
	 * array at a time. The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	/* start from raid_disks and subtract active members below */
	int failed = a->info.array.raid_disks;
	struct mdinfo *rv = NULL;
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;
	int allowed;

	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
		    d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR)
		/* No repair during migration */
		return NULL;

	if (a->info.array.level == 4)
		/* No repair for takeovered array
		 * imsm doesn't support raid4
		 */
		return NULL;

	/* only a plain DEGRADED array is repairable here */
	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/*
	 * If there are any failed disks check state of the other volume.
	 * Block rebuild if the another one is failed until failed disks
	 * are removed from container.
	 */
	if (failed) {
		dprintf("found failed disks in %s, check if there another"
			"failed sub-array.\n",
			dev->volume);
		/* check if states of the other volumes allow for rebuild */
		for (i = 0; i < super->anchor->num_raid_devs; i++) {
			if (i != inst) {
				allowed = imsm_rebuild_allowed(a->container,
							       i, failed);
				if (!allowed)
					return NULL;
			}
		}
	}

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery.  Try to re-add the
		 * previous occupant of this slot, if this fails see if
		 * we can continue the assimilation of a spare that was
		 * partially assimilated, finally try to activate a new
		 * spare.
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 0, NULL);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 1, NULL);
		if (!dl)
			continue;

		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		if (!di)
			continue;
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare.  imsm_process_update() will create a
		 * new index in this case.  Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->recovery_start = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		/* refresh the seed used to bump family_num in process_update */
		super->random = random32();
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

		/* NOTE(review): only one slot is filled per invocation;
		 * remaining slots are handled on subsequent calls */
		break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	if (mu) {
		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
		if (mu->buf == NULL) {
			free(mu);
			mu = NULL;
		}
	}
	if (!mu) {
		/* allocation failed: release the candidate list and bail */
		while (rv) {
			struct mdinfo *n = rv->next;

			free(rv);
			rv = n;
		}
		return NULL;
	}

	mu->space = NULL;
	mu->space_list = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	/* emit one update record per chosen spare, chained via u->next */
	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		/* ownership of the dl pointer moves into the update record */
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
6376
54c2c1ea 6377static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
8273f55e 6378{
54c2c1ea
DW
6379 struct imsm_dev *dev = get_imsm_dev(super, idx);
6380 struct imsm_map *map = get_imsm_map(dev, 0);
6381 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
6382 struct disk_info *inf = get_disk_info(u);
6383 struct imsm_disk *disk;
8273f55e
DW
6384 int i;
6385 int j;
8273f55e 6386
54c2c1ea 6387 for (i = 0; i < map->num_members; i++) {
98130f40 6388 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
54c2c1ea
DW
6389 for (j = 0; j < new_map->num_members; j++)
6390 if (serialcmp(disk->serial, inf[j].serial) == 0)
8273f55e
DW
6391 return 1;
6392 }
6393
6394 return 0;
6395}
6396
1a64be56
LM
6397
6398static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6399{
6400 struct dl *dl = NULL;
6401 for (dl = super->disks; dl; dl = dl->next)
6402 if ((dl->major == major) && (dl->minor == minor))
6403 return dl;
6404 return NULL;
6405}
6406
6407static int remove_disk_super(struct intel_super *super, int major, int minor)
6408{
6409 struct dl *prev = NULL;
6410 struct dl *dl;
6411
6412 prev = NULL;
6413 for (dl = super->disks; dl; dl = dl->next) {
6414 if ((dl->major == major) && (dl->minor == minor)) {
6415 /* remove */
6416 if (prev)
6417 prev->next = dl->next;
6418 else
6419 super->disks = dl->next;
6420 dl->next = NULL;
6421 __free_imsm_disk(dl);
6422 dprintf("%s: removed %x:%x\n",
6423 __func__, major, minor);
6424 break;
6425 }
6426 prev = dl;
6427 }
6428 return 0;
6429}
6430
f21e18ca 6431static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
ae6aad82 6432
1a64be56
LM
/* Drain super->disk_mgmt_list, applying queued DISK_ADD / DISK_REMOVE
 * actions to the container disk list.
 *
 * DISK_ADD entries are spliced directly into super->disks (ownership
 * transfers; the entry is NOT freed here).  DISK_REMOVE entries mark the
 * matching container disk for removal and, for spares (index == -1),
 * remove it immediately; the management entry itself is then freed.
 *
 * Returns non-zero when a disk was added, signalling the caller to
 * re-check degraded arrays for possible spare activation.
 */
static int add_remove_disk_update(struct intel_super *super)
{
	int check_degraded = 0;
	struct dl *disk = NULL;
	/* add/remove some spares to/from the metadata/contrainer */
	while (super->disk_mgmt_list) {
		struct dl *disk_cfg;

		/* pop the head of the management queue */
		disk_cfg = super->disk_mgmt_list;
		super->disk_mgmt_list = disk_cfg->next;
		disk_cfg->next = NULL;

		if (disk_cfg->action == DISK_ADD) {
			/* the management entry itself becomes the new
			 * container disk record */
			disk_cfg->next = super->disks;
			super->disks = disk_cfg;
			check_degraded = 1;
			dprintf("%s: added %x:%x\n",
				__func__, disk_cfg->major,
				disk_cfg->minor);
		} else if (disk_cfg->action == DISK_REMOVE) {
			dprintf("Disk remove action processed: %x.%x\n",
				disk_cfg->major, disk_cfg->minor);
			disk = get_disk_super(super,
					      disk_cfg->major,
					      disk_cfg->minor);
			if (disk) {
				/* store action status */
				disk->action = DISK_REMOVE;
				/* remove spare disks only */
				if (disk->index == -1) {
					remove_disk_super(super,
							  disk_cfg->major,
							  disk_cfg->minor);
				}
			}
			/* release allocate disk structure */
			__free_imsm_disk(disk_cfg);
		}
	}
	return check_degraded;
}
6474
a29911da
PC
6475
6476static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6477 struct intel_super *super,
6478 void ***space_list)
6479{
6480 struct intel_dev *id;
6481 void **tofree = NULL;
6482 int ret_val = 0;
6483
6484 dprintf("apply_reshape_migration_update()\n");
6485 if ((u->subdev < 0) ||
6486 (u->subdev > 1)) {
6487 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6488 return ret_val;
6489 }
6490 if ((space_list == NULL) || (*space_list == NULL)) {
6491 dprintf("imsm: Error: Memory is not allocated\n");
6492 return ret_val;
6493 }
6494
6495 for (id = super->devlist ; id; id = id->next) {
6496 if (id->index == (unsigned)u->subdev) {
6497 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6498 struct imsm_map *map;
6499 struct imsm_dev *new_dev =
6500 (struct imsm_dev *)*space_list;
6501 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6502 int to_state;
6503 struct dl *new_disk;
6504
6505 if (new_dev == NULL)
6506 return ret_val;
6507 *space_list = **space_list;
6508 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6509 map = get_imsm_map(new_dev, 0);
6510 if (migr_map) {
6511 dprintf("imsm: Error: migration in progress");
6512 return ret_val;
6513 }
6514
6515 to_state = map->map_state;
6516 if ((u->new_level == 5) && (map->raid_level == 0)) {
6517 map->num_members++;
6518 /* this should not happen */
6519 if (u->new_disks[0] < 0) {
6520 map->failed_disk_num =
6521 map->num_members - 1;
6522 to_state = IMSM_T_STATE_DEGRADED;
6523 } else
6524 to_state = IMSM_T_STATE_NORMAL;
6525 }
8e59f3d8 6526 migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
a29911da
PC
6527 if (u->new_level > -1)
6528 map->raid_level = u->new_level;
6529 migr_map = get_imsm_map(new_dev, 1);
6530 if ((u->new_level == 5) &&
6531 (migr_map->raid_level == 0)) {
6532 int ord = map->num_members - 1;
6533 migr_map->num_members--;
6534 if (u->new_disks[0] < 0)
6535 ord |= IMSM_ORD_REBUILD;
6536 set_imsm_ord_tbl_ent(map,
6537 map->num_members - 1,
6538 ord);
6539 }
6540 id->dev = new_dev;
6541 tofree = (void **)dev;
6542
4bba0439
PC
6543 /* update chunk size
6544 */
6545 if (u->new_chunksize > 0)
6546 map->blocks_per_strip =
6547 __cpu_to_le16(u->new_chunksize * 2);
6548
a29911da
PC
6549 /* add disk
6550 */
6551 if ((u->new_level != 5) ||
6552 (migr_map->raid_level != 0) ||
6553 (migr_map->raid_level == map->raid_level))
6554 goto skip_disk_add;
6555
6556 if (u->new_disks[0] >= 0) {
6557 /* use passes spare
6558 */
6559 new_disk = get_disk_super(super,
6560 major(u->new_disks[0]),
6561 minor(u->new_disks[0]));
6562 dprintf("imsm: new disk for reshape is: %i:%i "
6563 "(%p, index = %i)\n",
6564 major(u->new_disks[0]),
6565 minor(u->new_disks[0]),
6566 new_disk, new_disk->index);
6567 if (new_disk == NULL)
6568 goto error_disk_add;
6569
6570 new_disk->index = map->num_members - 1;
6571 /* slot to fill in autolayout
6572 */
6573 new_disk->raiddisk = new_disk->index;
6574 new_disk->disk.status |= CONFIGURED_DISK;
6575 new_disk->disk.status &= ~SPARE_DISK;
6576 } else
6577 goto error_disk_add;
6578
6579skip_disk_add:
6580 *tofree = *space_list;
6581 /* calculate new size
6582 */
6583 imsm_set_array_size(new_dev);
6584
6585 ret_val = 1;
6586 }
6587 }
6588
6589 if (tofree)
6590 *space_list = tofree;
6591 return ret_val;
6592
6593error_disk_add:
6594 dprintf("Error: imsm: Cannot find disk.\n");
6595 return ret_val;
6596}
6597
6598
2e5dc010
N
6599static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6600 struct intel_super *super,
6601 void ***space_list)
6602{
6603 struct dl *new_disk;
6604 struct intel_dev *id;
6605 int i;
6606 int delta_disks = u->new_raid_disks - u->old_raid_disks;
ee4beede 6607 int disk_count = u->old_raid_disks;
2e5dc010
N
6608 void **tofree = NULL;
6609 int devices_to_reshape = 1;
6610 struct imsm_super *mpb = super->anchor;
6611 int ret_val = 0;
d098291a 6612 unsigned int dev_id;
2e5dc010 6613
ed7333bd 6614 dprintf("imsm: apply_reshape_container_disks_update()\n");
2e5dc010
N
6615
6616 /* enable spares to use in array */
6617 for (i = 0; i < delta_disks; i++) {
6618 new_disk = get_disk_super(super,
6619 major(u->new_disks[i]),
6620 minor(u->new_disks[i]));
ed7333bd
AK
6621 dprintf("imsm: new disk for reshape is: %i:%i "
6622 "(%p, index = %i)\n",
2e5dc010
N
6623 major(u->new_disks[i]), minor(u->new_disks[i]),
6624 new_disk, new_disk->index);
6625 if ((new_disk == NULL) ||
6626 ((new_disk->index >= 0) &&
6627 (new_disk->index < u->old_raid_disks)))
6628 goto update_reshape_exit;
ee4beede 6629 new_disk->index = disk_count++;
2e5dc010
N
6630 /* slot to fill in autolayout
6631 */
6632 new_disk->raiddisk = new_disk->index;
6633 new_disk->disk.status |=
6634 CONFIGURED_DISK;
6635 new_disk->disk.status &= ~SPARE_DISK;
6636 }
6637
ed7333bd
AK
6638 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6639 mpb->num_raid_devs);
2e5dc010
N
6640 /* manage changes in volume
6641 */
d098291a 6642 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
2e5dc010
N
6643 void **sp = *space_list;
6644 struct imsm_dev *newdev;
6645 struct imsm_map *newmap, *oldmap;
6646
d098291a
AK
6647 for (id = super->devlist ; id; id = id->next) {
6648 if (id->index == dev_id)
6649 break;
6650 }
6651 if (id == NULL)
6652 break;
2e5dc010
N
6653 if (!sp)
6654 continue;
6655 *space_list = *sp;
6656 newdev = (void*)sp;
6657 /* Copy the dev, but not (all of) the map */
6658 memcpy(newdev, id->dev, sizeof(*newdev));
6659 oldmap = get_imsm_map(id->dev, 0);
6660 newmap = get_imsm_map(newdev, 0);
6661 /* Copy the current map */
6662 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6663 /* update one device only
6664 */
6665 if (devices_to_reshape) {
ed7333bd
AK
6666 dprintf("imsm: modifying subdev: %i\n",
6667 id->index);
2e5dc010
N
6668 devices_to_reshape--;
6669 newdev->vol.migr_state = 1;
6670 newdev->vol.curr_migr_unit = 0;
6671 newdev->vol.migr_type = MIGR_GEN_MIGR;
6672 newmap->num_members = u->new_raid_disks;
6673 for (i = 0; i < delta_disks; i++) {
6674 set_imsm_ord_tbl_ent(newmap,
6675 u->old_raid_disks + i,
6676 u->old_raid_disks + i);
6677 }
6678 /* New map is correct, now need to save old map
6679 */
6680 newmap = get_imsm_map(newdev, 1);
6681 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6682
70bdf0dc 6683 imsm_set_array_size(newdev);
2e5dc010
N
6684 }
6685
6686 sp = (void **)id->dev;
6687 id->dev = newdev;
6688 *sp = tofree;
6689 tofree = sp;
8e59f3d8
AK
6690
6691 /* Clear migration record */
6692 memset(super->migr_rec, 0, sizeof(struct migr_record));
2e5dc010 6693 }
819bc634
AK
6694 if (tofree)
6695 *space_list = tofree;
2e5dc010
N
6696 ret_val = 1;
6697
6698update_reshape_exit:
6699
6700 return ret_val;
6701}
6702
/* Apply an update_takeover record to subarray u->subarray, converting the
 * metadata between RAID10 and RAID0 layouts.
 *
 * R10_TO_R0: requires exactly half the members to be failed; failed disks
 * are demoted to spares and the map halved to a RAID0.
 * R0_TO_R10: member indexes are spread to even slots, *missing* placeholder
 * disks are synthesized in the odd slots from '*space_list' buffers, and a
 * new doubled, degraded RAID1-domain map replaces the old device record.
 *
 * Returns 1 on success, 0 on failure.
 */
static int apply_takeover_update(struct imsm_update_takeover *u,
				 struct intel_super *super,
				 void ***space_list)
{
	struct imsm_dev *dev = NULL;
	struct intel_dev *dv;
	struct imsm_dev *dev_new;
	struct imsm_map *map;
	struct dl *dm, *du;
	int i;

	/* locate the target subarray's device record */
	for (dv = super->devlist; dv; dv = dv->next)
		if (dv->index == (unsigned int)u->subarray) {
			dev = dv->dev;
			break;
		}

	if (dev == NULL)
		return 0;

	map = get_imsm_map(dev, 0);

	if (u->direction == R10_TO_R0) {
		/* Number of failed disks must be half of initial disk number */
		if (imsm_count_failed(super, dev) != (map->num_members / 2))
			return 0;

		/* iterate through devices to mark removed disks as spare */
		for (dm = super->disks; dm; dm = dm->next) {
			if (dm->disk.status & FAILED_DISK) {
				int idx = dm->index;
				/* update indexes on the disk list */
/* FIXME this loop-with-the-loop looks wrong, I'm not convinced
   the index values will end up being correct.... NB */
				for (du = super->disks; du; du = du->next)
					if (du->index > idx)
						du->index--;
				/* mark as spare disk */
				dm->disk.status = SPARE_DISK;
				dm->index = -1;
			}
		}
		/* update map */
		map->num_members = map->num_members / 2;
		map->map_state = IMSM_T_STATE_NORMAL;
		map->num_domains = 1;
		map->raid_level = 0;
		map->failed_disk_num = -1;
	}

	if (u->direction == R0_TO_R10) {
		void **space;
		/* update slots in current disk list */
		for (dm = super->disks; dm; dm = dm->next) {
			if (dm->index >= 0)
				dm->index *= 2;
		}
		/* create new *missing* disks */
		for (i = 0; i < map->num_members; i++) {
			space = *space_list;
			if (!space)
				continue;
			*space_list = *space;
			du = (void *)space;
			/* NOTE(review): assumes super->disks is non-NULL here --
			 * the template for the placeholder record; verify caller
			 * guarantees at least one container disk */
			memcpy(du, super->disks, sizeof(*du));
			du->fd = -1;
			du->minor = 0;
			du->major = 0;
			du->index = (i * 2) + 1;
			sprintf((char *)du->disk.serial,
				" MISSING_%d", du->index);
			sprintf((char *)du->serial,
				"MISSING_%d", du->index);
			du->next = super->missing;
			super->missing = du;
		}
		/* create new dev and map */
		space = *space_list;
		if (!space)
			return 0;
		*space_list = *space;
		dev_new = (void *)space;
		memcpy(dev_new, dev, sizeof(*dev));
		/* update new map */
		map = get_imsm_map(dev_new, 0);
		map->num_members = map->num_members * 2;
		map->map_state = IMSM_T_STATE_DEGRADED;
		map->num_domains = 2;
		map->raid_level = 1;
		/* replace dev<->dev_new */
		dv->dev = dev_new;
	}

	/* update disk order table */
	for (du = super->disks; du; du = du->next)
		if (du->index >= 0)
			set_imsm_ord_tbl_ent(map, du->index, du->index);
	for (du = super->missing; du; du = du->next)
		if (du->index >= 0) {
			set_imsm_ord_tbl_ent(map, du->index, du->index);
			/* NOTE(review): dev_new is only assigned on the
			 * R0_TO_R10 path; presumably super->missing is empty
			 * on the R10_TO_R0 path -- confirm */
			mark_missing(dev_new, &du->disk, du->index);
		}

	return 1;
}
6807
e8319a19
DW
6808static void imsm_process_update(struct supertype *st,
6809 struct metadata_update *update)
6810{
6811 /**
6812 * crack open the metadata_update envelope to find the update record
6813 * update can be one of:
d195167d
AK
6814 * update_reshape_container_disks - all the arrays in the container
6815 * are being reshaped to have more devices. We need to mark
6816 * the arrays for general migration and convert selected spares
6817 * into active devices.
6818 * update_activate_spare - a spare device has replaced a failed
e8319a19
DW
6819 * device in an array, update the disk_ord_tbl. If this disk is
6820 * present in all member arrays then also clear the SPARE_DISK
6821 * flag
d195167d
AK
6822 * update_create_array
6823 * update_kill_array
6824 * update_rename_array
6825 * update_add_remove_disk
e8319a19
DW
6826 */
6827 struct intel_super *super = st->sb;
4d7b1503 6828 struct imsm_super *mpb;
e8319a19
DW
6829 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6830
4d7b1503
DW
6831 /* update requires a larger buf but the allocation failed */
6832 if (super->next_len && !super->next_buf) {
6833 super->next_len = 0;
6834 return;
6835 }
6836
6837 if (super->next_buf) {
6838 memcpy(super->next_buf, super->buf, super->len);
6839 free(super->buf);
6840 super->len = super->next_len;
6841 super->buf = super->next_buf;
6842
6843 super->next_len = 0;
6844 super->next_buf = NULL;
6845 }
6846
6847 mpb = super->anchor;
6848
e8319a19 6849 switch (type) {
bb025c2f
KW
6850 case update_takeover: {
6851 struct imsm_update_takeover *u = (void *)update->buf;
1a2487c2
KW
6852 if (apply_takeover_update(u, super, &update->space_list)) {
6853 imsm_update_version_info(super);
bb025c2f 6854 super->updates_pending++;
1a2487c2 6855 }
bb025c2f
KW
6856 break;
6857 }
6858
78b10e66 6859 case update_reshape_container_disks: {
d195167d 6860 struct imsm_update_reshape *u = (void *)update->buf;
2e5dc010
N
6861 if (apply_reshape_container_disks_update(
6862 u, super, &update->space_list))
6863 super->updates_pending++;
78b10e66
N
6864 break;
6865 }
48c5303a 6866 case update_reshape_migration: {
a29911da
PC
6867 struct imsm_update_reshape_migration *u = (void *)update->buf;
6868 if (apply_reshape_migration_update(
6869 u, super, &update->space_list))
6870 super->updates_pending++;
48c5303a
PC
6871 break;
6872 }
e8319a19
DW
6873 case update_activate_spare: {
6874 struct imsm_update_activate_spare *u = (void *) update->buf;
949c47a0 6875 struct imsm_dev *dev = get_imsm_dev(super, u->array);
a965f303 6876 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd 6877 struct imsm_map *migr_map;
e8319a19
DW
6878 struct active_array *a;
6879 struct imsm_disk *disk;
0c046afd 6880 __u8 to_state;
e8319a19 6881 struct dl *dl;
e8319a19 6882 unsigned int found;
0c046afd 6883 int failed;
98130f40 6884 int victim = get_imsm_disk_idx(dev, u->slot, -1);
e8319a19
DW
6885 int i;
6886
6887 for (dl = super->disks; dl; dl = dl->next)
d23fe947 6888 if (dl == u->dl)
e8319a19
DW
6889 break;
6890
6891 if (!dl) {
6892 fprintf(stderr, "error: imsm_activate_spare passed "
1f24f035
DW
6893 "an unknown disk (index: %d)\n",
6894 u->dl->index);
e8319a19
DW
6895 return;
6896 }
6897
6898 super->updates_pending++;
0c046afd
DW
6899 /* count failures (excluding rebuilds and the victim)
6900 * to determine map[0] state
6901 */
6902 failed = 0;
6903 for (i = 0; i < map->num_members; i++) {
6904 if (i == u->slot)
6905 continue;
98130f40
AK
6906 disk = get_imsm_disk(super,
6907 get_imsm_disk_idx(dev, i, -1));
25ed7e59 6908 if (!disk || is_failed(disk))
0c046afd
DW
6909 failed++;
6910 }
6911
d23fe947
DW
6912 /* adding a pristine spare, assign a new index */
6913 if (dl->index < 0) {
6914 dl->index = super->anchor->num_disks;
6915 super->anchor->num_disks++;
6916 }
d23fe947 6917 disk = &dl->disk;
f2f27e63
DW
6918 disk->status |= CONFIGURED_DISK;
6919 disk->status &= ~SPARE_DISK;
e8319a19 6920
0c046afd
DW
6921 /* mark rebuild */
6922 to_state = imsm_check_degraded(super, dev, failed);
6923 map->map_state = IMSM_T_STATE_DEGRADED;
8e59f3d8 6924 migrate(dev, super, to_state, MIGR_REBUILD);
0c046afd
DW
6925 migr_map = get_imsm_map(dev, 1);
6926 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6927 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6928
148acb7b
DW
6929 /* update the family_num to mark a new container
6930 * generation, being careful to record the existing
6931 * family_num in orig_family_num to clean up after
6932 * earlier mdadm versions that neglected to set it.
6933 */
6934 if (mpb->orig_family_num == 0)
6935 mpb->orig_family_num = mpb->family_num;
6936 mpb->family_num += super->random;
6937
e8319a19
DW
6938 /* count arrays using the victim in the metadata */
6939 found = 0;
6940 for (a = st->arrays; a ; a = a->next) {
949c47a0 6941 dev = get_imsm_dev(super, a->info.container_member);
620b1713
DW
6942 map = get_imsm_map(dev, 0);
6943
6944 if (get_imsm_disk_slot(map, victim) >= 0)
6945 found++;
e8319a19
DW
6946 }
6947
24565c9a 6948 /* delete the victim if it is no longer being
e8319a19
DW
6949 * utilized anywhere
6950 */
e8319a19 6951 if (!found) {
ae6aad82 6952 struct dl **dlp;
24565c9a 6953
47ee5a45
DW
6954 /* We know that 'manager' isn't touching anything,
6955 * so it is safe to delete
6956 */
24565c9a 6957 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
ae6aad82
DW
6958 if ((*dlp)->index == victim)
6959 break;
47ee5a45
DW
6960
6961 /* victim may be on the missing list */
6962 if (!*dlp)
6963 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
6964 if ((*dlp)->index == victim)
6965 break;
24565c9a 6966 imsm_delete(super, dlp, victim);
e8319a19 6967 }
8273f55e
DW
6968 break;
6969 }
6970 case update_create_array: {
6971 /* someone wants to create a new array, we need to be aware of
6972 * a few races/collisions:
6973 * 1/ 'Create' called by two separate instances of mdadm
6974 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6975 * devices that have since been assimilated via
6976 * activate_spare.
6977 * In the event this update can not be carried out mdadm will
6978 * (FIX ME) notice that its update did not take hold.
6979 */
6980 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 6981 struct intel_dev *dv;
8273f55e
DW
6982 struct imsm_dev *dev;
6983 struct imsm_map *map, *new_map;
6984 unsigned long long start, end;
6985 unsigned long long new_start, new_end;
6986 int i;
54c2c1ea
DW
6987 struct disk_info *inf;
6988 struct dl *dl;
8273f55e
DW
6989
6990 /* handle racing creates: first come first serve */
6991 if (u->dev_idx < mpb->num_raid_devs) {
6992 dprintf("%s: subarray %d already defined\n",
6993 __func__, u->dev_idx);
ba2de7ba 6994 goto create_error;
8273f55e
DW
6995 }
6996
6997 /* check update is next in sequence */
6998 if (u->dev_idx != mpb->num_raid_devs) {
6a3e913e
DW
6999 dprintf("%s: can not create array %d expected index %d\n",
7000 __func__, u->dev_idx, mpb->num_raid_devs);
ba2de7ba 7001 goto create_error;
8273f55e
DW
7002 }
7003
a965f303 7004 new_map = get_imsm_map(&u->dev, 0);
8273f55e
DW
7005 new_start = __le32_to_cpu(new_map->pba_of_lba0);
7006 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
54c2c1ea 7007 inf = get_disk_info(u);
8273f55e
DW
7008
7009 /* handle activate_spare versus create race:
7010 * check to make sure that overlapping arrays do not include
7011 * overalpping disks
7012 */
7013 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 7014 dev = get_imsm_dev(super, i);
a965f303 7015 map = get_imsm_map(dev, 0);
8273f55e
DW
7016 start = __le32_to_cpu(map->pba_of_lba0);
7017 end = start + __le32_to_cpu(map->blocks_per_member);
7018 if ((new_start >= start && new_start <= end) ||
7019 (start >= new_start && start <= new_end))
54c2c1ea
DW
7020 /* overlap */;
7021 else
7022 continue;
7023
7024 if (disks_overlap(super, i, u)) {
8273f55e 7025 dprintf("%s: arrays overlap\n", __func__);
ba2de7ba 7026 goto create_error;
8273f55e
DW
7027 }
7028 }
8273f55e 7029
949c47a0
DW
7030 /* check that prepare update was successful */
7031 if (!update->space) {
7032 dprintf("%s: prepare update failed\n", __func__);
ba2de7ba 7033 goto create_error;
949c47a0
DW
7034 }
7035
54c2c1ea
DW
7036 /* check that all disks are still active before committing
7037 * changes. FIXME: could we instead handle this by creating a
7038 * degraded array? That's probably not what the user expects,
7039 * so better to drop this update on the floor.
7040 */
7041 for (i = 0; i < new_map->num_members; i++) {
7042 dl = serial_to_dl(inf[i].serial, super);
7043 if (!dl) {
7044 dprintf("%s: disk disappeared\n", __func__);
ba2de7ba 7045 goto create_error;
54c2c1ea 7046 }
949c47a0
DW
7047 }
7048
8273f55e 7049 super->updates_pending++;
54c2c1ea
DW
7050
7051 /* convert spares to members and fixup ord_tbl */
7052 for (i = 0; i < new_map->num_members; i++) {
7053 dl = serial_to_dl(inf[i].serial, super);
7054 if (dl->index == -1) {
7055 dl->index = mpb->num_disks;
7056 mpb->num_disks++;
7057 dl->disk.status |= CONFIGURED_DISK;
7058 dl->disk.status &= ~SPARE_DISK;
7059 }
7060 set_imsm_ord_tbl_ent(new_map, i, dl->index);
7061 }
7062
ba2de7ba
DW
7063 dv = update->space;
7064 dev = dv->dev;
949c47a0
DW
7065 update->space = NULL;
7066 imsm_copy_dev(dev, &u->dev);
ba2de7ba
DW
7067 dv->index = u->dev_idx;
7068 dv->next = super->devlist;
7069 super->devlist = dv;
8273f55e 7070 mpb->num_raid_devs++;
8273f55e 7071
4d1313e9 7072 imsm_update_version_info(super);
8273f55e 7073 break;
ba2de7ba
DW
7074 create_error:
7075 /* mdmon knows how to release update->space, but not
7076 * ((struct intel_dev *) update->space)->dev
7077 */
7078 if (update->space) {
7079 dv = update->space;
7080 free(dv->dev);
7081 }
8273f55e 7082 break;
e8319a19 7083 }
33414a01
DW
7084 case update_kill_array: {
7085 struct imsm_update_kill_array *u = (void *) update->buf;
7086 int victim = u->dev_idx;
7087 struct active_array *a;
7088 struct intel_dev **dp;
7089 struct imsm_dev *dev;
7090
7091 /* sanity check that we are not affecting the uuid of
7092 * active arrays, or deleting an active array
7093 *
7094 * FIXME when immutable ids are available, but note that
7095 * we'll also need to fixup the invalidated/active
7096 * subarray indexes in mdstat
7097 */
7098 for (a = st->arrays; a; a = a->next)
7099 if (a->info.container_member >= victim)
7100 break;
7101 /* by definition if mdmon is running at least one array
7102 * is active in the container, so checking
7103 * mpb->num_raid_devs is just extra paranoia
7104 */
7105 dev = get_imsm_dev(super, victim);
7106 if (a || !dev || mpb->num_raid_devs == 1) {
7107 dprintf("failed to delete subarray-%d\n", victim);
7108 break;
7109 }
7110
7111 for (dp = &super->devlist; *dp;)
f21e18ca 7112 if ((*dp)->index == (unsigned)super->current_vol) {
33414a01
DW
7113 *dp = (*dp)->next;
7114 } else {
f21e18ca 7115 if ((*dp)->index > (unsigned)victim)
33414a01
DW
7116 (*dp)->index--;
7117 dp = &(*dp)->next;
7118 }
7119 mpb->num_raid_devs--;
7120 super->updates_pending++;
7121 break;
7122 }
aa534678
DW
7123 case update_rename_array: {
7124 struct imsm_update_rename_array *u = (void *) update->buf;
7125 char name[MAX_RAID_SERIAL_LEN+1];
7126 int target = u->dev_idx;
7127 struct active_array *a;
7128 struct imsm_dev *dev;
7129
7130 /* sanity check that we are not affecting the uuid of
7131 * an active array
7132 */
7133 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
7134 name[MAX_RAID_SERIAL_LEN] = '\0';
7135 for (a = st->arrays; a; a = a->next)
7136 if (a->info.container_member == target)
7137 break;
7138 dev = get_imsm_dev(super, u->dev_idx);
7139 if (a || !dev || !check_name(super, name, 1)) {
7140 dprintf("failed to rename subarray-%d\n", target);
7141 break;
7142 }
7143
cdbe98cd 7144 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
aa534678
DW
7145 super->updates_pending++;
7146 break;
7147 }
1a64be56 7148 case update_add_remove_disk: {
43dad3d6 7149 /* we may be able to repair some arrays if disks are
1a64be56
LM
7150 * being added, check teh status of add_remove_disk
7151 * if discs has been added.
7152 */
7153 if (add_remove_disk_update(super)) {
43dad3d6 7154 struct active_array *a;
072b727f
DW
7155
7156 super->updates_pending++;
1a64be56 7157 for (a = st->arrays; a; a = a->next)
43dad3d6
DW
7158 a->check_degraded = 1;
7159 }
43dad3d6 7160 break;
e8319a19 7161 }
1a64be56
LM
7162 default:
7163 fprintf(stderr, "error: unsuported process update type:"
7164 "(type: %d)\n", type);
7165 }
e8319a19 7166}
88758e9d 7167
bc0b9d34
PC
7168static struct mdinfo *get_spares_for_grow(struct supertype *st);
7169
8273f55e
DW
/* Pre-allocate (in the manager thread) any memory a pending metadata update
 * will need, so that the monitor thread can apply it without allocating.
 * Allocations are parked on update->space / update->space_list and, when the
 * anchor must grow, on super->next_buf/next_len.
 */
static void imsm_prepare_update(struct supertype *st,
				struct metadata_update *update)
{
	/**
	 * Allocate space to hold new disk entries, raid-device entries or a new
	 * mpb if necessary. The manager synchronously waits for updates to
	 * complete in the monitor, so new mpb buffers allocated here can be
	 * integrated by the monitor thread without worrying about live pointers
	 * in the manager thread.
	 */
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	size_t buf_len;
	size_t len = 0;	/* extra anchor bytes this update will need */

	switch (type) {
	case update_takeover: {
		struct imsm_update_takeover *u = (void *)update->buf;
		if (u->direction == R0_TO_R10) {
			/* R0->R10 doubles the member count: pre-allocate one
			 * 'struct dl' per new mirror plus an enlarged dev,
			 * chained as a NULL-terminated list on space_list.
			 */
			void **tail = (void **)&update->space_list;
			struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
			struct imsm_map *map = get_imsm_map(dev, 0);
			int num_members = map->num_members;
			void *space;
			int size, i;
			int err = 0;
			/* allocate memory for added disks */
			for (i = 0; i < num_members; i++) {
				size = sizeof(struct dl);
				space = malloc(size);
				if (!space) {
					err++;
					break;
				}
				/* link into space_list: first word of each
				 * chunk is the 'next' pointer
				 */
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			/* allocate memory for new device */
			size = sizeof_imsm_dev(super->devlist->dev, 0) +
				(num_members * sizeof(__u32));
			space = malloc(size);
			if (!space)
				err++;
			else {
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			if (!err) {
				len = disks_to_mpb_size(num_members * 2);
			} else {
				/* if allocation didn't success, free buffer */
				while (update->space_list) {
					void **sp = update->space_list;
					update->space_list = *sp;
					free(sp);
				}
			}
		}

		break;
	}
	case update_reshape_container_disks: {
		/* Every raid device in the container is about to
		 * gain some more devices, and we will enter a
		 * reconfiguration.
		 * So each 'imsm_map' will be bigger, and the imsm_vol
		 * will now hold 2 of them.
		 * Thus we need new 'struct imsm_dev' allocations sized
		 * as sizeof_imsm_dev but with more devices in both maps.
		 */
		struct imsm_update_reshape *u = (void *)update->buf;
		struct intel_dev *dl;
		void **space_tail = (void**)&update->space_list;

		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		for (dl = super->devlist; dl; dl = dl->next) {
			int size = sizeof_imsm_dev(dl->dev, 1);
			void *s;
			/* two maps, each gains (new - old) __u32 ord slots */
			if (u->new_raid_disks > u->old_raid_disks)
				size += sizeof(__u32)*2*
					(u->new_raid_disks - u->old_raid_disks);
			s = malloc(size);
			if (!s)
				break;
			*space_tail = s;
			space_tail = s;
			*space_tail = NULL;
		}

		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_reshape_migration: {
		/* for migration level 0->5 we need to add disks
		 * so the same as for container operation we will copy
		 * device to the bigger location.
		 * in memory prepared device and new disk area are prepared
		 * for usage in process update
		 */
		struct imsm_update_reshape_migration *u = (void *)update->buf;
		struct intel_dev *id;
		void **space_tail = (void **)&update->space_list;
		int size;
		void *s;
		int current_level = -1;

		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		/* add space for bigger array in update
		 */
		for (id = super->devlist; id; id = id->next) {
			if (id->index == (unsigned)u->subdev) {
				size = sizeof_imsm_dev(id->dev, 1);
				if (u->new_raid_disks > u->old_raid_disks)
					size += sizeof(__u32)*2*
						(u->new_raid_disks - u->old_raid_disks);
				s = malloc(size);
				if (!s)
					break;
				*space_tail = s;
				space_tail = s;
				*space_tail = NULL;
				break;
			}
		}
		/* either the subdev was not found or malloc failed */
		if (update->space_list == NULL)
			break;

		/* add space for disk in update
		 */
		size = sizeof(struct dl);
		s = malloc(size);
		if (!s) {
			free(update->space_list);
			update->space_list = NULL;
			break;
		}
		*space_tail = s;
		space_tail = s;
		*space_tail = NULL;

		/* add spare device to update
		 */
		for (id = super->devlist ; id; id = id->next)
			if (id->index == (unsigned)u->subdev) {
				struct imsm_dev *dev;
				struct imsm_map *map;

				dev = get_imsm_dev(super, u->subdev);
				map = get_imsm_map(dev, 0);
				current_level = map->raid_level;
				break;
			}
		/* level change to RAID5: reserve one spare and record it in
		 * u->new_disks[0] so process_update can activate it
		 */
		if ((u->new_level == 5) && (u->new_level != current_level)) {
			struct mdinfo *spares;

			spares = get_spares_for_grow(st);
			if (spares) {
				struct dl *dl;
				struct mdinfo *dev;

				dev = spares->devs;
				if (dev) {
					u->new_disks[0] =
						makedev(dev->disk.major,
							dev->disk.minor);
					dl = get_disk_super(super,
							    dev->disk.major,
							    dev->disk.minor);
					/* NOTE(review): dl assumed non-NULL
					 * here since the spare came from this
					 * container — confirm
					 */
					dl->index = u->old_raid_disks;
					dev = dev->next;
				}
				sysfs_free(spares);
			}
		}
		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_create_array: {
		struct imsm_update_create_array *u = (void *) update->buf;
		struct intel_dev *dv;
		struct imsm_dev *dev = &u->dev;
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct dl *dl;
		struct disk_info *inf;
		int i;
		int activate = 0;

		inf = get_disk_info(u);
		len = sizeof_imsm_dev(dev, 1);
		/* allocate a new super->devlist entry */
		dv = malloc(sizeof(*dv));
		if (dv) {
			dv->dev = malloc(len);
			if (dv->dev)
				update->space = dv;
			else {
				free(dv);
				update->space = NULL;
			}
		}

		/* count how many spares will be converted to members */
		for (i = 0; i < map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				/* hmm maybe it failed?, nothing we can do about
				 * it here
				 */
				continue;
			}
			if (count_memberships(dl, super) == 0)
				activate++;
		}
		len += activate * sizeof(struct imsm_disk);
		break;
	/* NOTE(review): this 'default' label lives inside the
	 * update_create_array compound block; that is legal C (a label
	 * anywhere in the switch body) and is reached for all other types.
	 */
	default:
		break;
	}
	}

	/* check if we need a larger metadata buffer */
	if (super->next_buf)
		buf_len = super->next_len;
	else
		buf_len = super->len;

	if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
		/* ok we need a larger buf than what is currently allocated
		 * if this allocation fails process_update will notice that
		 * ->next_len is set and ->next_buf is NULL
		 */
		buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
		if (super->next_buf)
			free(super->next_buf);

		super->next_len = buf_len;
		if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
			memset(super->next_buf, 0, buf_len);
		else
			super->next_buf = NULL;
	}
}
7419
ae6aad82 7420/* must be called while manager is quiesced */
/* Remove the disk at 'index' from the container metadata: shift the index of
 * every disk above it down by one, rewrite every raid device's ord table to
 * match, decrement mpb->num_disks, and free the 'struct dl' at *dlp (unlinking
 * it from its list). Must be called while the manager is quiesced.
 */
static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j, -1);

			/* entries at or below the removed index are unchanged */
			if (ord_to_idx(ord) <= index)
				continue;

			/* first map: store the bare shifted index (flags
			 * stripped); second map (if present): keep flags
			 */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	/* unlink and free the deleted disk's in-memory record */
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
7471
687629c2
AK
7472/*******************************************************************************
7473 * Function: open_backup_targets
7474 * Description: Function opens file descriptors for all devices given in
7475 * info->devs
7476 * Parameters:
7477 * info : general array info
7478 * raid_disks : number of disks
7479 * raid_fds : table of device's file descriptors
7480 * Returns:
7481 * 0 : success
7482 * -1 : fail
7483 ******************************************************************************/
7484int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
7485{
7486 struct mdinfo *sd;
7487
7488 for (sd = info->devs ; sd ; sd = sd->next) {
7489 char *dn;
7490
7491 if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
7492 dprintf("disk is faulty!!\n");
7493 continue;
7494 }
7495
7496 if ((sd->disk.raid_disk >= raid_disks) ||
7497 (sd->disk.raid_disk < 0))
7498 continue;
7499
7500 dn = map_dev(sd->disk.major,
7501 sd->disk.minor, 1);
7502 raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
7503 if (raid_fds[sd->disk.raid_disk] < 0) {
7504 fprintf(stderr, "cannot open component\n");
7505 return -1;
7506 }
7507 }
7508 return 0;
7509}
7510
/*******************************************************************************
 * Function: init_migr_record_imsm
 * Description: Function inits imsm migration record
 * Parameters:
 *	super : imsm internal array info
 *	dev : device under migration
 *	info : general array info to find the smallest device
 * Returns:
 *	none
 ******************************************************************************/
void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
			   struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct migr_record *migr_rec = super->migr_rec;
	int new_data_disks;
	unsigned long long dsize, dev_sectors;
	/* start at the maximum so any real device size is smaller */
	long long unsigned min_dev_sectors = -1LLU;
	struct mdinfo *sd;
	char nm[30];
	int fd;
	struct imsm_map *map_dest = get_imsm_map(dev, 0);
	struct imsm_map *map_src = get_imsm_map(dev, 1);
	unsigned long long num_migr_units;

	/* array size in blocks, reassembled from the split 32-bit fields */
	unsigned long long array_blocks =
		(((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) +
		__le32_to_cpu(dev->size_low);

	memset(migr_rec, 0, sizeof(struct migr_record));
	migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);

	/* only ascending reshape supported now */
	migr_rec->ascending_migr = __cpu_to_le32(1);

	/* depth per unit: as many whole strips of the larger of the two
	 * strip sizes as fit into the copy area, expressed in dest strips
	 */
	migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
		max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
	migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip;
	new_data_disks = imsm_num_data_members(dev, 0);
	migr_rec->blocks_per_unit =
		__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
	/* convert to little-endian only after the CPU-order math above */
	migr_rec->dest_depth_per_unit =
		__cpu_to_le32(migr_rec->dest_depth_per_unit);

	/* number of migration units, rounded up to cover the whole array */
	num_migr_units =
		array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);

	if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
		num_migr_units++;
	migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);

	/* size fields are already little-endian in the anchor; copy as-is */
	migr_rec->post_migr_vol_cap = dev->size_low;
	migr_rec->post_migr_vol_cap_hi = dev->size_high;


	/* Find the smallest dev */
	for (sd = info->devs ; sd ; sd = sd->next) {
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		fd = dev_open(nm, O_RDONLY);
		if (fd < 0)
			continue;
		get_dev_size(fd, NULL, &dsize);
		dev_sectors = dsize / 512;
		if (dev_sectors < min_dev_sectors)
			min_dev_sectors = dev_sectors;
		close(fd);
	}
	/* NOTE(review): if no component could be opened, min_dev_sectors is
	 * still -1LLU and this subtraction wraps — confirm callers guarantee
	 * at least one openable member
	 */
	migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
					RAID_DISK_RESERVED_BLOCKS_IMSM_HI);

	write_imsm_migr_rec(st);

	return;
}
7585
7586/*******************************************************************************
7587 * Function: save_backup_imsm
7588 * Description: Function saves critical data stripes to Migration Copy Area
7589 * and updates the current migration unit status.
7590 * Use restore_stripes() to form a destination stripe,
7591 * and to write it to the Copy Area.
7592 * Parameters:
7593 * st : supertype information
7594 * info : general array info
7595 * buf : input buffer
7596 * write_offset : address of data to backup
7597 * length : length of data to backup (blocks_per_unit)
7598 * Returns:
7599 * 0 : success
7600 *, -1 : fail
7601 ******************************************************************************/
7602int save_backup_imsm(struct supertype *st,
7603 struct imsm_dev *dev,
7604 struct mdinfo *info,
7605 void *buf,
7606 int new_data,
7607 int length)
7608{
7609 int rv = -1;
7610 struct intel_super *super = st->sb;
7611 unsigned long long *target_offsets = NULL;
7612 int *targets = NULL;
7613 int i;
7614 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7615 int new_disks = map_dest->num_members;
7616
7617 targets = malloc(new_disks * sizeof(int));
7618 if (!targets)
7619 goto abort;
7620
7621 target_offsets = malloc(new_disks * sizeof(unsigned long long));
7622 if (!target_offsets)
7623 goto abort;
7624
7625 for (i = 0; i < new_disks; i++) {
7626 targets[i] = -1;
7627 target_offsets[i] = (unsigned long long)
7628 __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
7629 }
7630
7631 if (open_backup_targets(info, new_disks, targets))
7632 goto abort;
7633
7634 if (restore_stripes(targets, /* list of dest devices */
7635 target_offsets, /* migration record offsets */
7636 new_disks,
7637 info->new_chunk,
7638 info->new_level,
7639 info->new_layout,
7640 -1, /* source backup file descriptor */
7641 0, /* input buf offset
7642 * always 0 buf is already offset */
7643 0,
7644 length,
7645 buf) != 0) {
7646 fprintf(stderr, Name ": Error restoring stripes\n");
7647 goto abort;
7648 }
7649
7650 rv = 0;
7651
7652abort:
7653 if (targets) {
7654 for (i = 0; i < new_disks; i++)
7655 if (targets[i] >= 0)
7656 close(targets[i]);
7657 free(targets);
7658 }
7659 free(target_offsets);
7660
7661 return rv;
7662}
7663
7664/*******************************************************************************
7665 * Function: save_checkpoint_imsm
7666 * Description: Function called for current unit status update
7667 * in the migration record. It writes it to disk.
7668 * Parameters:
7669 * super : imsm internal array info
7670 * info : general array info
7671 * Returns:
7672 * 0: success
7673 * 1: failure
7674 ******************************************************************************/
7675int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
7676{
7677 struct intel_super *super = st->sb;
7678 load_imsm_migr_rec(super, info);
7679 if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
7680 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7681 return 1;
7682 }
7683
7684 super->migr_rec->curr_migr_unit =
7685 __cpu_to_le32(info->reshape_progress /
7686 __le32_to_cpu(super->migr_rec->blocks_per_unit));
7687 super->migr_rec->rec_status = __cpu_to_le32(state);
7688 super->migr_rec->dest_1st_member_lba =
7689 __cpu_to_le32((__le32_to_cpu(super->migr_rec->curr_migr_unit))
7690 * __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
7691 if (write_imsm_migr_rec(st) < 0) {
7692 dprintf("imsm: Cannot write migration record "
7693 "outside backup area\n");
7694 return 1;
7695 }
7696
7697 return 0;
7698}
7699
276d77db
AK
7700static __u64 blocks_per_migr_unit(struct intel_super *super,
7701 struct imsm_dev *dev);
7702
7703/*******************************************************************************
7704 * Function: recover_backup_imsm
7705 * Description: Function recovers critical data from the Migration Copy Area
7706 * while assembling an array.
7707 * Parameters:
7708 * super : imsm internal array info
7709 * info : general array info
7710 * Returns:
7711 * 0 : success (or there is no data to recover)
7712 * 1 : fail
7713 ******************************************************************************/
7714int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
7715{
7716 struct intel_super *super = st->sb;
7717 struct migr_record *migr_rec = super->migr_rec;
7718 struct imsm_map *map_dest = NULL;
7719 struct intel_dev *id = NULL;
7720 unsigned long long read_offset;
7721 unsigned long long write_offset;
7722 unsigned unit_len;
7723 int *targets = NULL;
7724 int new_disks, i, err;
7725 char *buf = NULL;
7726 int retval = 1;
7727 unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
7728 unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
7729 int ascending = __le32_to_cpu(migr_rec->ascending_migr);
7730 char buffer[20];
7731
7732 err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
7733 if (err < 1)
7734 return 1;
7735
7736 /* recover data only during assemblation */
7737 if (strncmp(buffer, "inactive", 8) != 0)
7738 return 0;
7739 /* no data to recover */
7740 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
7741 return 0;
7742 if (curr_migr_unit >= num_migr_units)
7743 return 1;
7744
7745 /* find device during reshape */
7746 for (id = super->devlist; id; id = id->next)
7747 if (is_gen_migration(id->dev))
7748 break;
7749 if (id == NULL)
7750 return 1;
7751
7752 map_dest = get_imsm_map(id->dev, 0);
7753 new_disks = map_dest->num_members;
7754
7755 read_offset = (unsigned long long)
7756 __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
7757
7758 write_offset = ((unsigned long long)
7759 __le32_to_cpu(migr_rec->dest_1st_member_lba) +
7760 info->data_offset) * 512;
7761
7762 unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
7763 if (posix_memalign((void **)&buf, 512, unit_len) != 0)
7764 goto abort;
7765 targets = malloc(new_disks * sizeof(int));
7766 if (!targets)
7767 goto abort;
7768
7769 open_backup_targets(info, new_disks, targets);
7770
7771 for (i = 0; i < new_disks; i++) {
7772 if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
7773 fprintf(stderr,
7774 Name ": Cannot seek to block: %s\n",
7775 strerror(errno));
7776 goto abort;
7777 }
7778 if (read(targets[i], buf, unit_len) != unit_len) {
7779 fprintf(stderr,
7780 Name ": Cannot read copy area block: %s\n",
7781 strerror(errno));
7782 goto abort;
7783 }
7784 if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
7785 fprintf(stderr,
7786 Name ": Cannot seek to block: %s\n",
7787 strerror(errno));
7788 goto abort;
7789 }
7790 if (write(targets[i], buf, unit_len) != unit_len) {
7791 fprintf(stderr,
7792 Name ": Cannot restore block: %s\n",
7793 strerror(errno));
7794 goto abort;
7795 }
7796 }
7797
7798 if (ascending && curr_migr_unit < (num_migr_units-1))
7799 curr_migr_unit++;
7800
7801 migr_rec->curr_migr_unit = __le32_to_cpu(curr_migr_unit);
7802 super->migr_rec->rec_status = __cpu_to_le32(UNIT_SRC_NORMAL);
7803 if (write_imsm_migr_rec(st) == 0) {
7804 __u64 blocks_per_unit = blocks_per_migr_unit(super, id->dev);
7805 info->reshape_progress = curr_migr_unit * blocks_per_unit;
7806 retval = 0;
7807 }
7808
7809abort:
7810 if (targets) {
7811 for (i = 0; i < new_disks; i++)
7812 if (targets[i])
7813 close(targets[i]);
7814 free(targets);
7815 }
7816 free(buf);
7817 return retval;
7818}
7819
2cda7640
ML
7820static char disk_by_path[] = "/dev/disk/by-path/";
7821
7822static const char *imsm_get_disk_controller_domain(const char *path)
7823{
2cda7640 7824 char disk_path[PATH_MAX];
96234762
LM
7825 char *drv=NULL;
7826 struct stat st;
2cda7640 7827
96234762
LM
7828 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7829 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7830 if (stat(disk_path, &st) == 0) {
7831 struct sys_dev* hba;
7832 char *path=NULL;
7833
7834 path = devt_to_devpath(st.st_rdev);
7835 if (path == NULL)
7836 return "unknown";
7837 hba = find_disk_attached_hba(-1, path);
7838 if (hba && hba->type == SYS_DEV_SAS)
7839 drv = "isci";
7840 else if (hba && hba->type == SYS_DEV_SATA)
7841 drv = "ahci";
7842 else
7843 drv = "unknown";
7844 dprintf("path: %s hba: %s attached: %s\n",
7845 path, (hba) ? hba->path : "NULL", drv);
7846 free(path);
7847 if (hba)
7848 free_sys_dev(&hba);
2cda7640 7849 }
96234762 7850 return drv;
2cda7640
ML
7851}
7852
78b10e66
N
7853static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7854{
7855 char subdev_name[20];
7856 struct mdstat_ent *mdstat;
7857
7858 sprintf(subdev_name, "%d", subdev);
7859 mdstat = mdstat_by_subdev(subdev_name, container);
7860 if (!mdstat)
7861 return -1;
7862
7863 *minor = mdstat->devnum;
7864 free_mdstat(mdstat);
7865 return 0;
7866}
7867
7868static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7869 struct geo_params *geo,
7870 int *old_raid_disks)
7871{
694575e7
KW
7872 /* currently we only support increasing the number of devices
7873 * for a container. This increases the number of device for each
7874 * member array. They must all be RAID0 or RAID5.
7875 */
78b10e66
N
7876 int ret_val = 0;
7877 struct mdinfo *info, *member;
7878 int devices_that_can_grow = 0;
7879
7880 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7881 "st->devnum = (%i)\n",
7882 st->devnum);
7883
7884 if (geo->size != -1 ||
7885 geo->level != UnSet ||
7886 geo->layout != UnSet ||
7887 geo->chunksize != 0 ||
7888 geo->raid_disks == UnSet) {
7889 dprintf("imsm: Container operation is allowed for "
7890 "raid disks number change only.\n");
7891 return ret_val;
7892 }
7893
7894 info = container_content_imsm(st, NULL);
7895 for (member = info; member; member = member->next) {
7896 int result;
7897 int minor;
7898
7899 dprintf("imsm: checking device_num: %i\n",
7900 member->container_member);
7901
d7d205bd 7902 if (geo->raid_disks <= member->array.raid_disks) {
78b10e66
N
7903 /* we work on container for Online Capacity Expansion
7904 * only so raid_disks has to grow
7905 */
7906 dprintf("imsm: for container operation raid disks "
7907 "increase is required\n");
7908 break;
7909 }
7910
7911 if ((info->array.level != 0) &&
7912 (info->array.level != 5)) {
7913 /* we cannot use this container with other raid level
7914 */
690aae1a 7915 dprintf("imsm: for container operation wrong"
78b10e66
N
7916 " raid level (%i) detected\n",
7917 info->array.level);
7918 break;
7919 } else {
7920 /* check for platform support
7921 * for this raid level configuration
7922 */
7923 struct intel_super *super = st->sb;
7924 if (!is_raid_level_supported(super->orom,
7925 member->array.level,
7926 geo->raid_disks)) {
690aae1a 7927 dprintf("platform does not support raid%d with"
78b10e66
N
7928 " %d disk%s\n",
7929 info->array.level,
7930 geo->raid_disks,
7931 geo->raid_disks > 1 ? "s" : "");
7932 break;
7933 }
2a4a08e7
AK
7934 /* check if component size is aligned to chunk size
7935 */
7936 if (info->component_size %
7937 (info->array.chunk_size/512)) {
7938 dprintf("Component size is not aligned to "
7939 "chunk size\n");
7940 break;
7941 }
78b10e66
N
7942 }
7943
7944 if (*old_raid_disks &&
7945 info->array.raid_disks != *old_raid_disks)
7946 break;
7947 *old_raid_disks = info->array.raid_disks;
7948
7949 /* All raid5 and raid0 volumes in container
7950 * have to be ready for Online Capacity Expansion
7951 * so they need to be assembled. We have already
7952 * checked that no recovery etc is happening.
7953 */
7954 result = imsm_find_array_minor_by_subdev(member->container_member,
7955 st->container_dev,
7956 &minor);
7957 if (result < 0) {
7958 dprintf("imsm: cannot find array\n");
7959 break;
7960 }
7961 devices_that_can_grow++;
7962 }
7963 sysfs_free(info);
7964 if (!member && devices_that_can_grow)
7965 ret_val = 1;
7966
7967 if (ret_val)
7968 dprintf("\tContainer operation allowed\n");
7969 else
7970 dprintf("\tError: %i\n", ret_val);
7971
7972 return ret_val;
7973}
7974
7975/* Function: get_spares_for_grow
7976 * Description: Allocates memory and creates list of spare devices
7977 * avaliable in container. Checks if spare drive size is acceptable.
7978 * Parameters: Pointer to the supertype structure
7979 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7980 * NULL if fail
7981 */
7982static struct mdinfo *get_spares_for_grow(struct supertype *st)
7983{
78b10e66 7984 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
326727d9 7985 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
78b10e66
N
7986}
7987
7988/******************************************************************************
7989 * function: imsm_create_metadata_update_for_reshape
7990 * Function creates update for whole IMSM container.
7991 *
7992 ******************************************************************************/
7993static int imsm_create_metadata_update_for_reshape(
7994 struct supertype *st,
7995 struct geo_params *geo,
7996 int old_raid_disks,
7997 struct imsm_update_reshape **updatep)
7998{
7999 struct intel_super *super = st->sb;
8000 struct imsm_super *mpb = super->anchor;
8001 int update_memory_size = 0;
8002 struct imsm_update_reshape *u = NULL;
8003 struct mdinfo *spares = NULL;
8004 int i;
8005 int delta_disks = 0;
bbd24d86 8006 struct mdinfo *dev;
78b10e66
N
8007
8008 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
8009 geo->raid_disks);
8010
8011 delta_disks = geo->raid_disks - old_raid_disks;
8012
8013 /* size of all update data without anchor */
8014 update_memory_size = sizeof(struct imsm_update_reshape);
8015
8016 /* now add space for spare disks that we need to add. */
8017 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
8018
8019 u = calloc(1, update_memory_size);
8020 if (u == NULL) {
8021 dprintf("error: "
8022 "cannot get memory for imsm_update_reshape update\n");
8023 return 0;
8024 }
8025 u->type = update_reshape_container_disks;
8026 u->old_raid_disks = old_raid_disks;
8027 u->new_raid_disks = geo->raid_disks;
8028
8029 /* now get spare disks list
8030 */
8031 spares = get_spares_for_grow(st);
8032
8033 if (spares == NULL
8034 || delta_disks > spares->array.spare_disks) {
e14e5960
KW
8035 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
8036 "for %s.\n", geo->dev_name);
78b10e66
N
8037 goto abort;
8038 }
8039
8040 /* we have got spares
8041 * update disk list in imsm_disk list table in anchor
8042 */
8043 dprintf("imsm: %i spares are available.\n\n",
8044 spares->array.spare_disks);
8045
bbd24d86 8046 dev = spares->devs;
78b10e66 8047 for (i = 0; i < delta_disks; i++) {
78b10e66
N
8048 struct dl *dl;
8049
bbd24d86
AK
8050 if (dev == NULL)
8051 break;
78b10e66
N
8052 u->new_disks[i] = makedev(dev->disk.major,
8053 dev->disk.minor);
8054 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
ee4beede
AK
8055 dl->index = mpb->num_disks;
8056 mpb->num_disks++;
bbd24d86 8057 dev = dev->next;
78b10e66 8058 }
78b10e66
N
8059
8060abort:
8061 /* free spares
8062 */
8063 sysfs_free(spares);
8064
d677e0b8 8065 dprintf("imsm: reshape update preparation :");
78b10e66 8066 if (i == delta_disks) {
d677e0b8 8067 dprintf(" OK\n");
78b10e66
N
8068 *updatep = u;
8069 return update_memory_size;
8070 }
8071 free(u);
d677e0b8 8072 dprintf(" Error\n");
78b10e66
N
8073
8074 return 0;
8075}
8076
48c5303a
PC
8077/******************************************************************************
8078 * function: imsm_create_metadata_update_for_migration()
8079 * Creates update for IMSM array.
8080 *
8081 ******************************************************************************/
8082static int imsm_create_metadata_update_for_migration(
8083 struct supertype *st,
8084 struct geo_params *geo,
8085 struct imsm_update_reshape_migration **updatep)
8086{
8087 struct intel_super *super = st->sb;
8088 int update_memory_size = 0;
8089 struct imsm_update_reshape_migration *u = NULL;
8090 struct imsm_dev *dev;
8091 int previous_level = -1;
8092
8093 dprintf("imsm_create_metadata_update_for_migration(enter)"
8094 " New Level = %i\n", geo->level);
8095
8096 /* size of all update data without anchor */
8097 update_memory_size = sizeof(struct imsm_update_reshape_migration);
8098
8099 u = calloc(1, update_memory_size);
8100 if (u == NULL) {
8101 dprintf("error: cannot get memory for "
8102 "imsm_create_metadata_update_for_migration\n");
8103 return 0;
8104 }
8105 u->type = update_reshape_migration;
8106 u->subdev = super->current_vol;
8107 u->new_level = geo->level;
8108 u->new_layout = geo->layout;
8109 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
8110 u->new_disks[0] = -1;
4bba0439 8111 u->new_chunksize = -1;
48c5303a
PC
8112
8113 dev = get_imsm_dev(super, u->subdev);
8114 if (dev) {
8115 struct imsm_map *map;
8116
8117 map = get_imsm_map(dev, 0);
4bba0439
PC
8118 if (map) {
8119 int current_chunk_size =
8120 __le16_to_cpu(map->blocks_per_strip) / 2;
8121
8122 if (geo->chunksize != current_chunk_size) {
8123 u->new_chunksize = geo->chunksize / 1024;
8124 dprintf("imsm: "
8125 "chunk size change from %i to %i\n",
8126 current_chunk_size, u->new_chunksize);
8127 }
48c5303a 8128 previous_level = map->raid_level;
4bba0439 8129 }
48c5303a
PC
8130 }
8131 if ((geo->level == 5) && (previous_level == 0)) {
8132 struct mdinfo *spares = NULL;
8133
8134 u->new_raid_disks++;
8135 spares = get_spares_for_grow(st);
8136 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
8137 free(u);
8138 sysfs_free(spares);
8139 update_memory_size = 0;
8140 dprintf("error: cannot get spare device "
8141 "for requested migration");
8142 return 0;
8143 }
8144 sysfs_free(spares);
8145 }
8146 dprintf("imsm: reshape update preparation : OK\n");
8147 *updatep = u;
8148
8149 return update_memory_size;
8150}
8151
8dd70bce
AK
8152static void imsm_update_metadata_locally(struct supertype *st,
8153 void *buf, int len)
8154{
8155 struct metadata_update mu;
8156
8157 mu.buf = buf;
8158 mu.len = len;
8159 mu.space = NULL;
8160 mu.space_list = NULL;
8161 mu.next = NULL;
8162 imsm_prepare_update(st, &mu);
8163 imsm_process_update(st, &mu);
8164
8165 while (mu.space_list) {
8166 void **space = mu.space_list;
8167 mu.space_list = *space;
8168 free(space);
8169 }
8170}
78b10e66 8171
471bceb6 8172/***************************************************************************
694575e7 8173* Function: imsm_analyze_change
471bceb6
KW
8174* Description: Function analyze change for single volume
8175* and validate if transition is supported
694575e7
KW
8176* Parameters: Geometry parameters, supertype structure
8177* Returns: Operation type code on success, -1 if fail
471bceb6
KW
8178****************************************************************************/
8179enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
8180 struct geo_params *geo)
694575e7 8181{
471bceb6
KW
8182 struct mdinfo info;
8183 int change = -1;
8184 int check_devs = 0;
c21e737b 8185 int chunk;
471bceb6
KW
8186
8187 getinfo_super_imsm_volume(st, &info, NULL);
8188
8189 if ((geo->level != info.array.level) &&
8190 (geo->level >= 0) &&
8191 (geo->level != UnSet)) {
8192 switch (info.array.level) {
8193 case 0:
8194 if (geo->level == 5) {
b5347799 8195 change = CH_MIGRATION;
471bceb6
KW
8196 check_devs = 1;
8197 }
8198 if (geo->level == 10) {
8199 change = CH_TAKEOVER;
8200 check_devs = 1;
8201 }
dfe77a9e
KW
8202 break;
8203 case 1:
8204 if (geo->level == 0) {
8205 change = CH_TAKEOVER;
8206 check_devs = 1;
8207 }
471bceb6 8208 break;
471bceb6
KW
8209 case 10:
8210 if (geo->level == 0) {
8211 change = CH_TAKEOVER;
8212 check_devs = 1;
8213 }
8214 break;
8215 }
8216 if (change == -1) {
8217 fprintf(stderr,
8218 Name " Error. Level Migration from %d to %d "
8219 "not supported!\n",
8220 info.array.level, geo->level);
8221 goto analyse_change_exit;
8222 }
8223 } else
8224 geo->level = info.array.level;
8225
8226 if ((geo->layout != info.array.layout)
8227 && ((geo->layout != UnSet) && (geo->layout != -1))) {
b5347799 8228 change = CH_MIGRATION;
471bceb6
KW
8229 if ((info.array.layout == 0)
8230 && (info.array.level == 5)
8231 && (geo->layout == 5)) {
8232 /* reshape 5 -> 4 */
8233 } else if ((info.array.layout == 5)
8234 && (info.array.level == 5)
8235 && (geo->layout == 0)) {
8236 /* reshape 4 -> 5 */
8237 geo->layout = 0;
8238 geo->level = 5;
8239 } else {
8240 fprintf(stderr,
8241 Name " Error. Layout Migration from %d to %d "
8242 "not supported!\n",
8243 info.array.layout, geo->layout);
8244 change = -1;
8245 goto analyse_change_exit;
8246 }
8247 } else
8248 geo->layout = info.array.layout;
8249
8250 if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
8251 && (geo->chunksize != info.array.chunk_size))
b5347799 8252 change = CH_MIGRATION;
471bceb6
KW
8253 else
8254 geo->chunksize = info.array.chunk_size;
8255
c21e737b 8256 chunk = geo->chunksize / 1024;
471bceb6
KW
8257 if (!validate_geometry_imsm(st,
8258 geo->level,
8259 geo->layout,
8260 geo->raid_disks,
c21e737b 8261 &chunk,
471bceb6
KW
8262 geo->size,
8263 0, 0, 1))
8264 change = -1;
8265
8266 if (check_devs) {
8267 struct intel_super *super = st->sb;
8268 struct imsm_super *mpb = super->anchor;
8269
8270 if (mpb->num_raid_devs > 1) {
8271 fprintf(stderr,
8272 Name " Error. Cannot perform operation on %s"
8273 "- for this operation it MUST be single "
8274 "array in container\n",
8275 geo->dev_name);
8276 change = -1;
8277 }
8278 }
8279
8280analyse_change_exit:
8281
8282 return change;
694575e7
KW
8283}
8284
bb025c2f
KW
8285int imsm_takeover(struct supertype *st, struct geo_params *geo)
8286{
8287 struct intel_super *super = st->sb;
8288 struct imsm_update_takeover *u;
8289
8290 u = malloc(sizeof(struct imsm_update_takeover));
8291 if (u == NULL)
8292 return 1;
8293
8294 u->type = update_takeover;
8295 u->subarray = super->current_vol;
8296
8297 /* 10->0 transition */
8298 if (geo->level == 0)
8299 u->direction = R10_TO_R0;
8300
0529c688
KW
8301 /* 0->10 transition */
8302 if (geo->level == 10)
8303 u->direction = R0_TO_R10;
8304
bb025c2f
KW
8305 /* update metadata locally */
8306 imsm_update_metadata_locally(st, u,
8307 sizeof(struct imsm_update_takeover));
8308 /* and possibly remotely */
8309 if (st->update_tail)
8310 append_metadata_update(st, u,
8311 sizeof(struct imsm_update_takeover));
8312 else
8313 free(u);
8314
8315 return 0;
8316}
8317
6dc0be30
AK
/* Print the experimental-feature warning for container grow and ask the
 * user for confirmation.  Returns the (non-zero) answer from ask() when
 * the user accepts, 0 otherwise.
 */
static int warn_user_about_risk(void)
{
	int answer;

	fputs("\nThis is an experimental feature. Data on the RAID volume(s) "
	      "can be lost!!!\n\n"
	      "To continue command execution please make sure that\n"
	      "the grow process will not be interrupted. Use safe power\n"
	      "supply to avoid unexpected system reboot. Make sure that\n"
	      "reshaped container is not assembled automatically during\n"
	      "system boot.\n"
	      "If reshape is interrupted, assemble array manually\n"
	      "using e.g. '-Ac' option and up to date mdadm.conf file.\n"
	      "Assembly in scan mode is not possible in such case.\n"
	      "Growing container with boot array is not possible.\n"
	      "If boot array reshape is interrupted, whole file system\n"
	      "can be lost.\n\n", stderr);
	answer = ask("Do you want to continue? ");
	fprintf(stderr, "\n");

	return answer;
}
8341
78b10e66
N
/* Top-level reshape entry point for IMSM (superswitch .reshape_super).
 * Builds a geo_params from the requested size/level/layout/chunk/disks and
 * dispatches either a container-level grow (add devices) or a volume-level
 * operation (takeover or migration).
 * Returns 0 on success, 1 on failure.
 */
static int imsm_reshape_super(struct supertype *st, long long size, int level,
			      int layout, int chunksize, int raid_disks,
			      int delta_disks, char *backup, char *dev,
			      int verbose)
{
	int ret_val = 1;
	struct geo_params geo;

	dprintf("imsm: reshape_super called.\n");

	memset(&geo, 0, sizeof(struct geo_params));

	geo.dev_name = dev;
	geo.dev_id = st->devnum;
	geo.size = size;
	geo.level = level;
	geo.layout = layout;
	geo.chunksize = chunksize;
	geo.raid_disks = raid_disks;
	/* delta_disks is relative; fold it into the absolute disk count */
	if (delta_disks != UnSet)
		geo.raid_disks += delta_disks;

	dprintf("\tfor level : %i\n", geo.level);
	dprintf("\tfor raid_disks : %i\n", geo.raid_disks);

	/* reshape is gated behind MDADM_EXPERIMENTAL */
	if (experimental() == 0)
		return ret_val;

	/* container vs. volume: st refers to the container itself when
	 * devnum matches container_dev
	 */
	if (st->container_dev == st->devnum) {
		/* On container level we can only increase number of devices. */
		dprintf("imsm: info: Container operation\n");
		int old_raid_disks = 0;

		/* this warning will be removed when imsm checkpointing
		 * will be implemented, and restoring from check-point
		 * operation will be transparent for reboot process
		 */
		if (warn_user_about_risk() == 0)
			return ret_val;

		if (imsm_reshape_is_allowed_on_container(
			    st, &geo, &old_raid_disks)) {
			struct imsm_update_reshape *u = NULL;
			int len;

			len = imsm_create_metadata_update_for_reshape(
				st, &geo, old_raid_disks, &u);

			if (len <= 0) {
				dprintf("imsm: Cannot prepare update\n");
				goto exit_imsm_reshape_super;
			}

			ret_val = 0;
			/* update metadata locally */
			imsm_update_metadata_locally(st, u, len);
			/* and possibly remotely; when queued remotely, mdmon
			 * owns u, otherwise free it here
			 */
			if (st->update_tail)
				append_metadata_update(st, u, len);
			else
				free(u);

		} else {
			fprintf(stderr, Name ": (imsm) Operation "
				"is not allowed on this container\n");
		}
	} else {
		/* On volume level we support following operations
		 * - takeover: raid10 -> raid0; raid0 -> raid10
		 * - chunk size migration
		 * - migration: raid5 -> raid0; raid0 -> raid5
		 */
		struct intel_super *super = st->sb;
		struct intel_dev *dev = super->devlist;
		int change, devnum;
		dprintf("imsm: info: Volume operation\n");
		/* find requested device by matching its md minor number */
		while (dev) {
			imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
			if (devnum == geo.dev_id)
				break;
			dev = dev->next;
		}
		if (dev == NULL) {
			fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
				geo.dev_name, geo.dev_id);
			goto exit_imsm_reshape_super;
		}
		/* select the volume for the helpers called below */
		super->current_vol = dev->index;
		change = imsm_analyze_change(st, &geo);
		switch (change) {
		case CH_TAKEOVER:
			ret_val = imsm_takeover(st, &geo);
			break;
		case CH_MIGRATION: {
			struct imsm_update_reshape_migration *u = NULL;
			int len =
				imsm_create_metadata_update_for_migration(
					st, &geo, &u);
			if (len < 1) {
				dprintf("imsm: "
					"Cannot prepare update\n");
				break;
			}
			ret_val = 0;
			/* update metadata locally */
			imsm_update_metadata_locally(st, u, len);
			/* and possibly remotely */
			if (st->update_tail)
				append_metadata_update(st, u, len);
			else
				free(u);
		}
		break;
		default:
			ret_val = 1;
		}
	}

exit_imsm_reshape_super:
	dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
	return ret_val;
}
2cda7640 8465
eee67a47
AK
8466/*******************************************************************************
8467 * Function: wait_for_reshape_imsm
8468 * Description: Function writes new sync_max value and waits until
8469 * reshape process reach new position
8470 * Parameters:
8471 * sra : general array info
8472 * to_complete : new sync_max position
8473 * ndata : number of disks in new array's layout
8474 * Returns:
8475 * 0 : success,
8476 * 1 : there is no reshape in progress,
8477 * -1 : fail
8478 ******************************************************************************/
8479int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
8480 int ndata)
8481{
8482 int fd = sysfs_get_fd(sra, NULL, "reshape_position");
8483 unsigned long long completed;
8484
8485 struct timeval timeout;
8486
8487 if (fd < 0)
8488 return 1;
8489
8490 sysfs_fd_get_ll(fd, &completed);
8491
8492 if (to_complete == 0) {/* reshape till the end of array */
8493 sysfs_set_str(sra, NULL, "sync_max", "max");
8494 to_complete = MaxSector;
8495 } else {
8496 if (completed > to_complete)
8497 return -1;
8498 if (sysfs_set_num(sra, NULL, "sync_max",
8499 to_complete / ndata) != 0) {
8500 close(fd);
8501 return -1;
8502 }
8503 }
8504
8505 /* FIXME should not need a timeout at all */
8506 timeout.tv_sec = 30;
8507 timeout.tv_usec = 0;
8508 do {
8509 char action[20];
8510 fd_set rfds;
8511 FD_ZERO(&rfds);
8512 FD_SET(fd, &rfds);
8513 select(fd+1, NULL, NULL, &rfds, &timeout);
8514 if (sysfs_fd_get_ll(fd, &completed) < 0) {
8515 close(fd);
8516 return 1;
8517 }
8518 if (sysfs_get_str(sra, NULL, "sync_action",
8519 action, 20) > 0 &&
8520 strncmp(action, "reshape", 7) != 0)
8521 break;
8522 } while (completed < to_complete);
8523 close(fd);
8524 return 0;
8525
8526}
8527
b915c95f
AK
8528/*******************************************************************************
8529 * Function: check_degradation_change
8530 * Description: Check that array hasn't become failed.
8531 * Parameters:
8532 * info : for sysfs access
8533 * sources : source disks descriptors
8534 * degraded: previous degradation level
8535 * Returns:
8536 * degradation level
8537 ******************************************************************************/
8538int check_degradation_change(struct mdinfo *info,
8539 int *sources,
8540 int degraded)
8541{
8542 unsigned long long new_degraded;
8543 sysfs_get_ll(info, NULL, "degraded", &new_degraded);
8544 if (new_degraded != (unsigned long long)degraded) {
8545 /* check each device to ensure it is still working */
8546 struct mdinfo *sd;
8547 new_degraded = 0;
8548 for (sd = info->devs ; sd ; sd = sd->next) {
8549 if (sd->disk.state & (1<<MD_DISK_FAULTY))
8550 continue;
8551 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
8552 char sbuf[20];
8553 if (sysfs_get_str(info,
8554 sd, "state", sbuf, 20) < 0 ||
8555 strstr(sbuf, "faulty") ||
8556 strstr(sbuf, "in_sync") == NULL) {
8557 /* this device is dead */
8558 sd->disk.state = (1<<MD_DISK_FAULTY);
8559 if (sd->disk.raid_disk >= 0 &&
8560 sources[sd->disk.raid_disk] >= 0) {
8561 close(sources[
8562 sd->disk.raid_disk]);
8563 sources[sd->disk.raid_disk] =
8564 -1;
8565 }
8566 new_degraded++;
8567 }
8568 }
8569 }
8570 }
8571
8572 return new_degraded;
8573}
8574
10f22854
AK
8575/*******************************************************************************
8576 * Function: imsm_manage_reshape
8577 * Description: Function finds array under reshape and it manages reshape
8578 * process. It creates stripes backups (if required) and sets
8579 * checheckpoits.
8580 * Parameters:
8581 * afd : Backup handle (nattive) - not used
8582 * sra : general array info
8583 * reshape : reshape parameters - not used
8584 * st : supertype structure
8585 * blocks : size of critical section [blocks]
8586 * fds : table of source device descriptor
8587 * offsets : start of array (offest per devices)
8588 * dests : not used
8589 * destfd : table of destination device descriptor
8590 * destoffsets : table of destination offsets (per device)
8591 * Returns:
8592 * 1 : success, reshape is done
8593 * 0 : fail
8594 ******************************************************************************/
999b4972
N
8595static int imsm_manage_reshape(
8596 int afd, struct mdinfo *sra, struct reshape *reshape,
10f22854 8597 struct supertype *st, unsigned long backup_blocks,
999b4972
N
8598 int *fds, unsigned long long *offsets,
8599 int dests, int *destfd, unsigned long long *destoffsets)
8600{
10f22854
AK
8601 int ret_val = 0;
8602 struct intel_super *super = st->sb;
8603 struct intel_dev *dv = NULL;
8604 struct imsm_dev *dev = NULL;
8605 struct imsm_map *map_src, *map_dest;
8606 int migr_vol_qan = 0;
8607 int ndata, odata; /* [bytes] */
8608 int chunk; /* [bytes] */
8609 struct migr_record *migr_rec;
8610 char *buf = NULL;
8611 unsigned int buf_size; /* [bytes] */
8612 unsigned long long max_position; /* array size [bytes] */
8613 unsigned long long next_step; /* [blocks]/[bytes] */
8614 unsigned long long old_data_stripe_length;
8615 unsigned long long new_data_stripe_length;
8616 unsigned long long start_src; /* [bytes] */
8617 unsigned long long start; /* [bytes] */
8618 unsigned long long start_buf_shift; /* [bytes] */
b915c95f 8619 int degraded = 0;
10f22854
AK
8620
8621 if (!fds || !offsets || !destfd || !destoffsets || !sra)
8622 goto abort;
8623
8624 /* Find volume during the reshape */
8625 for (dv = super->devlist; dv; dv = dv->next) {
8626 if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
8627 && dv->dev->vol.migr_state == 1) {
8628 dev = dv->dev;
8629 migr_vol_qan++;
8630 }
8631 }
8632 /* Only one volume can migrate at the same time */
8633 if (migr_vol_qan != 1) {
8634 fprintf(stderr, Name " : %s", migr_vol_qan ?
8635 "Number of migrating volumes greater than 1\n" :
8636 "There is no volume during migrationg\n");
8637 goto abort;
8638 }
8639
8640 map_src = get_imsm_map(dev, 1);
8641 if (map_src == NULL)
8642 goto abort;
8643 map_dest = get_imsm_map(dev, 0);
8644
8645 ndata = imsm_num_data_members(dev, 0);
8646 odata = imsm_num_data_members(dev, 1);
8647
8648 chunk = map_src->blocks_per_strip * 512;
8649 old_data_stripe_length = odata * chunk;
8650
8651 migr_rec = super->migr_rec;
8652
8653 /* [bytes] */
8654 sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
8655 sra->new_level = map_dest->raid_level;
8656 new_data_stripe_length = sra->new_chunk * ndata;
8657
8658 /* initialize migration record for start condition */
8659 if (sra->reshape_progress == 0)
8660 init_migr_record_imsm(st, dev, sra);
8661
8662 /* size for data */
8663 buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
8664 /* extend buffer size for parity disk */
8665 buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
8666 /* add space for stripe aligment */
8667 buf_size += old_data_stripe_length;
8668 if (posix_memalign((void **)&buf, 4096, buf_size)) {
8669 dprintf("imsm: Cannot allocate checpoint buffer\n");
8670 goto abort;
8671 }
8672
8673 max_position =
8674 __le32_to_cpu(migr_rec->post_migr_vol_cap) +
8675 ((unsigned long long)__le32_to_cpu(
8676 migr_rec->post_migr_vol_cap_hi) << 32);
8677
8678 while (__le32_to_cpu(migr_rec->curr_migr_unit) <
8679 __le32_to_cpu(migr_rec->num_migr_units)) {
8680 /* current reshape position [blocks] */
8681 unsigned long long current_position =
8682 __le32_to_cpu(migr_rec->blocks_per_unit)
8683 * __le32_to_cpu(migr_rec->curr_migr_unit);
8684 unsigned long long border;
8685
b915c95f
AK
8686 /* Check that array hasn't become failed.
8687 */
8688 degraded = check_degradation_change(sra, fds, degraded);
8689 if (degraded > 1) {
8690 dprintf("imsm: Abort reshape due to degradation"
8691 " level (%i)\n", degraded);
8692 goto abort;
8693 }
8694
10f22854
AK
8695 next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
8696
8697 if ((current_position + next_step) > max_position)
8698 next_step = max_position - current_position;
8699
8700 start = (map_src->pba_of_lba0 + dev->reserved_blocks +
8701 current_position) * 512;
8702
8703 /* allign reading start to old geometry */
8704 start_buf_shift = start % old_data_stripe_length;
8705 start_src = start - start_buf_shift;
8706
8707 border = (start_src / odata) - (start / ndata);
8708 border /= 512;
8709 if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
8710 /* save critical stripes to buf
8711 * start - start address of current unit
8712 * to backup [bytes]
8713 * start_src - start address of current unit
8714 * to backup alligned to source array
8715 * [bytes]
8716 */
8717 unsigned long long next_step_filler = 0;
8718 unsigned long long copy_length = next_step * 512;
8719
8720 /* allign copy area length to stripe in old geometry */
8721 next_step_filler = ((copy_length + start_buf_shift)
8722 % old_data_stripe_length);
8723 if (next_step_filler)
8724 next_step_filler = (old_data_stripe_length
8725 - next_step_filler);
8726 dprintf("save_stripes() parameters: start = %llu,"
8727 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8728 "\tstart_in_buf_shift = %llu,"
8729 "\tnext_step_filler = %llu\n",
8730 start, start_src, copy_length,
8731 start_buf_shift, next_step_filler);
8732
8733 if (save_stripes(fds, offsets, map_src->num_members,
8734 chunk, sra->array.level,
8735 sra->array.layout, 0, NULL, start_src,
8736 copy_length +
8737 next_step_filler + start_buf_shift,
8738 buf)) {
8739 dprintf("imsm: Cannot save stripes"
8740 " to buffer\n");
8741 goto abort;
8742 }
8743 /* Convert data to destination format and store it
8744 * in backup general migration area
8745 */
8746 if (save_backup_imsm(st, dev, sra,
8747 buf + start_buf_shift,
8748 ndata, copy_length)) {
8749 dprintf("imsm: Cannot save stripes to "
8750 "target devices\n");
8751 goto abort;
8752 }
8753 if (save_checkpoint_imsm(st, sra,
8754 UNIT_SRC_IN_CP_AREA)) {
8755 dprintf("imsm: Cannot write checkpoint to "
8756 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8757 goto abort;
8758 }
8759 /* decrease backup_blocks */
8760 if (backup_blocks > (unsigned long)next_step)
8761 backup_blocks -= next_step;
8762 else
8763 backup_blocks = 0;
8764 }
8765 /* When data backed up, checkpoint stored,
8766 * kick the kernel to reshape unit of data
8767 */
8768 next_step = next_step + sra->reshape_progress;
8769 sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
8770 sysfs_set_num(sra, NULL, "suspend_hi", next_step);
8771
8772 /* wait until reshape finish */
c47b0ff6
AK
8773 if (wait_for_reshape_imsm(sra, next_step, ndata) < 0) {
8774 dprintf("wait_for_reshape_imsm returned error!\n");
8775 goto abort;
8776 }
10f22854
AK
8777
8778 sra->reshape_progress = next_step;
8779
8780 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) {
8781 dprintf("imsm: Cannot write checkpoint to "
8782 "migration record (UNIT_SRC_NORMAL)\n");
8783 goto abort;
8784 }
8785
8786 }
8787
8788 /* return '1' if done */
8789 ret_val = 1;
8790abort:
8791 free(buf);
8792 abort_reshape(sra);
8793
8794 return ret_val;
999b4972 8795}
71204a50 8796#endif /* MDASSEMBLE */
999b4972 8797
cdddbdbc
DW
/* Superswitch operations table for the IMSM (Intel Matrix Storage Manager)
 * metadata format.  Wires the imsm handlers into mdadm's generic
 * format-dispatch mechanism; entries guarded by MDASSEMBLE are compiled
 * out of the minimal assembly-only build.
 */
struct superswitch super_imsm = {
#ifndef MDASSEMBLE
	/* examine / report operations */
	.examine_super	= examine_super_imsm,
	.brief_examine_super = brief_examine_super_imsm,
	.brief_examine_subarrays = brief_examine_subarrays_imsm,
	.export_examine_super = export_examine_super_imsm,
	.detail_super	= detail_super_imsm,
	.brief_detail_super = brief_detail_super_imsm,
	/* creation / modification operations */
	.write_init_super = write_init_super_imsm,
	.validate_geometry = validate_geometry_imsm,
	.add_to_super	= add_to_super_imsm,
	.remove_from_super = remove_from_super_imsm,
	.detail_platform = detail_platform_imsm,
	.kill_subarray = kill_subarray_imsm,
	.update_subarray = update_subarray_imsm,
	.load_container	= load_container_imsm,
	.default_geometry = default_geometry_imsm,
	.get_disk_controller_domain = imsm_get_disk_controller_domain,
	/* reshape / grow operations */
	.reshape_super  = imsm_reshape_super,
	.manage_reshape = imsm_manage_reshape,
#endif
	.match_home	= match_home_imsm,
	.uuid_from_super= uuid_from_super_imsm,
	.getinfo_super  = getinfo_super_imsm,
	.getinfo_super_disks = getinfo_super_disks_imsm,
	.update_super	= update_super_imsm,

	.avail_size	= avail_size_imsm,
	.min_acceptable_spare_size = min_acceptable_spare_size_imsm,

	.compare_super	= compare_super_imsm,

	.load_super	= load_super_imsm,
	.init_super	= init_super_imsm,
	.store_super	= store_super_imsm,
	.free_super	= free_super_imsm,
	.match_metadata_desc = match_metadata_desc_imsm,
	.container_content = container_content_imsm,

	.recover_backup = recover_backup_imsm,

	/* external metadata: managed by mdmon, not the kernel */
	.external	= 1,
	.name = "imsm",

#ifndef MDASSEMBLE
/* for mdmon */
	.open_new	= imsm_open_new,
	.set_array_state= imsm_set_array_state,
	.set_disk	= imsm_set_disk,
	.sync_metadata	= imsm_sync_metadata,
	.activate_spare = imsm_activate_spare,
	.process_update = imsm_process_update,
	.prepare_update = imsm_prepare_update,
#endif /* MDASSEMBLE */
};