]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-intel.c
imsm: Prepare checkpoint update for general migration
[thirdparty/mdadm.git] / super-intel.c
CommitLineData
cdddbdbc
DW
1/*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
a54d5262 4 * Copyright (C) 2002-2008 Intel Corporation
cdddbdbc
DW
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
51006d85 20#define HAVE_STDINT_H 1
cdddbdbc 21#include "mdadm.h"
c2a1e7da 22#include "mdmon.h"
51006d85 23#include "sha1.h"
88c32bb1 24#include "platform-intel.h"
cdddbdbc
DW
25#include <values.h>
26#include <scsi/sg.h>
27#include <ctype.h>
d665cc31 28#include <dirent.h>
cdddbdbc
DW
29
30/* MPB == Metadata Parameter Block */
31#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33#define MPB_VERSION_RAID0 "1.0.00"
34#define MPB_VERSION_RAID1 "1.1.00"
fe7ed8cb
DW
35#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
cdddbdbc 37#define MPB_VERSION_RAID5 "1.2.02"
fe7ed8cb
DW
38#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39#define MPB_VERSION_CNG "1.2.06"
40#define MPB_VERSION_ATTRIBS "1.3.00"
cdddbdbc
DW
41#define MAX_SIGNATURE_LENGTH 32
42#define MAX_RAID_SERIAL_LEN 16
fe7ed8cb
DW
43
44#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
53
8e59f3d8 54#define MPB_SECTOR_CNT 2210
c2c087e6 55#define IMSM_RESERVED_SECTORS 4096
979d38be 56#define SECT_PER_MB_SHIFT 11
cdddbdbc
DW
57
58/* Disk configuration info. */
59#define IMSM_MAX_DEVICES 255
60struct imsm_disk {
61 __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
f2f27e63
DW
64#define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65#define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66#define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
cdddbdbc 67 __u32 status; /* 0xF0 - 0xF3 */
fe7ed8cb
DW
68 __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
69#define IMSM_DISK_FILLERS 4
cdddbdbc
DW
70 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
71};
72
73/* RAID map configuration infos. */
74struct imsm_map {
75 __u32 pba_of_lba0; /* start address of partition */
76 __u32 blocks_per_member;/* blocks per member */
77 __u32 num_data_stripes; /* number of data stripes */
78 __u16 blocks_per_strip;
79 __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
80#define IMSM_T_STATE_NORMAL 0
81#define IMSM_T_STATE_UNINITIALIZED 1
e3bba0e0
DW
82#define IMSM_T_STATE_DEGRADED 2
83#define IMSM_T_STATE_FAILED 3
cdddbdbc
DW
84 __u8 raid_level;
85#define IMSM_T_RAID0 0
86#define IMSM_T_RAID1 1
87#define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members; /* number of member disks */
fe7ed8cb
DW
89 __u8 num_domains; /* number of parity domains */
90 __u8 failed_disk_num; /* valid only when state is degraded */
252d23c0 91 __u8 ddf;
cdddbdbc 92 __u32 filler[7]; /* expansion area */
7eef0453 93#define IMSM_ORD_REBUILD (1 << 24)
cdddbdbc 94 __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
7eef0453
DW
95 * top byte contains some flags
96 */
cdddbdbc
DW
97} __attribute__ ((packed));
98
99struct imsm_vol {
f8f603f1 100 __u32 curr_migr_unit;
fe7ed8cb 101 __u32 checkpoint_id; /* id to access curr_migr_unit */
cdddbdbc 102 __u8 migr_state; /* Normal or Migrating */
e3bba0e0
DW
103#define MIGR_INIT 0
104#define MIGR_REBUILD 1
105#define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
106#define MIGR_GEN_MIGR 3
107#define MIGR_STATE_CHANGE 4
1484e727 108#define MIGR_REPAIR 5
cdddbdbc
DW
109 __u8 migr_type; /* Initializing, Rebuilding, ... */
110 __u8 dirty;
fe7ed8cb
DW
111 __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors; /* number of mismatches */
113 __u16 bad_blocks; /* number of bad blocks during verify */
114 __u32 filler[4];
cdddbdbc
DW
115 struct imsm_map map[1];
116 /* here comes another one if migr_state */
117} __attribute__ ((packed));
118
119struct imsm_dev {
fe7ed8cb 120 __u8 volume[MAX_RAID_SERIAL_LEN];
cdddbdbc
DW
121 __u32 size_low;
122 __u32 size_high;
fe7ed8cb
DW
123#define DEV_BOOTABLE __cpu_to_le32(0x01)
124#define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125#define DEV_READ_COALESCING __cpu_to_le32(0x04)
126#define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127#define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128#define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129#define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130#define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131#define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132#define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133#define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134#define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135#define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
cdddbdbc
DW
136 __u32 status; /* Persistent RaidDev status */
137 __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
fe7ed8cb
DW
138 __u8 migr_priority;
139 __u8 num_sub_vols;
140 __u8 tid;
141 __u8 cng_master_disk;
142 __u16 cache_policy;
143 __u8 cng_state;
144 __u8 cng_sub_state;
145#define IMSM_DEV_FILLERS 10
cdddbdbc
DW
146 __u32 filler[IMSM_DEV_FILLERS];
147 struct imsm_vol vol;
148} __attribute__ ((packed));
149
150struct imsm_super {
151 __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
152 __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
604b746f
JD
156 __u32 error_log_size; /* 0x30 - 0x33 in bytes */
157 __u32 attributes; /* 0x34 - 0x37 */
cdddbdbc
DW
158 __u8 num_disks; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs; /* 0x39 Number of configured volumes */
604b746f
JD
160 __u8 error_log_pos; /* 0x3A */
161 __u8 fill[1]; /* 0x3B */
162 __u32 cache_size; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166#define IMSM_FILLERS 35
167 __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
cdddbdbc
DW
168 struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
604b746f 170 /* here comes BBM logs */
cdddbdbc
DW
171} __attribute__ ((packed));
172
604b746f
JD
173#define BBM_LOG_MAX_ENTRIES 254
174
175struct bbm_log_entry {
176 __u64 defective_block_start;
177#define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset;
179 __u16 remapped_marked_count;
180 __u16 disk_ordinal;
181} __attribute__ ((__packed__));
182
183struct bbm_log {
184 __u32 signature; /* 0xABADB10C */
185 __u32 entry_count;
186 __u32 reserved_spare_block_count; /* 0 */
187 __u32 reserved; /* 0xFFFF */
188 __u64 first_spare_lba;
189 struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
190} __attribute__ ((__packed__));
191
192
cdddbdbc
DW
193#ifndef MDASSEMBLE
194static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
195#endif
196
8e59f3d8
AK
197#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
198
199#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
200
201#define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
202 * be recovered using srcMap */
203#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
204 * already been migrated and must
205 * be recovered from checkpoint area */
206struct migr_record {
207 __u32 rec_status; /* Status used to determine how to restart
208 * migration in case it aborts
209 * in some fashion */
210 __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
211 __u32 family_num; /* Family number of MPB
212 * containing the RaidDev
213 * that is migrating */
214 __u32 ascending_migr; /* True if migrating in increasing
215 * order of lbas */
216 __u32 blocks_per_unit; /* Num disk blocks per unit of operation */
217 __u32 dest_depth_per_unit; /* Num member blocks each destMap
218 * member disk
219 * advances per unit-of-operation */
220 __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
221 __u32 dest_1st_member_lba; /* First member lba on first
222 * stripe of destination */
223 __u32 num_migr_units; /* Total num migration units-of-op */
224 __u32 post_migr_vol_cap; /* Size of volume after
225 * migration completes */
226 __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
227 __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
228 * migration ckpt record was read from
229 * (for recovered migrations) */
230} __attribute__ ((__packed__));
231
1484e727
DW
232static __u8 migr_type(struct imsm_dev *dev)
233{
234 if (dev->vol.migr_type == MIGR_VERIFY &&
235 dev->status & DEV_VERIFY_AND_FIX)
236 return MIGR_REPAIR;
237 else
238 return dev->vol.migr_type;
239}
240
241static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
242{
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
245 */
246 if (migr_type == MIGR_REPAIR) {
247 dev->vol.migr_type = MIGR_VERIFY;
248 dev->status |= DEV_VERIFY_AND_FIX;
249 } else {
250 dev->vol.migr_type = migr_type;
251 dev->status &= ~DEV_VERIFY_AND_FIX;
252 }
253}
254
/* Return the number of 512 byte sectors needed to hold @bytes, rounding
 * a partial sector up.
 *
 * Computed by division and remainder rather than by adding 511 first:
 * the addition wraps in 32 bits for sizes within 511 bytes of 4GiB and
 * would report zero sectors.
 */
static unsigned int sector_count(__u32 bytes)
{
	return bytes / 512 + (bytes % 512 != 0);
}
cdddbdbc 259
87eb16df
DW
260static unsigned int mpb_sectors(struct imsm_super *mpb)
261{
262 return sector_count(__le32_to_cpu(mpb->mpb_size));
cdddbdbc
DW
263}
264
ba2de7ba
DW
/* Node of the singly linked list of raid devices (intel_super.devlist). */
struct intel_dev {
	struct imsm_dev *dev;		/* the raid device itself */
	struct intel_dev *next;		/* next list node or NULL */
	unsigned index;			/* key matched by get_imsm_dev() */
};
270
88654014
LM
271struct intel_hba {
272 enum sys_dev_type type;
273 char *path;
274 char *pci_id;
275 struct intel_hba *next;
276};
277
1a64be56
LM
/* operation recorded in dl.action */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
cdddbdbc
DW
282/* internal representation of IMSM metadata */
283struct intel_super {
284 union {
949c47a0
DW
285 void *buf; /* O_DIRECT buffer for reading/writing metadata */
286 struct imsm_super *anchor; /* immovable parameters */
cdddbdbc 287 };
8e59f3d8
AK
288 union {
289 void *migr_rec_buf; /* buffer for I/O operations */
290 struct migr_record *migr_rec; /* migration record */
291 };
949c47a0 292 size_t len; /* size of the 'buf' allocation */
4d7b1503
DW
293 void *next_buf; /* for realloc'ing buf from the manager */
294 size_t next_len;
c2c087e6 295 int updates_pending; /* count of pending updates for mdmon */
bf5a934a 296 int current_vol; /* index of raid device undergoing creation */
0dcecb2e 297 __u32 create_offset; /* common start for 'current_vol' */
148acb7b 298 __u32 random; /* random data for seeding new family numbers */
ba2de7ba 299 struct intel_dev *devlist;
cdddbdbc
DW
300 struct dl {
301 struct dl *next;
302 int index;
303 __u8 serial[MAX_RAID_SERIAL_LEN];
304 int major, minor;
305 char *devname;
b9f594fe 306 struct imsm_disk disk;
cdddbdbc 307 int fd;
0dcecb2e
DW
308 int extent_cnt;
309 struct extent *e; /* for determining freespace @ create */
efb30e7f 310 int raiddisk; /* slot to fill in autolayout */
1a64be56 311 enum action action;
cdddbdbc 312 } *disks;
1a64be56
LM
313 struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
314 active */
47ee5a45 315 struct dl *missing; /* disks removed while we weren't looking */
43dad3d6 316 struct bbm_log *bbm_log;
88654014 317 struct intel_hba *hba; /* device path of the raid controller for this metadata */
88c32bb1 318 const struct imsm_orom *orom; /* platform firmware support */
a2b97981
DW
319 struct intel_super *next; /* (temp) list for disambiguating family_num */
320};
321
322struct intel_disk {
323 struct imsm_disk disk;
324 #define IMSM_UNKNOWN_OWNER (-1)
325 int owner;
326 struct intel_disk *next;
cdddbdbc
DW
327};
328
c2c087e6
DW
/* A used region of a disk; get_extents() terminates its array with an
 * entry whose size is 0.
 */
struct extent {
	unsigned long long start;
	unsigned long long size;
};
332
694575e7
KW
/* kinds of reshape process */
enum imsm_reshape_type {
	CH_TAKEOVER,
	CH_MIGRATION,
};
338
88758e9d
DW
/* Identifiers of the messages passed to imsm_process_update.  Every
 * imsm_update_* structure starts with one of these values; the order
 * of the enumerators fixes their numeric values.
 */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_reshape_migration,
	update_takeover,
	update_general_migration_checkpoint,
};
351
352struct imsm_update_activate_spare {
353 enum imsm_update_type type;
d23fe947 354 struct dl *dl;
88758e9d
DW
355 int slot;
356 int array;
357 struct imsm_update_activate_spare *next;
358};
359
78b10e66
N
/* bundle of array geometry parameters */
struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};
369
bb025c2f
KW
/* direction field of struct imsm_update_takeover */
enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10,
};
374struct imsm_update_takeover {
375 enum imsm_update_type type;
376 int subarray;
377 enum takeover_direction direction;
378};
78b10e66
N
379
380struct imsm_update_reshape {
381 enum imsm_update_type type;
382 int old_raid_disks;
383 int new_raid_disks;
48c5303a
PC
384
385 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
386};
387
388struct imsm_update_reshape_migration {
389 enum imsm_update_type type;
390 int old_raid_disks;
391 int new_raid_disks;
392 /* fields for array migration changes
393 */
394 int subdev;
395 int new_level;
396 int new_layout;
4bba0439 397 int new_chunksize;
48c5303a 398
d195167d 399 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
78b10e66
N
400};
401
2d40f3a1
AK
402struct imsm_update_general_migration_checkpoint {
403 enum imsm_update_type type;
404 __u32 curr_migr_unit;
405};
406
54c2c1ea
DW
407struct disk_info {
408 __u8 serial[MAX_RAID_SERIAL_LEN];
409};
410
8273f55e
DW
411struct imsm_update_create_array {
412 enum imsm_update_type type;
8273f55e 413 int dev_idx;
6a3e913e 414 struct imsm_dev dev;
8273f55e
DW
415};
416
33414a01
DW
417struct imsm_update_kill_array {
418 enum imsm_update_type type;
419 int dev_idx;
420};
421
aa534678
DW
422struct imsm_update_rename_array {
423 enum imsm_update_type type;
424 __u8 name[MAX_RAID_SERIAL_LEN];
425 int dev_idx;
426};
427
1a64be56 428struct imsm_update_add_remove_disk {
43dad3d6
DW
429 enum imsm_update_type type;
430};
431
88654014
LM
432
433static const char *_sys_dev_type[] = {
434 [SYS_DEV_UNKNOWN] = "Unknown",
435 [SYS_DEV_SAS] = "SAS",
436 [SYS_DEV_SATA] = "SATA"
437};
438
439const char *get_sys_dev_type(enum sys_dev_type type)
440{
441 if (type >= SYS_DEV_MAX)
442 type = SYS_DEV_UNKNOWN;
443
444 return _sys_dev_type[type];
445}
446
447static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
448{
449 struct intel_hba *result = malloc(sizeof(*result));
450 if (result) {
451 result->type = device->type;
452 result->path = strdup(device->path);
453 result->next = NULL;
454 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
455 result->pci_id++;
456 }
457 return result;
458}
459
460static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
461{
462 struct intel_hba *result=NULL;
463 for (result = hba; result; result = result->next) {
464 if (result->type == device->type && strcmp(result->path, device->path) == 0)
465 break;
466 }
467 return result;
468}
469
b4cf4cba 470static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
88654014
LM
471{
472 struct intel_hba *hba;
473
474 /* check if disk attached to Intel HBA */
475 hba = find_intel_hba(super->hba, device);
476 if (hba != NULL)
477 return 1;
478 /* Check if HBA is already attached to super */
479 if (super->hba == NULL) {
480 super->hba = alloc_intel_hba(device);
481 return 1;
482 }
483
484 hba = super->hba;
485 /* Intel metadata allows for all disks attached to the same type HBA.
486 * Do not sypport odf HBA types mixing
487 */
488 if (device->type != hba->type)
489 return 2;
490
491 while (hba->next)
492 hba = hba->next;
493
494 hba->next = alloc_intel_hba(device);
495 return 1;
496}
497
498static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
499{
500 struct sys_dev *list, *elem, *prev;
501 char *disk_path;
502
503 if ((list = find_intel_devices()) == NULL)
504 return 0;
505
506 if (fd < 0)
507 disk_path = (char *) devname;
508 else
509 disk_path = diskfd_to_devpath(fd);
510
511 if (!disk_path) {
512 free_sys_dev(&list);
513 return 0;
514 }
515
516 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
517 if (path_attached_to_hba(disk_path, elem->path)) {
518 if (prev == NULL)
519 list = list->next;
520 else
521 prev->next = elem->next;
522 elem->next = NULL;
523 if (disk_path != devname)
524 free(disk_path);
525 free_sys_dev(&list);
526 return elem;
527 }
528 }
529 if (disk_path != devname)
530 free(disk_path);
531 free_sys_dev(&list);
532
533 return NULL;
534}
535
536
d424212e
N
537static int find_intel_hba_capability(int fd, struct intel_super *super,
538 char *devname);
f2f5c343 539
cdddbdbc
DW
540static struct supertype *match_metadata_desc_imsm(char *arg)
541{
542 struct supertype *st;
543
544 if (strcmp(arg, "imsm") != 0 &&
545 strcmp(arg, "default") != 0
546 )
547 return NULL;
548
549 st = malloc(sizeof(*st));
4e9d2186
AW
550 if (!st)
551 return NULL;
ef609477 552 memset(st, 0, sizeof(*st));
d1d599ea 553 st->container_dev = NoMdDev;
cdddbdbc
DW
554 st->ss = &super_imsm;
555 st->max_devs = IMSM_MAX_DEVICES;
556 st->minor_version = 0;
557 st->sb = NULL;
558 return st;
559}
560
0e600426 561#ifndef MDASSEMBLE
cdddbdbc
DW
562static __u8 *get_imsm_version(struct imsm_super *mpb)
563{
564 return &mpb->sig[MPB_SIG_LEN];
565}
0e600426 566#endif
cdddbdbc 567
949c47a0
DW
568/* retrieve a disk directly from the anchor when the anchor is known to be
569 * up-to-date, currently only at load time
570 */
571static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
cdddbdbc 572{
949c47a0 573 if (index >= mpb->num_disks)
cdddbdbc
DW
574 return NULL;
575 return &mpb->disk[index];
576}
577
95d07a2c
LM
578/* retrieve the disk description based on a index of the disk
579 * in the sub-array
580 */
581static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
949c47a0 582{
b9f594fe
DW
583 struct dl *d;
584
585 for (d = super->disks; d; d = d->next)
586 if (d->index == index)
95d07a2c
LM
587 return d;
588
589 return NULL;
590}
591/* retrieve a disk from the parsed metadata */
592static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
593{
594 struct dl *dl;
595
596 dl = get_imsm_dl_disk(super, index);
597 if (dl)
598 return &dl->disk;
599
b9f594fe 600 return NULL;
949c47a0
DW
601}
602
603/* generate a checksum directly from the anchor when the anchor is known to be
604 * up-to-date, currently only at load or write_super after coalescing
605 */
606static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
cdddbdbc
DW
607{
608 __u32 end = mpb->mpb_size / sizeof(end);
609 __u32 *p = (__u32 *) mpb;
610 __u32 sum = 0;
611
97f734fd
N
612 while (end--) {
613 sum += __le32_to_cpu(*p);
614 p++;
615 }
cdddbdbc
DW
616
617 return sum - __le32_to_cpu(mpb->check_sum);
618}
619
a965f303
DW
620static size_t sizeof_imsm_map(struct imsm_map *map)
621{
622 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
623}
624
625struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
cdddbdbc 626{
5e7b0330
AK
627 /* A device can have 2 maps if it is in the middle of a migration.
628 * If second_map is:
629 * 0 - we return the first map
630 * 1 - we return the second map if it exists, else NULL
631 * -1 - we return the second map if it exists, else the first
632 */
a965f303
DW
633 struct imsm_map *map = &dev->vol.map[0];
634
5e7b0330 635 if (second_map == 1 && !dev->vol.migr_state)
a965f303 636 return NULL;
5e7b0330
AK
637 else if (second_map == 1 ||
638 (second_map < 0 && dev->vol.migr_state)) {
a965f303
DW
639 void *ptr = map;
640
641 return ptr + sizeof_imsm_map(map);
642 } else
643 return map;
5e7b0330 644
a965f303 645}
cdddbdbc 646
3393c6af
DW
647/* return the size of the device.
648 * migr_state increases the returned size if map[0] were to be duplicated
649 */
650static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
a965f303
DW
651{
652 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
653 sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
654
655 /* migrating means an additional map */
a965f303
DW
656 if (dev->vol.migr_state)
657 size += sizeof_imsm_map(get_imsm_map(dev, 1));
3393c6af
DW
658 else if (migr_state)
659 size += sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
660
661 return size;
662}
663
54c2c1ea
DW
664#ifndef MDASSEMBLE
665/* retrieve disk serial number list from a metadata update */
666static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
667{
668 void *u = update;
669 struct disk_info *inf;
670
671 inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
672 sizeof_imsm_dev(&update->dev, 0);
673
674 return inf;
675}
676#endif
677
949c47a0 678static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
cdddbdbc
DW
679{
680 int offset;
681 int i;
682 void *_mpb = mpb;
683
949c47a0 684 if (index >= mpb->num_raid_devs)
cdddbdbc
DW
685 return NULL;
686
687 /* devices start after all disks */
688 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
689
690 for (i = 0; i <= index; i++)
691 if (i == index)
692 return _mpb + offset;
693 else
3393c6af 694 offset += sizeof_imsm_dev(_mpb + offset, 0);
cdddbdbc
DW
695
696 return NULL;
697}
698
949c47a0
DW
699static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
700{
ba2de7ba
DW
701 struct intel_dev *dv;
702
949c47a0
DW
703 if (index >= super->anchor->num_raid_devs)
704 return NULL;
ba2de7ba
DW
705 for (dv = super->devlist; dv; dv = dv->next)
706 if (dv->index == index)
707 return dv->dev;
708 return NULL;
949c47a0
DW
709}
710
98130f40
AK
711/*
712 * for second_map:
713 * == 0 get first map
714 * == 1 get second map
715 * == -1 than get map according to the current migr_state
716 */
717static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
718 int slot,
719 int second_map)
7eef0453
DW
720{
721 struct imsm_map *map;
722
5e7b0330 723 map = get_imsm_map(dev, second_map);
7eef0453 724
ff077194
DW
725 /* top byte identifies disk under rebuild */
726 return __le32_to_cpu(map->disk_ord_tbl[slot]);
727}
728
729#define ord_to_idx(ord) (((ord) << 8) >> 8)
98130f40 730static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
ff077194 731{
98130f40 732 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
ff077194
DW
733
734 return ord_to_idx(ord);
7eef0453
DW
735}
736
be73972f
DW
737static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
738{
739 map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
740}
741
f21e18ca 742static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
620b1713
DW
743{
744 int slot;
745 __u32 ord;
746
747 for (slot = 0; slot < map->num_members; slot++) {
748 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
749 if (ord_to_idx(ord) == idx)
750 return slot;
751 }
752
753 return -1;
754}
755
cdddbdbc
DW
756static int get_imsm_raid_level(struct imsm_map *map)
757{
758 if (map->raid_level == 1) {
759 if (map->num_members == 2)
760 return 1;
761 else
762 return 10;
763 }
764
765 return map->raid_level;
766}
767
c2c087e6
DW
768static int cmp_extent(const void *av, const void *bv)
769{
770 const struct extent *a = av;
771 const struct extent *b = bv;
772 if (a->start < b->start)
773 return -1;
774 if (a->start > b->start)
775 return 1;
776 return 0;
777}
778
0dcecb2e 779static int count_memberships(struct dl *dl, struct intel_super *super)
c2c087e6 780{
c2c087e6 781 int memberships = 0;
620b1713 782 int i;
c2c087e6 783
949c47a0
DW
784 for (i = 0; i < super->anchor->num_raid_devs; i++) {
785 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 786 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 787
620b1713
DW
788 if (get_imsm_disk_slot(map, dl->index) >= 0)
789 memberships++;
c2c087e6 790 }
0dcecb2e
DW
791
792 return memberships;
793}
794
795static struct extent *get_extents(struct intel_super *super, struct dl *dl)
796{
797 /* find a list of used extents on the given physical device */
798 struct extent *rv, *e;
620b1713 799 int i;
0dcecb2e
DW
800 int memberships = count_memberships(dl, super);
801 __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
802
c2c087e6
DW
803 rv = malloc(sizeof(struct extent) * (memberships + 1));
804 if (!rv)
805 return NULL;
806 e = rv;
807
949c47a0
DW
808 for (i = 0; i < super->anchor->num_raid_devs; i++) {
809 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 810 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6 811
620b1713
DW
812 if (get_imsm_disk_slot(map, dl->index) >= 0) {
813 e->start = __le32_to_cpu(map->pba_of_lba0);
814 e->size = __le32_to_cpu(map->blocks_per_member);
815 e++;
c2c087e6
DW
816 }
817 }
818 qsort(rv, memberships, sizeof(*rv), cmp_extent);
819
14e8215b
DW
820 /* determine the start of the metadata
821 * when no raid devices are defined use the default
822 * ...otherwise allow the metadata to truncate the value
823 * as is the case with older versions of imsm
824 */
825 if (memberships) {
826 struct extent *last = &rv[memberships - 1];
827 __u32 remainder;
828
829 remainder = __le32_to_cpu(dl->disk.total_blocks) -
830 (last->start + last->size);
dda5855f
DW
831 /* round down to 1k block to satisfy precision of the kernel
832 * 'size' interface
833 */
834 remainder &= ~1UL;
835 /* make sure remainder is still sane */
f21e18ca 836 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
dda5855f 837 remainder = ROUND_UP(super->len, 512) >> 9;
14e8215b
DW
838 if (reservation > remainder)
839 reservation = remainder;
840 }
841 e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
c2c087e6
DW
842 e->size = 0;
843 return rv;
844}
845
14e8215b
DW
846/* try to determine how much space is reserved for metadata from
847 * the last get_extents() entry, otherwise fallback to the
848 * default
849 */
850static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
851{
852 struct extent *e;
853 int i;
854 __u32 rv;
855
856 /* for spares just return a minimal reservation which will grow
857 * once the spare is picked up by an array
858 */
859 if (dl->index == -1)
860 return MPB_SECTOR_CNT;
861
862 e = get_extents(super, dl);
863 if (!e)
864 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
865
866 /* scroll to last entry */
867 for (i = 0; e[i].size; i++)
868 continue;
869
870 rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;
871
872 free(e);
873
874 return rv;
875}
876
25ed7e59
DW
877static int is_spare(struct imsm_disk *disk)
878{
879 return (disk->status & SPARE_DISK) == SPARE_DISK;
880}
881
882static int is_configured(struct imsm_disk *disk)
883{
884 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
885}
886
887static int is_failed(struct imsm_disk *disk)
888{
889 return (disk->status & FAILED_DISK) == FAILED_DISK;
890}
891
80e7f8c3
AC
892/* Return minimum size of a spare that can be used in this array*/
893static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
894{
895 struct intel_super *super = st->sb;
896 struct dl *dl;
897 struct extent *e;
898 int i;
899 unsigned long long rv = 0;
900
901 if (!super)
902 return rv;
903 /* find first active disk in array */
904 dl = super->disks;
905 while (dl && (is_failed(&dl->disk) || dl->index == -1))
906 dl = dl->next;
907 if (!dl)
908 return rv;
909 /* find last lba used by subarrays */
910 e = get_extents(super, dl);
911 if (!e)
912 return rv;
913 for (i = 0; e[i].size; i++)
914 continue;
915 if (i > 0)
916 rv = e[i-1].start + e[i-1].size;
917 free(e);
918 /* add the amount of space needed for metadata */
919 rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
920 return rv * 512;
921}
922
1799c9e8 923#ifndef MDASSEMBLE
c47b0ff6
AK
924static __u64 blocks_per_migr_unit(struct intel_super *super,
925 struct imsm_dev *dev);
1e5c6983 926
c47b0ff6
AK
927static void print_imsm_dev(struct intel_super *super,
928 struct imsm_dev *dev,
929 char *uuid,
930 int disk_idx)
cdddbdbc
DW
931{
932 __u64 sz;
0d80bb2f 933 int slot, i;
a965f303 934 struct imsm_map *map = get_imsm_map(dev, 0);
dd8bcb3b 935 struct imsm_map *map2 = get_imsm_map(dev, 1);
b10b37b8 936 __u32 ord;
cdddbdbc
DW
937
938 printf("\n");
1e7bc0ed 939 printf("[%.16s]:\n", dev->volume);
44470971 940 printf(" UUID : %s\n", uuid);
dd8bcb3b
AK
941 printf(" RAID Level : %d", get_imsm_raid_level(map));
942 if (map2)
943 printf(" <-- %d", get_imsm_raid_level(map2));
944 printf("\n");
945 printf(" Members : %d", map->num_members);
946 if (map2)
947 printf(" <-- %d", map2->num_members);
948 printf("\n");
0d80bb2f
DW
949 printf(" Slots : [");
950 for (i = 0; i < map->num_members; i++) {
dd8bcb3b 951 ord = get_imsm_ord_tbl_ent(dev, i, 0);
0d80bb2f
DW
952 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
953 }
dd8bcb3b
AK
954 printf("]");
955 if (map2) {
956 printf(" <-- [");
957 for (i = 0; i < map2->num_members; i++) {
958 ord = get_imsm_ord_tbl_ent(dev, i, 1);
959 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
960 }
961 printf("]");
962 }
963 printf("\n");
7095bccb
AK
964 printf(" Failed disk : ");
965 if (map->failed_disk_num == 0xff)
966 printf("none");
967 else
968 printf("%i", map->failed_disk_num);
969 printf("\n");
620b1713
DW
970 slot = get_imsm_disk_slot(map, disk_idx);
971 if (slot >= 0) {
98130f40 972 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
b10b37b8
DW
973 printf(" This Slot : %d%s\n", slot,
974 ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
975 } else
cdddbdbc
DW
976 printf(" This Slot : ?\n");
977 sz = __le32_to_cpu(dev->size_high);
978 sz <<= 32;
979 sz += __le32_to_cpu(dev->size_low);
980 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
981 human_size(sz * 512));
982 sz = __le32_to_cpu(map->blocks_per_member);
983 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
984 human_size(sz * 512));
985 printf(" Sector Offset : %u\n",
986 __le32_to_cpu(map->pba_of_lba0));
987 printf(" Num Stripes : %u\n",
988 __le32_to_cpu(map->num_data_stripes));
dd8bcb3b 989 printf(" Chunk Size : %u KiB",
cdddbdbc 990 __le16_to_cpu(map->blocks_per_strip) / 2);
dd8bcb3b
AK
991 if (map2)
992 printf(" <-- %u KiB",
993 __le16_to_cpu(map2->blocks_per_strip) / 2);
994 printf("\n");
cdddbdbc 995 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
8655a7b1 996 printf(" Migrate State : ");
1484e727
DW
997 if (dev->vol.migr_state) {
998 if (migr_type(dev) == MIGR_INIT)
8655a7b1 999 printf("initialize\n");
1484e727 1000 else if (migr_type(dev) == MIGR_REBUILD)
8655a7b1 1001 printf("rebuild\n");
1484e727 1002 else if (migr_type(dev) == MIGR_VERIFY)
8655a7b1 1003 printf("check\n");
1484e727 1004 else if (migr_type(dev) == MIGR_GEN_MIGR)
8655a7b1 1005 printf("general migration\n");
1484e727 1006 else if (migr_type(dev) == MIGR_STATE_CHANGE)
8655a7b1 1007 printf("state change\n");
1484e727 1008 else if (migr_type(dev) == MIGR_REPAIR)
8655a7b1 1009 printf("repair\n");
1484e727 1010 else
8655a7b1
DW
1011 printf("<unknown:%d>\n", migr_type(dev));
1012 } else
1013 printf("idle\n");
3393c6af
DW
1014 printf(" Map State : %s", map_state_str[map->map_state]);
1015 if (dev->vol.migr_state) {
1016 struct imsm_map *map = get_imsm_map(dev, 1);
1e5c6983 1017
b10b37b8 1018 printf(" <-- %s", map_state_str[map->map_state]);
1e5c6983
DW
1019 printf("\n Checkpoint : %u (%llu)",
1020 __le32_to_cpu(dev->vol.curr_migr_unit),
c47b0ff6 1021 (unsigned long long)blocks_per_migr_unit(super, dev));
3393c6af
DW
1022 }
1023 printf("\n");
cdddbdbc 1024 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
cdddbdbc
DW
1025}
1026
14e8215b 1027static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
cdddbdbc 1028{
949c47a0 1029 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
1f24f035 1030 char str[MAX_RAID_SERIAL_LEN + 1];
cdddbdbc
DW
1031 __u64 sz;
1032
d362da3d 1033 if (index < 0 || !disk)
e9d82038
DW
1034 return;
1035
cdddbdbc 1036 printf("\n");
1f24f035 1037 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
cdddbdbc 1038 printf(" Disk%02d Serial : %s\n", index, str);
25ed7e59
DW
1039 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1040 is_configured(disk) ? " active" : "",
1041 is_failed(disk) ? " failed" : "");
cdddbdbc 1042 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
14e8215b 1043 sz = __le32_to_cpu(disk->total_blocks) - reserved;
cdddbdbc
DW
1044 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1045 human_size(sz * 512));
1046}
1047
520e69e2
AK
1048static int is_gen_migration(struct imsm_dev *dev);
1049
1050void examine_migr_rec_imsm(struct intel_super *super)
1051{
1052 struct migr_record *migr_rec = super->migr_rec;
1053 struct imsm_super *mpb = super->anchor;
1054 int i;
1055
1056 for (i = 0; i < mpb->num_raid_devs; i++) {
1057 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1058 if (is_gen_migration(dev) == 0)
1059 continue;
1060
1061 printf("\nMigration Record Information:");
1062 if (super->disks->index > 1) {
1063 printf(" Empty\n ");
1064 printf("Examine one of first two disks in array\n");
1065 break;
1066 }
1067 printf("\n Status : ");
1068 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
1069 printf("Normal\n");
1070 else
1071 printf("Contains Data\n");
1072 printf(" Current Unit : %u\n",
1073 __le32_to_cpu(migr_rec->curr_migr_unit));
1074 printf(" Family : %u\n",
1075 __le32_to_cpu(migr_rec->family_num));
1076 printf(" Ascending : %u\n",
1077 __le32_to_cpu(migr_rec->ascending_migr));
1078 printf(" Blocks Per Unit : %u\n",
1079 __le32_to_cpu(migr_rec->blocks_per_unit));
1080 printf(" Dest. Depth Per Unit : %u\n",
1081 __le32_to_cpu(migr_rec->dest_depth_per_unit));
1082 printf(" Checkpoint Area pba : %u\n",
1083 __le32_to_cpu(migr_rec->ckpt_area_pba));
1084 printf(" First member lba : %u\n",
1085 __le32_to_cpu(migr_rec->dest_1st_member_lba));
1086 printf(" Total Number of Units : %u\n",
1087 __le32_to_cpu(migr_rec->num_migr_units));
1088 printf(" Size of volume : %u\n",
1089 __le32_to_cpu(migr_rec->post_migr_vol_cap));
1090 printf(" Expansion space for LBA64 : %u\n",
1091 __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
1092 printf(" Record was read from : %u\n",
1093 __le32_to_cpu(migr_rec->ckpt_read_disk_num));
1094
1095 break;
1096 }
1097}
1098
a5d85af7 1099static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
44470971 1100
cdddbdbc
DW
1101static void examine_super_imsm(struct supertype *st, char *homehost)
1102{
1103 struct intel_super *super = st->sb;
949c47a0 1104 struct imsm_super *mpb = super->anchor;
cdddbdbc
DW
1105 char str[MAX_SIGNATURE_LENGTH];
1106 int i;
27fd6274
DW
1107 struct mdinfo info;
1108 char nbuf[64];
cdddbdbc 1109 __u32 sum;
14e8215b 1110 __u32 reserved = imsm_reserved_sectors(super, super->disks);
94827db3 1111 struct dl *dl;
27fd6274 1112
cdddbdbc
DW
1113 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
1114 printf(" Magic : %s\n", str);
1115 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1116 printf(" Version : %s\n", get_imsm_version(mpb));
148acb7b 1117 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
cdddbdbc
DW
1118 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1119 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
a5d85af7 1120 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1121 fname_from_uuid(st, &info, nbuf, ':');
27fd6274 1122 printf(" UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1123 sum = __le32_to_cpu(mpb->check_sum);
1124 printf(" Checksum : %08x %s\n", sum,
949c47a0 1125 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
87eb16df 1126 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
cdddbdbc
DW
1127 printf(" Disks : %d\n", mpb->num_disks);
1128 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
14e8215b 1129 print_imsm_disk(mpb, super->disks->index, reserved);
604b746f
JD
1130 if (super->bbm_log) {
1131 struct bbm_log *log = super->bbm_log;
1132
1133 printf("\n");
1134 printf("Bad Block Management Log:\n");
1135 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1136 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1137 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1138 printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
13a3b65d
N
1139 printf(" First Spare : %llx\n",
1140 (unsigned long long) __le64_to_cpu(log->first_spare_lba));
604b746f 1141 }
44470971
DW
1142 for (i = 0; i < mpb->num_raid_devs; i++) {
1143 struct mdinfo info;
1144 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1145
1146 super->current_vol = i;
a5d85af7 1147 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1148 fname_from_uuid(st, &info, nbuf, ':');
c47b0ff6 1149 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
44470971 1150 }
cdddbdbc
DW
1151 for (i = 0; i < mpb->num_disks; i++) {
1152 if (i == super->disks->index)
1153 continue;
14e8215b 1154 print_imsm_disk(mpb, i, reserved);
cdddbdbc 1155 }
94827db3
N
1156 for (dl = super->disks ; dl; dl = dl->next) {
1157 struct imsm_disk *disk;
1158 char str[MAX_RAID_SERIAL_LEN + 1];
1159 __u64 sz;
1160
1161 if (dl->index >= 0)
1162 continue;
1163
1164 disk = &dl->disk;
1165 printf("\n");
1166 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1167 printf(" Disk Serial : %s\n", str);
1168 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1169 is_configured(disk) ? " active" : "",
1170 is_failed(disk) ? " failed" : "");
1171 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1172 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1173 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1174 human_size(sz * 512));
1175 }
520e69e2
AK
1176
1177 examine_migr_rec_imsm(super);
cdddbdbc
DW
1178}
1179
061f2c6a 1180static void brief_examine_super_imsm(struct supertype *st, int verbose)
cdddbdbc 1181{
27fd6274 1182 /* We just write a generic IMSM ARRAY entry */
ff54de6e
N
1183 struct mdinfo info;
1184 char nbuf[64];
1e7bc0ed 1185 struct intel_super *super = st->sb;
1e7bc0ed 1186
0d5a423f
DW
1187 if (!super->anchor->num_raid_devs) {
1188 printf("ARRAY metadata=imsm\n");
1e7bc0ed 1189 return;
0d5a423f 1190 }
ff54de6e 1191
a5d85af7 1192 getinfo_super_imsm(st, &info, NULL);
4737ae25
N
1193 fname_from_uuid(st, &info, nbuf, ':');
1194 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1195}
1196
1197static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1198{
1199 /* We just write a generic IMSM ARRAY entry */
1200 struct mdinfo info;
1201 char nbuf[64];
1202 char nbuf1[64];
1203 struct intel_super *super = st->sb;
1204 int i;
1205
1206 if (!super->anchor->num_raid_devs)
1207 return;
1208
a5d85af7 1209 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1210 fname_from_uuid(st, &info, nbuf, ':');
1e7bc0ed
DW
1211 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1212 struct imsm_dev *dev = get_imsm_dev(super, i);
1213
1214 super->current_vol = i;
a5d85af7 1215 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1216 fname_from_uuid(st, &info, nbuf1, ':');
1124b3cf 1217 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
cf8de691 1218 dev->volume, nbuf + 5, i, nbuf1 + 5);
1e7bc0ed 1219 }
cdddbdbc
DW
1220}
1221
9d84c8ea
DW
1222static void export_examine_super_imsm(struct supertype *st)
1223{
1224 struct intel_super *super = st->sb;
1225 struct imsm_super *mpb = super->anchor;
1226 struct mdinfo info;
1227 char nbuf[64];
1228
a5d85af7 1229 getinfo_super_imsm(st, &info, NULL);
9d84c8ea
DW
1230 fname_from_uuid(st, &info, nbuf, ':');
1231 printf("MD_METADATA=imsm\n");
1232 printf("MD_LEVEL=container\n");
1233 printf("MD_UUID=%s\n", nbuf+5);
1234 printf("MD_DEVICES=%u\n", mpb->num_disks);
1235}
1236
cdddbdbc
DW
1237static void detail_super_imsm(struct supertype *st, char *homehost)
1238{
3ebe00a1
DW
1239 struct mdinfo info;
1240 char nbuf[64];
1241
a5d85af7 1242 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1243 fname_from_uuid(st, &info, nbuf, ':');
3ebe00a1 1244 printf("\n UUID : %s\n", nbuf + 5);
cdddbdbc
DW
1245}
1246
1247static void brief_detail_super_imsm(struct supertype *st)
1248{
ff54de6e
N
1249 struct mdinfo info;
1250 char nbuf[64];
a5d85af7 1251 getinfo_super_imsm(st, &info, NULL);
ae2bfd4e 1252 fname_from_uuid(st, &info, nbuf, ':');
ff54de6e 1253 printf(" UUID=%s", nbuf + 5);
cdddbdbc 1254}
d665cc31
DW
1255
1256static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1257static void fd2devname(int fd, char *name);
1258
120dc887 1259static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
d665cc31 1260{
120dc887
LM
1261 /* dump an unsorted list of devices attached to AHCI Intel storage
1262 * controller, as well as non-connected ports
d665cc31
DW
1263 */
1264 int hba_len = strlen(hba_path) + 1;
1265 struct dirent *ent;
1266 DIR *dir;
1267 char *path = NULL;
1268 int err = 0;
1269 unsigned long port_mask = (1 << port_count) - 1;
1270
f21e18ca 1271 if (port_count > (int)sizeof(port_mask) * 8) {
d665cc31
DW
1272 if (verbose)
1273 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1274 return 2;
1275 }
1276
1277 /* scroll through /sys/dev/block looking for devices attached to
1278 * this hba
1279 */
1280 dir = opendir("/sys/dev/block");
1281 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1282 int fd;
1283 char model[64];
1284 char vendor[64];
1285 char buf[1024];
1286 int major, minor;
1287 char *device;
1288 char *c;
1289 int port;
1290 int type;
1291
1292 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1293 continue;
1294 path = devt_to_devpath(makedev(major, minor));
1295 if (!path)
1296 continue;
1297 if (!path_attached_to_hba(path, hba_path)) {
1298 free(path);
1299 path = NULL;
1300 continue;
1301 }
1302
1303 /* retrieve the scsi device type */
1304 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1305 if (verbose)
1306 fprintf(stderr, Name ": failed to allocate 'device'\n");
1307 err = 2;
1308 break;
1309 }
1310 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1311 if (load_sys(device, buf) != 0) {
1312 if (verbose)
1313 fprintf(stderr, Name ": failed to read device type for %s\n",
1314 path);
1315 err = 2;
1316 free(device);
1317 break;
1318 }
1319 type = strtoul(buf, NULL, 10);
1320
1321 /* if it's not a disk print the vendor and model */
1322 if (!(type == 0 || type == 7 || type == 14)) {
1323 vendor[0] = '\0';
1324 model[0] = '\0';
1325 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1326 if (load_sys(device, buf) == 0) {
1327 strncpy(vendor, buf, sizeof(vendor));
1328 vendor[sizeof(vendor) - 1] = '\0';
1329 c = (char *) &vendor[sizeof(vendor) - 1];
1330 while (isspace(*c) || *c == '\0')
1331 *c-- = '\0';
1332
1333 }
1334 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1335 if (load_sys(device, buf) == 0) {
1336 strncpy(model, buf, sizeof(model));
1337 model[sizeof(model) - 1] = '\0';
1338 c = (char *) &model[sizeof(model) - 1];
1339 while (isspace(*c) || *c == '\0')
1340 *c-- = '\0';
1341 }
1342
1343 if (vendor[0] && model[0])
1344 sprintf(buf, "%.64s %.64s", vendor, model);
1345 else
1346 switch (type) { /* numbers from hald/linux/device.c */
1347 case 1: sprintf(buf, "tape"); break;
1348 case 2: sprintf(buf, "printer"); break;
1349 case 3: sprintf(buf, "processor"); break;
1350 case 4:
1351 case 5: sprintf(buf, "cdrom"); break;
1352 case 6: sprintf(buf, "scanner"); break;
1353 case 8: sprintf(buf, "media_changer"); break;
1354 case 9: sprintf(buf, "comm"); break;
1355 case 12: sprintf(buf, "raid"); break;
1356 default: sprintf(buf, "unknown");
1357 }
1358 } else
1359 buf[0] = '\0';
1360 free(device);
1361
1362 /* chop device path to 'host%d' and calculate the port number */
1363 c = strchr(&path[hba_len], '/');
4e5e717d
AW
1364 if (!c) {
1365 if (verbose)
1366 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1367 err = 2;
1368 break;
1369 }
d665cc31
DW
1370 *c = '\0';
1371 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1372 port -= host_base;
1373 else {
1374 if (verbose) {
1375 *c = '/'; /* repair the full string */
1376 fprintf(stderr, Name ": failed to determine port number for %s\n",
1377 path);
1378 }
1379 err = 2;
1380 break;
1381 }
1382
1383 /* mark this port as used */
1384 port_mask &= ~(1 << port);
1385
1386 /* print out the device information */
1387 if (buf[0]) {
1388 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1389 continue;
1390 }
1391
1392 fd = dev_open(ent->d_name, O_RDONLY);
1393 if (fd < 0)
1394 printf(" Port%d : - disk info unavailable -\n", port);
1395 else {
1396 fd2devname(fd, buf);
1397 printf(" Port%d : %s", port, buf);
1398 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1399 printf(" (%s)\n", buf);
1400 else
1401 printf("()\n");
1402 }
1403 close(fd);
1404 free(path);
1405 path = NULL;
1406 }
1407 if (path)
1408 free(path);
1409 if (dir)
1410 closedir(dir);
1411 if (err == 0) {
1412 int i;
1413
1414 for (i = 0; i < port_count; i++)
1415 if (port_mask & (1 << i))
1416 printf(" Port%d : - no device attached -\n", i);
1417 }
1418
1419 return err;
1420}
1421
120dc887 1422
155cbb4c 1423
120dc887
LM
1424static void print_found_intel_controllers(struct sys_dev *elem)
1425{
1426 for (; elem; elem = elem->next) {
1427 fprintf(stderr, Name ": found Intel(R) ");
1428 if (elem->type == SYS_DEV_SATA)
1429 fprintf(stderr, "SATA ");
155cbb4c
LM
1430 else if (elem->type == SYS_DEV_SAS)
1431 fprintf(stderr, "SAS ");
120dc887
LM
1432 fprintf(stderr, "RAID controller");
1433 if (elem->pci_id)
1434 fprintf(stderr, " at %s", elem->pci_id);
1435 fprintf(stderr, ".\n");
1436 }
1437 fflush(stderr);
1438}
1439
120dc887
LM
/* Scan the controller directory for 'host<N>' entries.
 *
 * hba_path   : directory to scan
 * port_count : out; set to (highest N - lowest N + 1), or 0 when no host
 *              entry exists or the directory cannot be opened
 *
 * Returns the lowest host number found, or -1 when there is none or the
 * directory cannot be opened.
 *
 * The lowest and highest numbers are tracked independently so the result
 * does not depend on the order in which readdir() returns the entries.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	struct dirent *ent;
	DIR *dir;
	int host_min = -1;
	int host_max = -1;
	int found = 0;

	*port_count = 0;
	dir = opendir(hba_path);
	if (dir == NULL)
		return -1;

	for (ent = readdir(dir); ent; ent = readdir(dir)) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		if (!found) {
			host_min = host;
			host_max = host;
			found = 1;
			continue;
		}
		if (host < host_min)
			host_min = host;
		if (host > host_max)
			host_max = host;
	}
	closedir(dir);

	if (found)
		*port_count = host_max - host_min + 1;
	return host_min;
}
1466
a891a3c2
LM
1467static void print_imsm_capability(const struct imsm_orom *orom)
1468{
1469 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1470 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1471 orom->hotfix_ver, orom->build);
1472 printf(" RAID Levels :%s%s%s%s%s\n",
1473 imsm_orom_has_raid0(orom) ? " raid0" : "",
1474 imsm_orom_has_raid1(orom) ? " raid1" : "",
1475 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1476 imsm_orom_has_raid10(orom) ? " raid10" : "",
1477 imsm_orom_has_raid5(orom) ? " raid5" : "");
1478 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1479 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1480 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1481 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1482 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1483 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1484 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1485 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1486 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1487 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1488 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1489 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1490 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1491 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1492 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1493 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1494 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1495 printf(" Max Disks : %d\n", orom->tds);
1496 printf(" Max Volumes : %d\n", orom->vpa);
1497 return;
1498}
1499
5615172f 1500static int detail_platform_imsm(int verbose, int enumerate_only)
d665cc31
DW
1501{
1502 /* There are two components to imsm platform support, the ahci SATA
1503 * controller and the option-rom. To find the SATA controller we
1504 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1505 * controller with the Intel vendor id is present. This approach
1506 * allows mdadm to leverage the kernel's ahci detection logic, with the
1507 * caveat that if ahci.ko is not loaded mdadm will not be able to
1508 * detect platform raid capabilities. The option-rom resides in a
1509 * platform "Adapter ROM". We scan for its signature to retrieve the
1510 * platform capabilities. If raid support is disabled in the BIOS the
1511 * option-rom capability structure will not be available.
1512 */
1513 const struct imsm_orom *orom;
1514 struct sys_dev *list, *hba;
d665cc31
DW
1515 int host_base = 0;
1516 int port_count = 0;
120dc887 1517 int result=0;
d665cc31 1518
5615172f 1519 if (enumerate_only) {
a891a3c2 1520 if (check_env("IMSM_NO_PLATFORM"))
5615172f 1521 return 0;
a891a3c2
LM
1522 list = find_intel_devices();
1523 if (!list)
1524 return 2;
1525 for (hba = list; hba; hba = hba->next) {
1526 orom = find_imsm_capability(hba->type);
1527 if (!orom) {
1528 result = 2;
1529 break;
1530 }
1531 }
1532 free_sys_dev(&list);
1533 return result;
5615172f
DW
1534 }
1535
155cbb4c
LM
1536 list = find_intel_devices();
1537 if (!list) {
d665cc31 1538 if (verbose)
155cbb4c
LM
1539 fprintf(stderr, Name ": no active Intel(R) RAID "
1540 "controller found.\n");
d665cc31
DW
1541 free_sys_dev(&list);
1542 return 2;
1543 } else if (verbose)
155cbb4c 1544 print_found_intel_controllers(list);
d665cc31 1545
a891a3c2
LM
1546 for (hba = list; hba; hba = hba->next) {
1547 orom = find_imsm_capability(hba->type);
1548 if (!orom)
1549 fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
1550 hba->path, get_sys_dev_type(hba->type));
1551 else
1552 print_imsm_capability(orom);
d665cc31
DW
1553 }
1554
120dc887
LM
1555 for (hba = list; hba; hba = hba->next) {
1556 printf(" I/O Controller : %s (%s)\n",
1557 hba->path, get_sys_dev_type(hba->type));
d665cc31 1558
120dc887
LM
1559 if (hba->type == SYS_DEV_SATA) {
1560 host_base = ahci_get_port_count(hba->path, &port_count);
1561 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
1562 if (verbose)
1563 fprintf(stderr, Name ": failed to enumerate "
1564 "ports on SATA controller at %s.", hba->pci_id);
1565 result |= 2;
1566 }
1567 }
d665cc31 1568 }
155cbb4c 1569
120dc887
LM
1570 free_sys_dev(&list);
1571 return result;
d665cc31 1572}
cdddbdbc
DW
1573#endif
1574
/* Decide whether the array is "meant" for this host.
 *
 * imsm metadata carries no host identification, so the question can be
 * neither confirmed nor denied and the answer is always -1.  Excluding
 * member disks that do not belong is left to compare_super and the
 * 'family_num' fields, and choosing which arrays to assemble is left to
 * mdadm.conf.  Auto-assembly may still pick up "foreign" arrays.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	const int cannot_tell = -1;

	return cannot_tell;
}
1588
1589static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1590{
51006d85
N
1591 /* The uuid returned here is used for:
1592 * uuid to put into bitmap file (Create, Grow)
1593 * uuid for backup header when saving critical section (Grow)
1594 * comparing uuids when re-adding a device into an array
1595 * In these cases the uuid required is that of the data-array,
1596 * not the device-set.
1597 * uuid to recognise same set when adding a missing device back
1598 * to an array. This is a uuid for the device-set.
1599 *
1600 * For each of these we can make do with a truncated
1601 * or hashed uuid rather than the original, as long as
1602 * everyone agrees.
1603 * In each case the uuid required is that of the data-array,
1604 * not the device-set.
43dad3d6 1605 */
51006d85
N
1606 /* imsm does not track uuid's so we synthesis one using sha1 on
1607 * - The signature (Which is constant for all imsm array, but no matter)
148acb7b 1608 * - the orig_family_num of the container
51006d85
N
1609 * - the index number of the volume
1610 * - the 'serial' number of the volume.
1611 * Hopefully these are all constant.
1612 */
1613 struct intel_super *super = st->sb;
43dad3d6 1614
51006d85
N
1615 char buf[20];
1616 struct sha1_ctx ctx;
1617 struct imsm_dev *dev = NULL;
148acb7b 1618 __u32 family_num;
51006d85 1619
148acb7b
DW
1620 /* some mdadm versions failed to set ->orig_family_num, in which
1621 * case fall back to ->family_num. orig_family_num will be
1622 * fixed up with the first metadata update.
1623 */
1624 family_num = super->anchor->orig_family_num;
1625 if (family_num == 0)
1626 family_num = super->anchor->family_num;
51006d85 1627 sha1_init_ctx(&ctx);
92bd8f8d 1628 sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
148acb7b 1629 sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
51006d85
N
1630 if (super->current_vol >= 0)
1631 dev = get_imsm_dev(super, super->current_vol);
1632 if (dev) {
1633 __u32 vol = super->current_vol;
1634 sha1_process_bytes(&vol, sizeof(vol), &ctx);
1635 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
1636 }
1637 sha1_finish_ctx(&ctx, buf);
1638 memcpy(uuid, buf, 4*4);
cdddbdbc
DW
1639}
1640
#if 0
/* Disabled helper: split the "major.minor.patch" version text of the anchor
 * into numbers.
 *
 * m : out; receives the minor component
 * p : out; receives the patch component
 * At most two characters are kept per component.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	/* check the bound before dereferencing, and stop after the third
	 * component so ver_parse[] is never indexed out of range
	 */
	while (v < end && *v != '\0' && i < 3) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	/* base 10: with base 0 a component such as "08" would be taken as
	 * (invalid) octal
	 */
	*m = strtol(minor, NULL, 10);
	*p = strtol(patch, NULL, 10);
}
#endif
4f5bc454 1668
1e5c6983
DW
1669static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1670{
1671 /* migr_strip_size when repairing or initializing parity */
1672 struct imsm_map *map = get_imsm_map(dev, 0);
1673 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1674
1675 switch (get_imsm_raid_level(map)) {
1676 case 5:
1677 case 10:
1678 return chunk;
1679 default:
1680 return 128*1024 >> 9;
1681 }
1682}
1683
1684static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1685{
1686 /* migr_strip_size when rebuilding a degraded disk, no idea why
1687 * this is different than migr_strip_size_resync(), but it's good
1688 * to be compatible
1689 */
1690 struct imsm_map *map = get_imsm_map(dev, 1);
1691 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1692
1693 switch (get_imsm_raid_level(map)) {
1694 case 1:
1695 case 10:
1696 if (map->num_members % map->num_domains == 0)
1697 return 128*1024 >> 9;
1698 else
1699 return chunk;
1700 case 5:
1701 return max((__u32) 64*1024 >> 9, chunk);
1702 default:
1703 return 128*1024 >> 9;
1704 }
1705}
1706
1707static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1708{
1709 struct imsm_map *lo = get_imsm_map(dev, 0);
1710 struct imsm_map *hi = get_imsm_map(dev, 1);
1711 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1712 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1713
1714 return max((__u32) 1, hi_chunk / lo_chunk);
1715}
1716
1717static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1718{
1719 struct imsm_map *lo = get_imsm_map(dev, 0);
1720 int level = get_imsm_raid_level(lo);
1721
1722 if (level == 1 || level == 10) {
1723 struct imsm_map *hi = get_imsm_map(dev, 1);
1724
1725 return hi->num_domains;
1726 } else
1727 return num_stripes_per_unit_resync(dev);
1728}
1729
98130f40 1730static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1e5c6983
DW
1731{
1732 /* named 'imsm_' because raid0, raid1 and raid10
1733 * counter-intuitively have the same number of data disks
1734 */
98130f40 1735 struct imsm_map *map = get_imsm_map(dev, second_map);
1e5c6983
DW
1736
1737 switch (get_imsm_raid_level(map)) {
1738 case 0:
1739 case 1:
1740 case 10:
1741 return map->num_members;
1742 case 5:
1743 return map->num_members - 1;
1744 default:
1745 dprintf("%s: unsupported raid level\n", __func__);
1746 return 0;
1747 }
1748}
1749
1750static __u32 parity_segment_depth(struct imsm_dev *dev)
1751{
1752 struct imsm_map *map = get_imsm_map(dev, 0);
1753 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1754
1755 switch(get_imsm_raid_level(map)) {
1756 case 1:
1757 case 10:
1758 return chunk * map->num_domains;
1759 case 5:
1760 return chunk * map->num_members;
1761 default:
1762 return chunk;
1763 }
1764}
1765
1766static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
1767{
1768 struct imsm_map *map = get_imsm_map(dev, 1);
1769 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1770 __u32 strip = block / chunk;
1771
1772 switch (get_imsm_raid_level(map)) {
1773 case 1:
1774 case 10: {
1775 __u32 vol_strip = (strip * map->num_domains) + 1;
1776 __u32 vol_stripe = vol_strip / map->num_members;
1777
1778 return vol_stripe * chunk + block % chunk;
1779 } case 5: {
1780 __u32 stripe = strip / (map->num_members - 1);
1781
1782 return stripe * chunk + block % chunk;
1783 }
1784 default:
1785 return 0;
1786 }
1787}
1788
c47b0ff6
AK
1789static __u64 blocks_per_migr_unit(struct intel_super *super,
1790 struct imsm_dev *dev)
1e5c6983
DW
1791{
1792 /* calculate the conversion factor between per member 'blocks'
1793 * (md/{resync,rebuild}_start) and imsm migration units, return
1794 * 0 for the 'not migrating' and 'unsupported migration' cases
1795 */
1796 if (!dev->vol.migr_state)
1797 return 0;
1798
1799 switch (migr_type(dev)) {
c47b0ff6
AK
1800 case MIGR_GEN_MIGR: {
1801 struct migr_record *migr_rec = super->migr_rec;
1802 return __le32_to_cpu(migr_rec->blocks_per_unit);
1803 }
1e5c6983
DW
1804 case MIGR_VERIFY:
1805 case MIGR_REPAIR:
1806 case MIGR_INIT: {
1807 struct imsm_map *map = get_imsm_map(dev, 0);
1808 __u32 stripes_per_unit;
1809 __u32 blocks_per_unit;
1810 __u32 parity_depth;
1811 __u32 migr_chunk;
1812 __u32 block_map;
1813 __u32 block_rel;
1814 __u32 segment;
1815 __u32 stripe;
1816 __u8 disks;
1817
1818 /* yes, this is really the translation of migr_units to
1819 * per-member blocks in the 'resync' case
1820 */
1821 stripes_per_unit = num_stripes_per_unit_resync(dev);
1822 migr_chunk = migr_strip_blocks_resync(dev);
98130f40 1823 disks = imsm_num_data_members(dev, 0);
1e5c6983
DW
1824 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
1825 stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
1826 segment = blocks_per_unit / stripe;
1827 block_rel = blocks_per_unit - segment * stripe;
1828 parity_depth = parity_segment_depth(dev);
1829 block_map = map_migr_block(dev, block_rel);
1830 return block_map + parity_depth * segment;
1831 }
1832 case MIGR_REBUILD: {
1833 __u32 stripes_per_unit;
1834 __u32 migr_chunk;
1835
1836 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
1837 migr_chunk = migr_strip_blocks_rebuild(dev);
1838 return migr_chunk * stripes_per_unit;
1839 }
1e5c6983
DW
1840 case MIGR_STATE_CHANGE:
1841 default:
1842 return 0;
1843 }
1844}
1845
c2c087e6
DW
1846static int imsm_level_to_layout(int level)
1847{
1848 switch (level) {
1849 case 0:
1850 case 1:
1851 return 0;
1852 case 5:
1853 case 6:
a380c027 1854 return ALGORITHM_LEFT_ASYMMETRIC;
c2c087e6 1855 case 10:
c92a2527 1856 return 0x102;
c2c087e6 1857 }
a18a888e 1858 return UnSet;
c2c087e6
DW
1859}
1860
8e59f3d8
AK
1861/*******************************************************************************
1862 * Function: read_imsm_migr_rec
1863 * Description: Function reads imsm migration record from last sector of disk
1864 * Parameters:
1865 * fd : disk descriptor
1866 * super : metadata info
1867 * Returns:
1868 * 0 : success,
1869 * -1 : fail
1870 ******************************************************************************/
1871static int read_imsm_migr_rec(int fd, struct intel_super *super)
1872{
1873 int ret_val = -1;
1874 unsigned long long dsize;
1875
1876 get_dev_size(fd, NULL, &dsize);
1877 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
1878 fprintf(stderr,
1879 Name ": Cannot seek to anchor block: %s\n",
1880 strerror(errno));
1881 goto out;
1882 }
1883 if (read(fd, super->migr_rec_buf, 512) != 512) {
1884 fprintf(stderr,
1885 Name ": Cannot read migr record block: %s\n",
1886 strerror(errno));
1887 goto out;
1888 }
1889 ret_val = 0;
1890
1891out:
1892 return ret_val;
1893}
1894
1895/*******************************************************************************
1896 * Function: load_imsm_migr_rec
1897 * Description: Function reads imsm migration record (it is stored at the last
1898 * sector of disk)
1899 * Parameters:
1900 * super : imsm internal array info
1901 * info : general array info
1902 * Returns:
1903 * 0 : success
1904 * -1 : fail
1905 ******************************************************************************/
1906static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
1907{
1908 struct mdinfo *sd;
1909 struct dl *dl = NULL;
1910 char nm[30];
1911 int retval = -1;
1912 int fd = -1;
1913
1914 if (info) {
1915 for (sd = info->devs ; sd ; sd = sd->next) {
1916 /* read only from one of the first two slots */
1917 if ((sd->disk.raid_disk > 1) ||
1918 (sd->disk.raid_disk < 0))
1919 continue;
1920 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1921 fd = dev_open(nm, O_RDONLY);
1922 if (fd >= 0)
1923 break;
1924 }
1925 }
1926 if (fd < 0) {
1927 for (dl = super->disks; dl; dl = dl->next) {
1928 /* read only from one of the first two slots */
1929 if (dl->index > 1)
1930 continue;
1931 sprintf(nm, "%d:%d", dl->major, dl->minor);
1932 fd = dev_open(nm, O_RDONLY);
1933 if (fd >= 0)
1934 break;
1935 }
1936 }
1937 if (fd < 0)
1938 goto out;
1939 retval = read_imsm_migr_rec(fd, super);
1940
1941out:
1942 if (fd >= 0)
1943 close(fd);
1944 return retval;
1945}
1946
c17608ea
AK
1947/*******************************************************************************
1948 * function: imsm_create_metadata_checkpoint_update
1949 * Description: It creates update for checkpoint change.
1950 * Parameters:
1951 * super : imsm internal array info
1952 * u : pointer to prepared update
1953 * Returns:
1954 * Uptate length.
1955 * If length is equal to 0, input pointer u contains no update
1956 ******************************************************************************/
1957static int imsm_create_metadata_checkpoint_update(
1958 struct intel_super *super,
1959 struct imsm_update_general_migration_checkpoint **u)
1960{
1961
1962 int update_memory_size = 0;
1963
1964 dprintf("imsm_create_metadata_checkpoint_update(enter)\n");
1965
1966 if (u == NULL)
1967 return 0;
1968 *u = NULL;
1969
1970 /* size of all update data without anchor */
1971 update_memory_size =
1972 sizeof(struct imsm_update_general_migration_checkpoint);
1973
1974 *u = calloc(1, update_memory_size);
1975 if (*u == NULL) {
1976 dprintf("error: cannot get memory for "
1977 "imsm_create_metadata_checkpoint_update update\n");
1978 return 0;
1979 }
1980 (*u)->type = update_general_migration_checkpoint;
1981 (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
1982 dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n",
1983 (*u)->curr_migr_unit);
1984
1985 return update_memory_size;
1986}
1987
1988
1989static void imsm_update_metadata_locally(struct supertype *st,
1990 void *buf, int len);
1991
687629c2
AK
1992/*******************************************************************************
1993 * Function: write_imsm_migr_rec
1994 * Description: Function writes imsm migration record
1995 * (at the last sector of disk)
1996 * Parameters:
1997 * super : imsm internal array info
1998 * Returns:
1999 * 0 : success
2000 * -1 : if fail
2001 ******************************************************************************/
2002static int write_imsm_migr_rec(struct supertype *st)
2003{
2004 struct intel_super *super = st->sb;
2005 unsigned long long dsize;
2006 char nm[30];
2007 int fd = -1;
2008 int retval = -1;
2009 struct dl *sd;
c17608ea
AK
2010 int len;
2011 struct imsm_update_general_migration_checkpoint *u;
687629c2
AK
2012
2013 for (sd = super->disks ; sd ; sd = sd->next) {
2014 /* write to 2 first slots only */
2015 if ((sd->index < 0) || (sd->index > 1))
2016 continue;
2017 sprintf(nm, "%d:%d", sd->major, sd->minor);
2018 fd = dev_open(nm, O_RDWR);
2019 if (fd < 0)
2020 continue;
2021 get_dev_size(fd, NULL, &dsize);
2022 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
2023 fprintf(stderr,
2024 Name ": Cannot seek to anchor block: %s\n",
2025 strerror(errno));
2026 goto out;
2027 }
2028 if (write(fd, super->migr_rec_buf, 512) != 512) {
2029 fprintf(stderr,
2030 Name ": Cannot write migr record block: %s\n",
2031 strerror(errno));
2032 goto out;
2033 }
2034 close(fd);
2035 fd = -1;
2036 }
c17608ea
AK
2037 /* update checkpoint information in metadata */
2038 len = imsm_create_metadata_checkpoint_update(super, &u);
2039
2040 if (len <= 0) {
2041 dprintf("imsm: Cannot prepare update\n");
2042 goto out;
2043 }
2044 /* update metadata locally */
2045 imsm_update_metadata_locally(st, u, len);
2046 /* and possibly remotely */
2047 if (st->update_tail) {
2048 append_metadata_update(st, u, len);
2049 /* during reshape we do all work inside metadata handler
2050 * manage_reshape(), so metadata update has to be triggered
2051 * insida it
2052 */
2053 flush_metadata_updates(st);
2054 st->update_tail = &st->updates;
2055 } else
2056 free(u);
687629c2
AK
2057
2058 retval = 0;
2059 out:
2060 if (fd >= 0)
2061 close(fd);
2062 return retval;
2063}
2064
a5d85af7 2065static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
bf5a934a
DW
2066{
2067 struct intel_super *super = st->sb;
c47b0ff6 2068 struct migr_record *migr_rec = super->migr_rec;
949c47a0 2069 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
a965f303 2070 struct imsm_map *map = get_imsm_map(dev, 0);
81ac8b4d 2071 struct imsm_map *prev_map = get_imsm_map(dev, 1);
b335e593 2072 struct imsm_map *map_to_analyse = map;
efb30e7f 2073 struct dl *dl;
e207da2f 2074 char *devname;
139dae11 2075 unsigned int component_size_alligment;
a5d85af7 2076 int map_disks = info->array.raid_disks;
bf5a934a 2077
95eeceeb 2078 memset(info, 0, sizeof(*info));
b335e593
AK
2079 if (prev_map)
2080 map_to_analyse = prev_map;
2081
efb30e7f
DW
2082 for (dl = super->disks; dl; dl = dl->next)
2083 if (dl->raiddisk == info->disk.raid_disk)
2084 break;
bf5a934a 2085 info->container_member = super->current_vol;
cd0430a1 2086 info->array.raid_disks = map->num_members;
b335e593 2087 info->array.level = get_imsm_raid_level(map_to_analyse);
bf5a934a
DW
2088 info->array.layout = imsm_level_to_layout(info->array.level);
2089 info->array.md_minor = -1;
2090 info->array.ctime = 0;
2091 info->array.utime = 0;
b335e593
AK
2092 info->array.chunk_size =
2093 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
301406c9 2094 info->array.state = !dev->vol.dirty;
da9b4a62
DW
2095 info->custom_array_size = __le32_to_cpu(dev->size_high);
2096 info->custom_array_size <<= 32;
2097 info->custom_array_size |= __le32_to_cpu(dev->size_low);
3f83228a
N
2098 if (prev_map && map->map_state == prev_map->map_state) {
2099 info->reshape_active = 1;
b335e593
AK
2100 info->new_level = get_imsm_raid_level(map);
2101 info->new_layout = imsm_level_to_layout(info->new_level);
2102 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
3f83228a 2103 info->delta_disks = map->num_members - prev_map->num_members;
493f5dd6
N
2104 if (info->delta_disks) {
2105 /* this needs to be applied to every array
2106 * in the container.
2107 */
2108 info->reshape_active = 2;
2109 }
3f83228a
N
2110 /* We shape information that we give to md might have to be
2111 * modify to cope with md's requirement for reshaping arrays.
2112 * For example, when reshaping a RAID0, md requires it to be
2113 * presented as a degraded RAID4.
2114 * Also if a RAID0 is migrating to a RAID5 we need to specify
2115 * the array as already being RAID5, but the 'before' layout
2116 * is a RAID4-like layout.
2117 */
2118 switch (info->array.level) {
2119 case 0:
2120 switch(info->new_level) {
2121 case 0:
2122 /* conversion is happening as RAID4 */
2123 info->array.level = 4;
2124 info->array.raid_disks += 1;
2125 break;
2126 case 5:
2127 /* conversion is happening as RAID5 */
2128 info->array.level = 5;
2129 info->array.layout = ALGORITHM_PARITY_N;
2130 info->array.raid_disks += 1;
2131 info->delta_disks -= 1;
2132 break;
2133 default:
2134 /* FIXME error message */
2135 info->array.level = UnSet;
2136 break;
2137 }
2138 break;
2139 }
b335e593
AK
2140 } else {
2141 info->new_level = UnSet;
2142 info->new_layout = UnSet;
2143 info->new_chunk = info->array.chunk_size;
3f83228a 2144 info->delta_disks = 0;
b335e593 2145 }
301406c9
DW
2146 info->disk.major = 0;
2147 info->disk.minor = 0;
efb30e7f
DW
2148 if (dl) {
2149 info->disk.major = dl->major;
2150 info->disk.minor = dl->minor;
2151 }
bf5a934a 2152
b335e593
AK
2153 info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
2154 info->component_size =
2155 __le32_to_cpu(map_to_analyse->blocks_per_member);
139dae11
AK
2156
2157 /* check component size aligment
2158 */
2159 component_size_alligment =
2160 info->component_size % (info->array.chunk_size/512);
2161
2162 if (component_size_alligment &&
2163 (info->array.level != 1) && (info->array.level != UnSet)) {
2164 dprintf("imsm: reported component size alligned from %llu ",
2165 info->component_size);
2166 info->component_size -= component_size_alligment;
2167 dprintf("to %llu (%i).\n",
2168 info->component_size, component_size_alligment);
2169 }
2170
301406c9 2171 memset(info->uuid, 0, sizeof(info->uuid));
921d9e16 2172 info->recovery_start = MaxSector;
bf5a934a 2173
d2e6d5d6 2174 info->reshape_progress = 0;
b6796ce1 2175 info->resync_start = MaxSector;
b335e593
AK
2176 if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
2177 dev->vol.dirty) {
301406c9 2178 info->resync_start = 0;
b6796ce1
AK
2179 }
2180 if (dev->vol.migr_state) {
1e5c6983
DW
2181 switch (migr_type(dev)) {
2182 case MIGR_REPAIR:
2183 case MIGR_INIT: {
c47b0ff6
AK
2184 __u64 blocks_per_unit = blocks_per_migr_unit(super,
2185 dev);
1e5c6983
DW
2186 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
2187
2188 info->resync_start = blocks_per_unit * units;
2189 break;
2190 }
d2e6d5d6 2191 case MIGR_GEN_MIGR: {
c47b0ff6
AK
2192 __u64 blocks_per_unit = blocks_per_migr_unit(super,
2193 dev);
2194 __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
04fa9523
AK
2195 unsigned long long array_blocks;
2196 int used_disks;
d2e6d5d6
AK
2197
2198 info->reshape_progress = blocks_per_unit * units;
6289d1e0 2199
d2e6d5d6
AK
2200 dprintf("IMSM: General Migration checkpoint : %llu "
2201 "(%llu) -> read reshape progress : %llu\n",
2202 units, blocks_per_unit, info->reshape_progress);
75156c46
AK
2203
2204 used_disks = imsm_num_data_members(dev, 1);
2205 if (used_disks > 0) {
2206 array_blocks = map->blocks_per_member *
2207 used_disks;
2208 /* round array size down to closest MB
2209 */
2210 info->custom_array_size = (array_blocks
2211 >> SECT_PER_MB_SHIFT)
2212 << SECT_PER_MB_SHIFT;
2213 }
d2e6d5d6 2214 }
1e5c6983
DW
2215 case MIGR_VERIFY:
2216 /* we could emulate the checkpointing of
2217 * 'sync_action=check' migrations, but for now
2218 * we just immediately complete them
2219 */
2220 case MIGR_REBUILD:
2221 /* this is handled by container_content_imsm() */
1e5c6983
DW
2222 case MIGR_STATE_CHANGE:
2223 /* FIXME handle other migrations */
2224 default:
2225 /* we are not dirty, so... */
2226 info->resync_start = MaxSector;
2227 }
b6796ce1 2228 }
301406c9
DW
2229
2230 strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
2231 info->name[MAX_RAID_SERIAL_LEN] = 0;
bf5a934a 2232
f35f2525
N
2233 info->array.major_version = -1;
2234 info->array.minor_version = -2;
e207da2f
AW
2235 devname = devnum2devname(st->container_dev);
2236 *info->text_version = '\0';
2237 if (devname)
2238 sprintf(info->text_version, "/%s/%d", devname, info->container_member);
2239 free(devname);
a67dd8cc 2240 info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
51006d85 2241 uuid_from_super_imsm(st, info->uuid);
a5d85af7
N
2242
2243 if (dmap) {
2244 int i, j;
2245 for (i=0; i<map_disks; i++) {
2246 dmap[i] = 0;
2247 if (i < info->array.raid_disks) {
2248 struct imsm_disk *dsk;
98130f40 2249 j = get_imsm_disk_idx(dev, i, -1);
a5d85af7
N
2250 dsk = get_imsm_disk(super, j);
2251 if (dsk && (dsk->status & CONFIGURED_DISK))
2252 dmap[i] = 1;
2253 }
2254 }
2255 }
81ac8b4d 2256}
bf5a934a 2257
97b4d0e9
DW
2258static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
2259static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
2260
2261static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2262{
2263 struct dl *d;
2264
2265 for (d = super->missing; d; d = d->next)
2266 if (d->index == index)
2267 return &d->disk;
2268 return NULL;
2269}
2270
a5d85af7 2271static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
4f5bc454
DW
2272{
2273 struct intel_super *super = st->sb;
4f5bc454 2274 struct imsm_disk *disk;
a5d85af7 2275 int map_disks = info->array.raid_disks;
ab3cb6b3
N
2276 int max_enough = -1;
2277 int i;
2278 struct imsm_super *mpb;
4f5bc454 2279
bf5a934a 2280 if (super->current_vol >= 0) {
a5d85af7 2281 getinfo_super_imsm_volume(st, info, map);
bf5a934a
DW
2282 return;
2283 }
95eeceeb 2284 memset(info, 0, sizeof(*info));
d23fe947
DW
2285
2286 /* Set raid_disks to zero so that Assemble will always pull in valid
2287 * spares
2288 */
2289 info->array.raid_disks = 0;
cdddbdbc
DW
2290 info->array.level = LEVEL_CONTAINER;
2291 info->array.layout = 0;
2292 info->array.md_minor = -1;
c2c087e6 2293 info->array.ctime = 0; /* N/A for imsm */
cdddbdbc
DW
2294 info->array.utime = 0;
2295 info->array.chunk_size = 0;
2296
2297 info->disk.major = 0;
2298 info->disk.minor = 0;
cdddbdbc 2299 info->disk.raid_disk = -1;
c2c087e6 2300 info->reshape_active = 0;
f35f2525
N
2301 info->array.major_version = -1;
2302 info->array.minor_version = -2;
c2c087e6 2303 strcpy(info->text_version, "imsm");
a67dd8cc 2304 info->safe_mode_delay = 0;
c2c087e6
DW
2305 info->disk.number = -1;
2306 info->disk.state = 0;
c5afc314 2307 info->name[0] = 0;
921d9e16 2308 info->recovery_start = MaxSector;
c2c087e6 2309
97b4d0e9 2310 /* do we have the all the insync disks that we expect? */
ab3cb6b3 2311 mpb = super->anchor;
97b4d0e9 2312
ab3cb6b3
N
2313 for (i = 0; i < mpb->num_raid_devs; i++) {
2314 struct imsm_dev *dev = get_imsm_dev(super, i);
2315 int failed, enough, j, missing = 0;
2316 struct imsm_map *map;
2317 __u8 state;
97b4d0e9 2318
ab3cb6b3
N
2319 failed = imsm_count_failed(super, dev);
2320 state = imsm_check_degraded(super, dev, failed);
2321 map = get_imsm_map(dev, dev->vol.migr_state);
2322
2323 /* any newly missing disks?
2324 * (catches single-degraded vs double-degraded)
2325 */
2326 for (j = 0; j < map->num_members; j++) {
98130f40 2327 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
ab3cb6b3
N
2328 __u32 idx = ord_to_idx(ord);
2329
2330 if (!(ord & IMSM_ORD_REBUILD) &&
2331 get_imsm_missing(super, idx)) {
2332 missing = 1;
2333 break;
2334 }
97b4d0e9 2335 }
ab3cb6b3
N
2336
2337 if (state == IMSM_T_STATE_FAILED)
2338 enough = -1;
2339 else if (state == IMSM_T_STATE_DEGRADED &&
2340 (state != map->map_state || missing))
2341 enough = 0;
2342 else /* we're normal, or already degraded */
2343 enough = 1;
2344
2345 /* in the missing/failed disk case check to see
2346 * if at least one array is runnable
2347 */
2348 max_enough = max(max_enough, enough);
2349 }
2350 dprintf("%s: enough: %d\n", __func__, max_enough);
2351 info->container_enough = max_enough;
97b4d0e9 2352
4a04ec6c 2353 if (super->disks) {
14e8215b
DW
2354 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2355
b9f594fe 2356 disk = &super->disks->disk;
14e8215b
DW
2357 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2358 info->component_size = reserved;
25ed7e59 2359 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
df474657
DW
2360 /* we don't change info->disk.raid_disk here because
2361 * this state will be finalized in mdmon after we have
2362 * found the 'most fresh' version of the metadata
2363 */
25ed7e59
DW
2364 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2365 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
cdddbdbc 2366 }
a575e2a7
DW
2367
2368 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2369 * ->compare_super may have updated the 'num_raid_devs' field for spares
2370 */
2371 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
36ba7d48 2372 uuid_from_super_imsm(st, info->uuid);
22e263f6
AC
2373 else
2374 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
a5d85af7
N
2375
2376 /* I don't know how to compute 'map' on imsm, so use safe default */
2377 if (map) {
2378 int i;
2379 for (i = 0; i < map_disks; i++)
2380 map[i] = 1;
2381 }
2382
cdddbdbc
DW
2383}
2384
5c4cd5da
AC
2385/* allocates memory and fills disk in mdinfo structure
2386 * for each disk in array */
2387struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2388{
2389 struct mdinfo *mddev = NULL;
2390 struct intel_super *super = st->sb;
2391 struct imsm_disk *disk;
2392 int count = 0;
2393 struct dl *dl;
2394 if (!super || !super->disks)
2395 return NULL;
2396 dl = super->disks;
2397 mddev = malloc(sizeof(*mddev));
2398 if (!mddev) {
2399 fprintf(stderr, Name ": Failed to allocate memory.\n");
2400 return NULL;
2401 }
2402 memset(mddev, 0, sizeof(*mddev));
2403 while (dl) {
2404 struct mdinfo *tmp;
2405 disk = &dl->disk;
2406 tmp = malloc(sizeof(*tmp));
2407 if (!tmp) {
2408 fprintf(stderr, Name ": Failed to allocate memory.\n");
2409 if (mddev)
2410 sysfs_free(mddev);
2411 return NULL;
2412 }
2413 memset(tmp, 0, sizeof(*tmp));
2414 if (mddev->devs)
2415 tmp->next = mddev->devs;
2416 mddev->devs = tmp;
2417 tmp->disk.number = count++;
2418 tmp->disk.major = dl->major;
2419 tmp->disk.minor = dl->minor;
2420 tmp->disk.state = is_configured(disk) ?
2421 (1 << MD_DISK_ACTIVE) : 0;
2422 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2423 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2424 tmp->disk.raid_disk = -1;
2425 dl = dl->next;
2426 }
2427 return mddev;
2428}
2429
cdddbdbc
DW
2430static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2431 char *update, char *devname, int verbose,
2432 int uuid_set, char *homehost)
2433{
f352c545
DW
2434 /* For 'assemble' and 'force' we need to return non-zero if any
2435 * change was made. For others, the return value is ignored.
2436 * Update options are:
2437 * force-one : This device looks a bit old but needs to be included,
2438 * update age info appropriately.
2439 * assemble: clear any 'faulty' flag to allow this device to
2440 * be assembled.
2441 * force-array: Array is degraded but being forced, mark it clean
2442 * if that will be needed to assemble it.
2443 *
2444 * newdev: not used ????
2445 * grow: Array has gained a new device - this is currently for
2446 * linear only
2447 * resync: mark as dirty so a resync will happen.
2448 * name: update the name - preserving the homehost
6e46bf34 2449 * uuid: Change the uuid of the array to match watch is given
f352c545
DW
2450 *
2451 * Following are not relevant for this imsm:
2452 * sparc2.2 : update from old dodgey metadata
2453 * super-minor: change the preferred_minor number
2454 * summaries: update redundant counters.
f352c545
DW
2455 * homehost: update the recorded homehost
2456 * _reshape_progress: record new reshape_progress position.
2457 */
6e46bf34
DW
2458 int rv = 1;
2459 struct intel_super *super = st->sb;
2460 struct imsm_super *mpb;
f352c545 2461
6e46bf34
DW
2462 /* we can only update container info */
2463 if (!super || super->current_vol >= 0 || !super->anchor)
2464 return 1;
2465
2466 mpb = super->anchor;
2467
2468 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
1e2b2765 2469 rv = -1;
6e46bf34
DW
2470 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2471 mpb->orig_family_num = *((__u32 *) info->update_private);
2472 rv = 0;
2473 } else if (strcmp(update, "uuid") == 0) {
2474 __u32 *new_family = malloc(sizeof(*new_family));
2475
2476 /* update orig_family_number with the incoming random
2477 * data, report the new effective uuid, and store the
2478 * new orig_family_num for future updates.
2479 */
2480 if (new_family) {
2481 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2482 uuid_from_super_imsm(st, info->uuid);
2483 *new_family = mpb->orig_family_num;
2484 info->update_private = new_family;
2485 rv = 0;
2486 }
2487 } else if (strcmp(update, "assemble") == 0)
2488 rv = 0;
2489 else
1e2b2765 2490 rv = -1;
f352c545 2491
6e46bf34
DW
2492 /* successful update? recompute checksum */
2493 if (rv == 0)
2494 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
f352c545
DW
2495
2496 return rv;
cdddbdbc
DW
2497}
2498
c2c087e6 2499static size_t disks_to_mpb_size(int disks)
cdddbdbc 2500{
c2c087e6 2501 size_t size;
cdddbdbc 2502
c2c087e6
DW
2503 size = sizeof(struct imsm_super);
2504 size += (disks - 1) * sizeof(struct imsm_disk);
2505 size += 2 * sizeof(struct imsm_dev);
2506 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2507 size += (4 - 2) * sizeof(struct imsm_map);
2508 /* 4 possible disk_ord_tbl's */
2509 size += 4 * (disks - 1) * sizeof(__u32);
2510
2511 return size;
2512}
2513
2514static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2515{
2516 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2517 return 0;
2518
2519 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
cdddbdbc
DW
2520}
2521
ba2de7ba
DW
2522static void free_devlist(struct intel_super *super)
2523{
2524 struct intel_dev *dv;
2525
2526 while (super->devlist) {
2527 dv = super->devlist->next;
2528 free(super->devlist->dev);
2529 free(super->devlist);
2530 super->devlist = dv;
2531 }
2532}
2533
/* Copy 'src' into 'dest'; the number of bytes copied is
 * sizeof_imsm_dev(src, 0), so 'dest' must provide at least that much room.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
2538
cdddbdbc
DW
2539static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2540{
2541 /*
2542 * return:
2543 * 0 same, or first was empty, and second was copied
2544 * 1 second had wrong number
2545 * 2 wrong uuid
2546 * 3 wrong other info
2547 */
2548 struct intel_super *first = st->sb;
2549 struct intel_super *sec = tst->sb;
2550
2551 if (!first) {
2552 st->sb = tst->sb;
2553 tst->sb = NULL;
2554 return 0;
2555 }
8603ea6f
LM
2556 /* in platform dependent environment test if the disks
2557 * use the same Intel hba
2558 */
2559 if (!check_env("IMSM_NO_PLATFORM")) {
ea2bc72b
LM
2560 if (!first->hba || !sec->hba ||
2561 (first->hba->type != sec->hba->type)) {
8603ea6f
LM
2562 fprintf(stderr,
2563 "HBAs of devices does not match %s != %s\n",
ea2bc72b
LM
2564 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2565 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
8603ea6f
LM
2566 return 3;
2567 }
2568 }
cdddbdbc 2569
d23fe947
DW
2570 /* if an anchor does not have num_raid_devs set then it is a free
2571 * floating spare
2572 */
2573 if (first->anchor->num_raid_devs > 0 &&
2574 sec->anchor->num_raid_devs > 0) {
a2b97981
DW
2575 /* Determine if these disks might ever have been
2576 * related. Further disambiguation can only take place
2577 * in load_super_imsm_all
2578 */
2579 __u32 first_family = first->anchor->orig_family_num;
2580 __u32 sec_family = sec->anchor->orig_family_num;
2581
f796af5d
DW
2582 if (memcmp(first->anchor->sig, sec->anchor->sig,
2583 MAX_SIGNATURE_LENGTH) != 0)
2584 return 3;
2585
a2b97981
DW
2586 if (first_family == 0)
2587 first_family = first->anchor->family_num;
2588 if (sec_family == 0)
2589 sec_family = sec->anchor->family_num;
2590
2591 if (first_family != sec_family)
d23fe947 2592 return 3;
f796af5d 2593
d23fe947 2594 }
cdddbdbc 2595
f796af5d 2596
3e372e5a
DW
2597 /* if 'first' is a spare promote it to a populated mpb with sec's
2598 * family number
2599 */
2600 if (first->anchor->num_raid_devs == 0 &&
2601 sec->anchor->num_raid_devs > 0) {
78d30f94 2602 int i;
ba2de7ba
DW
2603 struct intel_dev *dv;
2604 struct imsm_dev *dev;
78d30f94
DW
2605
2606 /* we need to copy raid device info from sec if an allocation
2607 * fails here we don't associate the spare
2608 */
2609 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
ba2de7ba
DW
2610 dv = malloc(sizeof(*dv));
2611 if (!dv)
2612 break;
2613 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2614 if (!dev) {
2615 free(dv);
2616 break;
78d30f94 2617 }
ba2de7ba
DW
2618 dv->dev = dev;
2619 dv->index = i;
2620 dv->next = first->devlist;
2621 first->devlist = dv;
78d30f94 2622 }
709743c5 2623 if (i < sec->anchor->num_raid_devs) {
ba2de7ba
DW
2624 /* allocation failure */
2625 free_devlist(first);
2626 fprintf(stderr, "imsm: failed to associate spare\n");
2627 return 3;
78d30f94 2628 }
3e372e5a 2629 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
148acb7b 2630 first->anchor->orig_family_num = sec->anchor->orig_family_num;
3e372e5a 2631 first->anchor->family_num = sec->anchor->family_num;
ac6449be 2632 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
709743c5
DW
2633 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2634 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
3e372e5a
DW
2635 }
2636
cdddbdbc
DW
2637 return 0;
2638}
2639
0030e8d6
DW
2640static void fd2devname(int fd, char *name)
2641{
2642 struct stat st;
2643 char path[256];
33a6535d 2644 char dname[PATH_MAX];
0030e8d6
DW
2645 char *nm;
2646 int rv;
2647
2648 name[0] = '\0';
2649 if (fstat(fd, &st) != 0)
2650 return;
2651 sprintf(path, "/sys/dev/block/%d:%d",
2652 major(st.st_rdev), minor(st.st_rdev));
2653
2654 rv = readlink(path, dname, sizeof(dname));
2655 if (rv <= 0)
2656 return;
2657
2658 dname[rv] = '\0';
2659 nm = strrchr(dname, '/');
2660 nm++;
2661 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2662}
2663
cdddbdbc
DW
2664extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2665
2666static int imsm_read_serial(int fd, char *devname,
2667 __u8 serial[MAX_RAID_SERIAL_LEN])
2668{
2669 unsigned char scsi_serial[255];
cdddbdbc
DW
2670 int rv;
2671 int rsp_len;
1f24f035 2672 int len;
316e2bf4
DW
2673 char *dest;
2674 char *src;
2675 char *rsp_buf;
2676 int i;
cdddbdbc
DW
2677
2678 memset(scsi_serial, 0, sizeof(scsi_serial));
cdddbdbc 2679
f9ba0ff1
DW
2680 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
2681
40ebbb9c 2682 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
f9ba0ff1
DW
2683 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2684 fd2devname(fd, (char *) serial);
0030e8d6
DW
2685 return 0;
2686 }
2687
cdddbdbc
DW
2688 if (rv != 0) {
2689 if (devname)
2690 fprintf(stderr,
2691 Name ": Failed to retrieve serial for %s\n",
2692 devname);
2693 return rv;
2694 }
2695
2696 rsp_len = scsi_serial[3];
03cd4cc8
DW
2697 if (!rsp_len) {
2698 if (devname)
2699 fprintf(stderr,
2700 Name ": Failed to retrieve serial for %s\n",
2701 devname);
2702 return 2;
2703 }
1f24f035 2704 rsp_buf = (char *) &scsi_serial[4];
5c3db629 2705
316e2bf4
DW
2706 /* trim all whitespace and non-printable characters and convert
2707 * ':' to ';'
2708 */
2709 for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
2710 src = &rsp_buf[i];
2711 if (*src > 0x20) {
2712 /* ':' is reserved for use in placeholder serial
2713 * numbers for missing disks
2714 */
2715 if (*src == ':')
2716 *dest++ = ';';
2717 else
2718 *dest++ = *src;
2719 }
2720 }
2721 len = dest - rsp_buf;
2722 dest = rsp_buf;
2723
2724 /* truncate leading characters */
2725 if (len > MAX_RAID_SERIAL_LEN) {
2726 dest += len - MAX_RAID_SERIAL_LEN;
1f24f035 2727 len = MAX_RAID_SERIAL_LEN;
316e2bf4 2728 }
5c3db629 2729
5c3db629 2730 memset(serial, 0, MAX_RAID_SERIAL_LEN);
316e2bf4 2731 memcpy(serial, dest, len);
cdddbdbc
DW
2732
2733 return 0;
2734}
2735
1f24f035
DW
2736static int serialcmp(__u8 *s1, __u8 *s2)
2737{
2738 return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
2739}
2740
2741static void serialcpy(__u8 *dest, __u8 *src)
2742{
2743 strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
2744}
2745
1799c9e8 2746#ifndef MDASSEMBLE
54c2c1ea
DW
2747static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2748{
2749 struct dl *dl;
2750
2751 for (dl = super->disks; dl; dl = dl->next)
2752 if (serialcmp(dl->serial, serial) == 0)
2753 break;
2754
2755 return dl;
2756}
1799c9e8 2757#endif
54c2c1ea 2758
a2b97981
DW
2759static struct imsm_disk *
2760__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2761{
2762 int i;
2763
2764 for (i = 0; i < mpb->num_disks; i++) {
2765 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2766
2767 if (serialcmp(disk->serial, serial) == 0) {
2768 if (idx)
2769 *idx = i;
2770 return disk;
2771 }
2772 }
2773
2774 return NULL;
2775}
2776
cdddbdbc
DW
2777static int
2778load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2779{
a2b97981 2780 struct imsm_disk *disk;
cdddbdbc
DW
2781 struct dl *dl;
2782 struct stat stb;
cdddbdbc 2783 int rv;
a2b97981 2784 char name[40];
d23fe947
DW
2785 __u8 serial[MAX_RAID_SERIAL_LEN];
2786
2787 rv = imsm_read_serial(fd, devname, serial);
2788
2789 if (rv != 0)
2790 return 2;
2791
a2b97981 2792 dl = calloc(1, sizeof(*dl));
b9f594fe 2793 if (!dl) {
cdddbdbc
DW
2794 if (devname)
2795 fprintf(stderr,
2796 Name ": failed to allocate disk buffer for %s\n",
2797 devname);
2798 return 2;
2799 }
cdddbdbc 2800
a2b97981
DW
2801 fstat(fd, &stb);
2802 dl->major = major(stb.st_rdev);
2803 dl->minor = minor(stb.st_rdev);
2804 dl->next = super->disks;
2805 dl->fd = keep_fd ? fd : -1;
2806 assert(super->disks == NULL);
2807 super->disks = dl;
2808 serialcpy(dl->serial, serial);
2809 dl->index = -2;
2810 dl->e = NULL;
2811 fd2devname(fd, name);
2812 if (devname)
2813 dl->devname = strdup(devname);
2814 else
2815 dl->devname = strdup(name);
cdddbdbc 2816
d23fe947 2817 /* look up this disk's index in the current anchor */
a2b97981
DW
2818 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2819 if (disk) {
2820 dl->disk = *disk;
2821 /* only set index on disks that are a member of a
2822 * populated contianer, i.e. one with raid_devs
2823 */
2824 if (is_failed(&dl->disk))
3f6efecc 2825 dl->index = -2;
a2b97981
DW
2826 else if (is_spare(&dl->disk))
2827 dl->index = -1;
3f6efecc
DW
2828 }
2829
949c47a0
DW
2830 return 0;
2831}
2832
0e600426 2833#ifndef MDASSEMBLE
0c046afd
DW
2834/* When migrating map0 contains the 'destination' state while map1
2835 * contains the current state. When not migrating map0 contains the
2836 * current state. This routine assumes that map[0].map_state is set to
2837 * the current array state before being called.
2838 *
2839 * Migration is indicated by one of the following states
2840 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
e3bba0e0 2841 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
0c046afd 2842 * map1state=unitialized)
1484e727 2843 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
0c046afd 2844 * map1state=normal)
e3bba0e0 2845 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
0c046afd 2846 * map1state=degraded)
8e59f3d8
AK
2847 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2848 * map1state=normal)
0c046afd 2849 */
8e59f3d8
AK
2850static void migrate(struct imsm_dev *dev, struct intel_super *super,
2851 __u8 to_state, int migr_type)
3393c6af 2852{
0c046afd 2853 struct imsm_map *dest;
3393c6af
DW
2854 struct imsm_map *src = get_imsm_map(dev, 0);
2855
0c046afd 2856 dev->vol.migr_state = 1;
1484e727 2857 set_migr_type(dev, migr_type);
f8f603f1 2858 dev->vol.curr_migr_unit = 0;
0c046afd
DW
2859 dest = get_imsm_map(dev, 1);
2860
0556e1a2 2861 /* duplicate and then set the target end state in map[0] */
3393c6af 2862 memcpy(dest, src, sizeof_imsm_map(src));
28bce06f
AK
2863 if ((migr_type == MIGR_REBUILD) ||
2864 (migr_type == MIGR_GEN_MIGR)) {
0556e1a2
DW
2865 __u32 ord;
2866 int i;
2867
2868 for (i = 0; i < src->num_members; i++) {
2869 ord = __le32_to_cpu(src->disk_ord_tbl[i]);
2870 set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
2871 }
2872 }
2873
8e59f3d8
AK
2874 if (migr_type == MIGR_GEN_MIGR)
2875 /* Clear migration record */
2876 memset(super->migr_rec, 0, sizeof(struct migr_record));
2877
0c046afd 2878 src->map_state = to_state;
949c47a0 2879}
f8f603f1
DW
2880
2881static void end_migration(struct imsm_dev *dev, __u8 map_state)
2882{
2883 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2 2884 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
28bce06f 2885 int i, j;
0556e1a2
DW
2886
2887 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2888 * completed in the last migration.
2889 *
28bce06f 2890 * FIXME add support for raid-level-migration
0556e1a2
DW
2891 */
2892 for (i = 0; i < prev->num_members; i++)
28bce06f
AK
2893 for (j = 0; j < map->num_members; j++)
2894 /* during online capacity expansion
2895 * disks position can be changed if takeover is used
2896 */
2897 if (ord_to_idx(map->disk_ord_tbl[j]) ==
2898 ord_to_idx(prev->disk_ord_tbl[i])) {
2899 map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
2900 break;
2901 }
f8f603f1
DW
2902
2903 dev->vol.migr_state = 0;
28bce06f 2904 dev->vol.migr_type = 0;
f8f603f1
DW
2905 dev->vol.curr_migr_unit = 0;
2906 map->map_state = map_state;
2907}
0e600426 2908#endif
949c47a0
DW
2909
2910static int parse_raid_devices(struct intel_super *super)
2911{
2912 int i;
2913 struct imsm_dev *dev_new;
4d7b1503 2914 size_t len, len_migr;
401d313b 2915 size_t max_len = 0;
4d7b1503
DW
2916 size_t space_needed = 0;
2917 struct imsm_super *mpb = super->anchor;
949c47a0
DW
2918
2919 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2920 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
ba2de7ba 2921 struct intel_dev *dv;
949c47a0 2922
4d7b1503
DW
2923 len = sizeof_imsm_dev(dev_iter, 0);
2924 len_migr = sizeof_imsm_dev(dev_iter, 1);
2925 if (len_migr > len)
2926 space_needed += len_migr - len;
2927
ba2de7ba
DW
2928 dv = malloc(sizeof(*dv));
2929 if (!dv)
2930 return 1;
401d313b
AK
2931 if (max_len < len_migr)
2932 max_len = len_migr;
2933 if (max_len > len_migr)
2934 space_needed += max_len - len_migr;
2935 dev_new = malloc(max_len);
ba2de7ba
DW
2936 if (!dev_new) {
2937 free(dv);
949c47a0 2938 return 1;
ba2de7ba 2939 }
949c47a0 2940 imsm_copy_dev(dev_new, dev_iter);
ba2de7ba
DW
2941 dv->dev = dev_new;
2942 dv->index = i;
2943 dv->next = super->devlist;
2944 super->devlist = dv;
949c47a0 2945 }
cdddbdbc 2946
4d7b1503
DW
2947 /* ensure that super->buf is large enough when all raid devices
2948 * are migrating
2949 */
2950 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
2951 void *buf;
2952
2953 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
2954 if (posix_memalign(&buf, 512, len) != 0)
2955 return 1;
2956
1f45a8ad
DW
2957 memcpy(buf, super->buf, super->len);
2958 memset(buf + super->len, 0, len - super->len);
4d7b1503
DW
2959 free(super->buf);
2960 super->buf = buf;
2961 super->len = len;
2962 }
2963
cdddbdbc
DW
2964 return 0;
2965}
2966
604b746f
JD
2967/* retrieve a pointer to the bbm log which starts after all raid devices */
2968struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2969{
2970 void *ptr = NULL;
2971
2972 if (__le32_to_cpu(mpb->bbm_log_size)) {
2973 ptr = mpb;
2974 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2975 }
2976
2977 return ptr;
2978}
2979
e2f41b2c
AK
2980/*******************************************************************************
2981 * Function: check_mpb_migr_compatibility
2982 * Description: Function checks for unsupported migration features:
2983 * - migration optimization area (pba_of_lba0)
2984 * - descending reshape (ascending_migr)
2985 * Parameters:
2986 * super : imsm metadata information
2987 * Returns:
2988 * 0 : migration is compatible
2989 * -1 : migration is not compatible
2990 ******************************************************************************/
2991int check_mpb_migr_compatibility(struct intel_super *super)
2992{
2993 struct imsm_map *map0, *map1;
2994 struct migr_record *migr_rec = super->migr_rec;
2995 int i;
2996
2997 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2998 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
2999
3000 if (dev_iter &&
3001 dev_iter->vol.migr_state == 1 &&
3002 dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
3003 /* This device is migrating */
3004 map0 = get_imsm_map(dev_iter, 0);
3005 map1 = get_imsm_map(dev_iter, 1);
3006 if (map0->pba_of_lba0 != map1->pba_of_lba0)
3007 /* migration optimization area was used */
3008 return -1;
3009 if (migr_rec->ascending_migr == 0
3010 && migr_rec->dest_depth_per_unit > 0)
3011 /* descending reshape not supported yet */
3012 return -1;
3013 }
3014 }
3015 return 0;
3016}
3017
d23fe947 3018static void __free_imsm(struct intel_super *super, int free_disks);
9ca2c81c 3019
cdddbdbc 3020/* load_imsm_mpb - read matrix metadata
f2f5c343 3021 * allocates super->mpb to be freed by free_imsm
cdddbdbc
DW
3022 */
3023static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
3024{
3025 unsigned long long dsize;
cdddbdbc
DW
3026 unsigned long long sectors;
3027 struct stat;
6416d527 3028 struct imsm_super *anchor;
cdddbdbc
DW
3029 __u32 check_sum;
3030
cdddbdbc 3031 get_dev_size(fd, NULL, &dsize);
64436f06
N
3032 if (dsize < 1024) {
3033 if (devname)
3034 fprintf(stderr,
3035 Name ": %s: device to small for imsm\n",
3036 devname);
3037 return 1;
3038 }
cdddbdbc
DW
3039
3040 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
3041 if (devname)
3042 fprintf(stderr,
3043 Name ": Cannot seek to anchor block on %s: %s\n",
3044 devname, strerror(errno));
3045 return 1;
3046 }
3047
949c47a0 3048 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
ad97895e
DW
3049 if (devname)
3050 fprintf(stderr,
3051 Name ": Failed to allocate imsm anchor buffer"
3052 " on %s\n", devname);
3053 return 1;
3054 }
949c47a0 3055 if (read(fd, anchor, 512) != 512) {
cdddbdbc
DW
3056 if (devname)
3057 fprintf(stderr,
3058 Name ": Cannot read anchor block on %s: %s\n",
3059 devname, strerror(errno));
6416d527 3060 free(anchor);
cdddbdbc
DW
3061 return 1;
3062 }
3063
6416d527 3064 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
cdddbdbc
DW
3065 if (devname)
3066 fprintf(stderr,
3067 Name ": no IMSM anchor on %s\n", devname);
6416d527 3068 free(anchor);
cdddbdbc
DW
3069 return 2;
3070 }
3071
d23fe947 3072 __free_imsm(super, 0);
f2f5c343
LM
3073 /* reload capability and hba */
3074
3075 /* capability and hba must be updated with new super allocation */
d424212e 3076 find_intel_hba_capability(fd, super, devname);
949c47a0
DW
3077 super->len = ROUND_UP(anchor->mpb_size, 512);
3078 if (posix_memalign(&super->buf, 512, super->len) != 0) {
cdddbdbc
DW
3079 if (devname)
3080 fprintf(stderr,
3081 Name ": unable to allocate %zu byte mpb buffer\n",
949c47a0 3082 super->len);
6416d527 3083 free(anchor);
cdddbdbc
DW
3084 return 2;
3085 }
949c47a0 3086 memcpy(super->buf, anchor, 512);
cdddbdbc 3087
6416d527
NB
3088 sectors = mpb_sectors(anchor) - 1;
3089 free(anchor);
8e59f3d8
AK
3090
3091 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3092 fprintf(stderr, Name
3093 ": %s could not allocate migr_rec buffer\n", __func__);
3094 free(super->buf);
3095 return 2;
3096 }
3097
949c47a0 3098 if (!sectors) {
ecf45690
DW
3099 check_sum = __gen_imsm_checksum(super->anchor);
3100 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3101 if (devname)
3102 fprintf(stderr,
3103 Name ": IMSM checksum %x != %x on %s\n",
3104 check_sum,
3105 __le32_to_cpu(super->anchor->check_sum),
3106 devname);
3107 return 2;
3108 }
3109
a2b97981 3110 return 0;
949c47a0 3111 }
cdddbdbc
DW
3112
3113 /* read the extended mpb */
3114 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
3115 if (devname)
3116 fprintf(stderr,
3117 Name ": Cannot seek to extended mpb on %s: %s\n",
3118 devname, strerror(errno));
3119 return 1;
3120 }
3121
f21e18ca 3122 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
cdddbdbc
DW
3123 if (devname)
3124 fprintf(stderr,
3125 Name ": Cannot read extended mpb on %s: %s\n",
3126 devname, strerror(errno));
3127 return 2;
3128 }
3129
949c47a0
DW
3130 check_sum = __gen_imsm_checksum(super->anchor);
3131 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
cdddbdbc
DW
3132 if (devname)
3133 fprintf(stderr,
3134 Name ": IMSM checksum %x != %x on %s\n",
949c47a0 3135 check_sum, __le32_to_cpu(super->anchor->check_sum),
cdddbdbc 3136 devname);
db575f3b 3137 return 3;
cdddbdbc
DW
3138 }
3139
604b746f
JD
3140 /* FIXME the BBM log is disk specific so we cannot use this global
3141 * buffer for all disks. Ok for now since we only look at the global
3142 * bbm_log_size parameter to gate assembly
3143 */
3144 super->bbm_log = __get_imsm_bbm_log(super->anchor);
3145
a2b97981
DW
3146 return 0;
3147}
3148
8e59f3d8
AK
3149static int read_imsm_migr_rec(int fd, struct intel_super *super);
3150
a2b97981
DW
/* load_and_parse_mpb - run the three load stages in order: the mpb, the
 * disk record, then the raid devices.  Stops at the first stage that
 * reports an error and returns that stage's code; 0 when all succeed.
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (!err)
		err = load_imsm_disk(fd, super, devname, keep_fd);
	if (!err)
		err = parse_raid_devices(super);

	return err;
}
3166
ae6aad82
DW
3167static void __free_imsm_disk(struct dl *d)
3168{
3169 if (d->fd >= 0)
3170 close(d->fd);
3171 if (d->devname)
3172 free(d->devname);
0dcecb2e
DW
3173 if (d->e)
3174 free(d->e);
ae6aad82
DW
3175 free(d);
3176
3177}
1a64be56 3178
cdddbdbc
DW
3179static void free_imsm_disks(struct intel_super *super)
3180{
47ee5a45 3181 struct dl *d;
cdddbdbc 3182
47ee5a45
DW
3183 while (super->disks) {
3184 d = super->disks;
cdddbdbc 3185 super->disks = d->next;
ae6aad82 3186 __free_imsm_disk(d);
cdddbdbc 3187 }
cb82edca
AK
3188 while (super->disk_mgmt_list) {
3189 d = super->disk_mgmt_list;
3190 super->disk_mgmt_list = d->next;
3191 __free_imsm_disk(d);
3192 }
47ee5a45
DW
3193 while (super->missing) {
3194 d = super->missing;
3195 super->missing = d->next;
3196 __free_imsm_disk(d);
3197 }
3198
cdddbdbc
DW
3199}
3200
9ca2c81c 3201/* free all the pieces hanging off of a super pointer */
d23fe947 3202static void __free_imsm(struct intel_super *super, int free_disks)
cdddbdbc 3203{
88654014
LM
3204 struct intel_hba *elem, *next;
3205
9ca2c81c 3206 if (super->buf) {
949c47a0 3207 free(super->buf);
9ca2c81c
DW
3208 super->buf = NULL;
3209 }
f2f5c343
LM
3210 /* unlink capability description */
3211 super->orom = NULL;
8e59f3d8
AK
3212 if (super->migr_rec_buf) {
3213 free(super->migr_rec_buf);
3214 super->migr_rec_buf = NULL;
3215 }
d23fe947
DW
3216 if (free_disks)
3217 free_imsm_disks(super);
ba2de7ba 3218 free_devlist(super);
88654014
LM
3219 elem = super->hba;
3220 while (elem) {
3221 if (elem->path)
3222 free((void *)elem->path);
3223 next = elem->next;
3224 free(elem);
3225 elem = next;
88c32bb1 3226 }
88654014 3227 super->hba = NULL;
cdddbdbc
DW
3228}
3229
9ca2c81c
DW
/* free_imsm - release everything attached to 'super', including its
 * disk lists, and then the structure itself.
 */
static void free_imsm(struct intel_super *super)
{
	const int with_disks = 1;

	__free_imsm(super, with_disks);
	free(super);
}
cdddbdbc
DW
3235
3236static void free_super_imsm(struct supertype *st)
3237{
3238 struct intel_super *super = st->sb;
3239
3240 if (!super)
3241 return;
3242
3243 free_imsm(super);
3244 st->sb = NULL;
3245}
3246
49133e57 3247static struct intel_super *alloc_super(void)
c2c087e6
DW
3248{
3249 struct intel_super *super = malloc(sizeof(*super));
3250
3251 if (super) {
3252 memset(super, 0, sizeof(*super));
bf5a934a 3253 super->current_vol = -1;
0dcecb2e 3254 super->create_offset = ~((__u32 ) 0);
c2c087e6 3255 }
c2c087e6
DW
3256 return super;
3257}
3258
f0f5a016
LM
3259/*
3260 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3261 */
d424212e 3262static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
f0f5a016
LM
3263{
3264 struct sys_dev *hba_name;
3265 int rv = 0;
3266
3267 if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
f2f5c343 3268 super->orom = NULL;
f0f5a016
LM
3269 super->hba = NULL;
3270 return 0;
3271 }
3272 hba_name = find_disk_attached_hba(fd, NULL);
3273 if (!hba_name) {
d424212e 3274 if (devname)
f0f5a016
LM
3275 fprintf(stderr,
3276 Name ": %s is not attached to Intel(R) RAID controller.\n",
d424212e 3277 devname);
f0f5a016
LM
3278 return 1;
3279 }
3280 rv = attach_hba_to_super(super, hba_name);
3281 if (rv == 2) {
d424212e
N
3282 if (devname) {
3283 struct intel_hba *hba = super->hba;
f0f5a016 3284
f0f5a016
LM
3285 fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
3286 "controller (%s),\n"
3287 " but the container is assigned to Intel(R) "
3288 "%s RAID controller (",
d424212e 3289 devname,
f0f5a016
LM
3290 hba_name->path,
3291 hba_name->pci_id ? : "Err!",
3292 get_sys_dev_type(hba_name->type));
3293
f0f5a016
LM
3294 while (hba) {
3295 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
3296 if (hba->next)
3297 fprintf(stderr, ", ");
3298 hba = hba->next;
3299 }
3300
3301 fprintf(stderr, ").\n"
3302 " Mixing devices attached to different controllers "
3303 "is not allowed.\n");
3304 }
3305 free_sys_dev(&hba_name);
3306 return 2;
3307 }
f2f5c343 3308 super->orom = find_imsm_capability(hba_name->type);
f0f5a016 3309 free_sys_dev(&hba_name);
f2f5c343
LM
3310 if (!super->orom)
3311 return 3;
f0f5a016
LM
3312 return 0;
3313}
3314
cdddbdbc 3315#ifndef MDASSEMBLE
47ee5a45
DW
3316/* find_missing - helper routine for load_super_imsm_all that identifies
3317 * disks that have disappeared from the system. This routine relies on
3318 * the mpb being uptodate, which it is at load time.
3319 */
3320static int find_missing(struct intel_super *super)
3321{
3322 int i;
3323 struct imsm_super *mpb = super->anchor;
3324 struct dl *dl;
3325 struct imsm_disk *disk;
47ee5a45
DW
3326
3327 for (i = 0; i < mpb->num_disks; i++) {
3328 disk = __get_imsm_disk(mpb, i);
54c2c1ea 3329 dl = serial_to_dl(disk->serial, super);
47ee5a45
DW
3330 if (dl)
3331 continue;
47ee5a45
DW
3332
3333 dl = malloc(sizeof(*dl));
3334 if (!dl)
3335 return 1;
3336 dl->major = 0;
3337 dl->minor = 0;
3338 dl->fd = -1;
3339 dl->devname = strdup("missing");
3340 dl->index = i;
3341 serialcpy(dl->serial, disk->serial);
3342 dl->disk = *disk;
689c9bf3 3343 dl->e = NULL;
47ee5a45
DW
3344 dl->next = super->missing;
3345 super->missing = dl;
3346 }
3347
3348 return 0;
3349}
3350
a2b97981
DW
3351static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3352{
3353 struct intel_disk *idisk = disk_list;
3354
3355 while (idisk) {
3356 if (serialcmp(idisk->disk.serial, serial) == 0)
3357 break;
3358 idisk = idisk->next;
3359 }
3360
3361 return idisk;
3362}
3363
3364static int __prep_thunderdome(struct intel_super **table, int tbl_size,
3365 struct intel_super *super,
3366 struct intel_disk **disk_list)
3367{
3368 struct imsm_disk *d = &super->disks->disk;
3369 struct imsm_super *mpb = super->anchor;
3370 int i, j;
3371
3372 for (i = 0; i < tbl_size; i++) {
3373 struct imsm_super *tbl_mpb = table[i]->anchor;
3374 struct imsm_disk *tbl_d = &table[i]->disks->disk;
3375
3376 if (tbl_mpb->family_num == mpb->family_num) {
3377 if (tbl_mpb->check_sum == mpb->check_sum) {
3378 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3379 __func__, super->disks->major,
3380 super->disks->minor,
3381 table[i]->disks->major,
3382 table[i]->disks->minor);
3383 break;
3384 }
3385
3386 if (((is_configured(d) && !is_configured(tbl_d)) ||
3387 is_configured(d) == is_configured(tbl_d)) &&
3388 tbl_mpb->generation_num < mpb->generation_num) {
3389 /* current version of the mpb is a
3390 * better candidate than the one in
3391 * super_table, but copy over "cross
3392 * generational" status
3393 */
3394 struct intel_disk *idisk;
3395
3396 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3397 __func__, super->disks->major,
3398 super->disks->minor,
3399 table[i]->disks->major,
3400 table[i]->disks->minor);
3401
3402 idisk = disk_list_get(tbl_d->serial, *disk_list);
3403 if (idisk && is_failed(&idisk->disk))
3404 tbl_d->status |= FAILED_DISK;
3405 break;
3406 } else {
3407 struct intel_disk *idisk;
3408 struct imsm_disk *disk;
3409
3410 /* tbl_mpb is more up to date, but copy
3411 * over cross generational status before
3412 * returning
3413 */
3414 disk = __serial_to_disk(d->serial, mpb, NULL);
3415 if (disk && is_failed(disk))
3416 d->status |= FAILED_DISK;
3417
3418 idisk = disk_list_get(d->serial, *disk_list);
3419 if (idisk) {
3420 idisk->owner = i;
3421 if (disk && is_configured(disk))
3422 idisk->disk.status |= CONFIGURED_DISK;
3423 }
3424
3425 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3426 __func__, super->disks->major,
3427 super->disks->minor,
3428 table[i]->disks->major,
3429 table[i]->disks->minor);
3430
3431 return tbl_size;
3432 }
3433 }
3434 }
3435
3436 if (i >= tbl_size)
3437 table[tbl_size++] = super;
3438 else
3439 table[i] = super;
3440
3441 /* update/extend the merged list of imsm_disk records */
3442 for (j = 0; j < mpb->num_disks; j++) {
3443 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
3444 struct intel_disk *idisk;
3445
3446 idisk = disk_list_get(disk->serial, *disk_list);
3447 if (idisk) {
3448 idisk->disk.status |= disk->status;
3449 if (is_configured(&idisk->disk) ||
3450 is_failed(&idisk->disk))
3451 idisk->disk.status &= ~(SPARE_DISK);
3452 } else {
3453 idisk = calloc(1, sizeof(*idisk));
3454 if (!idisk)
3455 return -1;
3456 idisk->owner = IMSM_UNKNOWN_OWNER;
3457 idisk->disk = *disk;
3458 idisk->next = *disk_list;
3459 *disk_list = idisk;
3460 }
3461
3462 if (serialcmp(idisk->disk.serial, d->serial) == 0)
3463 idisk->owner = i;
3464 }
3465
3466 return tbl_size;
3467}
3468
3469static struct intel_super *
3470validate_members(struct intel_super *super, struct intel_disk *disk_list,
3471 const int owner)
3472{
3473 struct imsm_super *mpb = super->anchor;
3474 int ok_count = 0;
3475 int i;
3476
3477 for (i = 0; i < mpb->num_disks; i++) {
3478 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3479 struct intel_disk *idisk;
3480
3481 idisk = disk_list_get(disk->serial, disk_list);
3482 if (idisk) {
3483 if (idisk->owner == owner ||
3484 idisk->owner == IMSM_UNKNOWN_OWNER)
3485 ok_count++;
3486 else
3487 dprintf("%s: '%.16s' owner %d != %d\n",
3488 __func__, disk->serial, idisk->owner,
3489 owner);
3490 } else {
3491 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3492 __func__, __le32_to_cpu(mpb->family_num), i,
3493 disk->serial);
3494 break;
3495 }
3496 }
3497
3498 if (ok_count == mpb->num_disks)
3499 return super;
3500 return NULL;
3501}
3502
3503static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3504{
3505 struct intel_super *s;
3506
3507 for (s = super_list; s; s = s->next) {
3508 if (family_num != s->anchor->family_num)
3509 continue;
3510 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3511 __le32_to_cpu(family_num), s->disks->devname);
3512 }
3513}
3514
3515static struct intel_super *
3516imsm_thunderdome(struct intel_super **super_list, int len)
3517{
3518 struct intel_super *super_table[len];
3519 struct intel_disk *disk_list = NULL;
3520 struct intel_super *champion, *spare;
3521 struct intel_super *s, **del;
3522 int tbl_size = 0;
3523 int conflict;
3524 int i;
3525
3526 memset(super_table, 0, sizeof(super_table));
3527 for (s = *super_list; s; s = s->next)
3528 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
3529
3530 for (i = 0; i < tbl_size; i++) {
3531 struct imsm_disk *d;
3532 struct intel_disk *idisk;
3533 struct imsm_super *mpb = super_table[i]->anchor;
3534
3535 s = super_table[i];
3536 d = &s->disks->disk;
3537
3538 /* 'd' must appear in merged disk list for its
3539 * configuration to be valid
3540 */
3541 idisk = disk_list_get(d->serial, disk_list);
3542 if (idisk && idisk->owner == i)
3543 s = validate_members(s, disk_list, i);
3544 else
3545 s = NULL;
3546
3547 if (!s)
3548 dprintf("%s: marking family: %#x from %d:%d offline\n",
3549 __func__, mpb->family_num,
3550 super_table[i]->disks->major,
3551 super_table[i]->disks->minor);
3552 super_table[i] = s;
3553 }
3554
3555 /* This is where the mdadm implementation differs from the Windows
3556 * driver which has no strict concept of a container. We can only
3557 * assemble one family from a container, so when returning a prodigal
3558 * array member to this system the code will not be able to disambiguate
3559 * the container contents that should be assembled ("foreign" versus
3560 * "local"). It requires user intervention to set the orig_family_num
3561 * to a new value to establish a new container. The Windows driver in
3562 * this situation fixes up the volume name in place and manages the
3563 * foreign array as an independent entity.
3564 */
3565 s = NULL;
3566 spare = NULL;
3567 conflict = 0;
3568 for (i = 0; i < tbl_size; i++) {
3569 struct intel_super *tbl_ent = super_table[i];
3570 int is_spare = 0;
3571
3572 if (!tbl_ent)
3573 continue;
3574
3575 if (tbl_ent->anchor->num_raid_devs == 0) {
3576 spare = tbl_ent;
3577 is_spare = 1;
3578 }
3579
3580 if (s && !is_spare) {
3581 show_conflicts(tbl_ent->anchor->family_num, *super_list);
3582 conflict++;
3583 } else if (!s && !is_spare)
3584 s = tbl_ent;
3585 }
3586
3587 if (!s)
3588 s = spare;
3589 if (!s) {
3590 champion = NULL;
3591 goto out;
3592 }
3593 champion = s;
3594
3595 if (conflict)
3596 fprintf(stderr, "Chose family %#x on '%s', "
3597 "assemble conflicts to new container with '--update=uuid'\n",
3598 __le32_to_cpu(s->anchor->family_num), s->disks->devname);
3599
3600 /* collect all dl's onto 'champion', and update them to
3601 * champion's version of the status
3602 */
3603 for (s = *super_list; s; s = s->next) {
3604 struct imsm_super *mpb = champion->anchor;
3605 struct dl *dl = s->disks;
3606
3607 if (s == champion)
3608 continue;
3609
3610 for (i = 0; i < mpb->num_disks; i++) {
3611 struct imsm_disk *disk;
3612
3613 disk = __serial_to_disk(dl->serial, mpb, &dl->index);
3614 if (disk) {
3615 dl->disk = *disk;
3616 /* only set index on disks that are a member of
3617 * a populated contianer, i.e. one with
3618 * raid_devs
3619 */
3620 if (is_failed(&dl->disk))
3621 dl->index = -2;
3622 else if (is_spare(&dl->disk))
3623 dl->index = -1;
3624 break;
3625 }
3626 }
3627
3628 if (i >= mpb->num_disks) {
3629 struct intel_disk *idisk;
3630
3631 idisk = disk_list_get(dl->serial, disk_list);
ecf408e9 3632 if (idisk && is_spare(&idisk->disk) &&
a2b97981
DW
3633 !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
3634 dl->index = -1;
3635 else {
3636 dl->index = -2;
3637 continue;
3638 }
3639 }
3640
3641 dl->next = champion->disks;
3642 champion->disks = dl;
3643 s->disks = NULL;
3644 }
3645
3646 /* delete 'champion' from super_list */
3647 for (del = super_list; *del; ) {
3648 if (*del == champion) {
3649 *del = (*del)->next;
3650 break;
3651 } else
3652 del = &(*del)->next;
3653 }
3654 champion->next = NULL;
3655
3656 out:
3657 while (disk_list) {
3658 struct intel_disk *idisk = disk_list;
3659
3660 disk_list = disk_list->next;
3661 free(idisk);
3662 }
3663
3664 return champion;
3665}
3666
cdddbdbc 3667static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
e1902a7b 3668 char *devname)
cdddbdbc
DW
3669{
3670 struct mdinfo *sra;
a2b97981
DW
3671 struct intel_super *super_list = NULL;
3672 struct intel_super *super = NULL;
db575f3b 3673 int devnum = fd2devnum(fd);
a2b97981 3674 struct mdinfo *sd;
db575f3b 3675 int retry;
a2b97981
DW
3676 int err = 0;
3677 int i;
dab4a513
DW
3678
3679 /* check if 'fd' an opened container */
b526e52d 3680 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
cdddbdbc
DW
3681 if (!sra)
3682 return 1;
3683
3684 if (sra->array.major_version != -1 ||
3685 sra->array.minor_version != -2 ||
1602d52c
AW
3686 strcmp(sra->text_version, "imsm") != 0) {
3687 err = 1;
3688 goto error;
3689 }
a2b97981
DW
3690 /* load all mpbs */
3691 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
49133e57 3692 struct intel_super *s = alloc_super();
7a6ecd55 3693 char nm[32];
a2b97981 3694 int dfd;
f2f5c343 3695 int rv;
a2b97981
DW
3696
3697 err = 1;
3698 if (!s)
3699 goto error;
3700 s->next = super_list;
3701 super_list = s;
cdddbdbc 3702
a2b97981 3703 err = 2;
cdddbdbc 3704 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3705 dfd = dev_open(nm, O_RDWR);
a2b97981
DW
3706 if (dfd < 0)
3707 goto error;
3708
d424212e 3709 rv = find_intel_hba_capability(dfd, s, devname);
f2f5c343
LM
3710 /* no orom/efi or non-intel hba of the disk */
3711 if (rv != 0)
3712 goto error;
3713
e1902a7b 3714 err = load_and_parse_mpb(dfd, s, NULL, 1);
db575f3b
DW
3715
3716 /* retry the load if we might have raced against mdmon */
a2b97981 3717 if (err == 3 && mdmon_running(devnum))
db575f3b
DW
3718 for (retry = 0; retry < 3; retry++) {
3719 usleep(3000);
e1902a7b 3720 err = load_and_parse_mpb(dfd, s, NULL, 1);
a2b97981 3721 if (err != 3)
db575f3b
DW
3722 break;
3723 }
a2b97981
DW
3724 if (err)
3725 goto error;
cdddbdbc
DW
3726 }
3727
a2b97981
DW
3728 /* all mpbs enter, maybe one leaves */
3729 super = imsm_thunderdome(&super_list, i);
3730 if (!super) {
3731 err = 1;
3732 goto error;
cdddbdbc
DW
3733 }
3734
47ee5a45
DW
3735 if (find_missing(super) != 0) {
3736 free_imsm(super);
a2b97981
DW
3737 err = 2;
3738 goto error;
47ee5a45 3739 }
8e59f3d8
AK
3740
3741 /* load migration record */
3742 err = load_imsm_migr_rec(super, NULL);
3743 if (err) {
3744 err = 4;
3745 goto error;
3746 }
e2f41b2c
AK
3747
3748 /* Check migration compatibility */
3749 if (check_mpb_migr_compatibility(super) != 0) {
3750 fprintf(stderr, Name ": Unsupported migration detected");
3751 if (devname)
3752 fprintf(stderr, " on %s\n", devname);
3753 else
3754 fprintf(stderr, " (IMSM).\n");
3755
3756 err = 5;
3757 goto error;
3758 }
3759
a2b97981
DW
3760 err = 0;
3761
3762 error:
3763 while (super_list) {
3764 struct intel_super *s = super_list;
3765
3766 super_list = super_list->next;
3767 free_imsm(s);
3768 }
1602d52c 3769 sysfs_free(sra);
a2b97981
DW
3770
3771 if (err)
3772 return err;
f7e7067b 3773
cdddbdbc 3774 *sbp = super;
db575f3b 3775 st->container_dev = devnum;
a2b97981 3776 if (err == 0 && st->ss == NULL) {
bf5a934a 3777 st->ss = &super_imsm;
cdddbdbc
DW
3778 st->minor_version = 0;
3779 st->max_devs = IMSM_MAX_DEVICES;
3780 }
cdddbdbc
DW
3781 return 0;
3782}
2b959fbf
N
3783
3784static int load_container_imsm(struct supertype *st, int fd, char *devname)
3785{
3786 return load_super_imsm_all(st, fd, &st->sb, devname);
3787}
cdddbdbc
DW
3788#endif
3789
3790static int load_super_imsm(struct supertype *st, int fd, char *devname)
3791{
3792 struct intel_super *super;
3793 int rv;
3794
691c6ee1
N
3795 if (test_partition(fd))
3796 /* IMSM not allowed on partitions */
3797 return 1;
3798
37424f13
DW
3799 free_super_imsm(st);
3800
49133e57 3801 super = alloc_super();
cdddbdbc
DW
3802 if (!super) {
3803 fprintf(stderr,
3804 Name ": malloc of %zu failed.\n",
3805 sizeof(*super));
3806 return 1;
3807 }
ea2bc72b
LM
3808 /* Load hba and capabilities if they exist.
3809 * But do not preclude loading metadata in case capabilities or hba are
3810 * non-compliant and ignore_hw_compat is set.
3811 */
d424212e 3812 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 3813 /* no orom/efi or non-intel hba of the disk */
ea2bc72b 3814 if ((rv != 0) && (st->ignore_hw_compat == 0)) {
f2f5c343
LM
3815 if (devname)
3816 fprintf(stderr,
3817 Name ": No OROM/EFI properties for %s\n", devname);
3818 free_imsm(super);
3819 return 2;
3820 }
a2b97981 3821 rv = load_and_parse_mpb(fd, super, devname, 0);
cdddbdbc
DW
3822
3823 if (rv) {
3824 if (devname)
3825 fprintf(stderr,
3826 Name ": Failed to load all information "
3827 "sections on %s\n", devname);
3828 free_imsm(super);
3829 return rv;
3830 }
3831
3832 st->sb = super;
3833 if (st->ss == NULL) {
3834 st->ss = &super_imsm;
3835 st->minor_version = 0;
3836 st->max_devs = IMSM_MAX_DEVICES;
3837 }
8e59f3d8
AK
3838
3839 /* load migration record */
3840 load_imsm_migr_rec(super, NULL);
3841
e2f41b2c
AK
3842 /* Check for unsupported migration features */
3843 if (check_mpb_migr_compatibility(super) != 0) {
3844 fprintf(stderr, Name ": Unsupported migration detected");
3845 if (devname)
3846 fprintf(stderr, " on %s\n", devname);
3847 else
3848 fprintf(stderr, " (IMSM).\n");
3849 return 3;
3850 }
3851
cdddbdbc
DW
3852 return 0;
3853}
3854
ef6ffade
DW
3855static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3856{
3857 if (info->level == 1)
3858 return 128;
3859 return info->chunk_size >> 9;
3860}
3861
ff596308 3862static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
ef6ffade
DW
3863{
3864 __u32 num_stripes;
3865
3866 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
ff596308 3867 num_stripes /= num_domains;
ef6ffade
DW
3868
3869 return num_stripes;
3870}
3871
fcfd9599
DW
3872static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3873{
4025c288
DW
3874 if (info->level == 1)
3875 return info->size * 2;
3876 else
3877 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
fcfd9599
DW
3878}
3879
4d1313e9
DW
3880static void imsm_update_version_info(struct intel_super *super)
3881{
3882 /* update the version and attributes */
3883 struct imsm_super *mpb = super->anchor;
3884 char *version;
3885 struct imsm_dev *dev;
3886 struct imsm_map *map;
3887 int i;
3888
3889 for (i = 0; i < mpb->num_raid_devs; i++) {
3890 dev = get_imsm_dev(super, i);
3891 map = get_imsm_map(dev, 0);
3892 if (__le32_to_cpu(dev->size_high) > 0)
3893 mpb->attributes |= MPB_ATTRIB_2TB;
3894
3895 /* FIXME detect when an array spans a port multiplier */
3896 #if 0
3897 mpb->attributes |= MPB_ATTRIB_PM;
3898 #endif
3899
3900 if (mpb->num_raid_devs > 1 ||
3901 mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
3902 version = MPB_VERSION_ATTRIBS;
3903 switch (get_imsm_raid_level(map)) {
3904 case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
3905 case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
3906 case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
3907 case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
3908 }
3909 } else {
3910 if (map->num_members >= 5)
3911 version = MPB_VERSION_5OR6_DISK_ARRAY;
3912 else if (dev->status == DEV_CLONE_N_GO)
3913 version = MPB_VERSION_CNG;
3914 else if (get_imsm_raid_level(map) == 5)
3915 version = MPB_VERSION_RAID5;
3916 else if (map->num_members >= 3)
3917 version = MPB_VERSION_3OR4_DISK_ARRAY;
3918 else if (get_imsm_raid_level(map) == 1)
3919 version = MPB_VERSION_RAID1;
3920 else
3921 version = MPB_VERSION_RAID0;
3922 }
3923 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
3924 }
3925}
3926
aa534678
DW
3927static int check_name(struct intel_super *super, char *name, int quiet)
3928{
3929 struct imsm_super *mpb = super->anchor;
3930 char *reason = NULL;
3931 int i;
3932
3933 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3934 reason = "must be 16 characters or less";
3935
3936 for (i = 0; i < mpb->num_raid_devs; i++) {
3937 struct imsm_dev *dev = get_imsm_dev(super, i);
3938
3939 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3940 reason = "already exists";
3941 break;
3942 }
3943 }
3944
3945 if (reason && !quiet)
3946 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3947
3948 return !reason;
3949}
3950
8b353278
DW
3951static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3952 unsigned long long size, char *name,
3953 char *homehost, int *uuid)
cdddbdbc 3954{
c2c087e6
DW
3955 /* We are creating a volume inside a pre-existing container.
3956 * so st->sb is already set.
3957 */
3958 struct intel_super *super = st->sb;
949c47a0 3959 struct imsm_super *mpb = super->anchor;
ba2de7ba 3960 struct intel_dev *dv;
c2c087e6
DW
3961 struct imsm_dev *dev;
3962 struct imsm_vol *vol;
3963 struct imsm_map *map;
3964 int idx = mpb->num_raid_devs;
3965 int i;
3966 unsigned long long array_blocks;
2c092cad 3967 size_t size_old, size_new;
ff596308 3968 __u32 num_data_stripes;
cdddbdbc 3969
88c32bb1 3970 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
c2c087e6 3971 fprintf(stderr, Name": This imsm-container already has the "
88c32bb1 3972 "maximum of %d volumes\n", super->orom->vpa);
c2c087e6
DW
3973 return 0;
3974 }
3975
2c092cad
DW
3976 /* ensure the mpb is large enough for the new data */
3977 size_old = __le32_to_cpu(mpb->mpb_size);
3978 size_new = disks_to_mpb_size(info->nr_disks);
3979 if (size_new > size_old) {
3980 void *mpb_new;
3981 size_t size_round = ROUND_UP(size_new, 512);
3982
3983 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3984 fprintf(stderr, Name": could not allocate new mpb\n");
3985 return 0;
3986 }
8e59f3d8
AK
3987 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3988 fprintf(stderr, Name
3989 ": %s could not allocate migr_rec buffer\n",
3990 __func__);
3991 free(super->buf);
3992 free(super);
3993 return 0;
3994 }
2c092cad
DW
3995 memcpy(mpb_new, mpb, size_old);
3996 free(mpb);
3997 mpb = mpb_new;
949c47a0 3998 super->anchor = mpb_new;
2c092cad
DW
3999 mpb->mpb_size = __cpu_to_le32(size_new);
4000 memset(mpb_new + size_old, 0, size_round - size_old);
4001 }
bf5a934a 4002 super->current_vol = idx;
d23fe947
DW
4003 /* when creating the first raid device in this container set num_disks
4004 * to zero, i.e. delete this spare and add raid member devices in
4005 * add_to_super_imsm_volume()
4006 */
4007 if (super->current_vol == 0)
4008 mpb->num_disks = 0;
5a038140 4009
aa534678
DW
4010 if (!check_name(super, name, 0))
4011 return 0;
ba2de7ba
DW
4012 dv = malloc(sizeof(*dv));
4013 if (!dv) {
4014 fprintf(stderr, Name ": failed to allocate device list entry\n");
4015 return 0;
4016 }
1a2487c2 4017 dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
949c47a0 4018 if (!dev) {
ba2de7ba 4019 free(dv);
949c47a0
DW
4020 fprintf(stderr, Name": could not allocate raid device\n");
4021 return 0;
4022 }
1a2487c2 4023
c2c087e6 4024 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
03bcbc65
DW
4025 if (info->level == 1)
4026 array_blocks = info_to_blocks_per_member(info);
4027 else
4028 array_blocks = calc_array_size(info->level, info->raid_disks,
4029 info->layout, info->chunk_size,
4030 info->size*2);
979d38be
DW
4031 /* round array size down to closest MB */
4032 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
4033
c2c087e6
DW
4034 dev->size_low = __cpu_to_le32((__u32) array_blocks);
4035 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1a2487c2 4036 dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
c2c087e6
DW
4037 vol = &dev->vol;
4038 vol->migr_state = 0;
1484e727 4039 set_migr_type(dev, MIGR_INIT);
c2c087e6 4040 vol->dirty = 0;
f8f603f1 4041 vol->curr_migr_unit = 0;
a965f303 4042 map = get_imsm_map(dev, 0);
0dcecb2e 4043 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
fcfd9599 4044 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
ef6ffade 4045 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
0556e1a2 4046 map->failed_disk_num = ~0;
c2c087e6
DW
4047 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
4048 IMSM_T_STATE_NORMAL;
252d23c0 4049 map->ddf = 1;
ef6ffade
DW
4050
4051 if (info->level == 1 && info->raid_disks > 2) {
38950822
AW
4052 free(dev);
4053 free(dv);
ef6ffade
DW
4054 fprintf(stderr, Name": imsm does not support more than 2 disks"
4055 "in a raid1 volume\n");
4056 return 0;
4057 }
81062a36
DW
4058
4059 map->raid_level = info->level;
4d1313e9 4060 if (info->level == 10) {
c2c087e6 4061 map->raid_level = 1;
4d1313e9 4062 map->num_domains = info->raid_disks / 2;
81062a36
DW
4063 } else if (info->level == 1)
4064 map->num_domains = info->raid_disks;
4065 else
ff596308 4066 map->num_domains = 1;
81062a36 4067
ff596308
DW
4068 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
4069 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
ef6ffade 4070
c2c087e6
DW
4071 map->num_members = info->raid_disks;
4072 for (i = 0; i < map->num_members; i++) {
4073 /* initialized in add_to_super */
4eb26970 4074 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
c2c087e6 4075 }
949c47a0 4076 mpb->num_raid_devs++;
ba2de7ba
DW
4077
4078 dv->dev = dev;
4079 dv->index = super->current_vol;
4080 dv->next = super->devlist;
4081 super->devlist = dv;
c2c087e6 4082
4d1313e9
DW
4083 imsm_update_version_info(super);
4084
c2c087e6 4085 return 1;
cdddbdbc
DW
4086}
4087
bf5a934a
DW
4088static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
4089 unsigned long long size, char *name,
4090 char *homehost, int *uuid)
4091{
4092 /* This is primarily called by Create when creating a new array.
4093 * We will then get add_to_super called for each component, and then
4094 * write_init_super called to write it out to each device.
4095 * For IMSM, Create can create on fresh devices or on a pre-existing
4096 * array.
4097 * To create on a pre-existing array a different method will be called.
4098 * This one is just for fresh drives.
4099 */
4100 struct intel_super *super;
4101 struct imsm_super *mpb;
4102 size_t mpb_size;
4d1313e9 4103 char *version;
bf5a934a 4104
bf5a934a 4105 if (st->sb)
e683ca88
DW
4106 return init_super_imsm_volume(st, info, size, name, homehost, uuid);
4107
4108 if (info)
4109 mpb_size = disks_to_mpb_size(info->nr_disks);
4110 else
4111 mpb_size = 512;
bf5a934a 4112
49133e57 4113 super = alloc_super();
e683ca88 4114 if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
bf5a934a 4115 free(super);
e683ca88
DW
4116 super = NULL;
4117 }
4118 if (!super) {
4119 fprintf(stderr, Name
4120 ": %s could not allocate superblock\n", __func__);
bf5a934a
DW
4121 return 0;
4122 }
8e59f3d8
AK
4123 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
4124 fprintf(stderr, Name
4125 ": %s could not allocate migr_rec buffer\n", __func__);
4126 free(super->buf);
4127 free(super);
4128 return 0;
4129 }
e683ca88 4130 memset(super->buf, 0, mpb_size);
ef649044 4131 mpb = super->buf;
e683ca88
DW
4132 mpb->mpb_size = __cpu_to_le32(mpb_size);
4133 st->sb = super;
4134
4135 if (info == NULL) {
4136 /* zeroing superblock */
4137 return 0;
4138 }
bf5a934a 4139
4d1313e9
DW
4140 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4141
4142 version = (char *) mpb->sig;
4143 strcpy(version, MPB_SIGNATURE);
4144 version += strlen(MPB_SIGNATURE);
4145 strcpy(version, MPB_VERSION_RAID0);
bf5a934a 4146
bf5a934a
DW
4147 return 1;
4148}
4149
0e600426 4150#ifndef MDASSEMBLE
f20c3968 4151static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
bf5a934a
DW
4152 int fd, char *devname)
4153{
4154 struct intel_super *super = st->sb;
d23fe947 4155 struct imsm_super *mpb = super->anchor;
bf5a934a
DW
4156 struct dl *dl;
4157 struct imsm_dev *dev;
4158 struct imsm_map *map;
4eb26970 4159 int slot;
bf5a934a 4160
949c47a0 4161 dev = get_imsm_dev(super, super->current_vol);
a965f303 4162 map = get_imsm_map(dev, 0);
bf5a934a 4163
208933a7
N
4164 if (! (dk->state & (1<<MD_DISK_SYNC))) {
4165 fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
4166 devname);
4167 return 1;
4168 }
4169
efb30e7f
DW
4170 if (fd == -1) {
4171 /* we're doing autolayout so grab the pre-marked (in
4172 * validate_geometry) raid_disk
4173 */
4174 for (dl = super->disks; dl; dl = dl->next)
4175 if (dl->raiddisk == dk->raid_disk)
4176 break;
4177 } else {
4178 for (dl = super->disks; dl ; dl = dl->next)
4179 if (dl->major == dk->major &&
4180 dl->minor == dk->minor)
4181 break;
4182 }
d23fe947 4183
208933a7
N
4184 if (!dl) {
4185 fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
f20c3968 4186 return 1;
208933a7 4187 }
bf5a934a 4188
d23fe947
DW
4189 /* add a pristine spare to the metadata */
4190 if (dl->index < 0) {
4191 dl->index = super->anchor->num_disks;
4192 super->anchor->num_disks++;
4193 }
4eb26970
DW
4194 /* Check the device has not already been added */
4195 slot = get_imsm_disk_slot(map, dl->index);
4196 if (slot >= 0 &&
98130f40 4197 (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
4eb26970
DW
4198 fprintf(stderr, Name ": %s has been included in this array twice\n",
4199 devname);
4200 return 1;
4201 }
be73972f 4202 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
ee5aad5a 4203 dl->disk.status = CONFIGURED_DISK;
d23fe947
DW
4204
4205 /* if we are creating the first raid device update the family number */
4206 if (super->current_vol == 0) {
4207 __u32 sum;
4208 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
4209 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
4210
791b666a
AW
4211 if (!_dev || !_disk) {
4212 fprintf(stderr, Name ": BUG mpb setup error\n");
4213 return 1;
4214 }
d23fe947
DW
4215 *_dev = *dev;
4216 *_disk = dl->disk;
148acb7b
DW
4217 sum = random32();
4218 sum += __gen_imsm_checksum(mpb);
d23fe947 4219 mpb->family_num = __cpu_to_le32(sum);
148acb7b 4220 mpb->orig_family_num = mpb->family_num;
d23fe947 4221 }
f20c3968
DW
4222
4223 return 0;
bf5a934a
DW
4224}
4225
88654014 4226
f20c3968 4227static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
88654014 4228 int fd, char *devname)
cdddbdbc 4229{
c2c087e6 4230 struct intel_super *super = st->sb;
c2c087e6
DW
4231 struct dl *dd;
4232 unsigned long long size;
f2f27e63 4233 __u32 id;
c2c087e6
DW
4234 int rv;
4235 struct stat stb;
4236
88654014
LM
4237 /* If we are on an RAID enabled platform check that the disk is
4238 * attached to the raid controller.
4239 * We do not need to test disks attachment for container based additions,
4240 * they shall be already tested when container was created/assembled.
88c32bb1 4241 */
d424212e 4242 rv = find_intel_hba_capability(fd, super, devname);
f2f5c343 4243 /* no orom/efi or non-intel hba of the disk */
f0f5a016
LM
4244 if (rv != 0) {
4245 dprintf("capability: %p fd: %d ret: %d\n",
4246 super->orom, fd, rv);
4247 return 1;
88c32bb1
DW
4248 }
4249
f20c3968
DW
4250 if (super->current_vol >= 0)
4251 return add_to_super_imsm_volume(st, dk, fd, devname);
bf5a934a 4252
c2c087e6
DW
4253 fstat(fd, &stb);
4254 dd = malloc(sizeof(*dd));
b9f594fe 4255 if (!dd) {
c2c087e6
DW
4256 fprintf(stderr,
4257 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
f20c3968 4258 return 1;
c2c087e6
DW
4259 }
4260 memset(dd, 0, sizeof(*dd));
4261 dd->major = major(stb.st_rdev);
4262 dd->minor = minor(stb.st_rdev);
b9f594fe 4263 dd->index = -1;
c2c087e6 4264 dd->devname = devname ? strdup(devname) : NULL;
c2c087e6 4265 dd->fd = fd;
689c9bf3 4266 dd->e = NULL;
1a64be56 4267 dd->action = DISK_ADD;
c2c087e6 4268 rv = imsm_read_serial(fd, devname, dd->serial);
32ba9157 4269 if (rv) {
c2c087e6 4270 fprintf(stderr,
0030e8d6 4271 Name ": failed to retrieve scsi serial, aborting\n");
949c47a0 4272 free(dd);
0030e8d6 4273 abort();
c2c087e6
DW
4274 }
4275
c2c087e6
DW
4276 get_dev_size(fd, NULL, &size);
4277 size /= 512;
1f24f035 4278 serialcpy(dd->disk.serial, dd->serial);
b9f594fe 4279 dd->disk.total_blocks = __cpu_to_le32(size);
ee5aad5a 4280 dd->disk.status = SPARE_DISK;
c2c087e6 4281 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
b9f594fe 4282 dd->disk.scsi_id = __cpu_to_le32(id);
c2c087e6 4283 else
b9f594fe 4284 dd->disk.scsi_id = __cpu_to_le32(0);
43dad3d6
DW
4285
4286 if (st->update_tail) {
1a64be56
LM
4287 dd->next = super->disk_mgmt_list;
4288 super->disk_mgmt_list = dd;
43dad3d6
DW
4289 } else {
4290 dd->next = super->disks;
4291 super->disks = dd;
ceaf0ee1 4292 super->updates_pending++;
43dad3d6 4293 }
f20c3968
DW
4294
4295 return 0;
cdddbdbc
DW
4296}
4297
1a64be56
LM
4298
4299static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
4300{
4301 struct intel_super *super = st->sb;
4302 struct dl *dd;
4303
4304 /* remove from super works only in mdmon - for communication
4305 * manager - monitor. Check if communication memory buffer
4306 * is prepared.
4307 */
4308 if (!st->update_tail) {
4309 fprintf(stderr,
4310 Name ": %s shall be used in mdmon context only"
4311 "(line %d).\n", __func__, __LINE__);
4312 return 1;
4313 }
4314 dd = malloc(sizeof(*dd));
4315 if (!dd) {
4316 fprintf(stderr,
4317 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4318 return 1;
4319 }
4320 memset(dd, 0, sizeof(*dd));
4321 dd->major = dk->major;
4322 dd->minor = dk->minor;
4323 dd->index = -1;
4324 dd->fd = -1;
4325 dd->disk.status = SPARE_DISK;
4326 dd->action = DISK_REMOVE;
4327
4328 dd->next = super->disk_mgmt_list;
4329 super->disk_mgmt_list = dd;
4330
4331
4332 return 0;
4333}
4334
f796af5d
DW
4335static int store_imsm_mpb(int fd, struct imsm_super *mpb);
4336
4337static union {
4338 char buf[512];
4339 struct imsm_super anchor;
4340} spare_record __attribute__ ((aligned(512)));
c2c087e6 4341
d23fe947
DW
4342/* spare records have their own family number and do not have any defined raid
4343 * devices
4344 */
4345static int write_super_imsm_spares(struct intel_super *super, int doclose)
4346{
d23fe947 4347 struct imsm_super *mpb = super->anchor;
f796af5d 4348 struct imsm_super *spare = &spare_record.anchor;
d23fe947
DW
4349 __u32 sum;
4350 struct dl *d;
4351
f796af5d
DW
4352 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
4353 spare->generation_num = __cpu_to_le32(1UL),
4354 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4355 spare->num_disks = 1,
4356 spare->num_raid_devs = 0,
4357 spare->cache_size = mpb->cache_size,
4358 spare->pwr_cycle_count = __cpu_to_le32(1),
4359
4360 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
4361 MPB_SIGNATURE MPB_VERSION_RAID0);
d23fe947
DW
4362
4363 for (d = super->disks; d; d = d->next) {
8796fdc4 4364 if (d->index != -1)
d23fe947
DW
4365 continue;
4366
f796af5d
DW
4367 spare->disk[0] = d->disk;
4368 sum = __gen_imsm_checksum(spare);
4369 spare->family_num = __cpu_to_le32(sum);
4370 spare->orig_family_num = 0;
4371 sum = __gen_imsm_checksum(spare);
4372 spare->check_sum = __cpu_to_le32(sum);
d23fe947 4373
f796af5d 4374 if (store_imsm_mpb(d->fd, spare)) {
d23fe947
DW
4375 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4376 __func__, d->major, d->minor, strerror(errno));
e74255d9 4377 return 1;
d23fe947
DW
4378 }
4379 if (doclose) {
4380 close(d->fd);
4381 d->fd = -1;
4382 }
4383 }
4384
e74255d9 4385 return 0;
d23fe947
DW
4386}
4387
146c6260
AK
4388static int is_gen_migration(struct imsm_dev *dev);
4389
36988a3d 4390static int write_super_imsm(struct supertype *st, int doclose)
cdddbdbc 4391{
36988a3d 4392 struct intel_super *super = st->sb;
949c47a0 4393 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4394 struct dl *d;
4395 __u32 generation;
4396 __u32 sum;
d23fe947 4397 int spares = 0;
949c47a0 4398 int i;
a48ac0a8 4399 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
36988a3d 4400 int num_disks = 0;
146c6260 4401 int clear_migration_record = 1;
cdddbdbc 4402
c2c087e6
DW
4403 /* 'generation' is incremented everytime the metadata is written */
4404 generation = __le32_to_cpu(mpb->generation_num);
4405 generation++;
4406 mpb->generation_num = __cpu_to_le32(generation);
4407
148acb7b
DW
4408 /* fix up cases where previous mdadm releases failed to set
4409 * orig_family_num
4410 */
4411 if (mpb->orig_family_num == 0)
4412 mpb->orig_family_num = mpb->family_num;
4413
d23fe947 4414 for (d = super->disks; d; d = d->next) {
8796fdc4 4415 if (d->index == -1)
d23fe947 4416 spares++;
36988a3d 4417 else {
d23fe947 4418 mpb->disk[d->index] = d->disk;
36988a3d
AK
4419 num_disks++;
4420 }
d23fe947 4421 }
36988a3d 4422 for (d = super->missing; d; d = d->next) {
47ee5a45 4423 mpb->disk[d->index] = d->disk;
36988a3d
AK
4424 num_disks++;
4425 }
4426 mpb->num_disks = num_disks;
4427 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
b9f594fe 4428
949c47a0
DW
4429 for (i = 0; i < mpb->num_raid_devs; i++) {
4430 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
36988a3d
AK
4431 struct imsm_dev *dev2 = get_imsm_dev(super, i);
4432 if (dev && dev2) {
4433 imsm_copy_dev(dev, dev2);
4434 mpb_size += sizeof_imsm_dev(dev, 0);
4435 }
146c6260
AK
4436 if (is_gen_migration(dev2))
4437 clear_migration_record = 0;
949c47a0 4438 }
a48ac0a8
DW
4439 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
4440 mpb->mpb_size = __cpu_to_le32(mpb_size);
949c47a0 4441
c2c087e6 4442 /* recalculate checksum */
949c47a0 4443 sum = __gen_imsm_checksum(mpb);
c2c087e6
DW
4444 mpb->check_sum = __cpu_to_le32(sum);
4445
146c6260
AK
4446 if (clear_migration_record)
4447 memset(super->migr_rec_buf, 0, 512);
4448
d23fe947 4449 /* write the mpb for disks that compose raid devices */
c2c087e6 4450 for (d = super->disks; d ; d = d->next) {
d23fe947
DW
4451 if (d->index < 0)
4452 continue;
f796af5d 4453 if (store_imsm_mpb(d->fd, mpb))
c2c087e6
DW
4454 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4455 __func__, d->major, d->minor, strerror(errno));
146c6260
AK
4456 if (clear_migration_record) {
4457 unsigned long long dsize;
4458
4459 get_dev_size(d->fd, NULL, &dsize);
4460 if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
4461 write(d->fd, super->migr_rec_buf, 512);
4462 }
4463 }
c2c087e6
DW
4464 if (doclose) {
4465 close(d->fd);
4466 d->fd = -1;
4467 }
4468 }
4469
d23fe947
DW
4470 if (spares)
4471 return write_super_imsm_spares(super, doclose);
4472
e74255d9 4473 return 0;
c2c087e6
DW
4474}
4475
0e600426 4476
9b1fb677 4477static int create_array(struct supertype *st, int dev_idx)
43dad3d6
DW
4478{
4479 size_t len;
4480 struct imsm_update_create_array *u;
4481 struct intel_super *super = st->sb;
9b1fb677 4482 struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
54c2c1ea
DW
4483 struct imsm_map *map = get_imsm_map(dev, 0);
4484 struct disk_info *inf;
4485 struct imsm_disk *disk;
4486 int i;
43dad3d6 4487
54c2c1ea
DW
4488 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
4489 sizeof(*inf) * map->num_members;
43dad3d6
DW
4490 u = malloc(len);
4491 if (!u) {
4492 fprintf(stderr, "%s: failed to allocate update buffer\n",
4493 __func__);
4494 return 1;
4495 }
4496
4497 u->type = update_create_array;
9b1fb677 4498 u->dev_idx = dev_idx;
43dad3d6 4499 imsm_copy_dev(&u->dev, dev);
54c2c1ea
DW
4500 inf = get_disk_info(u);
4501 for (i = 0; i < map->num_members; i++) {
98130f40 4502 int idx = get_imsm_disk_idx(dev, i, -1);
9b1fb677 4503
54c2c1ea
DW
4504 disk = get_imsm_disk(super, idx);
4505 serialcpy(inf[i].serial, disk->serial);
4506 }
43dad3d6
DW
4507 append_metadata_update(st, u, len);
4508
4509 return 0;
4510}
4511
1a64be56 4512static int mgmt_disk(struct supertype *st)
43dad3d6
DW
4513{
4514 struct intel_super *super = st->sb;
4515 size_t len;
1a64be56 4516 struct imsm_update_add_remove_disk *u;
43dad3d6 4517
1a64be56 4518 if (!super->disk_mgmt_list)
43dad3d6
DW
4519 return 0;
4520
4521 len = sizeof(*u);
4522 u = malloc(len);
4523 if (!u) {
4524 fprintf(stderr, "%s: failed to allocate update buffer\n",
4525 __func__);
4526 return 1;
4527 }
4528
1a64be56 4529 u->type = update_add_remove_disk;
43dad3d6
DW
4530 append_metadata_update(st, u, len);
4531
4532 return 0;
4533}
4534
c2c087e6
DW
4535static int write_init_super_imsm(struct supertype *st)
4536{
9b1fb677
DW
4537 struct intel_super *super = st->sb;
4538 int current_vol = super->current_vol;
4539
4540 /* we are done with current_vol reset it to point st at the container */
4541 super->current_vol = -1;
4542
8273f55e 4543 if (st->update_tail) {
43dad3d6
DW
4544 /* queue the recently created array / added disk
4545 * as a metadata update */
43dad3d6 4546 int rv;
8273f55e 4547
43dad3d6 4548 /* determine if we are creating a volume or adding a disk */
9b1fb677 4549 if (current_vol < 0) {
1a64be56
LM
4550 /* in the mgmt (add/remove) disk case we are running
4551 * in mdmon context, so don't close fd's
43dad3d6 4552 */
1a64be56 4553 return mgmt_disk(st);
43dad3d6 4554 } else
9b1fb677 4555 rv = create_array(st, current_vol);
8273f55e 4556
43dad3d6 4557 return rv;
d682f344
N
4558 } else {
4559 struct dl *d;
4560 for (d = super->disks; d; d = d->next)
4561 Kill(d->devname, NULL, 0, 1, 1);
36988a3d 4562 return write_super_imsm(st, 1);
d682f344 4563 }
cdddbdbc 4564}
0e600426 4565#endif
cdddbdbc 4566
e683ca88 4567static int store_super_imsm(struct supertype *st, int fd)
cdddbdbc 4568{
e683ca88
DW
4569 struct intel_super *super = st->sb;
4570 struct imsm_super *mpb = super ? super->anchor : NULL;
551c80c1 4571
e683ca88 4572 if (!mpb)
ad97895e
DW
4573 return 1;
4574
1799c9e8 4575#ifndef MDASSEMBLE
e683ca88 4576 return store_imsm_mpb(fd, mpb);
1799c9e8
N
4577#else
4578 return 1;
4579#endif
cdddbdbc
DW
4580}
4581
0e600426
N
4582static int imsm_bbm_log_size(struct imsm_super *mpb)
4583{
4584 return __le32_to_cpu(mpb->bbm_log_size);
4585}
4586
4587#ifndef MDASSEMBLE
cdddbdbc
DW
4588static int validate_geometry_imsm_container(struct supertype *st, int level,
4589 int layout, int raiddisks, int chunk,
c2c087e6 4590 unsigned long long size, char *dev,
2c514b71
NB
4591 unsigned long long *freesize,
4592 int verbose)
cdddbdbc 4593{
c2c087e6
DW
4594 int fd;
4595 unsigned long long ldsize;
f2f5c343
LM
4596 struct intel_super *super=NULL;
4597 int rv = 0;
cdddbdbc 4598
c2c087e6
DW
4599 if (level != LEVEL_CONTAINER)
4600 return 0;
4601 if (!dev)
4602 return 1;
4603
4604 fd = open(dev, O_RDONLY|O_EXCL, 0);
4605 if (fd < 0) {
2c514b71
NB
4606 if (verbose)
4607 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
4608 dev, strerror(errno));
c2c087e6
DW
4609 return 0;
4610 }
4611 if (!get_dev_size(fd, dev, &ldsize)) {
4612 close(fd);
4613 return 0;
4614 }
f2f5c343
LM
4615
4616 /* capabilities retrieve could be possible
4617 * note that there is no fd for the disks in array.
4618 */
4619 super = alloc_super();
4620 if (!super) {
4621 fprintf(stderr,
4622 Name ": malloc of %zu failed.\n",
4623 sizeof(*super));
4624 close(fd);
4625 return 0;
4626 }
4627
d424212e 4628 rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
f2f5c343
LM
4629 if (rv != 0) {
4630#if DEBUG
4631 char str[256];
4632 fd2devname(fd, str);
4633 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4634 fd, str, super->orom, rv, raiddisks);
4635#endif
4636 /* no orom/efi or non-intel hba of the disk */
4637 close(fd);
4638 free_imsm(super);
4639 return 0;
4640 }
c2c087e6 4641 close(fd);
f2f5c343
LM
4642 if (super->orom && raiddisks > super->orom->tds) {
4643 if (verbose)
4644 fprintf(stderr, Name ": %d exceeds maximum number of"
4645 " platform supported disks: %d\n",
4646 raiddisks, super->orom->tds);
4647
4648 free_imsm(super);
4649 return 0;
4650 }
c2c087e6
DW
4651
4652 *freesize = avail_size_imsm(st, ldsize >> 9);
f2f5c343 4653 free_imsm(super);
c2c087e6
DW
4654
4655 return 1;
cdddbdbc
DW
4656}
4657
0dcecb2e
DW
4658static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4659{
4660 const unsigned long long base_start = e[*idx].start;
4661 unsigned long long end = base_start + e[*idx].size;
4662 int i;
4663
4664 if (base_start == end)
4665 return 0;
4666
4667 *idx = *idx + 1;
4668 for (i = *idx; i < num_extents; i++) {
4669 /* extend overlapping extents */
4670 if (e[i].start >= base_start &&
4671 e[i].start <= end) {
4672 if (e[i].size == 0)
4673 return 0;
4674 if (e[i].start + e[i].size > end)
4675 end = e[i].start + e[i].size;
4676 } else if (e[i].start > end) {
4677 *idx = i;
4678 break;
4679 }
4680 }
4681
4682 return end - base_start;
4683}
4684
4685static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
4686{
4687 /* build a composite disk with all known extents and generate a new
4688 * 'maxsize' given the "all disks in an array must share a common start
4689 * offset" constraint
4690 */
4691 struct extent *e = calloc(sum_extents, sizeof(*e));
4692 struct dl *dl;
4693 int i, j;
4694 int start_extent;
4695 unsigned long long pos;
b9d77223 4696 unsigned long long start = 0;
0dcecb2e
DW
4697 unsigned long long maxsize;
4698 unsigned long reserve;
4699
4700 if (!e)
a7dd165b 4701 return 0;
0dcecb2e
DW
4702
4703 /* coalesce and sort all extents. also, check to see if we need to
4704 * reserve space between member arrays
4705 */
4706 j = 0;
4707 for (dl = super->disks; dl; dl = dl->next) {
4708 if (!dl->e)
4709 continue;
4710 for (i = 0; i < dl->extent_cnt; i++)
4711 e[j++] = dl->e[i];
4712 }
4713 qsort(e, sum_extents, sizeof(*e), cmp_extent);
4714
4715 /* merge extents */
4716 i = 0;
4717 j = 0;
4718 while (i < sum_extents) {
4719 e[j].start = e[i].start;
4720 e[j].size = find_size(e, &i, sum_extents);
4721 j++;
4722 if (e[j-1].size == 0)
4723 break;
4724 }
4725
4726 pos = 0;
4727 maxsize = 0;
4728 start_extent = 0;
4729 i = 0;
4730 do {
4731 unsigned long long esize;
4732
4733 esize = e[i].start - pos;
4734 if (esize >= maxsize) {
4735 maxsize = esize;
4736 start = pos;
4737 start_extent = i;
4738 }
4739 pos = e[i].start + e[i].size;
4740 i++;
4741 } while (e[i-1].size);
4742 free(e);
4743
a7dd165b
DW
4744 if (maxsize == 0)
4745 return 0;
4746
4747 /* FIXME assumes volume at offset 0 is the first volume in a
4748 * container
4749 */
0dcecb2e
DW
4750 if (start_extent > 0)
4751 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
4752 else
4753 reserve = 0;
4754
4755 if (maxsize < reserve)
a7dd165b 4756 return 0;
0dcecb2e
DW
4757
4758 super->create_offset = ~((__u32) 0);
4759 if (start + reserve > super->create_offset)
a7dd165b 4760 return 0; /* start overflows create_offset */
0dcecb2e
DW
4761 super->create_offset = start + reserve;
4762
4763 return maxsize - reserve;
4764}
4765
88c32bb1
DW
/*
 * is_raid_level_supported - may @level with @raiddisks members be created?
 *
 * Negative levels and levels 4 and 6 are never accepted.  Without an
 * option-rom description (@orom == NULL) everything else is allowed.
 * With one, the level must be advertised by the orom and the disk
 * count must match: raid1 needs exactly 2 disks (more than 2 requires
 * raid1e support), raid10 exactly 4, raid5 more than 2.
 *
 * Returns non-zero when supported, 0 otherwise.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	if (level < 0 || level == 4 || level == 6)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (orom == NULL)
		return 1;

	/* we have an orom: prevent invalid raid levels */
	if (level == 0)
		return imsm_orom_has_raid0(orom);

	if (level == 1) {
		if (raiddisks > 2)
			return imsm_orom_has_raid1e(orom);
		return imsm_orom_has_raid1(orom) && raiddisks == 2;
	}

	if (level == 10)
		return imsm_orom_has_raid10(orom) && raiddisks == 4;

	if (level == 5)
		return imsm_orom_has_raid5(orom) && raiddisks > 2;

	return 0;
}
4787
73408129 4788
35f81cbb 4789#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
73408129
LM
4790/*
4791 * validate volume parameters with OROM/EFI capabilities
4792 */
6592ce37
DW
4793static int
4794validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
c21e737b 4795 int raiddisks, int *chunk, int verbose)
6592ce37 4796{
73408129
LM
4797#if DEBUG
4798 verbose = 1;
4799#endif
4800 /* validate container capabilities */
4801 if (super->orom && raiddisks > super->orom->tds) {
4802 if (verbose)
4803 fprintf(stderr, Name ": %d exceeds maximum number of"
4804 " platform supported disks: %d\n",
4805 raiddisks, super->orom->tds);
4806 return 0;
4807 }
4808
4809 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4810 if (super->orom && (!is_raid_level_supported(super->orom, level,
4811 raiddisks))) {
6592ce37
DW
4812 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4813 level, raiddisks, raiddisks > 1 ? "s" : "");
4814 return 0;
4815 }
c21e737b
CA
4816 if (super->orom && level != 1) {
4817 if (chunk && (*chunk == 0 || *chunk == UnSet))
4818 *chunk = imsm_orom_default_chunk(super->orom);
4819 else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
4820 pr_vrb(": platform does not support a chunk size of: "
4821 "%d\n", *chunk);
4822 return 0;
4823 }
6592ce37
DW
4824 }
4825 if (layout != imsm_level_to_layout(level)) {
4826 if (level == 5)
4827 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4828 else if (level == 10)
4829 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4830 else
4831 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4832 layout, level);
4833 return 0;
4834 }
6592ce37
DW
4835 return 1;
4836}
4837
c2c087e6
DW
4838/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4839 * FIX ME add ahci details
4840 */
8b353278 4841static int validate_geometry_imsm_volume(struct supertype *st, int level,
c21e737b 4842 int layout, int raiddisks, int *chunk,
c2c087e6 4843 unsigned long long size, char *dev,
2c514b71
NB
4844 unsigned long long *freesize,
4845 int verbose)
cdddbdbc 4846{
c2c087e6
DW
4847 struct stat stb;
4848 struct intel_super *super = st->sb;
a20d2ba5 4849 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
4850 struct dl *dl;
4851 unsigned long long pos = 0;
4852 unsigned long long maxsize;
4853 struct extent *e;
4854 int i;
cdddbdbc 4855
88c32bb1
DW
4856 /* We must have the container info already read in. */
4857 if (!super)
c2c087e6
DW
4858 return 0;
4859
d54559f0
LM
4860 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4861 fprintf(stderr, Name ": RAID gemetry validation failed. "
4862 "Cannot proceed with the action(s).\n");
c2c087e6 4863 return 0;
d54559f0 4864 }
c2c087e6
DW
4865 if (!dev) {
4866 /* General test: make sure there is space for
2da8544a
DW
4867 * 'raiddisks' device extents of size 'size' at a given
4868 * offset
c2c087e6 4869 */
e46273eb 4870 unsigned long long minsize = size;
b7528a20 4871 unsigned long long start_offset = MaxSector;
c2c087e6
DW
4872 int dcnt = 0;
4873 if (minsize == 0)
4874 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4875 for (dl = super->disks; dl ; dl = dl->next) {
4876 int found = 0;
4877
bf5a934a 4878 pos = 0;
c2c087e6
DW
4879 i = 0;
4880 e = get_extents(super, dl);
4881 if (!e) continue;
4882 do {
4883 unsigned long long esize;
4884 esize = e[i].start - pos;
4885 if (esize >= minsize)
4886 found = 1;
b7528a20 4887 if (found && start_offset == MaxSector) {
2da8544a
DW
4888 start_offset = pos;
4889 break;
4890 } else if (found && pos != start_offset) {
4891 found = 0;
4892 break;
4893 }
c2c087e6
DW
4894 pos = e[i].start + e[i].size;
4895 i++;
4896 } while (e[i-1].size);
4897 if (found)
4898 dcnt++;
4899 free(e);
4900 }
4901 if (dcnt < raiddisks) {
2c514b71
NB
4902 if (verbose)
4903 fprintf(stderr, Name ": imsm: Not enough "
4904 "devices with space for this array "
4905 "(%d < %d)\n",
4906 dcnt, raiddisks);
c2c087e6
DW
4907 return 0;
4908 }
4909 return 1;
4910 }
0dcecb2e 4911
c2c087e6
DW
4912 /* This device must be a member of the set */
4913 if (stat(dev, &stb) < 0)
4914 return 0;
4915 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4916 return 0;
4917 for (dl = super->disks ; dl ; dl = dl->next) {
f21e18ca
N
4918 if (dl->major == (int)major(stb.st_rdev) &&
4919 dl->minor == (int)minor(stb.st_rdev))
c2c087e6
DW
4920 break;
4921 }
4922 if (!dl) {
2c514b71
NB
4923 if (verbose)
4924 fprintf(stderr, Name ": %s is not in the "
4925 "same imsm set\n", dev);
c2c087e6 4926 return 0;
a20d2ba5
DW
4927 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4928 /* If a volume is present then the current creation attempt
4929 * cannot incorporate new spares because the orom may not
4930 * understand this configuration (all member disks must be
4931 * members of each array in the container).
4932 */
4933 fprintf(stderr, Name ": %s is a spare and a volume"
4934 " is already defined for this container\n", dev);
4935 fprintf(stderr, Name ": The option-rom requires all member"
4936 " disks to be a member of all volumes\n");
4937 return 0;
c2c087e6 4938 }
0dcecb2e
DW
4939
4940 /* retrieve the largest free space block */
c2c087e6
DW
4941 e = get_extents(super, dl);
4942 maxsize = 0;
4943 i = 0;
0dcecb2e
DW
4944 if (e) {
4945 do {
4946 unsigned long long esize;
4947
4948 esize = e[i].start - pos;
4949 if (esize >= maxsize)
4950 maxsize = esize;
4951 pos = e[i].start + e[i].size;
4952 i++;
4953 } while (e[i-1].size);
4954 dl->e = e;
4955 dl->extent_cnt = i;
4956 } else {
4957 if (verbose)
4958 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4959 dev);
4960 return 0;
4961 }
4962 if (maxsize < size) {
4963 if (verbose)
4964 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4965 dev, maxsize, size);
4966 return 0;
4967 }
4968
4969 /* count total number of extents for merge */
4970 i = 0;
4971 for (dl = super->disks; dl; dl = dl->next)
4972 if (dl->e)
4973 i += dl->extent_cnt;
4974
4975 maxsize = merge_extents(super, i);
a7dd165b 4976 if (maxsize < size || maxsize == 0) {
0dcecb2e
DW
4977 if (verbose)
4978 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4979 maxsize, size);
4980 return 0;
0dcecb2e
DW
4981 }
4982
c2c087e6
DW
4983 *freesize = maxsize;
4984
4985 return 1;
cdddbdbc
DW
4986}
4987
efb30e7f
DW
4988static int reserve_space(struct supertype *st, int raiddisks,
4989 unsigned long long size, int chunk,
4990 unsigned long long *freesize)
4991{
4992 struct intel_super *super = st->sb;
4993 struct imsm_super *mpb = super->anchor;
4994 struct dl *dl;
4995 int i;
4996 int extent_cnt;
4997 struct extent *e;
4998 unsigned long long maxsize;
4999 unsigned long long minsize;
5000 int cnt;
5001 int used;
5002
5003 /* find the largest common start free region of the possible disks */
5004 used = 0;
5005 extent_cnt = 0;
5006 cnt = 0;
5007 for (dl = super->disks; dl; dl = dl->next) {
5008 dl->raiddisk = -1;
5009
5010 if (dl->index >= 0)
5011 used++;
5012
5013 /* don't activate new spares if we are orom constrained
5014 * and there is already a volume active in the container
5015 */
5016 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
5017 continue;
5018
5019 e = get_extents(super, dl);
5020 if (!e)
5021 continue;
5022 for (i = 1; e[i-1].size; i++)
5023 ;
5024 dl->e = e;
5025 dl->extent_cnt = i;
5026 extent_cnt += i;
5027 cnt++;
5028 }
5029
5030 maxsize = merge_extents(super, extent_cnt);
5031 minsize = size;
5032 if (size == 0)
612e59d8
CA
5033 /* chunk is in K */
5034 minsize = chunk * 2;
efb30e7f
DW
5035
5036 if (cnt < raiddisks ||
5037 (super->orom && used && used != raiddisks) ||
a7dd165b
DW
5038 maxsize < minsize ||
5039 maxsize == 0) {
efb30e7f
DW
5040 fprintf(stderr, Name ": not enough devices with space to create array.\n");
5041 return 0; /* No enough free spaces large enough */
5042 }
5043
5044 if (size == 0) {
5045 size = maxsize;
5046 if (chunk) {
612e59d8
CA
5047 size /= 2 * chunk;
5048 size *= 2 * chunk;
efb30e7f
DW
5049 }
5050 }
5051
5052 cnt = 0;
5053 for (dl = super->disks; dl; dl = dl->next)
5054 if (dl->e)
5055 dl->raiddisk = cnt++;
5056
5057 *freesize = size;
5058
5059 return 1;
5060}
5061
bf5a934a 5062static int validate_geometry_imsm(struct supertype *st, int level, int layout,
c21e737b 5063 int raiddisks, int *chunk, unsigned long long size,
bf5a934a
DW
5064 char *dev, unsigned long long *freesize,
5065 int verbose)
5066{
5067 int fd, cfd;
5068 struct mdinfo *sra;
20cbe8d2 5069 int is_member = 0;
bf5a934a 5070
d54559f0
LM
5071 /* load capability
5072 * if given unused devices create a container
bf5a934a
DW
5073 * if given given devices in a container create a member volume
5074 */
5075 if (level == LEVEL_CONTAINER) {
5076 /* Must be a fresh device to add to a container */
5077 return validate_geometry_imsm_container(st, level, layout,
c21e737b
CA
5078 raiddisks,
5079 chunk?*chunk:0, size,
bf5a934a
DW
5080 dev, freesize,
5081 verbose);
5082 }
5083
8592f29d
N
5084 if (!dev) {
5085 if (st->sb && freesize) {
efb30e7f
DW
5086 /* we are being asked to automatically layout a
5087 * new volume based on the current contents of
5088 * the container. If the the parameters can be
5089 * satisfied reserve_space will record the disks,
5090 * start offset, and size of the volume to be
5091 * created. add_to_super and getinfo_super
5092 * detect when autolayout is in progress.
5093 */
6592ce37
DW
5094 if (!validate_geometry_imsm_orom(st->sb, level, layout,
5095 raiddisks, chunk,
5096 verbose))
5097 return 0;
c21e737b
CA
5098 return reserve_space(st, raiddisks, size,
5099 chunk?*chunk:0, freesize);
8592f29d
N
5100 }
5101 return 1;
5102 }
bf5a934a
DW
5103 if (st->sb) {
5104 /* creating in a given container */
5105 return validate_geometry_imsm_volume(st, level, layout,
5106 raiddisks, chunk, size,
5107 dev, freesize, verbose);
5108 }
5109
bf5a934a
DW
5110 /* This device needs to be a device in an 'imsm' container */
5111 fd = open(dev, O_RDONLY|O_EXCL, 0);
5112 if (fd >= 0) {
5113 if (verbose)
5114 fprintf(stderr,
5115 Name ": Cannot create this array on device %s\n",
5116 dev);
5117 close(fd);
5118 return 0;
5119 }
5120 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
5121 if (verbose)
5122 fprintf(stderr, Name ": Cannot open %s: %s\n",
5123 dev, strerror(errno));
5124 return 0;
5125 }
5126 /* Well, it is in use by someone, maybe an 'imsm' container. */
5127 cfd = open_container(fd);
20cbe8d2 5128 close(fd);
bf5a934a 5129 if (cfd < 0) {
bf5a934a
DW
5130 if (verbose)
5131 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
5132 dev);
5133 return 0;
5134 }
5135 sra = sysfs_read(cfd, 0, GET_VERSION);
bf5a934a 5136 if (sra && sra->array.major_version == -1 &&
20cbe8d2
AW
5137 strcmp(sra->text_version, "imsm") == 0)
5138 is_member = 1;
5139 sysfs_free(sra);
5140 if (is_member) {
bf5a934a
DW
5141 /* This is a member of a imsm container. Load the container
5142 * and try to create a volume
5143 */
5144 struct intel_super *super;
5145
e1902a7b 5146 if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
bf5a934a
DW
5147 st->sb = super;
5148 st->container_dev = fd2devnum(cfd);
5149 close(cfd);
5150 return validate_geometry_imsm_volume(st, level, layout,
5151 raiddisks, chunk,
5152 size, dev,
5153 freesize, verbose);
5154 }
20cbe8d2 5155 }
bf5a934a 5156
20cbe8d2
AW
5157 if (verbose)
5158 fprintf(stderr, Name ": failed container membership check\n");
5159
5160 close(cfd);
5161 return 0;
bf5a934a 5162}
0bd16cf2 5163
30f58b22 5164static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
0bd16cf2
DJ
5165{
5166 struct intel_super *super = st->sb;
5167
30f58b22
DW
5168 if (level && *level == UnSet)
5169 *level = LEVEL_CONTAINER;
5170
5171 if (level && layout && *layout == UnSet)
5172 *layout = imsm_level_to_layout(*level);
0bd16cf2 5173
1d54f286
N
5174 if (chunk && (*chunk == UnSet || *chunk == 0) &&
5175 super && super->orom)
30f58b22 5176 *chunk = imsm_orom_default_chunk(super->orom);
0bd16cf2
DJ
5177}
5178
33414a01
DW
5179static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
5180
5181static int kill_subarray_imsm(struct supertype *st)
5182{
5183 /* remove the subarray currently referenced by ->current_vol */
5184 __u8 i;
5185 struct intel_dev **dp;
5186 struct intel_super *super = st->sb;
5187 __u8 current_vol = super->current_vol;
5188 struct imsm_super *mpb = super->anchor;
5189
5190 if (super->current_vol < 0)
5191 return 2;
5192 super->current_vol = -1; /* invalidate subarray cursor */
5193
5194 /* block deletions that would change the uuid of active subarrays
5195 *
5196 * FIXME when immutable ids are available, but note that we'll
5197 * also need to fixup the invalidated/active subarray indexes in
5198 * mdstat
5199 */
5200 for (i = 0; i < mpb->num_raid_devs; i++) {
5201 char subarray[4];
5202
5203 if (i < current_vol)
5204 continue;
5205 sprintf(subarray, "%u", i);
5206 if (is_subarray_active(subarray, st->devname)) {
5207 fprintf(stderr,
5208 Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
5209 current_vol, i);
5210
5211 return 2;
5212 }
5213 }
5214
5215 if (st->update_tail) {
5216 struct imsm_update_kill_array *u = malloc(sizeof(*u));
5217
5218 if (!u)
5219 return 2;
5220 u->type = update_kill_array;
5221 u->dev_idx = current_vol;
5222 append_metadata_update(st, u, sizeof(*u));
5223
5224 return 0;
5225 }
5226
5227 for (dp = &super->devlist; *dp;)
5228 if ((*dp)->index == current_vol) {
5229 *dp = (*dp)->next;
5230 } else {
5231 handle_missing(super, (*dp)->dev);
5232 if ((*dp)->index > current_vol)
5233 (*dp)->index--;
5234 dp = &(*dp)->next;
5235 }
5236
5237 /* no more raid devices, all active components are now spares,
5238 * but of course failed are still failed
5239 */
5240 if (--mpb->num_raid_devs == 0) {
5241 struct dl *d;
5242
5243 for (d = super->disks; d; d = d->next)
5244 if (d->index > -2) {
5245 d->index = -1;
5246 d->disk.status = SPARE_DISK;
5247 }
5248 }
5249
5250 super->updates_pending++;
5251
5252 return 0;
5253}
aa534678 5254
a951a4f7 5255static int update_subarray_imsm(struct supertype *st, char *subarray,
fa56eddb 5256 char *update, struct mddev_ident *ident)
aa534678
DW
5257{
5258 /* update the subarray currently referenced by ->current_vol */
5259 struct intel_super *super = st->sb;
5260 struct imsm_super *mpb = super->anchor;
5261
aa534678
DW
5262 if (strcmp(update, "name") == 0) {
5263 char *name = ident->name;
a951a4f7
N
5264 char *ep;
5265 int vol;
aa534678 5266
a951a4f7 5267 if (is_subarray_active(subarray, st->devname)) {
aa534678
DW
5268 fprintf(stderr,
5269 Name ": Unable to update name of active subarray\n");
5270 return 2;
5271 }
5272
5273 if (!check_name(super, name, 0))
5274 return 2;
5275
a951a4f7
N
5276 vol = strtoul(subarray, &ep, 10);
5277 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
5278 return 2;
5279
aa534678
DW
5280 if (st->update_tail) {
5281 struct imsm_update_rename_array *u = malloc(sizeof(*u));
5282
5283 if (!u)
5284 return 2;
5285 u->type = update_rename_array;
a951a4f7 5286 u->dev_idx = vol;
aa534678
DW
5287 snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
5288 append_metadata_update(st, u, sizeof(*u));
5289 } else {
5290 struct imsm_dev *dev;
5291 int i;
5292
a951a4f7 5293 dev = get_imsm_dev(super, vol);
aa534678
DW
5294 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
5295 for (i = 0; i < mpb->num_raid_devs; i++) {
5296 dev = get_imsm_dev(super, i);
5297 handle_missing(super, dev);
5298 }
5299 super->updates_pending++;
5300 }
5301 } else
5302 return 2;
5303
5304 return 0;
5305}
bf5a934a 5306
28bce06f
AK
5307static int is_gen_migration(struct imsm_dev *dev)
5308{
5309 if (!dev->vol.migr_state)
5310 return 0;
5311
5312 if (migr_type(dev) == MIGR_GEN_MIGR)
5313 return 1;
5314
5315 return 0;
5316}
71204a50 5317#endif /* MDASSEMBLE */
28bce06f 5318
1e5c6983
DW
5319static int is_rebuilding(struct imsm_dev *dev)
5320{
5321 struct imsm_map *migr_map;
5322
5323 if (!dev->vol.migr_state)
5324 return 0;
5325
5326 if (migr_type(dev) != MIGR_REBUILD)
5327 return 0;
5328
5329 migr_map = get_imsm_map(dev, 1);
5330
5331 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
5332 return 1;
5333 else
5334 return 0;
5335}
5336
c47b0ff6
AK
5337static void update_recovery_start(struct intel_super *super,
5338 struct imsm_dev *dev,
5339 struct mdinfo *array)
1e5c6983
DW
5340{
5341 struct mdinfo *rebuild = NULL;
5342 struct mdinfo *d;
5343 __u32 units;
5344
5345 if (!is_rebuilding(dev))
5346 return;
5347
5348 /* Find the rebuild target, but punt on the dual rebuild case */
5349 for (d = array->devs; d; d = d->next)
5350 if (d->recovery_start == 0) {
5351 if (rebuild)
5352 return;
5353 rebuild = d;
5354 }
5355
4363fd80
DW
5356 if (!rebuild) {
5357 /* (?) none of the disks are marked with
5358 * IMSM_ORD_REBUILD, so assume they are missing and the
5359 * disk_ord_tbl was not correctly updated
5360 */
5361 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
5362 return;
5363 }
5364
1e5c6983 5365 units = __le32_to_cpu(dev->vol.curr_migr_unit);
c47b0ff6 5366 rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
1e5c6983
DW
5367}
5368
276d77db 5369static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
1e5c6983 5370
00bbdbda 5371static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
cdddbdbc 5372{
4f5bc454
DW
5373 /* Given a container loaded by load_super_imsm_all,
5374 * extract information about all the arrays into
5375 * an mdinfo tree.
00bbdbda 5376 * If 'subarray' is given, just extract info about that array.
4f5bc454
DW
5377 *
5378 * For each imsm_dev create an mdinfo, fill it in,
5379 * then look for matching devices in super->disks
5380 * and create appropriate device mdinfo.
5381 */
5382 struct intel_super *super = st->sb;
949c47a0 5383 struct imsm_super *mpb = super->anchor;
4f5bc454 5384 struct mdinfo *rest = NULL;
00bbdbda 5385 unsigned int i;
a06d022d 5386 int bbm_errors = 0;
abef11a3
AK
5387 struct dl *d;
5388 int spare_disks = 0;
cdddbdbc 5389
a06d022d
KW
5390 /* check for bad blocks */
5391 if (imsm_bbm_log_size(super->anchor))
5392 bbm_errors = 1;
604b746f 5393
abef11a3
AK
5394 /* count spare devices, not used in maps
5395 */
5396 for (d = super->disks; d; d = d->next)
5397 if (d->index == -1)
5398 spare_disks++;
5399
4f5bc454 5400 for (i = 0; i < mpb->num_raid_devs; i++) {
00bbdbda
N
5401 struct imsm_dev *dev;
5402 struct imsm_map *map;
86e3692b 5403 struct imsm_map *map2;
4f5bc454 5404 struct mdinfo *this;
2db86302 5405 int slot, chunk;
00bbdbda
N
5406 char *ep;
5407
5408 if (subarray &&
5409 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
5410 continue;
5411
5412 dev = get_imsm_dev(super, i);
5413 map = get_imsm_map(dev, 0);
86e3692b 5414 map2 = get_imsm_map(dev, 1);
4f5bc454 5415
1ce0101c
DW
5416 /* do not publish arrays that are in the middle of an
5417 * unsupported migration
5418 */
5419 if (dev->vol.migr_state &&
28bce06f 5420 (migr_type(dev) == MIGR_STATE_CHANGE)) {
1ce0101c
DW
5421 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
5422 " unsupported migration in progress\n",
5423 dev->volume);
5424 continue;
5425 }
2db86302
LM
5426 /* do not publish arrays that are not support by controller's
5427 * OROM/EFI
5428 */
1ce0101c 5429
2db86302 5430 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
7b0bbd0f 5431#ifndef MDASSEMBLE
2db86302
LM
5432 if (!validate_geometry_imsm_orom(super,
5433 get_imsm_raid_level(map), /* RAID level */
5434 imsm_level_to_layout(get_imsm_raid_level(map)),
5435 map->num_members, /* raid disks */
5436 &chunk,
5437 1 /* verbose */)) {
5438 fprintf(stderr, Name ": RAID gemetry validation failed. "
5439 "Cannot proceed with the action(s).\n");
5440 continue;
5441 }
7b0bbd0f 5442#endif /* MDASSEMBLE */
4f5bc454 5443 this = malloc(sizeof(*this));
0fbd635c 5444 if (!this) {
cf1be220 5445 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
0fbd635c
AW
5446 sizeof(*this));
5447 break;
5448 }
4f5bc454
DW
5449 memset(this, 0, sizeof(*this));
5450 this->next = rest;
4f5bc454 5451
301406c9 5452 super->current_vol = i;
a5d85af7 5453 getinfo_super_imsm_volume(st, this, NULL);
4f5bc454 5454 for (slot = 0 ; slot < map->num_members; slot++) {
1e5c6983 5455 unsigned long long recovery_start;
4f5bc454
DW
5456 struct mdinfo *info_d;
5457 struct dl *d;
5458 int idx;
9a1608e5 5459 int skip;
7eef0453 5460 __u32 ord;
4f5bc454 5461
9a1608e5 5462 skip = 0;
98130f40 5463 idx = get_imsm_disk_idx(dev, slot, 0);
196b0d44 5464 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
4f5bc454
DW
5465 for (d = super->disks; d ; d = d->next)
5466 if (d->index == idx)
0fbd635c 5467 break;
4f5bc454 5468
1e5c6983 5469 recovery_start = MaxSector;
4f5bc454 5470 if (d == NULL)
9a1608e5 5471 skip = 1;
25ed7e59 5472 if (d && is_failed(&d->disk))
9a1608e5 5473 skip = 1;
7eef0453 5474 if (ord & IMSM_ORD_REBUILD)
1e5c6983 5475 recovery_start = 0;
9a1608e5
DW
5476
5477 /*
5478 * if we skip some disks the array will be assmebled degraded;
1e5c6983
DW
5479 * reset resync start to avoid a dirty-degraded
5480 * situation when performing the intial sync
9a1608e5
DW
5481 *
5482 * FIXME handle dirty degraded
5483 */
1e5c6983 5484 if ((skip || recovery_start == 0) && !dev->vol.dirty)
b7528a20 5485 this->resync_start = MaxSector;
9a1608e5
DW
5486 if (skip)
5487 continue;
4f5bc454 5488
1e5c6983 5489 info_d = calloc(1, sizeof(*info_d));
9a1608e5
DW
5490 if (!info_d) {
5491 fprintf(stderr, Name ": failed to allocate disk"
1ce0101c 5492 " for volume %.16s\n", dev->volume);
1e5c6983
DW
5493 info_d = this->devs;
5494 while (info_d) {
5495 struct mdinfo *d = info_d->next;
5496
5497 free(info_d);
5498 info_d = d;
5499 }
9a1608e5
DW
5500 free(this);
5501 this = rest;
5502 break;
5503 }
4f5bc454
DW
5504 info_d->next = this->devs;
5505 this->devs = info_d;
5506
4f5bc454
DW
5507 info_d->disk.number = d->index;
5508 info_d->disk.major = d->major;
5509 info_d->disk.minor = d->minor;
5510 info_d->disk.raid_disk = slot;
1e5c6983 5511 info_d->recovery_start = recovery_start;
86e3692b
AK
5512 if (map2) {
5513 if (slot < map2->num_members)
5514 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5515 else
5516 this->array.spare_disks++;
86e3692b
AK
5517 } else {
5518 if (slot < map->num_members)
5519 info_d->disk.state = (1 << MD_DISK_ACTIVE);
04c3c514
AK
5520 else
5521 this->array.spare_disks++;
86e3692b 5522 }
1e5c6983
DW
5523 if (info_d->recovery_start == MaxSector)
5524 this->array.working_disks++;
4f5bc454
DW
5525
5526 info_d->events = __le32_to_cpu(mpb->generation_num);
5527 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5528 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
4f5bc454 5529 }
1e5c6983 5530 /* now that the disk list is up-to-date fixup recovery_start */
c47b0ff6 5531 update_recovery_start(super, dev, this);
abef11a3 5532 this->array.spare_disks += spare_disks;
276d77db
AK
5533
5534 /* check for reshape */
5535 if (this->reshape_active == 1)
5536 recover_backup_imsm(st, this);
5537
9a1608e5 5538 rest = this;
4f5bc454
DW
5539 }
5540
a06d022d
KW
5541 /* if array has bad blocks, set suitable bit in array status */
5542 if (bbm_errors)
5543 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5544
4f5bc454 5545 return rest;
cdddbdbc
DW
5546}
5547
845dea95 5548
fb49eef2 5549static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
c2a1e7da 5550{
a965f303 5551 struct imsm_map *map = get_imsm_map(dev, 0);
c2a1e7da
DW
5552
5553 if (!failed)
3393c6af
DW
5554 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
5555 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
c2a1e7da
DW
5556
5557 switch (get_imsm_raid_level(map)) {
5558 case 0:
5559 return IMSM_T_STATE_FAILED;
5560 break;
5561 case 1:
5562 if (failed < map->num_members)
5563 return IMSM_T_STATE_DEGRADED;
5564 else
5565 return IMSM_T_STATE_FAILED;
5566 break;
5567 case 10:
5568 {
5569 /**
c92a2527
DW
5570 * check to see if any mirrors have failed, otherwise we
5571 * are degraded. Even numbered slots are mirrored on
5572 * slot+1
c2a1e7da 5573 */
c2a1e7da 5574 int i;
d9b420a5
N
5575 /* gcc -Os complains that this is unused */
5576 int insync = insync;
c2a1e7da
DW
5577
5578 for (i = 0; i < map->num_members; i++) {
98130f40 5579 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
c92a2527
DW
5580 int idx = ord_to_idx(ord);
5581 struct imsm_disk *disk;
c2a1e7da 5582
c92a2527
DW
5583 /* reset the potential in-sync count on even-numbered
5584 * slots. num_copies is always 2 for imsm raid10
5585 */
5586 if ((i & 1) == 0)
5587 insync = 2;
c2a1e7da 5588
c92a2527 5589 disk = get_imsm_disk(super, idx);
25ed7e59 5590 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
c92a2527 5591 insync--;
c2a1e7da 5592
c92a2527
DW
5593 /* no in-sync disks left in this mirror the
5594 * array has failed
5595 */
5596 if (insync == 0)
5597 return IMSM_T_STATE_FAILED;
c2a1e7da
DW
5598 }
5599
5600 return IMSM_T_STATE_DEGRADED;
5601 }
5602 case 5:
5603 if (failed < 2)
5604 return IMSM_T_STATE_DEGRADED;
5605 else
5606 return IMSM_T_STATE_FAILED;
5607 break;
5608 default:
5609 break;
5610 }
5611
5612 return map->map_state;
5613}
5614
ff077194 5615static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
c2a1e7da
DW
5616{
5617 int i;
5618 int failed = 0;
5619 struct imsm_disk *disk;
ff077194 5620 struct imsm_map *map = get_imsm_map(dev, 0);
0556e1a2
DW
5621 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5622 __u32 ord;
5623 int idx;
c2a1e7da 5624
0556e1a2
DW
5625 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5626 * disks that are being rebuilt. New failures are recorded to
5627 * map[0]. So we look through all the disks we started with and
5628 * see if any failures are still present, or if any new ones
5629 * have arrived
5630 *
5631 * FIXME add support for online capacity expansion and
5632 * raid-level-migration
5633 */
5634 for (i = 0; i < prev->num_members; i++) {
5635 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5636 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5637 idx = ord_to_idx(ord);
c2a1e7da 5638
949c47a0 5639 disk = get_imsm_disk(super, idx);
25ed7e59 5640 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
fcb84475 5641 failed++;
c2a1e7da
DW
5642 }
5643
5644 return failed;
845dea95
NB
5645}
5646
97b4d0e9
DW
5647#ifndef MDASSEMBLE
5648static int imsm_open_new(struct supertype *c, struct active_array *a,
5649 char *inst)
5650{
5651 struct intel_super *super = c->sb;
5652 struct imsm_super *mpb = super->anchor;
5653
5654 if (atoi(inst) >= mpb->num_raid_devs) {
5655 fprintf(stderr, "%s: subarry index %d, out of range\n",
5656 __func__, atoi(inst));
5657 return -ENODEV;
5658 }
5659
5660 dprintf("imsm: open_new %s\n", inst);
5661 a->info.container_member = atoi(inst);
5662 return 0;
5663}
5664
0c046afd
DW
5665static int is_resyncing(struct imsm_dev *dev)
5666{
5667 struct imsm_map *migr_map;
5668
5669 if (!dev->vol.migr_state)
5670 return 0;
5671
1484e727
DW
5672 if (migr_type(dev) == MIGR_INIT ||
5673 migr_type(dev) == MIGR_REPAIR)
0c046afd
DW
5674 return 1;
5675
4c9bc37b
AK
5676 if (migr_type(dev) == MIGR_GEN_MIGR)
5677 return 0;
5678
0c046afd
DW
5679 migr_map = get_imsm_map(dev, 1);
5680
4c9bc37b
AK
5681 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5682 (dev->vol.migr_type != MIGR_GEN_MIGR))
0c046afd
DW
5683 return 1;
5684 else
5685 return 0;
5686}
5687
0556e1a2
DW
5688/* return true if we recorded new information */
5689static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
47ee5a45 5690{
0556e1a2
DW
5691 __u32 ord;
5692 int slot;
5693 struct imsm_map *map;
5694
5695 /* new failures are always set in map[0] */
5696 map = get_imsm_map(dev, 0);
5697
5698 slot = get_imsm_disk_slot(map, idx);
5699 if (slot < 0)
5700 return 0;
5701
5702 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
25ed7e59 5703 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
0556e1a2
DW
5704 return 0;
5705
f2f27e63 5706 disk->status |= FAILED_DISK;
0556e1a2 5707 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
f21e18ca 5708 if (map->failed_disk_num == 0xff)
0556e1a2
DW
5709 map->failed_disk_num = slot;
5710 return 1;
5711}
5712
5713static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5714{
5715 mark_failure(dev, disk, idx);
5716
5717 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5718 return;
5719
47ee5a45
DW
5720 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5721 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5722}
5723
33414a01
DW
5724static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5725{
5726 __u8 map_state;
5727 struct dl *dl;
5728 int failed;
5729
5730 if (!super->missing)
5731 return;
5732 failed = imsm_count_failed(super, dev);
5733 map_state = imsm_check_degraded(super, dev, failed);
5734
5735 dprintf("imsm: mark missing\n");
5736 end_migration(dev, map_state);
5737 for (dl = super->missing; dl; dl = dl->next)
5738 mark_missing(dev, &dl->disk, dl->index);
5739 super->updates_pending++;
5740}
5741
70bdf0dc
AK
5742static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5743{
5744 int used_disks = imsm_num_data_members(dev, 0);
5745 unsigned long long array_blocks;
5746 struct imsm_map *map;
5747
5748 if (used_disks == 0) {
5749 /* when problems occures
5750 * return current array_blocks value
5751 */
5752 array_blocks = __le32_to_cpu(dev->size_high);
5753 array_blocks = array_blocks << 32;
5754 array_blocks += __le32_to_cpu(dev->size_low);
5755
5756 return array_blocks;
5757 }
5758
5759 /* set array size in metadata
5760 */
5761 map = get_imsm_map(dev, 0);
5762 array_blocks = map->blocks_per_member * used_disks;
5763
5764 /* round array size down to closest MB
5765 */
5766 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5767 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5768 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5769
5770 return array_blocks;
5771}
5772
28bce06f
AK
5773static void imsm_set_disk(struct active_array *a, int n, int state);
5774
0e2d1a4e
AK
5775static void imsm_progress_container_reshape(struct intel_super *super)
5776{
5777 /* if no device has a migr_state, but some device has a
5778 * different number of members than the previous device, start
5779 * changing the number of devices in this device to match
5780 * previous.
5781 */
5782 struct imsm_super *mpb = super->anchor;
5783 int prev_disks = -1;
5784 int i;
1dfaa380 5785 int copy_map_size;
0e2d1a4e
AK
5786
5787 for (i = 0; i < mpb->num_raid_devs; i++) {
5788 struct imsm_dev *dev = get_imsm_dev(super, i);
5789 struct imsm_map *map = get_imsm_map(dev, 0);
5790 struct imsm_map *map2;
5791 int prev_num_members;
0e2d1a4e
AK
5792
5793 if (dev->vol.migr_state)
5794 return;
5795
5796 if (prev_disks == -1)
5797 prev_disks = map->num_members;
5798 if (prev_disks == map->num_members)
5799 continue;
5800
5801 /* OK, this array needs to enter reshape mode.
5802 * i.e it needs a migr_state
5803 */
5804
1dfaa380 5805 copy_map_size = sizeof_imsm_map(map);
0e2d1a4e
AK
5806 prev_num_members = map->num_members;
5807 map->num_members = prev_disks;
5808 dev->vol.migr_state = 1;
5809 dev->vol.curr_migr_unit = 0;
5810 dev->vol.migr_type = MIGR_GEN_MIGR;
5811 for (i = prev_num_members;
5812 i < map->num_members; i++)
5813 set_imsm_ord_tbl_ent(map, i, i);
5814 map2 = get_imsm_map(dev, 1);
5815 /* Copy the current map */
1dfaa380 5816 memcpy(map2, map, copy_map_size);
0e2d1a4e
AK
5817 map2->num_members = prev_num_members;
5818
70bdf0dc 5819 imsm_set_array_size(dev);
0e2d1a4e
AK
5820 super->updates_pending++;
5821 }
5822}
5823
/* Handle dirty -> clean transitions, resync and reshape.  Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception: when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 */
01f157d7 5829static int imsm_set_array_state(struct active_array *a, int consistent)
a862209d
DW
5830{
5831 int inst = a->info.container_member;
5832 struct intel_super *super = a->container->sb;
949c47a0 5833 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5834 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd
DW
5835 int failed = imsm_count_failed(super, dev);
5836 __u8 map_state = imsm_check_degraded(super, dev, failed);
1e5c6983 5837 __u32 blocks_per_unit;
a862209d 5838
1af97990
AK
5839 if (dev->vol.migr_state &&
5840 dev->vol.migr_type == MIGR_GEN_MIGR) {
5841 /* array state change is blocked due to reshape action
aad6f216
N
5842 * We might need to
5843 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5844 * - finish the reshape (if last_checkpoint is big and action != reshape)
5845 * - update curr_migr_unit
1af97990 5846 */
aad6f216
N
5847 if (a->curr_action == reshape) {
5848 /* still reshaping, maybe update curr_migr_unit */
633b5610 5849 goto mark_checkpoint;
aad6f216
N
5850 } else {
5851 if (a->last_checkpoint == 0 && a->prev_action == reshape) {
5852 /* for some reason we aborted the reshape.
5853 * Better clean up
5854 */
5855 struct imsm_map *map2 = get_imsm_map(dev, 1);
5856 dev->vol.migr_state = 0;
5857 dev->vol.migr_type = 0;
5858 dev->vol.curr_migr_unit = 0;
5859 memcpy(map, map2, sizeof_imsm_map(map2));
5860 super->updates_pending++;
5861 }
5862 if (a->last_checkpoint >= a->info.component_size) {
5863 unsigned long long array_blocks;
5864 int used_disks;
e154ced3 5865 struct mdinfo *mdi;
aad6f216 5866
9653001d 5867 used_disks = imsm_num_data_members(dev, 0);
d55adef9
AK
5868 if (used_disks > 0) {
5869 array_blocks =
5870 map->blocks_per_member *
5871 used_disks;
5872 /* round array size down to closest MB
5873 */
5874 array_blocks = (array_blocks
5875 >> SECT_PER_MB_SHIFT)
5876 << SECT_PER_MB_SHIFT;
d55adef9
AK
5877 a->info.custom_array_size = array_blocks;
5878 /* encourage manager to update array
5879 * size
5880 */
e154ced3 5881
d55adef9 5882 a->check_reshape = 1;
633b5610 5883 }
e154ced3
AK
5884 /* finalize online capacity expansion/reshape */
5885 for (mdi = a->info.devs; mdi; mdi = mdi->next)
5886 imsm_set_disk(a,
5887 mdi->disk.raid_disk,
5888 mdi->curr_state);
5889
0e2d1a4e 5890 imsm_progress_container_reshape(super);
e154ced3 5891 }
aad6f216 5892 }
1af97990
AK
5893 }
5894
47ee5a45 5895 /* before we activate this array handle any missing disks */
33414a01
DW
5896 if (consistent == 2)
5897 handle_missing(super, dev);
1e5c6983 5898
0c046afd 5899 if (consistent == 2 &&
b7941fd6 5900 (!is_resync_complete(&a->info) ||
0c046afd
DW
5901 map_state != IMSM_T_STATE_NORMAL ||
5902 dev->vol.migr_state))
01f157d7 5903 consistent = 0;
272906ef 5904
b7941fd6 5905 if (is_resync_complete(&a->info)) {
0c046afd 5906 /* complete intialization / resync,
0556e1a2
DW
5907 * recovery and interrupted recovery is completed in
5908 * ->set_disk
0c046afd
DW
5909 */
5910 if (is_resyncing(dev)) {
5911 dprintf("imsm: mark resync done\n");
f8f603f1 5912 end_migration(dev, map_state);
115c3803 5913 super->updates_pending++;
484240d8 5914 a->last_checkpoint = 0;
115c3803 5915 }
0c046afd
DW
5916 } else if (!is_resyncing(dev) && !failed) {
5917 /* mark the start of the init process if nothing is failed */
b7941fd6 5918 dprintf("imsm: mark resync start\n");
1484e727 5919 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
8e59f3d8 5920 migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
1484e727 5921 else
8e59f3d8 5922 migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
3393c6af 5923 super->updates_pending++;
115c3803 5924 }
a862209d 5925
633b5610 5926mark_checkpoint:
5b83bacf
AK
5927 /* skip checkpointing for general migration,
5928 * it is controlled in mdadm
5929 */
5930 if (is_gen_migration(dev))
5931 goto skip_mark_checkpoint;
5932
1e5c6983 5933 /* check if we can update curr_migr_unit from resync_start, recovery_start */
c47b0ff6 5934 blocks_per_unit = blocks_per_migr_unit(super, dev);
4f0a7acc 5935 if (blocks_per_unit) {
1e5c6983
DW
5936 __u32 units32;
5937 __u64 units;
5938
4f0a7acc 5939 units = a->last_checkpoint / blocks_per_unit;
1e5c6983
DW
5940 units32 = units;
5941
5942 /* check that we did not overflow 32-bits, and that
5943 * curr_migr_unit needs updating
5944 */
5945 if (units32 == units &&
bfd80a56 5946 units32 != 0 &&
1e5c6983
DW
5947 __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
5948 dprintf("imsm: mark checkpoint (%u)\n", units32);
5949 dev->vol.curr_migr_unit = __cpu_to_le32(units32);
5950 super->updates_pending++;
5951 }
5952 }
f8f603f1 5953
5b83bacf 5954skip_mark_checkpoint:
3393c6af 5955 /* mark dirty / clean */
0c046afd 5956 if (dev->vol.dirty != !consistent) {
b7941fd6 5957 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
0c046afd
DW
5958 if (consistent)
5959 dev->vol.dirty = 0;
5960 else
5961 dev->vol.dirty = 1;
a862209d
DW
5962 super->updates_pending++;
5963 }
28bce06f 5964
01f157d7 5965 return consistent;
a862209d
DW
5966}
5967
8d45d196 5968static void imsm_set_disk(struct active_array *a, int n, int state)
845dea95 5969{
8d45d196
DW
5970 int inst = a->info.container_member;
5971 struct intel_super *super = a->container->sb;
949c47a0 5972 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 5973 struct imsm_map *map = get_imsm_map(dev, 0);
8d45d196 5974 struct imsm_disk *disk;
0c046afd 5975 int failed;
b10b37b8 5976 __u32 ord;
0c046afd 5977 __u8 map_state;
8d45d196
DW
5978
5979 if (n > map->num_members)
5980 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5981 n, map->num_members - 1);
5982
5983 if (n < 0)
5984 return;
5985
4e6e574a 5986 dprintf("imsm: set_disk %d:%x\n", n, state);
8d45d196 5987
98130f40 5988 ord = get_imsm_ord_tbl_ent(dev, n, -1);
b10b37b8 5989 disk = get_imsm_disk(super, ord_to_idx(ord));
8d45d196 5990
5802a811 5991 /* check for new failures */
0556e1a2
DW
5992 if (state & DS_FAULTY) {
5993 if (mark_failure(dev, disk, ord_to_idx(ord)))
5994 super->updates_pending++;
8d45d196 5995 }
47ee5a45 5996
19859edc 5997 /* check if in_sync */
0556e1a2 5998 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
b10b37b8
DW
5999 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6000
6001 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
19859edc
DW
6002 super->updates_pending++;
6003 }
8d45d196 6004
0c046afd
DW
6005 failed = imsm_count_failed(super, dev);
6006 map_state = imsm_check_degraded(super, dev, failed);
5802a811 6007
0c046afd
DW
6008 /* check if recovery complete, newly degraded, or failed */
6009 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
f8f603f1 6010 end_migration(dev, map_state);
0556e1a2
DW
6011 map = get_imsm_map(dev, 0);
6012 map->failed_disk_num = ~0;
0c046afd 6013 super->updates_pending++;
484240d8 6014 a->last_checkpoint = 0;
0c046afd
DW
6015 } else if (map_state == IMSM_T_STATE_DEGRADED &&
6016 map->map_state != map_state &&
6017 !dev->vol.migr_state) {
6018 dprintf("imsm: mark degraded\n");
6019 map->map_state = map_state;
6020 super->updates_pending++;
484240d8 6021 a->last_checkpoint = 0;
0c046afd
DW
6022 } else if (map_state == IMSM_T_STATE_FAILED &&
6023 map->map_state != map_state) {
6024 dprintf("imsm: mark failed\n");
f8f603f1 6025 end_migration(dev, map_state);
0c046afd 6026 super->updates_pending++;
484240d8 6027 a->last_checkpoint = 0;
28bce06f
AK
6028 } else if (is_gen_migration(dev)) {
6029 dprintf("imsm: Detected General Migration in state: ");
6030 if (map_state == IMSM_T_STATE_NORMAL) {
6031 end_migration(dev, map_state);
6032 map = get_imsm_map(dev, 0);
6033 map->failed_disk_num = ~0;
6034 dprintf("normal\n");
6035 } else {
6036 if (map_state == IMSM_T_STATE_DEGRADED) {
6037 printf("degraded\n");
6038 end_migration(dev, map_state);
6039 } else {
6040 dprintf("failed\n");
6041 }
6042 map->map_state = map_state;
6043 }
6044 super->updates_pending++;
5802a811 6045 }
845dea95
NB
6046}
6047
f796af5d 6048static int store_imsm_mpb(int fd, struct imsm_super *mpb)
c2a1e7da 6049{
f796af5d 6050 void *buf = mpb;
c2a1e7da
DW
6051 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
6052 unsigned long long dsize;
6053 unsigned long long sectors;
6054
6055 get_dev_size(fd, NULL, &dsize);
6056
272f648f
DW
6057 if (mpb_size > 512) {
6058 /* -1 to account for anchor */
6059 sectors = mpb_sectors(mpb) - 1;
c2a1e7da 6060
272f648f
DW
6061 /* write the extended mpb to the sectors preceeding the anchor */
6062 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
6063 return 1;
c2a1e7da 6064
f21e18ca
N
6065 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
6066 != 512 * sectors)
272f648f
DW
6067 return 1;
6068 }
c2a1e7da 6069
272f648f
DW
6070 /* first block is stored on second to last sector of the disk */
6071 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
c2a1e7da
DW
6072 return 1;
6073
f796af5d 6074 if (write(fd, buf, 512) != 512)
c2a1e7da
DW
6075 return 1;
6076
c2a1e7da
DW
6077 return 0;
6078}
6079
2e735d19 6080static void imsm_sync_metadata(struct supertype *container)
845dea95 6081{
2e735d19 6082 struct intel_super *super = container->sb;
c2a1e7da 6083
1a64be56 6084 dprintf("sync metadata: %d\n", super->updates_pending);
c2a1e7da
DW
6085 if (!super->updates_pending)
6086 return;
6087
36988a3d 6088 write_super_imsm(container, 0);
c2a1e7da
DW
6089
6090 super->updates_pending = 0;
845dea95
NB
6091}
6092
272906ef
DW
6093static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
6094{
6095 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 6096 int i = get_imsm_disk_idx(dev, idx, -1);
272906ef
DW
6097 struct dl *dl;
6098
6099 for (dl = super->disks; dl; dl = dl->next)
6100 if (dl->index == i)
6101 break;
6102
25ed7e59 6103 if (dl && is_failed(&dl->disk))
272906ef
DW
6104 dl = NULL;
6105
6106 if (dl)
6107 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
6108
6109 return dl;
6110}
6111
a20d2ba5 6112static struct dl *imsm_add_spare(struct intel_super *super, int slot,
8ba77d32
AK
6113 struct active_array *a, int activate_new,
6114 struct mdinfo *additional_test_list)
272906ef
DW
6115{
6116 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
98130f40 6117 int idx = get_imsm_disk_idx(dev, slot, -1);
a20d2ba5
DW
6118 struct imsm_super *mpb = super->anchor;
6119 struct imsm_map *map;
272906ef
DW
6120 unsigned long long pos;
6121 struct mdinfo *d;
6122 struct extent *ex;
a20d2ba5 6123 int i, j;
272906ef 6124 int found;
569cc43f
DW
6125 __u32 array_start = 0;
6126 __u32 array_end = 0;
272906ef 6127 struct dl *dl;
6c932028 6128 struct mdinfo *test_list;
272906ef
DW
6129
6130 for (dl = super->disks; dl; dl = dl->next) {
6131 /* If in this array, skip */
6132 for (d = a->info.devs ; d ; d = d->next)
e553d2a4
DW
6133 if (d->state_fd >= 0 &&
6134 d->disk.major == dl->major &&
272906ef 6135 d->disk.minor == dl->minor) {
8ba77d32
AK
6136 dprintf("%x:%x already in array\n",
6137 dl->major, dl->minor);
272906ef
DW
6138 break;
6139 }
6140 if (d)
6141 continue;
6c932028
AK
6142 test_list = additional_test_list;
6143 while (test_list) {
6144 if (test_list->disk.major == dl->major &&
6145 test_list->disk.minor == dl->minor) {
8ba77d32
AK
6146 dprintf("%x:%x already in additional test list\n",
6147 dl->major, dl->minor);
6148 break;
6149 }
6c932028 6150 test_list = test_list->next;
8ba77d32 6151 }
6c932028 6152 if (test_list)
8ba77d32 6153 continue;
272906ef 6154
e553d2a4 6155 /* skip in use or failed drives */
25ed7e59 6156 if (is_failed(&dl->disk) || idx == dl->index ||
df474657
DW
6157 dl->index == -2) {
6158 dprintf("%x:%x status (failed: %d index: %d)\n",
25ed7e59 6159 dl->major, dl->minor, is_failed(&dl->disk), idx);
9a1608e5
DW
6160 continue;
6161 }
6162
a20d2ba5
DW
6163 /* skip pure spares when we are looking for partially
6164 * assimilated drives
6165 */
6166 if (dl->index == -1 && !activate_new)
6167 continue;
6168
272906ef 6169 /* Does this unused device have the requisite free space?
a20d2ba5 6170 * It needs to be able to cover all member volumes
272906ef
DW
6171 */
6172 ex = get_extents(super, dl);
6173 if (!ex) {
6174 dprintf("cannot get extents\n");
6175 continue;
6176 }
a20d2ba5
DW
6177 for (i = 0; i < mpb->num_raid_devs; i++) {
6178 dev = get_imsm_dev(super, i);
6179 map = get_imsm_map(dev, 0);
272906ef 6180
a20d2ba5
DW
6181 /* check if this disk is already a member of
6182 * this array
272906ef 6183 */
620b1713 6184 if (get_imsm_disk_slot(map, dl->index) >= 0)
a20d2ba5
DW
6185 continue;
6186
6187 found = 0;
6188 j = 0;
6189 pos = 0;
6190 array_start = __le32_to_cpu(map->pba_of_lba0);
329c8278
DW
6191 array_end = array_start +
6192 __le32_to_cpu(map->blocks_per_member) - 1;
a20d2ba5
DW
6193
6194 do {
6195 /* check that we can start at pba_of_lba0 with
6196 * blocks_per_member of space
6197 */
329c8278 6198 if (array_start >= pos && array_end < ex[j].start) {
a20d2ba5
DW
6199 found = 1;
6200 break;
6201 }
6202 pos = ex[j].start + ex[j].size;
6203 j++;
6204 } while (ex[j-1].size);
6205
6206 if (!found)
272906ef 6207 break;
a20d2ba5 6208 }
272906ef
DW
6209
6210 free(ex);
a20d2ba5 6211 if (i < mpb->num_raid_devs) {
329c8278
DW
6212 dprintf("%x:%x does not have %u to %u available\n",
6213 dl->major, dl->minor, array_start, array_end);
272906ef
DW
6214 /* No room */
6215 continue;
a20d2ba5
DW
6216 }
6217 return dl;
272906ef
DW
6218 }
6219
6220 return dl;
6221}
6222
95d07a2c
LM
6223
6224static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
6225{
6226 struct imsm_dev *dev2;
6227 struct imsm_map *map;
6228 struct dl *idisk;
6229 int slot;
6230 int idx;
6231 __u8 state;
6232
6233 dev2 = get_imsm_dev(cont->sb, dev_idx);
6234 if (dev2) {
6235 state = imsm_check_degraded(cont->sb, dev2, failed);
6236 if (state == IMSM_T_STATE_FAILED) {
6237 map = get_imsm_map(dev2, 0);
6238 if (!map)
6239 return 1;
6240 for (slot = 0; slot < map->num_members; slot++) {
6241 /*
6242 * Check if failed disks are deleted from intel
6243 * disk list or are marked to be deleted
6244 */
98130f40 6245 idx = get_imsm_disk_idx(dev2, slot, -1);
95d07a2c
LM
6246 idisk = get_imsm_dl_disk(cont->sb, idx);
6247 /*
6248 * Do not rebuild the array if failed disks
6249 * from failed sub-array are not removed from
6250 * container.
6251 */
6252 if (idisk &&
6253 is_failed(&idisk->disk) &&
6254 (idisk->action != DISK_REMOVE))
6255 return 0;
6256 }
6257 }
6258 }
6259 return 1;
6260}
6261
88758e9d
DW
6262static struct mdinfo *imsm_activate_spare(struct active_array *a,
6263 struct metadata_update **updates)
6264{
6265 /**
d23fe947
DW
6266 * Find a device with unused free space and use it to replace a
6267 * failed/vacant region in an array. We replace failed regions one a
6268 * array at a time. The result is that a new spare disk will be added
6269 * to the first failed array and after the monitor has finished
6270 * propagating failures the remainder will be consumed.
88758e9d 6271 *
d23fe947
DW
6272 * FIXME add a capability for mdmon to request spares from another
6273 * container.
88758e9d
DW
6274 */
6275
6276 struct intel_super *super = a->container->sb;
88758e9d 6277 int inst = a->info.container_member;
949c47a0 6278 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 6279 struct imsm_map *map = get_imsm_map(dev, 0);
88758e9d
DW
6280 int failed = a->info.array.raid_disks;
6281 struct mdinfo *rv = NULL;
6282 struct mdinfo *d;
6283 struct mdinfo *di;
6284 struct metadata_update *mu;
6285 struct dl *dl;
6286 struct imsm_update_activate_spare *u;
6287 int num_spares = 0;
6288 int i;
95d07a2c 6289 int allowed;
88758e9d
DW
6290
6291 for (d = a->info.devs ; d ; d = d->next) {
6292 if ((d->curr_state & DS_FAULTY) &&
6293 d->state_fd >= 0)
6294 /* wait for Removal to happen */
6295 return NULL;
6296 if (d->state_fd >= 0)
6297 failed--;
6298 }
6299
6300 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
6301 inst, failed, a->info.array.raid_disks, a->info.array.level);
1af97990
AK
6302
6303 if (dev->vol.migr_state &&
6304 dev->vol.migr_type == MIGR_GEN_MIGR)
6305 /* No repair during migration */
6306 return NULL;
6307
89c67882
AK
6308 if (a->info.array.level == 4)
6309 /* No repair for takeovered array
6310 * imsm doesn't support raid4
6311 */
6312 return NULL;
6313
fb49eef2 6314 if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
88758e9d
DW
6315 return NULL;
6316
95d07a2c
LM
6317 /*
6318 * If there are any failed disks check state of the other volume.
6319 * Block rebuild if the another one is failed until failed disks
6320 * are removed from container.
6321 */
6322 if (failed) {
6323 dprintf("found failed disks in %s, check if there another"
6324 "failed sub-array.\n",
6325 dev->volume);
6326 /* check if states of the other volumes allow for rebuild */
6327 for (i = 0; i < super->anchor->num_raid_devs; i++) {
6328 if (i != inst) {
6329 allowed = imsm_rebuild_allowed(a->container,
6330 i, failed);
6331 if (!allowed)
6332 return NULL;
6333 }
6334 }
6335 }
6336
88758e9d 6337 /* For each slot, if it is not working, find a spare */
88758e9d
DW
6338 for (i = 0; i < a->info.array.raid_disks; i++) {
6339 for (d = a->info.devs ; d ; d = d->next)
6340 if (d->disk.raid_disk == i)
6341 break;
6342 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
6343 if (d && (d->state_fd >= 0))
6344 continue;
6345
272906ef 6346 /*
a20d2ba5
DW
6347 * OK, this device needs recovery. Try to re-add the
6348 * previous occupant of this slot, if this fails see if
6349 * we can continue the assimilation of a spare that was
6350 * partially assimilated, finally try to activate a new
6351 * spare.
272906ef
DW
6352 */
6353 dl = imsm_readd(super, i, a);
6354 if (!dl)
8ba77d32 6355 dl = imsm_add_spare(super, i, a, 0, NULL);
a20d2ba5 6356 if (!dl)
8ba77d32 6357 dl = imsm_add_spare(super, i, a, 1, NULL);
272906ef
DW
6358 if (!dl)
6359 continue;
6360
6361 /* found a usable disk with enough space */
6362 di = malloc(sizeof(*di));
79244939
DW
6363 if (!di)
6364 continue;
272906ef
DW
6365 memset(di, 0, sizeof(*di));
6366
6367 /* dl->index will be -1 in the case we are activating a
6368 * pristine spare. imsm_process_update() will create a
6369 * new index in this case. Once a disk is found to be
6370 * failed in all member arrays it is kicked from the
6371 * metadata
6372 */
6373 di->disk.number = dl->index;
d23fe947 6374
272906ef
DW
6375 /* (ab)use di->devs to store a pointer to the device
6376 * we chose
6377 */
6378 di->devs = (struct mdinfo *) dl;
6379
6380 di->disk.raid_disk = i;
6381 di->disk.major = dl->major;
6382 di->disk.minor = dl->minor;
6383 di->disk.state = 0;
d23534e4 6384 di->recovery_start = 0;
272906ef
DW
6385 di->data_offset = __le32_to_cpu(map->pba_of_lba0);
6386 di->component_size = a->info.component_size;
6387 di->container_member = inst;
148acb7b 6388 super->random = random32();
272906ef
DW
6389 di->next = rv;
6390 rv = di;
6391 num_spares++;
6392 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
6393 i, di->data_offset);
88758e9d 6394
272906ef 6395 break;
88758e9d
DW
6396 }
6397
6398 if (!rv)
6399 /* No spares found */
6400 return rv;
6401 /* Now 'rv' has a list of devices to return.
6402 * Create a metadata_update record to update the
6403 * disk_ord_tbl for the array
6404 */
6405 mu = malloc(sizeof(*mu));
79244939
DW
6406 if (mu) {
6407 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
6408 if (mu->buf == NULL) {
6409 free(mu);
6410 mu = NULL;
6411 }
6412 }
6413 if (!mu) {
6414 while (rv) {
6415 struct mdinfo *n = rv->next;
6416
6417 free(rv);
6418 rv = n;
6419 }
6420 return NULL;
6421 }
6422
88758e9d 6423 mu->space = NULL;
cb23f1f4 6424 mu->space_list = NULL;
88758e9d
DW
6425 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
6426 mu->next = *updates;
6427 u = (struct imsm_update_activate_spare *) mu->buf;
6428
6429 for (di = rv ; di ; di = di->next) {
6430 u->type = update_activate_spare;
d23fe947
DW
6431 u->dl = (struct dl *) di->devs;
6432 di->devs = NULL;
88758e9d
DW
6433 u->slot = di->disk.raid_disk;
6434 u->array = inst;
6435 u->next = u + 1;
6436 u++;
6437 }
6438 (u-1)->next = NULL;
6439 *updates = mu;
6440
6441 return rv;
6442}
6443
54c2c1ea 6444static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
8273f55e 6445{
54c2c1ea
DW
6446 struct imsm_dev *dev = get_imsm_dev(super, idx);
6447 struct imsm_map *map = get_imsm_map(dev, 0);
6448 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
6449 struct disk_info *inf = get_disk_info(u);
6450 struct imsm_disk *disk;
8273f55e
DW
6451 int i;
6452 int j;
8273f55e 6453
54c2c1ea 6454 for (i = 0; i < map->num_members; i++) {
98130f40 6455 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
54c2c1ea
DW
6456 for (j = 0; j < new_map->num_members; j++)
6457 if (serialcmp(disk->serial, inf[j].serial) == 0)
8273f55e
DW
6458 return 1;
6459 }
6460
6461 return 0;
6462}
6463
1a64be56
LM
6464
6465static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6466{
6467 struct dl *dl = NULL;
6468 for (dl = super->disks; dl; dl = dl->next)
6469 if ((dl->major == major) && (dl->minor == minor))
6470 return dl;
6471 return NULL;
6472}
6473
6474static int remove_disk_super(struct intel_super *super, int major, int minor)
6475{
6476 struct dl *prev = NULL;
6477 struct dl *dl;
6478
6479 prev = NULL;
6480 for (dl = super->disks; dl; dl = dl->next) {
6481 if ((dl->major == major) && (dl->minor == minor)) {
6482 /* remove */
6483 if (prev)
6484 prev->next = dl->next;
6485 else
6486 super->disks = dl->next;
6487 dl->next = NULL;
6488 __free_imsm_disk(dl);
6489 dprintf("%s: removed %x:%x\n",
6490 __func__, major, minor);
6491 break;
6492 }
6493 prev = dl;
6494 }
6495 return 0;
6496}
6497
f21e18ca 6498static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
ae6aad82 6499
1a64be56
LM
6500static int add_remove_disk_update(struct intel_super *super)
6501{
6502 int check_degraded = 0;
6503 struct dl *disk = NULL;
6504 /* add/remove some spares to/from the metadata/contrainer */
6505 while (super->disk_mgmt_list) {
6506 struct dl *disk_cfg;
6507
6508 disk_cfg = super->disk_mgmt_list;
6509 super->disk_mgmt_list = disk_cfg->next;
6510 disk_cfg->next = NULL;
6511
6512 if (disk_cfg->action == DISK_ADD) {
6513 disk_cfg->next = super->disks;
6514 super->disks = disk_cfg;
6515 check_degraded = 1;
6516 dprintf("%s: added %x:%x\n",
6517 __func__, disk_cfg->major,
6518 disk_cfg->minor);
6519 } else if (disk_cfg->action == DISK_REMOVE) {
6520 dprintf("Disk remove action processed: %x.%x\n",
6521 disk_cfg->major, disk_cfg->minor);
6522 disk = get_disk_super(super,
6523 disk_cfg->major,
6524 disk_cfg->minor);
6525 if (disk) {
6526 /* store action status */
6527 disk->action = DISK_REMOVE;
6528 /* remove spare disks only */
6529 if (disk->index == -1) {
6530 remove_disk_super(super,
6531 disk_cfg->major,
6532 disk_cfg->minor);
6533 }
6534 }
6535 /* release allocate disk structure */
6536 __free_imsm_disk(disk_cfg);
6537 }
6538 }
6539 return check_degraded;
6540}
6541
a29911da
PC
6542
6543static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6544 struct intel_super *super,
6545 void ***space_list)
6546{
6547 struct intel_dev *id;
6548 void **tofree = NULL;
6549 int ret_val = 0;
6550
6551 dprintf("apply_reshape_migration_update()\n");
6552 if ((u->subdev < 0) ||
6553 (u->subdev > 1)) {
6554 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6555 return ret_val;
6556 }
6557 if ((space_list == NULL) || (*space_list == NULL)) {
6558 dprintf("imsm: Error: Memory is not allocated\n");
6559 return ret_val;
6560 }
6561
6562 for (id = super->devlist ; id; id = id->next) {
6563 if (id->index == (unsigned)u->subdev) {
6564 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6565 struct imsm_map *map;
6566 struct imsm_dev *new_dev =
6567 (struct imsm_dev *)*space_list;
6568 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6569 int to_state;
6570 struct dl *new_disk;
6571
6572 if (new_dev == NULL)
6573 return ret_val;
6574 *space_list = **space_list;
6575 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6576 map = get_imsm_map(new_dev, 0);
6577 if (migr_map) {
6578 dprintf("imsm: Error: migration in progress");
6579 return ret_val;
6580 }
6581
6582 to_state = map->map_state;
6583 if ((u->new_level == 5) && (map->raid_level == 0)) {
6584 map->num_members++;
6585 /* this should not happen */
6586 if (u->new_disks[0] < 0) {
6587 map->failed_disk_num =
6588 map->num_members - 1;
6589 to_state = IMSM_T_STATE_DEGRADED;
6590 } else
6591 to_state = IMSM_T_STATE_NORMAL;
6592 }
8e59f3d8 6593 migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
a29911da
PC
6594 if (u->new_level > -1)
6595 map->raid_level = u->new_level;
6596 migr_map = get_imsm_map(new_dev, 1);
6597 if ((u->new_level == 5) &&
6598 (migr_map->raid_level == 0)) {
6599 int ord = map->num_members - 1;
6600 migr_map->num_members--;
6601 if (u->new_disks[0] < 0)
6602 ord |= IMSM_ORD_REBUILD;
6603 set_imsm_ord_tbl_ent(map,
6604 map->num_members - 1,
6605 ord);
6606 }
6607 id->dev = new_dev;
6608 tofree = (void **)dev;
6609
4bba0439
PC
6610 /* update chunk size
6611 */
6612 if (u->new_chunksize > 0)
6613 map->blocks_per_strip =
6614 __cpu_to_le16(u->new_chunksize * 2);
6615
a29911da
PC
6616 /* add disk
6617 */
6618 if ((u->new_level != 5) ||
6619 (migr_map->raid_level != 0) ||
6620 (migr_map->raid_level == map->raid_level))
6621 goto skip_disk_add;
6622
6623 if (u->new_disks[0] >= 0) {
6624 /* use passes spare
6625 */
6626 new_disk = get_disk_super(super,
6627 major(u->new_disks[0]),
6628 minor(u->new_disks[0]));
6629 dprintf("imsm: new disk for reshape is: %i:%i "
6630 "(%p, index = %i)\n",
6631 major(u->new_disks[0]),
6632 minor(u->new_disks[0]),
6633 new_disk, new_disk->index);
6634 if (new_disk == NULL)
6635 goto error_disk_add;
6636
6637 new_disk->index = map->num_members - 1;
6638 /* slot to fill in autolayout
6639 */
6640 new_disk->raiddisk = new_disk->index;
6641 new_disk->disk.status |= CONFIGURED_DISK;
6642 new_disk->disk.status &= ~SPARE_DISK;
6643 } else
6644 goto error_disk_add;
6645
6646skip_disk_add:
6647 *tofree = *space_list;
6648 /* calculate new size
6649 */
6650 imsm_set_array_size(new_dev);
6651
6652 ret_val = 1;
6653 }
6654 }
6655
6656 if (tofree)
6657 *space_list = tofree;
6658 return ret_val;
6659
6660error_disk_add:
6661 dprintf("Error: imsm: Cannot find disk.\n");
6662 return ret_val;
6663}
6664
6665
2e5dc010
N
6666static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6667 struct intel_super *super,
6668 void ***space_list)
6669{
6670 struct dl *new_disk;
6671 struct intel_dev *id;
6672 int i;
6673 int delta_disks = u->new_raid_disks - u->old_raid_disks;
ee4beede 6674 int disk_count = u->old_raid_disks;
2e5dc010
N
6675 void **tofree = NULL;
6676 int devices_to_reshape = 1;
6677 struct imsm_super *mpb = super->anchor;
6678 int ret_val = 0;
d098291a 6679 unsigned int dev_id;
2e5dc010 6680
ed7333bd 6681 dprintf("imsm: apply_reshape_container_disks_update()\n");
2e5dc010
N
6682
6683 /* enable spares to use in array */
6684 for (i = 0; i < delta_disks; i++) {
6685 new_disk = get_disk_super(super,
6686 major(u->new_disks[i]),
6687 minor(u->new_disks[i]));
ed7333bd
AK
6688 dprintf("imsm: new disk for reshape is: %i:%i "
6689 "(%p, index = %i)\n",
2e5dc010
N
6690 major(u->new_disks[i]), minor(u->new_disks[i]),
6691 new_disk, new_disk->index);
6692 if ((new_disk == NULL) ||
6693 ((new_disk->index >= 0) &&
6694 (new_disk->index < u->old_raid_disks)))
6695 goto update_reshape_exit;
ee4beede 6696 new_disk->index = disk_count++;
2e5dc010
N
6697 /* slot to fill in autolayout
6698 */
6699 new_disk->raiddisk = new_disk->index;
6700 new_disk->disk.status |=
6701 CONFIGURED_DISK;
6702 new_disk->disk.status &= ~SPARE_DISK;
6703 }
6704
ed7333bd
AK
6705 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6706 mpb->num_raid_devs);
2e5dc010
N
6707 /* manage changes in volume
6708 */
d098291a 6709 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
2e5dc010
N
6710 void **sp = *space_list;
6711 struct imsm_dev *newdev;
6712 struct imsm_map *newmap, *oldmap;
6713
d098291a
AK
6714 for (id = super->devlist ; id; id = id->next) {
6715 if (id->index == dev_id)
6716 break;
6717 }
6718 if (id == NULL)
6719 break;
2e5dc010
N
6720 if (!sp)
6721 continue;
6722 *space_list = *sp;
6723 newdev = (void*)sp;
6724 /* Copy the dev, but not (all of) the map */
6725 memcpy(newdev, id->dev, sizeof(*newdev));
6726 oldmap = get_imsm_map(id->dev, 0);
6727 newmap = get_imsm_map(newdev, 0);
6728 /* Copy the current map */
6729 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6730 /* update one device only
6731 */
6732 if (devices_to_reshape) {
ed7333bd
AK
6733 dprintf("imsm: modifying subdev: %i\n",
6734 id->index);
2e5dc010
N
6735 devices_to_reshape--;
6736 newdev->vol.migr_state = 1;
6737 newdev->vol.curr_migr_unit = 0;
6738 newdev->vol.migr_type = MIGR_GEN_MIGR;
6739 newmap->num_members = u->new_raid_disks;
6740 for (i = 0; i < delta_disks; i++) {
6741 set_imsm_ord_tbl_ent(newmap,
6742 u->old_raid_disks + i,
6743 u->old_raid_disks + i);
6744 }
6745 /* New map is correct, now need to save old map
6746 */
6747 newmap = get_imsm_map(newdev, 1);
6748 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6749
70bdf0dc 6750 imsm_set_array_size(newdev);
2e5dc010
N
6751 }
6752
6753 sp = (void **)id->dev;
6754 id->dev = newdev;
6755 *sp = tofree;
6756 tofree = sp;
8e59f3d8
AK
6757
6758 /* Clear migration record */
6759 memset(super->migr_rec, 0, sizeof(struct migr_record));
2e5dc010 6760 }
819bc634
AK
6761 if (tofree)
6762 *space_list = tofree;
2e5dc010
N
6763 ret_val = 1;
6764
6765update_reshape_exit:
6766
6767 return ret_val;
6768}
6769
bb025c2f 6770static int apply_takeover_update(struct imsm_update_takeover *u,
8ca6df95
KW
6771 struct intel_super *super,
6772 void ***space_list)
bb025c2f
KW
6773{
6774 struct imsm_dev *dev = NULL;
8ca6df95
KW
6775 struct intel_dev *dv;
6776 struct imsm_dev *dev_new;
bb025c2f
KW
6777 struct imsm_map *map;
6778 struct dl *dm, *du;
8ca6df95 6779 int i;
bb025c2f
KW
6780
6781 for (dv = super->devlist; dv; dv = dv->next)
6782 if (dv->index == (unsigned int)u->subarray) {
6783 dev = dv->dev;
6784 break;
6785 }
6786
6787 if (dev == NULL)
6788 return 0;
6789
6790 map = get_imsm_map(dev, 0);
6791
6792 if (u->direction == R10_TO_R0) {
43d5ec18
KW
6793 /* Number of failed disks must be half of initial disk number */
6794 if (imsm_count_failed(super, dev) != (map->num_members / 2))
6795 return 0;
6796
bb025c2f
KW
6797 /* iterate through devices to mark removed disks as spare */
6798 for (dm = super->disks; dm; dm = dm->next) {
6799 if (dm->disk.status & FAILED_DISK) {
6800 int idx = dm->index;
6801 /* update indexes on the disk list */
6802/* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6803 the index values will end up being correct.... NB */
6804 for (du = super->disks; du; du = du->next)
6805 if (du->index > idx)
6806 du->index--;
6807 /* mark as spare disk */
6808 dm->disk.status = SPARE_DISK;
6809 dm->index = -1;
6810 }
6811 }
bb025c2f
KW
6812 /* update map */
6813 map->num_members = map->num_members / 2;
6814 map->map_state = IMSM_T_STATE_NORMAL;
6815 map->num_domains = 1;
6816 map->raid_level = 0;
6817 map->failed_disk_num = -1;
6818 }
6819
8ca6df95
KW
6820 if (u->direction == R0_TO_R10) {
6821 void **space;
6822 /* update slots in current disk list */
6823 for (dm = super->disks; dm; dm = dm->next) {
6824 if (dm->index >= 0)
6825 dm->index *= 2;
6826 }
6827 /* create new *missing* disks */
6828 for (i = 0; i < map->num_members; i++) {
6829 space = *space_list;
6830 if (!space)
6831 continue;
6832 *space_list = *space;
6833 du = (void *)space;
6834 memcpy(du, super->disks, sizeof(*du));
8ca6df95
KW
6835 du->fd = -1;
6836 du->minor = 0;
6837 du->major = 0;
6838 du->index = (i * 2) + 1;
6839 sprintf((char *)du->disk.serial,
6840 " MISSING_%d", du->index);
6841 sprintf((char *)du->serial,
6842 "MISSING_%d", du->index);
6843 du->next = super->missing;
6844 super->missing = du;
6845 }
6846 /* create new dev and map */
6847 space = *space_list;
6848 if (!space)
6849 return 0;
6850 *space_list = *space;
6851 dev_new = (void *)space;
6852 memcpy(dev_new, dev, sizeof(*dev));
6853 /* update new map */
6854 map = get_imsm_map(dev_new, 0);
8ca6df95 6855 map->num_members = map->num_members * 2;
1a2487c2 6856 map->map_state = IMSM_T_STATE_DEGRADED;
8ca6df95
KW
6857 map->num_domains = 2;
6858 map->raid_level = 1;
6859 /* replace dev<->dev_new */
6860 dv->dev = dev_new;
6861 }
bb025c2f
KW
6862 /* update disk order table */
6863 for (du = super->disks; du; du = du->next)
6864 if (du->index >= 0)
6865 set_imsm_ord_tbl_ent(map, du->index, du->index);
8ca6df95 6866 for (du = super->missing; du; du = du->next)
1a2487c2
KW
6867 if (du->index >= 0) {
6868 set_imsm_ord_tbl_ent(map, du->index, du->index);
6869 mark_missing(dev_new, &du->disk, du->index);
6870 }
bb025c2f
KW
6871
6872 return 1;
6873}
6874
e8319a19
DW
6875static void imsm_process_update(struct supertype *st,
6876 struct metadata_update *update)
6877{
6878 /**
6879 * crack open the metadata_update envelope to find the update record
6880 * update can be one of:
d195167d
AK
6881 * update_reshape_container_disks - all the arrays in the container
6882 * are being reshaped to have more devices. We need to mark
6883 * the arrays for general migration and convert selected spares
6884 * into active devices.
6885 * update_activate_spare - a spare device has replaced a failed
e8319a19
DW
6886 * device in an array, update the disk_ord_tbl. If this disk is
6887 * present in all member arrays then also clear the SPARE_DISK
6888 * flag
d195167d
AK
6889 * update_create_array
6890 * update_kill_array
6891 * update_rename_array
6892 * update_add_remove_disk
e8319a19
DW
6893 */
6894 struct intel_super *super = st->sb;
4d7b1503 6895 struct imsm_super *mpb;
e8319a19
DW
6896 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6897
4d7b1503
DW
6898 /* update requires a larger buf but the allocation failed */
6899 if (super->next_len && !super->next_buf) {
6900 super->next_len = 0;
6901 return;
6902 }
6903
6904 if (super->next_buf) {
6905 memcpy(super->next_buf, super->buf, super->len);
6906 free(super->buf);
6907 super->len = super->next_len;
6908 super->buf = super->next_buf;
6909
6910 super->next_len = 0;
6911 super->next_buf = NULL;
6912 }
6913
6914 mpb = super->anchor;
6915
e8319a19 6916 switch (type) {
bb025c2f
KW
6917 case update_takeover: {
6918 struct imsm_update_takeover *u = (void *)update->buf;
1a2487c2
KW
6919 if (apply_takeover_update(u, super, &update->space_list)) {
6920 imsm_update_version_info(super);
bb025c2f 6921 super->updates_pending++;
1a2487c2 6922 }
bb025c2f
KW
6923 break;
6924 }
6925
78b10e66 6926 case update_reshape_container_disks: {
d195167d 6927 struct imsm_update_reshape *u = (void *)update->buf;
2e5dc010
N
6928 if (apply_reshape_container_disks_update(
6929 u, super, &update->space_list))
6930 super->updates_pending++;
78b10e66
N
6931 break;
6932 }
48c5303a 6933 case update_reshape_migration: {
a29911da
PC
6934 struct imsm_update_reshape_migration *u = (void *)update->buf;
6935 if (apply_reshape_migration_update(
6936 u, super, &update->space_list))
6937 super->updates_pending++;
48c5303a
PC
6938 break;
6939 }
e8319a19
DW
6940 case update_activate_spare: {
6941 struct imsm_update_activate_spare *u = (void *) update->buf;
949c47a0 6942 struct imsm_dev *dev = get_imsm_dev(super, u->array);
a965f303 6943 struct imsm_map *map = get_imsm_map(dev, 0);
0c046afd 6944 struct imsm_map *migr_map;
e8319a19
DW
6945 struct active_array *a;
6946 struct imsm_disk *disk;
0c046afd 6947 __u8 to_state;
e8319a19 6948 struct dl *dl;
e8319a19 6949 unsigned int found;
0c046afd 6950 int failed;
98130f40 6951 int victim = get_imsm_disk_idx(dev, u->slot, -1);
e8319a19
DW
6952 int i;
6953
6954 for (dl = super->disks; dl; dl = dl->next)
d23fe947 6955 if (dl == u->dl)
e8319a19
DW
6956 break;
6957
6958 if (!dl) {
6959 fprintf(stderr, "error: imsm_activate_spare passed "
1f24f035
DW
6960 "an unknown disk (index: %d)\n",
6961 u->dl->index);
e8319a19
DW
6962 return;
6963 }
6964
6965 super->updates_pending++;
0c046afd
DW
6966 /* count failures (excluding rebuilds and the victim)
6967 * to determine map[0] state
6968 */
6969 failed = 0;
6970 for (i = 0; i < map->num_members; i++) {
6971 if (i == u->slot)
6972 continue;
98130f40
AK
6973 disk = get_imsm_disk(super,
6974 get_imsm_disk_idx(dev, i, -1));
25ed7e59 6975 if (!disk || is_failed(disk))
0c046afd
DW
6976 failed++;
6977 }
6978
d23fe947
DW
6979 /* adding a pristine spare, assign a new index */
6980 if (dl->index < 0) {
6981 dl->index = super->anchor->num_disks;
6982 super->anchor->num_disks++;
6983 }
d23fe947 6984 disk = &dl->disk;
f2f27e63
DW
6985 disk->status |= CONFIGURED_DISK;
6986 disk->status &= ~SPARE_DISK;
e8319a19 6987
0c046afd
DW
6988 /* mark rebuild */
6989 to_state = imsm_check_degraded(super, dev, failed);
6990 map->map_state = IMSM_T_STATE_DEGRADED;
8e59f3d8 6991 migrate(dev, super, to_state, MIGR_REBUILD);
0c046afd
DW
6992 migr_map = get_imsm_map(dev, 1);
6993 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6994 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6995
148acb7b
DW
6996 /* update the family_num to mark a new container
6997 * generation, being careful to record the existing
6998 * family_num in orig_family_num to clean up after
6999 * earlier mdadm versions that neglected to set it.
7000 */
7001 if (mpb->orig_family_num == 0)
7002 mpb->orig_family_num = mpb->family_num;
7003 mpb->family_num += super->random;
7004
e8319a19
DW
7005 /* count arrays using the victim in the metadata */
7006 found = 0;
7007 for (a = st->arrays; a ; a = a->next) {
949c47a0 7008 dev = get_imsm_dev(super, a->info.container_member);
620b1713
DW
7009 map = get_imsm_map(dev, 0);
7010
7011 if (get_imsm_disk_slot(map, victim) >= 0)
7012 found++;
e8319a19
DW
7013 }
7014
24565c9a 7015 /* delete the victim if it is no longer being
e8319a19
DW
7016 * utilized anywhere
7017 */
e8319a19 7018 if (!found) {
ae6aad82 7019 struct dl **dlp;
24565c9a 7020
47ee5a45
DW
7021 /* We know that 'manager' isn't touching anything,
7022 * so it is safe to delete
7023 */
24565c9a 7024 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
ae6aad82
DW
7025 if ((*dlp)->index == victim)
7026 break;
47ee5a45
DW
7027
7028 /* victim may be on the missing list */
7029 if (!*dlp)
7030 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
7031 if ((*dlp)->index == victim)
7032 break;
24565c9a 7033 imsm_delete(super, dlp, victim);
e8319a19 7034 }
8273f55e
DW
7035 break;
7036 }
7037 case update_create_array: {
7038 /* someone wants to create a new array, we need to be aware of
7039 * a few races/collisions:
7040 * 1/ 'Create' called by two separate instances of mdadm
7041 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
7042 * devices that have since been assimilated via
7043 * activate_spare.
7044 * In the event this update can not be carried out mdadm will
7045 * (FIX ME) notice that its update did not take hold.
7046 */
7047 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 7048 struct intel_dev *dv;
8273f55e
DW
7049 struct imsm_dev *dev;
7050 struct imsm_map *map, *new_map;
7051 unsigned long long start, end;
7052 unsigned long long new_start, new_end;
7053 int i;
54c2c1ea
DW
7054 struct disk_info *inf;
7055 struct dl *dl;
8273f55e
DW
7056
7057 /* handle racing creates: first come first serve */
7058 if (u->dev_idx < mpb->num_raid_devs) {
7059 dprintf("%s: subarray %d already defined\n",
7060 __func__, u->dev_idx);
ba2de7ba 7061 goto create_error;
8273f55e
DW
7062 }
7063
7064 /* check update is next in sequence */
7065 if (u->dev_idx != mpb->num_raid_devs) {
6a3e913e
DW
7066 dprintf("%s: can not create array %d expected index %d\n",
7067 __func__, u->dev_idx, mpb->num_raid_devs);
ba2de7ba 7068 goto create_error;
8273f55e
DW
7069 }
7070
a965f303 7071 new_map = get_imsm_map(&u->dev, 0);
8273f55e
DW
7072 new_start = __le32_to_cpu(new_map->pba_of_lba0);
7073 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
54c2c1ea 7074 inf = get_disk_info(u);
8273f55e
DW
7075
7076 /* handle activate_spare versus create race:
7077 * check to make sure that overlapping arrays do not include
7078 * overlapping disks
7079 */
7080 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 7081 dev = get_imsm_dev(super, i);
a965f303 7082 map = get_imsm_map(dev, 0);
8273f55e
DW
7083 start = __le32_to_cpu(map->pba_of_lba0);
7084 end = start + __le32_to_cpu(map->blocks_per_member);
7085 if ((new_start >= start && new_start <= end) ||
7086 (start >= new_start && start <= new_end))
54c2c1ea
DW
7087 /* overlap */;
7088 else
7089 continue;
7090
7091 if (disks_overlap(super, i, u)) {
8273f55e 7092 dprintf("%s: arrays overlap\n", __func__);
ba2de7ba 7093 goto create_error;
8273f55e
DW
7094 }
7095 }
8273f55e 7096
949c47a0
DW
7097 /* check that prepare update was successful */
7098 if (!update->space) {
7099 dprintf("%s: prepare update failed\n", __func__);
ba2de7ba 7100 goto create_error;
949c47a0
DW
7101 }
7102
54c2c1ea
DW
7103 /* check that all disks are still active before committing
7104 * changes. FIXME: could we instead handle this by creating a
7105 * degraded array? That's probably not what the user expects,
7106 * so better to drop this update on the floor.
7107 */
7108 for (i = 0; i < new_map->num_members; i++) {
7109 dl = serial_to_dl(inf[i].serial, super);
7110 if (!dl) {
7111 dprintf("%s: disk disappeared\n", __func__);
ba2de7ba 7112 goto create_error;
54c2c1ea 7113 }
949c47a0
DW
7114 }
7115
8273f55e 7116 super->updates_pending++;
54c2c1ea
DW
7117
7118 /* convert spares to members and fixup ord_tbl */
7119 for (i = 0; i < new_map->num_members; i++) {
7120 dl = serial_to_dl(inf[i].serial, super);
7121 if (dl->index == -1) {
7122 dl->index = mpb->num_disks;
7123 mpb->num_disks++;
7124 dl->disk.status |= CONFIGURED_DISK;
7125 dl->disk.status &= ~SPARE_DISK;
7126 }
7127 set_imsm_ord_tbl_ent(new_map, i, dl->index);
7128 }
7129
ba2de7ba
DW
7130 dv = update->space;
7131 dev = dv->dev;
949c47a0
DW
7132 update->space = NULL;
7133 imsm_copy_dev(dev, &u->dev);
ba2de7ba
DW
7134 dv->index = u->dev_idx;
7135 dv->next = super->devlist;
7136 super->devlist = dv;
8273f55e 7137 mpb->num_raid_devs++;
8273f55e 7138
4d1313e9 7139 imsm_update_version_info(super);
8273f55e 7140 break;
ba2de7ba
DW
7141 create_error:
7142 /* mdmon knows how to release update->space, but not
7143 * ((struct intel_dev *) update->space)->dev
7144 */
7145 if (update->space) {
7146 dv = update->space;
7147 free(dv->dev);
7148 }
8273f55e 7149 break;
e8319a19 7150 }
33414a01
DW
7151 case update_kill_array: {
7152 struct imsm_update_kill_array *u = (void *) update->buf;
7153 int victim = u->dev_idx;
7154 struct active_array *a;
7155 struct intel_dev **dp;
7156 struct imsm_dev *dev;
7157
7158 /* sanity check that we are not affecting the uuid of
7159 * active arrays, or deleting an active array
7160 *
7161 * FIXME when immutable ids are available, but note that
7162 * we'll also need to fixup the invalidated/active
7163 * subarray indexes in mdstat
7164 */
7165 for (a = st->arrays; a; a = a->next)
7166 if (a->info.container_member >= victim)
7167 break;
7168 /* by definition if mdmon is running at least one array
7169 * is active in the container, so checking
7170 * mpb->num_raid_devs is just extra paranoia
7171 */
7172 dev = get_imsm_dev(super, victim);
7173 if (a || !dev || mpb->num_raid_devs == 1) {
7174 dprintf("failed to delete subarray-%d\n", victim);
7175 break;
7176 }
7177
7178 for (dp = &super->devlist; *dp;)
f21e18ca 7179 if ((*dp)->index == (unsigned)super->current_vol) {
33414a01
DW
7180 *dp = (*dp)->next;
7181 } else {
f21e18ca 7182 if ((*dp)->index > (unsigned)victim)
33414a01
DW
7183 (*dp)->index--;
7184 dp = &(*dp)->next;
7185 }
7186 mpb->num_raid_devs--;
7187 super->updates_pending++;
7188 break;
7189 }
aa534678
DW
7190 case update_rename_array: {
7191 struct imsm_update_rename_array *u = (void *) update->buf;
7192 char name[MAX_RAID_SERIAL_LEN+1];
7193 int target = u->dev_idx;
7194 struct active_array *a;
7195 struct imsm_dev *dev;
7196
7197 /* sanity check that we are not affecting the uuid of
7198 * an active array
7199 */
7200 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
7201 name[MAX_RAID_SERIAL_LEN] = '\0';
7202 for (a = st->arrays; a; a = a->next)
7203 if (a->info.container_member == target)
7204 break;
7205 dev = get_imsm_dev(super, u->dev_idx);
7206 if (a || !dev || !check_name(super, name, 1)) {
7207 dprintf("failed to rename subarray-%d\n", target);
7208 break;
7209 }
7210
cdbe98cd 7211 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
aa534678
DW
7212 super->updates_pending++;
7213 break;
7214 }
1a64be56 7215 case update_add_remove_disk: {
43dad3d6 7216 /* we may be able to repair some arrays if disks are
1a64be56
LM
7217 * being added, check the status of add_remove_disk
7218 * if disks have been added.
7219 */
7220 if (add_remove_disk_update(super)) {
43dad3d6 7221 struct active_array *a;
072b727f
DW
7222
7223 super->updates_pending++;
1a64be56 7224 for (a = st->arrays; a; a = a->next)
43dad3d6
DW
7225 a->check_degraded = 1;
7226 }
43dad3d6 7227 break;
e8319a19 7228 }
1a64be56
LM
7229 default:
7230 fprintf(stderr, "error: unsuported process update type:"
7231 "(type: %d)\n", type);
7232 }
e8319a19 7233}
88758e9d 7234
bc0b9d34
PC
7235static struct mdinfo *get_spares_for_grow(struct supertype *st);
7236
8273f55e
DW
7237static void imsm_prepare_update(struct supertype *st,
7238 struct metadata_update *update)
7239{
949c47a0 7240 /**
4d7b1503
DW
7241 * Allocate space to hold new disk entries, raid-device entries or a new
7242 * mpb if necessary. The manager synchronously waits for updates to
7243 * complete in the monitor, so new mpb buffers allocated here can be
7244 * integrated by the monitor thread without worrying about live pointers
7245 * in the manager thread.
8273f55e 7246 */
949c47a0 7247 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
4d7b1503
DW
7248 struct intel_super *super = st->sb;
7249 struct imsm_super *mpb = super->anchor;
7250 size_t buf_len;
7251 size_t len = 0;
949c47a0
DW
7252
7253 switch (type) {
abedf5fc
KW
7254 case update_takeover: {
7255 struct imsm_update_takeover *u = (void *)update->buf;
7256 if (u->direction == R0_TO_R10) {
7257 void **tail = (void **)&update->space_list;
7258 struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
7259 struct imsm_map *map = get_imsm_map(dev, 0);
7260 int num_members = map->num_members;
7261 void *space;
7262 int size, i;
7263 int err = 0;
7264 /* allocate memory for added disks */
7265 for (i = 0; i < num_members; i++) {
7266 size = sizeof(struct dl);
7267 space = malloc(size);
7268 if (!space) {
7269 err++;
7270 break;
7271 }
7272 *tail = space;
7273 tail = space;
7274 *tail = NULL;
7275 }
7276 /* allocate memory for new device */
7277 size = sizeof_imsm_dev(super->devlist->dev, 0) +
7278 (num_members * sizeof(__u32));
7279 space = malloc(size);
7280 if (!space)
7281 err++;
7282 else {
7283 *tail = space;
7284 tail = space;
7285 *tail = NULL;
7286 }
7287 if (!err) {
7288 len = disks_to_mpb_size(num_members * 2);
7289 } else {
7290 /* if allocation didn't success, free buffer */
7291 while (update->space_list) {
7292 void **sp = update->space_list;
7293 update->space_list = *sp;
7294 free(sp);
7295 }
7296 }
7297 }
7298
7299 break;
7300 }
78b10e66 7301 case update_reshape_container_disks: {
d195167d
AK
7302 /* Every raid device in the container is about to
7303 * gain some more devices, and we will enter a
7304 * reconfiguration.
7305 * So each 'imsm_map' will be bigger, and the imsm_vol
7306 * will now hold 2 of them.
7307 * Thus we need new 'struct imsm_dev' allocations sized
7308 * as sizeof_imsm_dev but with more devices in both maps.
7309 */
7310 struct imsm_update_reshape *u = (void *)update->buf;
7311 struct intel_dev *dl;
7312 void **space_tail = (void**)&update->space_list;
7313
7314 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7315
7316 for (dl = super->devlist; dl; dl = dl->next) {
7317 int size = sizeof_imsm_dev(dl->dev, 1);
7318 void *s;
d677e0b8
AK
7319 if (u->new_raid_disks > u->old_raid_disks)
7320 size += sizeof(__u32)*2*
7321 (u->new_raid_disks - u->old_raid_disks);
d195167d
AK
7322 s = malloc(size);
7323 if (!s)
7324 break;
7325 *space_tail = s;
7326 space_tail = s;
7327 *space_tail = NULL;
7328 }
7329
7330 len = disks_to_mpb_size(u->new_raid_disks);
7331 dprintf("New anchor length is %llu\n", (unsigned long long)len);
78b10e66
N
7332 break;
7333 }
48c5303a 7334 case update_reshape_migration: {
bc0b9d34
PC
7335 /* for migration level 0->5 we need to add disks
7336 * so the same as for container operation we will copy
7337 * device to the bigger location.
7338 * in memory prepared device and new disk area are prepared
7339 * for usage in process update
7340 */
7341 struct imsm_update_reshape_migration *u = (void *)update->buf;
7342 struct intel_dev *id;
7343 void **space_tail = (void **)&update->space_list;
7344 int size;
7345 void *s;
7346 int current_level = -1;
7347
7348 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7349
7350 /* add space for bigger array in update
7351 */
7352 for (id = super->devlist; id; id = id->next) {
7353 if (id->index == (unsigned)u->subdev) {
7354 size = sizeof_imsm_dev(id->dev, 1);
7355 if (u->new_raid_disks > u->old_raid_disks)
7356 size += sizeof(__u32)*2*
7357 (u->new_raid_disks - u->old_raid_disks);
7358 s = malloc(size);
7359 if (!s)
7360 break;
7361 *space_tail = s;
7362 space_tail = s;
7363 *space_tail = NULL;
7364 break;
7365 }
7366 }
7367 if (update->space_list == NULL)
7368 break;
7369
7370 /* add space for disk in update
7371 */
7372 size = sizeof(struct dl);
7373 s = malloc(size);
7374 if (!s) {
7375 free(update->space_list);
7376 update->space_list = NULL;
7377 break;
7378 }
7379 *space_tail = s;
7380 space_tail = s;
7381 *space_tail = NULL;
7382
7383 /* add spare device to update
7384 */
7385 for (id = super->devlist ; id; id = id->next)
7386 if (id->index == (unsigned)u->subdev) {
7387 struct imsm_dev *dev;
7388 struct imsm_map *map;
7389
7390 dev = get_imsm_dev(super, u->subdev);
7391 map = get_imsm_map(dev, 0);
7392 current_level = map->raid_level;
7393 break;
7394 }
7395 if ((u->new_level == 5) && (u->new_level != current_level)) {
7396 struct mdinfo *spares;
7397
7398 spares = get_spares_for_grow(st);
7399 if (spares) {
7400 struct dl *dl;
7401 struct mdinfo *dev;
7402
7403 dev = spares->devs;
7404 if (dev) {
7405 u->new_disks[0] =
7406 makedev(dev->disk.major,
7407 dev->disk.minor);
7408 dl = get_disk_super(super,
7409 dev->disk.major,
7410 dev->disk.minor);
7411 dl->index = u->old_raid_disks;
7412 dev = dev->next;
7413 }
7414 sysfs_free(spares);
7415 }
7416 }
7417 len = disks_to_mpb_size(u->new_raid_disks);
7418 dprintf("New anchor length is %llu\n", (unsigned long long)len);
48c5303a
PC
7419 break;
7420 }
949c47a0
DW
7421 case update_create_array: {
7422 struct imsm_update_create_array *u = (void *) update->buf;
ba2de7ba 7423 struct intel_dev *dv;
54c2c1ea
DW
7424 struct imsm_dev *dev = &u->dev;
7425 struct imsm_map *map = get_imsm_map(dev, 0);
7426 struct dl *dl;
7427 struct disk_info *inf;
7428 int i;
7429 int activate = 0;
949c47a0 7430
54c2c1ea
DW
7431 inf = get_disk_info(u);
7432 len = sizeof_imsm_dev(dev, 1);
ba2de7ba
DW
7433 /* allocate a new super->devlist entry */
7434 dv = malloc(sizeof(*dv));
7435 if (dv) {
7436 dv->dev = malloc(len);
7437 if (dv->dev)
7438 update->space = dv;
7439 else {
7440 free(dv);
7441 update->space = NULL;
7442 }
7443 }
949c47a0 7444
54c2c1ea
DW
7445 /* count how many spares will be converted to members */
7446 for (i = 0; i < map->num_members; i++) {
7447 dl = serial_to_dl(inf[i].serial, super);
7448 if (!dl) {
7449 /* hmm maybe it failed?, nothing we can do about
7450 * it here
7451 */
7452 continue;
7453 }
7454 if (count_memberships(dl, super) == 0)
7455 activate++;
7456 }
7457 len += activate * sizeof(struct imsm_disk);
949c47a0
DW
7458 break;
7459 default:
7460 break;
7461 }
7462 }
8273f55e 7463
4d7b1503
DW
7464 /* check if we need a larger metadata buffer */
7465 if (super->next_buf)
7466 buf_len = super->next_len;
7467 else
7468 buf_len = super->len;
7469
7470 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
7471 /* ok we need a larger buf than what is currently allocated
7472 * if this allocation fails process_update will notice that
7473 * ->next_len is set and ->next_buf is NULL
7474 */
7475 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
7476 if (super->next_buf)
7477 free(super->next_buf);
7478
7479 super->next_len = buf_len;
1f45a8ad
DW
7480 if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
7481 memset(super->next_buf, 0, buf_len);
7482 else
4d7b1503
DW
7483 super->next_buf = NULL;
7484 }
8273f55e
DW
7485}
7486
ae6aad82 7487/* must be called while manager is quiesced */
f21e18ca 7488static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
ae6aad82
DW
7489{
7490 struct imsm_super *mpb = super->anchor;
ae6aad82
DW
7491 struct dl *iter;
7492 struct imsm_dev *dev;
7493 struct imsm_map *map;
24565c9a
DW
7494 int i, j, num_members;
7495 __u32 ord;
ae6aad82 7496
24565c9a
DW
7497 dprintf("%s: deleting device[%d] from imsm_super\n",
7498 __func__, index);
ae6aad82
DW
7499
7500 /* shift all indexes down one */
7501 for (iter = super->disks; iter; iter = iter->next)
f21e18ca 7502 if (iter->index > (int)index)
ae6aad82 7503 iter->index--;
47ee5a45 7504 for (iter = super->missing; iter; iter = iter->next)
f21e18ca 7505 if (iter->index > (int)index)
47ee5a45 7506 iter->index--;
ae6aad82
DW
7507
7508 for (i = 0; i < mpb->num_raid_devs; i++) {
7509 dev = get_imsm_dev(super, i);
7510 map = get_imsm_map(dev, 0);
24565c9a
DW
7511 num_members = map->num_members;
7512 for (j = 0; j < num_members; j++) {
7513 /* update ord entries being careful not to propagate
7514 * ord-flags to the first map
7515 */
98130f40 7516 ord = get_imsm_ord_tbl_ent(dev, j, -1);
ae6aad82 7517
24565c9a
DW
7518 if (ord_to_idx(ord) <= index)
7519 continue;
ae6aad82 7520
24565c9a
DW
7521 map = get_imsm_map(dev, 0);
7522 set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
7523 map = get_imsm_map(dev, 1);
7524 if (map)
7525 set_imsm_ord_tbl_ent(map, j, ord - 1);
ae6aad82
DW
7526 }
7527 }
7528
7529 mpb->num_disks--;
7530 super->updates_pending++;
24565c9a
DW
7531 if (*dlp) {
7532 struct dl *dl = *dlp;
7533
7534 *dlp = (*dlp)->next;
7535 __free_imsm_disk(dl);
7536 }
ae6aad82
DW
7537}
7538
687629c2
AK
7539/*******************************************************************************
7540 * Function: open_backup_targets
7541 * Description: Function opens file descriptors for all devices given in
7542 * info->devs
7543 * Parameters:
7544 * info : general array info
7545 * raid_disks : number of disks
7546 * raid_fds : table of device's file descriptors
7547 * Returns:
7548 * 0 : success
7549 * -1 : fail
7550 ******************************************************************************/
7551int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
7552{
7553 struct mdinfo *sd;
7554
7555 for (sd = info->devs ; sd ; sd = sd->next) {
7556 char *dn;
7557
7558 if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
7559 dprintf("disk is faulty!!\n");
7560 continue;
7561 }
7562
7563 if ((sd->disk.raid_disk >= raid_disks) ||
7564 (sd->disk.raid_disk < 0))
7565 continue;
7566
7567 dn = map_dev(sd->disk.major,
7568 sd->disk.minor, 1);
7569 raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
7570 if (raid_fds[sd->disk.raid_disk] < 0) {
7571 fprintf(stderr, "cannot open component\n");
7572 return -1;
7573 }
7574 }
7575 return 0;
7576}
7577
7578/*******************************************************************************
7579 * Function: init_migr_record_imsm
7580 * Description: Function inits imsm migration record
7581 * Parameters:
7582 * super : imsm internal array info
7583 * dev : device under migration
7584 * info : general array info to find the smallest device
7585 * Returns:
7586 * none
7587 ******************************************************************************/
7588void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
7589 struct mdinfo *info)
7590{
7591 struct intel_super *super = st->sb;
7592 struct migr_record *migr_rec = super->migr_rec;
7593 int new_data_disks;
7594 unsigned long long dsize, dev_sectors;
7595 long long unsigned min_dev_sectors = -1LLU;
7596 struct mdinfo *sd;
7597 char nm[30];
7598 int fd;
7599 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7600 struct imsm_map *map_src = get_imsm_map(dev, 1);
7601 unsigned long long num_migr_units;
7602
7603 unsigned long long array_blocks =
7604 (((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) +
7605 __le32_to_cpu(dev->size_low);
7606
7607 memset(migr_rec, 0, sizeof(struct migr_record));
7608 migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);
7609
7610 /* only ascending reshape supported now */
7611 migr_rec->ascending_migr = __cpu_to_le32(1);
7612
7613 migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
7614 max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
7615 migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip;
7616 new_data_disks = imsm_num_data_members(dev, 0);
7617 migr_rec->blocks_per_unit =
7618 __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
7619 migr_rec->dest_depth_per_unit =
7620 __cpu_to_le32(migr_rec->dest_depth_per_unit);
7621
7622 num_migr_units =
7623 array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);
7624
7625 if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
7626 num_migr_units++;
7627 migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);
7628
7629 migr_rec->post_migr_vol_cap = dev->size_low;
7630 migr_rec->post_migr_vol_cap_hi = dev->size_high;
7631
7632
7633 /* Find the smallest dev */
7634 for (sd = info->devs ; sd ; sd = sd->next) {
7635 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7636 fd = dev_open(nm, O_RDONLY);
7637 if (fd < 0)
7638 continue;
7639 get_dev_size(fd, NULL, &dsize);
7640 dev_sectors = dsize / 512;
7641 if (dev_sectors < min_dev_sectors)
7642 min_dev_sectors = dev_sectors;
7643 close(fd);
7644 }
7645 migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
7646 RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
7647
7648 write_imsm_migr_rec(st);
7649
7650 return;
7651}
7652
7653/*******************************************************************************
7654 * Function: save_backup_imsm
7655 * Description: Function saves critical data stripes to Migration Copy Area
7656 * and updates the current migration unit status.
7657 * Use restore_stripes() to form a destination stripe,
7658 * and to write it to the Copy Area.
7659 * Parameters:
7660 * st : supertype information
7661 * info : general array info
7662 * buf : input buffer
7663 * write_offset : address of data to backup
7664 * length : length of data to backup (blocks_per_unit)
7665 * Returns:
7666 * 0 : success
7667 *, -1 : fail
7668 ******************************************************************************/
7669int save_backup_imsm(struct supertype *st,
7670 struct imsm_dev *dev,
7671 struct mdinfo *info,
7672 void *buf,
7673 int new_data,
7674 int length)
7675{
7676 int rv = -1;
7677 struct intel_super *super = st->sb;
7678 unsigned long long *target_offsets = NULL;
7679 int *targets = NULL;
7680 int i;
7681 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7682 int new_disks = map_dest->num_members;
7683
7684 targets = malloc(new_disks * sizeof(int));
7685 if (!targets)
7686 goto abort;
7687
7688 target_offsets = malloc(new_disks * sizeof(unsigned long long));
7689 if (!target_offsets)
7690 goto abort;
7691
7692 for (i = 0; i < new_disks; i++) {
7693 targets[i] = -1;
7694 target_offsets[i] = (unsigned long long)
7695 __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
7696 }
7697
7698 if (open_backup_targets(info, new_disks, targets))
7699 goto abort;
7700
7701 if (restore_stripes(targets, /* list of dest devices */
7702 target_offsets, /* migration record offsets */
7703 new_disks,
7704 info->new_chunk,
7705 info->new_level,
7706 info->new_layout,
7707 -1, /* source backup file descriptor */
7708 0, /* input buf offset
7709 * always 0 buf is already offset */
7710 0,
7711 length,
7712 buf) != 0) {
7713 fprintf(stderr, Name ": Error restoring stripes\n");
7714 goto abort;
7715 }
7716
7717 rv = 0;
7718
7719abort:
7720 if (targets) {
7721 for (i = 0; i < new_disks; i++)
7722 if (targets[i] >= 0)
7723 close(targets[i]);
7724 free(targets);
7725 }
7726 free(target_offsets);
7727
7728 return rv;
7729}
7730
7731/*******************************************************************************
7732 * Function: save_checkpoint_imsm
7733 * Description: Function called for current unit status update
7734 * in the migration record. It writes it to disk.
7735 * Parameters:
7736 * super : imsm internal array info
7737 * info : general array info
7738 * Returns:
7739 * 0: success
7740 * 1: failure
7741 ******************************************************************************/
7742int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
7743{
7744 struct intel_super *super = st->sb;
7745 load_imsm_migr_rec(super, info);
7746 if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
7747 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7748 return 1;
7749 }
7750
7751 super->migr_rec->curr_migr_unit =
7752 __cpu_to_le32(info->reshape_progress /
7753 __le32_to_cpu(super->migr_rec->blocks_per_unit));
7754 super->migr_rec->rec_status = __cpu_to_le32(state);
7755 super->migr_rec->dest_1st_member_lba =
7756 __cpu_to_le32((__le32_to_cpu(super->migr_rec->curr_migr_unit))
7757 * __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
7758 if (write_imsm_migr_rec(st) < 0) {
7759 dprintf("imsm: Cannot write migration record "
7760 "outside backup area\n");
7761 return 1;
7762 }
7763
7764 return 0;
7765}
7766
276d77db
AK
7767static __u64 blocks_per_migr_unit(struct intel_super *super,
7768 struct imsm_dev *dev);
7769
7770/*******************************************************************************
7771 * Function: recover_backup_imsm
7772 * Description: Function recovers critical data from the Migration Copy Area
7773 * while assembling an array.
7774 * Parameters:
7775 * super : imsm internal array info
7776 * info : general array info
7777 * Returns:
7778 * 0 : success (or there is no data to recover)
7779 * 1 : fail
7780 ******************************************************************************/
7781int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
7782{
7783 struct intel_super *super = st->sb;
7784 struct migr_record *migr_rec = super->migr_rec;
7785 struct imsm_map *map_dest = NULL;
7786 struct intel_dev *id = NULL;
7787 unsigned long long read_offset;
7788 unsigned long long write_offset;
7789 unsigned unit_len;
7790 int *targets = NULL;
7791 int new_disks, i, err;
7792 char *buf = NULL;
7793 int retval = 1;
7794 unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
7795 unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
7796 int ascending = __le32_to_cpu(migr_rec->ascending_migr);
7797 char buffer[20];
7798
7799 err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
7800 if (err < 1)
7801 return 1;
7802
7803 /* recover data only during assemblation */
7804 if (strncmp(buffer, "inactive", 8) != 0)
7805 return 0;
7806 /* no data to recover */
7807 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
7808 return 0;
7809 if (curr_migr_unit >= num_migr_units)
7810 return 1;
7811
7812 /* find device during reshape */
7813 for (id = super->devlist; id; id = id->next)
7814 if (is_gen_migration(id->dev))
7815 break;
7816 if (id == NULL)
7817 return 1;
7818
7819 map_dest = get_imsm_map(id->dev, 0);
7820 new_disks = map_dest->num_members;
7821
7822 read_offset = (unsigned long long)
7823 __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
7824
7825 write_offset = ((unsigned long long)
7826 __le32_to_cpu(migr_rec->dest_1st_member_lba) +
7827 info->data_offset) * 512;
7828
7829 unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
7830 if (posix_memalign((void **)&buf, 512, unit_len) != 0)
7831 goto abort;
7832 targets = malloc(new_disks * sizeof(int));
7833 if (!targets)
7834 goto abort;
7835
7836 open_backup_targets(info, new_disks, targets);
7837
7838 for (i = 0; i < new_disks; i++) {
7839 if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
7840 fprintf(stderr,
7841 Name ": Cannot seek to block: %s\n",
7842 strerror(errno));
7843 goto abort;
7844 }
7845 if (read(targets[i], buf, unit_len) != unit_len) {
7846 fprintf(stderr,
7847 Name ": Cannot read copy area block: %s\n",
7848 strerror(errno));
7849 goto abort;
7850 }
7851 if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
7852 fprintf(stderr,
7853 Name ": Cannot seek to block: %s\n",
7854 strerror(errno));
7855 goto abort;
7856 }
7857 if (write(targets[i], buf, unit_len) != unit_len) {
7858 fprintf(stderr,
7859 Name ": Cannot restore block: %s\n",
7860 strerror(errno));
7861 goto abort;
7862 }
7863 }
7864
7865 if (ascending && curr_migr_unit < (num_migr_units-1))
7866 curr_migr_unit++;
7867
7868 migr_rec->curr_migr_unit = __le32_to_cpu(curr_migr_unit);
7869 super->migr_rec->rec_status = __cpu_to_le32(UNIT_SRC_NORMAL);
7870 if (write_imsm_migr_rec(st) == 0) {
7871 __u64 blocks_per_unit = blocks_per_migr_unit(super, id->dev);
7872 info->reshape_progress = curr_migr_unit * blocks_per_unit;
7873 retval = 0;
7874 }
7875
7876abort:
7877 if (targets) {
7878 for (i = 0; i < new_disks; i++)
7879 if (targets[i])
7880 close(targets[i]);
7881 free(targets);
7882 }
7883 free(buf);
7884 return retval;
7885}
7886
2cda7640
ML
7887static char disk_by_path[] = "/dev/disk/by-path/";
7888
7889static const char *imsm_get_disk_controller_domain(const char *path)
7890{
2cda7640 7891 char disk_path[PATH_MAX];
96234762
LM
7892 char *drv=NULL;
7893 struct stat st;
2cda7640 7894
96234762
LM
7895 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7896 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7897 if (stat(disk_path, &st) == 0) {
7898 struct sys_dev* hba;
7899 char *path=NULL;
7900
7901 path = devt_to_devpath(st.st_rdev);
7902 if (path == NULL)
7903 return "unknown";
7904 hba = find_disk_attached_hba(-1, path);
7905 if (hba && hba->type == SYS_DEV_SAS)
7906 drv = "isci";
7907 else if (hba && hba->type == SYS_DEV_SATA)
7908 drv = "ahci";
7909 else
7910 drv = "unknown";
7911 dprintf("path: %s hba: %s attached: %s\n",
7912 path, (hba) ? hba->path : "NULL", drv);
7913 free(path);
7914 if (hba)
7915 free_sys_dev(&hba);
2cda7640 7916 }
96234762 7917 return drv;
2cda7640
ML
7918}
7919
78b10e66
N
7920static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7921{
7922 char subdev_name[20];
7923 struct mdstat_ent *mdstat;
7924
7925 sprintf(subdev_name, "%d", subdev);
7926 mdstat = mdstat_by_subdev(subdev_name, container);
7927 if (!mdstat)
7928 return -1;
7929
7930 *minor = mdstat->devnum;
7931 free_mdstat(mdstat);
7932 return 0;
7933}
7934
7935static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7936 struct geo_params *geo,
7937 int *old_raid_disks)
7938{
694575e7
KW
7939 /* currently we only support increasing the number of devices
7940 * for a container. This increases the number of device for each
7941 * member array. They must all be RAID0 or RAID5.
7942 */
78b10e66
N
7943 int ret_val = 0;
7944 struct mdinfo *info, *member;
7945 int devices_that_can_grow = 0;
7946
7947 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7948 "st->devnum = (%i)\n",
7949 st->devnum);
7950
7951 if (geo->size != -1 ||
7952 geo->level != UnSet ||
7953 geo->layout != UnSet ||
7954 geo->chunksize != 0 ||
7955 geo->raid_disks == UnSet) {
7956 dprintf("imsm: Container operation is allowed for "
7957 "raid disks number change only.\n");
7958 return ret_val;
7959 }
7960
7961 info = container_content_imsm(st, NULL);
7962 for (member = info; member; member = member->next) {
7963 int result;
7964 int minor;
7965
7966 dprintf("imsm: checking device_num: %i\n",
7967 member->container_member);
7968
d7d205bd 7969 if (geo->raid_disks <= member->array.raid_disks) {
78b10e66
N
7970 /* we work on container for Online Capacity Expansion
7971 * only so raid_disks has to grow
7972 */
7973 dprintf("imsm: for container operation raid disks "
7974 "increase is required\n");
7975 break;
7976 }
7977
7978 if ((info->array.level != 0) &&
7979 (info->array.level != 5)) {
7980 /* we cannot use this container with other raid level
7981 */
690aae1a 7982 dprintf("imsm: for container operation wrong"
78b10e66
N
7983 " raid level (%i) detected\n",
7984 info->array.level);
7985 break;
7986 } else {
7987 /* check for platform support
7988 * for this raid level configuration
7989 */
7990 struct intel_super *super = st->sb;
7991 if (!is_raid_level_supported(super->orom,
7992 member->array.level,
7993 geo->raid_disks)) {
690aae1a 7994 dprintf("platform does not support raid%d with"
78b10e66
N
7995 " %d disk%s\n",
7996 info->array.level,
7997 geo->raid_disks,
7998 geo->raid_disks > 1 ? "s" : "");
7999 break;
8000 }
2a4a08e7
AK
8001 /* check if component size is aligned to chunk size
8002 */
8003 if (info->component_size %
8004 (info->array.chunk_size/512)) {
8005 dprintf("Component size is not aligned to "
8006 "chunk size\n");
8007 break;
8008 }
78b10e66
N
8009 }
8010
8011 if (*old_raid_disks &&
8012 info->array.raid_disks != *old_raid_disks)
8013 break;
8014 *old_raid_disks = info->array.raid_disks;
8015
8016 /* All raid5 and raid0 volumes in container
8017 * have to be ready for Online Capacity Expansion
8018 * so they need to be assembled. We have already
8019 * checked that no recovery etc is happening.
8020 */
8021 result = imsm_find_array_minor_by_subdev(member->container_member,
8022 st->container_dev,
8023 &minor);
8024 if (result < 0) {
8025 dprintf("imsm: cannot find array\n");
8026 break;
8027 }
8028 devices_that_can_grow++;
8029 }
8030 sysfs_free(info);
8031 if (!member && devices_that_can_grow)
8032 ret_val = 1;
8033
8034 if (ret_val)
8035 dprintf("\tContainer operation allowed\n");
8036 else
8037 dprintf("\tError: %i\n", ret_val);
8038
8039 return ret_val;
8040}
8041
/* Function: get_spares_for_grow
 * Description: Asks container_choose_spares() for the spare devices
 * available in the container, passing the minimum acceptable spare size
 * reported by min_acceptable_spare_size_imsm().
 * Parameters: Pointer to the supertype structure
 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
 * NULL if fail
 */
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
	return container_choose_spares(st,
				       min_acceptable_spare_size_imsm(st),
				       NULL, NULL, NULL, 0);
}
8054
8055/******************************************************************************
8056 * function: imsm_create_metadata_update_for_reshape
8057 * Function creates update for whole IMSM container.
8058 *
8059 ******************************************************************************/
8060static int imsm_create_metadata_update_for_reshape(
8061 struct supertype *st,
8062 struct geo_params *geo,
8063 int old_raid_disks,
8064 struct imsm_update_reshape **updatep)
8065{
8066 struct intel_super *super = st->sb;
8067 struct imsm_super *mpb = super->anchor;
8068 int update_memory_size = 0;
8069 struct imsm_update_reshape *u = NULL;
8070 struct mdinfo *spares = NULL;
8071 int i;
8072 int delta_disks = 0;
bbd24d86 8073 struct mdinfo *dev;
78b10e66
N
8074
8075 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
8076 geo->raid_disks);
8077
8078 delta_disks = geo->raid_disks - old_raid_disks;
8079
8080 /* size of all update data without anchor */
8081 update_memory_size = sizeof(struct imsm_update_reshape);
8082
8083 /* now add space for spare disks that we need to add. */
8084 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
8085
8086 u = calloc(1, update_memory_size);
8087 if (u == NULL) {
8088 dprintf("error: "
8089 "cannot get memory for imsm_update_reshape update\n");
8090 return 0;
8091 }
8092 u->type = update_reshape_container_disks;
8093 u->old_raid_disks = old_raid_disks;
8094 u->new_raid_disks = geo->raid_disks;
8095
8096 /* now get spare disks list
8097 */
8098 spares = get_spares_for_grow(st);
8099
8100 if (spares == NULL
8101 || delta_disks > spares->array.spare_disks) {
e14e5960
KW
8102 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
8103 "for %s.\n", geo->dev_name);
78b10e66
N
8104 goto abort;
8105 }
8106
8107 /* we have got spares
8108 * update disk list in imsm_disk list table in anchor
8109 */
8110 dprintf("imsm: %i spares are available.\n\n",
8111 spares->array.spare_disks);
8112
bbd24d86 8113 dev = spares->devs;
78b10e66 8114 for (i = 0; i < delta_disks; i++) {
78b10e66
N
8115 struct dl *dl;
8116
bbd24d86
AK
8117 if (dev == NULL)
8118 break;
78b10e66
N
8119 u->new_disks[i] = makedev(dev->disk.major,
8120 dev->disk.minor);
8121 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
ee4beede
AK
8122 dl->index = mpb->num_disks;
8123 mpb->num_disks++;
bbd24d86 8124 dev = dev->next;
78b10e66 8125 }
78b10e66
N
8126
8127abort:
8128 /* free spares
8129 */
8130 sysfs_free(spares);
8131
d677e0b8 8132 dprintf("imsm: reshape update preparation :");
78b10e66 8133 if (i == delta_disks) {
d677e0b8 8134 dprintf(" OK\n");
78b10e66
N
8135 *updatep = u;
8136 return update_memory_size;
8137 }
8138 free(u);
d677e0b8 8139 dprintf(" Error\n");
78b10e66
N
8140
8141 return 0;
8142}
8143
48c5303a
PC
8144/******************************************************************************
8145 * function: imsm_create_metadata_update_for_migration()
8146 * Creates update for IMSM array.
8147 *
8148 ******************************************************************************/
8149static int imsm_create_metadata_update_for_migration(
8150 struct supertype *st,
8151 struct geo_params *geo,
8152 struct imsm_update_reshape_migration **updatep)
8153{
8154 struct intel_super *super = st->sb;
8155 int update_memory_size = 0;
8156 struct imsm_update_reshape_migration *u = NULL;
8157 struct imsm_dev *dev;
8158 int previous_level = -1;
8159
8160 dprintf("imsm_create_metadata_update_for_migration(enter)"
8161 " New Level = %i\n", geo->level);
8162
8163 /* size of all update data without anchor */
8164 update_memory_size = sizeof(struct imsm_update_reshape_migration);
8165
8166 u = calloc(1, update_memory_size);
8167 if (u == NULL) {
8168 dprintf("error: cannot get memory for "
8169 "imsm_create_metadata_update_for_migration\n");
8170 return 0;
8171 }
8172 u->type = update_reshape_migration;
8173 u->subdev = super->current_vol;
8174 u->new_level = geo->level;
8175 u->new_layout = geo->layout;
8176 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
8177 u->new_disks[0] = -1;
4bba0439 8178 u->new_chunksize = -1;
48c5303a
PC
8179
8180 dev = get_imsm_dev(super, u->subdev);
8181 if (dev) {
8182 struct imsm_map *map;
8183
8184 map = get_imsm_map(dev, 0);
4bba0439
PC
8185 if (map) {
8186 int current_chunk_size =
8187 __le16_to_cpu(map->blocks_per_strip) / 2;
8188
8189 if (geo->chunksize != current_chunk_size) {
8190 u->new_chunksize = geo->chunksize / 1024;
8191 dprintf("imsm: "
8192 "chunk size change from %i to %i\n",
8193 current_chunk_size, u->new_chunksize);
8194 }
48c5303a 8195 previous_level = map->raid_level;
4bba0439 8196 }
48c5303a
PC
8197 }
8198 if ((geo->level == 5) && (previous_level == 0)) {
8199 struct mdinfo *spares = NULL;
8200
8201 u->new_raid_disks++;
8202 spares = get_spares_for_grow(st);
8203 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
8204 free(u);
8205 sysfs_free(spares);
8206 update_memory_size = 0;
8207 dprintf("error: cannot get spare device "
8208 "for requested migration");
8209 return 0;
8210 }
8211 sysfs_free(spares);
8212 }
8213 dprintf("imsm: reshape update preparation : OK\n");
8214 *updatep = u;
8215
8216 return update_memory_size;
8217}
8218
8dd70bce
AK
8219static void imsm_update_metadata_locally(struct supertype *st,
8220 void *buf, int len)
8221{
8222 struct metadata_update mu;
8223
8224 mu.buf = buf;
8225 mu.len = len;
8226 mu.space = NULL;
8227 mu.space_list = NULL;
8228 mu.next = NULL;
8229 imsm_prepare_update(st, &mu);
8230 imsm_process_update(st, &mu);
8231
8232 while (mu.space_list) {
8233 void **space = mu.space_list;
8234 mu.space_list = *space;
8235 free(space);
8236 }
8237}
78b10e66 8238
471bceb6 8239/***************************************************************************
694575e7 8240* Function: imsm_analyze_change
471bceb6
KW
8241* Description: Function analyze change for single volume
8242* and validate if transition is supported
694575e7
KW
8243* Parameters: Geometry parameters, supertype structure
8244* Returns: Operation type code on success, -1 if fail
471bceb6
KW
8245****************************************************************************/
8246enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
8247 struct geo_params *geo)
694575e7 8248{
471bceb6
KW
8249 struct mdinfo info;
8250 int change = -1;
8251 int check_devs = 0;
c21e737b 8252 int chunk;
471bceb6
KW
8253
8254 getinfo_super_imsm_volume(st, &info, NULL);
8255
8256 if ((geo->level != info.array.level) &&
8257 (geo->level >= 0) &&
8258 (geo->level != UnSet)) {
8259 switch (info.array.level) {
8260 case 0:
8261 if (geo->level == 5) {
b5347799 8262 change = CH_MIGRATION;
471bceb6
KW
8263 check_devs = 1;
8264 }
8265 if (geo->level == 10) {
8266 change = CH_TAKEOVER;
8267 check_devs = 1;
8268 }
dfe77a9e
KW
8269 break;
8270 case 1:
8271 if (geo->level == 0) {
8272 change = CH_TAKEOVER;
8273 check_devs = 1;
8274 }
471bceb6 8275 break;
471bceb6
KW
8276 case 10:
8277 if (geo->level == 0) {
8278 change = CH_TAKEOVER;
8279 check_devs = 1;
8280 }
8281 break;
8282 }
8283 if (change == -1) {
8284 fprintf(stderr,
8285 Name " Error. Level Migration from %d to %d "
8286 "not supported!\n",
8287 info.array.level, geo->level);
8288 goto analyse_change_exit;
8289 }
8290 } else
8291 geo->level = info.array.level;
8292
8293 if ((geo->layout != info.array.layout)
8294 && ((geo->layout != UnSet) && (geo->layout != -1))) {
b5347799 8295 change = CH_MIGRATION;
471bceb6
KW
8296 if ((info.array.layout == 0)
8297 && (info.array.level == 5)
8298 && (geo->layout == 5)) {
8299 /* reshape 5 -> 4 */
8300 } else if ((info.array.layout == 5)
8301 && (info.array.level == 5)
8302 && (geo->layout == 0)) {
8303 /* reshape 4 -> 5 */
8304 geo->layout = 0;
8305 geo->level = 5;
8306 } else {
8307 fprintf(stderr,
8308 Name " Error. Layout Migration from %d to %d "
8309 "not supported!\n",
8310 info.array.layout, geo->layout);
8311 change = -1;
8312 goto analyse_change_exit;
8313 }
8314 } else
8315 geo->layout = info.array.layout;
8316
8317 if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
8318 && (geo->chunksize != info.array.chunk_size))
b5347799 8319 change = CH_MIGRATION;
471bceb6
KW
8320 else
8321 geo->chunksize = info.array.chunk_size;
8322
c21e737b 8323 chunk = geo->chunksize / 1024;
471bceb6
KW
8324 if (!validate_geometry_imsm(st,
8325 geo->level,
8326 geo->layout,
8327 geo->raid_disks,
c21e737b 8328 &chunk,
471bceb6
KW
8329 geo->size,
8330 0, 0, 1))
8331 change = -1;
8332
8333 if (check_devs) {
8334 struct intel_super *super = st->sb;
8335 struct imsm_super *mpb = super->anchor;
8336
8337 if (mpb->num_raid_devs > 1) {
8338 fprintf(stderr,
8339 Name " Error. Cannot perform operation on %s"
8340 "- for this operation it MUST be single "
8341 "array in container\n",
8342 geo->dev_name);
8343 change = -1;
8344 }
8345 }
8346
8347analyse_change_exit:
8348
8349 return change;
694575e7
KW
8350}
8351
bb025c2f
KW
8352int imsm_takeover(struct supertype *st, struct geo_params *geo)
8353{
8354 struct intel_super *super = st->sb;
8355 struct imsm_update_takeover *u;
8356
8357 u = malloc(sizeof(struct imsm_update_takeover));
8358 if (u == NULL)
8359 return 1;
8360
8361 u->type = update_takeover;
8362 u->subarray = super->current_vol;
8363
8364 /* 10->0 transition */
8365 if (geo->level == 0)
8366 u->direction = R10_TO_R0;
8367
0529c688
KW
8368 /* 0->10 transition */
8369 if (geo->level == 10)
8370 u->direction = R0_TO_R10;
8371
bb025c2f
KW
8372 /* update metadata locally */
8373 imsm_update_metadata_locally(st, u,
8374 sizeof(struct imsm_update_takeover));
8375 /* and possibly remotely */
8376 if (st->update_tail)
8377 append_metadata_update(st, u,
8378 sizeof(struct imsm_update_takeover));
8379 else
8380 free(u);
8381
8382 return 0;
8383}
8384
6dc0be30
AK
/* Print the experimental-feature warning on stderr and ask the user for
 * confirmation.
 *
 * Returns whatever ask() returns; the caller in this file treats 0 as
 * "do not continue".
 */
static int warn_user_about_risk(void)
{
	static const char warning[] =
		"\nThis is an experimental feature. Data on the RAID volume(s) "
		"can be lost!!!\n\n"
		"To continue command execution please make sure that\n"
		"the grow process will not be interrupted. Use safe power\n"
		"supply to avoid unexpected system reboot. Make sure that\n"
		"reshaped container is not assembled automatically during\n"
		"system boot.\n"
		"If reshape is interrupted, assemble array manually\n"
		"using e.g. '-Ac' option and up to date mdadm.conf file.\n"
		"Assembly in scan mode is not possible in such case.\n"
		"Growing container with boot array is not possible.\n"
		"If boot array reshape is interrupted, whole file system\n"
		"can be lost.\n\n";
	int answer;

	fputs(warning, stderr);
	answer = ask("Do you want to continue? ");
	fputc('\n', stderr);

	return answer;
}
8408
78b10e66
N
8409static int imsm_reshape_super(struct supertype *st, long long size, int level,
8410 int layout, int chunksize, int raid_disks,
41784c88
AK
8411 int delta_disks, char *backup, char *dev,
8412 int verbose)
78b10e66 8413{
78b10e66
N
8414 int ret_val = 1;
8415 struct geo_params geo;
8416
8417 dprintf("imsm: reshape_super called.\n");
8418
71204a50 8419 memset(&geo, 0, sizeof(struct geo_params));
78b10e66
N
8420
8421 geo.dev_name = dev;
694575e7 8422 geo.dev_id = st->devnum;
78b10e66
N
8423 geo.size = size;
8424 geo.level = level;
8425 geo.layout = layout;
8426 geo.chunksize = chunksize;
8427 geo.raid_disks = raid_disks;
41784c88
AK
8428 if (delta_disks != UnSet)
8429 geo.raid_disks += delta_disks;
78b10e66
N
8430
8431 dprintf("\tfor level : %i\n", geo.level);
8432 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
8433
8434 if (experimental() == 0)
8435 return ret_val;
8436
78b10e66 8437 if (st->container_dev == st->devnum) {
694575e7
KW
8438 /* On container level we can only increase number of devices. */
8439 dprintf("imsm: info: Container operation\n");
78b10e66 8440 int old_raid_disks = 0;
6dc0be30
AK
8441
8442 /* this warning will be removed when imsm checkpointing
8443 * will be implemented, and restoring from check-point
8444 * operation will be transparent for reboot process
8445 */
8446 if (warn_user_about_risk() == 0)
8447 return ret_val;
8448
78b10e66
N
8449 if (imsm_reshape_is_allowed_on_container(
8450 st, &geo, &old_raid_disks)) {
8451 struct imsm_update_reshape *u = NULL;
8452 int len;
8453
8454 len = imsm_create_metadata_update_for_reshape(
8455 st, &geo, old_raid_disks, &u);
8456
ed08d51c
AK
8457 if (len <= 0) {
8458 dprintf("imsm: Cannot prepare update\n");
8459 goto exit_imsm_reshape_super;
8460 }
8461
8dd70bce
AK
8462 ret_val = 0;
8463 /* update metadata locally */
8464 imsm_update_metadata_locally(st, u, len);
8465 /* and possibly remotely */
8466 if (st->update_tail)
8467 append_metadata_update(st, u, len);
8468 else
ed08d51c 8469 free(u);
8dd70bce 8470
694575e7 8471 } else {
e7ff7e40
AK
8472 fprintf(stderr, Name ": (imsm) Operation "
8473 "is not allowed on this container\n");
694575e7
KW
8474 }
8475 } else {
8476 /* On volume level we support following operations
471bceb6
KW
8477 * - takeover: raid10 -> raid0; raid0 -> raid10
8478 * - chunk size migration
8479 * - migration: raid5 -> raid0; raid0 -> raid5
8480 */
8481 struct intel_super *super = st->sb;
8482 struct intel_dev *dev = super->devlist;
8483 int change, devnum;
694575e7 8484 dprintf("imsm: info: Volume operation\n");
471bceb6
KW
8485 /* find requested device */
8486 while (dev) {
8487 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
8488 if (devnum == geo.dev_id)
8489 break;
8490 dev = dev->next;
8491 }
8492 if (dev == NULL) {
8493 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
8494 geo.dev_name, geo.dev_id);
8495 goto exit_imsm_reshape_super;
8496 }
8497 super->current_vol = dev->index;
694575e7
KW
8498 change = imsm_analyze_change(st, &geo);
8499 switch (change) {
471bceb6 8500 case CH_TAKEOVER:
bb025c2f 8501 ret_val = imsm_takeover(st, &geo);
694575e7 8502 break;
48c5303a
PC
8503 case CH_MIGRATION: {
8504 struct imsm_update_reshape_migration *u = NULL;
8505 int len =
8506 imsm_create_metadata_update_for_migration(
8507 st, &geo, &u);
8508 if (len < 1) {
8509 dprintf("imsm: "
8510 "Cannot prepare update\n");
8511 break;
8512 }
471bceb6 8513 ret_val = 0;
48c5303a
PC
8514 /* update metadata locally */
8515 imsm_update_metadata_locally(st, u, len);
8516 /* and possibly remotely */
8517 if (st->update_tail)
8518 append_metadata_update(st, u, len);
8519 else
8520 free(u);
8521 }
8522 break;
471bceb6
KW
8523 default:
8524 ret_val = 1;
694575e7 8525 }
694575e7 8526 }
78b10e66 8527
ed08d51c 8528exit_imsm_reshape_super:
78b10e66
N
8529 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
8530 return ret_val;
8531}
2cda7640 8532
eee67a47
AK
8533/*******************************************************************************
8534 * Function: wait_for_reshape_imsm
8535 * Description: Function writes new sync_max value and waits until
8536 * reshape process reach new position
8537 * Parameters:
8538 * sra : general array info
8539 * to_complete : new sync_max position
8540 * ndata : number of disks in new array's layout
8541 * Returns:
8542 * 0 : success,
8543 * 1 : there is no reshape in progress,
8544 * -1 : fail
8545 ******************************************************************************/
8546int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
8547 int ndata)
8548{
8549 int fd = sysfs_get_fd(sra, NULL, "reshape_position");
8550 unsigned long long completed;
8551
8552 struct timeval timeout;
8553
8554 if (fd < 0)
8555 return 1;
8556
8557 sysfs_fd_get_ll(fd, &completed);
8558
8559 if (to_complete == 0) {/* reshape till the end of array */
8560 sysfs_set_str(sra, NULL, "sync_max", "max");
8561 to_complete = MaxSector;
8562 } else {
8563 if (completed > to_complete)
8564 return -1;
8565 if (sysfs_set_num(sra, NULL, "sync_max",
8566 to_complete / ndata) != 0) {
8567 close(fd);
8568 return -1;
8569 }
8570 }
8571
8572 /* FIXME should not need a timeout at all */
8573 timeout.tv_sec = 30;
8574 timeout.tv_usec = 0;
8575 do {
8576 char action[20];
8577 fd_set rfds;
8578 FD_ZERO(&rfds);
8579 FD_SET(fd, &rfds);
8580 select(fd+1, NULL, NULL, &rfds, &timeout);
8581 if (sysfs_fd_get_ll(fd, &completed) < 0) {
8582 close(fd);
8583 return 1;
8584 }
8585 if (sysfs_get_str(sra, NULL, "sync_action",
8586 action, 20) > 0 &&
8587 strncmp(action, "reshape", 7) != 0)
8588 break;
8589 } while (completed < to_complete);
8590 close(fd);
8591 return 0;
8592
8593}
8594
b915c95f
AK
8595/*******************************************************************************
8596 * Function: check_degradation_change
8597 * Description: Check that array hasn't become failed.
8598 * Parameters:
8599 * info : for sysfs access
8600 * sources : source disks descriptors
8601 * degraded: previous degradation level
8602 * Returns:
8603 * degradation level
8604 ******************************************************************************/
8605int check_degradation_change(struct mdinfo *info,
8606 int *sources,
8607 int degraded)
8608{
8609 unsigned long long new_degraded;
8610 sysfs_get_ll(info, NULL, "degraded", &new_degraded);
8611 if (new_degraded != (unsigned long long)degraded) {
8612 /* check each device to ensure it is still working */
8613 struct mdinfo *sd;
8614 new_degraded = 0;
8615 for (sd = info->devs ; sd ; sd = sd->next) {
8616 if (sd->disk.state & (1<<MD_DISK_FAULTY))
8617 continue;
8618 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
8619 char sbuf[20];
8620 if (sysfs_get_str(info,
8621 sd, "state", sbuf, 20) < 0 ||
8622 strstr(sbuf, "faulty") ||
8623 strstr(sbuf, "in_sync") == NULL) {
8624 /* this device is dead */
8625 sd->disk.state = (1<<MD_DISK_FAULTY);
8626 if (sd->disk.raid_disk >= 0 &&
8627 sources[sd->disk.raid_disk] >= 0) {
8628 close(sources[
8629 sd->disk.raid_disk]);
8630 sources[sd->disk.raid_disk] =
8631 -1;
8632 }
8633 new_degraded++;
8634 }
8635 }
8636 }
8637 }
8638
8639 return new_degraded;
8640}
8641
10f22854
AK
8642/*******************************************************************************
8643 * Function: imsm_manage_reshape
8644 * Description: Function finds array under reshape and it manages reshape
8645 * process. It creates stripes backups (if required) and sets
8646 * checheckpoits.
8647 * Parameters:
8648 * afd : Backup handle (nattive) - not used
8649 * sra : general array info
8650 * reshape : reshape parameters - not used
8651 * st : supertype structure
8652 * blocks : size of critical section [blocks]
8653 * fds : table of source device descriptor
8654 * offsets : start of array (offest per devices)
8655 * dests : not used
8656 * destfd : table of destination device descriptor
8657 * destoffsets : table of destination offsets (per device)
8658 * Returns:
8659 * 1 : success, reshape is done
8660 * 0 : fail
8661 ******************************************************************************/
999b4972
N
8662static int imsm_manage_reshape(
8663 int afd, struct mdinfo *sra, struct reshape *reshape,
10f22854 8664 struct supertype *st, unsigned long backup_blocks,
999b4972
N
8665 int *fds, unsigned long long *offsets,
8666 int dests, int *destfd, unsigned long long *destoffsets)
8667{
10f22854
AK
8668 int ret_val = 0;
8669 struct intel_super *super = st->sb;
8670 struct intel_dev *dv = NULL;
8671 struct imsm_dev *dev = NULL;
8672 struct imsm_map *map_src, *map_dest;
8673 int migr_vol_qan = 0;
8674 int ndata, odata; /* [bytes] */
8675 int chunk; /* [bytes] */
8676 struct migr_record *migr_rec;
8677 char *buf = NULL;
8678 unsigned int buf_size; /* [bytes] */
8679 unsigned long long max_position; /* array size [bytes] */
8680 unsigned long long next_step; /* [blocks]/[bytes] */
8681 unsigned long long old_data_stripe_length;
8682 unsigned long long new_data_stripe_length;
8683 unsigned long long start_src; /* [bytes] */
8684 unsigned long long start; /* [bytes] */
8685 unsigned long long start_buf_shift; /* [bytes] */
b915c95f 8686 int degraded = 0;
10f22854
AK
8687
8688 if (!fds || !offsets || !destfd || !destoffsets || !sra)
8689 goto abort;
8690
8691 /* Find volume during the reshape */
8692 for (dv = super->devlist; dv; dv = dv->next) {
8693 if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
8694 && dv->dev->vol.migr_state == 1) {
8695 dev = dv->dev;
8696 migr_vol_qan++;
8697 }
8698 }
8699 /* Only one volume can migrate at the same time */
8700 if (migr_vol_qan != 1) {
8701 fprintf(stderr, Name " : %s", migr_vol_qan ?
8702 "Number of migrating volumes greater than 1\n" :
8703 "There is no volume during migrationg\n");
8704 goto abort;
8705 }
8706
8707 map_src = get_imsm_map(dev, 1);
8708 if (map_src == NULL)
8709 goto abort;
8710 map_dest = get_imsm_map(dev, 0);
8711
8712 ndata = imsm_num_data_members(dev, 0);
8713 odata = imsm_num_data_members(dev, 1);
8714
8715 chunk = map_src->blocks_per_strip * 512;
8716 old_data_stripe_length = odata * chunk;
8717
8718 migr_rec = super->migr_rec;
8719
8720 /* [bytes] */
8721 sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
8722 sra->new_level = map_dest->raid_level;
8723 new_data_stripe_length = sra->new_chunk * ndata;
8724
8725 /* initialize migration record for start condition */
8726 if (sra->reshape_progress == 0)
8727 init_migr_record_imsm(st, dev, sra);
8728
8729 /* size for data */
8730 buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
8731 /* extend buffer size for parity disk */
8732 buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
8733 /* add space for stripe aligment */
8734 buf_size += old_data_stripe_length;
8735 if (posix_memalign((void **)&buf, 4096, buf_size)) {
8736 dprintf("imsm: Cannot allocate checpoint buffer\n");
8737 goto abort;
8738 }
8739
8740 max_position =
8741 __le32_to_cpu(migr_rec->post_migr_vol_cap) +
8742 ((unsigned long long)__le32_to_cpu(
8743 migr_rec->post_migr_vol_cap_hi) << 32);
8744
8745 while (__le32_to_cpu(migr_rec->curr_migr_unit) <
8746 __le32_to_cpu(migr_rec->num_migr_units)) {
8747 /* current reshape position [blocks] */
8748 unsigned long long current_position =
8749 __le32_to_cpu(migr_rec->blocks_per_unit)
8750 * __le32_to_cpu(migr_rec->curr_migr_unit);
8751 unsigned long long border;
8752
b915c95f
AK
8753 /* Check that array hasn't become failed.
8754 */
8755 degraded = check_degradation_change(sra, fds, degraded);
8756 if (degraded > 1) {
8757 dprintf("imsm: Abort reshape due to degradation"
8758 " level (%i)\n", degraded);
8759 goto abort;
8760 }
8761
10f22854
AK
8762 next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
8763
8764 if ((current_position + next_step) > max_position)
8765 next_step = max_position - current_position;
8766
8767 start = (map_src->pba_of_lba0 + dev->reserved_blocks +
8768 current_position) * 512;
8769
8770 /* allign reading start to old geometry */
8771 start_buf_shift = start % old_data_stripe_length;
8772 start_src = start - start_buf_shift;
8773
8774 border = (start_src / odata) - (start / ndata);
8775 border /= 512;
8776 if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
8777 /* save critical stripes to buf
8778 * start - start address of current unit
8779 * to backup [bytes]
8780 * start_src - start address of current unit
8781 * to backup alligned to source array
8782 * [bytes]
8783 */
8784 unsigned long long next_step_filler = 0;
8785 unsigned long long copy_length = next_step * 512;
8786
8787 /* allign copy area length to stripe in old geometry */
8788 next_step_filler = ((copy_length + start_buf_shift)
8789 % old_data_stripe_length);
8790 if (next_step_filler)
8791 next_step_filler = (old_data_stripe_length
8792 - next_step_filler);
8793 dprintf("save_stripes() parameters: start = %llu,"
8794 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8795 "\tstart_in_buf_shift = %llu,"
8796 "\tnext_step_filler = %llu\n",
8797 start, start_src, copy_length,
8798 start_buf_shift, next_step_filler);
8799
8800 if (save_stripes(fds, offsets, map_src->num_members,
8801 chunk, sra->array.level,
8802 sra->array.layout, 0, NULL, start_src,
8803 copy_length +
8804 next_step_filler + start_buf_shift,
8805 buf)) {
8806 dprintf("imsm: Cannot save stripes"
8807 " to buffer\n");
8808 goto abort;
8809 }
8810 /* Convert data to destination format and store it
8811 * in backup general migration area
8812 */
8813 if (save_backup_imsm(st, dev, sra,
8814 buf + start_buf_shift,
8815 ndata, copy_length)) {
8816 dprintf("imsm: Cannot save stripes to "
8817 "target devices\n");
8818 goto abort;
8819 }
8820 if (save_checkpoint_imsm(st, sra,
8821 UNIT_SRC_IN_CP_AREA)) {
8822 dprintf("imsm: Cannot write checkpoint to "
8823 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8824 goto abort;
8825 }
8826 /* decrease backup_blocks */
8827 if (backup_blocks > (unsigned long)next_step)
8828 backup_blocks -= next_step;
8829 else
8830 backup_blocks = 0;
8831 }
8832 /* When data backed up, checkpoint stored,
8833 * kick the kernel to reshape unit of data
8834 */
8835 next_step = next_step + sra->reshape_progress;
8836 sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
8837 sysfs_set_num(sra, NULL, "suspend_hi", next_step);
8838
8839 /* wait until reshape finish */
c47b0ff6
AK
8840 if (wait_for_reshape_imsm(sra, next_step, ndata) < 0) {
8841 dprintf("wait_for_reshape_imsm returned error!\n");
8842 goto abort;
8843 }
10f22854
AK
8844
8845 sra->reshape_progress = next_step;
8846
8847 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) {
8848 dprintf("imsm: Cannot write checkpoint to "
8849 "migration record (UNIT_SRC_NORMAL)\n");
8850 goto abort;
8851 }
8852
8853 }
8854
8855 /* return '1' if done */
8856 ret_val = 1;
8857abort:
8858 free(buf);
8859 abort_reshape(sra);
8860
8861 return ret_val;
999b4972 8862}
71204a50 8863#endif /* MDASSEMBLE */
999b4972 8864
cdddbdbc
DW
8865struct superswitch super_imsm = {
8866#ifndef MDASSEMBLE
8867 .examine_super = examine_super_imsm,
8868 .brief_examine_super = brief_examine_super_imsm,
4737ae25 8869 .brief_examine_subarrays = brief_examine_subarrays_imsm,
9d84c8ea 8870 .export_examine_super = export_examine_super_imsm,
cdddbdbc
DW
8871 .detail_super = detail_super_imsm,
8872 .brief_detail_super = brief_detail_super_imsm,
bf5a934a 8873 .write_init_super = write_init_super_imsm,
0e600426
N
8874 .validate_geometry = validate_geometry_imsm,
8875 .add_to_super = add_to_super_imsm,
1a64be56 8876 .remove_from_super = remove_from_super_imsm,
d665cc31 8877 .detail_platform = detail_platform_imsm,
33414a01 8878 .kill_subarray = kill_subarray_imsm,
aa534678 8879 .update_subarray = update_subarray_imsm,
2b959fbf 8880 .load_container = load_container_imsm,
71204a50
N
8881 .default_geometry = default_geometry_imsm,
8882 .get_disk_controller_domain = imsm_get_disk_controller_domain,
8883 .reshape_super = imsm_reshape_super,
8884 .manage_reshape = imsm_manage_reshape,
cdddbdbc
DW
8885#endif
8886 .match_home = match_home_imsm,
8887 .uuid_from_super= uuid_from_super_imsm,
8888 .getinfo_super = getinfo_super_imsm,
5c4cd5da 8889 .getinfo_super_disks = getinfo_super_disks_imsm,
cdddbdbc
DW
8890 .update_super = update_super_imsm,
8891
8892 .avail_size = avail_size_imsm,
80e7f8c3 8893 .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
cdddbdbc
DW
8894
8895 .compare_super = compare_super_imsm,
8896
8897 .load_super = load_super_imsm,
bf5a934a 8898 .init_super = init_super_imsm,
e683ca88 8899 .store_super = store_super_imsm,
cdddbdbc
DW
8900 .free_super = free_super_imsm,
8901 .match_metadata_desc = match_metadata_desc_imsm,
bf5a934a 8902 .container_content = container_content_imsm,
cdddbdbc 8903
276d77db
AK
8904 .recover_backup = recover_backup_imsm,
8905
cdddbdbc 8906 .external = 1,
4cce4069 8907 .name = "imsm",
845dea95 8908
0e600426 8909#ifndef MDASSEMBLE
845dea95
NB
8910/* for mdmon */
8911 .open_new = imsm_open_new,
ed9d66aa 8912 .set_array_state= imsm_set_array_state,
845dea95
NB
8913 .set_disk = imsm_set_disk,
8914 .sync_metadata = imsm_sync_metadata,
88758e9d 8915 .activate_spare = imsm_activate_spare,
e8319a19 8916 .process_update = imsm_process_update,
8273f55e 8917 .prepare_update = imsm_prepare_update,
0e600426 8918#endif /* MDASSEMBLE */
cdddbdbc 8919};