]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-intel.c
imsm: Clear migration record when no migration in progress
[thirdparty/mdadm.git] / super-intel.c
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
/* MPB == Metadata Parameter Block */

/* signature string found at the start of the MPB; the version follows it */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))

/* known metadata version strings */
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"

/* sizes of the fixed-width signature and serial-number fields */
#define MAX_SIGNATURE_LENGTH 32
#define MAX_RAID_SERIAL_LEN 16

/* MPB attribute bits, kept in on-disk (little-endian) byte order */
#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)

/* sector counts used when reserving space for metadata at the end of a disk */
#define MPB_SECTOR_CNT 2210
#define IMSM_RESERVED_SECTORS 4096
/* NOTE(review): presumably log2(sectors per MiB) with 512-byte sectors - confirm */
#define SECT_PER_MB_SHIFT 11
57
58 /* Disk configuration info. */
59 #define IMSM_MAX_DEVICES 255
60 struct imsm_disk {
61 __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
62 __u32 total_blocks; /* 0xE8 - 0xEB total blocks */
63 __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
64 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
65 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
66 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
67 __u32 status; /* 0xF0 - 0xF3 */
68 __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
69 #define IMSM_DISK_FILLERS 4
70 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
71 };
72
73 /* RAID map configuration infos. */
74 struct imsm_map {
75 __u32 pba_of_lba0; /* start address of partition */
76 __u32 blocks_per_member;/* blocks per member */
77 __u32 num_data_stripes; /* number of data stripes */
78 __u16 blocks_per_strip;
79 __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
80 #define IMSM_T_STATE_NORMAL 0
81 #define IMSM_T_STATE_UNINITIALIZED 1
82 #define IMSM_T_STATE_DEGRADED 2
83 #define IMSM_T_STATE_FAILED 3
84 __u8 raid_level;
85 #define IMSM_T_RAID0 0
86 #define IMSM_T_RAID1 1
87 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
88 __u8 num_members; /* number of member disks */
89 __u8 num_domains; /* number of parity domains */
90 __u8 failed_disk_num; /* valid only when state is degraded */
91 __u8 ddf;
92 __u32 filler[7]; /* expansion area */
93 #define IMSM_ORD_REBUILD (1 << 24)
94 __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
95 * top byte contains some flags
96 */
97 } __attribute__ ((packed));
98
99 struct imsm_vol {
100 __u32 curr_migr_unit;
101 __u32 checkpoint_id; /* id to access curr_migr_unit */
102 __u8 migr_state; /* Normal or Migrating */
103 #define MIGR_INIT 0
104 #define MIGR_REBUILD 1
105 #define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
106 #define MIGR_GEN_MIGR 3
107 #define MIGR_STATE_CHANGE 4
108 #define MIGR_REPAIR 5
109 __u8 migr_type; /* Initializing, Rebuilding, ... */
110 __u8 dirty;
111 __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
112 __u16 verify_errors; /* number of mismatches */
113 __u16 bad_blocks; /* number of bad blocks during verify */
114 __u32 filler[4];
115 struct imsm_map map[1];
116 /* here comes another one if migr_state */
117 } __attribute__ ((packed));
118
119 struct imsm_dev {
120 __u8 volume[MAX_RAID_SERIAL_LEN];
121 __u32 size_low;
122 __u32 size_high;
123 #define DEV_BOOTABLE __cpu_to_le32(0x01)
124 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
125 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
126 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
127 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
128 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
129 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
130 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
131 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
132 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
133 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
134 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
135 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
136 __u32 status; /* Persistent RaidDev status */
137 __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
138 __u8 migr_priority;
139 __u8 num_sub_vols;
140 __u8 tid;
141 __u8 cng_master_disk;
142 __u16 cache_policy;
143 __u8 cng_state;
144 __u8 cng_sub_state;
145 #define IMSM_DEV_FILLERS 10
146 __u32 filler[IMSM_DEV_FILLERS];
147 struct imsm_vol vol;
148 } __attribute__ ((packed));
149
150 struct imsm_super {
151 __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
152 __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
153 __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
154 __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
155 __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
156 __u32 error_log_size; /* 0x30 - 0x33 in bytes */
157 __u32 attributes; /* 0x34 - 0x37 */
158 __u8 num_disks; /* 0x38 Number of configured disks */
159 __u8 num_raid_devs; /* 0x39 Number of configured volumes */
160 __u8 error_log_pos; /* 0x3A */
161 __u8 fill[1]; /* 0x3B */
162 __u32 cache_size; /* 0x3c - 0x40 in mb */
163 __u32 orig_family_num; /* 0x40 - 0x43 original family num */
164 __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
165 __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
166 #define IMSM_FILLERS 35
167 __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
168 struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
169 /* here comes imsm_dev[num_raid_devs] */
170 /* here comes BBM logs */
171 } __attribute__ ((packed));
172
173 #define BBM_LOG_MAX_ENTRIES 254
174
175 struct bbm_log_entry {
176 __u64 defective_block_start;
177 #define UNREADABLE 0xFFFFFFFF
178 __u32 spare_block_offset;
179 __u16 remapped_marked_count;
180 __u16 disk_ordinal;
181 } __attribute__ ((__packed__));
182
183 struct bbm_log {
184 __u32 signature; /* 0xABADB10C */
185 __u32 entry_count;
186 __u32 reserved_spare_block_count; /* 0 */
187 __u32 reserved; /* 0xFFFF */
188 __u64 first_spare_lba;
189 struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
190 } __attribute__ ((__packed__));
191
192
#ifndef MDASSEMBLE
/* printable names of the map states, indexed by imsm_map.map_state */
static char *map_state_str[] = {
	"normal",
	"uninitialized",
	"degraded",
	"failed"
};
#endif
196
#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209

/* General Migration Copy Area size in blocks */
#define GEN_MIGR_AREA_SIZE 2048

/* Source data for curr_migr_unit must be recovered using srcMap */
#define UNIT_SRC_NORMAL 0
/* Source data for curr_migr_unit has already been migrated and must
 * be recovered from checkpoint area
 */
#define UNIT_SRC_IN_CP_AREA 1
206 struct migr_record {
207 __u32 rec_status; /* Status used to determine how to restart
208 * migration in case it aborts
209 * in some fashion */
210 __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
211 __u32 family_num; /* Family number of MPB
212 * containing the RaidDev
213 * that is migrating */
214 __u32 ascending_migr; /* True if migrating in increasing
215 * order of lbas */
216 __u32 blocks_per_unit; /* Num disk blocks per unit of operation */
217 __u32 dest_depth_per_unit; /* Num member blocks each destMap
218 * member disk
219 * advances per unit-of-operation */
220 __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
221 __u32 dest_1st_member_lba; /* First member lba on first
222 * stripe of destination */
223 __u32 num_migr_units; /* Total num migration units-of-op */
224 __u32 post_migr_vol_cap; /* Size of volume after
225 * migration completes */
226 __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
227 __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
228 * migration ckpt record was read from
229 * (for recovered migrations) */
230 } __attribute__ ((__packed__));
231
232 static __u8 migr_type(struct imsm_dev *dev)
233 {
234 if (dev->vol.migr_type == MIGR_VERIFY &&
235 dev->status & DEV_VERIFY_AND_FIX)
236 return MIGR_REPAIR;
237 else
238 return dev->vol.migr_type;
239 }
240
241 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
242 {
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
245 */
246 if (migr_type == MIGR_REPAIR) {
247 dev->vol.migr_type = MIGR_VERIFY;
248 dev->status |= DEV_VERIFY_AND_FIX;
249 } else {
250 dev->vol.migr_type = migr_type;
251 dev->status &= ~DEV_VERIFY_AND_FIX;
252 }
253 }
254
255 static unsigned int sector_count(__u32 bytes)
256 {
257 return ((bytes + (512-1)) & (~(512-1))) / 512;
258 }
259
260 static unsigned int mpb_sectors(struct imsm_super *mpb)
261 {
262 return sector_count(__le32_to_cpu(mpb->mpb_size));
263 }
264
/* node of the singly linked list of raid devices held by intel_super */
struct intel_dev {
	struct imsm_dev *dev;	/* the raid device record */
	struct intel_dev *next;	/* next list node */
	unsigned index;		/* index used by get_imsm_dev() lookups */
};
270
271 struct intel_hba {
272 enum sys_dev_type type;
273 char *path;
274 char *pci_id;
275 struct intel_hba *next;
276 };
277
/* pending operation recorded in a 'struct dl' entry */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD = 2
};
282 /* internal representation of IMSM metadata */
283 struct intel_super {
284 union {
285 void *buf; /* O_DIRECT buffer for reading/writing metadata */
286 struct imsm_super *anchor; /* immovable parameters */
287 };
288 union {
289 void *migr_rec_buf; /* buffer for I/O operations */
290 struct migr_record *migr_rec; /* migration record */
291 };
292 size_t len; /* size of the 'buf' allocation */
293 void *next_buf; /* for realloc'ing buf from the manager */
294 size_t next_len;
295 int updates_pending; /* count of pending updates for mdmon */
296 int current_vol; /* index of raid device undergoing creation */
297 __u32 create_offset; /* common start for 'current_vol' */
298 __u32 random; /* random data for seeding new family numbers */
299 struct intel_dev *devlist;
300 struct dl {
301 struct dl *next;
302 int index;
303 __u8 serial[MAX_RAID_SERIAL_LEN];
304 int major, minor;
305 char *devname;
306 struct imsm_disk disk;
307 int fd;
308 int extent_cnt;
309 struct extent *e; /* for determining freespace @ create */
310 int raiddisk; /* slot to fill in autolayout */
311 enum action action;
312 } *disks;
313 struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
314 active */
315 struct dl *missing; /* disks removed while we weren't looking */
316 struct bbm_log *bbm_log;
317 struct intel_hba *hba; /* device path of the raid controller for this metadata */
318 const struct imsm_orom *orom; /* platform firmware support */
319 struct intel_super *next; /* (temp) list for disambiguating family_num */
320 };
321
322 struct intel_disk {
323 struct imsm_disk disk;
324 #define IMSM_UNKNOWN_OWNER (-1)
325 int owner;
326 struct intel_disk *next;
327 };
328
/* a used region of a disk; get_extents() ends its arrays with an entry
 * whose size is 0
 */
struct extent {
	unsigned long long start;
	unsigned long long size;
};
332
/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER = 0,
	CH_MIGRATION = 1,
};
338
/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare = 0,
	update_create_array = 1,
	update_kill_array = 2,
	update_rename_array = 3,
	update_add_remove_disk = 4,
	update_reshape_container_disks = 5,
	update_reshape_migration = 6,
	update_takeover = 7
};
350
351 struct imsm_update_activate_spare {
352 enum imsm_update_type type;
353 struct dl *dl;
354 int slot;
355 int array;
356 struct imsm_update_activate_spare *next;
357 };
358
/* bundle of array geometry parameters */
struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};
368
/* direction of a takeover operation */
enum takeover_direction {
	R10_TO_R0 = 0,
	R0_TO_R10 = 1
};
373 struct imsm_update_takeover {
374 enum imsm_update_type type;
375 int subarray;
376 enum takeover_direction direction;
377 };
378
379 struct imsm_update_reshape {
380 enum imsm_update_type type;
381 int old_raid_disks;
382 int new_raid_disks;
383
384 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
385 };
386
387 struct imsm_update_reshape_migration {
388 enum imsm_update_type type;
389 int old_raid_disks;
390 int new_raid_disks;
391 /* fields for array migration changes
392 */
393 int subdev;
394 int new_level;
395 int new_layout;
396 int new_chunksize;
397
398 int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
399 };
400
401 struct disk_info {
402 __u8 serial[MAX_RAID_SERIAL_LEN];
403 };
404
405 struct imsm_update_create_array {
406 enum imsm_update_type type;
407 int dev_idx;
408 struct imsm_dev dev;
409 };
410
411 struct imsm_update_kill_array {
412 enum imsm_update_type type;
413 int dev_idx;
414 };
415
416 struct imsm_update_rename_array {
417 enum imsm_update_type type;
418 __u8 name[MAX_RAID_SERIAL_LEN];
419 int dev_idx;
420 };
421
422 struct imsm_update_add_remove_disk {
423 enum imsm_update_type type;
424 };
425
426
427 static const char *_sys_dev_type[] = {
428 [SYS_DEV_UNKNOWN] = "Unknown",
429 [SYS_DEV_SAS] = "SAS",
430 [SYS_DEV_SATA] = "SATA"
431 };
432
433 const char *get_sys_dev_type(enum sys_dev_type type)
434 {
435 if (type >= SYS_DEV_MAX)
436 type = SYS_DEV_UNKNOWN;
437
438 return _sys_dev_type[type];
439 }
440
441 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
442 {
443 struct intel_hba *result = malloc(sizeof(*result));
444 if (result) {
445 result->type = device->type;
446 result->path = strdup(device->path);
447 result->next = NULL;
448 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
449 result->pci_id++;
450 }
451 return result;
452 }
453
454 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
455 {
456 struct intel_hba *result=NULL;
457 for (result = hba; result; result = result->next) {
458 if (result->type == device->type && strcmp(result->path, device->path) == 0)
459 break;
460 }
461 return result;
462 }
463
464 static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
465 {
466 struct intel_hba *hba;
467
468 /* check if disk attached to Intel HBA */
469 hba = find_intel_hba(super->hba, device);
470 if (hba != NULL)
471 return 1;
472 /* Check if HBA is already attached to super */
473 if (super->hba == NULL) {
474 super->hba = alloc_intel_hba(device);
475 return 1;
476 }
477
478 hba = super->hba;
479 /* Intel metadata allows for all disks attached to the same type HBA.
480 * Do not sypport odf HBA types mixing
481 */
482 if (device->type != hba->type)
483 return 2;
484
485 while (hba->next)
486 hba = hba->next;
487
488 hba->next = alloc_intel_hba(device);
489 return 1;
490 }
491
492 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
493 {
494 struct sys_dev *list, *elem, *prev;
495 char *disk_path;
496
497 if ((list = find_intel_devices()) == NULL)
498 return 0;
499
500 if (fd < 0)
501 disk_path = (char *) devname;
502 else
503 disk_path = diskfd_to_devpath(fd);
504
505 if (!disk_path) {
506 free_sys_dev(&list);
507 return 0;
508 }
509
510 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
511 if (path_attached_to_hba(disk_path, elem->path)) {
512 if (prev == NULL)
513 list = list->next;
514 else
515 prev->next = elem->next;
516 elem->next = NULL;
517 if (disk_path != devname)
518 free(disk_path);
519 free_sys_dev(&list);
520 return elem;
521 }
522 }
523 if (disk_path != devname)
524 free(disk_path);
525 free_sys_dev(&list);
526
527 return NULL;
528 }
529
530
531 static int find_intel_hba_capability(int fd, struct intel_super *super,
532 char *devname);
533
534 static struct supertype *match_metadata_desc_imsm(char *arg)
535 {
536 struct supertype *st;
537
538 if (strcmp(arg, "imsm") != 0 &&
539 strcmp(arg, "default") != 0
540 )
541 return NULL;
542
543 st = malloc(sizeof(*st));
544 if (!st)
545 return NULL;
546 memset(st, 0, sizeof(*st));
547 st->container_dev = NoMdDev;
548 st->ss = &super_imsm;
549 st->max_devs = IMSM_MAX_DEVICES;
550 st->minor_version = 0;
551 st->sb = NULL;
552 return st;
553 }
554
555 #ifndef MDASSEMBLE
556 static __u8 *get_imsm_version(struct imsm_super *mpb)
557 {
558 return &mpb->sig[MPB_SIG_LEN];
559 }
560 #endif
561
562 /* retrieve a disk directly from the anchor when the anchor is known to be
563 * up-to-date, currently only at load time
564 */
565 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
566 {
567 if (index >= mpb->num_disks)
568 return NULL;
569 return &mpb->disk[index];
570 }
571
572 /* retrieve the disk description based on a index of the disk
573 * in the sub-array
574 */
575 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
576 {
577 struct dl *d;
578
579 for (d = super->disks; d; d = d->next)
580 if (d->index == index)
581 return d;
582
583 return NULL;
584 }
585 /* retrieve a disk from the parsed metadata */
586 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
587 {
588 struct dl *dl;
589
590 dl = get_imsm_dl_disk(super, index);
591 if (dl)
592 return &dl->disk;
593
594 return NULL;
595 }
596
597 /* generate a checksum directly from the anchor when the anchor is known to be
598 * up-to-date, currently only at load or write_super after coalescing
599 */
600 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
601 {
602 __u32 end = mpb->mpb_size / sizeof(end);
603 __u32 *p = (__u32 *) mpb;
604 __u32 sum = 0;
605
606 while (end--) {
607 sum += __le32_to_cpu(*p);
608 p++;
609 }
610
611 return sum - __le32_to_cpu(mpb->check_sum);
612 }
613
614 static size_t sizeof_imsm_map(struct imsm_map *map)
615 {
616 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
617 }
618
619 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
620 {
621 /* A device can have 2 maps if it is in the middle of a migration.
622 * If second_map is:
623 * 0 - we return the first map
624 * 1 - we return the second map if it exists, else NULL
625 * -1 - we return the second map if it exists, else the first
626 */
627 struct imsm_map *map = &dev->vol.map[0];
628
629 if (second_map == 1 && !dev->vol.migr_state)
630 return NULL;
631 else if (second_map == 1 ||
632 (second_map < 0 && dev->vol.migr_state)) {
633 void *ptr = map;
634
635 return ptr + sizeof_imsm_map(map);
636 } else
637 return map;
638
639 }
640
641 /* return the size of the device.
642 * migr_state increases the returned size if map[0] were to be duplicated
643 */
644 static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
645 {
646 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
647 sizeof_imsm_map(get_imsm_map(dev, 0));
648
649 /* migrating means an additional map */
650 if (dev->vol.migr_state)
651 size += sizeof_imsm_map(get_imsm_map(dev, 1));
652 else if (migr_state)
653 size += sizeof_imsm_map(get_imsm_map(dev, 0));
654
655 return size;
656 }
657
658 #ifndef MDASSEMBLE
659 /* retrieve disk serial number list from a metadata update */
660 static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
661 {
662 void *u = update;
663 struct disk_info *inf;
664
665 inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
666 sizeof_imsm_dev(&update->dev, 0);
667
668 return inf;
669 }
670 #endif
671
672 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
673 {
674 int offset;
675 int i;
676 void *_mpb = mpb;
677
678 if (index >= mpb->num_raid_devs)
679 return NULL;
680
681 /* devices start after all disks */
682 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
683
684 for (i = 0; i <= index; i++)
685 if (i == index)
686 return _mpb + offset;
687 else
688 offset += sizeof_imsm_dev(_mpb + offset, 0);
689
690 return NULL;
691 }
692
693 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
694 {
695 struct intel_dev *dv;
696
697 if (index >= super->anchor->num_raid_devs)
698 return NULL;
699 for (dv = super->devlist; dv; dv = dv->next)
700 if (dv->index == index)
701 return dv->dev;
702 return NULL;
703 }
704
705 /*
706 * for second_map:
707 * == 0 get first map
708 * == 1 get second map
709 * == -1 than get map according to the current migr_state
710 */
711 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
712 int slot,
713 int second_map)
714 {
715 struct imsm_map *map;
716
717 map = get_imsm_map(dev, second_map);
718
719 /* top byte identifies disk under rebuild */
720 return __le32_to_cpu(map->disk_ord_tbl[slot]);
721 }
722
723 #define ord_to_idx(ord) (((ord) << 8) >> 8)
724 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
725 {
726 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
727
728 return ord_to_idx(ord);
729 }
730
731 static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
732 {
733 map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
734 }
735
736 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
737 {
738 int slot;
739 __u32 ord;
740
741 for (slot = 0; slot < map->num_members; slot++) {
742 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
743 if (ord_to_idx(ord) == idx)
744 return slot;
745 }
746
747 return -1;
748 }
749
750 static int get_imsm_raid_level(struct imsm_map *map)
751 {
752 if (map->raid_level == 1) {
753 if (map->num_members == 2)
754 return 1;
755 else
756 return 10;
757 }
758
759 return map->raid_level;
760 }
761
762 static int cmp_extent(const void *av, const void *bv)
763 {
764 const struct extent *a = av;
765 const struct extent *b = bv;
766 if (a->start < b->start)
767 return -1;
768 if (a->start > b->start)
769 return 1;
770 return 0;
771 }
772
773 static int count_memberships(struct dl *dl, struct intel_super *super)
774 {
775 int memberships = 0;
776 int i;
777
778 for (i = 0; i < super->anchor->num_raid_devs; i++) {
779 struct imsm_dev *dev = get_imsm_dev(super, i);
780 struct imsm_map *map = get_imsm_map(dev, 0);
781
782 if (get_imsm_disk_slot(map, dl->index) >= 0)
783 memberships++;
784 }
785
786 return memberships;
787 }
788
789 static struct extent *get_extents(struct intel_super *super, struct dl *dl)
790 {
791 /* find a list of used extents on the given physical device */
792 struct extent *rv, *e;
793 int i;
794 int memberships = count_memberships(dl, super);
795 __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
796
797 rv = malloc(sizeof(struct extent) * (memberships + 1));
798 if (!rv)
799 return NULL;
800 e = rv;
801
802 for (i = 0; i < super->anchor->num_raid_devs; i++) {
803 struct imsm_dev *dev = get_imsm_dev(super, i);
804 struct imsm_map *map = get_imsm_map(dev, 0);
805
806 if (get_imsm_disk_slot(map, dl->index) >= 0) {
807 e->start = __le32_to_cpu(map->pba_of_lba0);
808 e->size = __le32_to_cpu(map->blocks_per_member);
809 e++;
810 }
811 }
812 qsort(rv, memberships, sizeof(*rv), cmp_extent);
813
814 /* determine the start of the metadata
815 * when no raid devices are defined use the default
816 * ...otherwise allow the metadata to truncate the value
817 * as is the case with older versions of imsm
818 */
819 if (memberships) {
820 struct extent *last = &rv[memberships - 1];
821 __u32 remainder;
822
823 remainder = __le32_to_cpu(dl->disk.total_blocks) -
824 (last->start + last->size);
825 /* round down to 1k block to satisfy precision of the kernel
826 * 'size' interface
827 */
828 remainder &= ~1UL;
829 /* make sure remainder is still sane */
830 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
831 remainder = ROUND_UP(super->len, 512) >> 9;
832 if (reservation > remainder)
833 reservation = remainder;
834 }
835 e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
836 e->size = 0;
837 return rv;
838 }
839
840 /* try to determine how much space is reserved for metadata from
841 * the last get_extents() entry, otherwise fallback to the
842 * default
843 */
844 static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
845 {
846 struct extent *e;
847 int i;
848 __u32 rv;
849
850 /* for spares just return a minimal reservation which will grow
851 * once the spare is picked up by an array
852 */
853 if (dl->index == -1)
854 return MPB_SECTOR_CNT;
855
856 e = get_extents(super, dl);
857 if (!e)
858 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
859
860 /* scroll to last entry */
861 for (i = 0; e[i].size; i++)
862 continue;
863
864 rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;
865
866 free(e);
867
868 return rv;
869 }
870
871 static int is_spare(struct imsm_disk *disk)
872 {
873 return (disk->status & SPARE_DISK) == SPARE_DISK;
874 }
875
876 static int is_configured(struct imsm_disk *disk)
877 {
878 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
879 }
880
881 static int is_failed(struct imsm_disk *disk)
882 {
883 return (disk->status & FAILED_DISK) == FAILED_DISK;
884 }
885
886 /* Return minimum size of a spare that can be used in this array*/
887 static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
888 {
889 struct intel_super *super = st->sb;
890 struct dl *dl;
891 struct extent *e;
892 int i;
893 unsigned long long rv = 0;
894
895 if (!super)
896 return rv;
897 /* find first active disk in array */
898 dl = super->disks;
899 while (dl && (is_failed(&dl->disk) || dl->index == -1))
900 dl = dl->next;
901 if (!dl)
902 return rv;
903 /* find last lba used by subarrays */
904 e = get_extents(super, dl);
905 if (!e)
906 return rv;
907 for (i = 0; e[i].size; i++)
908 continue;
909 if (i > 0)
910 rv = e[i-1].start + e[i-1].size;
911 free(e);
912 /* add the amount of space needed for metadata */
913 rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
914 return rv * 512;
915 }
916
917 #ifndef MDASSEMBLE
918 static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
919
920 static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
921 {
922 __u64 sz;
923 int slot, i;
924 struct imsm_map *map = get_imsm_map(dev, 0);
925 struct imsm_map *map2 = get_imsm_map(dev, 1);
926 __u32 ord;
927
928 printf("\n");
929 printf("[%.16s]:\n", dev->volume);
930 printf(" UUID : %s\n", uuid);
931 printf(" RAID Level : %d", get_imsm_raid_level(map));
932 if (map2)
933 printf(" <-- %d", get_imsm_raid_level(map2));
934 printf("\n");
935 printf(" Members : %d", map->num_members);
936 if (map2)
937 printf(" <-- %d", map2->num_members);
938 printf("\n");
939 printf(" Slots : [");
940 for (i = 0; i < map->num_members; i++) {
941 ord = get_imsm_ord_tbl_ent(dev, i, 0);
942 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
943 }
944 printf("]");
945 if (map2) {
946 printf(" <-- [");
947 for (i = 0; i < map2->num_members; i++) {
948 ord = get_imsm_ord_tbl_ent(dev, i, 1);
949 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
950 }
951 printf("]");
952 }
953 printf("\n");
954 printf(" Failed disk : ");
955 if (map->failed_disk_num == 0xff)
956 printf("none");
957 else
958 printf("%i", map->failed_disk_num);
959 printf("\n");
960 slot = get_imsm_disk_slot(map, disk_idx);
961 if (slot >= 0) {
962 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
963 printf(" This Slot : %d%s\n", slot,
964 ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
965 } else
966 printf(" This Slot : ?\n");
967 sz = __le32_to_cpu(dev->size_high);
968 sz <<= 32;
969 sz += __le32_to_cpu(dev->size_low);
970 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
971 human_size(sz * 512));
972 sz = __le32_to_cpu(map->blocks_per_member);
973 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
974 human_size(sz * 512));
975 printf(" Sector Offset : %u\n",
976 __le32_to_cpu(map->pba_of_lba0));
977 printf(" Num Stripes : %u\n",
978 __le32_to_cpu(map->num_data_stripes));
979 printf(" Chunk Size : %u KiB",
980 __le16_to_cpu(map->blocks_per_strip) / 2);
981 if (map2)
982 printf(" <-- %u KiB",
983 __le16_to_cpu(map2->blocks_per_strip) / 2);
984 printf("\n");
985 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
986 printf(" Migrate State : ");
987 if (dev->vol.migr_state) {
988 if (migr_type(dev) == MIGR_INIT)
989 printf("initialize\n");
990 else if (migr_type(dev) == MIGR_REBUILD)
991 printf("rebuild\n");
992 else if (migr_type(dev) == MIGR_VERIFY)
993 printf("check\n");
994 else if (migr_type(dev) == MIGR_GEN_MIGR)
995 printf("general migration\n");
996 else if (migr_type(dev) == MIGR_STATE_CHANGE)
997 printf("state change\n");
998 else if (migr_type(dev) == MIGR_REPAIR)
999 printf("repair\n");
1000 else
1001 printf("<unknown:%d>\n", migr_type(dev));
1002 } else
1003 printf("idle\n");
1004 printf(" Map State : %s", map_state_str[map->map_state]);
1005 if (dev->vol.migr_state) {
1006 struct imsm_map *map = get_imsm_map(dev, 1);
1007
1008 printf(" <-- %s", map_state_str[map->map_state]);
1009 printf("\n Checkpoint : %u (%llu)",
1010 __le32_to_cpu(dev->vol.curr_migr_unit),
1011 (unsigned long long)blocks_per_migr_unit(dev));
1012 }
1013 printf("\n");
1014 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
1015 }
1016
1017 static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
1018 {
1019 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
1020 char str[MAX_RAID_SERIAL_LEN + 1];
1021 __u64 sz;
1022
1023 if (index < 0 || !disk)
1024 return;
1025
1026 printf("\n");
1027 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1028 printf(" Disk%02d Serial : %s\n", index, str);
1029 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1030 is_configured(disk) ? " active" : "",
1031 is_failed(disk) ? " failed" : "");
1032 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1033 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1034 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1035 human_size(sz * 512));
1036 }
1037
1038 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
1039
1040 static void examine_super_imsm(struct supertype *st, char *homehost)
1041 {
1042 struct intel_super *super = st->sb;
1043 struct imsm_super *mpb = super->anchor;
1044 char str[MAX_SIGNATURE_LENGTH];
1045 int i;
1046 struct mdinfo info;
1047 char nbuf[64];
1048 __u32 sum;
1049 __u32 reserved = imsm_reserved_sectors(super, super->disks);
1050 struct dl *dl;
1051
1052 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
1053 printf(" Magic : %s\n", str);
1054 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1055 printf(" Version : %s\n", get_imsm_version(mpb));
1056 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
1057 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1058 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
1059 getinfo_super_imsm(st, &info, NULL);
1060 fname_from_uuid(st, &info, nbuf, ':');
1061 printf(" UUID : %s\n", nbuf + 5);
1062 sum = __le32_to_cpu(mpb->check_sum);
1063 printf(" Checksum : %08x %s\n", sum,
1064 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
1065 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
1066 printf(" Disks : %d\n", mpb->num_disks);
1067 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
1068 print_imsm_disk(mpb, super->disks->index, reserved);
1069 if (super->bbm_log) {
1070 struct bbm_log *log = super->bbm_log;
1071
1072 printf("\n");
1073 printf("Bad Block Management Log:\n");
1074 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1075 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1076 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1077 printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
1078 printf(" First Spare : %llx\n",
1079 (unsigned long long) __le64_to_cpu(log->first_spare_lba));
1080 }
1081 for (i = 0; i < mpb->num_raid_devs; i++) {
1082 struct mdinfo info;
1083 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1084
1085 super->current_vol = i;
1086 getinfo_super_imsm(st, &info, NULL);
1087 fname_from_uuid(st, &info, nbuf, ':');
1088 print_imsm_dev(dev, nbuf + 5, super->disks->index);
1089 }
1090 for (i = 0; i < mpb->num_disks; i++) {
1091 if (i == super->disks->index)
1092 continue;
1093 print_imsm_disk(mpb, i, reserved);
1094 }
1095 for (dl = super->disks ; dl; dl = dl->next) {
1096 struct imsm_disk *disk;
1097 char str[MAX_RAID_SERIAL_LEN + 1];
1098 __u64 sz;
1099
1100 if (dl->index >= 0)
1101 continue;
1102
1103 disk = &dl->disk;
1104 printf("\n");
1105 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1106 printf(" Disk Serial : %s\n", str);
1107 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1108 is_configured(disk) ? " active" : "",
1109 is_failed(disk) ? " failed" : "");
1110 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1111 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1112 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1113 human_size(sz * 512));
1114 }
1115 }
1116
1117 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1118 {
1119 /* We just write a generic IMSM ARRAY entry */
1120 struct mdinfo info;
1121 char nbuf[64];
1122 struct intel_super *super = st->sb;
1123
1124 if (!super->anchor->num_raid_devs) {
1125 printf("ARRAY metadata=imsm\n");
1126 return;
1127 }
1128
1129 getinfo_super_imsm(st, &info, NULL);
1130 fname_from_uuid(st, &info, nbuf, ':');
1131 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1132 }
1133
1134 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1135 {
1136 /* We just write a generic IMSM ARRAY entry */
1137 struct mdinfo info;
1138 char nbuf[64];
1139 char nbuf1[64];
1140 struct intel_super *super = st->sb;
1141 int i;
1142
1143 if (!super->anchor->num_raid_devs)
1144 return;
1145
1146 getinfo_super_imsm(st, &info, NULL);
1147 fname_from_uuid(st, &info, nbuf, ':');
1148 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1149 struct imsm_dev *dev = get_imsm_dev(super, i);
1150
1151 super->current_vol = i;
1152 getinfo_super_imsm(st, &info, NULL);
1153 fname_from_uuid(st, &info, nbuf1, ':');
1154 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1155 dev->volume, nbuf + 5, i, nbuf1 + 5);
1156 }
1157 }
1158
1159 static void export_examine_super_imsm(struct supertype *st)
1160 {
1161 struct intel_super *super = st->sb;
1162 struct imsm_super *mpb = super->anchor;
1163 struct mdinfo info;
1164 char nbuf[64];
1165
1166 getinfo_super_imsm(st, &info, NULL);
1167 fname_from_uuid(st, &info, nbuf, ':');
1168 printf("MD_METADATA=imsm\n");
1169 printf("MD_LEVEL=container\n");
1170 printf("MD_UUID=%s\n", nbuf+5);
1171 printf("MD_DEVICES=%u\n", mpb->num_disks);
1172 }
1173
1174 static void detail_super_imsm(struct supertype *st, char *homehost)
1175 {
1176 struct mdinfo info;
1177 char nbuf[64];
1178
1179 getinfo_super_imsm(st, &info, NULL);
1180 fname_from_uuid(st, &info, nbuf, ':');
1181 printf("\n UUID : %s\n", nbuf + 5);
1182 }
1183
1184 static void brief_detail_super_imsm(struct supertype *st)
1185 {
1186 struct mdinfo info;
1187 char nbuf[64];
1188 getinfo_super_imsm(st, &info, NULL);
1189 fname_from_uuid(st, &info, nbuf, ':');
1190 printf(" UUID=%s", nbuf + 5);
1191 }
1192
1193 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1194 static void fd2devname(int fd, char *name);
1195
1196 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
1197 {
1198 /* dump an unsorted list of devices attached to AHCI Intel storage
1199 * controller, as well as non-connected ports
1200 */
1201 int hba_len = strlen(hba_path) + 1;
1202 struct dirent *ent;
1203 DIR *dir;
1204 char *path = NULL;
1205 int err = 0;
1206 unsigned long port_mask = (1 << port_count) - 1;
1207
1208 if (port_count > (int)sizeof(port_mask) * 8) {
1209 if (verbose)
1210 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1211 return 2;
1212 }
1213
1214 /* scroll through /sys/dev/block looking for devices attached to
1215 * this hba
1216 */
1217 dir = opendir("/sys/dev/block");
1218 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1219 int fd;
1220 char model[64];
1221 char vendor[64];
1222 char buf[1024];
1223 int major, minor;
1224 char *device;
1225 char *c;
1226 int port;
1227 int type;
1228
1229 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1230 continue;
1231 path = devt_to_devpath(makedev(major, minor));
1232 if (!path)
1233 continue;
1234 if (!path_attached_to_hba(path, hba_path)) {
1235 free(path);
1236 path = NULL;
1237 continue;
1238 }
1239
1240 /* retrieve the scsi device type */
1241 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1242 if (verbose)
1243 fprintf(stderr, Name ": failed to allocate 'device'\n");
1244 err = 2;
1245 break;
1246 }
1247 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1248 if (load_sys(device, buf) != 0) {
1249 if (verbose)
1250 fprintf(stderr, Name ": failed to read device type for %s\n",
1251 path);
1252 err = 2;
1253 free(device);
1254 break;
1255 }
1256 type = strtoul(buf, NULL, 10);
1257
1258 /* if it's not a disk print the vendor and model */
1259 if (!(type == 0 || type == 7 || type == 14)) {
1260 vendor[0] = '\0';
1261 model[0] = '\0';
1262 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1263 if (load_sys(device, buf) == 0) {
1264 strncpy(vendor, buf, sizeof(vendor));
1265 vendor[sizeof(vendor) - 1] = '\0';
1266 c = (char *) &vendor[sizeof(vendor) - 1];
1267 while (isspace(*c) || *c == '\0')
1268 *c-- = '\0';
1269
1270 }
1271 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1272 if (load_sys(device, buf) == 0) {
1273 strncpy(model, buf, sizeof(model));
1274 model[sizeof(model) - 1] = '\0';
1275 c = (char *) &model[sizeof(model) - 1];
1276 while (isspace(*c) || *c == '\0')
1277 *c-- = '\0';
1278 }
1279
1280 if (vendor[0] && model[0])
1281 sprintf(buf, "%.64s %.64s", vendor, model);
1282 else
1283 switch (type) { /* numbers from hald/linux/device.c */
1284 case 1: sprintf(buf, "tape"); break;
1285 case 2: sprintf(buf, "printer"); break;
1286 case 3: sprintf(buf, "processor"); break;
1287 case 4:
1288 case 5: sprintf(buf, "cdrom"); break;
1289 case 6: sprintf(buf, "scanner"); break;
1290 case 8: sprintf(buf, "media_changer"); break;
1291 case 9: sprintf(buf, "comm"); break;
1292 case 12: sprintf(buf, "raid"); break;
1293 default: sprintf(buf, "unknown");
1294 }
1295 } else
1296 buf[0] = '\0';
1297 free(device);
1298
1299 /* chop device path to 'host%d' and calculate the port number */
1300 c = strchr(&path[hba_len], '/');
1301 if (!c) {
1302 if (verbose)
1303 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1304 err = 2;
1305 break;
1306 }
1307 *c = '\0';
1308 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1309 port -= host_base;
1310 else {
1311 if (verbose) {
1312 *c = '/'; /* repair the full string */
1313 fprintf(stderr, Name ": failed to determine port number for %s\n",
1314 path);
1315 }
1316 err = 2;
1317 break;
1318 }
1319
1320 /* mark this port as used */
1321 port_mask &= ~(1 << port);
1322
1323 /* print out the device information */
1324 if (buf[0]) {
1325 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1326 continue;
1327 }
1328
1329 fd = dev_open(ent->d_name, O_RDONLY);
1330 if (fd < 0)
1331 printf(" Port%d : - disk info unavailable -\n", port);
1332 else {
1333 fd2devname(fd, buf);
1334 printf(" Port%d : %s", port, buf);
1335 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1336 printf(" (%s)\n", buf);
1337 else
1338 printf("()\n");
1339 }
1340 close(fd);
1341 free(path);
1342 path = NULL;
1343 }
1344 if (path)
1345 free(path);
1346 if (dir)
1347 closedir(dir);
1348 if (err == 0) {
1349 int i;
1350
1351 for (i = 0; i < port_count; i++)
1352 if (port_mask & (1 << i))
1353 printf(" Port%d : - no device attached -\n", i);
1354 }
1355
1356 return err;
1357 }
1358
1359
1360
1361 static void print_found_intel_controllers(struct sys_dev *elem)
1362 {
1363 for (; elem; elem = elem->next) {
1364 fprintf(stderr, Name ": found Intel(R) ");
1365 if (elem->type == SYS_DEV_SATA)
1366 fprintf(stderr, "SATA ");
1367 else if (elem->type == SYS_DEV_SAS)
1368 fprintf(stderr, "SAS ");
1369 fprintf(stderr, "RAID controller");
1370 if (elem->pci_id)
1371 fprintf(stderr, " at %s", elem->pci_id);
1372 fprintf(stderr, ".\n");
1373 }
1374 fflush(stderr);
1375 }
1376
/* Scan the controller directory 'hba_path' for "host<N>" entries.
 *
 * On return *port_count holds the size of the host number range that was
 * found (highest - lowest + 1), or 0 when there is no host entry.
 *
 * Returns the lowest host number, or -1 when the directory cannot be
 * opened or contains no host entry.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	struct dirent *ent;
	DIR *dir;
	int host_min = -1;
	int host_max = -1;
	int found = 0;

	*port_count = 0;
	if ((dir = opendir(hba_path)) == NULL)
		return -1;

	/* readdir() returns entries in no particular order, so track the
	 * lowest and the highest host independently; deriving the count
	 * incrementally loses ports when a lower host shows up late
	 */
	for (ent = readdir(dir); ent; ent = readdir(dir)) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		if (!found) {
			host_min = host;
			host_max = host;
			found = 1;
			continue;
		}
		if (host < host_min)
			host_min = host;
		if (host > host_max)
			host_max = host;
	}
	closedir(dir);

	if (found)
		*port_count = host_max - host_min + 1;
	return host_min;
}
1403
1404 static void print_imsm_capability(const struct imsm_orom *orom)
1405 {
1406 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1407 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1408 orom->hotfix_ver, orom->build);
1409 printf(" RAID Levels :%s%s%s%s%s\n",
1410 imsm_orom_has_raid0(orom) ? " raid0" : "",
1411 imsm_orom_has_raid1(orom) ? " raid1" : "",
1412 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1413 imsm_orom_has_raid10(orom) ? " raid10" : "",
1414 imsm_orom_has_raid5(orom) ? " raid5" : "");
1415 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1416 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1417 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1418 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1419 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1420 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1421 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1422 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1423 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1424 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1425 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1426 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1427 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1428 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1429 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1430 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1431 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1432 printf(" Max Disks : %d\n", orom->tds);
1433 printf(" Max Volumes : %d\n", orom->vpa);
1434 return;
1435 }
1436
1437 static int detail_platform_imsm(int verbose, int enumerate_only)
1438 {
1439 /* There are two components to imsm platform support, the ahci SATA
1440 * controller and the option-rom. To find the SATA controller we
1441 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1442 * controller with the Intel vendor id is present. This approach
1443 * allows mdadm to leverage the kernel's ahci detection logic, with the
1444 * caveat that if ahci.ko is not loaded mdadm will not be able to
1445 * detect platform raid capabilities. The option-rom resides in a
1446 * platform "Adapter ROM". We scan for its signature to retrieve the
1447 * platform capabilities. If raid support is disabled in the BIOS the
1448 * option-rom capability structure will not be available.
1449 */
1450 const struct imsm_orom *orom;
1451 struct sys_dev *list, *hba;
1452 int host_base = 0;
1453 int port_count = 0;
1454 int result=0;
1455
1456 if (enumerate_only) {
1457 if (check_env("IMSM_NO_PLATFORM"))
1458 return 0;
1459 list = find_intel_devices();
1460 if (!list)
1461 return 2;
1462 for (hba = list; hba; hba = hba->next) {
1463 orom = find_imsm_capability(hba->type);
1464 if (!orom) {
1465 result = 2;
1466 break;
1467 }
1468 }
1469 free_sys_dev(&list);
1470 return result;
1471 }
1472
1473 list = find_intel_devices();
1474 if (!list) {
1475 if (verbose)
1476 fprintf(stderr, Name ": no active Intel(R) RAID "
1477 "controller found.\n");
1478 free_sys_dev(&list);
1479 return 2;
1480 } else if (verbose)
1481 print_found_intel_controllers(list);
1482
1483 for (hba = list; hba; hba = hba->next) {
1484 orom = find_imsm_capability(hba->type);
1485 if (!orom)
1486 fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
1487 hba->path, get_sys_dev_type(hba->type));
1488 else
1489 print_imsm_capability(orom);
1490 }
1491
1492 for (hba = list; hba; hba = hba->next) {
1493 printf(" I/O Controller : %s (%s)\n",
1494 hba->path, get_sys_dev_type(hba->type));
1495
1496 if (hba->type == SYS_DEV_SATA) {
1497 host_base = ahci_get_port_count(hba->path, &port_count);
1498 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
1499 if (verbose)
1500 fprintf(stderr, Name ": failed to enumerate "
1501 "ports on SATA controller at %s.", hba->pci_id);
1502 result |= 2;
1503 }
1504 }
1505 }
1506
1507 free_sys_dev(&list);
1508 return result;
1509 }
1510 #endif
1511
/* The imsm metadata format carries no host identification, so it can
 * never be confirmed nor denied that an array is "meant" for this host.
 * Member disks that do not belong are excluded through compare_super and
 * the 'family_num' fields, and mdadm.conf names the arrays that should be
 * assembled.  Auto-assembly may still pick up "foreign" arrays.
 *
 * Always returns -1 ("cannot tell"); both arguments are ignored.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	const int cannot_tell = -1;

	(void) st;
	(void) homehost;
	return cannot_tell;
}
1525
1526 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1527 {
1528 /* The uuid returned here is used for:
1529 * uuid to put into bitmap file (Create, Grow)
1530 * uuid for backup header when saving critical section (Grow)
1531 * comparing uuids when re-adding a device into an array
1532 * In these cases the uuid required is that of the data-array,
1533 * not the device-set.
1534 * uuid to recognise same set when adding a missing device back
1535 * to an array. This is a uuid for the device-set.
1536 *
1537 * For each of these we can make do with a truncated
1538 * or hashed uuid rather than the original, as long as
1539 * everyone agrees.
1540 * In each case the uuid required is that of the data-array,
1541 * not the device-set.
1542 */
1543 /* imsm does not track uuid's so we synthesis one using sha1 on
1544 * - The signature (Which is constant for all imsm array, but no matter)
1545 * - the orig_family_num of the container
1546 * - the index number of the volume
1547 * - the 'serial' number of the volume.
1548 * Hopefully these are all constant.
1549 */
1550 struct intel_super *super = st->sb;
1551
1552 char buf[20];
1553 struct sha1_ctx ctx;
1554 struct imsm_dev *dev = NULL;
1555 __u32 family_num;
1556
1557 /* some mdadm versions failed to set ->orig_family_num, in which
1558 * case fall back to ->family_num. orig_family_num will be
1559 * fixed up with the first metadata update.
1560 */
1561 family_num = super->anchor->orig_family_num;
1562 if (family_num == 0)
1563 family_num = super->anchor->family_num;
1564 sha1_init_ctx(&ctx);
1565 sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
1566 sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
1567 if (super->current_vol >= 0)
1568 dev = get_imsm_dev(super, super->current_vol);
1569 if (dev) {
1570 __u32 vol = super->current_vol;
1571 sha1_process_bytes(&vol, sizeof(vol), &ctx);
1572 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
1573 }
1574 sha1_finish_ctx(&ctx, buf);
1575 memcpy(uuid, buf, 4*4);
1576 }
1577
#if 0
/* Split the "major.minor.patch" version string stored in the signature
 * into numbers.  At most two characters are kept per field; the minor
 * number is stored in *m and the patch number in *p (the major number is
 * parsed but not returned).
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	/* test the bound before dereferencing, and stop once all three
	 * fields are filled so ver_parse[] is never indexed past its end
	 */
	while (v < end && *v != '\0' && i < 3) {
		if (*v == '.') {
			i++;
			j = 0;
		} else if (j < 2)
			ver_parse[i][j++] = *v;
		v++;
	}

	/* base 10: with base 0 a field such as "08" is read as octal */
	*m = strtol(minor, NULL, 10);
	*p = strtol(patch, NULL, 10);
}
#endif
1605
1606 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1607 {
1608 /* migr_strip_size when repairing or initializing parity */
1609 struct imsm_map *map = get_imsm_map(dev, 0);
1610 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1611
1612 switch (get_imsm_raid_level(map)) {
1613 case 5:
1614 case 10:
1615 return chunk;
1616 default:
1617 return 128*1024 >> 9;
1618 }
1619 }
1620
1621 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1622 {
1623 /* migr_strip_size when rebuilding a degraded disk, no idea why
1624 * this is different than migr_strip_size_resync(), but it's good
1625 * to be compatible
1626 */
1627 struct imsm_map *map = get_imsm_map(dev, 1);
1628 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1629
1630 switch (get_imsm_raid_level(map)) {
1631 case 1:
1632 case 10:
1633 if (map->num_members % map->num_domains == 0)
1634 return 128*1024 >> 9;
1635 else
1636 return chunk;
1637 case 5:
1638 return max((__u32) 64*1024 >> 9, chunk);
1639 default:
1640 return 128*1024 >> 9;
1641 }
1642 }
1643
1644 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1645 {
1646 struct imsm_map *lo = get_imsm_map(dev, 0);
1647 struct imsm_map *hi = get_imsm_map(dev, 1);
1648 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1649 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1650
1651 return max((__u32) 1, hi_chunk / lo_chunk);
1652 }
1653
1654 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1655 {
1656 struct imsm_map *lo = get_imsm_map(dev, 0);
1657 int level = get_imsm_raid_level(lo);
1658
1659 if (level == 1 || level == 10) {
1660 struct imsm_map *hi = get_imsm_map(dev, 1);
1661
1662 return hi->num_domains;
1663 } else
1664 return num_stripes_per_unit_resync(dev);
1665 }
1666
1667 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1668 {
1669 /* named 'imsm_' because raid0, raid1 and raid10
1670 * counter-intuitively have the same number of data disks
1671 */
1672 struct imsm_map *map = get_imsm_map(dev, second_map);
1673
1674 switch (get_imsm_raid_level(map)) {
1675 case 0:
1676 case 1:
1677 case 10:
1678 return map->num_members;
1679 case 5:
1680 return map->num_members - 1;
1681 default:
1682 dprintf("%s: unsupported raid level\n", __func__);
1683 return 0;
1684 }
1685 }
1686
1687 static __u32 parity_segment_depth(struct imsm_dev *dev)
1688 {
1689 struct imsm_map *map = get_imsm_map(dev, 0);
1690 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1691
1692 switch(get_imsm_raid_level(map)) {
1693 case 1:
1694 case 10:
1695 return chunk * map->num_domains;
1696 case 5:
1697 return chunk * map->num_members;
1698 default:
1699 return chunk;
1700 }
1701 }
1702
1703 static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
1704 {
1705 struct imsm_map *map = get_imsm_map(dev, 1);
1706 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1707 __u32 strip = block / chunk;
1708
1709 switch (get_imsm_raid_level(map)) {
1710 case 1:
1711 case 10: {
1712 __u32 vol_strip = (strip * map->num_domains) + 1;
1713 __u32 vol_stripe = vol_strip / map->num_members;
1714
1715 return vol_stripe * chunk + block % chunk;
1716 } case 5: {
1717 __u32 stripe = strip / (map->num_members - 1);
1718
1719 return stripe * chunk + block % chunk;
1720 }
1721 default:
1722 return 0;
1723 }
1724 }
1725
1726 static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
1727 {
1728 /* calculate the conversion factor between per member 'blocks'
1729 * (md/{resync,rebuild}_start) and imsm migration units, return
1730 * 0 for the 'not migrating' and 'unsupported migration' cases
1731 */
1732 if (!dev->vol.migr_state)
1733 return 0;
1734
1735 switch (migr_type(dev)) {
1736 case MIGR_GEN_MIGR:
1737 case MIGR_VERIFY:
1738 case MIGR_REPAIR:
1739 case MIGR_INIT: {
1740 struct imsm_map *map = get_imsm_map(dev, 0);
1741 __u32 stripes_per_unit;
1742 __u32 blocks_per_unit;
1743 __u32 parity_depth;
1744 __u32 migr_chunk;
1745 __u32 block_map;
1746 __u32 block_rel;
1747 __u32 segment;
1748 __u32 stripe;
1749 __u8 disks;
1750
1751 /* yes, this is really the translation of migr_units to
1752 * per-member blocks in the 'resync' case
1753 */
1754 stripes_per_unit = num_stripes_per_unit_resync(dev);
1755 migr_chunk = migr_strip_blocks_resync(dev);
1756 disks = imsm_num_data_members(dev, 0);
1757 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
1758 stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
1759 segment = blocks_per_unit / stripe;
1760 block_rel = blocks_per_unit - segment * stripe;
1761 parity_depth = parity_segment_depth(dev);
1762 block_map = map_migr_block(dev, block_rel);
1763 return block_map + parity_depth * segment;
1764 }
1765 case MIGR_REBUILD: {
1766 __u32 stripes_per_unit;
1767 __u32 migr_chunk;
1768
1769 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
1770 migr_chunk = migr_strip_blocks_rebuild(dev);
1771 return migr_chunk * stripes_per_unit;
1772 }
1773 case MIGR_STATE_CHANGE:
1774 default:
1775 return 0;
1776 }
1777 }
1778
1779 static int imsm_level_to_layout(int level)
1780 {
1781 switch (level) {
1782 case 0:
1783 case 1:
1784 return 0;
1785 case 5:
1786 case 6:
1787 return ALGORITHM_LEFT_ASYMMETRIC;
1788 case 10:
1789 return 0x102;
1790 }
1791 return UnSet;
1792 }
1793
1794 /*******************************************************************************
1795 * Function: read_imsm_migr_rec
1796 * Description: Function reads imsm migration record from last sector of disk
1797 * Parameters:
1798 * fd : disk descriptor
1799 * super : metadata info
1800 * Returns:
1801 * 0 : success,
1802 * -1 : fail
1803 ******************************************************************************/
1804 static int read_imsm_migr_rec(int fd, struct intel_super *super)
1805 {
1806 int ret_val = -1;
1807 unsigned long long dsize;
1808
1809 get_dev_size(fd, NULL, &dsize);
1810 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
1811 fprintf(stderr,
1812 Name ": Cannot seek to anchor block: %s\n",
1813 strerror(errno));
1814 goto out;
1815 }
1816 if (read(fd, super->migr_rec_buf, 512) != 512) {
1817 fprintf(stderr,
1818 Name ": Cannot read migr record block: %s\n",
1819 strerror(errno));
1820 goto out;
1821 }
1822 ret_val = 0;
1823
1824 out:
1825 return ret_val;
1826 }
1827
1828 /*******************************************************************************
1829 * Function: load_imsm_migr_rec
1830 * Description: Function reads imsm migration record (it is stored at the last
1831 * sector of disk)
1832 * Parameters:
1833 * super : imsm internal array info
1834 * info : general array info
1835 * Returns:
1836 * 0 : success
1837 * -1 : fail
1838 ******************************************************************************/
1839 static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
1840 {
1841 struct mdinfo *sd;
1842 struct dl *dl = NULL;
1843 char nm[30];
1844 int retval = -1;
1845 int fd = -1;
1846
1847 if (info) {
1848 for (sd = info->devs ; sd ; sd = sd->next) {
1849 /* read only from one of the first two slots */
1850 if ((sd->disk.raid_disk > 1) ||
1851 (sd->disk.raid_disk < 0))
1852 continue;
1853 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1854 fd = dev_open(nm, O_RDONLY);
1855 if (fd >= 0)
1856 break;
1857 }
1858 }
1859 if (fd < 0) {
1860 for (dl = super->disks; dl; dl = dl->next) {
1861 /* read only from one of the first two slots */
1862 if (dl->index > 1)
1863 continue;
1864 sprintf(nm, "%d:%d", dl->major, dl->minor);
1865 fd = dev_open(nm, O_RDONLY);
1866 if (fd >= 0)
1867 break;
1868 }
1869 }
1870 if (fd < 0)
1871 goto out;
1872 retval = read_imsm_migr_rec(fd, super);
1873
1874 out:
1875 if (fd >= 0)
1876 close(fd);
1877 return retval;
1878 }
1879
1880 /*******************************************************************************
1881 * Function: write_imsm_migr_rec
1882 * Description: Function writes imsm migration record
1883 * (at the last sector of disk)
1884 * Parameters:
1885 * super : imsm internal array info
1886 * Returns:
1887 * 0 : success
1888 * -1 : if fail
1889 ******************************************************************************/
1890 static int write_imsm_migr_rec(struct supertype *st)
1891 {
1892 struct intel_super *super = st->sb;
1893 unsigned long long dsize;
1894 char nm[30];
1895 int fd = -1;
1896 int retval = -1;
1897 struct dl *sd;
1898
1899 for (sd = super->disks ; sd ; sd = sd->next) {
1900 /* write to 2 first slots only */
1901 if ((sd->index < 0) || (sd->index > 1))
1902 continue;
1903 sprintf(nm, "%d:%d", sd->major, sd->minor);
1904 fd = dev_open(nm, O_RDWR);
1905 if (fd < 0)
1906 continue;
1907 get_dev_size(fd, NULL, &dsize);
1908 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
1909 fprintf(stderr,
1910 Name ": Cannot seek to anchor block: %s\n",
1911 strerror(errno));
1912 goto out;
1913 }
1914 if (write(fd, super->migr_rec_buf, 512) != 512) {
1915 fprintf(stderr,
1916 Name ": Cannot write migr record block: %s\n",
1917 strerror(errno));
1918 goto out;
1919 }
1920 close(fd);
1921 fd = -1;
1922 }
1923
1924 retval = 0;
1925 out:
1926 if (fd >= 0)
1927 close(fd);
1928 return retval;
1929 }
1930
1931 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
1932 {
1933 struct intel_super *super = st->sb;
1934 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
1935 struct imsm_map *map = get_imsm_map(dev, 0);
1936 struct imsm_map *prev_map = get_imsm_map(dev, 1);
1937 struct imsm_map *map_to_analyse = map;
1938 struct dl *dl;
1939 char *devname;
1940 unsigned int component_size_alligment;
1941 int map_disks = info->array.raid_disks;
1942
1943 memset(info, 0, sizeof(*info));
1944 if (prev_map)
1945 map_to_analyse = prev_map;
1946
1947 for (dl = super->disks; dl; dl = dl->next)
1948 if (dl->raiddisk == info->disk.raid_disk)
1949 break;
1950 info->container_member = super->current_vol;
1951 info->array.raid_disks = map->num_members;
1952 info->array.level = get_imsm_raid_level(map_to_analyse);
1953 info->array.layout = imsm_level_to_layout(info->array.level);
1954 info->array.md_minor = -1;
1955 info->array.ctime = 0;
1956 info->array.utime = 0;
1957 info->array.chunk_size =
1958 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
1959 info->array.state = !dev->vol.dirty;
1960 info->custom_array_size = __le32_to_cpu(dev->size_high);
1961 info->custom_array_size <<= 32;
1962 info->custom_array_size |= __le32_to_cpu(dev->size_low);
1963 if (prev_map && map->map_state == prev_map->map_state) {
1964 info->reshape_active = 1;
1965 info->new_level = get_imsm_raid_level(map);
1966 info->new_layout = imsm_level_to_layout(info->new_level);
1967 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
1968 info->delta_disks = map->num_members - prev_map->num_members;
1969 if (info->delta_disks) {
1970 /* this needs to be applied to every array
1971 * in the container.
1972 */
1973 info->reshape_active = 2;
1974 }
1975 /* We shape information that we give to md might have to be
1976 * modify to cope with md's requirement for reshaping arrays.
1977 * For example, when reshaping a RAID0, md requires it to be
1978 * presented as a degraded RAID4.
1979 * Also if a RAID0 is migrating to a RAID5 we need to specify
1980 * the array as already being RAID5, but the 'before' layout
1981 * is a RAID4-like layout.
1982 */
1983 switch (info->array.level) {
1984 case 0:
1985 switch(info->new_level) {
1986 case 0:
1987 /* conversion is happening as RAID4 */
1988 info->array.level = 4;
1989 info->array.raid_disks += 1;
1990 break;
1991 case 5:
1992 /* conversion is happening as RAID5 */
1993 info->array.level = 5;
1994 info->array.layout = ALGORITHM_PARITY_N;
1995 info->array.raid_disks += 1;
1996 info->delta_disks -= 1;
1997 break;
1998 default:
1999 /* FIXME error message */
2000 info->array.level = UnSet;
2001 break;
2002 }
2003 break;
2004 }
2005 } else {
2006 info->new_level = UnSet;
2007 info->new_layout = UnSet;
2008 info->new_chunk = info->array.chunk_size;
2009 info->delta_disks = 0;
2010 }
2011 info->disk.major = 0;
2012 info->disk.minor = 0;
2013 if (dl) {
2014 info->disk.major = dl->major;
2015 info->disk.minor = dl->minor;
2016 }
2017
2018 info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
2019 info->component_size =
2020 __le32_to_cpu(map_to_analyse->blocks_per_member);
2021
2022 /* check component size aligment
2023 */
2024 component_size_alligment =
2025 info->component_size % (info->array.chunk_size/512);
2026
2027 if (component_size_alligment &&
2028 (info->array.level != 1) && (info->array.level != UnSet)) {
2029 dprintf("imsm: reported component size alligned from %llu ",
2030 info->component_size);
2031 info->component_size -= component_size_alligment;
2032 dprintf("to %llu (%i).\n",
2033 info->component_size, component_size_alligment);
2034 }
2035
2036 memset(info->uuid, 0, sizeof(info->uuid));
2037 info->recovery_start = MaxSector;
2038
2039 info->reshape_progress = 0;
2040 info->resync_start = MaxSector;
2041 if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
2042 dev->vol.dirty) {
2043 info->resync_start = 0;
2044 }
2045 if (dev->vol.migr_state) {
2046 switch (migr_type(dev)) {
2047 case MIGR_REPAIR:
2048 case MIGR_INIT: {
2049 __u64 blocks_per_unit = blocks_per_migr_unit(dev);
2050 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
2051
2052 info->resync_start = blocks_per_unit * units;
2053 break;
2054 }
2055 case MIGR_GEN_MIGR: {
2056 __u64 blocks_per_unit = blocks_per_migr_unit(dev);
2057 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
2058 unsigned long long array_blocks;
2059 int used_disks;
2060
2061 info->reshape_progress = blocks_per_unit * units;
2062
2063 dprintf("IMSM: General Migration checkpoint : %llu "
2064 "(%llu) -> read reshape progress : %llu\n",
2065 units, blocks_per_unit, info->reshape_progress);
2066
2067 used_disks = imsm_num_data_members(dev, 1);
2068 if (used_disks > 0) {
2069 array_blocks = map->blocks_per_member *
2070 used_disks;
2071 /* round array size down to closest MB
2072 */
2073 info->custom_array_size = (array_blocks
2074 >> SECT_PER_MB_SHIFT)
2075 << SECT_PER_MB_SHIFT;
2076 }
2077 }
2078 case MIGR_VERIFY:
2079 /* we could emulate the checkpointing of
2080 * 'sync_action=check' migrations, but for now
2081 * we just immediately complete them
2082 */
2083 case MIGR_REBUILD:
2084 /* this is handled by container_content_imsm() */
2085 case MIGR_STATE_CHANGE:
2086 /* FIXME handle other migrations */
2087 default:
2088 /* we are not dirty, so... */
2089 info->resync_start = MaxSector;
2090 }
2091 }
2092
2093 strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
2094 info->name[MAX_RAID_SERIAL_LEN] = 0;
2095
2096 info->array.major_version = -1;
2097 info->array.minor_version = -2;
2098 devname = devnum2devname(st->container_dev);
2099 *info->text_version = '\0';
2100 if (devname)
2101 sprintf(info->text_version, "/%s/%d", devname, info->container_member);
2102 free(devname);
2103 info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
2104 uuid_from_super_imsm(st, info->uuid);
2105
2106 if (dmap) {
2107 int i, j;
2108 for (i=0; i<map_disks; i++) {
2109 dmap[i] = 0;
2110 if (i < info->array.raid_disks) {
2111 struct imsm_disk *dsk;
2112 j = get_imsm_disk_idx(dev, i, -1);
2113 dsk = get_imsm_disk(super, j);
2114 if (dsk && (dsk->status & CONFIGURED_DISK))
2115 dmap[i] = 1;
2116 }
2117 }
2118 }
2119 }
2120
2121 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
2122 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
2123
2124 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2125 {
2126 struct dl *d;
2127
2128 for (d = super->missing; d; d = d->next)
2129 if (d->index == index)
2130 return &d->disk;
2131 return NULL;
2132 }
2133
2134 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
2135 {
2136 struct intel_super *super = st->sb;
2137 struct imsm_disk *disk;
2138 int map_disks = info->array.raid_disks;
2139 int max_enough = -1;
2140 int i;
2141 struct imsm_super *mpb;
2142
2143 if (super->current_vol >= 0) {
2144 getinfo_super_imsm_volume(st, info, map);
2145 return;
2146 }
2147 memset(info, 0, sizeof(*info));
2148
2149 /* Set raid_disks to zero so that Assemble will always pull in valid
2150 * spares
2151 */
2152 info->array.raid_disks = 0;
2153 info->array.level = LEVEL_CONTAINER;
2154 info->array.layout = 0;
2155 info->array.md_minor = -1;
2156 info->array.ctime = 0; /* N/A for imsm */
2157 info->array.utime = 0;
2158 info->array.chunk_size = 0;
2159
2160 info->disk.major = 0;
2161 info->disk.minor = 0;
2162 info->disk.raid_disk = -1;
2163 info->reshape_active = 0;
2164 info->array.major_version = -1;
2165 info->array.minor_version = -2;
2166 strcpy(info->text_version, "imsm");
2167 info->safe_mode_delay = 0;
2168 info->disk.number = -1;
2169 info->disk.state = 0;
2170 info->name[0] = 0;
2171 info->recovery_start = MaxSector;
2172
2173 /* do we have the all the insync disks that we expect? */
2174 mpb = super->anchor;
2175
2176 for (i = 0; i < mpb->num_raid_devs; i++) {
2177 struct imsm_dev *dev = get_imsm_dev(super, i);
2178 int failed, enough, j, missing = 0;
2179 struct imsm_map *map;
2180 __u8 state;
2181
2182 failed = imsm_count_failed(super, dev);
2183 state = imsm_check_degraded(super, dev, failed);
2184 map = get_imsm_map(dev, dev->vol.migr_state);
2185
2186 /* any newly missing disks?
2187 * (catches single-degraded vs double-degraded)
2188 */
2189 for (j = 0; j < map->num_members; j++) {
2190 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
2191 __u32 idx = ord_to_idx(ord);
2192
2193 if (!(ord & IMSM_ORD_REBUILD) &&
2194 get_imsm_missing(super, idx)) {
2195 missing = 1;
2196 break;
2197 }
2198 }
2199
2200 if (state == IMSM_T_STATE_FAILED)
2201 enough = -1;
2202 else if (state == IMSM_T_STATE_DEGRADED &&
2203 (state != map->map_state || missing))
2204 enough = 0;
2205 else /* we're normal, or already degraded */
2206 enough = 1;
2207
2208 /* in the missing/failed disk case check to see
2209 * if at least one array is runnable
2210 */
2211 max_enough = max(max_enough, enough);
2212 }
2213 dprintf("%s: enough: %d\n", __func__, max_enough);
2214 info->container_enough = max_enough;
2215
2216 if (super->disks) {
2217 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2218
2219 disk = &super->disks->disk;
2220 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2221 info->component_size = reserved;
2222 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
2223 /* we don't change info->disk.raid_disk here because
2224 * this state will be finalized in mdmon after we have
2225 * found the 'most fresh' version of the metadata
2226 */
2227 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2228 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2229 }
2230
2231 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2232 * ->compare_super may have updated the 'num_raid_devs' field for spares
2233 */
2234 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
2235 uuid_from_super_imsm(st, info->uuid);
2236 else
2237 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
2238
2239 /* I don't know how to compute 'map' on imsm, so use safe default */
2240 if (map) {
2241 int i;
2242 for (i = 0; i < map_disks; i++)
2243 map[i] = 1;
2244 }
2245
2246 }
2247
2248 /* allocates memory and fills disk in mdinfo structure
2249 * for each disk in array */
2250 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2251 {
2252 struct mdinfo *mddev = NULL;
2253 struct intel_super *super = st->sb;
2254 struct imsm_disk *disk;
2255 int count = 0;
2256 struct dl *dl;
2257 if (!super || !super->disks)
2258 return NULL;
2259 dl = super->disks;
2260 mddev = malloc(sizeof(*mddev));
2261 if (!mddev) {
2262 fprintf(stderr, Name ": Failed to allocate memory.\n");
2263 return NULL;
2264 }
2265 memset(mddev, 0, sizeof(*mddev));
2266 while (dl) {
2267 struct mdinfo *tmp;
2268 disk = &dl->disk;
2269 tmp = malloc(sizeof(*tmp));
2270 if (!tmp) {
2271 fprintf(stderr, Name ": Failed to allocate memory.\n");
2272 if (mddev)
2273 sysfs_free(mddev);
2274 return NULL;
2275 }
2276 memset(tmp, 0, sizeof(*tmp));
2277 if (mddev->devs)
2278 tmp->next = mddev->devs;
2279 mddev->devs = tmp;
2280 tmp->disk.number = count++;
2281 tmp->disk.major = dl->major;
2282 tmp->disk.minor = dl->minor;
2283 tmp->disk.state = is_configured(disk) ?
2284 (1 << MD_DISK_ACTIVE) : 0;
2285 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2286 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2287 tmp->disk.raid_disk = -1;
2288 dl = dl->next;
2289 }
2290 return mddev;
2291 }
2292
2293 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2294 char *update, char *devname, int verbose,
2295 int uuid_set, char *homehost)
2296 {
2297 /* For 'assemble' and 'force' we need to return non-zero if any
2298 * change was made. For others, the return value is ignored.
2299 * Update options are:
2300 * force-one : This device looks a bit old but needs to be included,
2301 * update age info appropriately.
2302 * assemble: clear any 'faulty' flag to allow this device to
2303 * be assembled.
2304 * force-array: Array is degraded but being forced, mark it clean
2305 * if that will be needed to assemble it.
2306 *
2307 * newdev: not used ????
2308 * grow: Array has gained a new device - this is currently for
2309 * linear only
2310 * resync: mark as dirty so a resync will happen.
2311 * name: update the name - preserving the homehost
2312 * uuid: Change the uuid of the array to match watch is given
2313 *
2314 * Following are not relevant for this imsm:
2315 * sparc2.2 : update from old dodgey metadata
2316 * super-minor: change the preferred_minor number
2317 * summaries: update redundant counters.
2318 * homehost: update the recorded homehost
2319 * _reshape_progress: record new reshape_progress position.
2320 */
2321 int rv = 1;
2322 struct intel_super *super = st->sb;
2323 struct imsm_super *mpb;
2324
2325 /* we can only update container info */
2326 if (!super || super->current_vol >= 0 || !super->anchor)
2327 return 1;
2328
2329 mpb = super->anchor;
2330
2331 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
2332 rv = -1;
2333 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2334 mpb->orig_family_num = *((__u32 *) info->update_private);
2335 rv = 0;
2336 } else if (strcmp(update, "uuid") == 0) {
2337 __u32 *new_family = malloc(sizeof(*new_family));
2338
2339 /* update orig_family_number with the incoming random
2340 * data, report the new effective uuid, and store the
2341 * new orig_family_num for future updates.
2342 */
2343 if (new_family) {
2344 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2345 uuid_from_super_imsm(st, info->uuid);
2346 *new_family = mpb->orig_family_num;
2347 info->update_private = new_family;
2348 rv = 0;
2349 }
2350 } else if (strcmp(update, "assemble") == 0)
2351 rv = 0;
2352 else
2353 rv = -1;
2354
2355 /* successful update? recompute checksum */
2356 if (rv == 0)
2357 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
2358
2359 return rv;
2360 }
2361
2362 static size_t disks_to_mpb_size(int disks)
2363 {
2364 size_t size;
2365
2366 size = sizeof(struct imsm_super);
2367 size += (disks - 1) * sizeof(struct imsm_disk);
2368 size += 2 * sizeof(struct imsm_dev);
2369 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2370 size += (4 - 2) * sizeof(struct imsm_map);
2371 /* 4 possible disk_ord_tbl's */
2372 size += 4 * (disks - 1) * sizeof(__u32);
2373
2374 return size;
2375 }
2376
2377 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2378 {
2379 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2380 return 0;
2381
2382 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
2383 }
2384
2385 static void free_devlist(struct intel_super *super)
2386 {
2387 struct intel_dev *dv;
2388
2389 while (super->devlist) {
2390 dv = super->devlist->next;
2391 free(super->devlist->dev);
2392 free(super->devlist);
2393 super->devlist = dv;
2394 }
2395 }
2396
/* Copy 'src' into 'dest'.  The byte count is sizeof_imsm_dev(src, 0);
 * 'dest' must be at least that large.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
2401
2402 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2403 {
2404 /*
2405 * return:
2406 * 0 same, or first was empty, and second was copied
2407 * 1 second had wrong number
2408 * 2 wrong uuid
2409 * 3 wrong other info
2410 */
2411 struct intel_super *first = st->sb;
2412 struct intel_super *sec = tst->sb;
2413
2414 if (!first) {
2415 st->sb = tst->sb;
2416 tst->sb = NULL;
2417 return 0;
2418 }
2419 /* in platform dependent environment test if the disks
2420 * use the same Intel hba
2421 */
2422 if (!check_env("IMSM_NO_PLATFORM")) {
2423 if (!first->hba || !sec->hba ||
2424 (first->hba->type != sec->hba->type)) {
2425 fprintf(stderr,
2426 "HBAs of devices does not match %s != %s\n",
2427 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2428 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
2429 return 3;
2430 }
2431 }
2432
2433 /* if an anchor does not have num_raid_devs set then it is a free
2434 * floating spare
2435 */
2436 if (first->anchor->num_raid_devs > 0 &&
2437 sec->anchor->num_raid_devs > 0) {
2438 /* Determine if these disks might ever have been
2439 * related. Further disambiguation can only take place
2440 * in load_super_imsm_all
2441 */
2442 __u32 first_family = first->anchor->orig_family_num;
2443 __u32 sec_family = sec->anchor->orig_family_num;
2444
2445 if (memcmp(first->anchor->sig, sec->anchor->sig,
2446 MAX_SIGNATURE_LENGTH) != 0)
2447 return 3;
2448
2449 if (first_family == 0)
2450 first_family = first->anchor->family_num;
2451 if (sec_family == 0)
2452 sec_family = sec->anchor->family_num;
2453
2454 if (first_family != sec_family)
2455 return 3;
2456
2457 }
2458
2459
2460 /* if 'first' is a spare promote it to a populated mpb with sec's
2461 * family number
2462 */
2463 if (first->anchor->num_raid_devs == 0 &&
2464 sec->anchor->num_raid_devs > 0) {
2465 int i;
2466 struct intel_dev *dv;
2467 struct imsm_dev *dev;
2468
2469 /* we need to copy raid device info from sec if an allocation
2470 * fails here we don't associate the spare
2471 */
2472 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
2473 dv = malloc(sizeof(*dv));
2474 if (!dv)
2475 break;
2476 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2477 if (!dev) {
2478 free(dv);
2479 break;
2480 }
2481 dv->dev = dev;
2482 dv->index = i;
2483 dv->next = first->devlist;
2484 first->devlist = dv;
2485 }
2486 if (i < sec->anchor->num_raid_devs) {
2487 /* allocation failure */
2488 free_devlist(first);
2489 fprintf(stderr, "imsm: failed to associate spare\n");
2490 return 3;
2491 }
2492 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
2493 first->anchor->orig_family_num = sec->anchor->orig_family_num;
2494 first->anchor->family_num = sec->anchor->family_num;
2495 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
2496 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2497 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
2498 }
2499
2500 return 0;
2501 }
2502
2503 static void fd2devname(int fd, char *name)
2504 {
2505 struct stat st;
2506 char path[256];
2507 char dname[PATH_MAX];
2508 char *nm;
2509 int rv;
2510
2511 name[0] = '\0';
2512 if (fstat(fd, &st) != 0)
2513 return;
2514 sprintf(path, "/sys/dev/block/%d:%d",
2515 major(st.st_rdev), minor(st.st_rdev));
2516
2517 rv = readlink(path, dname, sizeof(dname));
2518 if (rv <= 0)
2519 return;
2520
2521 dname[rv] = '\0';
2522 nm = strrchr(dname, '/');
2523 nm++;
2524 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2525 }
2526
2527 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2528
2529 static int imsm_read_serial(int fd, char *devname,
2530 __u8 serial[MAX_RAID_SERIAL_LEN])
2531 {
2532 unsigned char scsi_serial[255];
2533 int rv;
2534 int rsp_len;
2535 int len;
2536 char *dest;
2537 char *src;
2538 char *rsp_buf;
2539 int i;
2540
2541 memset(scsi_serial, 0, sizeof(scsi_serial));
2542
2543 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
2544
2545 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
2546 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2547 fd2devname(fd, (char *) serial);
2548 return 0;
2549 }
2550
2551 if (rv != 0) {
2552 if (devname)
2553 fprintf(stderr,
2554 Name ": Failed to retrieve serial for %s\n",
2555 devname);
2556 return rv;
2557 }
2558
2559 rsp_len = scsi_serial[3];
2560 if (!rsp_len) {
2561 if (devname)
2562 fprintf(stderr,
2563 Name ": Failed to retrieve serial for %s\n",
2564 devname);
2565 return 2;
2566 }
2567 rsp_buf = (char *) &scsi_serial[4];
2568
2569 /* trim all whitespace and non-printable characters and convert
2570 * ':' to ';'
2571 */
2572 for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
2573 src = &rsp_buf[i];
2574 if (*src > 0x20) {
2575 /* ':' is reserved for use in placeholder serial
2576 * numbers for missing disks
2577 */
2578 if (*src == ':')
2579 *dest++ = ';';
2580 else
2581 *dest++ = *src;
2582 }
2583 }
2584 len = dest - rsp_buf;
2585 dest = rsp_buf;
2586
2587 /* truncate leading characters */
2588 if (len > MAX_RAID_SERIAL_LEN) {
2589 dest += len - MAX_RAID_SERIAL_LEN;
2590 len = MAX_RAID_SERIAL_LEN;
2591 }
2592
2593 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2594 memcpy(serial, dest, len);
2595
2596 return 0;
2597 }
2598
2599 static int serialcmp(__u8 *s1, __u8 *s2)
2600 {
2601 return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
2602 }
2603
2604 static void serialcpy(__u8 *dest, __u8 *src)
2605 {
2606 strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
2607 }
2608
2609 #ifndef MDASSEMBLE
2610 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2611 {
2612 struct dl *dl;
2613
2614 for (dl = super->disks; dl; dl = dl->next)
2615 if (serialcmp(dl->serial, serial) == 0)
2616 break;
2617
2618 return dl;
2619 }
2620 #endif
2621
2622 static struct imsm_disk *
2623 __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2624 {
2625 int i;
2626
2627 for (i = 0; i < mpb->num_disks; i++) {
2628 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2629
2630 if (serialcmp(disk->serial, serial) == 0) {
2631 if (idx)
2632 *idx = i;
2633 return disk;
2634 }
2635 }
2636
2637 return NULL;
2638 }
2639
2640 static int
2641 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2642 {
2643 struct imsm_disk *disk;
2644 struct dl *dl;
2645 struct stat stb;
2646 int rv;
2647 char name[40];
2648 __u8 serial[MAX_RAID_SERIAL_LEN];
2649
2650 rv = imsm_read_serial(fd, devname, serial);
2651
2652 if (rv != 0)
2653 return 2;
2654
2655 dl = calloc(1, sizeof(*dl));
2656 if (!dl) {
2657 if (devname)
2658 fprintf(stderr,
2659 Name ": failed to allocate disk buffer for %s\n",
2660 devname);
2661 return 2;
2662 }
2663
2664 fstat(fd, &stb);
2665 dl->major = major(stb.st_rdev);
2666 dl->minor = minor(stb.st_rdev);
2667 dl->next = super->disks;
2668 dl->fd = keep_fd ? fd : -1;
2669 assert(super->disks == NULL);
2670 super->disks = dl;
2671 serialcpy(dl->serial, serial);
2672 dl->index = -2;
2673 dl->e = NULL;
2674 fd2devname(fd, name);
2675 if (devname)
2676 dl->devname = strdup(devname);
2677 else
2678 dl->devname = strdup(name);
2679
2680 /* look up this disk's index in the current anchor */
2681 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2682 if (disk) {
2683 dl->disk = *disk;
2684 /* only set index on disks that are a member of a
2685 * populated contianer, i.e. one with raid_devs
2686 */
2687 if (is_failed(&dl->disk))
2688 dl->index = -2;
2689 else if (is_spare(&dl->disk))
2690 dl->index = -1;
2691 }
2692
2693 return 0;
2694 }
2695
2696 #ifndef MDASSEMBLE
2697 /* When migrating map0 contains the 'destination' state while map1
2698 * contains the current state. When not migrating map0 contains the
2699 * current state. This routine assumes that map[0].map_state is set to
2700 * the current array state before being called.
2701 *
2702 * Migration is indicated by one of the following states
2703 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2704 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2705 * map1state=unitialized)
2706 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2707 * map1state=normal)
2708 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2709 * map1state=degraded)
2710 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2711 * map1state=normal)
2712 */
2713 static void migrate(struct imsm_dev *dev, struct intel_super *super,
2714 __u8 to_state, int migr_type)
2715 {
2716 struct imsm_map *dest;
2717 struct imsm_map *src = get_imsm_map(dev, 0);
2718
2719 dev->vol.migr_state = 1;
2720 set_migr_type(dev, migr_type);
2721 dev->vol.curr_migr_unit = 0;
2722 dest = get_imsm_map(dev, 1);
2723
2724 /* duplicate and then set the target end state in map[0] */
2725 memcpy(dest, src, sizeof_imsm_map(src));
2726 if ((migr_type == MIGR_REBUILD) ||
2727 (migr_type == MIGR_GEN_MIGR)) {
2728 __u32 ord;
2729 int i;
2730
2731 for (i = 0; i < src->num_members; i++) {
2732 ord = __le32_to_cpu(src->disk_ord_tbl[i]);
2733 set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
2734 }
2735 }
2736
2737 if (migr_type == MIGR_GEN_MIGR)
2738 /* Clear migration record */
2739 memset(super->migr_rec, 0, sizeof(struct migr_record));
2740
2741 src->map_state = to_state;
2742 }
2743
2744 static void end_migration(struct imsm_dev *dev, __u8 map_state)
2745 {
2746 struct imsm_map *map = get_imsm_map(dev, 0);
2747 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
2748 int i, j;
2749
2750 /* merge any IMSM_ORD_REBUILD bits that were not successfully
2751 * completed in the last migration.
2752 *
2753 * FIXME add support for raid-level-migration
2754 */
2755 for (i = 0; i < prev->num_members; i++)
2756 for (j = 0; j < map->num_members; j++)
2757 /* during online capacity expansion
2758 * disks position can be changed if takeover is used
2759 */
2760 if (ord_to_idx(map->disk_ord_tbl[j]) ==
2761 ord_to_idx(prev->disk_ord_tbl[i])) {
2762 map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
2763 break;
2764 }
2765
2766 dev->vol.migr_state = 0;
2767 dev->vol.migr_type = 0;
2768 dev->vol.curr_migr_unit = 0;
2769 map->map_state = map_state;
2770 }
2771 #endif
2772
2773 static int parse_raid_devices(struct intel_super *super)
2774 {
2775 int i;
2776 struct imsm_dev *dev_new;
2777 size_t len, len_migr;
2778 size_t max_len = 0;
2779 size_t space_needed = 0;
2780 struct imsm_super *mpb = super->anchor;
2781
2782 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2783 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
2784 struct intel_dev *dv;
2785
2786 len = sizeof_imsm_dev(dev_iter, 0);
2787 len_migr = sizeof_imsm_dev(dev_iter, 1);
2788 if (len_migr > len)
2789 space_needed += len_migr - len;
2790
2791 dv = malloc(sizeof(*dv));
2792 if (!dv)
2793 return 1;
2794 if (max_len < len_migr)
2795 max_len = len_migr;
2796 if (max_len > len_migr)
2797 space_needed += max_len - len_migr;
2798 dev_new = malloc(max_len);
2799 if (!dev_new) {
2800 free(dv);
2801 return 1;
2802 }
2803 imsm_copy_dev(dev_new, dev_iter);
2804 dv->dev = dev_new;
2805 dv->index = i;
2806 dv->next = super->devlist;
2807 super->devlist = dv;
2808 }
2809
2810 /* ensure that super->buf is large enough when all raid devices
2811 * are migrating
2812 */
2813 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
2814 void *buf;
2815
2816 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
2817 if (posix_memalign(&buf, 512, len) != 0)
2818 return 1;
2819
2820 memcpy(buf, super->buf, super->len);
2821 memset(buf + super->len, 0, len - super->len);
2822 free(super->buf);
2823 super->buf = buf;
2824 super->len = len;
2825 }
2826
2827 return 0;
2828 }
2829
2830 /* retrieve a pointer to the bbm log which starts after all raid devices */
2831 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2832 {
2833 void *ptr = NULL;
2834
2835 if (__le32_to_cpu(mpb->bbm_log_size)) {
2836 ptr = mpb;
2837 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2838 }
2839
2840 return ptr;
2841 }
2842
2843 /*******************************************************************************
2844 * Function: check_mpb_migr_compatibility
2845 * Description: Function checks for unsupported migration features:
2846 * - migration optimization area (pba_of_lba0)
2847 * - descending reshape (ascending_migr)
2848 * Parameters:
2849 * super : imsm metadata information
2850 * Returns:
2851 * 0 : migration is compatible
2852 * -1 : migration is not compatible
2853 ******************************************************************************/
2854 int check_mpb_migr_compatibility(struct intel_super *super)
2855 {
2856 struct imsm_map *map0, *map1;
2857 struct migr_record *migr_rec = super->migr_rec;
2858 int i;
2859
2860 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2861 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
2862
2863 if (dev_iter &&
2864 dev_iter->vol.migr_state == 1 &&
2865 dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
2866 /* This device is migrating */
2867 map0 = get_imsm_map(dev_iter, 0);
2868 map1 = get_imsm_map(dev_iter, 1);
2869 if (map0->pba_of_lba0 != map1->pba_of_lba0)
2870 /* migration optimization area was used */
2871 return -1;
2872 if (migr_rec->ascending_migr == 0
2873 && migr_rec->dest_depth_per_unit > 0)
2874 /* descending reshape not supported yet */
2875 return -1;
2876 }
2877 }
2878 return 0;
2879 }
2880
2881 static void __free_imsm(struct intel_super *super, int free_disks);
2882
2883 /* load_imsm_mpb - read matrix metadata
2884 * allocates super->mpb to be freed by free_imsm
2885 */
2886 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2887 {
2888 unsigned long long dsize;
2889 unsigned long long sectors;
2890 struct stat;
2891 struct imsm_super *anchor;
2892 __u32 check_sum;
2893
2894 get_dev_size(fd, NULL, &dsize);
2895 if (dsize < 1024) {
2896 if (devname)
2897 fprintf(stderr,
2898 Name ": %s: device to small for imsm\n",
2899 devname);
2900 return 1;
2901 }
2902
2903 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2904 if (devname)
2905 fprintf(stderr,
2906 Name ": Cannot seek to anchor block on %s: %s\n",
2907 devname, strerror(errno));
2908 return 1;
2909 }
2910
2911 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
2912 if (devname)
2913 fprintf(stderr,
2914 Name ": Failed to allocate imsm anchor buffer"
2915 " on %s\n", devname);
2916 return 1;
2917 }
2918 if (read(fd, anchor, 512) != 512) {
2919 if (devname)
2920 fprintf(stderr,
2921 Name ": Cannot read anchor block on %s: %s\n",
2922 devname, strerror(errno));
2923 free(anchor);
2924 return 1;
2925 }
2926
2927 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
2928 if (devname)
2929 fprintf(stderr,
2930 Name ": no IMSM anchor on %s\n", devname);
2931 free(anchor);
2932 return 2;
2933 }
2934
2935 __free_imsm(super, 0);
2936 /* reload capability and hba */
2937
2938 /* capability and hba must be updated with new super allocation */
2939 find_intel_hba_capability(fd, super, devname);
2940 super->len = ROUND_UP(anchor->mpb_size, 512);
2941 if (posix_memalign(&super->buf, 512, super->len) != 0) {
2942 if (devname)
2943 fprintf(stderr,
2944 Name ": unable to allocate %zu byte mpb buffer\n",
2945 super->len);
2946 free(anchor);
2947 return 2;
2948 }
2949 memcpy(super->buf, anchor, 512);
2950
2951 sectors = mpb_sectors(anchor) - 1;
2952 free(anchor);
2953
2954 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
2955 fprintf(stderr, Name
2956 ": %s could not allocate migr_rec buffer\n", __func__);
2957 free(super->buf);
2958 return 2;
2959 }
2960
2961 if (!sectors) {
2962 check_sum = __gen_imsm_checksum(super->anchor);
2963 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2964 if (devname)
2965 fprintf(stderr,
2966 Name ": IMSM checksum %x != %x on %s\n",
2967 check_sum,
2968 __le32_to_cpu(super->anchor->check_sum),
2969 devname);
2970 return 2;
2971 }
2972
2973 return 0;
2974 }
2975
2976 /* read the extended mpb */
2977 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
2978 if (devname)
2979 fprintf(stderr,
2980 Name ": Cannot seek to extended mpb on %s: %s\n",
2981 devname, strerror(errno));
2982 return 1;
2983 }
2984
2985 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
2986 if (devname)
2987 fprintf(stderr,
2988 Name ": Cannot read extended mpb on %s: %s\n",
2989 devname, strerror(errno));
2990 return 2;
2991 }
2992
2993 check_sum = __gen_imsm_checksum(super->anchor);
2994 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2995 if (devname)
2996 fprintf(stderr,
2997 Name ": IMSM checksum %x != %x on %s\n",
2998 check_sum, __le32_to_cpu(super->anchor->check_sum),
2999 devname);
3000 return 3;
3001 }
3002
3003 /* FIXME the BBM log is disk specific so we cannot use this global
3004 * buffer for all disks. Ok for now since we only look at the global
3005 * bbm_log_size parameter to gate assembly
3006 */
3007 super->bbm_log = __get_imsm_bbm_log(super->anchor);
3008
3009 return 0;
3010 }
3011
3012 static int read_imsm_migr_rec(int fd, struct intel_super *super);
3013
/* Load the mpb from 'fd', register the disk behind 'fd' and parse the
 * raid devices, in that order.  Stops at the first step that fails and
 * returns its error code; returns 0 when all three succeed.
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (!err)
		err = load_imsm_disk(fd, super, devname, keep_fd);
	if (!err)
		err = parse_raid_devices(super);

	return err;
}
3029
3030 static void __free_imsm_disk(struct dl *d)
3031 {
3032 if (d->fd >= 0)
3033 close(d->fd);
3034 if (d->devname)
3035 free(d->devname);
3036 if (d->e)
3037 free(d->e);
3038 free(d);
3039
3040 }
3041
3042 static void free_imsm_disks(struct intel_super *super)
3043 {
3044 struct dl *d;
3045
3046 while (super->disks) {
3047 d = super->disks;
3048 super->disks = d->next;
3049 __free_imsm_disk(d);
3050 }
3051 while (super->disk_mgmt_list) {
3052 d = super->disk_mgmt_list;
3053 super->disk_mgmt_list = d->next;
3054 __free_imsm_disk(d);
3055 }
3056 while (super->missing) {
3057 d = super->missing;
3058 super->missing = d->next;
3059 __free_imsm_disk(d);
3060 }
3061
3062 }
3063
3064 /* free all the pieces hanging off of a super pointer */
3065 static void __free_imsm(struct intel_super *super, int free_disks)
3066 {
3067 struct intel_hba *elem, *next;
3068
3069 if (super->buf) {
3070 free(super->buf);
3071 super->buf = NULL;
3072 }
3073 /* unlink capability description */
3074 super->orom = NULL;
3075 if (super->migr_rec_buf) {
3076 free(super->migr_rec_buf);
3077 super->migr_rec_buf = NULL;
3078 }
3079 if (free_disks)
3080 free_imsm_disks(super);
3081 free_devlist(super);
3082 elem = super->hba;
3083 while (elem) {
3084 if (elem->path)
3085 free((void *)elem->path);
3086 next = elem->next;
3087 free(elem);
3088 elem = next;
3089 }
3090 super->hba = NULL;
3091 }
3092
/* Destroy 'super' completely: everything attached to it, disk lists
 * included, and then the structure itself.
 */
static void free_imsm(struct intel_super *super)
{
	const int with_disks = 1;

	__free_imsm(super, with_disks);
	free(super);
}
3098
3099 static void free_super_imsm(struct supertype *st)
3100 {
3101 struct intel_super *super = st->sb;
3102
3103 if (!super)
3104 return;
3105
3106 free_imsm(super);
3107 st->sb = NULL;
3108 }
3109
3110 static struct intel_super *alloc_super(void)
3111 {
3112 struct intel_super *super = malloc(sizeof(*super));
3113
3114 if (super) {
3115 memset(super, 0, sizeof(*super));
3116 super->current_vol = -1;
3117 super->create_offset = ~((__u32 ) 0);
3118 }
3119 return super;
3120 }
3121
3122 /*
3123 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3124 */
3125 static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
3126 {
3127 struct sys_dev *hba_name;
3128 int rv = 0;
3129
3130 if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
3131 super->orom = NULL;
3132 super->hba = NULL;
3133 return 0;
3134 }
3135 hba_name = find_disk_attached_hba(fd, NULL);
3136 if (!hba_name) {
3137 if (devname)
3138 fprintf(stderr,
3139 Name ": %s is not attached to Intel(R) RAID controller.\n",
3140 devname);
3141 return 1;
3142 }
3143 rv = attach_hba_to_super(super, hba_name);
3144 if (rv == 2) {
3145 if (devname) {
3146 struct intel_hba *hba = super->hba;
3147
3148 fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
3149 "controller (%s),\n"
3150 " but the container is assigned to Intel(R) "
3151 "%s RAID controller (",
3152 devname,
3153 hba_name->path,
3154 hba_name->pci_id ? : "Err!",
3155 get_sys_dev_type(hba_name->type));
3156
3157 while (hba) {
3158 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
3159 if (hba->next)
3160 fprintf(stderr, ", ");
3161 hba = hba->next;
3162 }
3163
3164 fprintf(stderr, ").\n"
3165 " Mixing devices attached to different controllers "
3166 "is not allowed.\n");
3167 }
3168 free_sys_dev(&hba_name);
3169 return 2;
3170 }
3171 super->orom = find_imsm_capability(hba_name->type);
3172 free_sys_dev(&hba_name);
3173 if (!super->orom)
3174 return 3;
3175 return 0;
3176 }
3177
3178 #ifndef MDASSEMBLE
3179 /* find_missing - helper routine for load_super_imsm_all that identifies
3180 * disks that have disappeared from the system. This routine relies on
3181 * the mpb being uptodate, which it is at load time.
3182 */
3183 static int find_missing(struct intel_super *super)
3184 {
3185 int i;
3186 struct imsm_super *mpb = super->anchor;
3187 struct dl *dl;
3188 struct imsm_disk *disk;
3189
3190 for (i = 0; i < mpb->num_disks; i++) {
3191 disk = __get_imsm_disk(mpb, i);
3192 dl = serial_to_dl(disk->serial, super);
3193 if (dl)
3194 continue;
3195
3196 dl = malloc(sizeof(*dl));
3197 if (!dl)
3198 return 1;
3199 dl->major = 0;
3200 dl->minor = 0;
3201 dl->fd = -1;
3202 dl->devname = strdup("missing");
3203 dl->index = i;
3204 serialcpy(dl->serial, disk->serial);
3205 dl->disk = *disk;
3206 dl->e = NULL;
3207 dl->next = super->missing;
3208 super->missing = dl;
3209 }
3210
3211 return 0;
3212 }
3213
3214 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3215 {
3216 struct intel_disk *idisk = disk_list;
3217
3218 while (idisk) {
3219 if (serialcmp(idisk->disk.serial, serial) == 0)
3220 break;
3221 idisk = idisk->next;
3222 }
3223
3224 return idisk;
3225 }
3226
3227 static int __prep_thunderdome(struct intel_super **table, int tbl_size,
3228 struct intel_super *super,
3229 struct intel_disk **disk_list)
3230 {
3231 struct imsm_disk *d = &super->disks->disk;
3232 struct imsm_super *mpb = super->anchor;
3233 int i, j;
3234
3235 for (i = 0; i < tbl_size; i++) {
3236 struct imsm_super *tbl_mpb = table[i]->anchor;
3237 struct imsm_disk *tbl_d = &table[i]->disks->disk;
3238
3239 if (tbl_mpb->family_num == mpb->family_num) {
3240 if (tbl_mpb->check_sum == mpb->check_sum) {
3241 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3242 __func__, super->disks->major,
3243 super->disks->minor,
3244 table[i]->disks->major,
3245 table[i]->disks->minor);
3246 break;
3247 }
3248
3249 if (((is_configured(d) && !is_configured(tbl_d)) ||
3250 is_configured(d) == is_configured(tbl_d)) &&
3251 tbl_mpb->generation_num < mpb->generation_num) {
3252 /* current version of the mpb is a
3253 * better candidate than the one in
3254 * super_table, but copy over "cross
3255 * generational" status
3256 */
3257 struct intel_disk *idisk;
3258
3259 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3260 __func__, super->disks->major,
3261 super->disks->minor,
3262 table[i]->disks->major,
3263 table[i]->disks->minor);
3264
3265 idisk = disk_list_get(tbl_d->serial, *disk_list);
3266 if (idisk && is_failed(&idisk->disk))
3267 tbl_d->status |= FAILED_DISK;
3268 break;
3269 } else {
3270 struct intel_disk *idisk;
3271 struct imsm_disk *disk;
3272
3273 /* tbl_mpb is more up to date, but copy
3274 * over cross generational status before
3275 * returning
3276 */
3277 disk = __serial_to_disk(d->serial, mpb, NULL);
3278 if (disk && is_failed(disk))
3279 d->status |= FAILED_DISK;
3280
3281 idisk = disk_list_get(d->serial, *disk_list);
3282 if (idisk) {
3283 idisk->owner = i;
3284 if (disk && is_configured(disk))
3285 idisk->disk.status |= CONFIGURED_DISK;
3286 }
3287
3288 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3289 __func__, super->disks->major,
3290 super->disks->minor,
3291 table[i]->disks->major,
3292 table[i]->disks->minor);
3293
3294 return tbl_size;
3295 }
3296 }
3297 }
3298
3299 if (i >= tbl_size)
3300 table[tbl_size++] = super;
3301 else
3302 table[i] = super;
3303
3304 /* update/extend the merged list of imsm_disk records */
3305 for (j = 0; j < mpb->num_disks; j++) {
3306 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
3307 struct intel_disk *idisk;
3308
3309 idisk = disk_list_get(disk->serial, *disk_list);
3310 if (idisk) {
3311 idisk->disk.status |= disk->status;
3312 if (is_configured(&idisk->disk) ||
3313 is_failed(&idisk->disk))
3314 idisk->disk.status &= ~(SPARE_DISK);
3315 } else {
3316 idisk = calloc(1, sizeof(*idisk));
3317 if (!idisk)
3318 return -1;
3319 idisk->owner = IMSM_UNKNOWN_OWNER;
3320 idisk->disk = *disk;
3321 idisk->next = *disk_list;
3322 *disk_list = idisk;
3323 }
3324
3325 if (serialcmp(idisk->disk.serial, d->serial) == 0)
3326 idisk->owner = i;
3327 }
3328
3329 return tbl_size;
3330 }
3331
3332 static struct intel_super *
3333 validate_members(struct intel_super *super, struct intel_disk *disk_list,
3334 const int owner)
3335 {
3336 struct imsm_super *mpb = super->anchor;
3337 int ok_count = 0;
3338 int i;
3339
3340 for (i = 0; i < mpb->num_disks; i++) {
3341 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3342 struct intel_disk *idisk;
3343
3344 idisk = disk_list_get(disk->serial, disk_list);
3345 if (idisk) {
3346 if (idisk->owner == owner ||
3347 idisk->owner == IMSM_UNKNOWN_OWNER)
3348 ok_count++;
3349 else
3350 dprintf("%s: '%.16s' owner %d != %d\n",
3351 __func__, disk->serial, idisk->owner,
3352 owner);
3353 } else {
3354 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3355 __func__, __le32_to_cpu(mpb->family_num), i,
3356 disk->serial);
3357 break;
3358 }
3359 }
3360
3361 if (ok_count == mpb->num_disks)
3362 return super;
3363 return NULL;
3364 }
3365
3366 static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3367 {
3368 struct intel_super *s;
3369
3370 for (s = super_list; s; s = s->next) {
3371 if (family_num != s->anchor->family_num)
3372 continue;
3373 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3374 __le32_to_cpu(family_num), s->disks->devname);
3375 }
3376 }
3377
3378 static struct intel_super *
3379 imsm_thunderdome(struct intel_super **super_list, int len)
3380 {
3381 struct intel_super *super_table[len];
3382 struct intel_disk *disk_list = NULL;
3383 struct intel_super *champion, *spare;
3384 struct intel_super *s, **del;
3385 int tbl_size = 0;
3386 int conflict;
3387 int i;
3388
3389 memset(super_table, 0, sizeof(super_table));
3390 for (s = *super_list; s; s = s->next)
3391 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
3392
3393 for (i = 0; i < tbl_size; i++) {
3394 struct imsm_disk *d;
3395 struct intel_disk *idisk;
3396 struct imsm_super *mpb = super_table[i]->anchor;
3397
3398 s = super_table[i];
3399 d = &s->disks->disk;
3400
3401 /* 'd' must appear in merged disk list for its
3402 * configuration to be valid
3403 */
3404 idisk = disk_list_get(d->serial, disk_list);
3405 if (idisk && idisk->owner == i)
3406 s = validate_members(s, disk_list, i);
3407 else
3408 s = NULL;
3409
3410 if (!s)
3411 dprintf("%s: marking family: %#x from %d:%d offline\n",
3412 __func__, mpb->family_num,
3413 super_table[i]->disks->major,
3414 super_table[i]->disks->minor);
3415 super_table[i] = s;
3416 }
3417
3418 /* This is where the mdadm implementation differs from the Windows
3419 * driver which has no strict concept of a container. We can only
3420 * assemble one family from a container, so when returning a prodigal
3421 * array member to this system the code will not be able to disambiguate
3422 * the container contents that should be assembled ("foreign" versus
3423 * "local"). It requires user intervention to set the orig_family_num
3424 * to a new value to establish a new container. The Windows driver in
3425 * this situation fixes up the volume name in place and manages the
3426 * foreign array as an independent entity.
3427 */
3428 s = NULL;
3429 spare = NULL;
3430 conflict = 0;
3431 for (i = 0; i < tbl_size; i++) {
3432 struct intel_super *tbl_ent = super_table[i];
3433 int is_spare = 0;
3434
3435 if (!tbl_ent)
3436 continue;
3437
3438 if (tbl_ent->anchor->num_raid_devs == 0) {
3439 spare = tbl_ent;
3440 is_spare = 1;
3441 }
3442
3443 if (s && !is_spare) {
3444 show_conflicts(tbl_ent->anchor->family_num, *super_list);
3445 conflict++;
3446 } else if (!s && !is_spare)
3447 s = tbl_ent;
3448 }
3449
3450 if (!s)
3451 s = spare;
3452 if (!s) {
3453 champion = NULL;
3454 goto out;
3455 }
3456 champion = s;
3457
3458 if (conflict)
3459 fprintf(stderr, "Chose family %#x on '%s', "
3460 "assemble conflicts to new container with '--update=uuid'\n",
3461 __le32_to_cpu(s->anchor->family_num), s->disks->devname);
3462
3463 /* collect all dl's onto 'champion', and update them to
3464 * champion's version of the status
3465 */
3466 for (s = *super_list; s; s = s->next) {
3467 struct imsm_super *mpb = champion->anchor;
3468 struct dl *dl = s->disks;
3469
3470 if (s == champion)
3471 continue;
3472
3473 for (i = 0; i < mpb->num_disks; i++) {
3474 struct imsm_disk *disk;
3475
3476 disk = __serial_to_disk(dl->serial, mpb, &dl->index);
3477 if (disk) {
3478 dl->disk = *disk;
3479 /* only set index on disks that are a member of
3480 * a populated contianer, i.e. one with
3481 * raid_devs
3482 */
3483 if (is_failed(&dl->disk))
3484 dl->index = -2;
3485 else if (is_spare(&dl->disk))
3486 dl->index = -1;
3487 break;
3488 }
3489 }
3490
3491 if (i >= mpb->num_disks) {
3492 struct intel_disk *idisk;
3493
3494 idisk = disk_list_get(dl->serial, disk_list);
3495 if (idisk && is_spare(&idisk->disk) &&
3496 !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
3497 dl->index = -1;
3498 else {
3499 dl->index = -2;
3500 continue;
3501 }
3502 }
3503
3504 dl->next = champion->disks;
3505 champion->disks = dl;
3506 s->disks = NULL;
3507 }
3508
3509 /* delete 'champion' from super_list */
3510 for (del = super_list; *del; ) {
3511 if (*del == champion) {
3512 *del = (*del)->next;
3513 break;
3514 } else
3515 del = &(*del)->next;
3516 }
3517 champion->next = NULL;
3518
3519 out:
3520 while (disk_list) {
3521 struct intel_disk *idisk = disk_list;
3522
3523 disk_list = disk_list->next;
3524 free(idisk);
3525 }
3526
3527 return champion;
3528 }
3529
3530 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
3531 char *devname)
3532 {
3533 struct mdinfo *sra;
3534 struct intel_super *super_list = NULL;
3535 struct intel_super *super = NULL;
3536 int devnum = fd2devnum(fd);
3537 struct mdinfo *sd;
3538 int retry;
3539 int err = 0;
3540 int i;
3541
3542 /* check if 'fd' an opened container */
3543 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
3544 if (!sra)
3545 return 1;
3546
3547 if (sra->array.major_version != -1 ||
3548 sra->array.minor_version != -2 ||
3549 strcmp(sra->text_version, "imsm") != 0) {
3550 err = 1;
3551 goto error;
3552 }
3553 /* load all mpbs */
3554 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
3555 struct intel_super *s = alloc_super();
3556 char nm[32];
3557 int dfd;
3558 int rv;
3559
3560 err = 1;
3561 if (!s)
3562 goto error;
3563 s->next = super_list;
3564 super_list = s;
3565
3566 err = 2;
3567 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3568 dfd = dev_open(nm, O_RDWR);
3569 if (dfd < 0)
3570 goto error;
3571
3572 rv = find_intel_hba_capability(dfd, s, devname);
3573 /* no orom/efi or non-intel hba of the disk */
3574 if (rv != 0)
3575 goto error;
3576
3577 err = load_and_parse_mpb(dfd, s, NULL, 1);
3578
3579 /* retry the load if we might have raced against mdmon */
3580 if (err == 3 && mdmon_running(devnum))
3581 for (retry = 0; retry < 3; retry++) {
3582 usleep(3000);
3583 err = load_and_parse_mpb(dfd, s, NULL, 1);
3584 if (err != 3)
3585 break;
3586 }
3587 if (err)
3588 goto error;
3589 }
3590
3591 /* all mpbs enter, maybe one leaves */
3592 super = imsm_thunderdome(&super_list, i);
3593 if (!super) {
3594 err = 1;
3595 goto error;
3596 }
3597
3598 if (find_missing(super) != 0) {
3599 free_imsm(super);
3600 err = 2;
3601 goto error;
3602 }
3603
3604 /* load migration record */
3605 err = load_imsm_migr_rec(super, NULL);
3606 if (err) {
3607 err = 4;
3608 goto error;
3609 }
3610
3611 /* Check migration compatibility */
3612 if (check_mpb_migr_compatibility(super) != 0) {
3613 fprintf(stderr, Name ": Unsupported migration detected");
3614 if (devname)
3615 fprintf(stderr, " on %s\n", devname);
3616 else
3617 fprintf(stderr, " (IMSM).\n");
3618
3619 err = 5;
3620 goto error;
3621 }
3622
3623 err = 0;
3624
3625 error:
3626 while (super_list) {
3627 struct intel_super *s = super_list;
3628
3629 super_list = super_list->next;
3630 free_imsm(s);
3631 }
3632 sysfs_free(sra);
3633
3634 if (err)
3635 return err;
3636
3637 *sbp = super;
3638 st->container_dev = devnum;
3639 if (err == 0 && st->ss == NULL) {
3640 st->ss = &super_imsm;
3641 st->minor_version = 0;
3642 st->max_devs = IMSM_MAX_DEVICES;
3643 }
3644 return 0;
3645 }
3646
3647 static int load_container_imsm(struct supertype *st, int fd, char *devname)
3648 {
3649 return load_super_imsm_all(st, fd, &st->sb, devname);
3650 }
3651 #endif
3652
3653 static int load_super_imsm(struct supertype *st, int fd, char *devname)
3654 {
3655 struct intel_super *super;
3656 int rv;
3657
3658 if (test_partition(fd))
3659 /* IMSM not allowed on partitions */
3660 return 1;
3661
3662 free_super_imsm(st);
3663
3664 super = alloc_super();
3665 if (!super) {
3666 fprintf(stderr,
3667 Name ": malloc of %zu failed.\n",
3668 sizeof(*super));
3669 return 1;
3670 }
3671 /* Load hba and capabilities if they exist.
3672 * But do not preclude loading metadata in case capabilities or hba are
3673 * non-compliant and ignore_hw_compat is set.
3674 */
3675 rv = find_intel_hba_capability(fd, super, devname);
3676 /* no orom/efi or non-intel hba of the disk */
3677 if ((rv != 0) && (st->ignore_hw_compat == 0)) {
3678 if (devname)
3679 fprintf(stderr,
3680 Name ": No OROM/EFI properties for %s\n", devname);
3681 free_imsm(super);
3682 return 2;
3683 }
3684 rv = load_and_parse_mpb(fd, super, devname, 0);
3685
3686 if (rv) {
3687 if (devname)
3688 fprintf(stderr,
3689 Name ": Failed to load all information "
3690 "sections on %s\n", devname);
3691 free_imsm(super);
3692 return rv;
3693 }
3694
3695 st->sb = super;
3696 if (st->ss == NULL) {
3697 st->ss = &super_imsm;
3698 st->minor_version = 0;
3699 st->max_devs = IMSM_MAX_DEVICES;
3700 }
3701
3702 /* load migration record */
3703 load_imsm_migr_rec(super, NULL);
3704
3705 /* Check for unsupported migration features */
3706 if (check_mpb_migr_compatibility(super) != 0) {
3707 fprintf(stderr, Name ": Unsupported migration detected");
3708 if (devname)
3709 fprintf(stderr, " on %s\n", devname);
3710 else
3711 fprintf(stderr, " (IMSM).\n");
3712 return 3;
3713 }
3714
3715 return 0;
3716 }
3717
3718 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3719 {
3720 if (info->level == 1)
3721 return 128;
3722 return info->chunk_size >> 9;
3723 }
3724
3725 static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
3726 {
3727 __u32 num_stripes;
3728
3729 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
3730 num_stripes /= num_domains;
3731
3732 return num_stripes;
3733 }
3734
3735 static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3736 {
3737 if (info->level == 1)
3738 return info->size * 2;
3739 else
3740 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
3741 }
3742
3743 static void imsm_update_version_info(struct intel_super *super)
3744 {
3745 /* update the version and attributes */
3746 struct imsm_super *mpb = super->anchor;
3747 char *version;
3748 struct imsm_dev *dev;
3749 struct imsm_map *map;
3750 int i;
3751
3752 for (i = 0; i < mpb->num_raid_devs; i++) {
3753 dev = get_imsm_dev(super, i);
3754 map = get_imsm_map(dev, 0);
3755 if (__le32_to_cpu(dev->size_high) > 0)
3756 mpb->attributes |= MPB_ATTRIB_2TB;
3757
3758 /* FIXME detect when an array spans a port multiplier */
3759 #if 0
3760 mpb->attributes |= MPB_ATTRIB_PM;
3761 #endif
3762
3763 if (mpb->num_raid_devs > 1 ||
3764 mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
3765 version = MPB_VERSION_ATTRIBS;
3766 switch (get_imsm_raid_level(map)) {
3767 case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
3768 case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
3769 case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
3770 case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
3771 }
3772 } else {
3773 if (map->num_members >= 5)
3774 version = MPB_VERSION_5OR6_DISK_ARRAY;
3775 else if (dev->status == DEV_CLONE_N_GO)
3776 version = MPB_VERSION_CNG;
3777 else if (get_imsm_raid_level(map) == 5)
3778 version = MPB_VERSION_RAID5;
3779 else if (map->num_members >= 3)
3780 version = MPB_VERSION_3OR4_DISK_ARRAY;
3781 else if (get_imsm_raid_level(map) == 1)
3782 version = MPB_VERSION_RAID1;
3783 else
3784 version = MPB_VERSION_RAID0;
3785 }
3786 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
3787 }
3788 }
3789
3790 static int check_name(struct intel_super *super, char *name, int quiet)
3791 {
3792 struct imsm_super *mpb = super->anchor;
3793 char *reason = NULL;
3794 int i;
3795
3796 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3797 reason = "must be 16 characters or less";
3798
3799 for (i = 0; i < mpb->num_raid_devs; i++) {
3800 struct imsm_dev *dev = get_imsm_dev(super, i);
3801
3802 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3803 reason = "already exists";
3804 break;
3805 }
3806 }
3807
3808 if (reason && !quiet)
3809 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3810
3811 return !reason;
3812 }
3813
3814 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3815 unsigned long long size, char *name,
3816 char *homehost, int *uuid)
3817 {
3818 /* We are creating a volume inside a pre-existing container.
3819 * so st->sb is already set.
3820 */
3821 struct intel_super *super = st->sb;
3822 struct imsm_super *mpb = super->anchor;
3823 struct intel_dev *dv;
3824 struct imsm_dev *dev;
3825 struct imsm_vol *vol;
3826 struct imsm_map *map;
3827 int idx = mpb->num_raid_devs;
3828 int i;
3829 unsigned long long array_blocks;
3830 size_t size_old, size_new;
3831 __u32 num_data_stripes;
3832
3833 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
3834 fprintf(stderr, Name": This imsm-container already has the "
3835 "maximum of %d volumes\n", super->orom->vpa);
3836 return 0;
3837 }
3838
3839 /* ensure the mpb is large enough for the new data */
3840 size_old = __le32_to_cpu(mpb->mpb_size);
3841 size_new = disks_to_mpb_size(info->nr_disks);
3842 if (size_new > size_old) {
3843 void *mpb_new;
3844 size_t size_round = ROUND_UP(size_new, 512);
3845
3846 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3847 fprintf(stderr, Name": could not allocate new mpb\n");
3848 return 0;
3849 }
3850 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3851 fprintf(stderr, Name
3852 ": %s could not allocate migr_rec buffer\n",
3853 __func__);
3854 free(super->buf);
3855 free(super);
3856 return 0;
3857 }
3858 memcpy(mpb_new, mpb, size_old);
3859 free(mpb);
3860 mpb = mpb_new;
3861 super->anchor = mpb_new;
3862 mpb->mpb_size = __cpu_to_le32(size_new);
3863 memset(mpb_new + size_old, 0, size_round - size_old);
3864 }
3865 super->current_vol = idx;
3866 /* when creating the first raid device in this container set num_disks
3867 * to zero, i.e. delete this spare and add raid member devices in
3868 * add_to_super_imsm_volume()
3869 */
3870 if (super->current_vol == 0)
3871 mpb->num_disks = 0;
3872
3873 if (!check_name(super, name, 0))
3874 return 0;
3875 dv = malloc(sizeof(*dv));
3876 if (!dv) {
3877 fprintf(stderr, Name ": failed to allocate device list entry\n");
3878 return 0;
3879 }
3880 dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
3881 if (!dev) {
3882 free(dv);
3883 fprintf(stderr, Name": could not allocate raid device\n");
3884 return 0;
3885 }
3886
3887 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
3888 if (info->level == 1)
3889 array_blocks = info_to_blocks_per_member(info);
3890 else
3891 array_blocks = calc_array_size(info->level, info->raid_disks,
3892 info->layout, info->chunk_size,
3893 info->size*2);
3894 /* round array size down to closest MB */
3895 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
3896
3897 dev->size_low = __cpu_to_le32((__u32) array_blocks);
3898 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
3899 dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
3900 vol = &dev->vol;
3901 vol->migr_state = 0;
3902 set_migr_type(dev, MIGR_INIT);
3903 vol->dirty = 0;
3904 vol->curr_migr_unit = 0;
3905 map = get_imsm_map(dev, 0);
3906 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
3907 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
3908 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
3909 map->failed_disk_num = ~0;
3910 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
3911 IMSM_T_STATE_NORMAL;
3912 map->ddf = 1;
3913
3914 if (info->level == 1 && info->raid_disks > 2) {
3915 free(dev);
3916 free(dv);
3917 fprintf(stderr, Name": imsm does not support more than 2 disks"
3918 "in a raid1 volume\n");
3919 return 0;
3920 }
3921
3922 map->raid_level = info->level;
3923 if (info->level == 10) {
3924 map->raid_level = 1;
3925 map->num_domains = info->raid_disks / 2;
3926 } else if (info->level == 1)
3927 map->num_domains = info->raid_disks;
3928 else
3929 map->num_domains = 1;
3930
3931 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
3932 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
3933
3934 map->num_members = info->raid_disks;
3935 for (i = 0; i < map->num_members; i++) {
3936 /* initialized in add_to_super */
3937 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
3938 }
3939 mpb->num_raid_devs++;
3940
3941 dv->dev = dev;
3942 dv->index = super->current_vol;
3943 dv->next = super->devlist;
3944 super->devlist = dv;
3945
3946 imsm_update_version_info(super);
3947
3948 return 1;
3949 }
3950
3951 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
3952 unsigned long long size, char *name,
3953 char *homehost, int *uuid)
3954 {
3955 /* This is primarily called by Create when creating a new array.
3956 * We will then get add_to_super called for each component, and then
3957 * write_init_super called to write it out to each device.
3958 * For IMSM, Create can create on fresh devices or on a pre-existing
3959 * array.
3960 * To create on a pre-existing array a different method will be called.
3961 * This one is just for fresh drives.
3962 */
3963 struct intel_super *super;
3964 struct imsm_super *mpb;
3965 size_t mpb_size;
3966 char *version;
3967
3968 if (st->sb)
3969 return init_super_imsm_volume(st, info, size, name, homehost, uuid);
3970
3971 if (info)
3972 mpb_size = disks_to_mpb_size(info->nr_disks);
3973 else
3974 mpb_size = 512;
3975
3976 super = alloc_super();
3977 if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
3978 free(super);
3979 super = NULL;
3980 }
3981 if (!super) {
3982 fprintf(stderr, Name
3983 ": %s could not allocate superblock\n", __func__);
3984 return 0;
3985 }
3986 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3987 fprintf(stderr, Name
3988 ": %s could not allocate migr_rec buffer\n", __func__);
3989 free(super->buf);
3990 free(super);
3991 return 0;
3992 }
3993 memset(super->buf, 0, mpb_size);
3994 mpb = super->buf;
3995 mpb->mpb_size = __cpu_to_le32(mpb_size);
3996 st->sb = super;
3997
3998 if (info == NULL) {
3999 /* zeroing superblock */
4000 return 0;
4001 }
4002
4003 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4004
4005 version = (char *) mpb->sig;
4006 strcpy(version, MPB_SIGNATURE);
4007 version += strlen(MPB_SIGNATURE);
4008 strcpy(version, MPB_VERSION_RAID0);
4009
4010 return 1;
4011 }
4012
4013 #ifndef MDASSEMBLE
4014 static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
4015 int fd, char *devname)
4016 {
4017 struct intel_super *super = st->sb;
4018 struct imsm_super *mpb = super->anchor;
4019 struct dl *dl;
4020 struct imsm_dev *dev;
4021 struct imsm_map *map;
4022 int slot;
4023
4024 dev = get_imsm_dev(super, super->current_vol);
4025 map = get_imsm_map(dev, 0);
4026
4027 if (! (dk->state & (1<<MD_DISK_SYNC))) {
4028 fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
4029 devname);
4030 return 1;
4031 }
4032
4033 if (fd == -1) {
4034 /* we're doing autolayout so grab the pre-marked (in
4035 * validate_geometry) raid_disk
4036 */
4037 for (dl = super->disks; dl; dl = dl->next)
4038 if (dl->raiddisk == dk->raid_disk)
4039 break;
4040 } else {
4041 for (dl = super->disks; dl ; dl = dl->next)
4042 if (dl->major == dk->major &&
4043 dl->minor == dk->minor)
4044 break;
4045 }
4046
4047 if (!dl) {
4048 fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
4049 return 1;
4050 }
4051
4052 /* add a pristine spare to the metadata */
4053 if (dl->index < 0) {
4054 dl->index = super->anchor->num_disks;
4055 super->anchor->num_disks++;
4056 }
4057 /* Check the device has not already been added */
4058 slot = get_imsm_disk_slot(map, dl->index);
4059 if (slot >= 0 &&
4060 (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
4061 fprintf(stderr, Name ": %s has been included in this array twice\n",
4062 devname);
4063 return 1;
4064 }
4065 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
4066 dl->disk.status = CONFIGURED_DISK;
4067
4068 /* if we are creating the first raid device update the family number */
4069 if (super->current_vol == 0) {
4070 __u32 sum;
4071 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
4072 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
4073
4074 if (!_dev || !_disk) {
4075 fprintf(stderr, Name ": BUG mpb setup error\n");
4076 return 1;
4077 }
4078 *_dev = *dev;
4079 *_disk = dl->disk;
4080 sum = random32();
4081 sum += __gen_imsm_checksum(mpb);
4082 mpb->family_num = __cpu_to_le32(sum);
4083 mpb->orig_family_num = mpb->family_num;
4084 }
4085
4086 return 0;
4087 }
4088
4089
4090 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
4091 int fd, char *devname)
4092 {
4093 struct intel_super *super = st->sb;
4094 struct dl *dd;
4095 unsigned long long size;
4096 __u32 id;
4097 int rv;
4098 struct stat stb;
4099
4100 /* If we are on an RAID enabled platform check that the disk is
4101 * attached to the raid controller.
4102 * We do not need to test disks attachment for container based additions,
4103 * they shall be already tested when container was created/assembled.
4104 */
4105 rv = find_intel_hba_capability(fd, super, devname);
4106 /* no orom/efi or non-intel hba of the disk */
4107 if (rv != 0) {
4108 dprintf("capability: %p fd: %d ret: %d\n",
4109 super->orom, fd, rv);
4110 return 1;
4111 }
4112
4113 if (super->current_vol >= 0)
4114 return add_to_super_imsm_volume(st, dk, fd, devname);
4115
4116 fstat(fd, &stb);
4117 dd = malloc(sizeof(*dd));
4118 if (!dd) {
4119 fprintf(stderr,
4120 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4121 return 1;
4122 }
4123 memset(dd, 0, sizeof(*dd));
4124 dd->major = major(stb.st_rdev);
4125 dd->minor = minor(stb.st_rdev);
4126 dd->index = -1;
4127 dd->devname = devname ? strdup(devname) : NULL;
4128 dd->fd = fd;
4129 dd->e = NULL;
4130 dd->action = DISK_ADD;
4131 rv = imsm_read_serial(fd, devname, dd->serial);
4132 if (rv) {
4133 fprintf(stderr,
4134 Name ": failed to retrieve scsi serial, aborting\n");
4135 free(dd);
4136 abort();
4137 }
4138
4139 get_dev_size(fd, NULL, &size);
4140 size /= 512;
4141 serialcpy(dd->disk.serial, dd->serial);
4142 dd->disk.total_blocks = __cpu_to_le32(size);
4143 dd->disk.status = SPARE_DISK;
4144 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
4145 dd->disk.scsi_id = __cpu_to_le32(id);
4146 else
4147 dd->disk.scsi_id = __cpu_to_le32(0);
4148
4149 if (st->update_tail) {
4150 dd->next = super->disk_mgmt_list;
4151 super->disk_mgmt_list = dd;
4152 } else {
4153 dd->next = super->disks;
4154 super->disks = dd;
4155 super->updates_pending++;
4156 }
4157
4158 return 0;
4159 }
4160
4161
4162 static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
4163 {
4164 struct intel_super *super = st->sb;
4165 struct dl *dd;
4166
4167 /* remove from super works only in mdmon - for communication
4168 * manager - monitor. Check if communication memory buffer
4169 * is prepared.
4170 */
4171 if (!st->update_tail) {
4172 fprintf(stderr,
4173 Name ": %s shall be used in mdmon context only"
4174 "(line %d).\n", __func__, __LINE__);
4175 return 1;
4176 }
4177 dd = malloc(sizeof(*dd));
4178 if (!dd) {
4179 fprintf(stderr,
4180 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4181 return 1;
4182 }
4183 memset(dd, 0, sizeof(*dd));
4184 dd->major = dk->major;
4185 dd->minor = dk->minor;
4186 dd->index = -1;
4187 dd->fd = -1;
4188 dd->disk.status = SPARE_DISK;
4189 dd->action = DISK_REMOVE;
4190
4191 dd->next = super->disk_mgmt_list;
4192 super->disk_mgmt_list = dd;
4193
4194
4195 return 0;
4196 }
4197
4198 static int store_imsm_mpb(int fd, struct imsm_super *mpb);
4199
4200 static union {
4201 char buf[512];
4202 struct imsm_super anchor;
4203 } spare_record __attribute__ ((aligned(512)));
4204
4205 /* spare records have their own family number and do not have any defined raid
4206 * devices
4207 */
4208 static int write_super_imsm_spares(struct intel_super *super, int doclose)
4209 {
4210 struct imsm_super *mpb = super->anchor;
4211 struct imsm_super *spare = &spare_record.anchor;
4212 __u32 sum;
4213 struct dl *d;
4214
4215 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
4216 spare->generation_num = __cpu_to_le32(1UL),
4217 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4218 spare->num_disks = 1,
4219 spare->num_raid_devs = 0,
4220 spare->cache_size = mpb->cache_size,
4221 spare->pwr_cycle_count = __cpu_to_le32(1),
4222
4223 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
4224 MPB_SIGNATURE MPB_VERSION_RAID0);
4225
4226 for (d = super->disks; d; d = d->next) {
4227 if (d->index != -1)
4228 continue;
4229
4230 spare->disk[0] = d->disk;
4231 sum = __gen_imsm_checksum(spare);
4232 spare->family_num = __cpu_to_le32(sum);
4233 spare->orig_family_num = 0;
4234 sum = __gen_imsm_checksum(spare);
4235 spare->check_sum = __cpu_to_le32(sum);
4236
4237 if (store_imsm_mpb(d->fd, spare)) {
4238 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4239 __func__, d->major, d->minor, strerror(errno));
4240 return 1;
4241 }
4242 if (doclose) {
4243 close(d->fd);
4244 d->fd = -1;
4245 }
4246 }
4247
4248 return 0;
4249 }
4250
4251 static int is_gen_migration(struct imsm_dev *dev);
4252
4253 static int write_super_imsm(struct supertype *st, int doclose)
4254 {
4255 struct intel_super *super = st->sb;
4256 struct imsm_super *mpb = super->anchor;
4257 struct dl *d;
4258 __u32 generation;
4259 __u32 sum;
4260 int spares = 0;
4261 int i;
4262 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
4263 int num_disks = 0;
4264 int clear_migration_record = 1;
4265
4266 /* 'generation' is incremented everytime the metadata is written */
4267 generation = __le32_to_cpu(mpb->generation_num);
4268 generation++;
4269 mpb->generation_num = __cpu_to_le32(generation);
4270
4271 /* fix up cases where previous mdadm releases failed to set
4272 * orig_family_num
4273 */
4274 if (mpb->orig_family_num == 0)
4275 mpb->orig_family_num = mpb->family_num;
4276
4277 for (d = super->disks; d; d = d->next) {
4278 if (d->index == -1)
4279 spares++;
4280 else {
4281 mpb->disk[d->index] = d->disk;
4282 num_disks++;
4283 }
4284 }
4285 for (d = super->missing; d; d = d->next) {
4286 mpb->disk[d->index] = d->disk;
4287 num_disks++;
4288 }
4289 mpb->num_disks = num_disks;
4290 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
4291
4292 for (i = 0; i < mpb->num_raid_devs; i++) {
4293 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
4294 struct imsm_dev *dev2 = get_imsm_dev(super, i);
4295 if (dev && dev2) {
4296 imsm_copy_dev(dev, dev2);
4297 mpb_size += sizeof_imsm_dev(dev, 0);
4298 }
4299 if (is_gen_migration(dev2))
4300 clear_migration_record = 0;
4301 }
4302 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
4303 mpb->mpb_size = __cpu_to_le32(mpb_size);
4304
4305 /* recalculate checksum */
4306 sum = __gen_imsm_checksum(mpb);
4307 mpb->check_sum = __cpu_to_le32(sum);
4308
4309 if (clear_migration_record)
4310 memset(super->migr_rec_buf, 0, 512);
4311
4312 /* write the mpb for disks that compose raid devices */
4313 for (d = super->disks; d ; d = d->next) {
4314 if (d->index < 0)
4315 continue;
4316 if (store_imsm_mpb(d->fd, mpb))
4317 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4318 __func__, d->major, d->minor, strerror(errno));
4319 if (clear_migration_record) {
4320 unsigned long long dsize;
4321
4322 get_dev_size(d->fd, NULL, &dsize);
4323 if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
4324 write(d->fd, super->migr_rec_buf, 512);
4325 }
4326 }
4327 if (doclose) {
4328 close(d->fd);
4329 d->fd = -1;
4330 }
4331 }
4332
4333 if (spares)
4334 return write_super_imsm_spares(super, doclose);
4335
4336 return 0;
4337 }
4338
4339
4340 static int create_array(struct supertype *st, int dev_idx)
4341 {
4342 size_t len;
4343 struct imsm_update_create_array *u;
4344 struct intel_super *super = st->sb;
4345 struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
4346 struct imsm_map *map = get_imsm_map(dev, 0);
4347 struct disk_info *inf;
4348 struct imsm_disk *disk;
4349 int i;
4350
4351 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
4352 sizeof(*inf) * map->num_members;
4353 u = malloc(len);
4354 if (!u) {
4355 fprintf(stderr, "%s: failed to allocate update buffer\n",
4356 __func__);
4357 return 1;
4358 }
4359
4360 u->type = update_create_array;
4361 u->dev_idx = dev_idx;
4362 imsm_copy_dev(&u->dev, dev);
4363 inf = get_disk_info(u);
4364 for (i = 0; i < map->num_members; i++) {
4365 int idx = get_imsm_disk_idx(dev, i, -1);
4366
4367 disk = get_imsm_disk(super, idx);
4368 serialcpy(inf[i].serial, disk->serial);
4369 }
4370 append_metadata_update(st, u, len);
4371
4372 return 0;
4373 }
4374
4375 static int mgmt_disk(struct supertype *st)
4376 {
4377 struct intel_super *super = st->sb;
4378 size_t len;
4379 struct imsm_update_add_remove_disk *u;
4380
4381 if (!super->disk_mgmt_list)
4382 return 0;
4383
4384 len = sizeof(*u);
4385 u = malloc(len);
4386 if (!u) {
4387 fprintf(stderr, "%s: failed to allocate update buffer\n",
4388 __func__);
4389 return 1;
4390 }
4391
4392 u->type = update_add_remove_disk;
4393 append_metadata_update(st, u, len);
4394
4395 return 0;
4396 }
4397
4398 static int write_init_super_imsm(struct supertype *st)
4399 {
4400 struct intel_super *super = st->sb;
4401 int current_vol = super->current_vol;
4402
4403 /* we are done with current_vol reset it to point st at the container */
4404 super->current_vol = -1;
4405
4406 if (st->update_tail) {
4407 /* queue the recently created array / added disk
4408 * as a metadata update */
4409 int rv;
4410
4411 /* determine if we are creating a volume or adding a disk */
4412 if (current_vol < 0) {
4413 /* in the mgmt (add/remove) disk case we are running
4414 * in mdmon context, so don't close fd's
4415 */
4416 return mgmt_disk(st);
4417 } else
4418 rv = create_array(st, current_vol);
4419
4420 return rv;
4421 } else {
4422 struct dl *d;
4423 for (d = super->disks; d; d = d->next)
4424 Kill(d->devname, NULL, 0, 1, 1);
4425 return write_super_imsm(st, 1);
4426 }
4427 }
4428 #endif
4429
4430 static int store_super_imsm(struct supertype *st, int fd)
4431 {
4432 struct intel_super *super = st->sb;
4433 struct imsm_super *mpb = super ? super->anchor : NULL;
4434
4435 if (!mpb)
4436 return 1;
4437
4438 #ifndef MDASSEMBLE
4439 return store_imsm_mpb(fd, mpb);
4440 #else
4441 return 1;
4442 #endif
4443 }
4444
4445 static int imsm_bbm_log_size(struct imsm_super *mpb)
4446 {
4447 return __le32_to_cpu(mpb->bbm_log_size);
4448 }
4449
4450 #ifndef MDASSEMBLE
4451 static int validate_geometry_imsm_container(struct supertype *st, int level,
4452 int layout, int raiddisks, int chunk,
4453 unsigned long long size, char *dev,
4454 unsigned long long *freesize,
4455 int verbose)
4456 {
4457 int fd;
4458 unsigned long long ldsize;
4459 struct intel_super *super=NULL;
4460 int rv = 0;
4461
4462 if (level != LEVEL_CONTAINER)
4463 return 0;
4464 if (!dev)
4465 return 1;
4466
4467 fd = open(dev, O_RDONLY|O_EXCL, 0);
4468 if (fd < 0) {
4469 if (verbose)
4470 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
4471 dev, strerror(errno));
4472 return 0;
4473 }
4474 if (!get_dev_size(fd, dev, &ldsize)) {
4475 close(fd);
4476 return 0;
4477 }
4478
4479 /* capabilities retrieve could be possible
4480 * note that there is no fd for the disks in array.
4481 */
4482 super = alloc_super();
4483 if (!super) {
4484 fprintf(stderr,
4485 Name ": malloc of %zu failed.\n",
4486 sizeof(*super));
4487 close(fd);
4488 return 0;
4489 }
4490
4491 rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
4492 if (rv != 0) {
4493 #if DEBUG
4494 char str[256];
4495 fd2devname(fd, str);
4496 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
4497 fd, str, super->orom, rv, raiddisks);
4498 #endif
4499 /* no orom/efi or non-intel hba of the disk */
4500 close(fd);
4501 free_imsm(super);
4502 return 0;
4503 }
4504 close(fd);
4505 if (super->orom && raiddisks > super->orom->tds) {
4506 if (verbose)
4507 fprintf(stderr, Name ": %d exceeds maximum number of"
4508 " platform supported disks: %d\n",
4509 raiddisks, super->orom->tds);
4510
4511 free_imsm(super);
4512 return 0;
4513 }
4514
4515 *freesize = avail_size_imsm(st, ldsize >> 9);
4516 free_imsm(super);
4517
4518 return 1;
4519 }
4520
4521 static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4522 {
4523 const unsigned long long base_start = e[*idx].start;
4524 unsigned long long end = base_start + e[*idx].size;
4525 int i;
4526
4527 if (base_start == end)
4528 return 0;
4529
4530 *idx = *idx + 1;
4531 for (i = *idx; i < num_extents; i++) {
4532 /* extend overlapping extents */
4533 if (e[i].start >= base_start &&
4534 e[i].start <= end) {
4535 if (e[i].size == 0)
4536 return 0;
4537 if (e[i].start + e[i].size > end)
4538 end = e[i].start + e[i].size;
4539 } else if (e[i].start > end) {
4540 *idx = i;
4541 break;
4542 }
4543 }
4544
4545 return end - base_start;
4546 }
4547
4548 static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
4549 {
4550 /* build a composite disk with all known extents and generate a new
4551 * 'maxsize' given the "all disks in an array must share a common start
4552 * offset" constraint
4553 */
4554 struct extent *e = calloc(sum_extents, sizeof(*e));
4555 struct dl *dl;
4556 int i, j;
4557 int start_extent;
4558 unsigned long long pos;
4559 unsigned long long start = 0;
4560 unsigned long long maxsize;
4561 unsigned long reserve;
4562
4563 if (!e)
4564 return 0;
4565
4566 /* coalesce and sort all extents. also, check to see if we need to
4567 * reserve space between member arrays
4568 */
4569 j = 0;
4570 for (dl = super->disks; dl; dl = dl->next) {
4571 if (!dl->e)
4572 continue;
4573 for (i = 0; i < dl->extent_cnt; i++)
4574 e[j++] = dl->e[i];
4575 }
4576 qsort(e, sum_extents, sizeof(*e), cmp_extent);
4577
4578 /* merge extents */
4579 i = 0;
4580 j = 0;
4581 while (i < sum_extents) {
4582 e[j].start = e[i].start;
4583 e[j].size = find_size(e, &i, sum_extents);
4584 j++;
4585 if (e[j-1].size == 0)
4586 break;
4587 }
4588
4589 pos = 0;
4590 maxsize = 0;
4591 start_extent = 0;
4592 i = 0;
4593 do {
4594 unsigned long long esize;
4595
4596 esize = e[i].start - pos;
4597 if (esize >= maxsize) {
4598 maxsize = esize;
4599 start = pos;
4600 start_extent = i;
4601 }
4602 pos = e[i].start + e[i].size;
4603 i++;
4604 } while (e[i-1].size);
4605 free(e);
4606
4607 if (maxsize == 0)
4608 return 0;
4609
4610 /* FIXME assumes volume at offset 0 is the first volume in a
4611 * container
4612 */
4613 if (start_extent > 0)
4614 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
4615 else
4616 reserve = 0;
4617
4618 if (maxsize < reserve)
4619 return 0;
4620
4621 super->create_offset = ~((__u32) 0);
4622 if (start + reserve > super->create_offset)
4623 return 0; /* start overflows create_offset */
4624 super->create_offset = start + reserve;
4625
4626 return maxsize - reserve;
4627 }
4628
/* Tell whether 'level' with 'raiddisks' members may be created.
 *
 * Negative levels and levels 4 and 6 are always refused.  Without an
 * orom every other level is accepted; with one, the answer comes from
 * the orom capability helpers combined with the disk-count rule of the
 * level.  Levels the orom switch does not know yield 0.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	if (level < 0 || level == 4 || level == 6)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (!orom)
		return 1;

	/* if we have an orom prevent invalid raid levels */
	if (level == 0)
		return imsm_orom_has_raid0(orom);

	if (level == 1) {
		if (raiddisks > 2)
			return imsm_orom_has_raid1e(orom);
		return imsm_orom_has_raid1(orom) && raiddisks == 2;
	}

	if (level == 10)
		return imsm_orom_has_raid10(orom) && raiddisks == 4;

	if (level == 5)
		return imsm_orom_has_raid5(orom) && raiddisks > 2;

	return 0;
}
4650
4651
4652 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4653 /*
4654 * validate volume parameters with OROM/EFI capabilities
4655 */
4656 static int
4657 validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
4658 int raiddisks, int *chunk, int verbose)
4659 {
4660 #if DEBUG
4661 verbose = 1;
4662 #endif
4663 /* validate container capabilities */
4664 if (super->orom && raiddisks > super->orom->tds) {
4665 if (verbose)
4666 fprintf(stderr, Name ": %d exceeds maximum number of"
4667 " platform supported disks: %d\n",
4668 raiddisks, super->orom->tds);
4669 return 0;
4670 }
4671
4672 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4673 if (super->orom && (!is_raid_level_supported(super->orom, level,
4674 raiddisks))) {
4675 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4676 level, raiddisks, raiddisks > 1 ? "s" : "");
4677 return 0;
4678 }
4679 if (super->orom && level != 1) {
4680 if (chunk && (*chunk == 0 || *chunk == UnSet))
4681 *chunk = imsm_orom_default_chunk(super->orom);
4682 else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
4683 pr_vrb(": platform does not support a chunk size of: "
4684 "%d\n", *chunk);
4685 return 0;
4686 }
4687 }
4688 if (layout != imsm_level_to_layout(level)) {
4689 if (level == 5)
4690 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4691 else if (level == 10)
4692 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4693 else
4694 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4695 layout, level);
4696 return 0;
4697 }
4698 return 1;
4699 }
4700
4701 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4702 * FIX ME add ahci details
4703 */
4704 static int validate_geometry_imsm_volume(struct supertype *st, int level,
4705 int layout, int raiddisks, int *chunk,
4706 unsigned long long size, char *dev,
4707 unsigned long long *freesize,
4708 int verbose)
4709 {
4710 struct stat stb;
4711 struct intel_super *super = st->sb;
4712 struct imsm_super *mpb = super->anchor;
4713 struct dl *dl;
4714 unsigned long long pos = 0;
4715 unsigned long long maxsize;
4716 struct extent *e;
4717 int i;
4718
4719 /* We must have the container info already read in. */
4720 if (!super)
4721 return 0;
4722
4723 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4724 fprintf(stderr, Name ": RAID gemetry validation failed. "
4725 "Cannot proceed with the action(s).\n");
4726 return 0;
4727 }
4728 if (!dev) {
4729 /* General test: make sure there is space for
4730 * 'raiddisks' device extents of size 'size' at a given
4731 * offset
4732 */
4733 unsigned long long minsize = size;
4734 unsigned long long start_offset = MaxSector;
4735 int dcnt = 0;
4736 if (minsize == 0)
4737 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4738 for (dl = super->disks; dl ; dl = dl->next) {
4739 int found = 0;
4740
4741 pos = 0;
4742 i = 0;
4743 e = get_extents(super, dl);
4744 if (!e) continue;
4745 do {
4746 unsigned long long esize;
4747 esize = e[i].start - pos;
4748 if (esize >= minsize)
4749 found = 1;
4750 if (found && start_offset == MaxSector) {
4751 start_offset = pos;
4752 break;
4753 } else if (found && pos != start_offset) {
4754 found = 0;
4755 break;
4756 }
4757 pos = e[i].start + e[i].size;
4758 i++;
4759 } while (e[i-1].size);
4760 if (found)
4761 dcnt++;
4762 free(e);
4763 }
4764 if (dcnt < raiddisks) {
4765 if (verbose)
4766 fprintf(stderr, Name ": imsm: Not enough "
4767 "devices with space for this array "
4768 "(%d < %d)\n",
4769 dcnt, raiddisks);
4770 return 0;
4771 }
4772 return 1;
4773 }
4774
4775 /* This device must be a member of the set */
4776 if (stat(dev, &stb) < 0)
4777 return 0;
4778 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4779 return 0;
4780 for (dl = super->disks ; dl ; dl = dl->next) {
4781 if (dl->major == (int)major(stb.st_rdev) &&
4782 dl->minor == (int)minor(stb.st_rdev))
4783 break;
4784 }
4785 if (!dl) {
4786 if (verbose)
4787 fprintf(stderr, Name ": %s is not in the "
4788 "same imsm set\n", dev);
4789 return 0;
4790 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4791 /* If a volume is present then the current creation attempt
4792 * cannot incorporate new spares because the orom may not
4793 * understand this configuration (all member disks must be
4794 * members of each array in the container).
4795 */
4796 fprintf(stderr, Name ": %s is a spare and a volume"
4797 " is already defined for this container\n", dev);
4798 fprintf(stderr, Name ": The option-rom requires all member"
4799 " disks to be a member of all volumes\n");
4800 return 0;
4801 }
4802
4803 /* retrieve the largest free space block */
4804 e = get_extents(super, dl);
4805 maxsize = 0;
4806 i = 0;
4807 if (e) {
4808 do {
4809 unsigned long long esize;
4810
4811 esize = e[i].start - pos;
4812 if (esize >= maxsize)
4813 maxsize = esize;
4814 pos = e[i].start + e[i].size;
4815 i++;
4816 } while (e[i-1].size);
4817 dl->e = e;
4818 dl->extent_cnt = i;
4819 } else {
4820 if (verbose)
4821 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4822 dev);
4823 return 0;
4824 }
4825 if (maxsize < size) {
4826 if (verbose)
4827 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4828 dev, maxsize, size);
4829 return 0;
4830 }
4831
4832 /* count total number of extents for merge */
4833 i = 0;
4834 for (dl = super->disks; dl; dl = dl->next)
4835 if (dl->e)
4836 i += dl->extent_cnt;
4837
4838 maxsize = merge_extents(super, i);
4839 if (maxsize < size || maxsize == 0) {
4840 if (verbose)
4841 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4842 maxsize, size);
4843 return 0;
4844 }
4845
4846 *freesize = maxsize;
4847
4848 return 1;
4849 }
4850
4851 static int reserve_space(struct supertype *st, int raiddisks,
4852 unsigned long long size, int chunk,
4853 unsigned long long *freesize)
4854 {
4855 struct intel_super *super = st->sb;
4856 struct imsm_super *mpb = super->anchor;
4857 struct dl *dl;
4858 int i;
4859 int extent_cnt;
4860 struct extent *e;
4861 unsigned long long maxsize;
4862 unsigned long long minsize;
4863 int cnt;
4864 int used;
4865
4866 /* find the largest common start free region of the possible disks */
4867 used = 0;
4868 extent_cnt = 0;
4869 cnt = 0;
4870 for (dl = super->disks; dl; dl = dl->next) {
4871 dl->raiddisk = -1;
4872
4873 if (dl->index >= 0)
4874 used++;
4875
4876 /* don't activate new spares if we are orom constrained
4877 * and there is already a volume active in the container
4878 */
4879 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
4880 continue;
4881
4882 e = get_extents(super, dl);
4883 if (!e)
4884 continue;
4885 for (i = 1; e[i-1].size; i++)
4886 ;
4887 dl->e = e;
4888 dl->extent_cnt = i;
4889 extent_cnt += i;
4890 cnt++;
4891 }
4892
4893 maxsize = merge_extents(super, extent_cnt);
4894 minsize = size;
4895 if (size == 0)
4896 /* chunk is in K */
4897 minsize = chunk * 2;
4898
4899 if (cnt < raiddisks ||
4900 (super->orom && used && used != raiddisks) ||
4901 maxsize < minsize ||
4902 maxsize == 0) {
4903 fprintf(stderr, Name ": not enough devices with space to create array.\n");
4904 return 0; /* No enough free spaces large enough */
4905 }
4906
4907 if (size == 0) {
4908 size = maxsize;
4909 if (chunk) {
4910 size /= 2 * chunk;
4911 size *= 2 * chunk;
4912 }
4913 }
4914
4915 cnt = 0;
4916 for (dl = super->disks; dl; dl = dl->next)
4917 if (dl->e)
4918 dl->raiddisk = cnt++;
4919
4920 *freesize = size;
4921
4922 return 1;
4923 }
4924
4925 static int validate_geometry_imsm(struct supertype *st, int level, int layout,
4926 int raiddisks, int *chunk, unsigned long long size,
4927 char *dev, unsigned long long *freesize,
4928 int verbose)
4929 {
4930 int fd, cfd;
4931 struct mdinfo *sra;
4932 int is_member = 0;
4933
4934 /* load capability
4935 * if given unused devices create a container
4936 * if given given devices in a container create a member volume
4937 */
4938 if (level == LEVEL_CONTAINER) {
4939 /* Must be a fresh device to add to a container */
4940 return validate_geometry_imsm_container(st, level, layout,
4941 raiddisks,
4942 chunk?*chunk:0, size,
4943 dev, freesize,
4944 verbose);
4945 }
4946
4947 if (!dev) {
4948 if (st->sb && freesize) {
4949 /* we are being asked to automatically layout a
4950 * new volume based on the current contents of
4951 * the container. If the the parameters can be
4952 * satisfied reserve_space will record the disks,
4953 * start offset, and size of the volume to be
4954 * created. add_to_super and getinfo_super
4955 * detect when autolayout is in progress.
4956 */
4957 if (!validate_geometry_imsm_orom(st->sb, level, layout,
4958 raiddisks, chunk,
4959 verbose))
4960 return 0;
4961 return reserve_space(st, raiddisks, size,
4962 chunk?*chunk:0, freesize);
4963 }
4964 return 1;
4965 }
4966 if (st->sb) {
4967 /* creating in a given container */
4968 return validate_geometry_imsm_volume(st, level, layout,
4969 raiddisks, chunk, size,
4970 dev, freesize, verbose);
4971 }
4972
4973 /* This device needs to be a device in an 'imsm' container */
4974 fd = open(dev, O_RDONLY|O_EXCL, 0);
4975 if (fd >= 0) {
4976 if (verbose)
4977 fprintf(stderr,
4978 Name ": Cannot create this array on device %s\n",
4979 dev);
4980 close(fd);
4981 return 0;
4982 }
4983 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
4984 if (verbose)
4985 fprintf(stderr, Name ": Cannot open %s: %s\n",
4986 dev, strerror(errno));
4987 return 0;
4988 }
4989 /* Well, it is in use by someone, maybe an 'imsm' container. */
4990 cfd = open_container(fd);
4991 close(fd);
4992 if (cfd < 0) {
4993 if (verbose)
4994 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
4995 dev);
4996 return 0;
4997 }
4998 sra = sysfs_read(cfd, 0, GET_VERSION);
4999 if (sra && sra->array.major_version == -1 &&
5000 strcmp(sra->text_version, "imsm") == 0)
5001 is_member = 1;
5002 sysfs_free(sra);
5003 if (is_member) {
5004 /* This is a member of a imsm container. Load the container
5005 * and try to create a volume
5006 */
5007 struct intel_super *super;
5008
5009 if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
5010 st->sb = super;
5011 st->container_dev = fd2devnum(cfd);
5012 close(cfd);
5013 return validate_geometry_imsm_volume(st, level, layout,
5014 raiddisks, chunk,
5015 size, dev,
5016 freesize, verbose);
5017 }
5018 }
5019
5020 if (verbose)
5021 fprintf(stderr, Name ": failed container membership check\n");
5022
5023 close(cfd);
5024 return 0;
5025 }
5026
5027 static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
5028 {
5029 struct intel_super *super = st->sb;
5030
5031 if (level && *level == UnSet)
5032 *level = LEVEL_CONTAINER;
5033
5034 if (level && layout && *layout == UnSet)
5035 *layout = imsm_level_to_layout(*level);
5036
5037 if (chunk && (*chunk == UnSet || *chunk == 0) &&
5038 super && super->orom)
5039 *chunk = imsm_orom_default_chunk(super->orom);
5040 }
5041
5042 static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
5043
5044 static int kill_subarray_imsm(struct supertype *st)
5045 {
5046 /* remove the subarray currently referenced by ->current_vol */
5047 __u8 i;
5048 struct intel_dev **dp;
5049 struct intel_super *super = st->sb;
5050 __u8 current_vol = super->current_vol;
5051 struct imsm_super *mpb = super->anchor;
5052
5053 if (super->current_vol < 0)
5054 return 2;
5055 super->current_vol = -1; /* invalidate subarray cursor */
5056
5057 /* block deletions that would change the uuid of active subarrays
5058 *
5059 * FIXME when immutable ids are available, but note that we'll
5060 * also need to fixup the invalidated/active subarray indexes in
5061 * mdstat
5062 */
5063 for (i = 0; i < mpb->num_raid_devs; i++) {
5064 char subarray[4];
5065
5066 if (i < current_vol)
5067 continue;
5068 sprintf(subarray, "%u", i);
5069 if (is_subarray_active(subarray, st->devname)) {
5070 fprintf(stderr,
5071 Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
5072 current_vol, i);
5073
5074 return 2;
5075 }
5076 }
5077
5078 if (st->update_tail) {
5079 struct imsm_update_kill_array *u = malloc(sizeof(*u));
5080
5081 if (!u)
5082 return 2;
5083 u->type = update_kill_array;
5084 u->dev_idx = current_vol;
5085 append_metadata_update(st, u, sizeof(*u));
5086
5087 return 0;
5088 }
5089
5090 for (dp = &super->devlist; *dp;)
5091 if ((*dp)->index == current_vol) {
5092 *dp = (*dp)->next;
5093 } else {
5094 handle_missing(super, (*dp)->dev);
5095 if ((*dp)->index > current_vol)
5096 (*dp)->index--;
5097 dp = &(*dp)->next;
5098 }
5099
5100 /* no more raid devices, all active components are now spares,
5101 * but of course failed are still failed
5102 */
5103 if (--mpb->num_raid_devs == 0) {
5104 struct dl *d;
5105
5106 for (d = super->disks; d; d = d->next)
5107 if (d->index > -2) {
5108 d->index = -1;
5109 d->disk.status = SPARE_DISK;
5110 }
5111 }
5112
5113 super->updates_pending++;
5114
5115 return 0;
5116 }
5117
5118 static int update_subarray_imsm(struct supertype *st, char *subarray,
5119 char *update, struct mddev_ident *ident)
5120 {
5121 /* update the subarray currently referenced by ->current_vol */
5122 struct intel_super *super = st->sb;
5123 struct imsm_super *mpb = super->anchor;
5124
5125 if (strcmp(update, "name") == 0) {
5126 char *name = ident->name;
5127 char *ep;
5128 int vol;
5129
5130 if (is_subarray_active(subarray, st->devname)) {
5131 fprintf(stderr,
5132 Name ": Unable to update name of active subarray\n");
5133 return 2;
5134 }
5135
5136 if (!check_name(super, name, 0))
5137 return 2;
5138
5139 vol = strtoul(subarray, &ep, 10);
5140 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
5141 return 2;
5142
5143 if (st->update_tail) {
5144 struct imsm_update_rename_array *u = malloc(sizeof(*u));
5145
5146 if (!u)
5147 return 2;
5148 u->type = update_rename_array;
5149 u->dev_idx = vol;
5150 snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
5151 append_metadata_update(st, u, sizeof(*u));
5152 } else {
5153 struct imsm_dev *dev;
5154 int i;
5155
5156 dev = get_imsm_dev(super, vol);
5157 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
5158 for (i = 0; i < mpb->num_raid_devs; i++) {
5159 dev = get_imsm_dev(super, i);
5160 handle_missing(super, dev);
5161 }
5162 super->updates_pending++;
5163 }
5164 } else
5165 return 2;
5166
5167 return 0;
5168 }
5169
5170 static int is_gen_migration(struct imsm_dev *dev)
5171 {
5172 if (!dev->vol.migr_state)
5173 return 0;
5174
5175 if (migr_type(dev) == MIGR_GEN_MIGR)
5176 return 1;
5177
5178 return 0;
5179 }
5180 #endif /* MDASSEMBLE */
5181
5182 static int is_rebuilding(struct imsm_dev *dev)
5183 {
5184 struct imsm_map *migr_map;
5185
5186 if (!dev->vol.migr_state)
5187 return 0;
5188
5189 if (migr_type(dev) != MIGR_REBUILD)
5190 return 0;
5191
5192 migr_map = get_imsm_map(dev, 1);
5193
5194 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
5195 return 1;
5196 else
5197 return 0;
5198 }
5199
5200 static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
5201 {
5202 struct mdinfo *rebuild = NULL;
5203 struct mdinfo *d;
5204 __u32 units;
5205
5206 if (!is_rebuilding(dev))
5207 return;
5208
5209 /* Find the rebuild target, but punt on the dual rebuild case */
5210 for (d = array->devs; d; d = d->next)
5211 if (d->recovery_start == 0) {
5212 if (rebuild)
5213 return;
5214 rebuild = d;
5215 }
5216
5217 if (!rebuild) {
5218 /* (?) none of the disks are marked with
5219 * IMSM_ORD_REBUILD, so assume they are missing and the
5220 * disk_ord_tbl was not correctly updated
5221 */
5222 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
5223 return;
5224 }
5225
5226 units = __le32_to_cpu(dev->vol.curr_migr_unit);
5227 rebuild->recovery_start = units * blocks_per_migr_unit(dev);
5228 }
5229
5230
5231 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
5232 {
5233 /* Given a container loaded by load_super_imsm_all,
5234 * extract information about all the arrays into
5235 * an mdinfo tree.
5236 * If 'subarray' is given, just extract info about that array.
5237 *
5238 * For each imsm_dev create an mdinfo, fill it in,
5239 * then look for matching devices in super->disks
5240 * and create appropriate device mdinfo.
5241 */
5242 struct intel_super *super = st->sb;
5243 struct imsm_super *mpb = super->anchor;
5244 struct mdinfo *rest = NULL;
5245 unsigned int i;
5246 int bbm_errors = 0;
5247 struct dl *d;
5248 int spare_disks = 0;
5249
5250 /* check for bad blocks */
5251 if (imsm_bbm_log_size(super->anchor))
5252 bbm_errors = 1;
5253
5254 /* count spare devices, not used in maps
5255 */
5256 for (d = super->disks; d; d = d->next)
5257 if (d->index == -1)
5258 spare_disks++;
5259
5260 for (i = 0; i < mpb->num_raid_devs; i++) {
5261 struct imsm_dev *dev;
5262 struct imsm_map *map;
5263 struct imsm_map *map2;
5264 struct mdinfo *this;
5265 int slot, chunk;
5266 char *ep;
5267
5268 if (subarray &&
5269 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
5270 continue;
5271
5272 dev = get_imsm_dev(super, i);
5273 map = get_imsm_map(dev, 0);
5274 map2 = get_imsm_map(dev, 1);
5275
5276 /* do not publish arrays that are in the middle of an
5277 * unsupported migration
5278 */
5279 if (dev->vol.migr_state &&
5280 (migr_type(dev) == MIGR_STATE_CHANGE)) {
5281 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
5282 " unsupported migration in progress\n",
5283 dev->volume);
5284 continue;
5285 }
5286 /* do not publish arrays that are not support by controller's
5287 * OROM/EFI
5288 */
5289
5290 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
5291 #ifndef MDASSEMBLE
5292 if (!validate_geometry_imsm_orom(super,
5293 get_imsm_raid_level(map), /* RAID level */
5294 imsm_level_to_layout(get_imsm_raid_level(map)),
5295 map->num_members, /* raid disks */
5296 &chunk,
5297 1 /* verbose */)) {
5298 fprintf(stderr, Name ": RAID gemetry validation failed. "
5299 "Cannot proceed with the action(s).\n");
5300 continue;
5301 }
5302 #endif /* MDASSEMBLE */
5303 this = malloc(sizeof(*this));
5304 if (!this) {
5305 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
5306 sizeof(*this));
5307 break;
5308 }
5309 memset(this, 0, sizeof(*this));
5310 this->next = rest;
5311
5312 super->current_vol = i;
5313 getinfo_super_imsm_volume(st, this, NULL);
5314 for (slot = 0 ; slot < map->num_members; slot++) {
5315 unsigned long long recovery_start;
5316 struct mdinfo *info_d;
5317 struct dl *d;
5318 int idx;
5319 int skip;
5320 __u32 ord;
5321
5322 skip = 0;
5323 idx = get_imsm_disk_idx(dev, slot, 0);
5324 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
5325 for (d = super->disks; d ; d = d->next)
5326 if (d->index == idx)
5327 break;
5328
5329 recovery_start = MaxSector;
5330 if (d == NULL)
5331 skip = 1;
5332 if (d && is_failed(&d->disk))
5333 skip = 1;
5334 if (ord & IMSM_ORD_REBUILD)
5335 recovery_start = 0;
5336
5337 /*
5338 * if we skip some disks the array will be assmebled degraded;
5339 * reset resync start to avoid a dirty-degraded
5340 * situation when performing the intial sync
5341 *
5342 * FIXME handle dirty degraded
5343 */
5344 if ((skip || recovery_start == 0) && !dev->vol.dirty)
5345 this->resync_start = MaxSector;
5346 if (skip)
5347 continue;
5348
5349 info_d = calloc(1, sizeof(*info_d));
5350 if (!info_d) {
5351 fprintf(stderr, Name ": failed to allocate disk"
5352 " for volume %.16s\n", dev->volume);
5353 info_d = this->devs;
5354 while (info_d) {
5355 struct mdinfo *d = info_d->next;
5356
5357 free(info_d);
5358 info_d = d;
5359 }
5360 free(this);
5361 this = rest;
5362 break;
5363 }
5364 info_d->next = this->devs;
5365 this->devs = info_d;
5366
5367 info_d->disk.number = d->index;
5368 info_d->disk.major = d->major;
5369 info_d->disk.minor = d->minor;
5370 info_d->disk.raid_disk = slot;
5371 info_d->recovery_start = recovery_start;
5372 if (map2) {
5373 if (slot < map2->num_members)
5374 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5375 else
5376 this->array.spare_disks++;
5377 } else {
5378 if (slot < map->num_members)
5379 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5380 else
5381 this->array.spare_disks++;
5382 }
5383 if (info_d->recovery_start == MaxSector)
5384 this->array.working_disks++;
5385
5386 info_d->events = __le32_to_cpu(mpb->generation_num);
5387 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5388 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
5389 }
5390 /* now that the disk list is up-to-date fixup recovery_start */
5391 update_recovery_start(dev, this);
5392 this->array.spare_disks += spare_disks;
5393 rest = this;
5394 }
5395
5396 /* if array has bad blocks, set suitable bit in array status */
5397 if (bbm_errors)
5398 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5399
5400 return rest;
5401 }
5402
5403
5404 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
5405 {
5406 struct imsm_map *map = get_imsm_map(dev, 0);
5407
5408 if (!failed)
5409 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
5410 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
5411
5412 switch (get_imsm_raid_level(map)) {
5413 case 0:
5414 return IMSM_T_STATE_FAILED;
5415 break;
5416 case 1:
5417 if (failed < map->num_members)
5418 return IMSM_T_STATE_DEGRADED;
5419 else
5420 return IMSM_T_STATE_FAILED;
5421 break;
5422 case 10:
5423 {
5424 /**
5425 * check to see if any mirrors have failed, otherwise we
5426 * are degraded. Even numbered slots are mirrored on
5427 * slot+1
5428 */
5429 int i;
5430 /* gcc -Os complains that this is unused */
5431 int insync = insync;
5432
5433 for (i = 0; i < map->num_members; i++) {
5434 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
5435 int idx = ord_to_idx(ord);
5436 struct imsm_disk *disk;
5437
5438 /* reset the potential in-sync count on even-numbered
5439 * slots. num_copies is always 2 for imsm raid10
5440 */
5441 if ((i & 1) == 0)
5442 insync = 2;
5443
5444 disk = get_imsm_disk(super, idx);
5445 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5446 insync--;
5447
5448 /* no in-sync disks left in this mirror the
5449 * array has failed
5450 */
5451 if (insync == 0)
5452 return IMSM_T_STATE_FAILED;
5453 }
5454
5455 return IMSM_T_STATE_DEGRADED;
5456 }
5457 case 5:
5458 if (failed < 2)
5459 return IMSM_T_STATE_DEGRADED;
5460 else
5461 return IMSM_T_STATE_FAILED;
5462 break;
5463 default:
5464 break;
5465 }
5466
5467 return map->map_state;
5468 }
5469
5470 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
5471 {
5472 int i;
5473 int failed = 0;
5474 struct imsm_disk *disk;
5475 struct imsm_map *map = get_imsm_map(dev, 0);
5476 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5477 __u32 ord;
5478 int idx;
5479
5480 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5481 * disks that are being rebuilt. New failures are recorded to
5482 * map[0]. So we look through all the disks we started with and
5483 * see if any failures are still present, or if any new ones
5484 * have arrived
5485 *
5486 * FIXME add support for online capacity expansion and
5487 * raid-level-migration
5488 */
5489 for (i = 0; i < prev->num_members; i++) {
5490 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5491 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5492 idx = ord_to_idx(ord);
5493
5494 disk = get_imsm_disk(super, idx);
5495 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5496 failed++;
5497 }
5498
5499 return failed;
5500 }
5501
5502 #ifndef MDASSEMBLE
5503 static int imsm_open_new(struct supertype *c, struct active_array *a,
5504 char *inst)
5505 {
5506 struct intel_super *super = c->sb;
5507 struct imsm_super *mpb = super->anchor;
5508
5509 if (atoi(inst) >= mpb->num_raid_devs) {
5510 fprintf(stderr, "%s: subarry index %d, out of range\n",
5511 __func__, atoi(inst));
5512 return -ENODEV;
5513 }
5514
5515 dprintf("imsm: open_new %s\n", inst);
5516 a->info.container_member = atoi(inst);
5517 return 0;
5518 }
5519
5520 static int is_resyncing(struct imsm_dev *dev)
5521 {
5522 struct imsm_map *migr_map;
5523
5524 if (!dev->vol.migr_state)
5525 return 0;
5526
5527 if (migr_type(dev) == MIGR_INIT ||
5528 migr_type(dev) == MIGR_REPAIR)
5529 return 1;
5530
5531 if (migr_type(dev) == MIGR_GEN_MIGR)
5532 return 0;
5533
5534 migr_map = get_imsm_map(dev, 1);
5535
5536 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5537 (dev->vol.migr_type != MIGR_GEN_MIGR))
5538 return 1;
5539 else
5540 return 0;
5541 }
5542
5543 /* return true if we recorded new information */
5544 static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5545 {
5546 __u32 ord;
5547 int slot;
5548 struct imsm_map *map;
5549
5550 /* new failures are always set in map[0] */
5551 map = get_imsm_map(dev, 0);
5552
5553 slot = get_imsm_disk_slot(map, idx);
5554 if (slot < 0)
5555 return 0;
5556
5557 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
5558 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
5559 return 0;
5560
5561 disk->status |= FAILED_DISK;
5562 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
5563 if (map->failed_disk_num == 0xff)
5564 map->failed_disk_num = slot;
5565 return 1;
5566 }
5567
5568 static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5569 {
5570 mark_failure(dev, disk, idx);
5571
5572 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5573 return;
5574
5575 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5576 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5577 }
5578
5579 static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5580 {
5581 __u8 map_state;
5582 struct dl *dl;
5583 int failed;
5584
5585 if (!super->missing)
5586 return;
5587 failed = imsm_count_failed(super, dev);
5588 map_state = imsm_check_degraded(super, dev, failed);
5589
5590 dprintf("imsm: mark missing\n");
5591 end_migration(dev, map_state);
5592 for (dl = super->missing; dl; dl = dl->next)
5593 mark_missing(dev, &dl->disk, dl->index);
5594 super->updates_pending++;
5595 }
5596
5597 static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5598 {
5599 int used_disks = imsm_num_data_members(dev, 0);
5600 unsigned long long array_blocks;
5601 struct imsm_map *map;
5602
5603 if (used_disks == 0) {
5604 /* when problems occures
5605 * return current array_blocks value
5606 */
5607 array_blocks = __le32_to_cpu(dev->size_high);
5608 array_blocks = array_blocks << 32;
5609 array_blocks += __le32_to_cpu(dev->size_low);
5610
5611 return array_blocks;
5612 }
5613
5614 /* set array size in metadata
5615 */
5616 map = get_imsm_map(dev, 0);
5617 array_blocks = map->blocks_per_member * used_disks;
5618
5619 /* round array size down to closest MB
5620 */
5621 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5622 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5623 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5624
5625 return array_blocks;
5626 }
5627
5628 static void imsm_set_disk(struct active_array *a, int n, int state);
5629
5630 static void imsm_progress_container_reshape(struct intel_super *super)
5631 {
5632 /* if no device has a migr_state, but some device has a
5633 * different number of members than the previous device, start
5634 * changing the number of devices in this device to match
5635 * previous.
5636 */
5637 struct imsm_super *mpb = super->anchor;
5638 int prev_disks = -1;
5639 int i;
5640 int copy_map_size;
5641
5642 for (i = 0; i < mpb->num_raid_devs; i++) {
5643 struct imsm_dev *dev = get_imsm_dev(super, i);
5644 struct imsm_map *map = get_imsm_map(dev, 0);
5645 struct imsm_map *map2;
5646 int prev_num_members;
5647
5648 if (dev->vol.migr_state)
5649 return;
5650
5651 if (prev_disks == -1)
5652 prev_disks = map->num_members;
5653 if (prev_disks == map->num_members)
5654 continue;
5655
5656 /* OK, this array needs to enter reshape mode.
5657 * i.e it needs a migr_state
5658 */
5659
5660 copy_map_size = sizeof_imsm_map(map);
5661 prev_num_members = map->num_members;
5662 map->num_members = prev_disks;
5663 dev->vol.migr_state = 1;
5664 dev->vol.curr_migr_unit = 0;
5665 dev->vol.migr_type = MIGR_GEN_MIGR;
5666 for (i = prev_num_members;
5667 i < map->num_members; i++)
5668 set_imsm_ord_tbl_ent(map, i, i);
5669 map2 = get_imsm_map(dev, 1);
5670 /* Copy the current map */
5671 memcpy(map2, map, copy_map_size);
5672 map2->num_members = prev_num_members;
5673
5674 imsm_set_array_size(dev);
5675 super->updates_pending++;
5676 }
5677 }
5678
5679 /* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild
5680 * states are handled in imsm_set_disk() with one exception, when a
5681 * resync is stopped due to a new failure this routine will set the
5682 * 'degraded' state for the array.
5683 */
5684 static int imsm_set_array_state(struct active_array *a, int consistent)
5685 {
5686 int inst = a->info.container_member;
5687 struct intel_super *super = a->container->sb;
5688 struct imsm_dev *dev = get_imsm_dev(super, inst);
5689 struct imsm_map *map = get_imsm_map(dev, 0);
5690 int failed = imsm_count_failed(super, dev);
5691 __u8 map_state = imsm_check_degraded(super, dev, failed);
5692 __u32 blocks_per_unit;
5693
5694 if (dev->vol.migr_state &&
5695 dev->vol.migr_type == MIGR_GEN_MIGR) {
5696 /* array state change is blocked due to reshape action
5697 * We might need to
5698 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
5699 * - finish the reshape (if last_checkpoint is big and action != reshape)
5700 * - update curr_migr_unit
5701 */
5702 if (a->curr_action == reshape) {
5703 /* still reshaping, maybe update curr_migr_unit */
5704 goto mark_checkpoint;
5705 } else {
5706 if (a->last_checkpoint == 0 && a->prev_action == reshape) {
5707 /* for some reason we aborted the reshape.
5708 * Better clean up
5709 */
5710 struct imsm_map *map2 = get_imsm_map(dev, 1);
5711 dev->vol.migr_state = 0;
5712 dev->vol.migr_type = 0;
5713 dev->vol.curr_migr_unit = 0;
5714 memcpy(map, map2, sizeof_imsm_map(map2));
5715 super->updates_pending++;
5716 }
5717 if (a->last_checkpoint >= a->info.component_size) {
5718 unsigned long long array_blocks;
5719 int used_disks;
5720 struct mdinfo *mdi;
5721
5722 used_disks = imsm_num_data_members(dev, 0);
5723 if (used_disks > 0) {
5724 array_blocks =
5725 map->blocks_per_member *
5726 used_disks;
5727 /* round array size down to closest MB
5728 */
5729 array_blocks = (array_blocks
5730 >> SECT_PER_MB_SHIFT)
5731 << SECT_PER_MB_SHIFT;
5732 a->info.custom_array_size = array_blocks;
5733 /* encourage manager to update array
5734 * size
5735 */
5736
5737 a->check_reshape = 1;
5738 }
5739 /* finalize online capacity expansion/reshape */
5740 for (mdi = a->info.devs; mdi; mdi = mdi->next)
5741 imsm_set_disk(a,
5742 mdi->disk.raid_disk,
5743 mdi->curr_state);
5744
5745 imsm_progress_container_reshape(super);
5746 }
5747 }
5748 }
5749
5750 /* before we activate this array handle any missing disks */
5751 if (consistent == 2)
5752 handle_missing(super, dev);
5753
5754 if (consistent == 2 &&
5755 (!is_resync_complete(&a->info) ||
5756 map_state != IMSM_T_STATE_NORMAL ||
5757 dev->vol.migr_state))
5758 consistent = 0;
5759
5760 if (is_resync_complete(&a->info)) {
5761 /* complete intialization / resync,
5762 * recovery and interrupted recovery is completed in
5763 * ->set_disk
5764 */
5765 if (is_resyncing(dev)) {
5766 dprintf("imsm: mark resync done\n");
5767 end_migration(dev, map_state);
5768 super->updates_pending++;
5769 a->last_checkpoint = 0;
5770 }
5771 } else if (!is_resyncing(dev) && !failed) {
5772 /* mark the start of the init process if nothing is failed */
5773 dprintf("imsm: mark resync start\n");
5774 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
5775 migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
5776 else
5777 migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
5778 super->updates_pending++;
5779 }
5780
5781 mark_checkpoint:
5782 /* check if we can update curr_migr_unit from resync_start, recovery_start */
5783 blocks_per_unit = blocks_per_migr_unit(dev);
5784 if (blocks_per_unit) {
5785 __u32 units32;
5786 __u64 units;
5787
5788 units = a->last_checkpoint / blocks_per_unit;
5789 units32 = units;
5790
5791 /* check that we did not overflow 32-bits, and that
5792 * curr_migr_unit needs updating
5793 */
5794 if (units32 == units &&
5795 units32 != 0 &&
5796 __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
5797 dprintf("imsm: mark checkpoint (%u)\n", units32);
5798 dev->vol.curr_migr_unit = __cpu_to_le32(units32);
5799 super->updates_pending++;
5800 }
5801 }
5802
5803 /* mark dirty / clean */
5804 if (dev->vol.dirty != !consistent) {
5805 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
5806 if (consistent)
5807 dev->vol.dirty = 0;
5808 else
5809 dev->vol.dirty = 1;
5810 super->updates_pending++;
5811 }
5812
5813 return consistent;
5814 }
5815
5816 static void imsm_set_disk(struct active_array *a, int n, int state)
5817 {
5818 int inst = a->info.container_member;
5819 struct intel_super *super = a->container->sb;
5820 struct imsm_dev *dev = get_imsm_dev(super, inst);
5821 struct imsm_map *map = get_imsm_map(dev, 0);
5822 struct imsm_disk *disk;
5823 int failed;
5824 __u32 ord;
5825 __u8 map_state;
5826
5827 if (n > map->num_members)
5828 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5829 n, map->num_members - 1);
5830
5831 if (n < 0)
5832 return;
5833
5834 dprintf("imsm: set_disk %d:%x\n", n, state);
5835
5836 ord = get_imsm_ord_tbl_ent(dev, n, -1);
5837 disk = get_imsm_disk(super, ord_to_idx(ord));
5838
5839 /* check for new failures */
5840 if (state & DS_FAULTY) {
5841 if (mark_failure(dev, disk, ord_to_idx(ord)))
5842 super->updates_pending++;
5843 }
5844
5845 /* check if in_sync */
5846 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
5847 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5848
5849 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
5850 super->updates_pending++;
5851 }
5852
5853 failed = imsm_count_failed(super, dev);
5854 map_state = imsm_check_degraded(super, dev, failed);
5855
5856 /* check if recovery complete, newly degraded, or failed */
5857 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
5858 end_migration(dev, map_state);
5859 map = get_imsm_map(dev, 0);
5860 map->failed_disk_num = ~0;
5861 super->updates_pending++;
5862 a->last_checkpoint = 0;
5863 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5864 map->map_state != map_state &&
5865 !dev->vol.migr_state) {
5866 dprintf("imsm: mark degraded\n");
5867 map->map_state = map_state;
5868 super->updates_pending++;
5869 a->last_checkpoint = 0;
5870 } else if (map_state == IMSM_T_STATE_FAILED &&
5871 map->map_state != map_state) {
5872 dprintf("imsm: mark failed\n");
5873 end_migration(dev, map_state);
5874 super->updates_pending++;
5875 a->last_checkpoint = 0;
5876 } else if (is_gen_migration(dev)) {
5877 dprintf("imsm: Detected General Migration in state: ");
5878 if (map_state == IMSM_T_STATE_NORMAL) {
5879 end_migration(dev, map_state);
5880 map = get_imsm_map(dev, 0);
5881 map->failed_disk_num = ~0;
5882 dprintf("normal\n");
5883 } else {
5884 if (map_state == IMSM_T_STATE_DEGRADED) {
5885 printf("degraded\n");
5886 end_migration(dev, map_state);
5887 } else {
5888 dprintf("failed\n");
5889 }
5890 map->map_state = map_state;
5891 }
5892 super->updates_pending++;
5893 }
5894 }
5895
5896 static int store_imsm_mpb(int fd, struct imsm_super *mpb)
5897 {
5898 void *buf = mpb;
5899 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5900 unsigned long long dsize;
5901 unsigned long long sectors;
5902
5903 get_dev_size(fd, NULL, &dsize);
5904
5905 if (mpb_size > 512) {
5906 /* -1 to account for anchor */
5907 sectors = mpb_sectors(mpb) - 1;
5908
5909 /* write the extended mpb to the sectors preceeding the anchor */
5910 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5911 return 1;
5912
5913 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5914 != 512 * sectors)
5915 return 1;
5916 }
5917
5918 /* first block is stored on second to last sector of the disk */
5919 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
5920 return 1;
5921
5922 if (write(fd, buf, 512) != 512)
5923 return 1;
5924
5925 return 0;
5926 }
5927
5928 static void imsm_sync_metadata(struct supertype *container)
5929 {
5930 struct intel_super *super = container->sb;
5931
5932 dprintf("sync metadata: %d\n", super->updates_pending);
5933 if (!super->updates_pending)
5934 return;
5935
5936 write_super_imsm(container, 0);
5937
5938 super->updates_pending = 0;
5939 }
5940
5941 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
5942 {
5943 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
5944 int i = get_imsm_disk_idx(dev, idx, -1);
5945 struct dl *dl;
5946
5947 for (dl = super->disks; dl; dl = dl->next)
5948 if (dl->index == i)
5949 break;
5950
5951 if (dl && is_failed(&dl->disk))
5952 dl = NULL;
5953
5954 if (dl)
5955 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
5956
5957 return dl;
5958 }
5959
5960 static struct dl *imsm_add_spare(struct intel_super *super, int slot,
5961 struct active_array *a, int activate_new,
5962 struct mdinfo *additional_test_list)
5963 {
5964 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
5965 int idx = get_imsm_disk_idx(dev, slot, -1);
5966 struct imsm_super *mpb = super->anchor;
5967 struct imsm_map *map;
5968 unsigned long long pos;
5969 struct mdinfo *d;
5970 struct extent *ex;
5971 int i, j;
5972 int found;
5973 __u32 array_start = 0;
5974 __u32 array_end = 0;
5975 struct dl *dl;
5976 struct mdinfo *test_list;
5977
5978 for (dl = super->disks; dl; dl = dl->next) {
5979 /* If in this array, skip */
5980 for (d = a->info.devs ; d ; d = d->next)
5981 if (d->state_fd >= 0 &&
5982 d->disk.major == dl->major &&
5983 d->disk.minor == dl->minor) {
5984 dprintf("%x:%x already in array\n",
5985 dl->major, dl->minor);
5986 break;
5987 }
5988 if (d)
5989 continue;
5990 test_list = additional_test_list;
5991 while (test_list) {
5992 if (test_list->disk.major == dl->major &&
5993 test_list->disk.minor == dl->minor) {
5994 dprintf("%x:%x already in additional test list\n",
5995 dl->major, dl->minor);
5996 break;
5997 }
5998 test_list = test_list->next;
5999 }
6000 if (test_list)
6001 continue;
6002
6003 /* skip in use or failed drives */
6004 if (is_failed(&dl->disk) || idx == dl->index ||
6005 dl->index == -2) {
6006 dprintf("%x:%x status (failed: %d index: %d)\n",
6007 dl->major, dl->minor, is_failed(&dl->disk), idx);
6008 continue;
6009 }
6010
6011 /* skip pure spares when we are looking for partially
6012 * assimilated drives
6013 */
6014 if (dl->index == -1 && !activate_new)
6015 continue;
6016
6017 /* Does this unused device have the requisite free space?
6018 * It needs to be able to cover all member volumes
6019 */
6020 ex = get_extents(super, dl);
6021 if (!ex) {
6022 dprintf("cannot get extents\n");
6023 continue;
6024 }
6025 for (i = 0; i < mpb->num_raid_devs; i++) {
6026 dev = get_imsm_dev(super, i);
6027 map = get_imsm_map(dev, 0);
6028
6029 /* check if this disk is already a member of
6030 * this array
6031 */
6032 if (get_imsm_disk_slot(map, dl->index) >= 0)
6033 continue;
6034
6035 found = 0;
6036 j = 0;
6037 pos = 0;
6038 array_start = __le32_to_cpu(map->pba_of_lba0);
6039 array_end = array_start +
6040 __le32_to_cpu(map->blocks_per_member) - 1;
6041
6042 do {
6043 /* check that we can start at pba_of_lba0 with
6044 * blocks_per_member of space
6045 */
6046 if (array_start >= pos && array_end < ex[j].start) {
6047 found = 1;
6048 break;
6049 }
6050 pos = ex[j].start + ex[j].size;
6051 j++;
6052 } while (ex[j-1].size);
6053
6054 if (!found)
6055 break;
6056 }
6057
6058 free(ex);
6059 if (i < mpb->num_raid_devs) {
6060 dprintf("%x:%x does not have %u to %u available\n",
6061 dl->major, dl->minor, array_start, array_end);
6062 /* No room */
6063 continue;
6064 }
6065 return dl;
6066 }
6067
6068 return dl;
6069 }
6070
6071
6072 static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
6073 {
6074 struct imsm_dev *dev2;
6075 struct imsm_map *map;
6076 struct dl *idisk;
6077 int slot;
6078 int idx;
6079 __u8 state;
6080
6081 dev2 = get_imsm_dev(cont->sb, dev_idx);
6082 if (dev2) {
6083 state = imsm_check_degraded(cont->sb, dev2, failed);
6084 if (state == IMSM_T_STATE_FAILED) {
6085 map = get_imsm_map(dev2, 0);
6086 if (!map)
6087 return 1;
6088 for (slot = 0; slot < map->num_members; slot++) {
6089 /*
6090 * Check if failed disks are deleted from intel
6091 * disk list or are marked to be deleted
6092 */
6093 idx = get_imsm_disk_idx(dev2, slot, -1);
6094 idisk = get_imsm_dl_disk(cont->sb, idx);
6095 /*
6096 * Do not rebuild the array if failed disks
6097 * from failed sub-array are not removed from
6098 * container.
6099 */
6100 if (idisk &&
6101 is_failed(&idisk->disk) &&
6102 (idisk->action != DISK_REMOVE))
6103 return 0;
6104 }
6105 }
6106 }
6107 return 1;
6108 }
6109
6110 static struct mdinfo *imsm_activate_spare(struct active_array *a,
6111 struct metadata_update **updates)
6112 {
6113 /**
6114 * Find a device with unused free space and use it to replace a
6115 * failed/vacant region in an array. We replace failed regions one a
6116 * array at a time. The result is that a new spare disk will be added
6117 * to the first failed array and after the monitor has finished
6118 * propagating failures the remainder will be consumed.
6119 *
6120 * FIXME add a capability for mdmon to request spares from another
6121 * container.
6122 */
6123
6124 struct intel_super *super = a->container->sb;
6125 int inst = a->info.container_member;
6126 struct imsm_dev *dev = get_imsm_dev(super, inst);
6127 struct imsm_map *map = get_imsm_map(dev, 0);
6128 int failed = a->info.array.raid_disks;
6129 struct mdinfo *rv = NULL;
6130 struct mdinfo *d;
6131 struct mdinfo *di;
6132 struct metadata_update *mu;
6133 struct dl *dl;
6134 struct imsm_update_activate_spare *u;
6135 int num_spares = 0;
6136 int i;
6137 int allowed;
6138
6139 for (d = a->info.devs ; d ; d = d->next) {
6140 if ((d->curr_state & DS_FAULTY) &&
6141 d->state_fd >= 0)
6142 /* wait for Removal to happen */
6143 return NULL;
6144 if (d->state_fd >= 0)
6145 failed--;
6146 }
6147
6148 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
6149 inst, failed, a->info.array.raid_disks, a->info.array.level);
6150
6151 if (dev->vol.migr_state &&
6152 dev->vol.migr_type == MIGR_GEN_MIGR)
6153 /* No repair during migration */
6154 return NULL;
6155
6156 if (a->info.array.level == 4)
6157 /* No repair for takeovered array
6158 * imsm doesn't support raid4
6159 */
6160 return NULL;
6161
6162 if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
6163 return NULL;
6164
6165 /*
6166 * If there are any failed disks check state of the other volume.
6167 * Block rebuild if the another one is failed until failed disks
6168 * are removed from container.
6169 */
6170 if (failed) {
6171 dprintf("found failed disks in %s, check if there another"
6172 "failed sub-array.\n",
6173 dev->volume);
6174 /* check if states of the other volumes allow for rebuild */
6175 for (i = 0; i < super->anchor->num_raid_devs; i++) {
6176 if (i != inst) {
6177 allowed = imsm_rebuild_allowed(a->container,
6178 i, failed);
6179 if (!allowed)
6180 return NULL;
6181 }
6182 }
6183 }
6184
6185 /* For each slot, if it is not working, find a spare */
6186 for (i = 0; i < a->info.array.raid_disks; i++) {
6187 for (d = a->info.devs ; d ; d = d->next)
6188 if (d->disk.raid_disk == i)
6189 break;
6190 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
6191 if (d && (d->state_fd >= 0))
6192 continue;
6193
6194 /*
6195 * OK, this device needs recovery. Try to re-add the
6196 * previous occupant of this slot, if this fails see if
6197 * we can continue the assimilation of a spare that was
6198 * partially assimilated, finally try to activate a new
6199 * spare.
6200 */
6201 dl = imsm_readd(super, i, a);
6202 if (!dl)
6203 dl = imsm_add_spare(super, i, a, 0, NULL);
6204 if (!dl)
6205 dl = imsm_add_spare(super, i, a, 1, NULL);
6206 if (!dl)
6207 continue;
6208
6209 /* found a usable disk with enough space */
6210 di = malloc(sizeof(*di));
6211 if (!di)
6212 continue;
6213 memset(di, 0, sizeof(*di));
6214
6215 /* dl->index will be -1 in the case we are activating a
6216 * pristine spare. imsm_process_update() will create a
6217 * new index in this case. Once a disk is found to be
6218 * failed in all member arrays it is kicked from the
6219 * metadata
6220 */
6221 di->disk.number = dl->index;
6222
6223 /* (ab)use di->devs to store a pointer to the device
6224 * we chose
6225 */
6226 di->devs = (struct mdinfo *) dl;
6227
6228 di->disk.raid_disk = i;
6229 di->disk.major = dl->major;
6230 di->disk.minor = dl->minor;
6231 di->disk.state = 0;
6232 di->recovery_start = 0;
6233 di->data_offset = __le32_to_cpu(map->pba_of_lba0);
6234 di->component_size = a->info.component_size;
6235 di->container_member = inst;
6236 super->random = random32();
6237 di->next = rv;
6238 rv = di;
6239 num_spares++;
6240 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
6241 i, di->data_offset);
6242
6243 break;
6244 }
6245
6246 if (!rv)
6247 /* No spares found */
6248 return rv;
6249 /* Now 'rv' has a list of devices to return.
6250 * Create a metadata_update record to update the
6251 * disk_ord_tbl for the array
6252 */
6253 mu = malloc(sizeof(*mu));
6254 if (mu) {
6255 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
6256 if (mu->buf == NULL) {
6257 free(mu);
6258 mu = NULL;
6259 }
6260 }
6261 if (!mu) {
6262 while (rv) {
6263 struct mdinfo *n = rv->next;
6264
6265 free(rv);
6266 rv = n;
6267 }
6268 return NULL;
6269 }
6270
6271 mu->space = NULL;
6272 mu->space_list = NULL;
6273 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
6274 mu->next = *updates;
6275 u = (struct imsm_update_activate_spare *) mu->buf;
6276
6277 for (di = rv ; di ; di = di->next) {
6278 u->type = update_activate_spare;
6279 u->dl = (struct dl *) di->devs;
6280 di->devs = NULL;
6281 u->slot = di->disk.raid_disk;
6282 u->array = inst;
6283 u->next = u + 1;
6284 u++;
6285 }
6286 (u-1)->next = NULL;
6287 *updates = mu;
6288
6289 return rv;
6290 }
6291
6292 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
6293 {
6294 struct imsm_dev *dev = get_imsm_dev(super, idx);
6295 struct imsm_map *map = get_imsm_map(dev, 0);
6296 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
6297 struct disk_info *inf = get_disk_info(u);
6298 struct imsm_disk *disk;
6299 int i;
6300 int j;
6301
6302 for (i = 0; i < map->num_members; i++) {
6303 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
6304 for (j = 0; j < new_map->num_members; j++)
6305 if (serialcmp(disk->serial, inf[j].serial) == 0)
6306 return 1;
6307 }
6308
6309 return 0;
6310 }
6311
6312
6313 static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6314 {
6315 struct dl *dl = NULL;
6316 for (dl = super->disks; dl; dl = dl->next)
6317 if ((dl->major == major) && (dl->minor == minor))
6318 return dl;
6319 return NULL;
6320 }
6321
6322 static int remove_disk_super(struct intel_super *super, int major, int minor)
6323 {
6324 struct dl *prev = NULL;
6325 struct dl *dl;
6326
6327 prev = NULL;
6328 for (dl = super->disks; dl; dl = dl->next) {
6329 if ((dl->major == major) && (dl->minor == minor)) {
6330 /* remove */
6331 if (prev)
6332 prev->next = dl->next;
6333 else
6334 super->disks = dl->next;
6335 dl->next = NULL;
6336 __free_imsm_disk(dl);
6337 dprintf("%s: removed %x:%x\n",
6338 __func__, major, minor);
6339 break;
6340 }
6341 prev = dl;
6342 }
6343 return 0;
6344 }
6345
6346 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
6347
6348 static int add_remove_disk_update(struct intel_super *super)
6349 {
6350 int check_degraded = 0;
6351 struct dl *disk = NULL;
6352 /* add/remove some spares to/from the metadata/contrainer */
6353 while (super->disk_mgmt_list) {
6354 struct dl *disk_cfg;
6355
6356 disk_cfg = super->disk_mgmt_list;
6357 super->disk_mgmt_list = disk_cfg->next;
6358 disk_cfg->next = NULL;
6359
6360 if (disk_cfg->action == DISK_ADD) {
6361 disk_cfg->next = super->disks;
6362 super->disks = disk_cfg;
6363 check_degraded = 1;
6364 dprintf("%s: added %x:%x\n",
6365 __func__, disk_cfg->major,
6366 disk_cfg->minor);
6367 } else if (disk_cfg->action == DISK_REMOVE) {
6368 dprintf("Disk remove action processed: %x.%x\n",
6369 disk_cfg->major, disk_cfg->minor);
6370 disk = get_disk_super(super,
6371 disk_cfg->major,
6372 disk_cfg->minor);
6373 if (disk) {
6374 /* store action status */
6375 disk->action = DISK_REMOVE;
6376 /* remove spare disks only */
6377 if (disk->index == -1) {
6378 remove_disk_super(super,
6379 disk_cfg->major,
6380 disk_cfg->minor);
6381 }
6382 }
6383 /* release allocate disk structure */
6384 __free_imsm_disk(disk_cfg);
6385 }
6386 }
6387 return check_degraded;
6388 }
6389
6390
6391 static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6392 struct intel_super *super,
6393 void ***space_list)
6394 {
6395 struct intel_dev *id;
6396 void **tofree = NULL;
6397 int ret_val = 0;
6398
6399 dprintf("apply_reshape_migration_update()\n");
6400 if ((u->subdev < 0) ||
6401 (u->subdev > 1)) {
6402 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6403 return ret_val;
6404 }
6405 if ((space_list == NULL) || (*space_list == NULL)) {
6406 dprintf("imsm: Error: Memory is not allocated\n");
6407 return ret_val;
6408 }
6409
6410 for (id = super->devlist ; id; id = id->next) {
6411 if (id->index == (unsigned)u->subdev) {
6412 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6413 struct imsm_map *map;
6414 struct imsm_dev *new_dev =
6415 (struct imsm_dev *)*space_list;
6416 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6417 int to_state;
6418 struct dl *new_disk;
6419
6420 if (new_dev == NULL)
6421 return ret_val;
6422 *space_list = **space_list;
6423 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6424 map = get_imsm_map(new_dev, 0);
6425 if (migr_map) {
6426 dprintf("imsm: Error: migration in progress");
6427 return ret_val;
6428 }
6429
6430 to_state = map->map_state;
6431 if ((u->new_level == 5) && (map->raid_level == 0)) {
6432 map->num_members++;
6433 /* this should not happen */
6434 if (u->new_disks[0] < 0) {
6435 map->failed_disk_num =
6436 map->num_members - 1;
6437 to_state = IMSM_T_STATE_DEGRADED;
6438 } else
6439 to_state = IMSM_T_STATE_NORMAL;
6440 }
6441 migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
6442 if (u->new_level > -1)
6443 map->raid_level = u->new_level;
6444 migr_map = get_imsm_map(new_dev, 1);
6445 if ((u->new_level == 5) &&
6446 (migr_map->raid_level == 0)) {
6447 int ord = map->num_members - 1;
6448 migr_map->num_members--;
6449 if (u->new_disks[0] < 0)
6450 ord |= IMSM_ORD_REBUILD;
6451 set_imsm_ord_tbl_ent(map,
6452 map->num_members - 1,
6453 ord);
6454 }
6455 id->dev = new_dev;
6456 tofree = (void **)dev;
6457
6458 /* update chunk size
6459 */
6460 if (u->new_chunksize > 0)
6461 map->blocks_per_strip =
6462 __cpu_to_le16(u->new_chunksize * 2);
6463
6464 /* add disk
6465 */
6466 if ((u->new_level != 5) ||
6467 (migr_map->raid_level != 0) ||
6468 (migr_map->raid_level == map->raid_level))
6469 goto skip_disk_add;
6470
6471 if (u->new_disks[0] >= 0) {
6472 /* use passes spare
6473 */
6474 new_disk = get_disk_super(super,
6475 major(u->new_disks[0]),
6476 minor(u->new_disks[0]));
6477 dprintf("imsm: new disk for reshape is: %i:%i "
6478 "(%p, index = %i)\n",
6479 major(u->new_disks[0]),
6480 minor(u->new_disks[0]),
6481 new_disk, new_disk->index);
6482 if (new_disk == NULL)
6483 goto error_disk_add;
6484
6485 new_disk->index = map->num_members - 1;
6486 /* slot to fill in autolayout
6487 */
6488 new_disk->raiddisk = new_disk->index;
6489 new_disk->disk.status |= CONFIGURED_DISK;
6490 new_disk->disk.status &= ~SPARE_DISK;
6491 } else
6492 goto error_disk_add;
6493
6494 skip_disk_add:
6495 *tofree = *space_list;
6496 /* calculate new size
6497 */
6498 imsm_set_array_size(new_dev);
6499
6500 ret_val = 1;
6501 }
6502 }
6503
6504 if (tofree)
6505 *space_list = tofree;
6506 return ret_val;
6507
6508 error_disk_add:
6509 dprintf("Error: imsm: Cannot find disk.\n");
6510 return ret_val;
6511 }
6512
6513
6514 static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6515 struct intel_super *super,
6516 void ***space_list)
6517 {
6518 struct dl *new_disk;
6519 struct intel_dev *id;
6520 int i;
6521 int delta_disks = u->new_raid_disks - u->old_raid_disks;
6522 int disk_count = u->old_raid_disks;
6523 void **tofree = NULL;
6524 int devices_to_reshape = 1;
6525 struct imsm_super *mpb = super->anchor;
6526 int ret_val = 0;
6527 unsigned int dev_id;
6528
6529 dprintf("imsm: apply_reshape_container_disks_update()\n");
6530
6531 /* enable spares to use in array */
6532 for (i = 0; i < delta_disks; i++) {
6533 new_disk = get_disk_super(super,
6534 major(u->new_disks[i]),
6535 minor(u->new_disks[i]));
6536 dprintf("imsm: new disk for reshape is: %i:%i "
6537 "(%p, index = %i)\n",
6538 major(u->new_disks[i]), minor(u->new_disks[i]),
6539 new_disk, new_disk->index);
6540 if ((new_disk == NULL) ||
6541 ((new_disk->index >= 0) &&
6542 (new_disk->index < u->old_raid_disks)))
6543 goto update_reshape_exit;
6544 new_disk->index = disk_count++;
6545 /* slot to fill in autolayout
6546 */
6547 new_disk->raiddisk = new_disk->index;
6548 new_disk->disk.status |=
6549 CONFIGURED_DISK;
6550 new_disk->disk.status &= ~SPARE_DISK;
6551 }
6552
6553 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6554 mpb->num_raid_devs);
6555 /* manage changes in volume
6556 */
6557 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
6558 void **sp = *space_list;
6559 struct imsm_dev *newdev;
6560 struct imsm_map *newmap, *oldmap;
6561
6562 for (id = super->devlist ; id; id = id->next) {
6563 if (id->index == dev_id)
6564 break;
6565 }
6566 if (id == NULL)
6567 break;
6568 if (!sp)
6569 continue;
6570 *space_list = *sp;
6571 newdev = (void*)sp;
6572 /* Copy the dev, but not (all of) the map */
6573 memcpy(newdev, id->dev, sizeof(*newdev));
6574 oldmap = get_imsm_map(id->dev, 0);
6575 newmap = get_imsm_map(newdev, 0);
6576 /* Copy the current map */
6577 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6578 /* update one device only
6579 */
6580 if (devices_to_reshape) {
6581 dprintf("imsm: modifying subdev: %i\n",
6582 id->index);
6583 devices_to_reshape--;
6584 newdev->vol.migr_state = 1;
6585 newdev->vol.curr_migr_unit = 0;
6586 newdev->vol.migr_type = MIGR_GEN_MIGR;
6587 newmap->num_members = u->new_raid_disks;
6588 for (i = 0; i < delta_disks; i++) {
6589 set_imsm_ord_tbl_ent(newmap,
6590 u->old_raid_disks + i,
6591 u->old_raid_disks + i);
6592 }
6593 /* New map is correct, now need to save old map
6594 */
6595 newmap = get_imsm_map(newdev, 1);
6596 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6597
6598 imsm_set_array_size(newdev);
6599 }
6600
6601 sp = (void **)id->dev;
6602 id->dev = newdev;
6603 *sp = tofree;
6604 tofree = sp;
6605
6606 /* Clear migration record */
6607 memset(super->migr_rec, 0, sizeof(struct migr_record));
6608 }
6609 if (tofree)
6610 *space_list = tofree;
6611 ret_val = 1;
6612
6613 update_reshape_exit:
6614
6615 return ret_val;
6616 }
6617
6618 static int apply_takeover_update(struct imsm_update_takeover *u,
6619 struct intel_super *super,
6620 void ***space_list)
6621 {
6622 struct imsm_dev *dev = NULL;
6623 struct intel_dev *dv;
6624 struct imsm_dev *dev_new;
6625 struct imsm_map *map;
6626 struct dl *dm, *du;
6627 int i;
6628
6629 for (dv = super->devlist; dv; dv = dv->next)
6630 if (dv->index == (unsigned int)u->subarray) {
6631 dev = dv->dev;
6632 break;
6633 }
6634
6635 if (dev == NULL)
6636 return 0;
6637
6638 map = get_imsm_map(dev, 0);
6639
6640 if (u->direction == R10_TO_R0) {
6641 /* Number of failed disks must be half of initial disk number */
6642 if (imsm_count_failed(super, dev) != (map->num_members / 2))
6643 return 0;
6644
6645 /* iterate through devices to mark removed disks as spare */
6646 for (dm = super->disks; dm; dm = dm->next) {
6647 if (dm->disk.status & FAILED_DISK) {
6648 int idx = dm->index;
6649 /* update indexes on the disk list */
6650 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6651 the index values will end up being correct.... NB */
6652 for (du = super->disks; du; du = du->next)
6653 if (du->index > idx)
6654 du->index--;
6655 /* mark as spare disk */
6656 dm->disk.status = SPARE_DISK;
6657 dm->index = -1;
6658 }
6659 }
6660 /* update map */
6661 map->num_members = map->num_members / 2;
6662 map->map_state = IMSM_T_STATE_NORMAL;
6663 map->num_domains = 1;
6664 map->raid_level = 0;
6665 map->failed_disk_num = -1;
6666 }
6667
6668 if (u->direction == R0_TO_R10) {
6669 void **space;
6670 /* update slots in current disk list */
6671 for (dm = super->disks; dm; dm = dm->next) {
6672 if (dm->index >= 0)
6673 dm->index *= 2;
6674 }
6675 /* create new *missing* disks */
6676 for (i = 0; i < map->num_members; i++) {
6677 space = *space_list;
6678 if (!space)
6679 continue;
6680 *space_list = *space;
6681 du = (void *)space;
6682 memcpy(du, super->disks, sizeof(*du));
6683 du->fd = -1;
6684 du->minor = 0;
6685 du->major = 0;
6686 du->index = (i * 2) + 1;
6687 sprintf((char *)du->disk.serial,
6688 " MISSING_%d", du->index);
6689 sprintf((char *)du->serial,
6690 "MISSING_%d", du->index);
6691 du->next = super->missing;
6692 super->missing = du;
6693 }
6694 /* create new dev and map */
6695 space = *space_list;
6696 if (!space)
6697 return 0;
6698 *space_list = *space;
6699 dev_new = (void *)space;
6700 memcpy(dev_new, dev, sizeof(*dev));
6701 /* update new map */
6702 map = get_imsm_map(dev_new, 0);
6703 map->num_members = map->num_members * 2;
6704 map->map_state = IMSM_T_STATE_DEGRADED;
6705 map->num_domains = 2;
6706 map->raid_level = 1;
6707 /* replace dev<->dev_new */
6708 dv->dev = dev_new;
6709 }
6710 /* update disk order table */
6711 for (du = super->disks; du; du = du->next)
6712 if (du->index >= 0)
6713 set_imsm_ord_tbl_ent(map, du->index, du->index);
6714 for (du = super->missing; du; du = du->next)
6715 if (du->index >= 0) {
6716 set_imsm_ord_tbl_ent(map, du->index, du->index);
6717 mark_missing(dev_new, &du->disk, du->index);
6718 }
6719
6720 return 1;
6721 }
6722
6723 static void imsm_process_update(struct supertype *st,
6724 struct metadata_update *update)
6725 {
6726 /**
6727 * crack open the metadata_update envelope to find the update record
6728 * update can be one of:
6729 * update_reshape_container_disks - all the arrays in the container
6730 * are being reshaped to have more devices. We need to mark
6731 * the arrays for general migration and convert selected spares
6732 * into active devices.
6733 * update_activate_spare - a spare device has replaced a failed
6734 * device in an array, update the disk_ord_tbl. If this disk is
6735 * present in all member arrays then also clear the SPARE_DISK
6736 * flag
6737 * update_create_array
6738 * update_kill_array
6739 * update_rename_array
6740 * update_add_remove_disk
6741 */
6742 struct intel_super *super = st->sb;
6743 struct imsm_super *mpb;
6744 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6745
6746 /* update requires a larger buf but the allocation failed */
6747 if (super->next_len && !super->next_buf) {
6748 super->next_len = 0;
6749 return;
6750 }
6751
6752 if (super->next_buf) {
6753 memcpy(super->next_buf, super->buf, super->len);
6754 free(super->buf);
6755 super->len = super->next_len;
6756 super->buf = super->next_buf;
6757
6758 super->next_len = 0;
6759 super->next_buf = NULL;
6760 }
6761
6762 mpb = super->anchor;
6763
6764 switch (type) {
6765 case update_takeover: {
6766 struct imsm_update_takeover *u = (void *)update->buf;
6767 if (apply_takeover_update(u, super, &update->space_list)) {
6768 imsm_update_version_info(super);
6769 super->updates_pending++;
6770 }
6771 break;
6772 }
6773
6774 case update_reshape_container_disks: {
6775 struct imsm_update_reshape *u = (void *)update->buf;
6776 if (apply_reshape_container_disks_update(
6777 u, super, &update->space_list))
6778 super->updates_pending++;
6779 break;
6780 }
6781 case update_reshape_migration: {
6782 struct imsm_update_reshape_migration *u = (void *)update->buf;
6783 if (apply_reshape_migration_update(
6784 u, super, &update->space_list))
6785 super->updates_pending++;
6786 break;
6787 }
6788 case update_activate_spare: {
6789 struct imsm_update_activate_spare *u = (void *) update->buf;
6790 struct imsm_dev *dev = get_imsm_dev(super, u->array);
6791 struct imsm_map *map = get_imsm_map(dev, 0);
6792 struct imsm_map *migr_map;
6793 struct active_array *a;
6794 struct imsm_disk *disk;
6795 __u8 to_state;
6796 struct dl *dl;
6797 unsigned int found;
6798 int failed;
6799 int victim = get_imsm_disk_idx(dev, u->slot, -1);
6800 int i;
6801
6802 for (dl = super->disks; dl; dl = dl->next)
6803 if (dl == u->dl)
6804 break;
6805
6806 if (!dl) {
6807 fprintf(stderr, "error: imsm_activate_spare passed "
6808 "an unknown disk (index: %d)\n",
6809 u->dl->index);
6810 return;
6811 }
6812
6813 super->updates_pending++;
6814 /* count failures (excluding rebuilds and the victim)
6815 * to determine map[0] state
6816 */
6817 failed = 0;
6818 for (i = 0; i < map->num_members; i++) {
6819 if (i == u->slot)
6820 continue;
6821 disk = get_imsm_disk(super,
6822 get_imsm_disk_idx(dev, i, -1));
6823 if (!disk || is_failed(disk))
6824 failed++;
6825 }
6826
6827 /* adding a pristine spare, assign a new index */
6828 if (dl->index < 0) {
6829 dl->index = super->anchor->num_disks;
6830 super->anchor->num_disks++;
6831 }
6832 disk = &dl->disk;
6833 disk->status |= CONFIGURED_DISK;
6834 disk->status &= ~SPARE_DISK;
6835
6836 /* mark rebuild */
6837 to_state = imsm_check_degraded(super, dev, failed);
6838 map->map_state = IMSM_T_STATE_DEGRADED;
6839 migrate(dev, super, to_state, MIGR_REBUILD);
6840 migr_map = get_imsm_map(dev, 1);
6841 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6842 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6843
6844 /* update the family_num to mark a new container
6845 * generation, being careful to record the existing
6846 * family_num in orig_family_num to clean up after
6847 * earlier mdadm versions that neglected to set it.
6848 */
6849 if (mpb->orig_family_num == 0)
6850 mpb->orig_family_num = mpb->family_num;
6851 mpb->family_num += super->random;
6852
6853 /* count arrays using the victim in the metadata */
6854 found = 0;
6855 for (a = st->arrays; a ; a = a->next) {
6856 dev = get_imsm_dev(super, a->info.container_member);
6857 map = get_imsm_map(dev, 0);
6858
6859 if (get_imsm_disk_slot(map, victim) >= 0)
6860 found++;
6861 }
6862
6863 /* delete the victim if it is no longer being
6864 * utilized anywhere
6865 */
6866 if (!found) {
6867 struct dl **dlp;
6868
6869 /* We know that 'manager' isn't touching anything,
6870 * so it is safe to delete
6871 */
6872 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
6873 if ((*dlp)->index == victim)
6874 break;
6875
6876 /* victim may be on the missing list */
6877 if (!*dlp)
6878 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
6879 if ((*dlp)->index == victim)
6880 break;
6881 imsm_delete(super, dlp, victim);
6882 }
6883 break;
6884 }
6885 case update_create_array: {
6886 /* someone wants to create a new array, we need to be aware of
6887 * a few races/collisions:
6888 * 1/ 'Create' called by two separate instances of mdadm
6889 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6890 * devices that have since been assimilated via
6891 * activate_spare.
6892 * In the event this update can not be carried out mdadm will
6893 * (FIX ME) notice that its update did not take hold.
6894 */
6895 struct imsm_update_create_array *u = (void *) update->buf;
6896 struct intel_dev *dv;
6897 struct imsm_dev *dev;
6898 struct imsm_map *map, *new_map;
6899 unsigned long long start, end;
6900 unsigned long long new_start, new_end;
6901 int i;
6902 struct disk_info *inf;
6903 struct dl *dl;
6904
6905 /* handle racing creates: first come first serve */
6906 if (u->dev_idx < mpb->num_raid_devs) {
6907 dprintf("%s: subarray %d already defined\n",
6908 __func__, u->dev_idx);
6909 goto create_error;
6910 }
6911
6912 /* check update is next in sequence */
6913 if (u->dev_idx != mpb->num_raid_devs) {
6914 dprintf("%s: can not create array %d expected index %d\n",
6915 __func__, u->dev_idx, mpb->num_raid_devs);
6916 goto create_error;
6917 }
6918
6919 new_map = get_imsm_map(&u->dev, 0);
6920 new_start = __le32_to_cpu(new_map->pba_of_lba0);
6921 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
6922 inf = get_disk_info(u);
6923
6924 /* handle activate_spare versus create race:
6925 * check to make sure that overlapping arrays do not include
6926 * overalpping disks
6927 */
6928 for (i = 0; i < mpb->num_raid_devs; i++) {
6929 dev = get_imsm_dev(super, i);
6930 map = get_imsm_map(dev, 0);
6931 start = __le32_to_cpu(map->pba_of_lba0);
6932 end = start + __le32_to_cpu(map->blocks_per_member);
6933 if ((new_start >= start && new_start <= end) ||
6934 (start >= new_start && start <= new_end))
6935 /* overlap */;
6936 else
6937 continue;
6938
6939 if (disks_overlap(super, i, u)) {
6940 dprintf("%s: arrays overlap\n", __func__);
6941 goto create_error;
6942 }
6943 }
6944
6945 /* check that prepare update was successful */
6946 if (!update->space) {
6947 dprintf("%s: prepare update failed\n", __func__);
6948 goto create_error;
6949 }
6950
6951 /* check that all disks are still active before committing
6952 * changes. FIXME: could we instead handle this by creating a
6953 * degraded array? That's probably not what the user expects,
6954 * so better to drop this update on the floor.
6955 */
6956 for (i = 0; i < new_map->num_members; i++) {
6957 dl = serial_to_dl(inf[i].serial, super);
6958 if (!dl) {
6959 dprintf("%s: disk disappeared\n", __func__);
6960 goto create_error;
6961 }
6962 }
6963
6964 super->updates_pending++;
6965
6966 /* convert spares to members and fixup ord_tbl */
6967 for (i = 0; i < new_map->num_members; i++) {
6968 dl = serial_to_dl(inf[i].serial, super);
6969 if (dl->index == -1) {
6970 dl->index = mpb->num_disks;
6971 mpb->num_disks++;
6972 dl->disk.status |= CONFIGURED_DISK;
6973 dl->disk.status &= ~SPARE_DISK;
6974 }
6975 set_imsm_ord_tbl_ent(new_map, i, dl->index);
6976 }
6977
6978 dv = update->space;
6979 dev = dv->dev;
6980 update->space = NULL;
6981 imsm_copy_dev(dev, &u->dev);
6982 dv->index = u->dev_idx;
6983 dv->next = super->devlist;
6984 super->devlist = dv;
6985 mpb->num_raid_devs++;
6986
6987 imsm_update_version_info(super);
6988 break;
6989 create_error:
6990 /* mdmon knows how to release update->space, but not
6991 * ((struct intel_dev *) update->space)->dev
6992 */
6993 if (update->space) {
6994 dv = update->space;
6995 free(dv->dev);
6996 }
6997 break;
6998 }
6999 case update_kill_array: {
7000 struct imsm_update_kill_array *u = (void *) update->buf;
7001 int victim = u->dev_idx;
7002 struct active_array *a;
7003 struct intel_dev **dp;
7004 struct imsm_dev *dev;
7005
7006 /* sanity check that we are not affecting the uuid of
7007 * active arrays, or deleting an active array
7008 *
7009 * FIXME when immutable ids are available, but note that
7010 * we'll also need to fixup the invalidated/active
7011 * subarray indexes in mdstat
7012 */
7013 for (a = st->arrays; a; a = a->next)
7014 if (a->info.container_member >= victim)
7015 break;
7016 /* by definition if mdmon is running at least one array
7017 * is active in the container, so checking
7018 * mpb->num_raid_devs is just extra paranoia
7019 */
7020 dev = get_imsm_dev(super, victim);
7021 if (a || !dev || mpb->num_raid_devs == 1) {
7022 dprintf("failed to delete subarray-%d\n", victim);
7023 break;
7024 }
7025
7026 for (dp = &super->devlist; *dp;)
7027 if ((*dp)->index == (unsigned)super->current_vol) {
7028 *dp = (*dp)->next;
7029 } else {
7030 if ((*dp)->index > (unsigned)victim)
7031 (*dp)->index--;
7032 dp = &(*dp)->next;
7033 }
7034 mpb->num_raid_devs--;
7035 super->updates_pending++;
7036 break;
7037 }
7038 case update_rename_array: {
7039 struct imsm_update_rename_array *u = (void *) update->buf;
7040 char name[MAX_RAID_SERIAL_LEN+1];
7041 int target = u->dev_idx;
7042 struct active_array *a;
7043 struct imsm_dev *dev;
7044
7045 /* sanity check that we are not affecting the uuid of
7046 * an active array
7047 */
7048 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
7049 name[MAX_RAID_SERIAL_LEN] = '\0';
7050 for (a = st->arrays; a; a = a->next)
7051 if (a->info.container_member == target)
7052 break;
7053 dev = get_imsm_dev(super, u->dev_idx);
7054 if (a || !dev || !check_name(super, name, 1)) {
7055 dprintf("failed to rename subarray-%d\n", target);
7056 break;
7057 }
7058
7059 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
7060 super->updates_pending++;
7061 break;
7062 }
7063 case update_add_remove_disk: {
7064 /* we may be able to repair some arrays if disks are
7065 * being added, check teh status of add_remove_disk
7066 * if discs has been added.
7067 */
7068 if (add_remove_disk_update(super)) {
7069 struct active_array *a;
7070
7071 super->updates_pending++;
7072 for (a = st->arrays; a; a = a->next)
7073 a->check_degraded = 1;
7074 }
7075 break;
7076 }
7077 default:
7078 fprintf(stderr, "error: unsuported process update type:"
7079 "(type: %d)\n", type);
7080 }
7081 }
7082
7083 static struct mdinfo *get_spares_for_grow(struct supertype *st);
7084
7085 static void imsm_prepare_update(struct supertype *st,
7086 struct metadata_update *update)
7087 {
7088 /**
7089 * Allocate space to hold new disk entries, raid-device entries or a new
7090 * mpb if necessary. The manager synchronously waits for updates to
7091 * complete in the monitor, so new mpb buffers allocated here can be
7092 * integrated by the monitor thread without worrying about live pointers
7093 * in the manager thread.
7094 */
7095 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
7096 struct intel_super *super = st->sb;
7097 struct imsm_super *mpb = super->anchor;
7098 size_t buf_len;
7099 size_t len = 0;
7100
7101 switch (type) {
7102 case update_takeover: {
7103 struct imsm_update_takeover *u = (void *)update->buf;
7104 if (u->direction == R0_TO_R10) {
7105 void **tail = (void **)&update->space_list;
7106 struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
7107 struct imsm_map *map = get_imsm_map(dev, 0);
7108 int num_members = map->num_members;
7109 void *space;
7110 int size, i;
7111 int err = 0;
7112 /* allocate memory for added disks */
7113 for (i = 0; i < num_members; i++) {
7114 size = sizeof(struct dl);
7115 space = malloc(size);
7116 if (!space) {
7117 err++;
7118 break;
7119 }
7120 *tail = space;
7121 tail = space;
7122 *tail = NULL;
7123 }
7124 /* allocate memory for new device */
7125 size = sizeof_imsm_dev(super->devlist->dev, 0) +
7126 (num_members * sizeof(__u32));
7127 space = malloc(size);
7128 if (!space)
7129 err++;
7130 else {
7131 *tail = space;
7132 tail = space;
7133 *tail = NULL;
7134 }
7135 if (!err) {
7136 len = disks_to_mpb_size(num_members * 2);
7137 } else {
7138 /* if allocation didn't success, free buffer */
7139 while (update->space_list) {
7140 void **sp = update->space_list;
7141 update->space_list = *sp;
7142 free(sp);
7143 }
7144 }
7145 }
7146
7147 break;
7148 }
7149 case update_reshape_container_disks: {
7150 /* Every raid device in the container is about to
7151 * gain some more devices, and we will enter a
7152 * reconfiguration.
7153 * So each 'imsm_map' will be bigger, and the imsm_vol
7154 * will now hold 2 of them.
7155 * Thus we need new 'struct imsm_dev' allocations sized
7156 * as sizeof_imsm_dev but with more devices in both maps.
7157 */
7158 struct imsm_update_reshape *u = (void *)update->buf;
7159 struct intel_dev *dl;
7160 void **space_tail = (void**)&update->space_list;
7161
7162 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7163
7164 for (dl = super->devlist; dl; dl = dl->next) {
7165 int size = sizeof_imsm_dev(dl->dev, 1);
7166 void *s;
7167 if (u->new_raid_disks > u->old_raid_disks)
7168 size += sizeof(__u32)*2*
7169 (u->new_raid_disks - u->old_raid_disks);
7170 s = malloc(size);
7171 if (!s)
7172 break;
7173 *space_tail = s;
7174 space_tail = s;
7175 *space_tail = NULL;
7176 }
7177
7178 len = disks_to_mpb_size(u->new_raid_disks);
7179 dprintf("New anchor length is %llu\n", (unsigned long long)len);
7180 break;
7181 }
7182 case update_reshape_migration: {
7183 /* for migration level 0->5 we need to add disks
7184 * so the same as for container operation we will copy
7185 * device to the bigger location.
7186 * in memory prepared device and new disk area are prepared
7187 * for usage in process update
7188 */
7189 struct imsm_update_reshape_migration *u = (void *)update->buf;
7190 struct intel_dev *id;
7191 void **space_tail = (void **)&update->space_list;
7192 int size;
7193 void *s;
7194 int current_level = -1;
7195
7196 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
7197
7198 /* add space for bigger array in update
7199 */
7200 for (id = super->devlist; id; id = id->next) {
7201 if (id->index == (unsigned)u->subdev) {
7202 size = sizeof_imsm_dev(id->dev, 1);
7203 if (u->new_raid_disks > u->old_raid_disks)
7204 size += sizeof(__u32)*2*
7205 (u->new_raid_disks - u->old_raid_disks);
7206 s = malloc(size);
7207 if (!s)
7208 break;
7209 *space_tail = s;
7210 space_tail = s;
7211 *space_tail = NULL;
7212 break;
7213 }
7214 }
7215 if (update->space_list == NULL)
7216 break;
7217
7218 /* add space for disk in update
7219 */
7220 size = sizeof(struct dl);
7221 s = malloc(size);
7222 if (!s) {
7223 free(update->space_list);
7224 update->space_list = NULL;
7225 break;
7226 }
7227 *space_tail = s;
7228 space_tail = s;
7229 *space_tail = NULL;
7230
7231 /* add spare device to update
7232 */
7233 for (id = super->devlist ; id; id = id->next)
7234 if (id->index == (unsigned)u->subdev) {
7235 struct imsm_dev *dev;
7236 struct imsm_map *map;
7237
7238 dev = get_imsm_dev(super, u->subdev);
7239 map = get_imsm_map(dev, 0);
7240 current_level = map->raid_level;
7241 break;
7242 }
7243 if ((u->new_level == 5) && (u->new_level != current_level)) {
7244 struct mdinfo *spares;
7245
7246 spares = get_spares_for_grow(st);
7247 if (spares) {
7248 struct dl *dl;
7249 struct mdinfo *dev;
7250
7251 dev = spares->devs;
7252 if (dev) {
7253 u->new_disks[0] =
7254 makedev(dev->disk.major,
7255 dev->disk.minor);
7256 dl = get_disk_super(super,
7257 dev->disk.major,
7258 dev->disk.minor);
7259 dl->index = u->old_raid_disks;
7260 dev = dev->next;
7261 }
7262 sysfs_free(spares);
7263 }
7264 }
7265 len = disks_to_mpb_size(u->new_raid_disks);
7266 dprintf("New anchor length is %llu\n", (unsigned long long)len);
7267 break;
7268 }
7269 case update_create_array: {
7270 struct imsm_update_create_array *u = (void *) update->buf;
7271 struct intel_dev *dv;
7272 struct imsm_dev *dev = &u->dev;
7273 struct imsm_map *map = get_imsm_map(dev, 0);
7274 struct dl *dl;
7275 struct disk_info *inf;
7276 int i;
7277 int activate = 0;
7278
7279 inf = get_disk_info(u);
7280 len = sizeof_imsm_dev(dev, 1);
7281 /* allocate a new super->devlist entry */
7282 dv = malloc(sizeof(*dv));
7283 if (dv) {
7284 dv->dev = malloc(len);
7285 if (dv->dev)
7286 update->space = dv;
7287 else {
7288 free(dv);
7289 update->space = NULL;
7290 }
7291 }
7292
7293 /* count how many spares will be converted to members */
7294 for (i = 0; i < map->num_members; i++) {
7295 dl = serial_to_dl(inf[i].serial, super);
7296 if (!dl) {
7297 /* hmm maybe it failed?, nothing we can do about
7298 * it here
7299 */
7300 continue;
7301 }
7302 if (count_memberships(dl, super) == 0)
7303 activate++;
7304 }
7305 len += activate * sizeof(struct imsm_disk);
7306 break;
7307 default:
7308 break;
7309 }
7310 }
7311
7312 /* check if we need a larger metadata buffer */
7313 if (super->next_buf)
7314 buf_len = super->next_len;
7315 else
7316 buf_len = super->len;
7317
7318 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
7319 /* ok we need a larger buf than what is currently allocated
7320 * if this allocation fails process_update will notice that
7321 * ->next_len is set and ->next_buf is NULL
7322 */
7323 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
7324 if (super->next_buf)
7325 free(super->next_buf);
7326
7327 super->next_len = buf_len;
7328 if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
7329 memset(super->next_buf, 0, buf_len);
7330 else
7331 super->next_buf = NULL;
7332 }
7333 }
7334
7335 /* must be called while manager is quiesced */
7336 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
7337 {
7338 struct imsm_super *mpb = super->anchor;
7339 struct dl *iter;
7340 struct imsm_dev *dev;
7341 struct imsm_map *map;
7342 int i, j, num_members;
7343 __u32 ord;
7344
7345 dprintf("%s: deleting device[%d] from imsm_super\n",
7346 __func__, index);
7347
7348 /* shift all indexes down one */
7349 for (iter = super->disks; iter; iter = iter->next)
7350 if (iter->index > (int)index)
7351 iter->index--;
7352 for (iter = super->missing; iter; iter = iter->next)
7353 if (iter->index > (int)index)
7354 iter->index--;
7355
7356 for (i = 0; i < mpb->num_raid_devs; i++) {
7357 dev = get_imsm_dev(super, i);
7358 map = get_imsm_map(dev, 0);
7359 num_members = map->num_members;
7360 for (j = 0; j < num_members; j++) {
7361 /* update ord entries being careful not to propagate
7362 * ord-flags to the first map
7363 */
7364 ord = get_imsm_ord_tbl_ent(dev, j, -1);
7365
7366 if (ord_to_idx(ord) <= index)
7367 continue;
7368
7369 map = get_imsm_map(dev, 0);
7370 set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
7371 map = get_imsm_map(dev, 1);
7372 if (map)
7373 set_imsm_ord_tbl_ent(map, j, ord - 1);
7374 }
7375 }
7376
7377 mpb->num_disks--;
7378 super->updates_pending++;
7379 if (*dlp) {
7380 struct dl *dl = *dlp;
7381
7382 *dlp = (*dlp)->next;
7383 __free_imsm_disk(dl);
7384 }
7385 }
7386
7387 /*******************************************************************************
7388 * Function: open_backup_targets
7389 * Description: Function opens file descriptors for all devices given in
7390 * info->devs
7391 * Parameters:
7392 * info : general array info
7393 * raid_disks : number of disks
7394 * raid_fds : table of device's file descriptors
7395 * Returns:
7396 * 0 : success
7397 * -1 : fail
7398 ******************************************************************************/
7399 int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
7400 {
7401 struct mdinfo *sd;
7402
7403 for (sd = info->devs ; sd ; sd = sd->next) {
7404 char *dn;
7405
7406 if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
7407 dprintf("disk is faulty!!\n");
7408 continue;
7409 }
7410
7411 if ((sd->disk.raid_disk >= raid_disks) ||
7412 (sd->disk.raid_disk < 0))
7413 continue;
7414
7415 dn = map_dev(sd->disk.major,
7416 sd->disk.minor, 1);
7417 raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
7418 if (raid_fds[sd->disk.raid_disk] < 0) {
7419 fprintf(stderr, "cannot open component\n");
7420 return -1;
7421 }
7422 }
7423 return 0;
7424 }
7425
7426 /*******************************************************************************
7427 * Function: init_migr_record_imsm
7428 * Description: Function inits imsm migration record
7429 * Parameters:
7430 * super : imsm internal array info
7431 * dev : device under migration
7432 * info : general array info to find the smallest device
7433 * Returns:
7434 * none
7435 ******************************************************************************/
7436 void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
7437 struct mdinfo *info)
7438 {
7439 struct intel_super *super = st->sb;
7440 struct migr_record *migr_rec = super->migr_rec;
7441 int new_data_disks;
7442 unsigned long long dsize, dev_sectors;
7443 long long unsigned min_dev_sectors = -1LLU;
7444 struct mdinfo *sd;
7445 char nm[30];
7446 int fd;
7447 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7448 struct imsm_map *map_src = get_imsm_map(dev, 1);
7449 unsigned long long num_migr_units;
7450
7451 unsigned long long array_blocks =
7452 (((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) +
7453 __le32_to_cpu(dev->size_low);
7454
7455 memset(migr_rec, 0, sizeof(struct migr_record));
7456 migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);
7457
7458 /* only ascending reshape supported now */
7459 migr_rec->ascending_migr = __cpu_to_le32(1);
7460
7461 migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
7462 max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
7463 migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip;
7464 new_data_disks = imsm_num_data_members(dev, 0);
7465 migr_rec->blocks_per_unit =
7466 __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
7467 migr_rec->dest_depth_per_unit =
7468 __cpu_to_le32(migr_rec->dest_depth_per_unit);
7469
7470 num_migr_units =
7471 array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);
7472
7473 if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
7474 num_migr_units++;
7475 migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);
7476
7477 migr_rec->post_migr_vol_cap = dev->size_low;
7478 migr_rec->post_migr_vol_cap_hi = dev->size_high;
7479
7480
7481 /* Find the smallest dev */
7482 for (sd = info->devs ; sd ; sd = sd->next) {
7483 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7484 fd = dev_open(nm, O_RDONLY);
7485 if (fd < 0)
7486 continue;
7487 get_dev_size(fd, NULL, &dsize);
7488 dev_sectors = dsize / 512;
7489 if (dev_sectors < min_dev_sectors)
7490 min_dev_sectors = dev_sectors;
7491 close(fd);
7492 }
7493 migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
7494 RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
7495
7496 write_imsm_migr_rec(st);
7497
7498 return;
7499 }
7500
7501 /*******************************************************************************
7502 * Function: save_backup_imsm
7503 * Description: Function saves critical data stripes to Migration Copy Area
7504 * and updates the current migration unit status.
7505 * Use restore_stripes() to form a destination stripe,
7506 * and to write it to the Copy Area.
7507 * Parameters:
7508 * st : supertype information
7509 * info : general array info
7510 * buf : input buffer
7511 * write_offset : address of data to backup
7512 * length : length of data to backup (blocks_per_unit)
7513 * Returns:
7514 * 0 : success
7515 *, -1 : fail
7516 ******************************************************************************/
7517 int save_backup_imsm(struct supertype *st,
7518 struct imsm_dev *dev,
7519 struct mdinfo *info,
7520 void *buf,
7521 int new_data,
7522 int length)
7523 {
7524 int rv = -1;
7525 struct intel_super *super = st->sb;
7526 unsigned long long *target_offsets = NULL;
7527 int *targets = NULL;
7528 int i;
7529 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7530 int new_disks = map_dest->num_members;
7531
7532 targets = malloc(new_disks * sizeof(int));
7533 if (!targets)
7534 goto abort;
7535
7536 target_offsets = malloc(new_disks * sizeof(unsigned long long));
7537 if (!target_offsets)
7538 goto abort;
7539
7540 for (i = 0; i < new_disks; i++) {
7541 targets[i] = -1;
7542 target_offsets[i] = (unsigned long long)
7543 __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
7544 }
7545
7546 if (open_backup_targets(info, new_disks, targets))
7547 goto abort;
7548
7549 if (restore_stripes(targets, /* list of dest devices */
7550 target_offsets, /* migration record offsets */
7551 new_disks,
7552 info->new_chunk,
7553 info->new_level,
7554 info->new_layout,
7555 -1, /* source backup file descriptor */
7556 0, /* input buf offset
7557 * always 0 buf is already offset */
7558 0,
7559 length,
7560 buf) != 0) {
7561 fprintf(stderr, Name ": Error restoring stripes\n");
7562 goto abort;
7563 }
7564
7565 rv = 0;
7566
7567 abort:
7568 if (targets) {
7569 for (i = 0; i < new_disks; i++)
7570 if (targets[i] >= 0)
7571 close(targets[i]);
7572 free(targets);
7573 }
7574 free(target_offsets);
7575
7576 return rv;
7577 }
7578
7579 /*******************************************************************************
7580 * Function: save_checkpoint_imsm
7581 * Description: Function called for current unit status update
7582 * in the migration record. It writes it to disk.
7583 * Parameters:
7584 * super : imsm internal array info
7585 * info : general array info
7586 * Returns:
7587 * 0: success
7588 * 1: failure
7589 ******************************************************************************/
7590 int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
7591 {
7592 struct intel_super *super = st->sb;
7593 load_imsm_migr_rec(super, info);
7594 if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
7595 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7596 return 1;
7597 }
7598
7599 super->migr_rec->curr_migr_unit =
7600 __cpu_to_le32(info->reshape_progress /
7601 __le32_to_cpu(super->migr_rec->blocks_per_unit));
7602 super->migr_rec->rec_status = __cpu_to_le32(state);
7603 super->migr_rec->dest_1st_member_lba =
7604 __cpu_to_le32((__le32_to_cpu(super->migr_rec->curr_migr_unit))
7605 * __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
7606 if (write_imsm_migr_rec(st) < 0) {
7607 dprintf("imsm: Cannot write migration record "
7608 "outside backup area\n");
7609 return 1;
7610 }
7611
7612 return 0;
7613 }
7614
7615 static char disk_by_path[] = "/dev/disk/by-path/";
7616
7617 static const char *imsm_get_disk_controller_domain(const char *path)
7618 {
7619 char disk_path[PATH_MAX];
7620 char *drv=NULL;
7621 struct stat st;
7622
7623 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7624 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7625 if (stat(disk_path, &st) == 0) {
7626 struct sys_dev* hba;
7627 char *path=NULL;
7628
7629 path = devt_to_devpath(st.st_rdev);
7630 if (path == NULL)
7631 return "unknown";
7632 hba = find_disk_attached_hba(-1, path);
7633 if (hba && hba->type == SYS_DEV_SAS)
7634 drv = "isci";
7635 else if (hba && hba->type == SYS_DEV_SATA)
7636 drv = "ahci";
7637 else
7638 drv = "unknown";
7639 dprintf("path: %s hba: %s attached: %s\n",
7640 path, (hba) ? hba->path : "NULL", drv);
7641 free(path);
7642 if (hba)
7643 free_sys_dev(&hba);
7644 }
7645 return drv;
7646 }
7647
7648 static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7649 {
7650 char subdev_name[20];
7651 struct mdstat_ent *mdstat;
7652
7653 sprintf(subdev_name, "%d", subdev);
7654 mdstat = mdstat_by_subdev(subdev_name, container);
7655 if (!mdstat)
7656 return -1;
7657
7658 *minor = mdstat->devnum;
7659 free_mdstat(mdstat);
7660 return 0;
7661 }
7662
7663 static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7664 struct geo_params *geo,
7665 int *old_raid_disks)
7666 {
7667 /* currently we only support increasing the number of devices
7668 * for a container. This increases the number of device for each
7669 * member array. They must all be RAID0 or RAID5.
7670 */
7671 int ret_val = 0;
7672 struct mdinfo *info, *member;
7673 int devices_that_can_grow = 0;
7674
7675 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7676 "st->devnum = (%i)\n",
7677 st->devnum);
7678
7679 if (geo->size != -1 ||
7680 geo->level != UnSet ||
7681 geo->layout != UnSet ||
7682 geo->chunksize != 0 ||
7683 geo->raid_disks == UnSet) {
7684 dprintf("imsm: Container operation is allowed for "
7685 "raid disks number change only.\n");
7686 return ret_val;
7687 }
7688
7689 info = container_content_imsm(st, NULL);
7690 for (member = info; member; member = member->next) {
7691 int result;
7692 int minor;
7693
7694 dprintf("imsm: checking device_num: %i\n",
7695 member->container_member);
7696
7697 if (geo->raid_disks <= member->array.raid_disks) {
7698 /* we work on container for Online Capacity Expansion
7699 * only so raid_disks has to grow
7700 */
7701 dprintf("imsm: for container operation raid disks "
7702 "increase is required\n");
7703 break;
7704 }
7705
7706 if ((info->array.level != 0) &&
7707 (info->array.level != 5)) {
7708 /* we cannot use this container with other raid level
7709 */
7710 dprintf("imsm: for container operation wrong"
7711 " raid level (%i) detected\n",
7712 info->array.level);
7713 break;
7714 } else {
7715 /* check for platform support
7716 * for this raid level configuration
7717 */
7718 struct intel_super *super = st->sb;
7719 if (!is_raid_level_supported(super->orom,
7720 member->array.level,
7721 geo->raid_disks)) {
7722 dprintf("platform does not support raid%d with"
7723 " %d disk%s\n",
7724 info->array.level,
7725 geo->raid_disks,
7726 geo->raid_disks > 1 ? "s" : "");
7727 break;
7728 }
7729 /* check if component size is aligned to chunk size
7730 */
7731 if (info->component_size %
7732 (info->array.chunk_size/512)) {
7733 dprintf("Component size is not aligned to "
7734 "chunk size\n");
7735 break;
7736 }
7737 }
7738
7739 if (*old_raid_disks &&
7740 info->array.raid_disks != *old_raid_disks)
7741 break;
7742 *old_raid_disks = info->array.raid_disks;
7743
7744 /* All raid5 and raid0 volumes in container
7745 * have to be ready for Online Capacity Expansion
7746 * so they need to be assembled. We have already
7747 * checked that no recovery etc is happening.
7748 */
7749 result = imsm_find_array_minor_by_subdev(member->container_member,
7750 st->container_dev,
7751 &minor);
7752 if (result < 0) {
7753 dprintf("imsm: cannot find array\n");
7754 break;
7755 }
7756 devices_that_can_grow++;
7757 }
7758 sysfs_free(info);
7759 if (!member && devices_that_can_grow)
7760 ret_val = 1;
7761
7762 if (ret_val)
7763 dprintf("\tContainer operation allowed\n");
7764 else
7765 dprintf("\tError: %i\n", ret_val);
7766
7767 return ret_val;
7768 }
7769
/* Function: get_spares_for_grow
 * Description: Asks container_choose_spares() for the spare devices of the
 *	container, passing the value of min_acceptable_spare_size_imsm()
 *	as the minimum size.
 * Parameters: Pointer to the supertype structure
 * Returns: Whatever container_choose_spares() returns: pointer to the list
 *	of spare devices (mdinfo structure) on success, NULL if fail
 */
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
	return container_choose_spares(st,
				       min_acceptable_spare_size_imsm(st),
				       NULL, NULL, NULL, 0);
}
7782
7783 /******************************************************************************
7784 * function: imsm_create_metadata_update_for_reshape
7785 * Function creates update for whole IMSM container.
7786 *
7787 ******************************************************************************/
7788 static int imsm_create_metadata_update_for_reshape(
7789 struct supertype *st,
7790 struct geo_params *geo,
7791 int old_raid_disks,
7792 struct imsm_update_reshape **updatep)
7793 {
7794 struct intel_super *super = st->sb;
7795 struct imsm_super *mpb = super->anchor;
7796 int update_memory_size = 0;
7797 struct imsm_update_reshape *u = NULL;
7798 struct mdinfo *spares = NULL;
7799 int i;
7800 int delta_disks = 0;
7801 struct mdinfo *dev;
7802
7803 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7804 geo->raid_disks);
7805
7806 delta_disks = geo->raid_disks - old_raid_disks;
7807
7808 /* size of all update data without anchor */
7809 update_memory_size = sizeof(struct imsm_update_reshape);
7810
7811 /* now add space for spare disks that we need to add. */
7812 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
7813
7814 u = calloc(1, update_memory_size);
7815 if (u == NULL) {
7816 dprintf("error: "
7817 "cannot get memory for imsm_update_reshape update\n");
7818 return 0;
7819 }
7820 u->type = update_reshape_container_disks;
7821 u->old_raid_disks = old_raid_disks;
7822 u->new_raid_disks = geo->raid_disks;
7823
7824 /* now get spare disks list
7825 */
7826 spares = get_spares_for_grow(st);
7827
7828 if (spares == NULL
7829 || delta_disks > spares->array.spare_disks) {
7830 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
7831 "for %s.\n", geo->dev_name);
7832 goto abort;
7833 }
7834
7835 /* we have got spares
7836 * update disk list in imsm_disk list table in anchor
7837 */
7838 dprintf("imsm: %i spares are available.\n\n",
7839 spares->array.spare_disks);
7840
7841 dev = spares->devs;
7842 for (i = 0; i < delta_disks; i++) {
7843 struct dl *dl;
7844
7845 if (dev == NULL)
7846 break;
7847 u->new_disks[i] = makedev(dev->disk.major,
7848 dev->disk.minor);
7849 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
7850 dl->index = mpb->num_disks;
7851 mpb->num_disks++;
7852 dev = dev->next;
7853 }
7854
7855 abort:
7856 /* free spares
7857 */
7858 sysfs_free(spares);
7859
7860 dprintf("imsm: reshape update preparation :");
7861 if (i == delta_disks) {
7862 dprintf(" OK\n");
7863 *updatep = u;
7864 return update_memory_size;
7865 }
7866 free(u);
7867 dprintf(" Error\n");
7868
7869 return 0;
7870 }
7871
7872 /******************************************************************************
7873 * function: imsm_create_metadata_update_for_migration()
7874 * Creates update for IMSM array.
7875 *
7876 ******************************************************************************/
7877 static int imsm_create_metadata_update_for_migration(
7878 struct supertype *st,
7879 struct geo_params *geo,
7880 struct imsm_update_reshape_migration **updatep)
7881 {
7882 struct intel_super *super = st->sb;
7883 int update_memory_size = 0;
7884 struct imsm_update_reshape_migration *u = NULL;
7885 struct imsm_dev *dev;
7886 int previous_level = -1;
7887
7888 dprintf("imsm_create_metadata_update_for_migration(enter)"
7889 " New Level = %i\n", geo->level);
7890
7891 /* size of all update data without anchor */
7892 update_memory_size = sizeof(struct imsm_update_reshape_migration);
7893
7894 u = calloc(1, update_memory_size);
7895 if (u == NULL) {
7896 dprintf("error: cannot get memory for "
7897 "imsm_create_metadata_update_for_migration\n");
7898 return 0;
7899 }
7900 u->type = update_reshape_migration;
7901 u->subdev = super->current_vol;
7902 u->new_level = geo->level;
7903 u->new_layout = geo->layout;
7904 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
7905 u->new_disks[0] = -1;
7906 u->new_chunksize = -1;
7907
7908 dev = get_imsm_dev(super, u->subdev);
7909 if (dev) {
7910 struct imsm_map *map;
7911
7912 map = get_imsm_map(dev, 0);
7913 if (map) {
7914 int current_chunk_size =
7915 __le16_to_cpu(map->blocks_per_strip) / 2;
7916
7917 if (geo->chunksize != current_chunk_size) {
7918 u->new_chunksize = geo->chunksize / 1024;
7919 dprintf("imsm: "
7920 "chunk size change from %i to %i\n",
7921 current_chunk_size, u->new_chunksize);
7922 }
7923 previous_level = map->raid_level;
7924 }
7925 }
7926 if ((geo->level == 5) && (previous_level == 0)) {
7927 struct mdinfo *spares = NULL;
7928
7929 u->new_raid_disks++;
7930 spares = get_spares_for_grow(st);
7931 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
7932 free(u);
7933 sysfs_free(spares);
7934 update_memory_size = 0;
7935 dprintf("error: cannot get spare device "
7936 "for requested migration");
7937 return 0;
7938 }
7939 sysfs_free(spares);
7940 }
7941 dprintf("imsm: reshape update preparation : OK\n");
7942 *updatep = u;
7943
7944 return update_memory_size;
7945 }
7946
7947 static void imsm_update_metadata_locally(struct supertype *st,
7948 void *buf, int len)
7949 {
7950 struct metadata_update mu;
7951
7952 mu.buf = buf;
7953 mu.len = len;
7954 mu.space = NULL;
7955 mu.space_list = NULL;
7956 mu.next = NULL;
7957 imsm_prepare_update(st, &mu);
7958 imsm_process_update(st, &mu);
7959
7960 while (mu.space_list) {
7961 void **space = mu.space_list;
7962 mu.space_list = *space;
7963 free(space);
7964 }
7965 }
7966
7967 /***************************************************************************
7968 * Function: imsm_analyze_change
7969 * Description: Function analyze change for single volume
7970 * and validate if transition is supported
7971 * Parameters: Geometry parameters, supertype structure
7972 * Returns: Operation type code on success, -1 if fail
7973 ****************************************************************************/
7974 enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
7975 struct geo_params *geo)
7976 {
7977 struct mdinfo info;
7978 int change = -1;
7979 int check_devs = 0;
7980 int chunk;
7981
7982 getinfo_super_imsm_volume(st, &info, NULL);
7983
7984 if ((geo->level != info.array.level) &&
7985 (geo->level >= 0) &&
7986 (geo->level != UnSet)) {
7987 switch (info.array.level) {
7988 case 0:
7989 if (geo->level == 5) {
7990 change = CH_MIGRATION;
7991 check_devs = 1;
7992 }
7993 if (geo->level == 10) {
7994 change = CH_TAKEOVER;
7995 check_devs = 1;
7996 }
7997 break;
7998 case 1:
7999 if (geo->level == 0) {
8000 change = CH_TAKEOVER;
8001 check_devs = 1;
8002 }
8003 break;
8004 case 10:
8005 if (geo->level == 0) {
8006 change = CH_TAKEOVER;
8007 check_devs = 1;
8008 }
8009 break;
8010 }
8011 if (change == -1) {
8012 fprintf(stderr,
8013 Name " Error. Level Migration from %d to %d "
8014 "not supported!\n",
8015 info.array.level, geo->level);
8016 goto analyse_change_exit;
8017 }
8018 } else
8019 geo->level = info.array.level;
8020
8021 if ((geo->layout != info.array.layout)
8022 && ((geo->layout != UnSet) && (geo->layout != -1))) {
8023 change = CH_MIGRATION;
8024 if ((info.array.layout == 0)
8025 && (info.array.level == 5)
8026 && (geo->layout == 5)) {
8027 /* reshape 5 -> 4 */
8028 } else if ((info.array.layout == 5)
8029 && (info.array.level == 5)
8030 && (geo->layout == 0)) {
8031 /* reshape 4 -> 5 */
8032 geo->layout = 0;
8033 geo->level = 5;
8034 } else {
8035 fprintf(stderr,
8036 Name " Error. Layout Migration from %d to %d "
8037 "not supported!\n",
8038 info.array.layout, geo->layout);
8039 change = -1;
8040 goto analyse_change_exit;
8041 }
8042 } else
8043 geo->layout = info.array.layout;
8044
8045 if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
8046 && (geo->chunksize != info.array.chunk_size))
8047 change = CH_MIGRATION;
8048 else
8049 geo->chunksize = info.array.chunk_size;
8050
8051 chunk = geo->chunksize / 1024;
8052 if (!validate_geometry_imsm(st,
8053 geo->level,
8054 geo->layout,
8055 geo->raid_disks,
8056 &chunk,
8057 geo->size,
8058 0, 0, 1))
8059 change = -1;
8060
8061 if (check_devs) {
8062 struct intel_super *super = st->sb;
8063 struct imsm_super *mpb = super->anchor;
8064
8065 if (mpb->num_raid_devs > 1) {
8066 fprintf(stderr,
8067 Name " Error. Cannot perform operation on %s"
8068 "- for this operation it MUST be single "
8069 "array in container\n",
8070 geo->dev_name);
8071 change = -1;
8072 }
8073 }
8074
8075 analyse_change_exit:
8076
8077 return change;
8078 }
8079
8080 int imsm_takeover(struct supertype *st, struct geo_params *geo)
8081 {
8082 struct intel_super *super = st->sb;
8083 struct imsm_update_takeover *u;
8084
8085 u = malloc(sizeof(struct imsm_update_takeover));
8086 if (u == NULL)
8087 return 1;
8088
8089 u->type = update_takeover;
8090 u->subarray = super->current_vol;
8091
8092 /* 10->0 transition */
8093 if (geo->level == 0)
8094 u->direction = R10_TO_R0;
8095
8096 /* 0->10 transition */
8097 if (geo->level == 10)
8098 u->direction = R0_TO_R10;
8099
8100 /* update metadata locally */
8101 imsm_update_metadata_locally(st, u,
8102 sizeof(struct imsm_update_takeover));
8103 /* and possibly remotely */
8104 if (st->update_tail)
8105 append_metadata_update(st, u,
8106 sizeof(struct imsm_update_takeover));
8107 else
8108 free(u);
8109
8110 return 0;
8111 }
8112
/* Print the "experimental feature" warning on stderr and ask the user for
 * a confirmation.
 *
 * Returns the value of ask(); callers in this file treat 0 as
 * "do not continue".
 */
static int warn_user_about_risk(void)
{
	static const char risk_notice[] =
		"\nThis is an experimental feature. Data on the RAID volume(s) "
		"can be lost!!!\n\n"
		"To continue command execution please make sure that\n"
		"the grow process will not be interrupted. Use safe power\n"
		"supply to avoid unexpected system reboot. Make sure that\n"
		"reshaped container is not assembled automatically during\n"
		"system boot.\n"
		"If reshape is interrupted, assemble array manually\n"
		"using e.g. '-Ac' option and up to date mdadm.conf file.\n"
		"Assembly in scan mode is not possible in such case.\n"
		"Growing container with boot array is not possible.\n"
		"If boot array reshape is interrupted, whole file system\n"
		"can be lost.\n\n";
	int answer;

	fprintf(stderr, "%s", risk_notice);
	answer = ask("Do you want to continue? ");
	fprintf(stderr, "\n");

	return answer;
}
8136
8137 static int imsm_reshape_super(struct supertype *st, long long size, int level,
8138 int layout, int chunksize, int raid_disks,
8139 int delta_disks, char *backup, char *dev,
8140 int verbose)
8141 {
8142 int ret_val = 1;
8143 struct geo_params geo;
8144
8145 dprintf("imsm: reshape_super called.\n");
8146
8147 memset(&geo, 0, sizeof(struct geo_params));
8148
8149 geo.dev_name = dev;
8150 geo.dev_id = st->devnum;
8151 geo.size = size;
8152 geo.level = level;
8153 geo.layout = layout;
8154 geo.chunksize = chunksize;
8155 geo.raid_disks = raid_disks;
8156 if (delta_disks != UnSet)
8157 geo.raid_disks += delta_disks;
8158
8159 dprintf("\tfor level : %i\n", geo.level);
8160 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
8161
8162 if (experimental() == 0)
8163 return ret_val;
8164
8165 if (st->container_dev == st->devnum) {
8166 /* On container level we can only increase number of devices. */
8167 dprintf("imsm: info: Container operation\n");
8168 int old_raid_disks = 0;
8169
8170 /* this warning will be removed when imsm checkpointing
8171 * will be implemented, and restoring from check-point
8172 * operation will be transparent for reboot process
8173 */
8174 if (warn_user_about_risk() == 0)
8175 return ret_val;
8176
8177 if (imsm_reshape_is_allowed_on_container(
8178 st, &geo, &old_raid_disks)) {
8179 struct imsm_update_reshape *u = NULL;
8180 int len;
8181
8182 len = imsm_create_metadata_update_for_reshape(
8183 st, &geo, old_raid_disks, &u);
8184
8185 if (len <= 0) {
8186 dprintf("imsm: Cannot prepare update\n");
8187 goto exit_imsm_reshape_super;
8188 }
8189
8190 ret_val = 0;
8191 /* update metadata locally */
8192 imsm_update_metadata_locally(st, u, len);
8193 /* and possibly remotely */
8194 if (st->update_tail)
8195 append_metadata_update(st, u, len);
8196 else
8197 free(u);
8198
8199 } else {
8200 fprintf(stderr, Name ": (imsm) Operation "
8201 "is not allowed on this container\n");
8202 }
8203 } else {
8204 /* On volume level we support following operations
8205 * - takeover: raid10 -> raid0; raid0 -> raid10
8206 * - chunk size migration
8207 * - migration: raid5 -> raid0; raid0 -> raid5
8208 */
8209 struct intel_super *super = st->sb;
8210 struct intel_dev *dev = super->devlist;
8211 int change, devnum;
8212 dprintf("imsm: info: Volume operation\n");
8213 /* find requested device */
8214 while (dev) {
8215 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
8216 if (devnum == geo.dev_id)
8217 break;
8218 dev = dev->next;
8219 }
8220 if (dev == NULL) {
8221 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
8222 geo.dev_name, geo.dev_id);
8223 goto exit_imsm_reshape_super;
8224 }
8225 super->current_vol = dev->index;
8226 change = imsm_analyze_change(st, &geo);
8227 switch (change) {
8228 case CH_TAKEOVER:
8229 ret_val = imsm_takeover(st, &geo);
8230 break;
8231 case CH_MIGRATION: {
8232 struct imsm_update_reshape_migration *u = NULL;
8233 int len =
8234 imsm_create_metadata_update_for_migration(
8235 st, &geo, &u);
8236 if (len < 1) {
8237 dprintf("imsm: "
8238 "Cannot prepare update\n");
8239 break;
8240 }
8241 ret_val = 0;
8242 /* update metadata locally */
8243 imsm_update_metadata_locally(st, u, len);
8244 /* and possibly remotely */
8245 if (st->update_tail)
8246 append_metadata_update(st, u, len);
8247 else
8248 free(u);
8249 }
8250 break;
8251 default:
8252 ret_val = 1;
8253 }
8254 }
8255
8256 exit_imsm_reshape_super:
8257 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
8258 return ret_val;
8259 }
8260
8261 /*******************************************************************************
8262 * Function: wait_for_reshape_imsm
8263 * Description: Function writes new sync_max value and waits until
8264 * reshape process reach new position
8265 * Parameters:
8266 * sra : general array info
8267 * to_complete : new sync_max position
8268 * ndata : number of disks in new array's layout
8269 * Returns:
8270 * 0 : success,
8271 * 1 : there is no reshape in progress,
8272 * -1 : fail
8273 ******************************************************************************/
8274 int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
8275 int ndata)
8276 {
8277 int fd = sysfs_get_fd(sra, NULL, "reshape_position");
8278 unsigned long long completed;
8279
8280 struct timeval timeout;
8281
8282 if (fd < 0)
8283 return 1;
8284
8285 sysfs_fd_get_ll(fd, &completed);
8286
8287 if (to_complete == 0) {/* reshape till the end of array */
8288 sysfs_set_str(sra, NULL, "sync_max", "max");
8289 to_complete = MaxSector;
8290 } else {
8291 if (completed > to_complete)
8292 return -1;
8293 if (sysfs_set_num(sra, NULL, "sync_max",
8294 to_complete / ndata) != 0) {
8295 close(fd);
8296 return -1;
8297 }
8298 }
8299
8300 /* FIXME should not need a timeout at all */
8301 timeout.tv_sec = 30;
8302 timeout.tv_usec = 0;
8303 do {
8304 char action[20];
8305 fd_set rfds;
8306 FD_ZERO(&rfds);
8307 FD_SET(fd, &rfds);
8308 select(fd+1, NULL, NULL, &rfds, &timeout);
8309 if (sysfs_fd_get_ll(fd, &completed) < 0) {
8310 close(fd);
8311 return 1;
8312 }
8313 if (sysfs_get_str(sra, NULL, "sync_action",
8314 action, 20) > 0 &&
8315 strncmp(action, "reshape", 7) != 0)
8316 break;
8317 } while (completed < to_complete);
8318 close(fd);
8319 return 0;
8320
8321 }
8322
8323 /*******************************************************************************
8324 * Function: check_degradation_change
8325 * Description: Check that array hasn't become failed.
8326 * Parameters:
8327 * info : for sysfs access
8328 * sources : source disks descriptors
8329 * degraded: previous degradation level
8330 * Returns:
8331 * degradation level
8332 ******************************************************************************/
8333 int check_degradation_change(struct mdinfo *info,
8334 int *sources,
8335 int degraded)
8336 {
8337 unsigned long long new_degraded;
8338 sysfs_get_ll(info, NULL, "degraded", &new_degraded);
8339 if (new_degraded != (unsigned long long)degraded) {
8340 /* check each device to ensure it is still working */
8341 struct mdinfo *sd;
8342 new_degraded = 0;
8343 for (sd = info->devs ; sd ; sd = sd->next) {
8344 if (sd->disk.state & (1<<MD_DISK_FAULTY))
8345 continue;
8346 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
8347 char sbuf[20];
8348 if (sysfs_get_str(info,
8349 sd, "state", sbuf, 20) < 0 ||
8350 strstr(sbuf, "faulty") ||
8351 strstr(sbuf, "in_sync") == NULL) {
8352 /* this device is dead */
8353 sd->disk.state = (1<<MD_DISK_FAULTY);
8354 if (sd->disk.raid_disk >= 0 &&
8355 sources[sd->disk.raid_disk] >= 0) {
8356 close(sources[
8357 sd->disk.raid_disk]);
8358 sources[sd->disk.raid_disk] =
8359 -1;
8360 }
8361 new_degraded++;
8362 }
8363 }
8364 }
8365 }
8366
8367 return new_degraded;
8368 }
8369
8370 /*******************************************************************************
8371 * Function: imsm_manage_reshape
8372 * Description: Function finds array under reshape and it manages reshape
8373 * process. It creates stripes backups (if required) and sets
8374 * checheckpoits.
8375 * Parameters:
8376 * afd : Backup handle (nattive) - not used
8377 * sra : general array info
8378 * reshape : reshape parameters - not used
8379 * st : supertype structure
8380 * blocks : size of critical section [blocks]
8381 * fds : table of source device descriptor
8382 * offsets : start of array (offest per devices)
8383 * dests : not used
8384 * destfd : table of destination device descriptor
8385 * destoffsets : table of destination offsets (per device)
8386 * Returns:
8387 * 1 : success, reshape is done
8388 * 0 : fail
8389 ******************************************************************************/
8390 static int imsm_manage_reshape(
8391 int afd, struct mdinfo *sra, struct reshape *reshape,
8392 struct supertype *st, unsigned long backup_blocks,
8393 int *fds, unsigned long long *offsets,
8394 int dests, int *destfd, unsigned long long *destoffsets)
8395 {
8396 int ret_val = 0;
8397 struct intel_super *super = st->sb;
8398 struct intel_dev *dv = NULL;
8399 struct imsm_dev *dev = NULL;
8400 struct imsm_map *map_src, *map_dest;
8401 int migr_vol_qan = 0;
8402 int ndata, odata; /* [bytes] */
8403 int chunk; /* [bytes] */
8404 struct migr_record *migr_rec;
8405 char *buf = NULL;
8406 unsigned int buf_size; /* [bytes] */
8407 unsigned long long max_position; /* array size [bytes] */
8408 unsigned long long next_step; /* [blocks]/[bytes] */
8409 unsigned long long old_data_stripe_length;
8410 unsigned long long new_data_stripe_length;
8411 unsigned long long start_src; /* [bytes] */
8412 unsigned long long start; /* [bytes] */
8413 unsigned long long start_buf_shift; /* [bytes] */
8414 int degraded = 0;
8415
8416 if (!fds || !offsets || !destfd || !destoffsets || !sra)
8417 goto abort;
8418
8419 /* Find volume during the reshape */
8420 for (dv = super->devlist; dv; dv = dv->next) {
8421 if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
8422 && dv->dev->vol.migr_state == 1) {
8423 dev = dv->dev;
8424 migr_vol_qan++;
8425 }
8426 }
8427 /* Only one volume can migrate at the same time */
8428 if (migr_vol_qan != 1) {
8429 fprintf(stderr, Name " : %s", migr_vol_qan ?
8430 "Number of migrating volumes greater than 1\n" :
8431 "There is no volume during migrationg\n");
8432 goto abort;
8433 }
8434
8435 map_src = get_imsm_map(dev, 1);
8436 if (map_src == NULL)
8437 goto abort;
8438 map_dest = get_imsm_map(dev, 0);
8439
8440 ndata = imsm_num_data_members(dev, 0);
8441 odata = imsm_num_data_members(dev, 1);
8442
8443 chunk = map_src->blocks_per_strip * 512;
8444 old_data_stripe_length = odata * chunk;
8445
8446 migr_rec = super->migr_rec;
8447
8448 /* [bytes] */
8449 sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
8450 sra->new_level = map_dest->raid_level;
8451 new_data_stripe_length = sra->new_chunk * ndata;
8452
8453 /* initialize migration record for start condition */
8454 if (sra->reshape_progress == 0)
8455 init_migr_record_imsm(st, dev, sra);
8456
8457 /* size for data */
8458 buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
8459 /* extend buffer size for parity disk */
8460 buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
8461 /* add space for stripe aligment */
8462 buf_size += old_data_stripe_length;
8463 if (posix_memalign((void **)&buf, 4096, buf_size)) {
8464 dprintf("imsm: Cannot allocate checpoint buffer\n");
8465 goto abort;
8466 }
8467
8468 max_position =
8469 __le32_to_cpu(migr_rec->post_migr_vol_cap) +
8470 ((unsigned long long)__le32_to_cpu(
8471 migr_rec->post_migr_vol_cap_hi) << 32);
8472
8473 while (__le32_to_cpu(migr_rec->curr_migr_unit) <
8474 __le32_to_cpu(migr_rec->num_migr_units)) {
8475 /* current reshape position [blocks] */
8476 unsigned long long current_position =
8477 __le32_to_cpu(migr_rec->blocks_per_unit)
8478 * __le32_to_cpu(migr_rec->curr_migr_unit);
8479 unsigned long long border;
8480
8481 /* Check that array hasn't become failed.
8482 */
8483 degraded = check_degradation_change(sra, fds, degraded);
8484 if (degraded > 1) {
8485 dprintf("imsm: Abort reshape due to degradation"
8486 " level (%i)\n", degraded);
8487 goto abort;
8488 }
8489
8490 next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
8491
8492 if ((current_position + next_step) > max_position)
8493 next_step = max_position - current_position;
8494
8495 start = (map_src->pba_of_lba0 + dev->reserved_blocks +
8496 current_position) * 512;
8497
8498 /* allign reading start to old geometry */
8499 start_buf_shift = start % old_data_stripe_length;
8500 start_src = start - start_buf_shift;
8501
8502 border = (start_src / odata) - (start / ndata);
8503 border /= 512;
8504 if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
8505 /* save critical stripes to buf
8506 * start - start address of current unit
8507 * to backup [bytes]
8508 * start_src - start address of current unit
8509 * to backup alligned to source array
8510 * [bytes]
8511 */
8512 unsigned long long next_step_filler = 0;
8513 unsigned long long copy_length = next_step * 512;
8514
8515 /* allign copy area length to stripe in old geometry */
8516 next_step_filler = ((copy_length + start_buf_shift)
8517 % old_data_stripe_length);
8518 if (next_step_filler)
8519 next_step_filler = (old_data_stripe_length
8520 - next_step_filler);
8521 dprintf("save_stripes() parameters: start = %llu,"
8522 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8523 "\tstart_in_buf_shift = %llu,"
8524 "\tnext_step_filler = %llu\n",
8525 start, start_src, copy_length,
8526 start_buf_shift, next_step_filler);
8527
8528 if (save_stripes(fds, offsets, map_src->num_members,
8529 chunk, sra->array.level,
8530 sra->array.layout, 0, NULL, start_src,
8531 copy_length +
8532 next_step_filler + start_buf_shift,
8533 buf)) {
8534 dprintf("imsm: Cannot save stripes"
8535 " to buffer\n");
8536 goto abort;
8537 }
8538 /* Convert data to destination format and store it
8539 * in backup general migration area
8540 */
8541 if (save_backup_imsm(st, dev, sra,
8542 buf + start_buf_shift,
8543 ndata, copy_length)) {
8544 dprintf("imsm: Cannot save stripes to "
8545 "target devices\n");
8546 goto abort;
8547 }
8548 if (save_checkpoint_imsm(st, sra,
8549 UNIT_SRC_IN_CP_AREA)) {
8550 dprintf("imsm: Cannot write checkpoint to "
8551 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8552 goto abort;
8553 }
8554 /* decrease backup_blocks */
8555 if (backup_blocks > (unsigned long)next_step)
8556 backup_blocks -= next_step;
8557 else
8558 backup_blocks = 0;
8559 }
8560 /* When data backed up, checkpoint stored,
8561 * kick the kernel to reshape unit of data
8562 */
8563 next_step = next_step + sra->reshape_progress;
8564 sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
8565 sysfs_set_num(sra, NULL, "suspend_hi", next_step);
8566
8567 /* wait until reshape finish */
8568 if (wait_for_reshape_imsm(sra, next_step, ndata) < 0)
8569 dprintf("wait_for_reshape_imsm returned error,"
8570 " but we ignore it!\n");
8571
8572 sra->reshape_progress = next_step;
8573
8574 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) {
8575 dprintf("imsm: Cannot write checkpoint to "
8576 "migration record (UNIT_SRC_NORMAL)\n");
8577 goto abort;
8578 }
8579
8580 }
8581
8582 /* return '1' if done */
8583 ret_val = 1;
8584 abort:
8585 free(buf);
8586 abort_reshape(sra);
8587
8588 return ret_val;
8589 }
8590 #endif /* MDASSEMBLE */
8591
8592 struct superswitch super_imsm = {
8593 #ifndef MDASSEMBLE
8594 .examine_super = examine_super_imsm,
8595 .brief_examine_super = brief_examine_super_imsm,
8596 .brief_examine_subarrays = brief_examine_subarrays_imsm,
8597 .export_examine_super = export_examine_super_imsm,
8598 .detail_super = detail_super_imsm,
8599 .brief_detail_super = brief_detail_super_imsm,
8600 .write_init_super = write_init_super_imsm,
8601 .validate_geometry = validate_geometry_imsm,
8602 .add_to_super = add_to_super_imsm,
8603 .remove_from_super = remove_from_super_imsm,
8604 .detail_platform = detail_platform_imsm,
8605 .kill_subarray = kill_subarray_imsm,
8606 .update_subarray = update_subarray_imsm,
8607 .load_container = load_container_imsm,
8608 .default_geometry = default_geometry_imsm,
8609 .get_disk_controller_domain = imsm_get_disk_controller_domain,
8610 .reshape_super = imsm_reshape_super,
8611 .manage_reshape = imsm_manage_reshape,
8612 #endif
8613 .match_home = match_home_imsm,
8614 .uuid_from_super= uuid_from_super_imsm,
8615 .getinfo_super = getinfo_super_imsm,
8616 .getinfo_super_disks = getinfo_super_disks_imsm,
8617 .update_super = update_super_imsm,
8618
8619 .avail_size = avail_size_imsm,
8620 .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
8621
8622 .compare_super = compare_super_imsm,
8623
8624 .load_super = load_super_imsm,
8625 .init_super = init_super_imsm,
8626 .store_super = store_super_imsm,
8627 .free_super = free_super_imsm,
8628 .match_metadata_desc = match_metadata_desc_imsm,
8629 .container_content = container_content_imsm,
8630
8631 .external = 1,
8632 .name = "imsm",
8633
8634 #ifndef MDASSEMBLE
8635 /* for mdmon */
8636 .open_new = imsm_open_new,
8637 .set_array_state= imsm_set_array_state,
8638 .set_disk = imsm_set_disk,
8639 .sync_metadata = imsm_sync_metadata,
8640 .activate_spare = imsm_activate_spare,
8641 .process_update = imsm_process_update,
8642 .prepare_update = imsm_prepare_update,
8643 #endif /* MDASSEMBLE */
8644 };