]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-intel.c
imsm: fix: disable migration from raid5->raid0
[thirdparty/mdadm.git] / super-intel.c
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
/* metadata version strings stored after the signature; each later
 * version adds the feature it is named for
 */
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH 32
#define MAX_RAID_SERIAL_LEN 16

/* mpb->attributes feature bits (stored little-endian on disk) */
#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)

/* sector budget for the anchor MPB and the per-disk reservation used
 * when computing free space (see imsm_reserved_sectors/get_extents)
 */
#define MPB_SECTOR_CNT 418
#define IMSM_RESERVED_SECTORS 4096
#define SECT_PER_MB_SHIFT 11

/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
/* On-disk per-disk record; offsets are relative to the anchor MPB. */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
	__u32 status;			 /* 0xF0 - 0xF3 */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define	IMSM_DISK_FILLERS	4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};
72
/* RAID map configuration infos. */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;  /* valid only when state is degraded */
	__u8  ddf;
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));
98
/* Per-volume state; trailed by one imsm_map, or two while migrating. */
struct imsm_vol {
	__u32 curr_migr_unit;
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));
118
/* On-disk raid device (volume) record; variably sized via vol.map[]. */
struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;
	__u32 size_high;
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;	/* Persistent RaidDev status */
	__u32 reserved_blocks; /* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
149
/* Anchor MPB: fixed header followed by diskTbl[], devices and BBM log. */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;	/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;		/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;	/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;	/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;	/* 0x30 - 0x33 in bytes */
	__u32 attributes;	/* 0x34 - 0x37 */
	__u8 num_disks;		/* 0x38 Number of configured disks */
	__u8 num_raid_devs;	/* 0x39 Number of configured volumes */
	__u8 error_log_pos;	/* 0x3A  */
	__u8 fill[1];		/* 0x3B */
	__u32 cache_size;	/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;	/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;	/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;	/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));
172
#define BBM_LOG_MAX_ENTRIES 254

/* one remapped bad-block range for a member disk */
struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

/* Bad Block Management log appended after the device records */
struct bbm_log {
	__u32 signature; /* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved; /* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
191
192
#ifndef MDASSEMBLE
/* printable names indexed by IMSM_T_STATE_* */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif
196
197 static __u8 migr_type(struct imsm_dev *dev)
198 {
199 if (dev->vol.migr_type == MIGR_VERIFY &&
200 dev->status & DEV_VERIFY_AND_FIX)
201 return MIGR_REPAIR;
202 else
203 return dev->vol.migr_type;
204 }
205
206 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
207 {
208 /* for compatibility with older oroms convert MIGR_REPAIR, into
209 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
210 */
211 if (migr_type == MIGR_REPAIR) {
212 dev->vol.migr_type = MIGR_VERIFY;
213 dev->status |= DEV_VERIFY_AND_FIX;
214 } else {
215 dev->vol.migr_type = migr_type;
216 dev->status &= ~DEV_VERIFY_AND_FIX;
217 }
218 }
219
/* Number of 512-byte sectors needed to hold @bytes, rounded up. */
static unsigned int sector_count(__u32 bytes)
{
	return (bytes + (512 - 1)) / 512;
}
224
/* Size of the on-disk MPB expressed in 512-byte sectors. */
static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}
229
/* parsed copy of one imsm_dev plus its position in the anchor */
struct intel_dev {
	struct imsm_dev *dev;
	struct intel_dev *next;
	unsigned index;
};

/* one raid controller (HBA) hosting member disks */
struct intel_hba {
	enum sys_dev_type type;
	char *path;
	char *pci_id;
	struct intel_hba *next;
};

/* pending disk-management operation (see disk_mgmt_list) */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int current_vol; /* index of raid device undergoing creation */
	__u32 create_offset; /* common start for 'current_vol' */
	__u32 random; /* random data for seeding new family numbers */
	struct intel_dev *devlist;
	/* per physical disk state, one entry per member or spare */
	struct dl {
		struct dl *next;
		int index;
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
		int raiddisk; /* slot to fill in autolayout */
		enum action action;
	} *disks;
	struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
				      active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	struct intel_hba *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
	struct intel_super *next; /* (temp) list for disambiguating family_num */
};
282
/* disk record plus owning container, used while disambiguating */
struct intel_disk {
	struct imsm_disk disk;
#define IMSM_UNKNOWN_OWNER (-1)
	int owner;
	struct intel_disk *next;
};

/* a used region on a physical disk, in sectors */
struct extent {
	unsigned long long start, size;
};

/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER,
	CH_MIGRATION,
};
299
/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_reshape_migration,
	update_takeover
};
311
/* message: promote spare @dl into @slot of subarray @array */
struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};

/* requested geometry for a reshape/grow operation */
struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};
329
/* which way a raid10 <-> raid0 takeover runs */
enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10
};
struct imsm_update_takeover {
	enum imsm_update_type type;
	int subarray;
	enum takeover_direction direction;
};

/* message: grow the container; trailed by the new disks' makedev numbers */
struct imsm_update_reshape {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};
347
/* message: migrate subarray @subdev to a new level/layout/disk count */
struct imsm_update_reshape_migration {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;
	/* fields for array migration changes
	 */
	int subdev;
	int new_level;
	int new_layout;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

/* serial-number entry trailing an update_create_array message */
struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};
364
/* message: create subarray @dev_idx; the variably sized dev record and
 * a disk_info list follow in the same allocation (see get_disk_info)
 */
struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

/* message: delete subarray @dev_idx */
struct imsm_update_kill_array {
	enum imsm_update_type type;
	int dev_idx;
};

/* message: rename subarray @dev_idx to @name */
struct imsm_update_rename_array {
	enum imsm_update_type type;
	__u8 name[MAX_RAID_SERIAL_LEN];
	int dev_idx;
};

/* message: flush the disk_mgmt_list add/remove queue */
struct imsm_update_add_remove_disk {
	enum imsm_update_type type;
};
385
386
/* printable HBA type names indexed by enum sys_dev_type */
static const char *_sys_dev_type[] = {
	[SYS_DEV_UNKNOWN] = "Unknown",
	[SYS_DEV_SAS] = "SAS",
	[SYS_DEV_SATA] = "SATA"
};
392
393 const char *get_sys_dev_type(enum sys_dev_type type)
394 {
395 if (type >= SYS_DEV_MAX)
396 type = SYS_DEV_UNKNOWN;
397
398 return _sys_dev_type[type];
399 }
400
401 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
402 {
403 struct intel_hba *result = malloc(sizeof(*result));
404 if (result) {
405 result->type = device->type;
406 result->path = strdup(device->path);
407 result->next = NULL;
408 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
409 result->pci_id++;
410 }
411 return result;
412 }
413
414 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
415 {
416 struct intel_hba *result=NULL;
417 for (result = hba; result; result = result->next) {
418 if (result->type == device->type && strcmp(result->path, device->path) == 0)
419 break;
420 }
421 return result;
422 }
423
/* Record that @device's HBA hosts a disk of this container.
 * Returns 1 on success, 2 when @device's HBA type conflicts with the
 * type already attached to @super.
 */
static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
{
	struct intel_hba *hba;

	/* check if disk attached to Intel HBA */
	hba = find_intel_hba(super->hba, device);
	if (hba != NULL)
		return 1;
	/* Check if HBA is already attached to super */
	if (super->hba == NULL) {
		super->hba = alloc_intel_hba(device);
		return 1;
	}

	hba = super->hba;
	/* Intel metadata allows for all disks attached to the same type HBA.
	 * Do not support mixing of different HBA types.
	 */
	if (device->type != hba->type)
		return 2;

	/* append the new HBA to the end of the per-container list */
	while (hba->next)
		hba = hba->next;

	hba->next = alloc_intel_hba(device);
	return 1;
}
451
452 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
453 {
454 struct sys_dev *list, *elem, *prev;
455 char *disk_path;
456
457 if ((list = find_intel_devices()) == NULL)
458 return 0;
459
460 if (fd < 0)
461 disk_path = (char *) devname;
462 else
463 disk_path = diskfd_to_devpath(fd);
464
465 if (!disk_path) {
466 free_sys_dev(&list);
467 return 0;
468 }
469
470 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
471 if (path_attached_to_hba(disk_path, elem->path)) {
472 if (prev == NULL)
473 list = list->next;
474 else
475 prev->next = elem->next;
476 elem->next = NULL;
477 if (disk_path != devname)
478 free(disk_path);
479 free_sys_dev(&list);
480 return elem;
481 }
482 }
483 if (disk_path != devname)
484 free(disk_path);
485 free_sys_dev(&list);
486
487 return NULL;
488 }
489
490
491 static int find_intel_hba_capability(int fd, struct intel_super *super,
492 char *devname);
493
494 static struct supertype *match_metadata_desc_imsm(char *arg)
495 {
496 struct supertype *st;
497
498 if (strcmp(arg, "imsm") != 0 &&
499 strcmp(arg, "default") != 0
500 )
501 return NULL;
502
503 st = malloc(sizeof(*st));
504 if (!st)
505 return NULL;
506 memset(st, 0, sizeof(*st));
507 st->container_dev = NoMdDev;
508 st->ss = &super_imsm;
509 st->max_devs = IMSM_MAX_DEVICES;
510 st->minor_version = 0;
511 st->sb = NULL;
512 return st;
513 }
514
#ifndef MDASSEMBLE
/* version string stored immediately after the fixed signature prefix */
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif
521
522 /* retrieve a disk directly from the anchor when the anchor is known to be
523 * up-to-date, currently only at load time
524 */
525 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
526 {
527 if (index >= mpb->num_disks)
528 return NULL;
529 return &mpb->disk[index];
530 }
531
532 /* retrieve the disk description based on a index of the disk
533 * in the sub-array
534 */
535 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
536 {
537 struct dl *d;
538
539 for (d = super->disks; d; d = d->next)
540 if (d->index == index)
541 return d;
542
543 return NULL;
544 }
545 /* retrieve a disk from the parsed metadata */
546 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
547 {
548 struct dl *dl;
549
550 dl = get_imsm_dl_disk(super, index);
551 if (dl)
552 return &dl->disk;
553
554 return NULL;
555 }
556
557 /* generate a checksum directly from the anchor when the anchor is known to be
558 * up-to-date, currently only at load or write_super after coalescing
559 */
560 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
561 {
562 __u32 end = mpb->mpb_size / sizeof(end);
563 __u32 *p = (__u32 *) mpb;
564 __u32 sum = 0;
565
566 while (end--) {
567 sum += __le32_to_cpu(*p);
568 p++;
569 }
570
571 return sum - __le32_to_cpu(mpb->check_sum);
572 }
573
/* on-disk size of @map: the struct already contains one disk_ord_tbl
 * slot, hence num_members - 1 additional entries
 */
static size_t sizeof_imsm_map(struct imsm_map *map)
{
	return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
}
578
579 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
580 {
581 /* A device can have 2 maps if it is in the middle of a migration.
582 * If second_map is:
583 * 0 - we return the first map
584 * 1 - we return the second map if it exists, else NULL
585 * -1 - we return the second map if it exists, else the first
586 */
587 struct imsm_map *map = &dev->vol.map[0];
588
589 if (second_map == 1 && !dev->vol.migr_state)
590 return NULL;
591 else if (second_map == 1 ||
592 (second_map < 0 && dev->vol.migr_state)) {
593 void *ptr = map;
594
595 return ptr + sizeof_imsm_map(map);
596 } else
597 return map;
598
599 }
600
/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
	/* base struct minus the embedded map placeholder, plus the real
	 * (variably sized) first map
	 */
	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
		      sizeof_imsm_map(get_imsm_map(dev, 0));

	/* migrating means an additional map */
	if (dev->vol.migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 1));
	else if (migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 0));

	return size;
}
617
#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update:
 * the disk_info array lives immediately after the variably sized
 * imsm_dev embedded at the end of the update message
 */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
#endif
631
632 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
633 {
634 int offset;
635 int i;
636 void *_mpb = mpb;
637
638 if (index >= mpb->num_raid_devs)
639 return NULL;
640
641 /* devices start after all disks */
642 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
643
644 for (i = 0; i <= index; i++)
645 if (i == index)
646 return _mpb + offset;
647 else
648 offset += sizeof_imsm_dev(_mpb + offset, 0);
649
650 return NULL;
651 }
652
653 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
654 {
655 struct intel_dev *dv;
656
657 if (index >= super->anchor->num_raid_devs)
658 return NULL;
659 for (dv = super->devlist; dv; dv = dv->next)
660 if (dv->index == index)
661 return dv->dev;
662 return NULL;
663 }
664
/*
 * for second_map:
 * == 0 get first map
 * == 1 get second map
 * == -1 than get map according to the current migr_state
 */
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
				  int slot,
				  int second_map)
{
	struct imsm_map *map;

	map = get_imsm_map(dev, second_map);

	/* top byte identifies disk under rebuild */
	return __le32_to_cpu(map->disk_ord_tbl[slot]);
}
682
/* strip the flag byte (e.g. IMSM_ORD_REBUILD) from a 32-bit ordinal */
#define ord_to_idx(ord) (((ord) << 8) >> 8)
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);

	return ord_to_idx(ord);
}
690
/* store @ord (disk index plus flag byte) into @slot of the map */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
695
696 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
697 {
698 int slot;
699 __u32 ord;
700
701 for (slot = 0; slot < map->num_members; slot++) {
702 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
703 if (ord_to_idx(ord) == idx)
704 return slot;
705 }
706
707 return -1;
708 }
709
710 static int get_imsm_raid_level(struct imsm_map *map)
711 {
712 if (map->raid_level == 1) {
713 if (map->num_members == 2)
714 return 1;
715 else
716 return 10;
717 }
718
719 return map->raid_level;
720 }
721
722 static int cmp_extent(const void *av, const void *bv)
723 {
724 const struct extent *a = av;
725 const struct extent *b = bv;
726 if (a->start < b->start)
727 return -1;
728 if (a->start > b->start)
729 return 1;
730 return 0;
731 }
732
733 static int count_memberships(struct dl *dl, struct intel_super *super)
734 {
735 int memberships = 0;
736 int i;
737
738 for (i = 0; i < super->anchor->num_raid_devs; i++) {
739 struct imsm_dev *dev = get_imsm_dev(super, i);
740 struct imsm_map *map = get_imsm_map(dev, 0);
741
742 if (get_imsm_disk_slot(map, dl->index) >= 0)
743 memberships++;
744 }
745
746 return memberships;
747 }
748
/* Build the sorted list of used extents on physical device @dl.
 *
 * Returns a malloc'd array of memberships + 1 entries, terminated by a
 * sentinel with size == 0 whose start marks where the end-of-disk
 * metadata reservation begins; NULL on allocation failure.  Caller
 * frees the result.
 */
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i;
	int memberships = count_memberships(dl, super);
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	/* one extent per subarray this disk participates in */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		if (get_imsm_disk_slot(map, dl->index) >= 0) {
			e->start = __le32_to_cpu(map->pba_of_lba0);
			e->size = __le32_to_cpu(map->blocks_per_member);
			e++;
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		/* round down to 1k block to satisfy precision of the kernel
		 * 'size' interface
		 */
		remainder &= ~1UL;
		/* make sure remainder is still sane */
		if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
			remainder = ROUND_UP(super->len, 512) >> 9;
		if (reservation > remainder)
			reservation = remainder;
	}
	/* sentinel entry: start of the reserved tail region */
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
799
/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry, otherwise fallback to the
 * default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
	struct extent *e;
	int i;
	__u32 rv;

	/* for spares just return a minimal reservation which will grow
	 * once the spare is picked up by an array
	 */
	if (dl->index == -1)
		return MPB_SECTOR_CNT;

	e = get_extents(super, dl);
	if (!e)
		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* scroll to last entry (the size == 0 sentinel) */
	for (i = 0; e[i].size; i++)
		continue;

	/* everything from the sentinel's start to end-of-disk is reserved */
	rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;

	free(e);

	return rv;
}
830
831 static int is_spare(struct imsm_disk *disk)
832 {
833 return (disk->status & SPARE_DISK) == SPARE_DISK;
834 }
835
836 static int is_configured(struct imsm_disk *disk)
837 {
838 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
839 }
840
841 static int is_failed(struct imsm_disk *disk)
842 {
843 return (disk->status & FAILED_DISK) == FAILED_DISK;
844 }
845
/* Return minimum size of a spare that can be used in this array
 * (bytes): the last lba used by any subarray plus the metadata
 * reservation; 0 when there is no active disk to measure against
 */
static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
{
	struct intel_super *super = st->sb;
	struct dl *dl;
	struct extent *e;
	int i;
	unsigned long long rv = 0;

	if (!super)
		return rv;
	/* find first active disk in array */
	dl = super->disks;
	while (dl && (is_failed(&dl->disk) || dl->index == -1))
		dl = dl->next;
	if (!dl)
		return rv;
	/* find last lba used by subarrays */
	e = get_extents(super, dl);
	if (!e)
		return rv;
	/* walk to the size == 0 sentinel; e[i-1] is the last real extent */
	for (i = 0; e[i].size; i++)
		continue;
	if (i > 0)
		rv = e[i-1].start + e[i-1].size;
	free(e);
	/* add the amount of space needed for metadata */
	rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
	return rv * 512;
}
876
877 #ifndef MDASSEMBLE
878 static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
879
/* Print a human-readable summary of one raid device for --examine.
 * When the device is migrating, both maps are shown as "new <-- old".
 * disk_idx identifies the local disk so its slot can be reported.
 */
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
	__u64 sz;
	int slot, i;
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *map2 = get_imsm_map(dev, 1);
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf(" UUID : %s\n", uuid);
	printf(" RAID Level : %d", get_imsm_raid_level(map));
	if (map2)
		printf(" <-- %d", get_imsm_raid_level(map2));
	printf("\n");
	printf(" Members : %d", map->num_members);
	if (map2)
		printf(" <-- %d", map2->num_members);
	printf("\n");
	/* per-slot state: 'U' in sync, '_' rebuilding/out of sync */
	printf(" Slots : [");
	for (i = 0; i < map->num_members; i++) {
		ord = get_imsm_ord_tbl_ent(dev, i, 0);
		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
	}
	printf("]");
	if (map2) {
		printf(" <-- [");
		for (i = 0; i < map2->num_members; i++) {
			ord = get_imsm_ord_tbl_ent(dev, i, 1);
			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
		}
		printf("]");
	}
	printf("\n");
	printf(" Failed disk : ");
	if (map->failed_disk_num == 0xff)
		printf("none");
	else
		printf("%i", map->failed_disk_num);
	printf("\n");
	slot = get_imsm_disk_slot(map, disk_idx);
	if (slot >= 0) {
		ord = get_imsm_ord_tbl_ent(dev, slot, -1);
		printf(" This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf(" This Slot : ?\n");
	/* 64-bit volume size is split across two 32-bit fields */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf(" Sector Offset : %u\n",
	       __le32_to_cpu(map->pba_of_lba0));
	printf(" Num Stripes : %u\n",
	       __le32_to_cpu(map->num_data_stripes));
	/* blocks_per_strip is in 512-byte sectors; /2 converts to KiB */
	printf(" Chunk Size : %u KiB",
	       __le16_to_cpu(map->blocks_per_strip) / 2);
	if (map2)
		printf(" <-- %u KiB",
		       __le16_to_cpu(map2->blocks_per_strip) / 2);
	printf("\n");
	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf(" Migrate State : ");
	if (dev->vol.migr_state) {
		if (migr_type(dev) == MIGR_INIT)
			printf("initialize\n");
		else if (migr_type(dev) == MIGR_REBUILD)
			printf("rebuild\n");
		else if (migr_type(dev) == MIGR_VERIFY)
			printf("check\n");
		else if (migr_type(dev) == MIGR_GEN_MIGR)
			printf("general migration\n");
		else if (migr_type(dev) == MIGR_STATE_CHANGE)
			printf("state change\n");
		else if (migr_type(dev) == MIGR_REPAIR)
			printf("repair\n");
		else
			printf("<unknown:%d>\n", migr_type(dev));
	} else
		printf("idle\n");
	printf(" Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		struct imsm_map *map = get_imsm_map(dev, 1);

		printf(" <-- %s", map_state_str[map->map_state]);
		printf("\n Checkpoint : %u (%llu)",
		       __le32_to_cpu(dev->vol.curr_migr_unit),
		       (unsigned long long)blocks_per_migr_unit(dev));
	}
	printf("\n");
	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
976
/* Print one disk-table entry; @reserved sectors are subtracted from the
 * reported usable size.  Silently skips invalid indexes.
 */
static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
{
	struct imsm_disk *disk = __get_imsm_disk(mpb, index);
	char str[MAX_RAID_SERIAL_LEN + 1];
	__u64 sz;

	if (index < 0 || !disk)
		return;

	printf("\n");
	/* serial is not NUL terminated on disk; bound the copy */
	snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
	printf(" Disk%02d Serial : %s\n", index, str);
	printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
	       is_configured(disk) ? " active" : "",
	       is_failed(disk) ? " failed" : "");
	printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
	sz = __le32_to_cpu(disk->total_blocks) - reserved;
	printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
}
997
998 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
999
/* Implementation of --examine for imsm: dump the anchor header, the
 * BBM log if present, every raid device, and every disk (members first,
 * then unindexed spares from the dl list).
 */
static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	struct mdinfo info;
	char nbuf[64];
	__u32 sum;
	__u32 reserved = imsm_reserved_sectors(super, super->disks);
	struct dl *dl;

	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf(" Magic : %s\n", str);
	/* NOTE(review): this snprintf truncates to strlen(MPB_VERSION_RAID0)
	 * and its result (str) is never used — the next line prints
	 * get_imsm_version() directly; looks like a dead store
	 */
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf(" Version : %s\n", get_imsm_version(mpb));
	printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	printf(" UUID : %s\n", nbuf + 5);
	sum = __le32_to_cpu(mpb->check_sum);
	printf(" Checksum : %08x %s\n", sum,
	       __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
	printf(" Disks : %d\n", mpb->num_disks);
	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf(" Signature : %x\n", __le32_to_cpu(log->signature));
		printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf(" Spare Blocks : %d\n",  __le32_to_cpu(log->reserved_spare_block_count));
		printf(" First Spare : %llx\n",
		       (unsigned long long) __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct mdinfo info;
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		/* getinfo_super_imsm reports on current_vol when set */
		super->current_vol = i;
		getinfo_super_imsm(st, &info, NULL);
		fname_from_uuid(st, &info, nbuf, ':');
		print_imsm_dev(dev, nbuf + 5, super->disks->index);
	}
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue;	/* the local disk was printed above */
		print_imsm_disk(mpb, i, reserved);
	}
	/* finally list spares/unindexed disks known only via the dl list */
	for (dl = super->disks ; dl; dl = dl->next) {
		struct imsm_disk *disk;
		char str[MAX_RAID_SERIAL_LEN + 1];
		__u64 sz;

		if (dl->index >= 0)
			continue;

		disk = &dl->disk;
		printf("\n");
		snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
		printf(" Disk Serial : %s\n", str);
		printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
		       is_configured(disk) ? " active" : "",
		       is_failed(disk) ? " failed" : "");
		printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
		sz = __le32_to_cpu(disk->total_blocks) - reserved;
		printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
		       human_size(sz * 512));
	}
}
1076
1077 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1078 {
1079 /* We just write a generic IMSM ARRAY entry */
1080 struct mdinfo info;
1081 char nbuf[64];
1082 struct intel_super *super = st->sb;
1083
1084 if (!super->anchor->num_raid_devs) {
1085 printf("ARRAY metadata=imsm\n");
1086 return;
1087 }
1088
1089 getinfo_super_imsm(st, &info, NULL);
1090 fname_from_uuid(st, &info, nbuf, ':');
1091 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1092 }
1093
1094 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1095 {
1096 /* We just write a generic IMSM ARRAY entry */
1097 struct mdinfo info;
1098 char nbuf[64];
1099 char nbuf1[64];
1100 struct intel_super *super = st->sb;
1101 int i;
1102
1103 if (!super->anchor->num_raid_devs)
1104 return;
1105
1106 getinfo_super_imsm(st, &info, NULL);
1107 fname_from_uuid(st, &info, nbuf, ':');
1108 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1109 struct imsm_dev *dev = get_imsm_dev(super, i);
1110
1111 super->current_vol = i;
1112 getinfo_super_imsm(st, &info, NULL);
1113 fname_from_uuid(st, &info, nbuf1, ':');
1114 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1115 dev->volume, nbuf + 5, i, nbuf1 + 5);
1116 }
1117 }
1118
1119 static void export_examine_super_imsm(struct supertype *st)
1120 {
1121 struct intel_super *super = st->sb;
1122 struct imsm_super *mpb = super->anchor;
1123 struct mdinfo info;
1124 char nbuf[64];
1125
1126 getinfo_super_imsm(st, &info, NULL);
1127 fname_from_uuid(st, &info, nbuf, ':');
1128 printf("MD_METADATA=imsm\n");
1129 printf("MD_LEVEL=container\n");
1130 printf("MD_UUID=%s\n", nbuf+5);
1131 printf("MD_DEVICES=%u\n", mpb->num_disks);
1132 }
1133
1134 static void detail_super_imsm(struct supertype *st, char *homehost)
1135 {
1136 struct mdinfo info;
1137 char nbuf[64];
1138
1139 getinfo_super_imsm(st, &info, NULL);
1140 fname_from_uuid(st, &info, nbuf, ':');
1141 printf("\n UUID : %s\n", nbuf + 5);
1142 }
1143
1144 static void brief_detail_super_imsm(struct supertype *st)
1145 {
1146 struct mdinfo info;
1147 char nbuf[64];
1148 getinfo_super_imsm(st, &info, NULL);
1149 fname_from_uuid(st, &info, nbuf, ':');
1150 printf(" UUID=%s", nbuf + 5);
1151 }
1152
1153 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1154 static void fd2devname(int fd, char *name);
1155
1156 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
1157 {
1158 /* dump an unsorted list of devices attached to AHCI Intel storage
1159 * controller, as well as non-connected ports
1160 */
1161 int hba_len = strlen(hba_path) + 1;
1162 struct dirent *ent;
1163 DIR *dir;
1164 char *path = NULL;
1165 int err = 0;
1166 unsigned long port_mask = (1 << port_count) - 1;
1167
1168 if (port_count > (int)sizeof(port_mask) * 8) {
1169 if (verbose)
1170 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1171 return 2;
1172 }
1173
1174 /* scroll through /sys/dev/block looking for devices attached to
1175 * this hba
1176 */
1177 dir = opendir("/sys/dev/block");
1178 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1179 int fd;
1180 char model[64];
1181 char vendor[64];
1182 char buf[1024];
1183 int major, minor;
1184 char *device;
1185 char *c;
1186 int port;
1187 int type;
1188
1189 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1190 continue;
1191 path = devt_to_devpath(makedev(major, minor));
1192 if (!path)
1193 continue;
1194 if (!path_attached_to_hba(path, hba_path)) {
1195 free(path);
1196 path = NULL;
1197 continue;
1198 }
1199
1200 /* retrieve the scsi device type */
1201 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1202 if (verbose)
1203 fprintf(stderr, Name ": failed to allocate 'device'\n");
1204 err = 2;
1205 break;
1206 }
1207 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1208 if (load_sys(device, buf) != 0) {
1209 if (verbose)
1210 fprintf(stderr, Name ": failed to read device type for %s\n",
1211 path);
1212 err = 2;
1213 free(device);
1214 break;
1215 }
1216 type = strtoul(buf, NULL, 10);
1217
1218 /* if it's not a disk print the vendor and model */
1219 if (!(type == 0 || type == 7 || type == 14)) {
1220 vendor[0] = '\0';
1221 model[0] = '\0';
1222 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1223 if (load_sys(device, buf) == 0) {
1224 strncpy(vendor, buf, sizeof(vendor));
1225 vendor[sizeof(vendor) - 1] = '\0';
1226 c = (char *) &vendor[sizeof(vendor) - 1];
1227 while (isspace(*c) || *c == '\0')
1228 *c-- = '\0';
1229
1230 }
1231 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1232 if (load_sys(device, buf) == 0) {
1233 strncpy(model, buf, sizeof(model));
1234 model[sizeof(model) - 1] = '\0';
1235 c = (char *) &model[sizeof(model) - 1];
1236 while (isspace(*c) || *c == '\0')
1237 *c-- = '\0';
1238 }
1239
1240 if (vendor[0] && model[0])
1241 sprintf(buf, "%.64s %.64s", vendor, model);
1242 else
1243 switch (type) { /* numbers from hald/linux/device.c */
1244 case 1: sprintf(buf, "tape"); break;
1245 case 2: sprintf(buf, "printer"); break;
1246 case 3: sprintf(buf, "processor"); break;
1247 case 4:
1248 case 5: sprintf(buf, "cdrom"); break;
1249 case 6: sprintf(buf, "scanner"); break;
1250 case 8: sprintf(buf, "media_changer"); break;
1251 case 9: sprintf(buf, "comm"); break;
1252 case 12: sprintf(buf, "raid"); break;
1253 default: sprintf(buf, "unknown");
1254 }
1255 } else
1256 buf[0] = '\0';
1257 free(device);
1258
1259 /* chop device path to 'host%d' and calculate the port number */
1260 c = strchr(&path[hba_len], '/');
1261 if (!c) {
1262 if (verbose)
1263 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1264 err = 2;
1265 break;
1266 }
1267 *c = '\0';
1268 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1269 port -= host_base;
1270 else {
1271 if (verbose) {
1272 *c = '/'; /* repair the full string */
1273 fprintf(stderr, Name ": failed to determine port number for %s\n",
1274 path);
1275 }
1276 err = 2;
1277 break;
1278 }
1279
1280 /* mark this port as used */
1281 port_mask &= ~(1 << port);
1282
1283 /* print out the device information */
1284 if (buf[0]) {
1285 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1286 continue;
1287 }
1288
1289 fd = dev_open(ent->d_name, O_RDONLY);
1290 if (fd < 0)
1291 printf(" Port%d : - disk info unavailable -\n", port);
1292 else {
1293 fd2devname(fd, buf);
1294 printf(" Port%d : %s", port, buf);
1295 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1296 printf(" (%s)\n", buf);
1297 else
1298 printf("()\n");
1299 }
1300 close(fd);
1301 free(path);
1302 path = NULL;
1303 }
1304 if (path)
1305 free(path);
1306 if (dir)
1307 closedir(dir);
1308 if (err == 0) {
1309 int i;
1310
1311 for (i = 0; i < port_count; i++)
1312 if (port_mask & (1 << i))
1313 printf(" Port%d : - no device attached -\n", i);
1314 }
1315
1316 return err;
1317 }
1318
1319
1320
1321 static void print_found_intel_controllers(struct sys_dev *elem)
1322 {
1323 for (; elem; elem = elem->next) {
1324 fprintf(stderr, Name ": found Intel(R) ");
1325 if (elem->type == SYS_DEV_SATA)
1326 fprintf(stderr, "SATA ");
1327 else if (elem->type == SYS_DEV_SAS)
1328 fprintf(stderr, "SAS ");
1329 fprintf(stderr, "RAID controller");
1330 if (elem->pci_id)
1331 fprintf(stderr, " at %s", elem->pci_id);
1332 fprintf(stderr, ".\n");
1333 }
1334 fflush(stderr);
1335 }
1336
/* Scan 'hba_path' for host%d entries.  Returns the lowest host number
 * seen (the port numbering base) and sets *port_count to the span of
 * host numbers; returns -1 (with *port_count == 0) when the directory
 * cannot be opened or holds no host entries.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	DIR *dir = opendir(hba_path);
	struct dirent *ent;
	int host_base = -1;

	*port_count = 0;
	if (!dir)
		return -1;

	while ((ent = readdir(dir)) != NULL) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		if (*port_count == 0 || host < host_base)
			host_base = host;
		if (host + 1 > *port_count + host_base)
			*port_count = host + 1 - host_base;
	}
	closedir(dir);
	return host_base;
}
1363
1364 static void print_imsm_capability(const struct imsm_orom *orom)
1365 {
1366 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1367 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1368 orom->hotfix_ver, orom->build);
1369 printf(" RAID Levels :%s%s%s%s%s\n",
1370 imsm_orom_has_raid0(orom) ? " raid0" : "",
1371 imsm_orom_has_raid1(orom) ? " raid1" : "",
1372 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1373 imsm_orom_has_raid10(orom) ? " raid10" : "",
1374 imsm_orom_has_raid5(orom) ? " raid5" : "");
1375 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1376 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1377 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1378 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1379 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1380 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1381 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1382 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1383 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1384 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1385 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1386 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1387 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1388 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1389 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1390 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1391 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1392 printf(" Max Disks : %d\n", orom->tds);
1393 printf(" Max Volumes : %d\n", orom->vpa);
1394 return;
1395 }
1396
static int detail_platform_imsm(int verbose, int enumerate_only)
{
	/* There are two components to imsm platform support, the ahci SATA
	 * controller and the option-rom. To find the SATA controller we
	 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
	 * controller with the Intel vendor id is present. This approach
	 * allows mdadm to leverage the kernel's ahci detection logic, with the
	 * caveat that if ahci.ko is not loaded mdadm will not be able to
	 * detect platform raid capabilities. The option-rom resides in a
	 * platform "Adapter ROM". We scan for its signature to retrieve the
	 * platform capabilities. If raid support is disabled in the BIOS the
	 * option-rom capability structure will not be available.
	 *
	 * Returns 0 on success, 2 (or result with bit 1 set) on failure.
	 */
	const struct imsm_orom *orom;
	struct sys_dev *list, *hba;
	int host_base = 0;
	int port_count = 0;
	int result=0;

	if (enumerate_only) {
		/* silent probe: no report, exit status only.  0 when every
		 * Intel controller has imsm capability (or IMSM_NO_PLATFORM
		 * overrides the check), 2 otherwise.
		 */
		if (check_env("IMSM_NO_PLATFORM"))
			return 0;
		list = find_intel_devices();
		if (!list)
			return 2;
		for (hba = list; hba; hba = hba->next) {
			orom = find_imsm_capability(hba->type);
			if (!orom) {
				result = 2;
				break;
			}
		}
		free_sys_dev(&list);
		return result;
	}

	list = find_intel_devices();
	if (!list) {
		if (verbose)
			fprintf(stderr, Name ": no active Intel(R) RAID "
				"controller found.\n");
		/* list is NULL here, so this free is a no-op */
		free_sys_dev(&list);
		return 2;
	} else if (verbose)
		print_found_intel_controllers(list);

	/* first pass: report each controller's option-rom capability */
	for (hba = list; hba; hba = hba->next) {
		orom = find_imsm_capability(hba->type);
		if (!orom)
			fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
				hba->path, get_sys_dev_type(hba->type));
		else
			print_imsm_capability(orom);
	}

	/* second pass: enumerate ports/disks behind each controller */
	for (hba = list; hba; hba = hba->next) {
		printf(" I/O Controller : %s (%s)\n",
			hba->path, get_sys_dev_type(hba->type));

		if (hba->type == SYS_DEV_SATA) {
			host_base = ahci_get_port_count(hba->path, &port_count);
			if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
				if (verbose)
					fprintf(stderr, Name ": failed to enumerate "
						"ports on SATA controller at %s.", hba->pci_id);
				result |= 2;
			}
		}
	}

	free_sys_dev(&list);
	return result;
}
1470 #endif
1471
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* imsm metadata carries no host identification, so membership can
	 * be neither confirmed nor denied here; always answer "unknown".
	 * compare_super and the 'family_num' fields exclude member disks
	 * that do not belong, and mdadm.conf names the arrays that should
	 * be assembled.  Auto-assembly may still pick up "foreign" arrays.
	 */
	return -1;
}
1485
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesis one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the orig_family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];		/* sha1 digest is 20 bytes; first 16 become the uuid */
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;
	__u32 family_num;

	/* some mdadm versions failed to set ->orig_family_num, in which
	 * case fall back to ->family_num.  orig_family_num will be
	 * fixed up with the first metadata update.
	 */
	family_num = super->anchor->orig_family_num;
	if (family_num == 0)
		family_num = super->anchor->family_num;
	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
	/* NOTE: family_num is hashed in its raw on-disk byte order (no
	 * __le32_to_cpu), so the digest is stable across host endianness
	 * only because everyone hashes the same stored bytes
	 */
	sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
	/* current_vol < 0 means "the container itself": hash only the
	 * signature and family number in that case
	 */
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	memcpy(uuid, buf, 4*4);
}
1537
#if 0
/* Parse the dotted version string embedded in the mpb signature into
 * numeric minor (*m) and patch (*p) components.  Compiled out.
 * NOTE(review): the 'major' component is collected but never returned;
 * only minor and patch reach the caller.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	/* copy up to two digits of each dot-separated component */
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
1565
1566 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1567 {
1568 /* migr_strip_size when repairing or initializing parity */
1569 struct imsm_map *map = get_imsm_map(dev, 0);
1570 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1571
1572 switch (get_imsm_raid_level(map)) {
1573 case 5:
1574 case 10:
1575 return chunk;
1576 default:
1577 return 128*1024 >> 9;
1578 }
1579 }
1580
1581 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1582 {
1583 /* migr_strip_size when rebuilding a degraded disk, no idea why
1584 * this is different than migr_strip_size_resync(), but it's good
1585 * to be compatible
1586 */
1587 struct imsm_map *map = get_imsm_map(dev, 1);
1588 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1589
1590 switch (get_imsm_raid_level(map)) {
1591 case 1:
1592 case 10:
1593 if (map->num_members % map->num_domains == 0)
1594 return 128*1024 >> 9;
1595 else
1596 return chunk;
1597 case 5:
1598 return max((__u32) 64*1024 >> 9, chunk);
1599 default:
1600 return 128*1024 >> 9;
1601 }
1602 }
1603
1604 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1605 {
1606 struct imsm_map *lo = get_imsm_map(dev, 0);
1607 struct imsm_map *hi = get_imsm_map(dev, 1);
1608 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1609 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1610
1611 return max((__u32) 1, hi_chunk / lo_chunk);
1612 }
1613
1614 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1615 {
1616 struct imsm_map *lo = get_imsm_map(dev, 0);
1617 int level = get_imsm_raid_level(lo);
1618
1619 if (level == 1 || level == 10) {
1620 struct imsm_map *hi = get_imsm_map(dev, 1);
1621
1622 return hi->num_domains;
1623 } else
1624 return num_stripes_per_unit_resync(dev);
1625 }
1626
1627 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1628 {
1629 /* named 'imsm_' because raid0, raid1 and raid10
1630 * counter-intuitively have the same number of data disks
1631 */
1632 struct imsm_map *map = get_imsm_map(dev, second_map);
1633
1634 switch (get_imsm_raid_level(map)) {
1635 case 0:
1636 case 1:
1637 case 10:
1638 return map->num_members;
1639 case 5:
1640 return map->num_members - 1;
1641 default:
1642 dprintf("%s: unsupported raid level\n", __func__);
1643 return 0;
1644 }
1645 }
1646
1647 static __u32 parity_segment_depth(struct imsm_dev *dev)
1648 {
1649 struct imsm_map *map = get_imsm_map(dev, 0);
1650 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1651
1652 switch(get_imsm_raid_level(map)) {
1653 case 1:
1654 case 10:
1655 return chunk * map->num_domains;
1656 case 5:
1657 return chunk * map->num_members;
1658 default:
1659 return chunk;
1660 }
1661 }
1662
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	/* Map a block offset within a migration unit onto a per-member
	 * offset, using the geometry of the second map.  Used by
	 * blocks_per_migr_unit() to convert the sub-stripe remainder.
	 */
	struct imsm_map *map = get_imsm_map(dev, 1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		/* NOTE(review): assumes num_domains copies interleave per
		 * strip — confirm against the imsm on-disk layout
		 */
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	} case 5: {
		/* num_members - 1 data strips per raid5 stripe */
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		/* unsupported level: caller treats 0 as "no mapping" */
		return 0;
	}
}
1685
static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
{
	/* calculate the conversion factor between per member 'blocks'
	 * (md/{resync,rebuild}_start) and imsm migration units, return
	 * 0 for the 'not migrating' and 'unsupported migration' cases
	 */
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_GEN_MIGR:
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, 0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8 disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev, 0);
		/* total blocks covered by one migration unit across all
		 * data members
		 */
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
		/* split the unit into whole stripes plus a remainder... */
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		/* ...then charge per-stripe parity overhead and map the
		 * remainder back to a per-member offset
		 */
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		/* rebuild advances one strip per unit on each stripe */
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}
1738
1739 static int imsm_level_to_layout(int level)
1740 {
1741 switch (level) {
1742 case 0:
1743 case 1:
1744 return 0;
1745 case 5:
1746 case 6:
1747 return ALGORITHM_LEFT_ASYMMETRIC;
1748 case 10:
1749 return 0x102;
1750 }
1751 return UnSet;
1752 }
1753
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
	/* Fill 'info' for the currently selected volume
	 * (super->current_vol).  'dmap', when non-NULL, receives one byte
	 * per slot of the incoming info->array.raid_disks: 1 if that
	 * member disk is CONFIGURED, else 0.
	 */
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *prev_map = get_imsm_map(dev, 1);
	struct imsm_map *map_to_analyse = map;
	struct dl *dl;
	char *devname;
	unsigned int component_size_alligment;
	int map_disks = info->array.raid_disks;

	/* when a second (previous) map exists, report geometry from it */
	if (prev_map)
		map_to_analyse = prev_map;

	/* locate this host's disk so major/minor can be filled in below */
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->raiddisk == info->disk.raid_disk)
			break;
	info->container_member = super->current_vol;
	info->array.raid_disks = map->num_members;
	info->array.level = get_imsm_raid_level(map_to_analyse);
	info->array.layout = imsm_level_to_layout(info->array.level);
	info->array.md_minor = -1;
	info->array.ctime = 0;
	info->array.utime = 0;
	info->array.chunk_size =
		__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
	info->array.state = !dev->vol.dirty;
	/* 64-bit array size assembled from the two on-disk 32-bit halves */
	info->custom_array_size = __le32_to_cpu(dev->size_high);
	info->custom_array_size <<= 32;
	info->custom_array_size |= __le32_to_cpu(dev->size_low);
	if (prev_map && map->map_state == prev_map->map_state) {
		/* both maps in the same state: treated as an active
		 * reshape (general migration)
		 */
		info->reshape_active = 1;
		info->new_level = get_imsm_raid_level(map);
		info->new_layout = imsm_level_to_layout(info->new_level);
		info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
		info->delta_disks = map->num_members - prev_map->num_members;
		if (info->delta_disks) {
			/* this needs to be applied to every array
			 * in the container.
			 */
			info->reshape_active = 2;
		}
		/* We shape information that we give to md might have to be
		 * modify to cope with md's requirement for reshaping arrays.
		 * For example, when reshaping a RAID0, md requires it to be
		 * presented as a degraded RAID4.
		 * Also if a RAID0 is migrating to a RAID5 we need to specify
		 * the array as already being RAID5, but the 'before' layout
		 * is a RAID4-like layout.
		 */
		switch (info->array.level) {
		case 0:
			switch(info->new_level) {
			case 0:
				/* conversion is happening as RAID4 */
				info->array.level = 4;
				info->array.raid_disks += 1;
				break;
			case 5:
				/* conversion is happening as RAID5 */
				info->array.level = 5;
				info->array.layout = ALGORITHM_PARITY_N;
				info->array.raid_disks += 1;
				info->delta_disks -= 1;
				break;
			default:
				/* FIXME error message */
				info->array.level = UnSet;
				break;
			}
			break;
		}
	} else {
		/* no reshape in progress */
		info->new_level = UnSet;
		info->new_layout = UnSet;
		info->new_chunk = info->array.chunk_size;
		info->delta_disks = 0;
	}
	info->disk.major = 0;
	info->disk.minor = 0;
	if (dl) {
		info->disk.major = dl->major;
		info->disk.minor = dl->minor;
	}

	info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
	info->component_size =
		__le32_to_cpu(map_to_analyse->blocks_per_member);

	/* check component size alignment; round down to a whole number of
	 * chunks for all striped levels
	 */
	component_size_alligment =
		info->component_size % (info->array.chunk_size/512);

	if (component_size_alligment &&
	    (info->array.level != 1) && (info->array.level != UnSet)) {
		dprintf("imsm: reported component size alligned from %llu ",
			info->component_size);
		info->component_size -= component_size_alligment;
		dprintf("to %llu (%i).\n",
			info->component_size, component_size_alligment);
	}

	memset(info->uuid, 0, sizeof(info->uuid));
	info->recovery_start = MaxSector;

	info->reshape_progress = 0;
	info->resync_start = MaxSector;
	if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
	    dev->vol.dirty) {
		/* uninitialized or dirty volumes start resync from 0 */
		info->resync_start = 0;
	}
	if (dev->vol.migr_state) {
		switch (migr_type(dev)) {
		case MIGR_REPAIR:
		case MIGR_INIT: {
			/* convert the checkpoint into a per-member block
			 * position for md's resync_start
			 */
			__u64 blocks_per_unit = blocks_per_migr_unit(dev);
			__u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);

			info->resync_start = blocks_per_unit * units;
			break;
		}
		case MIGR_GEN_MIGR: {
			__u64 blocks_per_unit = blocks_per_migr_unit(dev);
			__u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
			unsigned long long array_blocks;
			int used_disks;

			info->reshape_progress = blocks_per_unit * units;

			/* checkpoint is written per disks unit
			 * recalculate it to reshape position
			 */
			used_disks = imsm_num_data_members(dev, 0);
			info->reshape_progress *= used_disks;
			dprintf("IMSM: General Migration checkpoint : %llu "
			       "(%llu) -> read reshape progress : %llu\n",
				units, blocks_per_unit, info->reshape_progress);

			used_disks = imsm_num_data_members(dev, 1);
			if (used_disks > 0) {
				array_blocks = map->blocks_per_member *
					used_disks;
				/* round array size down to closest MB
				 */
				info->custom_array_size = (array_blocks
						>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
			}
		}
		/* NOTE(review): no 'break' above — MIGR_GEN_MIGR falls
		 * through to the default resync_start handling below;
		 * confirm this fallthrough is intentional
		 */
		case MIGR_VERIFY:
			/* we could emulate the checkpointing of
			 * 'sync_action=check' migrations, but for now
			 * we just immediately complete them
			 */
		case MIGR_REBUILD:
			/* this is handled by container_content_imsm() */
		case MIGR_STATE_CHANGE:
			/* FIXME handle other migrations */
		default:
			/* we are not dirty, so... */
			info->resync_start = MaxSector;
		}
	}

	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	devname = devnum2devname(st->container_dev);
	*info->text_version = '\0';
	if (devname)
		sprintf(info->text_version, "/%s/%d", devname, info->container_member);
	free(devname);
	info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);

	if (dmap) {
		/* mark each requested slot whose disk is CONFIGURED */
		int i, j;
		for (i=0; i<map_disks; i++) {
			dmap[i] = 0;
			if (i < info->array.raid_disks) {
				struct imsm_disk *dsk;
				j = get_imsm_disk_idx(dev, i, -1);
				dsk = get_imsm_disk(super, j);
				if (dsk && (dsk->status & CONFIGURED_DISK))
					dmap[i] = 1;
			}
		}
	}
}
1947
1948 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
1949 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
1950
1951 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
1952 {
1953 struct dl *d;
1954
1955 for (d = super->missing; d; d = d->next)
1956 if (d->index == index)
1957 return &d->disk;
1958 return NULL;
1959 }
1960
1961 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
1962 {
1963 struct intel_super *super = st->sb;
1964 struct imsm_disk *disk;
1965 int map_disks = info->array.raid_disks;
1966 int max_enough = -1;
1967 int i;
1968 struct imsm_super *mpb;
1969
1970 if (super->current_vol >= 0) {
1971 getinfo_super_imsm_volume(st, info, map);
1972 return;
1973 }
1974
1975 /* Set raid_disks to zero so that Assemble will always pull in valid
1976 * spares
1977 */
1978 info->array.raid_disks = 0;
1979 info->array.level = LEVEL_CONTAINER;
1980 info->array.layout = 0;
1981 info->array.md_minor = -1;
1982 info->array.ctime = 0; /* N/A for imsm */
1983 info->array.utime = 0;
1984 info->array.chunk_size = 0;
1985
1986 info->disk.major = 0;
1987 info->disk.minor = 0;
1988 info->disk.raid_disk = -1;
1989 info->reshape_active = 0;
1990 info->array.major_version = -1;
1991 info->array.minor_version = -2;
1992 strcpy(info->text_version, "imsm");
1993 info->safe_mode_delay = 0;
1994 info->disk.number = -1;
1995 info->disk.state = 0;
1996 info->name[0] = 0;
1997 info->recovery_start = MaxSector;
1998
1999 /* do we have the all the insync disks that we expect? */
2000 mpb = super->anchor;
2001
2002 for (i = 0; i < mpb->num_raid_devs; i++) {
2003 struct imsm_dev *dev = get_imsm_dev(super, i);
2004 int failed, enough, j, missing = 0;
2005 struct imsm_map *map;
2006 __u8 state;
2007
2008 failed = imsm_count_failed(super, dev);
2009 state = imsm_check_degraded(super, dev, failed);
2010 map = get_imsm_map(dev, dev->vol.migr_state);
2011
2012 /* any newly missing disks?
2013 * (catches single-degraded vs double-degraded)
2014 */
2015 for (j = 0; j < map->num_members; j++) {
2016 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
2017 __u32 idx = ord_to_idx(ord);
2018
2019 if (!(ord & IMSM_ORD_REBUILD) &&
2020 get_imsm_missing(super, idx)) {
2021 missing = 1;
2022 break;
2023 }
2024 }
2025
2026 if (state == IMSM_T_STATE_FAILED)
2027 enough = -1;
2028 else if (state == IMSM_T_STATE_DEGRADED &&
2029 (state != map->map_state || missing))
2030 enough = 0;
2031 else /* we're normal, or already degraded */
2032 enough = 1;
2033
2034 /* in the missing/failed disk case check to see
2035 * if at least one array is runnable
2036 */
2037 max_enough = max(max_enough, enough);
2038 }
2039 dprintf("%s: enough: %d\n", __func__, max_enough);
2040 info->container_enough = max_enough;
2041
2042 if (super->disks) {
2043 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2044
2045 disk = &super->disks->disk;
2046 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2047 info->component_size = reserved;
2048 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
2049 /* we don't change info->disk.raid_disk here because
2050 * this state will be finalized in mdmon after we have
2051 * found the 'most fresh' version of the metadata
2052 */
2053 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2054 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2055 }
2056
2057 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2058 * ->compare_super may have updated the 'num_raid_devs' field for spares
2059 */
2060 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
2061 uuid_from_super_imsm(st, info->uuid);
2062 else
2063 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
2064
2065 /* I don't know how to compute 'map' on imsm, so use safe default */
2066 if (map) {
2067 int i;
2068 for (i = 0; i < map_disks; i++)
2069 map[i] = 1;
2070 }
2071
2072 }
2073
2074 /* allocates memory and fills disk in mdinfo structure
2075 * for each disk in array */
2076 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2077 {
2078 struct mdinfo *mddev = NULL;
2079 struct intel_super *super = st->sb;
2080 struct imsm_disk *disk;
2081 int count = 0;
2082 struct dl *dl;
2083 if (!super || !super->disks)
2084 return NULL;
2085 dl = super->disks;
2086 mddev = malloc(sizeof(*mddev));
2087 if (!mddev) {
2088 fprintf(stderr, Name ": Failed to allocate memory.\n");
2089 return NULL;
2090 }
2091 memset(mddev, 0, sizeof(*mddev));
2092 while (dl) {
2093 struct mdinfo *tmp;
2094 disk = &dl->disk;
2095 tmp = malloc(sizeof(*tmp));
2096 if (!tmp) {
2097 fprintf(stderr, Name ": Failed to allocate memory.\n");
2098 if (mddev)
2099 sysfs_free(mddev);
2100 return NULL;
2101 }
2102 memset(tmp, 0, sizeof(*tmp));
2103 if (mddev->devs)
2104 tmp->next = mddev->devs;
2105 mddev->devs = tmp;
2106 tmp->disk.number = count++;
2107 tmp->disk.major = dl->major;
2108 tmp->disk.minor = dl->minor;
2109 tmp->disk.state = is_configured(disk) ?
2110 (1 << MD_DISK_ACTIVE) : 0;
2111 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2112 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2113 tmp->disk.raid_disk = -1;
2114 dl = dl->next;
2115 }
2116 return mddev;
2117 }
2118
2119 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2120 char *update, char *devname, int verbose,
2121 int uuid_set, char *homehost)
2122 {
2123 /* For 'assemble' and 'force' we need to return non-zero if any
2124 * change was made. For others, the return value is ignored.
2125 * Update options are:
2126 * force-one : This device looks a bit old but needs to be included,
2127 * update age info appropriately.
2128 * assemble: clear any 'faulty' flag to allow this device to
2129 * be assembled.
2130 * force-array: Array is degraded but being forced, mark it clean
2131 * if that will be needed to assemble it.
2132 *
2133 * newdev: not used ????
2134 * grow: Array has gained a new device - this is currently for
2135 * linear only
2136 * resync: mark as dirty so a resync will happen.
2137 * name: update the name - preserving the homehost
2138 * uuid: Change the uuid of the array to match watch is given
2139 *
2140 * Following are not relevant for this imsm:
2141 * sparc2.2 : update from old dodgey metadata
2142 * super-minor: change the preferred_minor number
2143 * summaries: update redundant counters.
2144 * homehost: update the recorded homehost
2145 * _reshape_progress: record new reshape_progress position.
2146 */
2147 int rv = 1;
2148 struct intel_super *super = st->sb;
2149 struct imsm_super *mpb;
2150
2151 /* we can only update container info */
2152 if (!super || super->current_vol >= 0 || !super->anchor)
2153 return 1;
2154
2155 mpb = super->anchor;
2156
2157 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
2158 rv = -1;
2159 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2160 mpb->orig_family_num = *((__u32 *) info->update_private);
2161 rv = 0;
2162 } else if (strcmp(update, "uuid") == 0) {
2163 __u32 *new_family = malloc(sizeof(*new_family));
2164
2165 /* update orig_family_number with the incoming random
2166 * data, report the new effective uuid, and store the
2167 * new orig_family_num for future updates.
2168 */
2169 if (new_family) {
2170 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2171 uuid_from_super_imsm(st, info->uuid);
2172 *new_family = mpb->orig_family_num;
2173 info->update_private = new_family;
2174 rv = 0;
2175 }
2176 } else if (strcmp(update, "assemble") == 0)
2177 rv = 0;
2178 else
2179 rv = -1;
2180
2181 /* successful update? recompute checksum */
2182 if (rv == 0)
2183 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
2184
2185 return rv;
2186 }
2187
2188 static size_t disks_to_mpb_size(int disks)
2189 {
2190 size_t size;
2191
2192 size = sizeof(struct imsm_super);
2193 size += (disks - 1) * sizeof(struct imsm_disk);
2194 size += 2 * sizeof(struct imsm_dev);
2195 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2196 size += (4 - 2) * sizeof(struct imsm_map);
2197 /* 4 possible disk_ord_tbl's */
2198 size += 4 * (disks - 1) * sizeof(__u32);
2199
2200 return size;
2201 }
2202
2203 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2204 {
2205 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2206 return 0;
2207
2208 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
2209 }
2210
2211 static void free_devlist(struct intel_super *super)
2212 {
2213 struct intel_dev *dv;
2214
2215 while (super->devlist) {
2216 dv = super->devlist->next;
2217 free(super->devlist->dev);
2218 free(super->devlist);
2219 super->devlist = dv;
2220 }
2221 }
2222
/* imsm_copy_dev - copy a raid device record, sized for its single-map
 * (non-migrating) form; dest must be at least sizeof_imsm_dev(src, 0).
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2227
2228 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2229 {
2230 /*
2231 * return:
2232 * 0 same, or first was empty, and second was copied
2233 * 1 second had wrong number
2234 * 2 wrong uuid
2235 * 3 wrong other info
2236 */
2237 struct intel_super *first = st->sb;
2238 struct intel_super *sec = tst->sb;
2239
2240 if (!first) {
2241 st->sb = tst->sb;
2242 tst->sb = NULL;
2243 return 0;
2244 }
2245 /* in platform dependent environment test if the disks
2246 * use the same Intel hba
2247 */
2248 if (!check_env("IMSM_NO_PLATFORM")) {
2249 if (!first->hba || !sec->hba ||
2250 (first->hba->type != sec->hba->type)) {
2251 fprintf(stderr,
2252 "HBAs of devices does not match %s != %s\n",
2253 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2254 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
2255 return 3;
2256 }
2257 }
2258
2259 /* if an anchor does not have num_raid_devs set then it is a free
2260 * floating spare
2261 */
2262 if (first->anchor->num_raid_devs > 0 &&
2263 sec->anchor->num_raid_devs > 0) {
2264 /* Determine if these disks might ever have been
2265 * related. Further disambiguation can only take place
2266 * in load_super_imsm_all
2267 */
2268 __u32 first_family = first->anchor->orig_family_num;
2269 __u32 sec_family = sec->anchor->orig_family_num;
2270
2271 if (memcmp(first->anchor->sig, sec->anchor->sig,
2272 MAX_SIGNATURE_LENGTH) != 0)
2273 return 3;
2274
2275 if (first_family == 0)
2276 first_family = first->anchor->family_num;
2277 if (sec_family == 0)
2278 sec_family = sec->anchor->family_num;
2279
2280 if (first_family != sec_family)
2281 return 3;
2282
2283 }
2284
2285
2286 /* if 'first' is a spare promote it to a populated mpb with sec's
2287 * family number
2288 */
2289 if (first->anchor->num_raid_devs == 0 &&
2290 sec->anchor->num_raid_devs > 0) {
2291 int i;
2292 struct intel_dev *dv;
2293 struct imsm_dev *dev;
2294
2295 /* we need to copy raid device info from sec if an allocation
2296 * fails here we don't associate the spare
2297 */
2298 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
2299 dv = malloc(sizeof(*dv));
2300 if (!dv)
2301 break;
2302 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2303 if (!dev) {
2304 free(dv);
2305 break;
2306 }
2307 dv->dev = dev;
2308 dv->index = i;
2309 dv->next = first->devlist;
2310 first->devlist = dv;
2311 }
2312 if (i < sec->anchor->num_raid_devs) {
2313 /* allocation failure */
2314 free_devlist(first);
2315 fprintf(stderr, "imsm: failed to associate spare\n");
2316 return 3;
2317 }
2318 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
2319 first->anchor->orig_family_num = sec->anchor->orig_family_num;
2320 first->anchor->family_num = sec->anchor->family_num;
2321 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
2322 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2323 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
2324 }
2325
2326 return 0;
2327 }
2328
2329 static void fd2devname(int fd, char *name)
2330 {
2331 struct stat st;
2332 char path[256];
2333 char dname[PATH_MAX];
2334 char *nm;
2335 int rv;
2336
2337 name[0] = '\0';
2338 if (fstat(fd, &st) != 0)
2339 return;
2340 sprintf(path, "/sys/dev/block/%d:%d",
2341 major(st.st_rdev), minor(st.st_rdev));
2342
2343 rv = readlink(path, dname, sizeof(dname));
2344 if (rv <= 0)
2345 return;
2346
2347 dname[rv] = '\0';
2348 nm = strrchr(dname, '/');
2349 nm++;
2350 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2351 }
2352
2353 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2354
/* imsm_read_serial - obtain a stable serial number for a disk.
 *
 * Queries the SCSI VPD serial page via scsi_get_serial(); if that fails
 * and IMSM_DEVNAME_AS_SERIAL is set in the environment, falls back to
 * the device name.  The response is compacted (whitespace and control
 * characters dropped, ':' mapped to ';') and truncated from the left to
 * MAX_RAID_SERIAL_LEN bytes.
 *
 * Returns 0 on success, the scsi_get_serial() error on query failure,
 * or 2 when the device returned an empty serial.
 */
static int imsm_read_serial(int fd, char *devname,
			    __u8 serial[MAX_RAID_SERIAL_LEN])
{
	unsigned char scsi_serial[255];
	int rv;
	int rsp_len;
	int len;
	char *dest;
	char *src;
	char *rsp_buf;
	int i;

	memset(scsi_serial, 0, sizeof(scsi_serial));

	rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));

	/* optional fallback: use the device node name as the serial */
	if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
		memset(serial, 0, MAX_RAID_SERIAL_LEN);
		fd2devname(fd, (char *) serial);
		return 0;
	}

	if (rv != 0) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to retrieve serial for %s\n",
				devname);
		return rv;
	}

	/* byte 3 of the VPD response carries the payload length */
	rsp_len = scsi_serial[3];
	if (!rsp_len) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to retrieve serial for %s\n",
				devname);
		return 2;
	}
	rsp_buf = (char *) &scsi_serial[4];

	/* trim all whitespace and non-printable characters and convert
	 * ':' to ';'
	 */
	/* NOTE(review): '*src > 0x20' on a plain char also drops bytes
	 * >= 0x80 where char is signed -- presumably serials are ASCII;
	 * confirm before relying on high-bit bytes surviving
	 */
	for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
		src = &rsp_buf[i];
		if (*src > 0x20) {
			/* ':' is reserved for use in placeholder serial
			 * numbers for missing disks
			 */
			if (*src == ':')
				*dest++ = ';';
			else
				*dest++ = *src;
		}
	}
	len = dest - rsp_buf;
	dest = rsp_buf;

	/* truncate leading characters */
	if (len > MAX_RAID_SERIAL_LEN) {
		dest += len - MAX_RAID_SERIAL_LEN;
		len = MAX_RAID_SERIAL_LEN;
	}

	memset(serial, 0, MAX_RAID_SERIAL_LEN);
	memcpy(serial, dest, len);

	return 0;
}
2424
/* serialcmp - compare two fixed-width serial fields; returns 0 on match */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}
2429
/* serialcpy - copy a serial into a fixed-width metadata field.
 * strncpy is deliberate here: serial fields are MAX_RAID_SERIAL_LEN
 * bytes, not NUL-terminated C strings, and the zero-padding it performs
 * is wanted for shorter serials.
 */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
2434
2435 #ifndef MDASSEMBLE
2436 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2437 {
2438 struct dl *dl;
2439
2440 for (dl = super->disks; dl; dl = dl->next)
2441 if (serialcmp(dl->serial, serial) == 0)
2442 break;
2443
2444 return dl;
2445 }
2446 #endif
2447
2448 static struct imsm_disk *
2449 __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2450 {
2451 int i;
2452
2453 for (i = 0; i < mpb->num_disks; i++) {
2454 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2455
2456 if (serialcmp(disk->serial, serial) == 0) {
2457 if (idx)
2458 *idx = i;
2459 return disk;
2460 }
2461 }
2462
2463 return NULL;
2464 }
2465
2466 static int
2467 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2468 {
2469 struct imsm_disk *disk;
2470 struct dl *dl;
2471 struct stat stb;
2472 int rv;
2473 char name[40];
2474 __u8 serial[MAX_RAID_SERIAL_LEN];
2475
2476 rv = imsm_read_serial(fd, devname, serial);
2477
2478 if (rv != 0)
2479 return 2;
2480
2481 dl = calloc(1, sizeof(*dl));
2482 if (!dl) {
2483 if (devname)
2484 fprintf(stderr,
2485 Name ": failed to allocate disk buffer for %s\n",
2486 devname);
2487 return 2;
2488 }
2489
2490 fstat(fd, &stb);
2491 dl->major = major(stb.st_rdev);
2492 dl->minor = minor(stb.st_rdev);
2493 dl->next = super->disks;
2494 dl->fd = keep_fd ? fd : -1;
2495 assert(super->disks == NULL);
2496 super->disks = dl;
2497 serialcpy(dl->serial, serial);
2498 dl->index = -2;
2499 dl->e = NULL;
2500 fd2devname(fd, name);
2501 if (devname)
2502 dl->devname = strdup(devname);
2503 else
2504 dl->devname = strdup(name);
2505
2506 /* look up this disk's index in the current anchor */
2507 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2508 if (disk) {
2509 dl->disk = *disk;
2510 /* only set index on disks that are a member of a
2511 * populated contianer, i.e. one with raid_devs
2512 */
2513 if (is_failed(&dl->disk))
2514 dl->index = -2;
2515 else if (is_spare(&dl->disk))
2516 dl->index = -1;
2517 }
2518
2519 return 0;
2520 }
2521
2522 #ifndef MDASSEMBLE
2523 /* When migrating map0 contains the 'destination' state while map1
2524 * contains the current state. When not migrating map0 contains the
2525 * current state. This routine assumes that map[0].map_state is set to
2526 * the current array state before being called.
2527 *
2528 * Migration is indicated by one of the following states
2529 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2530 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2531 * map1state=unitialized)
2532 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2533 * map1state=normal)
2534 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2535 * map1state=degraded)
2536 */
/* migrate - put 'dev' into migration state 'migr_type'.
 * Copies the current map into the map[1] slot (the pre-migration
 * state), then marks map[0] with the target end state 'to_state'.
 * For rebuild/general migrations, any IMSM_ORD_REBUILD bits are
 * stripped from the target map's ordinal table.
 */
static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type)
{
	struct imsm_map *dest;
	struct imsm_map *src = get_imsm_map(dev, 0);

	dev->vol.migr_state = 1;
	set_migr_type(dev, migr_type);
	dev->vol.curr_migr_unit = 0;
	dest = get_imsm_map(dev, 1);

	/* duplicate and then set the target end state in map[0] */
	memcpy(dest, src, sizeof_imsm_map(src));
	if ((migr_type == MIGR_REBUILD) ||
	    (migr_type == MIGR_GEN_MIGR)) {
		__u32 ord;
		int i;

		/* clear rebuild markers from the (copied-into-map[0])
		 * ordinal table so the target state starts clean
		 */
		for (i = 0; i < src->num_members; i++) {
			ord = __le32_to_cpu(src->disk_ord_tbl[i]);
			set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
		}
	}

	src->map_state = to_state;
}
2562
/* end_migration - leave migration mode, collapsing back to a single
 * map with state 'map_state'.  Rebuild markers that were pending in the
 * pre-migration map are carried forward so an interrupted rebuild is
 * not silently forgotten.
 */
static void end_migration(struct imsm_dev *dev, __u8 map_state)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
	int i, j;

	/* merge any IMSM_ORD_REBUILD bits that were not successfully
	 * completed in the last migration.
	 *
	 * FIXME add support for raid-level-migration
	 */
	for (i = 0; i < prev->num_members; i++)
		for (j = 0; j < map->num_members; j++)
			/* during online capacity expansion
			 * disks position can be changed if takeover is used
			 */
			if (ord_to_idx(map->disk_ord_tbl[j]) ==
			    ord_to_idx(prev->disk_ord_tbl[i])) {
				map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
				break;
			}

	/* clear all migration bookkeeping */
	dev->vol.migr_state = 0;
	dev->vol.migr_type = 0;
	dev->vol.curr_migr_unit = 0;
	map->map_state = map_state;
}
2590 #endif
2591
/* parse_raid_devices - build super->devlist from the anchor.
 *
 * Each raid device from the anchor is copied into a freshly allocated
 * intel_dev node, sized for its worst-case (migrating, two-map) form.
 * Also grows super->buf so that the whole mpb still fits if every raid
 * device enters migration at once.
 *
 * Returns 0 on success, 1 on allocation failure.
 */
static int parse_raid_devices(struct intel_super *super)
{
	int i;
	struct imsm_dev *dev_new;
	size_t len, len_migr;
	size_t max_len = 0;
	size_t space_needed = 0;
	struct imsm_super *mpb = super->anchor;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
		struct intel_dev *dv;

		/* size with one map vs. with the extra migration map */
		len = sizeof_imsm_dev(dev_iter, 0);
		len_migr = sizeof_imsm_dev(dev_iter, 1);
		if (len_migr > len)
			space_needed += len_migr - len;

		dv = malloc(sizeof(*dv));
		if (!dv)
			return 1;
		/* track the largest migrating size seen so far; every
		 * copy is allocated at max_len so they are interchangeable
		 */
		if (max_len < len_migr)
			max_len = len_migr;
		if (max_len > len_migr)
			space_needed += max_len - len_migr;
		dev_new = malloc(max_len);
		if (!dev_new) {
			free(dv);
			return 1;
		}
		imsm_copy_dev(dev_new, dev_iter);
		dv->dev = dev_new;
		dv->index = i;
		dv->next = super->devlist;
		super->devlist = dv;
	}

	/* ensure that super->buf is large enough when all raid devices
	 * are migrating
	 */
	if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
		void *buf;

		len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
		if (posix_memalign(&buf, 512, len) != 0)
			return 1;

		/* preserve existing content, zero-fill the growth */
		memcpy(buf, super->buf, super->len);
		memset(buf + super->len, 0, len - super->len);
		free(super->buf);
		super->buf = buf;
		super->len = len;
	}

	return 0;
}
2648
2649 /* retrieve a pointer to the bbm log which starts after all raid devices */
2650 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2651 {
2652 void *ptr = NULL;
2653
2654 if (__le32_to_cpu(mpb->bbm_log_size)) {
2655 ptr = mpb;
2656 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2657 }
2658
2659 return ptr;
2660 }
2661
2662 static void __free_imsm(struct intel_super *super, int free_disks);
2663
2664 /* load_imsm_mpb - read matrix metadata
2665 * allocates super->mpb to be freed by free_imsm
2666 */
2667 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2668 {
2669 unsigned long long dsize;
2670 unsigned long long sectors;
2671 struct stat;
2672 struct imsm_super *anchor;
2673 __u32 check_sum;
2674
2675 get_dev_size(fd, NULL, &dsize);
2676 if (dsize < 1024) {
2677 if (devname)
2678 fprintf(stderr,
2679 Name ": %s: device to small for imsm\n",
2680 devname);
2681 return 1;
2682 }
2683
2684 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2685 if (devname)
2686 fprintf(stderr,
2687 Name ": Cannot seek to anchor block on %s: %s\n",
2688 devname, strerror(errno));
2689 return 1;
2690 }
2691
2692 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
2693 if (devname)
2694 fprintf(stderr,
2695 Name ": Failed to allocate imsm anchor buffer"
2696 " on %s\n", devname);
2697 return 1;
2698 }
2699 if (read(fd, anchor, 512) != 512) {
2700 if (devname)
2701 fprintf(stderr,
2702 Name ": Cannot read anchor block on %s: %s\n",
2703 devname, strerror(errno));
2704 free(anchor);
2705 return 1;
2706 }
2707
2708 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
2709 if (devname)
2710 fprintf(stderr,
2711 Name ": no IMSM anchor on %s\n", devname);
2712 free(anchor);
2713 return 2;
2714 }
2715
2716 __free_imsm(super, 0);
2717 /* reload capability and hba */
2718
2719 /* capability and hba must be updated with new super allocation */
2720 find_intel_hba_capability(fd, super, devname);
2721 super->len = ROUND_UP(anchor->mpb_size, 512);
2722 if (posix_memalign(&super->buf, 512, super->len) != 0) {
2723 if (devname)
2724 fprintf(stderr,
2725 Name ": unable to allocate %zu byte mpb buffer\n",
2726 super->len);
2727 free(anchor);
2728 return 2;
2729 }
2730 memcpy(super->buf, anchor, 512);
2731
2732 sectors = mpb_sectors(anchor) - 1;
2733 free(anchor);
2734 if (!sectors) {
2735 check_sum = __gen_imsm_checksum(super->anchor);
2736 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2737 if (devname)
2738 fprintf(stderr,
2739 Name ": IMSM checksum %x != %x on %s\n",
2740 check_sum,
2741 __le32_to_cpu(super->anchor->check_sum),
2742 devname);
2743 return 2;
2744 }
2745
2746 return 0;
2747 }
2748
2749 /* read the extended mpb */
2750 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
2751 if (devname)
2752 fprintf(stderr,
2753 Name ": Cannot seek to extended mpb on %s: %s\n",
2754 devname, strerror(errno));
2755 return 1;
2756 }
2757
2758 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
2759 if (devname)
2760 fprintf(stderr,
2761 Name ": Cannot read extended mpb on %s: %s\n",
2762 devname, strerror(errno));
2763 return 2;
2764 }
2765
2766 check_sum = __gen_imsm_checksum(super->anchor);
2767 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
2768 if (devname)
2769 fprintf(stderr,
2770 Name ": IMSM checksum %x != %x on %s\n",
2771 check_sum, __le32_to_cpu(super->anchor->check_sum),
2772 devname);
2773 return 3;
2774 }
2775
2776 /* FIXME the BBM log is disk specific so we cannot use this global
2777 * buffer for all disks. Ok for now since we only look at the global
2778 * bbm_log_size parameter to gate assembly
2779 */
2780 super->bbm_log = __get_imsm_bbm_log(super->anchor);
2781
2782 return 0;
2783 }
2784
/* load_and_parse_mpb - convenience wrapper: read the mpb from 'fd',
 * register the disk, then parse the raid devices.  Stops at the first
 * failing step and returns its error code (0 on success).
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (!err)
		err = load_imsm_disk(fd, super, devname, keep_fd);
	if (!err)
		err = parse_raid_devices(super);

	return err;
}
2800
2801 static void __free_imsm_disk(struct dl *d)
2802 {
2803 if (d->fd >= 0)
2804 close(d->fd);
2805 if (d->devname)
2806 free(d->devname);
2807 if (d->e)
2808 free(d->e);
2809 free(d);
2810
2811 }
2812
2813 static void free_imsm_disks(struct intel_super *super)
2814 {
2815 struct dl *d;
2816
2817 while (super->disks) {
2818 d = super->disks;
2819 super->disks = d->next;
2820 __free_imsm_disk(d);
2821 }
2822 while (super->disk_mgmt_list) {
2823 d = super->disk_mgmt_list;
2824 super->disk_mgmt_list = d->next;
2825 __free_imsm_disk(d);
2826 }
2827 while (super->missing) {
2828 d = super->missing;
2829 super->missing = d->next;
2830 __free_imsm_disk(d);
2831 }
2832
2833 }
2834
2835 /* free all the pieces hanging off of a super pointer */
2836 static void __free_imsm(struct intel_super *super, int free_disks)
2837 {
2838 struct intel_hba *elem, *next;
2839
2840 if (super->buf) {
2841 free(super->buf);
2842 super->buf = NULL;
2843 }
2844 /* unlink capability description */
2845 super->orom = NULL;
2846 if (free_disks)
2847 free_imsm_disks(super);
2848 free_devlist(super);
2849 elem = super->hba;
2850 while (elem) {
2851 if (elem->path)
2852 free((void *)elem->path);
2853 next = elem->next;
2854 free(elem);
2855 elem = next;
2856 }
2857 super->hba = NULL;
2858 }
2859
/* free_imsm - release everything hanging off 'super', including its
 * disk lists, then the super itself.
 */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
2865
2866 static void free_super_imsm(struct supertype *st)
2867 {
2868 struct intel_super *super = st->sb;
2869
2870 if (!super)
2871 return;
2872
2873 free_imsm(super);
2874 st->sb = NULL;
2875 }
2876
2877 static struct intel_super *alloc_super(void)
2878 {
2879 struct intel_super *super = malloc(sizeof(*super));
2880
2881 if (super) {
2882 memset(super, 0, sizeof(*super));
2883 super->current_vol = -1;
2884 super->create_offset = ~((__u32 ) 0);
2885 }
2886 return super;
2887 }
2888
2889 /*
2890 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
2891 */
static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
{
	struct sys_dev *hba_name;
	int rv = 0;

	/* platform checks disabled: run without orom/hba knowledge */
	if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
		super->orom = NULL;
		super->hba = NULL;
		return 0;
	}
	hba_name = find_disk_attached_hba(fd, NULL);
	if (!hba_name) {
		if (devname)
			fprintf(stderr,
				Name ": %s is not attached to Intel(R) RAID controller.\n",
				devname);
		return 1;
	}
	rv = attach_hba_to_super(super, hba_name);
	/* rv == 2: disk's controller differs from the container's */
	if (rv == 2) {
		if (devname) {
			struct intel_hba *hba = super->hba;

			fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
				"controller (%s),\n"
				"    but the container is assigned to Intel(R) "
				"%s RAID controller (",
				devname,
				hba_name->path,
				hba_name->pci_id ? : "Err!",
				get_sys_dev_type(hba_name->type));

			/* list every controller already in the container */
			while (hba) {
				fprintf(stderr, "%s", hba->pci_id ? : "Err!");
				if (hba->next)
					fprintf(stderr, ", ");
				hba = hba->next;
			}

			fprintf(stderr, ").\n"
				"    Mixing devices attached to different controllers "
				"is not allowed.\n");
		}
		free_sys_dev(&hba_name);
		return 2;
	}
	/* return 3 when the controller exposes no imsm capability */
	super->orom = find_imsm_capability(hba_name->type);
	free_sys_dev(&hba_name);
	if (!super->orom)
		return 3;
	return 0;
}
2944
2945 #ifndef MDASSEMBLE
2946 /* find_missing - helper routine for load_super_imsm_all that identifies
2947 * disks that have disappeared from the system. This routine relies on
2948 * the mpb being uptodate, which it is at load time.
2949 */
2950 static int find_missing(struct intel_super *super)
2951 {
2952 int i;
2953 struct imsm_super *mpb = super->anchor;
2954 struct dl *dl;
2955 struct imsm_disk *disk;
2956
2957 for (i = 0; i < mpb->num_disks; i++) {
2958 disk = __get_imsm_disk(mpb, i);
2959 dl = serial_to_dl(disk->serial, super);
2960 if (dl)
2961 continue;
2962
2963 dl = malloc(sizeof(*dl));
2964 if (!dl)
2965 return 1;
2966 dl->major = 0;
2967 dl->minor = 0;
2968 dl->fd = -1;
2969 dl->devname = strdup("missing");
2970 dl->index = i;
2971 serialcpy(dl->serial, disk->serial);
2972 dl->disk = *disk;
2973 dl->e = NULL;
2974 dl->next = super->missing;
2975 super->missing = dl;
2976 }
2977
2978 return 0;
2979 }
2980
2981 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
2982 {
2983 struct intel_disk *idisk = disk_list;
2984
2985 while (idisk) {
2986 if (serialcmp(idisk->disk.serial, serial) == 0)
2987 break;
2988 idisk = idisk->next;
2989 }
2990
2991 return idisk;
2992 }
2993
/* __prep_thunderdome - fold one super into the per-family candidate
 * table and the merged imsm_disk list.
 *
 * For each family already in 'table': an identical mpb (same checksum)
 * or a newer generation replaces / keeps the table entry; otherwise the
 * table entry wins and this super is dropped.  Cross-generational
 * failed/configured status is copied between the loser's and winner's
 * disk records either way.  Finally the merged *disk_list is extended
 * with this mpb's disk records.
 *
 * Returns the (possibly grown) table size, or -1 on allocation failure.
 */
static int __prep_thunderdome(struct intel_super **table, int tbl_size,
			      struct intel_super *super,
			      struct intel_disk **disk_list)
{
	struct imsm_disk *d = &super->disks->disk;
	struct imsm_super *mpb = super->anchor;
	int i, j;

	for (i = 0; i < tbl_size; i++) {
		struct imsm_super *tbl_mpb = table[i]->anchor;
		struct imsm_disk *tbl_d = &table[i]->disks->disk;

		if (tbl_mpb->family_num == mpb->family_num) {
			if (tbl_mpb->check_sum == mpb->check_sum) {
				/* identical metadata: keep the table entry */
				dprintf("%s: mpb from %d:%d matches %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);
				break;
			}

			if (((is_configured(d) && !is_configured(tbl_d)) ||
			     is_configured(d) == is_configured(tbl_d)) &&
			    tbl_mpb->generation_num < mpb->generation_num) {
				/* current version of the mpb is a
				 * better candidate than the one in
				 * super_table, but copy over "cross
				 * generational" status
				 */
				struct intel_disk *idisk;

				dprintf("%s: mpb from %d:%d replaces %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				idisk = disk_list_get(tbl_d->serial, *disk_list);
				if (idisk && is_failed(&idisk->disk))
					tbl_d->status |= FAILED_DISK;
				break;
			} else {
				struct intel_disk *idisk;
				struct imsm_disk *disk;

				/* tbl_mpb is more up to date, but copy
				 * over cross generational status before
				 * returning
				 */
				disk = __serial_to_disk(d->serial, mpb, NULL);
				if (disk && is_failed(disk))
					d->status |= FAILED_DISK;

				idisk = disk_list_get(d->serial, *disk_list);
				if (idisk) {
					idisk->owner = i;
					if (disk && is_configured(disk))
						idisk->disk.status |= CONFIGURED_DISK;
				}

				dprintf("%s: mpb from %d:%d prefer %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				return tbl_size;
			}
		}
	}

	/* no match found: new family; otherwise replace the slot we broke on */
	if (i >= tbl_size)
		table[tbl_size++] = super;
	else
		table[i] = super;

	/* update/extend the merged list of imsm_disk records */
	for (j = 0; j < mpb->num_disks; j++) {
		struct imsm_disk *disk = __get_imsm_disk(mpb, j);
		struct intel_disk *idisk;

		idisk = disk_list_get(disk->serial, *disk_list);
		if (idisk) {
			idisk->disk.status |= disk->status;
			/* a disk that is configured or failed anywhere is
			 * no longer a free spare
			 */
			if (is_configured(&idisk->disk) ||
			    is_failed(&idisk->disk))
				idisk->disk.status &= ~(SPARE_DISK);
		} else {
			idisk = calloc(1, sizeof(*idisk));
			if (!idisk)
				return -1;
			idisk->owner = IMSM_UNKNOWN_OWNER;
			idisk->disk = *disk;
			idisk->next = *disk_list;
			*disk_list = idisk;
		}

		if (serialcmp(idisk->disk.serial, d->serial) == 0)
			idisk->owner = i;
	}

	return tbl_size;
}
3098
3099 static struct intel_super *
3100 validate_members(struct intel_super *super, struct intel_disk *disk_list,
3101 const int owner)
3102 {
3103 struct imsm_super *mpb = super->anchor;
3104 int ok_count = 0;
3105 int i;
3106
3107 for (i = 0; i < mpb->num_disks; i++) {
3108 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3109 struct intel_disk *idisk;
3110
3111 idisk = disk_list_get(disk->serial, disk_list);
3112 if (idisk) {
3113 if (idisk->owner == owner ||
3114 idisk->owner == IMSM_UNKNOWN_OWNER)
3115 ok_count++;
3116 else
3117 dprintf("%s: '%.16s' owner %d != %d\n",
3118 __func__, disk->serial, idisk->owner,
3119 owner);
3120 } else {
3121 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3122 __func__, __le32_to_cpu(mpb->family_num), i,
3123 disk->serial);
3124 break;
3125 }
3126 }
3127
3128 if (ok_count == mpb->num_disks)
3129 return super;
3130 return NULL;
3131 }
3132
/* show_conflicts - report every super in 'super_list' whose anchor has
 * the given family_num; used when two populated families compete for
 * one container.
 */
static void show_conflicts(__u32 family_num, struct intel_super *super_list)
{
	struct intel_super *s;

	for (s = super_list; s; s = s->next) {
		if (family_num != s->anchor->family_num)
			continue;
		fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
			__le32_to_cpu(family_num), s->disks->devname);
	}
}
3144
/* imsm_thunderdome - elect one "champion" super from a list of
 * candidate supers (one per disk of a container).
 *
 * Candidates are first deduplicated per family via __prep_thunderdome,
 * each surviving entry is validated against the merged disk list, then
 * one populated family (or, failing that, a spare) is chosen.  All dl
 * records from the losers are re-homed onto the champion, which is
 * unlinked from *super_list and returned (NULL when nothing survives).
 */
static struct intel_super *
imsm_thunderdome(struct intel_super **super_list, int len)
{
	struct intel_super *super_table[len];
	struct intel_disk *disk_list = NULL;
	struct intel_super *champion, *spare;
	struct intel_super *s, **del;
	int tbl_size = 0;
	int conflict;
	int i;

	/* build the per-family candidate table and merged disk list */
	memset(super_table, 0, sizeof(super_table));
	for (s = *super_list; s; s = s->next)
		tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);

	for (i = 0; i < tbl_size; i++) {
		struct imsm_disk *d;
		struct intel_disk *idisk;
		struct imsm_super *mpb = super_table[i]->anchor;

		s = super_table[i];
		d = &s->disks->disk;

		/* 'd' must appear in merged disk list for its
		 * configuration to be valid
		 */
		idisk = disk_list_get(d->serial, disk_list);
		if (idisk && idisk->owner == i)
			s = validate_members(s, disk_list, i);
		else
			s = NULL;

		if (!s)
			dprintf("%s: marking family: %#x from %d:%d offline\n",
				__func__, mpb->family_num,
				super_table[i]->disks->major,
				super_table[i]->disks->minor);
		super_table[i] = s;
	}

	/* This is where the mdadm implementation differs from the Windows
	 * driver which has no strict concept of a container.  We can only
	 * assemble one family from a container, so when returning a prodigal
	 * array member to this system the code will not be able to disambiguate
	 * the container contents that should be assembled ("foreign" versus
	 * "local").  It requires user intervention to set the orig_family_num
	 * to a new value to establish a new container.  The Windows driver in
	 * this situation fixes up the volume name in place and manages the
	 * foreign array as an independent entity.
	 */
	s = NULL;
	spare = NULL;
	conflict = 0;
	for (i = 0; i < tbl_size; i++) {
		struct intel_super *tbl_ent = super_table[i];
		int is_spare = 0;

		if (!tbl_ent)
			continue;

		if (tbl_ent->anchor->num_raid_devs == 0) {
			spare = tbl_ent;
			is_spare = 1;
		}

		/* first populated family wins; later ones are conflicts */
		if (s && !is_spare) {
			show_conflicts(tbl_ent->anchor->family_num, *super_list);
			conflict++;
		} else if (!s && !is_spare)
			s = tbl_ent;
	}

	/* fall back to a spare-only container when no family validated */
	if (!s)
		s = spare;
	if (!s) {
		champion = NULL;
		goto out;
	}
	champion = s;

	if (conflict)
		fprintf(stderr, "Chose family %#x on '%s', "
			"assemble conflicts to new container with '--update=uuid'\n",
			__le32_to_cpu(s->anchor->family_num), s->disks->devname);

	/* collect all dl's onto 'champion', and update them to
	 * champion's version of the status
	 */
	for (s = *super_list; s; s = s->next) {
		struct imsm_super *mpb = champion->anchor;
		struct dl *dl = s->disks;

		if (s == champion)
			continue;

		for (i = 0; i < mpb->num_disks; i++) {
			struct imsm_disk *disk;

			disk = __serial_to_disk(dl->serial, mpb, &dl->index);
			if (disk) {
				dl->disk = *disk;
				/* only set index on disks that are a member of
				 * a populated contianer, i.e. one with
				 * raid_devs
				 */
				if (is_failed(&dl->disk))
					dl->index = -2;
				else if (is_spare(&dl->disk))
					dl->index = -1;
				break;
			}
		}

		if (i >= mpb->num_disks) {
			struct intel_disk *idisk;

			/* not in the champion's anchor: classify via the
			 * merged disk list (healthy spare vs. unknown)
			 */
			idisk = disk_list_get(dl->serial, disk_list);
			if (idisk && is_spare(&idisk->disk) &&
			    !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
				dl->index = -1;
			else {
				dl->index = -2;
				continue;
			}
		}

		dl->next = champion->disks;
		champion->disks = dl;
		s->disks = NULL;
	}

	/* delete 'champion' from super_list */
	for (del = super_list; *del; ) {
		if (*del == champion) {
			*del = (*del)->next;
			break;
		} else
			del = &(*del)->next;
	}
	champion->next = NULL;

 out:
	while (disk_list) {
		struct intel_disk *idisk = disk_list;

		disk_list = disk_list->next;
		free(idisk);
	}

	return champion;
}
3296
/* Load and reconcile imsm metadata from every disk of the container open
 * at 'fd'.  Each disk's mpb is loaded into its own intel_super, then
 * imsm_thunderdome() picks a single winner which is stored in *sbp.
 * Returns 0 on success, non-zero error code otherwise.
 */
static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
			       char *devname)
{
	struct mdinfo *sra;
	struct intel_super *super_list = NULL;
	struct intel_super *super = NULL;
	int devnum = fd2devnum(fd);
	struct mdinfo *sd;
	int retry;
	int err = 0;
	int i;

	/* check if 'fd' an opened container */
	sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
	if (!sra)
		return 1;

	/* external-metadata containers report -1/-2 and "imsm" as version */
	if (sra->array.major_version != -1 ||
	    sra->array.minor_version != -2 ||
	    strcmp(sra->text_version, "imsm") != 0) {
		err = 1;
		goto error;
	}
	/* load all mpbs; each new super is linked onto super_list first so
	 * the error path below can free everything loaded so far
	 */
	for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
		struct intel_super *s = alloc_super();
		char nm[32];
		int dfd;
		int rv;

		err = 1;
		if (!s)
			goto error;
		s->next = super_list;
		super_list = s;

		err = 2;
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		dfd = dev_open(nm, O_RDWR);
		if (dfd < 0)
			goto error;

		rv = find_intel_hba_capability(dfd, s, devname);
		/* no orom/efi or non-intel hba of the disk */
		if (rv != 0)
			goto error;

		err = load_and_parse_mpb(dfd, s, NULL, 1);

		/* retry the load if we might have raced against mdmon */
		if (err == 3 && mdmon_running(devnum))
			for (retry = 0; retry < 3; retry++) {
				usleep(3000);
				err = load_and_parse_mpb(dfd, s, NULL, 1);
				if (err != 3)
					break;
			}
		if (err)
			goto error;
	}

	/* all mpbs enter, maybe one leaves */
	super = imsm_thunderdome(&super_list, i);
	if (!super) {
		err = 1;
		goto error;
	}

	if (find_missing(super) != 0) {
		free_imsm(super);
		err = 2;
		goto error;
	}
	err = 0;

 error:
	/* the winner was unlinked from super_list by imsm_thunderdome(),
	 * so this frees only the losers (or everything, on error)
	 */
	while (super_list) {
		struct intel_super *s = super_list;

		super_list = super_list->next;
		free_imsm(s);
	}
	sysfs_free(sra);

	if (err)
		return err;

	*sbp = super;
	st->container_dev = devnum;
	/* err is always 0 here (non-zero returned above); the extra check
	 * is redundant but harmless
	 */
	if (err == 0 && st->ss == NULL) {
		st->ss = &super_imsm;
		st->minor_version = 0;
		st->max_devs = IMSM_MAX_DEVICES;
	}
	return 0;
}
3393
3394 static int load_container_imsm(struct supertype *st, int fd, char *devname)
3395 {
3396 return load_super_imsm_all(st, fd, &st->sb, devname);
3397 }
3398 #endif
3399
3400 static int load_super_imsm(struct supertype *st, int fd, char *devname)
3401 {
3402 struct intel_super *super;
3403 int rv;
3404
3405 if (test_partition(fd))
3406 /* IMSM not allowed on partitions */
3407 return 1;
3408
3409 free_super_imsm(st);
3410
3411 super = alloc_super();
3412 if (!super) {
3413 fprintf(stderr,
3414 Name ": malloc of %zu failed.\n",
3415 sizeof(*super));
3416 return 1;
3417 }
3418 /* Load hba and capabilities if they exist.
3419 * But do not preclude loading metadata in case capabilities or hba are
3420 * non-compliant and ignore_hw_compat is set.
3421 */
3422 rv = find_intel_hba_capability(fd, super, devname);
3423 /* no orom/efi or non-intel hba of the disk */
3424 if ((rv != 0) && (st->ignore_hw_compat == 0)) {
3425 if (devname)
3426 fprintf(stderr,
3427 Name ": No OROM/EFI properties for %s\n", devname);
3428 free_imsm(super);
3429 return 2;
3430 }
3431 rv = load_and_parse_mpb(fd, super, devname, 0);
3432
3433 if (rv) {
3434 if (devname)
3435 fprintf(stderr,
3436 Name ": Failed to load all information "
3437 "sections on %s\n", devname);
3438 free_imsm(super);
3439 return rv;
3440 }
3441
3442 st->sb = super;
3443 if (st->ss == NULL) {
3444 st->ss = &super_imsm;
3445 st->minor_version = 0;
3446 st->max_devs = IMSM_MAX_DEVICES;
3447 }
3448 return 0;
3449 }
3450
3451 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3452 {
3453 if (info->level == 1)
3454 return 128;
3455 return info->chunk_size >> 9;
3456 }
3457
3458 static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
3459 {
3460 __u32 num_stripes;
3461
3462 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
3463 num_stripes /= num_domains;
3464
3465 return num_stripes;
3466 }
3467
3468 static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3469 {
3470 if (info->level == 1)
3471 return info->size * 2;
3472 else
3473 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
3474 }
3475
/* Keep the mpb signature version string and attribute bits consistent
 * with the feature set actually used by the defined volumes.
 */
static void imsm_update_version_info(struct intel_super *super)
{
	/* update the version and attributes */
	struct imsm_super *mpb = super->anchor;
	char *version;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		/* a non-zero high word of the size means >2TB support needed */
		if (__le32_to_cpu(dev->size_high) > 0)
			mpb->attributes |= MPB_ATTRIB_2TB;

		/* FIXME detect when an array spans a port multiplier */
#if 0
		mpb->attributes |= MPB_ATTRIB_PM;
#endif

		if (mpb->num_raid_devs > 1 ||
		    mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
			/* extra attribute bits in use: require the
			 * attribute-aware version and record the raid level
			 * as an attribute bit
			 */
			version = MPB_VERSION_ATTRIBS;
			switch (get_imsm_raid_level(map)) {
			case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
			case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
			case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
			case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
			}
		} else {
			/* first matching feature wins - the order of this
			 * chain is significant
			 */
			if (map->num_members >= 5)
				version = MPB_VERSION_5OR6_DISK_ARRAY;
			else if (dev->status == DEV_CLONE_N_GO)
				version = MPB_VERSION_CNG;
			else if (get_imsm_raid_level(map) == 5)
				version = MPB_VERSION_RAID5;
			else if (map->num_members >= 3)
				version = MPB_VERSION_3OR4_DISK_ARRAY;
			else if (get_imsm_raid_level(map) == 1)
				version = MPB_VERSION_RAID1;
			else
				version = MPB_VERSION_RAID0;
		}
		/* the version string sits immediately after the signature
		 * text inside sig[]
		 */
		strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
	}
}
3522
3523 static int check_name(struct intel_super *super, char *name, int quiet)
3524 {
3525 struct imsm_super *mpb = super->anchor;
3526 char *reason = NULL;
3527 int i;
3528
3529 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3530 reason = "must be 16 characters or less";
3531
3532 for (i = 0; i < mpb->num_raid_devs; i++) {
3533 struct imsm_dev *dev = get_imsm_dev(super, i);
3534
3535 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3536 reason = "already exists";
3537 break;
3538 }
3539 }
3540
3541 if (reason && !quiet)
3542 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3543
3544 return !reason;
3545 }
3546
3547 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3548 unsigned long long size, char *name,
3549 char *homehost, int *uuid)
3550 {
3551 /* We are creating a volume inside a pre-existing container.
3552 * so st->sb is already set.
3553 */
3554 struct intel_super *super = st->sb;
3555 struct imsm_super *mpb = super->anchor;
3556 struct intel_dev *dv;
3557 struct imsm_dev *dev;
3558 struct imsm_vol *vol;
3559 struct imsm_map *map;
3560 int idx = mpb->num_raid_devs;
3561 int i;
3562 unsigned long long array_blocks;
3563 size_t size_old, size_new;
3564 __u32 num_data_stripes;
3565
3566 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
3567 fprintf(stderr, Name": This imsm-container already has the "
3568 "maximum of %d volumes\n", super->orom->vpa);
3569 return 0;
3570 }
3571
3572 /* ensure the mpb is large enough for the new data */
3573 size_old = __le32_to_cpu(mpb->mpb_size);
3574 size_new = disks_to_mpb_size(info->nr_disks);
3575 if (size_new > size_old) {
3576 void *mpb_new;
3577 size_t size_round = ROUND_UP(size_new, 512);
3578
3579 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3580 fprintf(stderr, Name": could not allocate new mpb\n");
3581 return 0;
3582 }
3583 memcpy(mpb_new, mpb, size_old);
3584 free(mpb);
3585 mpb = mpb_new;
3586 super->anchor = mpb_new;
3587 mpb->mpb_size = __cpu_to_le32(size_new);
3588 memset(mpb_new + size_old, 0, size_round - size_old);
3589 }
3590 super->current_vol = idx;
3591 /* when creating the first raid device in this container set num_disks
3592 * to zero, i.e. delete this spare and add raid member devices in
3593 * add_to_super_imsm_volume()
3594 */
3595 if (super->current_vol == 0)
3596 mpb->num_disks = 0;
3597
3598 if (!check_name(super, name, 0))
3599 return 0;
3600 dv = malloc(sizeof(*dv));
3601 if (!dv) {
3602 fprintf(stderr, Name ": failed to allocate device list entry\n");
3603 return 0;
3604 }
3605 dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
3606 if (!dev) {
3607 free(dv);
3608 fprintf(stderr, Name": could not allocate raid device\n");
3609 return 0;
3610 }
3611
3612 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
3613 if (info->level == 1)
3614 array_blocks = info_to_blocks_per_member(info);
3615 else
3616 array_blocks = calc_array_size(info->level, info->raid_disks,
3617 info->layout, info->chunk_size,
3618 info->size*2);
3619 /* round array size down to closest MB */
3620 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
3621
3622 dev->size_low = __cpu_to_le32((__u32) array_blocks);
3623 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
3624 dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
3625 vol = &dev->vol;
3626 vol->migr_state = 0;
3627 set_migr_type(dev, MIGR_INIT);
3628 vol->dirty = 0;
3629 vol->curr_migr_unit = 0;
3630 map = get_imsm_map(dev, 0);
3631 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
3632 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
3633 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
3634 map->failed_disk_num = ~0;
3635 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
3636 IMSM_T_STATE_NORMAL;
3637 map->ddf = 1;
3638
3639 if (info->level == 1 && info->raid_disks > 2) {
3640 free(dev);
3641 free(dv);
3642 fprintf(stderr, Name": imsm does not support more than 2 disks"
3643 "in a raid1 volume\n");
3644 return 0;
3645 }
3646
3647 map->raid_level = info->level;
3648 if (info->level == 10) {
3649 map->raid_level = 1;
3650 map->num_domains = info->raid_disks / 2;
3651 } else if (info->level == 1)
3652 map->num_domains = info->raid_disks;
3653 else
3654 map->num_domains = 1;
3655
3656 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
3657 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
3658
3659 map->num_members = info->raid_disks;
3660 for (i = 0; i < map->num_members; i++) {
3661 /* initialized in add_to_super */
3662 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
3663 }
3664 mpb->num_raid_devs++;
3665
3666 dv->dev = dev;
3667 dv->index = super->current_vol;
3668 dv->next = super->devlist;
3669 super->devlist = dv;
3670
3671 imsm_update_version_info(super);
3672
3673 return 1;
3674 }
3675
3676 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
3677 unsigned long long size, char *name,
3678 char *homehost, int *uuid)
3679 {
3680 /* This is primarily called by Create when creating a new array.
3681 * We will then get add_to_super called for each component, and then
3682 * write_init_super called to write it out to each device.
3683 * For IMSM, Create can create on fresh devices or on a pre-existing
3684 * array.
3685 * To create on a pre-existing array a different method will be called.
3686 * This one is just for fresh drives.
3687 */
3688 struct intel_super *super;
3689 struct imsm_super *mpb;
3690 size_t mpb_size;
3691 char *version;
3692
3693 if (st->sb)
3694 return init_super_imsm_volume(st, info, size, name, homehost, uuid);
3695
3696 if (info)
3697 mpb_size = disks_to_mpb_size(info->nr_disks);
3698 else
3699 mpb_size = 512;
3700
3701 super = alloc_super();
3702 if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
3703 free(super);
3704 super = NULL;
3705 }
3706 if (!super) {
3707 fprintf(stderr, Name
3708 ": %s could not allocate superblock\n", __func__);
3709 return 0;
3710 }
3711 memset(super->buf, 0, mpb_size);
3712 mpb = super->buf;
3713 mpb->mpb_size = __cpu_to_le32(mpb_size);
3714 st->sb = super;
3715
3716 if (info == NULL) {
3717 /* zeroing superblock */
3718 return 0;
3719 }
3720
3721 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
3722
3723 version = (char *) mpb->sig;
3724 strcpy(version, MPB_SIGNATURE);
3725 version += strlen(MPB_SIGNATURE);
3726 strcpy(version, MPB_VERSION_RAID0);
3727
3728 return 1;
3729 }
3730
3731 #ifndef MDASSEMBLE
/* Attach one container disk to the volume currently being created
 * (super->current_vol): mark it in the map's ord table and, for the first
 * volume, (re)generate the container family number.
 * Returns 0 on success, 1 on error.
 */
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
				    int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int slot;

	dev = get_imsm_dev(super, super->current_vol);
	map = get_imsm_map(dev, 0);

	/* only in-sync members may be added at create time */
	if (! (dk->state & (1<<MD_DISK_SYNC))) {
		fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
			devname);
		return 1;
	}

	if (fd == -1) {
		/* we're doing autolayout so grab the pre-marked (in
		 * validate_geometry) raid_disk
		 */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		/* explicit device: match by major:minor */
		for (dl = super->disks; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}

	if (!dl) {
		fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
		return 1;
	}

	/* add a pristine spare to the metadata */
	if (dl->index < 0) {
		dl->index = super->anchor->num_disks;
		super->anchor->num_disks++;
	}
	/* Check the device has not already been added */
	slot = get_imsm_disk_slot(map, dl->index);
	if (slot >= 0 &&
	    (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
		fprintf(stderr, Name ": %s has been included in this array twice\n",
			devname);
		return 1;
	}
	/* NOTE(review): the slot used here is dk->number, not dk->raid_disk
	 * - confirm callers always pass number equal to the raid slot
	 */
	set_imsm_ord_tbl_ent(map, dk->number, dl->index);
	dl->disk.status = CONFIGURED_DISK;

	/* if we are creating the first raid device update the family number */
	if (super->current_vol == 0) {
		__u32 sum;
		struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
		struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);

		if (!_dev || !_disk) {
			fprintf(stderr, Name ": BUG mpb setup error\n");
			return 1;
		}
		*_dev = *dev;
		*_disk = dl->disk;
		/* random component keeps re-created containers distinct
		 * even when their contents are identical
		 */
		sum = random32();
		sum += __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
		mpb->orig_family_num = mpb->family_num;
	}

	return 0;
}
3806
3807
3808 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
3809 int fd, char *devname)
3810 {
3811 struct intel_super *super = st->sb;
3812 struct dl *dd;
3813 unsigned long long size;
3814 __u32 id;
3815 int rv;
3816 struct stat stb;
3817
3818 /* If we are on an RAID enabled platform check that the disk is
3819 * attached to the raid controller.
3820 * We do not need to test disks attachment for container based additions,
3821 * they shall be already tested when container was created/assembled.
3822 */
3823 rv = find_intel_hba_capability(fd, super, devname);
3824 /* no orom/efi or non-intel hba of the disk */
3825 if (rv != 0) {
3826 dprintf("capability: %p fd: %d ret: %d\n",
3827 super->orom, fd, rv);
3828 return 1;
3829 }
3830
3831 if (super->current_vol >= 0)
3832 return add_to_super_imsm_volume(st, dk, fd, devname);
3833
3834 fstat(fd, &stb);
3835 dd = malloc(sizeof(*dd));
3836 if (!dd) {
3837 fprintf(stderr,
3838 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
3839 return 1;
3840 }
3841 memset(dd, 0, sizeof(*dd));
3842 dd->major = major(stb.st_rdev);
3843 dd->minor = minor(stb.st_rdev);
3844 dd->index = -1;
3845 dd->devname = devname ? strdup(devname) : NULL;
3846 dd->fd = fd;
3847 dd->e = NULL;
3848 dd->action = DISK_ADD;
3849 rv = imsm_read_serial(fd, devname, dd->serial);
3850 if (rv) {
3851 fprintf(stderr,
3852 Name ": failed to retrieve scsi serial, aborting\n");
3853 free(dd);
3854 abort();
3855 }
3856
3857 get_dev_size(fd, NULL, &size);
3858 size /= 512;
3859 serialcpy(dd->disk.serial, dd->serial);
3860 dd->disk.total_blocks = __cpu_to_le32(size);
3861 dd->disk.status = SPARE_DISK;
3862 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
3863 dd->disk.scsi_id = __cpu_to_le32(id);
3864 else
3865 dd->disk.scsi_id = __cpu_to_le32(0);
3866
3867 if (st->update_tail) {
3868 dd->next = super->disk_mgmt_list;
3869 super->disk_mgmt_list = dd;
3870 } else {
3871 dd->next = super->disks;
3872 super->disks = dd;
3873 super->updates_pending++;
3874 }
3875
3876 return 0;
3877 }
3878
3879
3880 static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
3881 {
3882 struct intel_super *super = st->sb;
3883 struct dl *dd;
3884
3885 /* remove from super works only in mdmon - for communication
3886 * manager - monitor. Check if communication memory buffer
3887 * is prepared.
3888 */
3889 if (!st->update_tail) {
3890 fprintf(stderr,
3891 Name ": %s shall be used in mdmon context only"
3892 "(line %d).\n", __func__, __LINE__);
3893 return 1;
3894 }
3895 dd = malloc(sizeof(*dd));
3896 if (!dd) {
3897 fprintf(stderr,
3898 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
3899 return 1;
3900 }
3901 memset(dd, 0, sizeof(*dd));
3902 dd->major = dk->major;
3903 dd->minor = dk->minor;
3904 dd->index = -1;
3905 dd->fd = -1;
3906 dd->disk.status = SPARE_DISK;
3907 dd->action = DISK_REMOVE;
3908
3909 dd->next = super->disk_mgmt_list;
3910 super->disk_mgmt_list = dd;
3911
3912
3913 return 0;
3914 }
3915
3916 static int store_imsm_mpb(int fd, struct imsm_super *mpb);
3917
/* scratch anchor used by write_super_imsm_spares(); sized and aligned to
 * one 512-byte sector - presumably so it can be handed directly to
 * store_imsm_mpb() for sector-granular i/o (TODO confirm)
 */
static union {
	char buf[512];
	struct imsm_super anchor;
} spare_record __attribute__ ((aligned(512)));
3922
3923 /* spare records have their own family number and do not have any defined raid
3924 * devices
3925 */
3926 static int write_super_imsm_spares(struct intel_super *super, int doclose)
3927 {
3928 struct imsm_super *mpb = super->anchor;
3929 struct imsm_super *spare = &spare_record.anchor;
3930 __u32 sum;
3931 struct dl *d;
3932
3933 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
3934 spare->generation_num = __cpu_to_le32(1UL),
3935 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
3936 spare->num_disks = 1,
3937 spare->num_raid_devs = 0,
3938 spare->cache_size = mpb->cache_size,
3939 spare->pwr_cycle_count = __cpu_to_le32(1),
3940
3941 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
3942 MPB_SIGNATURE MPB_VERSION_RAID0);
3943
3944 for (d = super->disks; d; d = d->next) {
3945 if (d->index != -1)
3946 continue;
3947
3948 spare->disk[0] = d->disk;
3949 sum = __gen_imsm_checksum(spare);
3950 spare->family_num = __cpu_to_le32(sum);
3951 spare->orig_family_num = 0;
3952 sum = __gen_imsm_checksum(spare);
3953 spare->check_sum = __cpu_to_le32(sum);
3954
3955 if (store_imsm_mpb(d->fd, spare)) {
3956 fprintf(stderr, "%s: failed for device %d:%d %s\n",
3957 __func__, d->major, d->minor, strerror(errno));
3958 return 1;
3959 }
3960 if (doclose) {
3961 close(d->fd);
3962 d->fd = -1;
3963 }
3964 }
3965
3966 return 0;
3967 }
3968
/* Rebuild the container anchor (disk table, volume copies, checksum) and
 * write it to every raid-member disk; spare-only records are handled by
 * write_super_imsm_spares().  Returns 0 on success.
 */
static int write_super_imsm(struct supertype *st, int doclose)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *d;
	__u32 generation;
	__u32 sum;
	int spares = 0;
	int i;
	__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
	int num_disks = 0;

	/* 'generation' is incremented everytime the metadata is written */
	generation = __le32_to_cpu(mpb->generation_num);
	generation++;
	mpb->generation_num = __cpu_to_le32(generation);

	/* fix up cases where previous mdadm releases failed to set
	 * orig_family_num
	 */
	if (mpb->orig_family_num == 0)
		mpb->orig_family_num = mpb->family_num;

	/* refresh the disk table; index -1 marks a spare that is not a
	 * member of any raid device
	 */
	for (d = super->disks; d; d = d->next) {
		if (d->index == -1)
			spares++;
		else {
			mpb->disk[d->index] = d->disk;
			num_disks++;
		}
	}
	for (d = super->missing; d; d = d->next) {
		mpb->disk[d->index] = d->disk;
		num_disks++;
	}
	mpb->num_disks = num_disks;
	mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;

	/* copy each volume definition into the anchor, accounting for its
	 * variable size
	 */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		struct imsm_dev *dev2 = get_imsm_dev(super, i);
		if (dev && dev2) {
			imsm_copy_dev(dev, dev2);
			mpb_size += sizeof_imsm_dev(dev, 0);
		}
	}
	mpb_size += __le32_to_cpu(mpb->bbm_log_size);
	mpb->mpb_size = __cpu_to_le32(mpb_size);

	/* recalculate checksum */
	sum = __gen_imsm_checksum(mpb);
	mpb->check_sum = __cpu_to_le32(sum);

	/* write the mpb for disks that compose raid devices */
	for (d = super->disks; d ; d = d->next) {
		if (d->index < 0)
			continue;
		if (store_imsm_mpb(d->fd, mpb))
			fprintf(stderr, "%s: failed for device %d:%d %s\n",
				__func__, d->major, d->minor, strerror(errno));
		if (doclose) {
			close(d->fd);
			d->fd = -1;
		}
	}

	if (spares)
		return write_super_imsm_spares(super, doclose);

	return 0;
}
4040
4041
/* Queue an update_create_array metadata update for volume 'dev_idx',
 * carrying the volume definition plus the serial of every member disk.
 * Returns 0 on success, 1 on allocation failure.
 */
static int create_array(struct supertype *st, int dev_idx)
{
	size_t len;
	struct imsm_update_create_array *u;
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct disk_info *inf;
	struct imsm_disk *disk;
	int i;

	/* update = header + variable-size dev + one disk_info per member */
	len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
	      sizeof(*inf) * map->num_members;
	u = malloc(len);
	if (!u) {
		fprintf(stderr, "%s: failed to allocate update buffer\n",
			__func__);
		return 1;
	}

	u->type = update_create_array;
	u->dev_idx = dev_idx;
	imsm_copy_dev(&u->dev, dev);
	inf = get_disk_info(u);
	for (i = 0; i < map->num_members; i++) {
		int idx = get_imsm_disk_idx(dev, i, -1);

		/* NOTE(review): get_imsm_disk() could return NULL for a
		 * missing index - confirm members are always present at
		 * create time, otherwise this dereferences NULL
		 */
		disk = get_imsm_disk(super, idx);
		serialcpy(inf[i].serial, disk->serial);
	}
	append_metadata_update(st, u, len);

	return 0;
}
4076
4077 static int mgmt_disk(struct supertype *st)
4078 {
4079 struct intel_super *super = st->sb;
4080 size_t len;
4081 struct imsm_update_add_remove_disk *u;
4082
4083 if (!super->disk_mgmt_list)
4084 return 0;
4085
4086 len = sizeof(*u);
4087 u = malloc(len);
4088 if (!u) {
4089 fprintf(stderr, "%s: failed to allocate update buffer\n",
4090 __func__);
4091 return 1;
4092 }
4093
4094 u->type = update_add_remove_disk;
4095 append_metadata_update(st, u, len);
4096
4097 return 0;
4098 }
4099
4100 static int write_init_super_imsm(struct supertype *st)
4101 {
4102 struct intel_super *super = st->sb;
4103 int current_vol = super->current_vol;
4104
4105 /* we are done with current_vol reset it to point st at the container */
4106 super->current_vol = -1;
4107
4108 if (st->update_tail) {
4109 /* queue the recently created array / added disk
4110 * as a metadata update */
4111 int rv;
4112
4113 /* determine if we are creating a volume or adding a disk */
4114 if (current_vol < 0) {
4115 /* in the mgmt (add/remove) disk case we are running
4116 * in mdmon context, so don't close fd's
4117 */
4118 return mgmt_disk(st);
4119 } else
4120 rv = create_array(st, current_vol);
4121
4122 return rv;
4123 } else {
4124 struct dl *d;
4125 for (d = super->disks; d; d = d->next)
4126 Kill(d->devname, NULL, 0, 1, 1);
4127 return write_super_imsm(st, 1);
4128 }
4129 }
4130 #endif
4131
/* Write the currently loaded anchor to 'fd'.  Returns non-zero when no
 * superblock is loaded or when built without store support (MDASSEMBLE).
 */
static int store_super_imsm(struct supertype *st, int fd)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super ? super->anchor : NULL;

	if (!mpb)
		return 1;

#ifndef MDASSEMBLE
	return store_imsm_mpb(fd, mpb);
#else
	return 1;
#endif
}
4146
/* size in bytes of the bad-block-management log carried in the anchor */
static int imsm_bbm_log_size(struct imsm_super *mpb)
{
	return __le32_to_cpu(mpb->bbm_log_size);
}
4151
4152 #ifndef MDASSEMBLE
/* Check whether 'dev' can become a member of a new imsm container with
 * 'raiddisks' total disks: it must open, have a supported platform hba,
 * and the disk count must fit the orom limit.  On success stores the
 * usable size (sectors) in *freesize and returns 1; returns 0 on failure.
 * (A NULL dev also returns 1 - "a container is always possible".)
 */
static int validate_geometry_imsm_container(struct supertype *st, int level,
					    int layout, int raiddisks, int chunk,
					    unsigned long long size, char *dev,
					    unsigned long long *freesize,
					    int verbose)
{
	int fd;
	unsigned long long ldsize;
	struct intel_super *super=NULL;
	int rv = 0;

	if (level != LEVEL_CONTAINER)
		return 0;
	if (!dev)
		return 1;

	/* O_EXCL guards against devices that are already in use */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd < 0) {
		if (verbose)
			fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	if (!get_dev_size(fd, dev, &ldsize)) {
		close(fd);
		return 0;
	}

	/* capabilities retrieve could be possible
	 * note that there is no fd for the disks in array.
	 */
	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		close(fd);
		return 0;
	}

	rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
	if (rv != 0) {
#if DEBUG
		char str[256];
		fd2devname(fd, str);
		dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
			fd, str, super->orom, rv, raiddisks);
#endif
		/* no orom/efi or non-intel hba of the disk */
		close(fd);
		free_imsm(super);
		return 0;
	}
	close(fd);
	/* enforce the platform's total-disk limit when an orom is present */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);

		free_imsm(super);
		return 0;
	}

	*freesize = avail_size_imsm(st, ldsize >> 9);
	free_imsm(super);

	return 1;
}
4222
/* Return the size of the merged extent starting at e[*idx], coalescing
 * any following extents that overlap it; *idx is advanced to the first
 * extent beyond the merged region.  A zero-sized extent acts as a list
 * terminator (returns 0).
 */
static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
{
	const unsigned long long base_start = e[*idx].start;
	unsigned long long end = base_start + e[*idx].size;
	int i;

	/* zero-size extent: terminator */
	if (base_start == end)
		return 0;

	*idx = *idx + 1;
	for (i = *idx; i < num_extents; i++) {
		/* extend overlapping extents */
		if (e[i].start >= base_start &&
		    e[i].start <= end) {
			if (e[i].size == 0)
				return 0;
			if (e[i].start + e[i].size > end)
				end = e[i].start + e[i].size;
		} else if (e[i].start > end) {
			/* disjoint extent: the merged region ends here */
			*idx = i;
			break;
		}
	}

	return end - base_start;
}
4249
/* Compose the used extents of all container disks and return the largest
 * region (sectors) that is free at a common offset on every disk; the
 * chosen start (plus any inter-volume reserve) is recorded in
 * super->create_offset.  Returns 0 when no region exists or on failure.
 */
static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
{
	/* build a composite disk with all known extents and generate a new
	 * 'maxsize' given the "all disks in an array must share a common start
	 * offset" constraint
	 */
	struct extent *e = calloc(sum_extents, sizeof(*e));
	struct dl *dl;
	int i, j;
	int start_extent;
	unsigned long long pos;
	unsigned long long start = 0;
	unsigned long long maxsize;
	unsigned long reserve;

	if (!e)
		return 0;

	/* coalesce and sort all extents. also, check to see if we need to
	 * reserve space between member arrays
	 */
	j = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		if (!dl->e)
			continue;
		for (i = 0; i < dl->extent_cnt; i++)
			e[j++] = dl->e[i];
	}
	qsort(e, sum_extents, sizeof(*e), cmp_extent);

	/* merge extents */
	i = 0;
	j = 0;
	while (i < sum_extents) {
		e[j].start = e[i].start;
		e[j].size = find_size(e, &i, sum_extents);
		j++;
		if (e[j-1].size == 0)
			break;
	}

	/* scan the gaps between merged used-extents for the largest free
	 * region.  NOTE(review): the do/while relies on a zero-size
	 * terminator extent ending the merged list - confirm every dl->e
	 * carries one
	 */
	pos = 0;
	maxsize = 0;
	start_extent = 0;
	i = 0;
	do {
		unsigned long long esize;

		esize = e[i].start - pos;
		if (esize >= maxsize) {
			maxsize = esize;
			start = pos;
			start_extent = i;
		}
		pos = e[i].start + e[i].size;
		i++;
	} while (e[i-1].size);
	free(e);

	if (maxsize == 0)
		return 0;

	/* FIXME assumes volume at offset 0 is the first volume in a
	 * container
	 */
	if (start_extent > 0)
		reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
	else
		reserve = 0;

	if (maxsize < reserve)
		return 0;

	super->create_offset = ~((__u32) 0);
	if (start + reserve > super->create_offset)
		return 0; /* start overflows create_offset */
	super->create_offset = start + reserve;

	return maxsize - reserve;
}
4330
/* Decide whether the platform (orom) supports 'level' with 'raiddisks'
 * members.  With no orom present, anything except raid4/raid6 (or a
 * negative level) is allowed.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	/* raid4 and raid6 are never supported by imsm */
	if (level == 4 || level == 6 || level < 0)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (!orom)
		return 1;

	switch (level) {
	case 0:
		return imsm_orom_has_raid0(orom);
	case 1:
		/* two disks is plain raid1; more disks means raid1e */
		return raiddisks > 2
			? imsm_orom_has_raid1e(orom)
			: (imsm_orom_has_raid1(orom) && raiddisks == 2);
	case 5:
		return imsm_orom_has_raid5(orom) && raiddisks > 2;
	case 10:
		return imsm_orom_has_raid10(orom) && raiddisks == 4;
	}

	return 0;
}
4352
4353
4354 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4355 /*
4356 * validate volume parameters with OROM/EFI capabilities
4357 */
static int
validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
			    int raiddisks, int *chunk, int verbose)
{
#if DEBUG
	verbose = 1;
#endif
	/* validate container capabilities */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);
		return 0;
	}

	/* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
	if (super->orom && (!is_raid_level_supported(super->orom, level,
						     raiddisks))) {
		pr_vrb(": platform does not support raid%d with %d disk%s\n",
		       level, raiddisks, raiddisks > 1 ? "s" : "");
		return 0;
	}
	/* chunk size is not validated for raid1; an unset chunk is replaced
	 * with the orom default, an explicit one must match the orom's
	 * supported set
	 */
	if (super->orom && level != 1) {
		if (chunk && (*chunk == 0 || *chunk == UnSet))
			*chunk = imsm_orom_default_chunk(super->orom);
		else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
			pr_vrb(": platform does not support a chunk size of: "
			       "%d\n", *chunk);
			return 0;
		}
	}
	/* imsm supports exactly one layout per raid level */
	if (layout != imsm_level_to_layout(level)) {
		if (level == 5)
			pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
		else if (level == 10)
			pr_vrb(": imsm raid 10 only supports the n2 layout\n");
		else
			pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
			       layout, level);
		return 0;
	}
	return 1;
}
4402
4403 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4404 * FIX ME add ahci details
4405 */
4406 static int validate_geometry_imsm_volume(struct supertype *st, int level,
4407 int layout, int raiddisks, int *chunk,
4408 unsigned long long size, char *dev,
4409 unsigned long long *freesize,
4410 int verbose)
4411 {
4412 struct stat stb;
4413 struct intel_super *super = st->sb;
4414 struct imsm_super *mpb = super->anchor;
4415 struct dl *dl;
4416 unsigned long long pos = 0;
4417 unsigned long long maxsize;
4418 struct extent *e;
4419 int i;
4420
4421 /* We must have the container info already read in. */
4422 if (!super)
4423 return 0;
4424
4425 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4426 fprintf(stderr, Name ": RAID gemetry validation failed. "
4427 "Cannot proceed with the action(s).\n");
4428 return 0;
4429 }
4430 if (!dev) {
4431 /* General test: make sure there is space for
4432 * 'raiddisks' device extents of size 'size' at a given
4433 * offset
4434 */
4435 unsigned long long minsize = size;
4436 unsigned long long start_offset = MaxSector;
4437 int dcnt = 0;
4438 if (minsize == 0)
4439 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4440 for (dl = super->disks; dl ; dl = dl->next) {
4441 int found = 0;
4442
4443 pos = 0;
4444 i = 0;
4445 e = get_extents(super, dl);
4446 if (!e) continue;
4447 do {
4448 unsigned long long esize;
4449 esize = e[i].start - pos;
4450 if (esize >= minsize)
4451 found = 1;
4452 if (found && start_offset == MaxSector) {
4453 start_offset = pos;
4454 break;
4455 } else if (found && pos != start_offset) {
4456 found = 0;
4457 break;
4458 }
4459 pos = e[i].start + e[i].size;
4460 i++;
4461 } while (e[i-1].size);
4462 if (found)
4463 dcnt++;
4464 free(e);
4465 }
4466 if (dcnt < raiddisks) {
4467 if (verbose)
4468 fprintf(stderr, Name ": imsm: Not enough "
4469 "devices with space for this array "
4470 "(%d < %d)\n",
4471 dcnt, raiddisks);
4472 return 0;
4473 }
4474 return 1;
4475 }
4476
4477 /* This device must be a member of the set */
4478 if (stat(dev, &stb) < 0)
4479 return 0;
4480 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4481 return 0;
4482 for (dl = super->disks ; dl ; dl = dl->next) {
4483 if (dl->major == (int)major(stb.st_rdev) &&
4484 dl->minor == (int)minor(stb.st_rdev))
4485 break;
4486 }
4487 if (!dl) {
4488 if (verbose)
4489 fprintf(stderr, Name ": %s is not in the "
4490 "same imsm set\n", dev);
4491 return 0;
4492 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4493 /* If a volume is present then the current creation attempt
4494 * cannot incorporate new spares because the orom may not
4495 * understand this configuration (all member disks must be
4496 * members of each array in the container).
4497 */
4498 fprintf(stderr, Name ": %s is a spare and a volume"
4499 " is already defined for this container\n", dev);
4500 fprintf(stderr, Name ": The option-rom requires all member"
4501 " disks to be a member of all volumes\n");
4502 return 0;
4503 }
4504
4505 /* retrieve the largest free space block */
4506 e = get_extents(super, dl);
4507 maxsize = 0;
4508 i = 0;
4509 if (e) {
4510 do {
4511 unsigned long long esize;
4512
4513 esize = e[i].start - pos;
4514 if (esize >= maxsize)
4515 maxsize = esize;
4516 pos = e[i].start + e[i].size;
4517 i++;
4518 } while (e[i-1].size);
4519 dl->e = e;
4520 dl->extent_cnt = i;
4521 } else {
4522 if (verbose)
4523 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4524 dev);
4525 return 0;
4526 }
4527 if (maxsize < size) {
4528 if (verbose)
4529 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4530 dev, maxsize, size);
4531 return 0;
4532 }
4533
4534 /* count total number of extents for merge */
4535 i = 0;
4536 for (dl = super->disks; dl; dl = dl->next)
4537 if (dl->e)
4538 i += dl->extent_cnt;
4539
4540 maxsize = merge_extents(super, i);
4541 if (maxsize < size || maxsize == 0) {
4542 if (verbose)
4543 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4544 maxsize, size);
4545 return 0;
4546 }
4547
4548 *freesize = maxsize;
4549
4550 return 1;
4551 }
4552
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	/* Auto-layout helper: find a free region common to 'raiddisks'
	 * container members that can hold a volume of 'size' sectors
	 * (size == 0 means "use all remaining space, rounded to chunk").
	 * On success the participating disks are tagged via dl->raiddisk
	 * and the chosen per-disk size is returned in *freesize.
	 * Returns 1 on success, 0 on failure.
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	int i;
	int extent_cnt;
	struct extent *e;
	unsigned long long maxsize;
	unsigned long long minsize;
	int cnt;
	int used;

	/* find the largest common start free region of the possible disks */
	used = 0;
	extent_cnt = 0;
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		dl->raiddisk = -1;

		/* count disks already claimed by an existing volume */
		if (dl->index >= 0)
			used++;

		/* don't activate new spares if we are orom constrained
		 * and there is already a volume active in the container
		 */
		if (super->orom && dl->index < 0 && mpb->num_raid_devs)
			continue;

		e = get_extents(super, dl);
		if (!e)
			continue;
		/* the extent list ends with a zero-size sentinel entry */
		for (i = 1; e[i-1].size; i++)
			;
		dl->e = e;
		dl->extent_cnt = i;
		extent_cnt += i;
		cnt++;
	}

	/* largest region available at a common offset across tagged disks */
	maxsize = merge_extents(super, extent_cnt);
	minsize = size;
	if (size == 0)
		/* chunk is in K */
		minsize = chunk * 2;

	/* require enough disks, all in-use disks participating (orom
	 * constraint), and a common region of at least 'minsize' sectors
	 */
	if (cnt < raiddisks ||
	    (super->orom && used && used != raiddisks) ||
	    maxsize < minsize ||
	    maxsize == 0) {
		fprintf(stderr, Name ": not enough devices with space to create array.\n");
		return 0; /* no free region is large enough */
	}

	if (size == 0) {
		size = maxsize;
		/* round down to a whole number of chunks (chunk is in K) */
		if (chunk) {
			size /= 2 * chunk;
			size *= 2 * chunk;
		}
	}

	/* assign raid-disk slots to the disks that contributed extents */
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->e)
			dl->raiddisk = cnt++;

	*freesize = size;

	return 1;
}
4626
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int *chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	/* Top-level geometry validation dispatcher.
	 * Returns 1 when the requested geometry is acceptable, 0 otherwise.
	 */
	int fd, cfd;
	struct mdinfo *sra;
	int is_member = 0;

	/* load capability
	 * if given unused devices create a container
	 * if given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks,
							chunk?*chunk:0, size,
							dev, freesize,
							verbose);
	}

	if (!dev) {
		if (st->sb && freesize) {
			/* we are being asked to automatically layout a
			 * new volume based on the current contents of
			 * the container. If the parameters can be
			 * satisfied reserve_space will record the disks,
			 * start offset, and size of the volume to be
			 * created. add_to_super and getinfo_super
			 * detect when autolayout is in progress.
			 */
			if (!validate_geometry_imsm_orom(st->sb, level, layout,
							 raiddisks, chunk,
							 verbose))
				return 0;
			return reserve_space(st, raiddisks, size,
					     chunk?*chunk:0, freesize);
		}
		return 1;
	}
	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* an exclusive open succeeding means nobody owns the device,
		 * so it cannot be a container member
		 */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	close(fd);
	if (cfd < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0)
		is_member = 1;
	sysfs_free(sra);
	if (is_member) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
	}

	if (verbose)
		fprintf(stderr, Name ": failed container membership check\n");

	close(cfd);
	return 0;
}
4728
4729 static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
4730 {
4731 struct intel_super *super = st->sb;
4732
4733 if (level && *level == UnSet)
4734 *level = LEVEL_CONTAINER;
4735
4736 if (level && layout && *layout == UnSet)
4737 *layout = imsm_level_to_layout(*level);
4738
4739 if (chunk && (*chunk == UnSet || *chunk == 0) &&
4740 super && super->orom)
4741 *chunk = imsm_orom_default_chunk(super->orom);
4742 }
4743
4744 static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
4745
static int kill_subarray_imsm(struct supertype *st)
{
	/* remove the subarray currently referenced by ->current_vol
	 * Returns 0 on success (or queued update), 2 on refusal/failure.
	 */
	__u8 i;
	struct intel_dev **dp;
	struct intel_super *super = st->sb;
	__u8 current_vol = super->current_vol;
	struct imsm_super *mpb = super->anchor;

	if (super->current_vol < 0)
		return 2;
	super->current_vol = -1; /* invalidate subarray cursor */

	/* block deletions that would change the uuid of active subarrays
	 *
	 * FIXME when immutable ids are available, but note that we'll
	 * also need to fixup the invalidated/active subarray indexes in
	 * mdstat
	 */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		/* num_raid_devs is a __u8, so at most 3 digits + NUL */
		char subarray[4];

		/* only subarrays at/after current_vol shift index on delete */
		if (i < current_vol)
			continue;
		sprintf(subarray, "%u", i);
		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
				current_vol, i);

			return 2;
		}
	}

	/* mdmon case: queue the deletion as a metadata update and return */
	if (st->update_tail) {
		struct imsm_update_kill_array *u = malloc(sizeof(*u));

		if (!u)
			return 2;
		u->type = update_kill_array;
		u->dev_idx = current_vol;
		append_metadata_update(st, u, sizeof(*u));

		return 0;
	}

	/* direct edit: unlink the victim and renumber the survivors.
	 * NOTE(review): the removed intel_dev is unlinked but not freed,
	 * and handle_missing() is only run on the surviving devs — confirm
	 * this matches the intent.
	 */
	for (dp = &super->devlist; *dp;)
		if ((*dp)->index == current_vol) {
			*dp = (*dp)->next;
		} else {
			handle_missing(super, (*dp)->dev);
			if ((*dp)->index > current_vol)
				(*dp)->index--;
			dp = &(*dp)->next;
		}

	/* no more raid devices, all active components are now spares,
	 * but of course failed are still failed
	 */
	if (--mpb->num_raid_devs == 0) {
		struct dl *d;

		for (d = super->disks; d; d = d->next)
			if (d->index > -2) {
				d->index = -1;
				d->disk.status = SPARE_DISK;
			}
	}

	super->updates_pending++;

	return 0;
}
4819
static int update_subarray_imsm(struct supertype *st, char *subarray,
				char *update, struct mddev_ident *ident)
{
	/* update the subarray currently referenced by ->current_vol.
	 * Only the "name" update is supported.
	 * Returns 0 on success (or queued update), 2 on refusal/failure.
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;

	if (strcmp(update, "name") == 0) {
		char *name = ident->name;
		char *ep;
		int vol;

		/* renaming a running subarray is not supported */
		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": Unable to update name of active subarray\n");
			return 2;
		}

		if (!check_name(super, name, 0))
			return 2;

		/* 'subarray' must be a bare decimal volume index in range */
		vol = strtoul(subarray, &ep, 10);
		if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
			return 2;

		if (st->update_tail) {
			/* mdmon case: queue the rename as a metadata update */
			struct imsm_update_rename_array *u = malloc(sizeof(*u));

			if (!u)
				return 2;
			u->type = update_rename_array;
			u->dev_idx = vol;
			snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
			append_metadata_update(st, u, sizeof(*u));
		} else {
			/* direct edit: rename in place and refresh the
			 * missing-disk state of every volume
			 */
			struct imsm_dev *dev;
			int i;

			dev = get_imsm_dev(super, vol);
			snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
			for (i = 0; i < mpb->num_raid_devs; i++) {
				dev = get_imsm_dev(super, i);
				handle_missing(super, dev);
			}
			super->updates_pending++;
		}
	} else
		return 2;

	return 0;
}
4871
4872 static int is_gen_migration(struct imsm_dev *dev)
4873 {
4874 if (!dev->vol.migr_state)
4875 return 0;
4876
4877 if (migr_type(dev) == MIGR_GEN_MIGR)
4878 return 1;
4879
4880 return 0;
4881 }
4882 #endif /* MDASSEMBLE */
4883
4884 static int is_rebuilding(struct imsm_dev *dev)
4885 {
4886 struct imsm_map *migr_map;
4887
4888 if (!dev->vol.migr_state)
4889 return 0;
4890
4891 if (migr_type(dev) != MIGR_REBUILD)
4892 return 0;
4893
4894 migr_map = get_imsm_map(dev, 1);
4895
4896 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
4897 return 1;
4898 else
4899 return 0;
4900 }
4901
4902 static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
4903 {
4904 struct mdinfo *rebuild = NULL;
4905 struct mdinfo *d;
4906 __u32 units;
4907
4908 if (!is_rebuilding(dev))
4909 return;
4910
4911 /* Find the rebuild target, but punt on the dual rebuild case */
4912 for (d = array->devs; d; d = d->next)
4913 if (d->recovery_start == 0) {
4914 if (rebuild)
4915 return;
4916 rebuild = d;
4917 }
4918
4919 if (!rebuild) {
4920 /* (?) none of the disks are marked with
4921 * IMSM_ORD_REBUILD, so assume they are missing and the
4922 * disk_ord_tbl was not correctly updated
4923 */
4924 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
4925 return;
4926 }
4927
4928 units = __le32_to_cpu(dev->vol.curr_migr_unit);
4929 rebuild->recovery_start = units * blocks_per_migr_unit(dev);
4930 }
4931
4932
4933 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
4934 {
4935 /* Given a container loaded by load_super_imsm_all,
4936 * extract information about all the arrays into
4937 * an mdinfo tree.
4938 * If 'subarray' is given, just extract info about that array.
4939 *
4940 * For each imsm_dev create an mdinfo, fill it in,
4941 * then look for matching devices in super->disks
4942 * and create appropriate device mdinfo.
4943 */
4944 struct intel_super *super = st->sb;
4945 struct imsm_super *mpb = super->anchor;
4946 struct mdinfo *rest = NULL;
4947 unsigned int i;
4948 int bbm_errors = 0;
4949 struct dl *d;
4950 int spare_disks = 0;
4951
4952 /* check for bad blocks */
4953 if (imsm_bbm_log_size(super->anchor))
4954 bbm_errors = 1;
4955
4956 /* count spare devices, not used in maps
4957 */
4958 for (d = super->disks; d; d = d->next)
4959 if (d->index == -1)
4960 spare_disks++;
4961
4962 for (i = 0; i < mpb->num_raid_devs; i++) {
4963 struct imsm_dev *dev;
4964 struct imsm_map *map;
4965 struct imsm_map *map2;
4966 struct mdinfo *this;
4967 int slot, chunk;
4968 char *ep;
4969
4970 if (subarray &&
4971 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
4972 continue;
4973
4974 dev = get_imsm_dev(super, i);
4975 map = get_imsm_map(dev, 0);
4976 map2 = get_imsm_map(dev, 1);
4977
4978 /* do not publish arrays that are in the middle of an
4979 * unsupported migration
4980 */
4981 if (dev->vol.migr_state &&
4982 (migr_type(dev) == MIGR_STATE_CHANGE)) {
4983 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
4984 " unsupported migration in progress\n",
4985 dev->volume);
4986 continue;
4987 }
4988 /* do not publish arrays that are not support by controller's
4989 * OROM/EFI
4990 */
4991
4992 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
4993 #ifndef MDASSEMBLE
4994 if (!validate_geometry_imsm_orom(super,
4995 get_imsm_raid_level(map), /* RAID level */
4996 imsm_level_to_layout(get_imsm_raid_level(map)),
4997 map->num_members, /* raid disks */
4998 &chunk,
4999 1 /* verbose */)) {
5000 fprintf(stderr, Name ": RAID gemetry validation failed. "
5001 "Cannot proceed with the action(s).\n");
5002 continue;
5003 }
5004 #endif /* MDASSEMBLE */
5005 this = malloc(sizeof(*this));
5006 if (!this) {
5007 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
5008 sizeof(*this));
5009 break;
5010 }
5011 memset(this, 0, sizeof(*this));
5012 this->next = rest;
5013
5014 super->current_vol = i;
5015 getinfo_super_imsm_volume(st, this, NULL);
5016 for (slot = 0 ; slot < map->num_members; slot++) {
5017 unsigned long long recovery_start;
5018 struct mdinfo *info_d;
5019 struct dl *d;
5020 int idx;
5021 int skip;
5022 __u32 ord;
5023
5024 skip = 0;
5025 idx = get_imsm_disk_idx(dev, slot, 0);
5026 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
5027 for (d = super->disks; d ; d = d->next)
5028 if (d->index == idx)
5029 break;
5030
5031 recovery_start = MaxSector;
5032 if (d == NULL)
5033 skip = 1;
5034 if (d && is_failed(&d->disk))
5035 skip = 1;
5036 if (ord & IMSM_ORD_REBUILD)
5037 recovery_start = 0;
5038
5039 /*
5040 * if we skip some disks the array will be assmebled degraded;
5041 * reset resync start to avoid a dirty-degraded
5042 * situation when performing the intial sync
5043 *
5044 * FIXME handle dirty degraded
5045 */
5046 if ((skip || recovery_start == 0) && !dev->vol.dirty)
5047 this->resync_start = MaxSector;
5048 if (skip)
5049 continue;
5050
5051 info_d = calloc(1, sizeof(*info_d));
5052 if (!info_d) {
5053 fprintf(stderr, Name ": failed to allocate disk"
5054 " for volume %.16s\n", dev->volume);
5055 info_d = this->devs;
5056 while (info_d) {
5057 struct mdinfo *d = info_d->next;
5058
5059 free(info_d);
5060 info_d = d;
5061 }
5062 free(this);
5063 this = rest;
5064 break;
5065 }
5066 info_d->next = this->devs;
5067 this->devs = info_d;
5068
5069 info_d->disk.number = d->index;
5070 info_d->disk.major = d->major;
5071 info_d->disk.minor = d->minor;
5072 info_d->disk.raid_disk = slot;
5073 info_d->recovery_start = recovery_start;
5074 if (map2) {
5075 if (slot < map2->num_members)
5076 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5077 else
5078 this->array.spare_disks++;
5079 } else {
5080 if (slot < map->num_members)
5081 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5082 else
5083 this->array.spare_disks++;
5084 }
5085 if (info_d->recovery_start == MaxSector)
5086 this->array.working_disks++;
5087
5088 info_d->events = __le32_to_cpu(mpb->generation_num);
5089 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5090 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
5091 }
5092 /* now that the disk list is up-to-date fixup recovery_start */
5093 update_recovery_start(dev, this);
5094 this->array.spare_disks += spare_disks;
5095 rest = this;
5096 }
5097
5098 /* if array has bad blocks, set suitable bit in array status */
5099 if (bbm_errors)
5100 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5101
5102 return rest;
5103 }
5104
5105
5106 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
5107 {
5108 struct imsm_map *map = get_imsm_map(dev, 0);
5109
5110 if (!failed)
5111 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
5112 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
5113
5114 switch (get_imsm_raid_level(map)) {
5115 case 0:
5116 return IMSM_T_STATE_FAILED;
5117 break;
5118 case 1:
5119 if (failed < map->num_members)
5120 return IMSM_T_STATE_DEGRADED;
5121 else
5122 return IMSM_T_STATE_FAILED;
5123 break;
5124 case 10:
5125 {
5126 /**
5127 * check to see if any mirrors have failed, otherwise we
5128 * are degraded. Even numbered slots are mirrored on
5129 * slot+1
5130 */
5131 int i;
5132 /* gcc -Os complains that this is unused */
5133 int insync = insync;
5134
5135 for (i = 0; i < map->num_members; i++) {
5136 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
5137 int idx = ord_to_idx(ord);
5138 struct imsm_disk *disk;
5139
5140 /* reset the potential in-sync count on even-numbered
5141 * slots. num_copies is always 2 for imsm raid10
5142 */
5143 if ((i & 1) == 0)
5144 insync = 2;
5145
5146 disk = get_imsm_disk(super, idx);
5147 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5148 insync--;
5149
5150 /* no in-sync disks left in this mirror the
5151 * array has failed
5152 */
5153 if (insync == 0)
5154 return IMSM_T_STATE_FAILED;
5155 }
5156
5157 return IMSM_T_STATE_DEGRADED;
5158 }
5159 case 5:
5160 if (failed < 2)
5161 return IMSM_T_STATE_DEGRADED;
5162 else
5163 return IMSM_T_STATE_FAILED;
5164 break;
5165 default:
5166 break;
5167 }
5168
5169 return map->map_state;
5170 }
5171
5172 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
5173 {
5174 int i;
5175 int failed = 0;
5176 struct imsm_disk *disk;
5177 struct imsm_map *map = get_imsm_map(dev, 0);
5178 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5179 __u32 ord;
5180 int idx;
5181
5182 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5183 * disks that are being rebuilt. New failures are recorded to
5184 * map[0]. So we look through all the disks we started with and
5185 * see if any failures are still present, or if any new ones
5186 * have arrived
5187 *
5188 * FIXME add support for online capacity expansion and
5189 * raid-level-migration
5190 */
5191 for (i = 0; i < prev->num_members; i++) {
5192 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5193 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5194 idx = ord_to_idx(ord);
5195
5196 disk = get_imsm_disk(super, idx);
5197 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5198 failed++;
5199 }
5200
5201 return failed;
5202 }
5203
5204 #ifndef MDASSEMBLE
5205 static int imsm_open_new(struct supertype *c, struct active_array *a,
5206 char *inst)
5207 {
5208 struct intel_super *super = c->sb;
5209 struct imsm_super *mpb = super->anchor;
5210
5211 if (atoi(inst) >= mpb->num_raid_devs) {
5212 fprintf(stderr, "%s: subarry index %d, out of range\n",
5213 __func__, atoi(inst));
5214 return -ENODEV;
5215 }
5216
5217 dprintf("imsm: open_new %s\n", inst);
5218 a->info.container_member = atoi(inst);
5219 return 0;
5220 }
5221
5222 static int is_resyncing(struct imsm_dev *dev)
5223 {
5224 struct imsm_map *migr_map;
5225
5226 if (!dev->vol.migr_state)
5227 return 0;
5228
5229 if (migr_type(dev) == MIGR_INIT ||
5230 migr_type(dev) == MIGR_REPAIR)
5231 return 1;
5232
5233 if (migr_type(dev) == MIGR_GEN_MIGR)
5234 return 0;
5235
5236 migr_map = get_imsm_map(dev, 1);
5237
5238 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5239 (dev->vol.migr_type != MIGR_GEN_MIGR))
5240 return 1;
5241 else
5242 return 0;
5243 }
5244
5245 /* return true if we recorded new information */
5246 static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5247 {
5248 __u32 ord;
5249 int slot;
5250 struct imsm_map *map;
5251
5252 /* new failures are always set in map[0] */
5253 map = get_imsm_map(dev, 0);
5254
5255 slot = get_imsm_disk_slot(map, idx);
5256 if (slot < 0)
5257 return 0;
5258
5259 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
5260 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
5261 return 0;
5262
5263 disk->status |= FAILED_DISK;
5264 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
5265 if (map->failed_disk_num == 0xff)
5266 map->failed_disk_num = slot;
5267 return 1;
5268 }
5269
5270 static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5271 {
5272 mark_failure(dev, disk, idx);
5273
5274 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5275 return;
5276
5277 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5278 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5279 }
5280
5281 static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5282 {
5283 __u8 map_state;
5284 struct dl *dl;
5285 int failed;
5286
5287 if (!super->missing)
5288 return;
5289 failed = imsm_count_failed(super, dev);
5290 map_state = imsm_check_degraded(super, dev, failed);
5291
5292 dprintf("imsm: mark missing\n");
5293 end_migration(dev, map_state);
5294 for (dl = super->missing; dl; dl = dl->next)
5295 mark_missing(dev, &dl->disk, dl->index);
5296 super->updates_pending++;
5297 }
5298
5299 static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5300 {
5301 int used_disks = imsm_num_data_members(dev, 0);
5302 unsigned long long array_blocks;
5303 struct imsm_map *map;
5304
5305 if (used_disks == 0) {
5306 /* when problems occures
5307 * return current array_blocks value
5308 */
5309 array_blocks = __le32_to_cpu(dev->size_high);
5310 array_blocks = array_blocks << 32;
5311 array_blocks += __le32_to_cpu(dev->size_low);
5312
5313 return array_blocks;
5314 }
5315
5316 /* set array size in metadata
5317 */
5318 map = get_imsm_map(dev, 0);
5319 array_blocks = map->blocks_per_member * used_disks;
5320
5321 /* round array size down to closest MB
5322 */
5323 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5324 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5325 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5326
5327 return array_blocks;
5328 }
5329
5330 static void imsm_set_disk(struct active_array *a, int n, int state);
5331
5332 static void imsm_progress_container_reshape(struct intel_super *super)
5333 {
5334 /* if no device has a migr_state, but some device has a
5335 * different number of members than the previous device, start
5336 * changing the number of devices in this device to match
5337 * previous.
5338 */
5339 struct imsm_super *mpb = super->anchor;
5340 int prev_disks = -1;
5341 int i;
5342 int copy_map_size;
5343
5344 for (i = 0; i < mpb->num_raid_devs; i++) {
5345 struct imsm_dev *dev = get_imsm_dev(super, i);
5346 struct imsm_map *map = get_imsm_map(dev, 0);
5347 struct imsm_map *map2;
5348 int prev_num_members;
5349
5350 if (dev->vol.migr_state)
5351 return;
5352
5353 if (prev_disks == -1)
5354 prev_disks = map->num_members;
5355 if (prev_disks == map->num_members)
5356 continue;
5357
5358 /* OK, this array needs to enter reshape mode.
5359 * i.e it needs a migr_state
5360 */
5361
5362 copy_map_size = sizeof_imsm_map(map);
5363 prev_num_members = map->num_members;
5364 map->num_members = prev_disks;
5365 dev->vol.migr_state = 1;
5366 dev->vol.curr_migr_unit = 0;
5367 dev->vol.migr_type = MIGR_GEN_MIGR;
5368 for (i = prev_num_members;
5369 i < map->num_members; i++)
5370 set_imsm_ord_tbl_ent(map, i, i);
5371 map2 = get_imsm_map(dev, 1);
5372 /* Copy the current map */
5373 memcpy(map2, map, copy_map_size);
5374 map2->num_members = prev_num_members;
5375
5376 imsm_set_array_size(dev);
5377 super->updates_pending++;
5378 }
5379 }
5380
/* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception, when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 * Returns the (possibly downgraded) 'consistent' value.
 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);
	__u32 blocks_per_unit;

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR) {
		/* array state change is blocked due to reshape action
		 * We might need to
		 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
		 * - finish the reshape (if last_checkpoint is big and action != reshape)
		 * - update curr_migr_unit
		 */
		if (a->curr_action == reshape) {
			/* still reshaping, maybe update curr_migr_unit */
			goto mark_checkpoint;
		} else {
			if (a->last_checkpoint == 0 && a->prev_action == reshape) {
				/* for some reason we aborted the reshape.
				 * Better clean up
				 */
				struct imsm_map *map2 = get_imsm_map(dev, 1);
				dev->vol.migr_state = 0;
				dev->vol.migr_type = 0;
				dev->vol.curr_migr_unit = 0;
				memcpy(map, map2, sizeof_imsm_map(map2));
				super->updates_pending++;
			}
			if (a->last_checkpoint >= a->info.component_size) {
				unsigned long long array_blocks;
				int used_disks;
				struct mdinfo *mdi;

				used_disks = imsm_num_data_members(dev, 0);
				if (used_disks > 0) {
					/* NOTE(review): this __u32 * int
					 * product is computed in 32 bits and
					 * may overflow for volumes larger
					 * than 2TB — verify
					 */
					array_blocks =
						map->blocks_per_member *
						used_disks;
					/* round array size down to closest MB
					 */
					array_blocks = (array_blocks
							>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
					a->info.custom_array_size = array_blocks;
					/* encourage manager to update array
					 * size
					 */

					a->check_reshape = 1;
				}
				/* finalize online capacity expansion/reshape */
				for (mdi = a->info.devs; mdi; mdi = mdi->next)
					imsm_set_disk(a,
						      mdi->disk.raid_disk,
						      mdi->curr_state);

				imsm_progress_container_reshape(super);
			}
		}
	}

	/* before we activate this array handle any missing disks */
	if (consistent == 2)
		handle_missing(super, dev);

	/* downgrade 'clean shutdown' to dirty when the array is not fully
	 * synced, not in the normal state, or still migrating
	 */
	if (consistent == 2 &&
	    (!is_resync_complete(&a->info) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(&a->info)) {
		/* complete initialization / resync,
		 * recovery and interrupted recovery is completed in
		 * ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
			a->last_checkpoint = 0;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start\n");
		if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
			migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
		else
			migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
		super->updates_pending++;
	}

mark_checkpoint:
	/* check if we can update curr_migr_unit from resync_start, recovery_start */
	blocks_per_unit = blocks_per_migr_unit(dev);
	if (blocks_per_unit) {
		__u32 units32;
		__u64 units;

		units = a->last_checkpoint / blocks_per_unit;
		units32 = units;

		/* check that we did not overflow 32-bits, and that
		 * curr_migr_unit needs updating
		 */
		if (units32 == units &&
		    units32 != 0 &&
		    __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
			dprintf("imsm: mark checkpoint (%u)\n", units32);
			dev->vol.curr_migr_unit = __cpu_to_le32(units32);
			super->updates_pending++;
		}
	}

	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}

	return consistent;
}
5517
5518 static void imsm_set_disk(struct active_array *a, int n, int state)
5519 {
5520 int inst = a->info.container_member;
5521 struct intel_super *super = a->container->sb;
5522 struct imsm_dev *dev = get_imsm_dev(super, inst);
5523 struct imsm_map *map = get_imsm_map(dev, 0);
5524 struct imsm_disk *disk;
5525 int failed;
5526 __u32 ord;
5527 __u8 map_state;
5528
5529 if (n > map->num_members)
5530 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5531 n, map->num_members - 1);
5532
5533 if (n < 0)
5534 return;
5535
5536 dprintf("imsm: set_disk %d:%x\n", n, state);
5537
5538 ord = get_imsm_ord_tbl_ent(dev, n, -1);
5539 disk = get_imsm_disk(super, ord_to_idx(ord));
5540
5541 /* check for new failures */
5542 if (state & DS_FAULTY) {
5543 if (mark_failure(dev, disk, ord_to_idx(ord)))
5544 super->updates_pending++;
5545 }
5546
5547 /* check if in_sync */
5548 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
5549 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5550
5551 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
5552 super->updates_pending++;
5553 }
5554
5555 failed = imsm_count_failed(super, dev);
5556 map_state = imsm_check_degraded(super, dev, failed);
5557
5558 /* check if recovery complete, newly degraded, or failed */
5559 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
5560 end_migration(dev, map_state);
5561 map = get_imsm_map(dev, 0);
5562 map->failed_disk_num = ~0;
5563 super->updates_pending++;
5564 a->last_checkpoint = 0;
5565 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5566 map->map_state != map_state &&
5567 !dev->vol.migr_state) {
5568 dprintf("imsm: mark degraded\n");
5569 map->map_state = map_state;
5570 super->updates_pending++;
5571 a->last_checkpoint = 0;
5572 } else if (map_state == IMSM_T_STATE_FAILED &&
5573 map->map_state != map_state) {
5574 dprintf("imsm: mark failed\n");
5575 end_migration(dev, map_state);
5576 super->updates_pending++;
5577 a->last_checkpoint = 0;
5578 } else if (is_gen_migration(dev)) {
5579 dprintf("imsm: Detected General Migration in state: ");
5580 if (map_state == IMSM_T_STATE_NORMAL) {
5581 end_migration(dev, map_state);
5582 map = get_imsm_map(dev, 0);
5583 map->failed_disk_num = ~0;
5584 dprintf("normal\n");
5585 } else {
5586 if (map_state == IMSM_T_STATE_DEGRADED) {
5587 printf("degraded\n");
5588 end_migration(dev, map_state);
5589 } else {
5590 dprintf("failed\n");
5591 }
5592 map->map_state = map_state;
5593 }
5594 super->updates_pending++;
5595 }
5596 }
5597
5598 static int store_imsm_mpb(int fd, struct imsm_super *mpb)
5599 {
5600 void *buf = mpb;
5601 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5602 unsigned long long dsize;
5603 unsigned long long sectors;
5604
5605 get_dev_size(fd, NULL, &dsize);
5606
5607 if (mpb_size > 512) {
5608 /* -1 to account for anchor */
5609 sectors = mpb_sectors(mpb) - 1;
5610
5611 /* write the extended mpb to the sectors preceeding the anchor */
5612 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5613 return 1;
5614
5615 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5616 != 512 * sectors)
5617 return 1;
5618 }
5619
5620 /* first block is stored on second to last sector of the disk */
5621 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
5622 return 1;
5623
5624 if (write(fd, buf, 512) != 512)
5625 return 1;
5626
5627 return 0;
5628 }
5629
5630 static void imsm_sync_metadata(struct supertype *container)
5631 {
5632 struct intel_super *super = container->sb;
5633
5634 dprintf("sync metadata: %d\n", super->updates_pending);
5635 if (!super->updates_pending)
5636 return;
5637
5638 write_super_imsm(container, 0);
5639
5640 super->updates_pending = 0;
5641 }
5642
5643 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
5644 {
5645 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
5646 int i = get_imsm_disk_idx(dev, idx, -1);
5647 struct dl *dl;
5648
5649 for (dl = super->disks; dl; dl = dl->next)
5650 if (dl->index == i)
5651 break;
5652
5653 if (dl && is_failed(&dl->disk))
5654 dl = NULL;
5655
5656 if (dl)
5657 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
5658
5659 return dl;
5660 }
5661
/* imsm_add_spare(): find a container disk that can serve as a spare for
 * slot 'slot' of array 'a'. A candidate must not already be a member of
 * the array, must not be on 'additional_test_list', must not be failed or
 * reserved (index == -2), and must have a free extent large enough to
 * cover every member volume in the container. When 'activate_new' is 0,
 * pristine spares (index == -1) are skipped so partially-assimilated
 * drives are preferred.
 * Returns the chosen disk, or NULL when no candidate qualifies (the loop
 * runs off the end of the disk list, leaving dl == NULL).
 */
static struct dl *imsm_add_spare(struct intel_super *super, int slot,
				 struct active_array *a, int activate_new,
				 struct mdinfo *additional_test_list)
{
	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
	int idx = get_imsm_disk_idx(dev, slot, -1);
	struct imsm_super *mpb = super->anchor;
	struct imsm_map *map;
	unsigned long long pos;
	struct mdinfo *d;
	struct extent *ex;
	int i, j;
	int found;
	__u32 array_start = 0;
	__u32 array_end = 0;
	struct dl *dl;
	struct mdinfo *test_list;

	for (dl = super->disks; dl; dl = dl->next) {
		/* If in this array, skip */
		for (d = a->info.devs ; d ; d = d->next)
			if (d->state_fd >= 0 &&
			    d->disk.major == dl->major &&
			    d->disk.minor == dl->minor) {
				dprintf("%x:%x already in array\n",
					dl->major, dl->minor);
				break;
			}
		if (d)
			continue;
		/* disks on the caller-supplied exclusion list are skipped */
		test_list = additional_test_list;
		while (test_list) {
			if (test_list->disk.major == dl->major &&
			    test_list->disk.minor == dl->minor) {
				dprintf("%x:%x already in additional test list\n",
					dl->major, dl->minor);
				break;
			}
			test_list = test_list->next;
		}
		if (test_list)
			continue;

		/* skip in use or failed drives */
		if (is_failed(&dl->disk) || idx == dl->index ||
		    dl->index == -2) {
			dprintf("%x:%x status (failed: %d index: %d)\n",
				dl->major, dl->minor, is_failed(&dl->disk), idx);
			continue;
		}

		/* skip pure spares when we are looking for partially
		 * assimilated drives
		 */
		if (dl->index == -1 && !activate_new)
			continue;

		/* Does this unused device have the requisite free space?
		 * It needs to be able to cover all member volumes
		 */
		ex = get_extents(super, dl);
		if (!ex) {
			dprintf("cannot get extents\n");
			continue;
		}
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);

			/* check if this disk is already a member of
			 * this array
			 */
			if (get_imsm_disk_slot(map, dl->index) >= 0)
				continue;

			found = 0;
			j = 0;
			pos = 0;
			array_start = __le32_to_cpu(map->pba_of_lba0);
			array_end = array_start +
				    __le32_to_cpu(map->blocks_per_member) - 1;

			/* walk the (terminated) extent list; the volume fits
			 * if its whole range lies in the gap before extent j
			 */
			do {
				/* check that we can start at pba_of_lba0 with
				 * blocks_per_member of space
				 */
				if (array_start >= pos && array_end < ex[j].start) {
					found = 1;
					break;
				}
				pos = ex[j].start + ex[j].size;
				j++;
			} while (ex[j-1].size);

			if (!found)
				break;
		}

		free(ex);
		if (i < mpb->num_raid_devs) {
			dprintf("%x:%x does not have %u to %u available\n",
				dl->major, dl->minor, array_start, array_end);
			/* No room */
			continue;
		}
		return dl;
	}

	/* loop exhausted: dl is NULL here */
	return dl;
}
5772
5773
5774 static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
5775 {
5776 struct imsm_dev *dev2;
5777 struct imsm_map *map;
5778 struct dl *idisk;
5779 int slot;
5780 int idx;
5781 __u8 state;
5782
5783 dev2 = get_imsm_dev(cont->sb, dev_idx);
5784 if (dev2) {
5785 state = imsm_check_degraded(cont->sb, dev2, failed);
5786 if (state == IMSM_T_STATE_FAILED) {
5787 map = get_imsm_map(dev2, 0);
5788 if (!map)
5789 return 1;
5790 for (slot = 0; slot < map->num_members; slot++) {
5791 /*
5792 * Check if failed disks are deleted from intel
5793 * disk list or are marked to be deleted
5794 */
5795 idx = get_imsm_disk_idx(dev2, slot, -1);
5796 idisk = get_imsm_dl_disk(cont->sb, idx);
5797 /*
5798 * Do not rebuild the array if failed disks
5799 * from failed sub-array are not removed from
5800 * container.
5801 */
5802 if (idisk &&
5803 is_failed(&idisk->disk) &&
5804 (idisk->action != DISK_REMOVE))
5805 return 0;
5806 }
5807 }
5808 }
5809 return 1;
5810 }
5811
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array. We replace failed regions one a
	 * array at a time. The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	/* start from raid_disks and subtract working members below */
	int failed = a->info.array.raid_disks;
	struct mdinfo *rv = NULL;
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;
	int allowed;

	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
			d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR)
		/* No repair during migration */
		return NULL;

	if (a->info.array.level == 4)
		/* No repair for takeovered array
		 * imsm doesn't support raid4
		 */
		return NULL;

	/* spares only make sense when the volume is exactly degraded */
	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/*
	 * If there are any failed disks check state of the other volume.
	 * Block rebuild if the another one is failed until failed disks
	 * are removed from container.
	 */
	if (failed) {
		dprintf("found failed disks in %s, check if there another"
			"failed sub-array.\n",
			dev->volume);
		/* check if states of the other volumes allow for rebuild */
		for (i = 0; i < super->anchor->num_raid_devs; i++) {
			if (i != inst) {
				allowed = imsm_rebuild_allowed(a->container,
							       i, failed);
				if (!allowed)
					return NULL;
			}
		}
	}

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery.  Try to re-add the
		 * previous occupant of this slot, if this fails see if
		 * we can continue the assimilation of a spare that was
		 * partially assimilated, finally try to activate a new
		 * spare.
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 0, NULL);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 1, NULL);
		if (!dl)
			continue;
 
		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		if (!di)
			continue;
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare.  imsm_process_update() will create a
		 * new index in this case.  Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->recovery_start = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		super->random = random32();
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

			break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	if (mu) {
		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
		if (mu->buf == NULL) {
			free(mu);
			mu = NULL;
		}
	}
	/* on allocation failure free the chosen-spare list and give up */
	if (!mu) {
		while (rv) {
			struct mdinfo *n = rv->next;

			free(rv);
			rv = n;
		}
		return NULL;
	}

	mu->space = NULL;
	mu->space_list = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	/* pack one update record per chosen spare into mu->buf; the records
	 * are chained through u->next with the last one terminated below
	 */
	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
5993
5994 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
5995 {
5996 struct imsm_dev *dev = get_imsm_dev(super, idx);
5997 struct imsm_map *map = get_imsm_map(dev, 0);
5998 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
5999 struct disk_info *inf = get_disk_info(u);
6000 struct imsm_disk *disk;
6001 int i;
6002 int j;
6003
6004 for (i = 0; i < map->num_members; i++) {
6005 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
6006 for (j = 0; j < new_map->num_members; j++)
6007 if (serialcmp(disk->serial, inf[j].serial) == 0)
6008 return 1;
6009 }
6010
6011 return 0;
6012 }
6013
6014
6015 static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6016 {
6017 struct dl *dl = NULL;
6018 for (dl = super->disks; dl; dl = dl->next)
6019 if ((dl->major == major) && (dl->minor == minor))
6020 return dl;
6021 return NULL;
6022 }
6023
6024 static int remove_disk_super(struct intel_super *super, int major, int minor)
6025 {
6026 struct dl *prev = NULL;
6027 struct dl *dl;
6028
6029 prev = NULL;
6030 for (dl = super->disks; dl; dl = dl->next) {
6031 if ((dl->major == major) && (dl->minor == minor)) {
6032 /* remove */
6033 if (prev)
6034 prev->next = dl->next;
6035 else
6036 super->disks = dl->next;
6037 dl->next = NULL;
6038 __free_imsm_disk(dl);
6039 dprintf("%s: removed %x:%x\n",
6040 __func__, major, minor);
6041 break;
6042 }
6043 prev = dl;
6044 }
6045 return 0;
6046 }
6047
6048 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
6049
6050 static int add_remove_disk_update(struct intel_super *super)
6051 {
6052 int check_degraded = 0;
6053 struct dl *disk = NULL;
6054 /* add/remove some spares to/from the metadata/contrainer */
6055 while (super->disk_mgmt_list) {
6056 struct dl *disk_cfg;
6057
6058 disk_cfg = super->disk_mgmt_list;
6059 super->disk_mgmt_list = disk_cfg->next;
6060 disk_cfg->next = NULL;
6061
6062 if (disk_cfg->action == DISK_ADD) {
6063 disk_cfg->next = super->disks;
6064 super->disks = disk_cfg;
6065 check_degraded = 1;
6066 dprintf("%s: added %x:%x\n",
6067 __func__, disk_cfg->major,
6068 disk_cfg->minor);
6069 } else if (disk_cfg->action == DISK_REMOVE) {
6070 dprintf("Disk remove action processed: %x.%x\n",
6071 disk_cfg->major, disk_cfg->minor);
6072 disk = get_disk_super(super,
6073 disk_cfg->major,
6074 disk_cfg->minor);
6075 if (disk) {
6076 /* store action status */
6077 disk->action = DISK_REMOVE;
6078 /* remove spare disks only */
6079 if (disk->index == -1) {
6080 remove_disk_super(super,
6081 disk_cfg->major,
6082 disk_cfg->minor);
6083 }
6084 }
6085 /* release allocate disk structure */
6086 __free_imsm_disk(disk_cfg);
6087 }
6088 }
6089 return check_degraded;
6090 }
6091
6092 static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6093 struct intel_super *super,
6094 void ***space_list)
6095 {
6096 struct dl *new_disk;
6097 struct intel_dev *id;
6098 int i;
6099 int delta_disks = u->new_raid_disks - u->old_raid_disks;
6100 int disk_count = u->old_raid_disks;
6101 void **tofree = NULL;
6102 int devices_to_reshape = 1;
6103 struct imsm_super *mpb = super->anchor;
6104 int ret_val = 0;
6105 unsigned int dev_id;
6106
6107 dprintf("imsm: apply_reshape_container_disks_update()\n");
6108
6109 /* enable spares to use in array */
6110 for (i = 0; i < delta_disks; i++) {
6111 new_disk = get_disk_super(super,
6112 major(u->new_disks[i]),
6113 minor(u->new_disks[i]));
6114 dprintf("imsm: new disk for reshape is: %i:%i "
6115 "(%p, index = %i)\n",
6116 major(u->new_disks[i]), minor(u->new_disks[i]),
6117 new_disk, new_disk->index);
6118 if ((new_disk == NULL) ||
6119 ((new_disk->index >= 0) &&
6120 (new_disk->index < u->old_raid_disks)))
6121 goto update_reshape_exit;
6122 new_disk->index = disk_count++;
6123 /* slot to fill in autolayout
6124 */
6125 new_disk->raiddisk = new_disk->index;
6126 new_disk->disk.status |=
6127 CONFIGURED_DISK;
6128 new_disk->disk.status &= ~SPARE_DISK;
6129 }
6130
6131 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6132 mpb->num_raid_devs);
6133 /* manage changes in volume
6134 */
6135 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
6136 void **sp = *space_list;
6137 struct imsm_dev *newdev;
6138 struct imsm_map *newmap, *oldmap;
6139
6140 for (id = super->devlist ; id; id = id->next) {
6141 if (id->index == dev_id)
6142 break;
6143 }
6144 if (id == NULL)
6145 break;
6146 if (!sp)
6147 continue;
6148 *space_list = *sp;
6149 newdev = (void*)sp;
6150 /* Copy the dev, but not (all of) the map */
6151 memcpy(newdev, id->dev, sizeof(*newdev));
6152 oldmap = get_imsm_map(id->dev, 0);
6153 newmap = get_imsm_map(newdev, 0);
6154 /* Copy the current map */
6155 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6156 /* update one device only
6157 */
6158 if (devices_to_reshape) {
6159 dprintf("imsm: modifying subdev: %i\n",
6160 id->index);
6161 devices_to_reshape--;
6162 newdev->vol.migr_state = 1;
6163 newdev->vol.curr_migr_unit = 0;
6164 newdev->vol.migr_type = MIGR_GEN_MIGR;
6165 newmap->num_members = u->new_raid_disks;
6166 for (i = 0; i < delta_disks; i++) {
6167 set_imsm_ord_tbl_ent(newmap,
6168 u->old_raid_disks + i,
6169 u->old_raid_disks + i);
6170 }
6171 /* New map is correct, now need to save old map
6172 */
6173 newmap = get_imsm_map(newdev, 1);
6174 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6175
6176 imsm_set_array_size(newdev);
6177 }
6178
6179 sp = (void **)id->dev;
6180 id->dev = newdev;
6181 *sp = tofree;
6182 tofree = sp;
6183 }
6184 if (tofree)
6185 *space_list = tofree;
6186 ret_val = 1;
6187
6188 update_reshape_exit:
6189
6190 return ret_val;
6191 }
6192
/* apply_takeover_update(): perform a level takeover on subarray
 * u->subarray, either R10_TO_R0 (halve the members, demote removed disks
 * to spares) or R0_TO_R10 (double the members, inserting *missing*
 * placeholder disks and a new degraded imsm_dev drawn from *space_list).
 * Returns 1 on success, 0 when the subarray is absent or preconditions
 * fail.
 */
static int apply_takeover_update(struct imsm_update_takeover *u,
				 struct intel_super *super,
				 void ***space_list)
{
	struct imsm_dev *dev = NULL;
	struct intel_dev *dv;
	struct imsm_dev *dev_new;
	struct imsm_map *map;
	struct dl *dm, *du;
	int i;

	for (dv = super->devlist; dv; dv = dv->next)
		if (dv->index == (unsigned int)u->subarray) {
			dev = dv->dev;
			break;
		}

	if (dev == NULL)
		return 0;

	map = get_imsm_map(dev, 0);

	if (u->direction == R10_TO_R0) {
		/* Number of failed disks must be half of initial disk number */
		if (imsm_count_failed(super, dev) != (map->num_members / 2))
			return 0;

		/* iterate through devices to mark removed disks as spare */
		for (dm = super->disks; dm; dm = dm->next) {
			if (dm->disk.status & FAILED_DISK) {
				int idx = dm->index;
				/* update indexes on the disk list */
/* FIXME this loop-with-the-loop looks wrong,  I'm not convinced
   the index values will end up being correct.... NB */
				for (du = super->disks; du; du = du->next)
					if (du->index > idx)
						du->index--;
				/* mark as spare disk */
				dm->disk.status = SPARE_DISK;
				dm->index = -1;
			}
		}
		/* update map */
		map->num_members = map->num_members / 2;
		map->map_state = IMSM_T_STATE_NORMAL;
		map->num_domains = 1;
		map->raid_level = 0;
		map->failed_disk_num = -1;
	}

	if (u->direction == R0_TO_R10) {
		void **space;
		/* update slots in current disk list */
		for (dm = super->disks; dm; dm = dm->next) {
			if (dm->index >= 0)
				dm->index *= 2;
		}
		/* create new *missing* disks */
		for (i = 0; i < map->num_members; i++) {
			space = *space_list;
			if (!space)
				continue;
			*space_list = *space;
			du = (void *)space;
			/* clone an existing entry, then overwrite the
			 * identifying fields with MISSING placeholders
			 */
			memcpy(du, super->disks, sizeof(*du));
			du->fd = -1;
			du->minor = 0;
			du->major = 0;
			du->index = (i * 2) + 1;
			sprintf((char *)du->disk.serial,
				" MISSING_%d", du->index);
			sprintf((char *)du->serial,
				"MISSING_%d", du->index);
			du->next = super->missing;
			super->missing = du;
		}
		/* create new dev and map */
		space = *space_list;
		if (!space)
			return 0;
		*space_list = *space;
		dev_new = (void *)space;
		memcpy(dev_new, dev, sizeof(*dev));
		/* update new map */
		map = get_imsm_map(dev_new, 0);
		map->num_members = map->num_members * 2;
		map->map_state = IMSM_T_STATE_DEGRADED;
		map->num_domains = 2;
		map->raid_level = 1;
		/* replace dev<->dev_new */
		dv->dev = dev_new;
	}
	/* update disk order table */
	for (du = super->disks; du; du = du->next)
		if (du->index >= 0)
			set_imsm_ord_tbl_ent(map, du->index, du->index);
	for (du = super->missing; du; du = du->next)
		if (du->index >= 0) {
			set_imsm_ord_tbl_ent(map, du->index, du->index);
			mark_missing(dev_new, &du->disk, du->index);
		}

	return 1;
}
6297
6298 static void imsm_process_update(struct supertype *st,
6299 struct metadata_update *update)
6300 {
6301 /**
6302 * crack open the metadata_update envelope to find the update record
6303 * update can be one of:
6304 * update_reshape_container_disks - all the arrays in the container
6305 * are being reshaped to have more devices. We need to mark
6306 * the arrays for general migration and convert selected spares
6307 * into active devices.
6308 * update_activate_spare - a spare device has replaced a failed
6309 * device in an array, update the disk_ord_tbl. If this disk is
6310 * present in all member arrays then also clear the SPARE_DISK
6311 * flag
6312 * update_create_array
6313 * update_kill_array
6314 * update_rename_array
6315 * update_add_remove_disk
6316 */
6317 struct intel_super *super = st->sb;
6318 struct imsm_super *mpb;
6319 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
6320
6321 /* update requires a larger buf but the allocation failed */
6322 if (super->next_len && !super->next_buf) {
6323 super->next_len = 0;
6324 return;
6325 }
6326
6327 if (super->next_buf) {
6328 memcpy(super->next_buf, super->buf, super->len);
6329 free(super->buf);
6330 super->len = super->next_len;
6331 super->buf = super->next_buf;
6332
6333 super->next_len = 0;
6334 super->next_buf = NULL;
6335 }
6336
6337 mpb = super->anchor;
6338
6339 switch (type) {
6340 case update_takeover: {
6341 struct imsm_update_takeover *u = (void *)update->buf;
6342 if (apply_takeover_update(u, super, &update->space_list)) {
6343 imsm_update_version_info(super);
6344 super->updates_pending++;
6345 }
6346 break;
6347 }
6348
6349 case update_reshape_container_disks: {
6350 struct imsm_update_reshape *u = (void *)update->buf;
6351 if (apply_reshape_container_disks_update(
6352 u, super, &update->space_list))
6353 super->updates_pending++;
6354 break;
6355 }
6356 case update_reshape_migration: {
6357 break;
6358 }
6359 case update_activate_spare: {
6360 struct imsm_update_activate_spare *u = (void *) update->buf;
6361 struct imsm_dev *dev = get_imsm_dev(super, u->array);
6362 struct imsm_map *map = get_imsm_map(dev, 0);
6363 struct imsm_map *migr_map;
6364 struct active_array *a;
6365 struct imsm_disk *disk;
6366 __u8 to_state;
6367 struct dl *dl;
6368 unsigned int found;
6369 int failed;
6370 int victim = get_imsm_disk_idx(dev, u->slot, -1);
6371 int i;
6372
6373 for (dl = super->disks; dl; dl = dl->next)
6374 if (dl == u->dl)
6375 break;
6376
6377 if (!dl) {
6378 fprintf(stderr, "error: imsm_activate_spare passed "
6379 "an unknown disk (index: %d)\n",
6380 u->dl->index);
6381 return;
6382 }
6383
6384 super->updates_pending++;
6385
6386 /* count failures (excluding rebuilds and the victim)
6387 * to determine map[0] state
6388 */
6389 failed = 0;
6390 for (i = 0; i < map->num_members; i++) {
6391 if (i == u->slot)
6392 continue;
6393 disk = get_imsm_disk(super,
6394 get_imsm_disk_idx(dev, i, -1));
6395 if (!disk || is_failed(disk))
6396 failed++;
6397 }
6398
6399 /* adding a pristine spare, assign a new index */
6400 if (dl->index < 0) {
6401 dl->index = super->anchor->num_disks;
6402 super->anchor->num_disks++;
6403 }
6404 disk = &dl->disk;
6405 disk->status |= CONFIGURED_DISK;
6406 disk->status &= ~SPARE_DISK;
6407
6408 /* mark rebuild */
6409 to_state = imsm_check_degraded(super, dev, failed);
6410 map->map_state = IMSM_T_STATE_DEGRADED;
6411 migrate(dev, to_state, MIGR_REBUILD);
6412 migr_map = get_imsm_map(dev, 1);
6413 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
6414 set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
6415
6416 /* update the family_num to mark a new container
6417 * generation, being careful to record the existing
6418 * family_num in orig_family_num to clean up after
6419 * earlier mdadm versions that neglected to set it.
6420 */
6421 if (mpb->orig_family_num == 0)
6422 mpb->orig_family_num = mpb->family_num;
6423 mpb->family_num += super->random;
6424
6425 /* count arrays using the victim in the metadata */
6426 found = 0;
6427 for (a = st->arrays; a ; a = a->next) {
6428 dev = get_imsm_dev(super, a->info.container_member);
6429 map = get_imsm_map(dev, 0);
6430
6431 if (get_imsm_disk_slot(map, victim) >= 0)
6432 found++;
6433 }
6434
6435 /* delete the victim if it is no longer being
6436 * utilized anywhere
6437 */
6438 if (!found) {
6439 struct dl **dlp;
6440
6441 /* We know that 'manager' isn't touching anything,
6442 * so it is safe to delete
6443 */
6444 for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
6445 if ((*dlp)->index == victim)
6446 break;
6447
6448 /* victim may be on the missing list */
6449 if (!*dlp)
6450 for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
6451 if ((*dlp)->index == victim)
6452 break;
6453 imsm_delete(super, dlp, victim);
6454 }
6455 break;
6456 }
6457 case update_create_array: {
6458 /* someone wants to create a new array, we need to be aware of
6459 * a few races/collisions:
6460 * 1/ 'Create' called by two separate instances of mdadm
6461 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
6462 * devices that have since been assimilated via
6463 * activate_spare.
6464 * In the event this update can not be carried out mdadm will
6465 * (FIX ME) notice that its update did not take hold.
6466 */
6467 struct imsm_update_create_array *u = (void *) update->buf;
6468 struct intel_dev *dv;
6469 struct imsm_dev *dev;
6470 struct imsm_map *map, *new_map;
6471 unsigned long long start, end;
6472 unsigned long long new_start, new_end;
6473 int i;
6474 struct disk_info *inf;
6475 struct dl *dl;
6476
6477 /* handle racing creates: first come first serve */
6478 if (u->dev_idx < mpb->num_raid_devs) {
6479 dprintf("%s: subarray %d already defined\n",
6480 __func__, u->dev_idx);
6481 goto create_error;
6482 }
6483
6484 /* check update is next in sequence */
6485 if (u->dev_idx != mpb->num_raid_devs) {
6486 dprintf("%s: can not create array %d expected index %d\n",
6487 __func__, u->dev_idx, mpb->num_raid_devs);
6488 goto create_error;
6489 }
6490
6491 new_map = get_imsm_map(&u->dev, 0);
6492 new_start = __le32_to_cpu(new_map->pba_of_lba0);
6493 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
6494 inf = get_disk_info(u);
6495
6496 /* handle activate_spare versus create race:
6497 * check to make sure that overlapping arrays do not include
6498 * overalpping disks
6499 */
6500 for (i = 0; i < mpb->num_raid_devs; i++) {
6501 dev = get_imsm_dev(super, i);
6502 map = get_imsm_map(dev, 0);
6503 start = __le32_to_cpu(map->pba_of_lba0);
6504 end = start + __le32_to_cpu(map->blocks_per_member);
6505 if ((new_start >= start && new_start <= end) ||
6506 (start >= new_start && start <= new_end))
6507 /* overlap */;
6508 else
6509 continue;
6510
6511 if (disks_overlap(super, i, u)) {
6512 dprintf("%s: arrays overlap\n", __func__);
6513 goto create_error;
6514 }
6515 }
6516
6517 /* check that prepare update was successful */
6518 if (!update->space) {
6519 dprintf("%s: prepare update failed\n", __func__);
6520 goto create_error;
6521 }
6522
6523 /* check that all disks are still active before committing
6524 * changes. FIXME: could we instead handle this by creating a
6525 * degraded array? That's probably not what the user expects,
6526 * so better to drop this update on the floor.
6527 */
6528 for (i = 0; i < new_map->num_members; i++) {
6529 dl = serial_to_dl(inf[i].serial, super);
6530 if (!dl) {
6531 dprintf("%s: disk disappeared\n", __func__);
6532 goto create_error;
6533 }
6534 }
6535
6536 super->updates_pending++;
6537
6538 /* convert spares to members and fixup ord_tbl */
6539 for (i = 0; i < new_map->num_members; i++) {
6540 dl = serial_to_dl(inf[i].serial, super);
6541 if (dl->index == -1) {
6542 dl->index = mpb->num_disks;
6543 mpb->num_disks++;
6544 dl->disk.status |= CONFIGURED_DISK;
6545 dl->disk.status &= ~SPARE_DISK;
6546 }
6547 set_imsm_ord_tbl_ent(new_map, i, dl->index);
6548 }
6549
6550 dv = update->space;
6551 dev = dv->dev;
6552 update->space = NULL;
6553 imsm_copy_dev(dev, &u->dev);
6554 dv->index = u->dev_idx;
6555 dv->next = super->devlist;
6556 super->devlist = dv;
6557 mpb->num_raid_devs++;
6558
6559 imsm_update_version_info(super);
6560 break;
6561 create_error:
6562 /* mdmon knows how to release update->space, but not
6563 * ((struct intel_dev *) update->space)->dev
6564 */
6565 if (update->space) {
6566 dv = update->space;
6567 free(dv->dev);
6568 }
6569 break;
6570 }
6571 case update_kill_array: {
6572 struct imsm_update_kill_array *u = (void *) update->buf;
6573 int victim = u->dev_idx;
6574 struct active_array *a;
6575 struct intel_dev **dp;
6576 struct imsm_dev *dev;
6577
6578 /* sanity check that we are not affecting the uuid of
6579 * active arrays, or deleting an active array
6580 *
6581 * FIXME when immutable ids are available, but note that
6582 * we'll also need to fixup the invalidated/active
6583 * subarray indexes in mdstat
6584 */
6585 for (a = st->arrays; a; a = a->next)
6586 if (a->info.container_member >= victim)
6587 break;
6588 /* by definition if mdmon is running at least one array
6589 * is active in the container, so checking
6590 * mpb->num_raid_devs is just extra paranoia
6591 */
6592 dev = get_imsm_dev(super, victim);
6593 if (a || !dev || mpb->num_raid_devs == 1) {
6594 dprintf("failed to delete subarray-%d\n", victim);
6595 break;
6596 }
6597
6598 for (dp = &super->devlist; *dp;)
6599 if ((*dp)->index == (unsigned)super->current_vol) {
6600 *dp = (*dp)->next;
6601 } else {
6602 if ((*dp)->index > (unsigned)victim)
6603 (*dp)->index--;
6604 dp = &(*dp)->next;
6605 }
6606 mpb->num_raid_devs--;
6607 super->updates_pending++;
6608 break;
6609 }
6610 case update_rename_array: {
6611 struct imsm_update_rename_array *u = (void *) update->buf;
6612 char name[MAX_RAID_SERIAL_LEN+1];
6613 int target = u->dev_idx;
6614 struct active_array *a;
6615 struct imsm_dev *dev;
6616
6617 /* sanity check that we are not affecting the uuid of
6618 * an active array
6619 */
6620 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
6621 name[MAX_RAID_SERIAL_LEN] = '\0';
6622 for (a = st->arrays; a; a = a->next)
6623 if (a->info.container_member == target)
6624 break;
6625 dev = get_imsm_dev(super, u->dev_idx);
6626 if (a || !dev || !check_name(super, name, 1)) {
6627 dprintf("failed to rename subarray-%d\n", target);
6628 break;
6629 }
6630
6631 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
6632 super->updates_pending++;
6633 break;
6634 }
6635 case update_add_remove_disk: {
6636 /* we may be able to repair some arrays if disks are
6637 * being added, check teh status of add_remove_disk
6638 * if discs has been added.
6639 */
6640 if (add_remove_disk_update(super)) {
6641 struct active_array *a;
6642
6643 super->updates_pending++;
6644 for (a = st->arrays; a; a = a->next)
6645 a->check_degraded = 1;
6646 }
6647 break;
6648 }
6649 default:
6650 fprintf(stderr, "error: unsuported process update type:"
6651 "(type: %d)\n", type);
6652 }
6653 }
6654
static void imsm_prepare_update(struct supertype *st,
				struct metadata_update *update)
{
	/**
	 * Allocate space to hold new disk entries, raid-device entries or a new
	 * mpb if necessary. The manager synchronously waits for updates to
	 * complete in the monitor, so new mpb buffers allocated here can be
	 * integrated by the monitor thread without worrying about live pointers
	 * in the manager thread.
	 *
	 * Buffers are chained onto update->space_list: the first pointer-sized
	 * slot of each allocation is used as the 'next' link, so the monitor
	 * can pop them off without knowing their types.
	 */
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	size_t buf_len;
	size_t len = 0;	/* extra anchor bytes this update will need */

	switch (type) {
	case update_takeover: {
		struct imsm_update_takeover *u = (void *)update->buf;
		/* only the R0 -> R10 direction grows the metadata (mirrors
		 * are added); R10 -> R0 needs no pre-allocation here */
		if (u->direction == R0_TO_R10) {
			void **tail = (void **)&update->space_list;
			struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
			struct imsm_map *map = get_imsm_map(dev, 0);
			int num_members = map->num_members;
			void *space;
			int size, i;
			int err = 0;
			/* allocate memory for added disks */
			for (i = 0; i < num_members; i++) {
				size = sizeof(struct dl);
				space = malloc(size);
				if (!space) {
					err++;
					break;
				}
				/* append to space_list; head of each buffer
				 * doubles as the list's next pointer */
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			/* allocate memory for new device */
			size = sizeof_imsm_dev(super->devlist->dev, 0) +
				(num_members * sizeof(__u32));
			space = malloc(size);
			if (!space)
				err++;
			else {
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			if (!err) {
				len = disks_to_mpb_size(num_members * 2);
			} else {
				/* if allocation didn't succeed, free all
				 * buffers chained so far */
				while (update->space_list) {
					void **sp = update->space_list;
					update->space_list = *sp;
					free(sp);
				}
			}
		}

		break;
	}
	case update_reshape_container_disks: {
		/* Every raid device in the container is about to
		 * gain some more devices, and we will enter a
		 * reconfiguration.
		 * So each 'imsm_map' will be bigger, and the imsm_vol
		 * will now hold 2 of them.
		 * Thus we need new 'struct imsm_dev' allocations sized
		 * as sizeof_imsm_dev but with more devices in both maps.
		 */
		struct imsm_update_reshape *u = (void *)update->buf;
		struct intel_dev *dl;
		void **space_tail = (void**)&update->space_list;

		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		for (dl = super->devlist; dl; dl = dl->next) {
			int size = sizeof_imsm_dev(dl->dev, 1);
			void *s;
			/* two maps, each grown by the disk delta */
			if (u->new_raid_disks > u->old_raid_disks)
				size += sizeof(__u32)*2*
					(u->new_raid_disks - u->old_raid_disks);
			s = malloc(size);
			if (!s)
				break;
			*space_tail = s;
			space_tail = s;
			*space_tail = NULL;
		}

		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_reshape_migration: {
		/* migration updates reuse existing map space; nothing to
		 * pre-allocate here */
		break;
	}
	case update_create_array: {
		struct imsm_update_create_array *u = (void *) update->buf;
		struct intel_dev *dv;
		struct imsm_dev *dev = &u->dev;
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct dl *dl;
		struct disk_info *inf;
		int i;
		int activate = 0;

		inf = get_disk_info(u);
		len = sizeof_imsm_dev(dev, 1);
		/* allocate a new super->devlist entry */
		dv = malloc(sizeof(*dv));
		if (dv) {
			dv->dev = malloc(len);
			if (dv->dev)
				update->space = dv;
			else {
				free(dv);
				update->space = NULL;
			}
		}

		/* count how many spares will be converted to members */
		for (i = 0; i < map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				/* hmm maybe it failed?, nothing we can do about
				 * it here
				 */
				continue;
			}
			if (count_memberships(dl, super) == 0)
				activate++;
		}
		len += activate * sizeof(struct imsm_disk);
		break;
	/* NOTE: this default label sits inside the create_array braces;
	 * it still terminates the switch for all unlisted types */
	default:
		break;
	}
	}

	/* check if we need a larger metadata buffer */
	if (super->next_buf)
		buf_len = super->next_len;
	else
		buf_len = super->len;

	if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
		/* ok we need a larger buf than what is currently allocated
		 * if this allocation fails process_update will notice that
		 * ->next_len is set and ->next_buf is NULL
		 */
		buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
		if (super->next_buf)
			free(super->next_buf);

		super->next_len = buf_len;
		if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
			memset(super->next_buf, 0, buf_len);
		else
			super->next_buf = NULL;
	}
}
6820
6821 /* must be called while manager is quiesced */
/* Remove the disk at position 'index' from the anchor metadata and from the
 * *dlp list entry, renumbering all higher disk indexes and every raid
 * device's ord table to match.
 * must be called while manager is quiesced
 */
static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j, -1);

			if (ord_to_idx(ord) <= index)
				continue;

			/* first map: store the bare index (flags stripped);
			 * second map (if present): keep the flag bits intact */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	/* unlink and free the caller's list entry, if one was supplied */
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
6872
6873 static char disk_by_path[] = "/dev/disk/by-path/";
6874
6875 static const char *imsm_get_disk_controller_domain(const char *path)
6876 {
6877 char disk_path[PATH_MAX];
6878 char *drv=NULL;
6879 struct stat st;
6880
6881 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
6882 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
6883 if (stat(disk_path, &st) == 0) {
6884 struct sys_dev* hba;
6885 char *path=NULL;
6886
6887 path = devt_to_devpath(st.st_rdev);
6888 if (path == NULL)
6889 return "unknown";
6890 hba = find_disk_attached_hba(-1, path);
6891 if (hba && hba->type == SYS_DEV_SAS)
6892 drv = "isci";
6893 else if (hba && hba->type == SYS_DEV_SATA)
6894 drv = "ahci";
6895 else
6896 drv = "unknown";
6897 dprintf("path: %s hba: %s attached: %s\n",
6898 path, (hba) ? hba->path : "NULL", drv);
6899 free(path);
6900 if (hba)
6901 free_sys_dev(&hba);
6902 }
6903 return drv;
6904 }
6905
6906 static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
6907 {
6908 char subdev_name[20];
6909 struct mdstat_ent *mdstat;
6910
6911 sprintf(subdev_name, "%d", subdev);
6912 mdstat = mdstat_by_subdev(subdev_name, container);
6913 if (!mdstat)
6914 return -1;
6915
6916 *minor = mdstat->devnum;
6917 free_mdstat(mdstat);
6918 return 0;
6919 }
6920
6921 static int imsm_reshape_is_allowed_on_container(struct supertype *st,
6922 struct geo_params *geo,
6923 int *old_raid_disks)
6924 {
6925 /* currently we only support increasing the number of devices
6926 * for a container. This increases the number of device for each
6927 * member array. They must all be RAID0 or RAID5.
6928 */
6929 int ret_val = 0;
6930 struct mdinfo *info, *member;
6931 int devices_that_can_grow = 0;
6932
6933 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
6934 "st->devnum = (%i)\n",
6935 st->devnum);
6936
6937 if (geo->size != -1 ||
6938 geo->level != UnSet ||
6939 geo->layout != UnSet ||
6940 geo->chunksize != 0 ||
6941 geo->raid_disks == UnSet) {
6942 dprintf("imsm: Container operation is allowed for "
6943 "raid disks number change only.\n");
6944 return ret_val;
6945 }
6946
6947 info = container_content_imsm(st, NULL);
6948 for (member = info; member; member = member->next) {
6949 int result;
6950 int minor;
6951
6952 dprintf("imsm: checking device_num: %i\n",
6953 member->container_member);
6954
6955 if (geo->raid_disks <= member->array.raid_disks) {
6956 /* we work on container for Online Capacity Expansion
6957 * only so raid_disks has to grow
6958 */
6959 dprintf("imsm: for container operation raid disks "
6960 "increase is required\n");
6961 break;
6962 }
6963
6964 if ((info->array.level != 0) &&
6965 (info->array.level != 5)) {
6966 /* we cannot use this container with other raid level
6967 */
6968 dprintf("imsm: for container operation wrong"
6969 " raid level (%i) detected\n",
6970 info->array.level);
6971 break;
6972 } else {
6973 /* check for platform support
6974 * for this raid level configuration
6975 */
6976 struct intel_super *super = st->sb;
6977 if (!is_raid_level_supported(super->orom,
6978 member->array.level,
6979 geo->raid_disks)) {
6980 dprintf("platform does not support raid%d with"
6981 " %d disk%s\n",
6982 info->array.level,
6983 geo->raid_disks,
6984 geo->raid_disks > 1 ? "s" : "");
6985 break;
6986 }
6987 /* check if component size is aligned to chunk size
6988 */
6989 if (info->component_size %
6990 (info->array.chunk_size/512)) {
6991 dprintf("Component size is not aligned to "
6992 "chunk size\n");
6993 break;
6994 }
6995 }
6996
6997 if (*old_raid_disks &&
6998 info->array.raid_disks != *old_raid_disks)
6999 break;
7000 *old_raid_disks = info->array.raid_disks;
7001
7002 /* All raid5 and raid0 volumes in container
7003 * have to be ready for Online Capacity Expansion
7004 * so they need to be assembled. We have already
7005 * checked that no recovery etc is happening.
7006 */
7007 result = imsm_find_array_minor_by_subdev(member->container_member,
7008 st->container_dev,
7009 &minor);
7010 if (result < 0) {
7011 dprintf("imsm: cannot find array\n");
7012 break;
7013 }
7014 devices_that_can_grow++;
7015 }
7016 sysfs_free(info);
7017 if (!member && devices_that_can_grow)
7018 ret_val = 1;
7019
7020 if (ret_val)
7021 dprintf("\tContainer operation allowed\n");
7022 else
7023 dprintf("\tError: %i\n", ret_val);
7024
7025 return ret_val;
7026 }
7027
7028 /* Function: get_spares_for_grow
7029 * Description: Allocates memory and creates list of spare devices
7030 * avaliable in container. Checks if spare drive size is acceptable.
7031 * Parameters: Pointer to the supertype structure
7032 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7033 * NULL if fail
7034 */
7035 static struct mdinfo *get_spares_for_grow(struct supertype *st)
7036 {
7037 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
7038 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
7039 }
7040
7041 /******************************************************************************
7042 * function: imsm_create_metadata_update_for_reshape
7043 * Function creates update for whole IMSM container.
7044 *
7045 ******************************************************************************/
7046 static int imsm_create_metadata_update_for_reshape(
7047 struct supertype *st,
7048 struct geo_params *geo,
7049 int old_raid_disks,
7050 struct imsm_update_reshape **updatep)
7051 {
7052 struct intel_super *super = st->sb;
7053 struct imsm_super *mpb = super->anchor;
7054 int update_memory_size = 0;
7055 struct imsm_update_reshape *u = NULL;
7056 struct mdinfo *spares = NULL;
7057 int i;
7058 int delta_disks = 0;
7059 struct mdinfo *dev;
7060
7061 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7062 geo->raid_disks);
7063
7064 delta_disks = geo->raid_disks - old_raid_disks;
7065
7066 /* size of all update data without anchor */
7067 update_memory_size = sizeof(struct imsm_update_reshape);
7068
7069 /* now add space for spare disks that we need to add. */
7070 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
7071
7072 u = calloc(1, update_memory_size);
7073 if (u == NULL) {
7074 dprintf("error: "
7075 "cannot get memory for imsm_update_reshape update\n");
7076 return 0;
7077 }
7078 u->type = update_reshape_container_disks;
7079 u->old_raid_disks = old_raid_disks;
7080 u->new_raid_disks = geo->raid_disks;
7081
7082 /* now get spare disks list
7083 */
7084 spares = get_spares_for_grow(st);
7085
7086 if (spares == NULL
7087 || delta_disks > spares->array.spare_disks) {
7088 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
7089 "for %s.\n", geo->dev_name);
7090 goto abort;
7091 }
7092
7093 /* we have got spares
7094 * update disk list in imsm_disk list table in anchor
7095 */
7096 dprintf("imsm: %i spares are available.\n\n",
7097 spares->array.spare_disks);
7098
7099 dev = spares->devs;
7100 for (i = 0; i < delta_disks; i++) {
7101 struct dl *dl;
7102
7103 if (dev == NULL)
7104 break;
7105 u->new_disks[i] = makedev(dev->disk.major,
7106 dev->disk.minor);
7107 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
7108 dl->index = mpb->num_disks;
7109 mpb->num_disks++;
7110 dev = dev->next;
7111 }
7112
7113 abort:
7114 /* free spares
7115 */
7116 sysfs_free(spares);
7117
7118 dprintf("imsm: reshape update preparation :");
7119 if (i == delta_disks) {
7120 dprintf(" OK\n");
7121 *updatep = u;
7122 return update_memory_size;
7123 }
7124 free(u);
7125 dprintf(" Error\n");
7126
7127 return 0;
7128 }
7129
7130 /******************************************************************************
7131 * function: imsm_create_metadata_update_for_migration()
7132 * Creates update for IMSM array.
7133 *
7134 ******************************************************************************/
7135 static int imsm_create_metadata_update_for_migration(
7136 struct supertype *st,
7137 struct geo_params *geo,
7138 struct imsm_update_reshape_migration **updatep)
7139 {
7140 struct intel_super *super = st->sb;
7141 int update_memory_size = 0;
7142 struct imsm_update_reshape_migration *u = NULL;
7143 struct imsm_dev *dev;
7144 int previous_level = -1;
7145
7146 dprintf("imsm_create_metadata_update_for_migration(enter)"
7147 " New Level = %i\n", geo->level);
7148
7149 /* size of all update data without anchor */
7150 update_memory_size = sizeof(struct imsm_update_reshape_migration);
7151
7152 u = calloc(1, update_memory_size);
7153 if (u == NULL) {
7154 dprintf("error: cannot get memory for "
7155 "imsm_create_metadata_update_for_migration\n");
7156 return 0;
7157 }
7158 u->type = update_reshape_migration;
7159 u->subdev = super->current_vol;
7160 u->new_level = geo->level;
7161 u->new_layout = geo->layout;
7162 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
7163 u->new_disks[0] = -1;
7164
7165 dev = get_imsm_dev(super, u->subdev);
7166 if (dev) {
7167 struct imsm_map *map;
7168
7169 map = get_imsm_map(dev, 0);
7170 if (map)
7171 previous_level = map->raid_level;
7172 }
7173 if ((geo->level == 5) && (previous_level == 0)) {
7174 struct mdinfo *spares = NULL;
7175
7176 u->new_raid_disks++;
7177 spares = get_spares_for_grow(st);
7178 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
7179 free(u);
7180 sysfs_free(spares);
7181 update_memory_size = 0;
7182 dprintf("error: cannot get spare device "
7183 "for requested migration");
7184 return 0;
7185 }
7186 sysfs_free(spares);
7187 }
7188 dprintf("imsm: reshape update preparation : OK\n");
7189 *updatep = u;
7190
7191 return update_memory_size;
7192 }
7193
7194 static void imsm_update_metadata_locally(struct supertype *st,
7195 void *buf, int len)
7196 {
7197 struct metadata_update mu;
7198
7199 mu.buf = buf;
7200 mu.len = len;
7201 mu.space = NULL;
7202 mu.space_list = NULL;
7203 mu.next = NULL;
7204 imsm_prepare_update(st, &mu);
7205 imsm_process_update(st, &mu);
7206
7207 while (mu.space_list) {
7208 void **space = mu.space_list;
7209 mu.space_list = *space;
7210 free(space);
7211 }
7212 }
7213
7214 /***************************************************************************
7215 * Function: imsm_analyze_change
7216 * Description: Function analyze change for single volume
7217 * and validate if transition is supported
7218 * Parameters: Geometry parameters, supertype structure
7219 * Returns: Operation type code on success, -1 if fail
7220 ****************************************************************************/
/***************************************************************************
 * Function: imsm_analyze_change
 * Description: Function analyze change for single volume
 *		and validate if transition is supported
 * Parameters:	Geometry parameters, supertype structure
 * Returns:	Operation type code on success, -1 if fail
 ****************************************************************************/
enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
					   struct geo_params *geo)
{
	struct mdinfo info;
	int change = -1;	/* -1 = transition not supported */
	int check_devs = 0;	/* set when the container must hold one array */
	int chunk;

	getinfo_super_imsm_volume(st, &info, NULL);

	/* level change requested? Only the transitions enumerated below are
	 * supported (note: raid5 -> raid0 is deliberately absent) */
	if ((geo->level != info.array.level) &&
	    (geo->level >= 0) &&
	    (geo->level != UnSet)) {
		switch (info.array.level) {
		case 0:
			if (geo->level == 5) {
				change = CH_MIGRATION;
				check_devs = 1;
			}
			if (geo->level == 10) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		case 1:
			if (geo->level == 0) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		case 10:
			if (geo->level == 0) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		}
		if (change == -1) {
			fprintf(stderr,
				Name " Error. Level Migration from %d to %d "
				"not supported!\n",
				info.array.level, geo->level);
			goto analyse_change_exit;
		}
	} else
		geo->level = info.array.level;

	/* layout change requested? only raid5 layout toggling is allowed */
	if ((geo->layout != info.array.layout)
	    && ((geo->layout != UnSet) && (geo->layout != -1))) {
		change = CH_MIGRATION;
		if ((info.array.layout == 0)
		    && (info.array.level == 5)
		    && (geo->layout == 5)) {
			/* reshape 5 -> 4 */
			/* NOTE(review): the 5->4/4->5 labels assume a
			 * specific imsm layout encoding - verify against
			 * the layout constants before relying on them */
		} else if ((info.array.layout == 5)
			   && (info.array.level == 5)
			   && (geo->layout == 0)) {
			/* reshape 4 -> 5 */
			geo->layout = 0;
			geo->level = 5;
		} else {
			fprintf(stderr,
				Name " Error. Layout Migration from %d to %d "
				"not supported!\n",
				info.array.layout, geo->layout);
			change = -1;
			goto analyse_change_exit;
		}
	} else
		geo->layout = info.array.layout;

	/* chunk size change is a migration; otherwise inherit current */
	if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
	    && (geo->chunksize != info.array.chunk_size))
		change = CH_MIGRATION;
	else
		geo->chunksize = info.array.chunk_size;

	/* final platform/geometry validation of the target shape */
	chunk = geo->chunksize / 1024;
	if (!validate_geometry_imsm(st,
				    geo->level,
				    geo->layout,
				    geo->raid_disks,
				    &chunk,
				    geo->size,
				    0, 0, 1))
		change = -1;

	if (check_devs) {
		struct intel_super *super = st->sb;
		struct imsm_super *mpb = super->anchor;

		/* takeover/migration require the volume to be the only
		 * array in its container */
		if (mpb->num_raid_devs > 1) {
			fprintf(stderr,
				Name " Error. Cannot perform operation on %s"
				"- for this operation it MUST be single "
				"array in container\n",
				geo->dev_name);
			change = -1;
		}
	}

analyse_change_exit:

	return change;
}
7326
7327 int imsm_takeover(struct supertype *st, struct geo_params *geo)
7328 {
7329 struct intel_super *super = st->sb;
7330 struct imsm_update_takeover *u;
7331
7332 u = malloc(sizeof(struct imsm_update_takeover));
7333 if (u == NULL)
7334 return 1;
7335
7336 u->type = update_takeover;
7337 u->subarray = super->current_vol;
7338
7339 /* 10->0 transition */
7340 if (geo->level == 0)
7341 u->direction = R10_TO_R0;
7342
7343 /* 0->10 transition */
7344 if (geo->level == 10)
7345 u->direction = R0_TO_R10;
7346
7347 /* update metadata locally */
7348 imsm_update_metadata_locally(st, u,
7349 sizeof(struct imsm_update_takeover));
7350 /* and possibly remotely */
7351 if (st->update_tail)
7352 append_metadata_update(st, u,
7353 sizeof(struct imsm_update_takeover));
7354 else
7355 free(u);
7356
7357 return 0;
7358 }
7359
/* Print the experimental-feature warning and ask the user to confirm.
 * Returns the (non-zero = yes) answer from ask().
 */
static int warn_user_about_risk(void)
{
	int answer;

	fprintf(stderr,
		"\nThis is an experimental feature. Data on the RAID volume(s) "
		"can be lost!!!\n\n"
		"To continue command execution please make sure that\n"
		"the grow process will not be interrupted. Use safe power\n"
		"supply to avoid unexpected system reboot. Make sure that\n"
		"reshaped container is not assembled automatically during\n"
		"system boot.\n"
		"If reshape is interrupted, assemble array manually\n"
		"using e.g. '-Ac' option and up to date mdadm.conf file.\n"
		"Assembly in scan mode is not possible in such case.\n"
		"Growing container with boot array is not possible.\n"
		"If boot array reshape is interrupted, whole file system\n"
		"can be lost.\n\n");
	answer = ask("Do you want to continue? ");
	fprintf(stderr, "\n");

	return answer;
}
7383
/* superswitch ->reshape_super entry point.
 * Dispatches a grow/reshape request either to the container path (disk
 * count increase across all members) or to the volume path (takeover or
 * migration on a single subarray).
 * Returns 0 on success, 1 on failure (note: 1, not -1).
 */
static int imsm_reshape_super(struct supertype *st, long long size, int level,
			      int layout, int chunksize, int raid_disks,
			      int delta_disks, char *backup, char *dev,
			      int verbose)
{
	int ret_val = 1;	/* pessimistic default: failure */
	struct geo_params geo;

	dprintf("imsm: reshape_super called.\n");

	/* collect the requested target geometry in one struct */
	memset(&geo, 0, sizeof(struct geo_params));

	geo.dev_name = dev;
	geo.dev_id = st->devnum;
	geo.size = size;
	geo.level = level;
	geo.layout = layout;
	geo.chunksize = chunksize;
	geo.raid_disks = raid_disks;
	if (delta_disks != UnSet)
		geo.raid_disks += delta_disks;

	dprintf("\tfor level : %i\n", geo.level);
	dprintf("\tfor raid_disks : %i\n", geo.raid_disks);

	/* reshape is gated behind MDADM_EXPERIMENTAL */
	if (experimental() == 0)
		return ret_val;

	if (st->container_dev == st->devnum) {
		/* On container level we can only increase number of devices. */
		dprintf("imsm: info: Container operation\n");
		int old_raid_disks = 0;

		/* this warning will be removed when imsm checkpointing
		 * will be implemented, and restoring from check-point
		 * operation will be transparent for reboot process
		 */
		if (warn_user_about_risk() == 0)
			return ret_val;

		if (imsm_reshape_is_allowed_on_container(
			    st, &geo, &old_raid_disks)) {
			struct imsm_update_reshape *u = NULL;
			int len;

			len = imsm_create_metadata_update_for_reshape(
				st, &geo, old_raid_disks, &u);

			if (len <= 0) {
				dprintf("imsm: Cannot prepare update\n");
				goto exit_imsm_reshape_super;
			}

			ret_val = 0;
			/* update metadata locally */
			imsm_update_metadata_locally(st, u, len);
			/* and possibly remotely */
			if (st->update_tail)
				append_metadata_update(st, u, len);
			else
				free(u);

		} else {
			fprintf(stderr, Name ": (imsm) Operation "
				"is not allowed on this container\n");
		}
	} else {
		/* On volume level we support following operations
		 * - takeover: raid10 -> raid0; raid0 -> raid10
		 * - chunk size migration
		 * - migration: raid5 -> raid0; raid0 -> raid5
		 */
		struct intel_super *super = st->sb;
		struct intel_dev *dev = super->devlist;
		int change, devnum;
		dprintf("imsm: info: Volume operation\n");
		/* find requested device by matching its md minor number */
		while (dev) {
			imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
			if (devnum == geo.dev_id)
				break;
			dev = dev->next;
		}
		if (dev == NULL) {
			fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
				geo.dev_name, geo.dev_id);
			goto exit_imsm_reshape_super;
		}
		super->current_vol = dev->index;
		change = imsm_analyze_change(st, &geo);
		switch (change) {
		case CH_TAKEOVER:
			ret_val = imsm_takeover(st, &geo);
			break;
		case CH_MIGRATION: {
			struct imsm_update_reshape_migration *u = NULL;
			int len =
				imsm_create_metadata_update_for_migration(
					st, &geo, &u);
			if (len < 1) {
				dprintf("imsm: "
					"Cannot prepare update\n");
				break;
			}
			ret_val = 0;
			/* update metadata locally */
			imsm_update_metadata_locally(st, u, len);
			/* and possibly remotely */
			if (st->update_tail)
				append_metadata_update(st, u, len);
			else
				free(u);
		}
		break;
		default:
			ret_val = 1;
		}
	}

exit_imsm_reshape_super:
	dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
	return ret_val;
}
7507
/* superswitch ->manage_reshape hook for imsm.
 * No imsm-specific reshape driver exists yet, so delegate everything to
 * the generic child_monitor implementation unchanged.
 */
static int imsm_manage_reshape(
	int afd, struct mdinfo *sra, struct reshape *reshape,
	struct supertype *st, unsigned long stripes,
	int *fds, unsigned long long *offsets,
	int dests, int *destfd, unsigned long long *destoffsets)
{
	return child_monitor(afd, sra, reshape, st, stripes, fds,
			     offsets, dests, destfd, destoffsets);
}
7519 #endif /* MDASSEMBLE */
7520
/* superswitch vtable binding the imsm handlers into mdadm/mdmon.
 * Tool-only operations are compiled out for the mdmon (MDASSEMBLE) build.
 */
struct superswitch super_imsm = {
#ifndef MDASSEMBLE
	/* mdadm tool operations (examine/create/manage/grow) */
	.examine_super = examine_super_imsm,
	.brief_examine_super = brief_examine_super_imsm,
	.brief_examine_subarrays = brief_examine_subarrays_imsm,
	.export_examine_super = export_examine_super_imsm,
	.detail_super = detail_super_imsm,
	.brief_detail_super = brief_detail_super_imsm,
	.write_init_super = write_init_super_imsm,
	.validate_geometry = validate_geometry_imsm,
	.add_to_super = add_to_super_imsm,
	.remove_from_super = remove_from_super_imsm,
	.detail_platform = detail_platform_imsm,
	.kill_subarray = kill_subarray_imsm,
	.update_subarray = update_subarray_imsm,
	.load_container = load_container_imsm,
	.default_geometry = default_geometry_imsm,
	.get_disk_controller_domain = imsm_get_disk_controller_domain,
	.reshape_super = imsm_reshape_super,
	.manage_reshape = imsm_manage_reshape,
#endif
	/* operations shared by all builds */
	.match_home = match_home_imsm,
	.uuid_from_super= uuid_from_super_imsm,
	.getinfo_super = getinfo_super_imsm,
	.getinfo_super_disks = getinfo_super_disks_imsm,
	.update_super = update_super_imsm,

	.avail_size = avail_size_imsm,
	.min_acceptable_spare_size = min_acceptable_spare_size_imsm,

	.compare_super = compare_super_imsm,

	.load_super = load_super_imsm,
	.init_super = init_super_imsm,
	.store_super = store_super_imsm,
	.free_super = free_super_imsm,
	.match_metadata_desc = match_metadata_desc_imsm,
	.container_content = container_content_imsm,

	.external = 1,	/* metadata is managed externally (by mdmon) */
	.name = "imsm",

#ifndef MDASSEMBLE
/* for mdmon */
	.open_new = imsm_open_new,
	.set_array_state= imsm_set_array_state,
	.set_disk = imsm_set_disk,
	.sync_metadata = imsm_sync_metadata,
	.activate_spare = imsm_activate_spare,
	.process_update = imsm_process_update,
	.prepare_update = imsm_prepare_update,
#endif /* MDASSEMBLE */
};