/*
 * super-intel.c — mdadm Intel(R) Matrix Storage Manager (IMSM) support.
 * Snapshot from thirdparty/mdadm.git at commit
 * "imsm: Prepare checkpoint update for general migration".
 */
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
43
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
53
54 #define MPB_SECTOR_CNT 2210
55 #define IMSM_RESERVED_SECTORS 4096
56 #define SECT_PER_MB_SHIFT 11
57
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
/* On-disk record for one physical disk in the container.  The hex
 * offsets in the comments are relative to the start of the MPB anchor;
 * do not change field order or size — this is persisted metadata.
 */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		/* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			/* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
	__u32 status;			/* 0xF0 - 0xF3 bitmask of the flags above */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define	IMSM_DISK_FILLERS	4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};

/* RAID map configuration infos. */
/* Describes how one volume is laid out across member disks.  A second
 * map may directly follow this one inside imsm_vol during a migration.
 */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;  /* valid only when state is degraded */
	__u8  ddf;
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));
98
/* Per-volume state embedded in struct imsm_dev.  When migr_state is
 * set, a second struct imsm_map immediately follows map[0] in memory.
 */
struct imsm_vol {
	__u32 curr_migr_unit;	/* migration checkpoint: last completed unit */
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));

/* On-disk record for one RAID volume (a "RaidDev"). */
struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;		/* volume size in blocks, low 32 bits */
	__u32 size_high;	/* volume size in blocks, high 32 bits */
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;	/* Persistent RaidDev status */
	__u32 reserved_blocks; /* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
149
/* The MPB anchor: fixed header of the on-disk metadata.  The variable
 * length disk table, device records, and BBM log follow it.
 */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A  */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));

#define BBM_LOG_MAX_ENTRIES 254

/* One remapped-block entry in the Bad Block Management log. */
struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

/* Bad Block Management log, stored after the device records. */
struct bbm_log {
	__u32 signature; /* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved; /* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
191
192
#ifndef MDASSEMBLE
/* printable names indexed by IMSM_T_STATE_* map states */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif

#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209

#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */

#define UNIT_SRC_NORMAL     0   /* Source data for curr_migr_unit must
				 *  be recovered using srcMap */
#define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
				 *  already been migrated and must
				 *  be recovered from checkpoint area */
/* On-disk checkpoint record for a general migration (reshape); allows
 * an interrupted migration to be resumed from the last completed unit.
 */
struct migr_record {
	__u32 rec_status;	    /* Status used to determine how to restart
				     * migration in case it aborts
				     * in some fashion */
	__u32 curr_migr_unit;	    /* 0..numMigrUnits-1 */
	__u32 family_num;	    /* Family number of MPB
				     * containing the RaidDev
				     * that is migrating */
	__u32 ascending_migr;	    /* True if migrating in increasing
				     * order of lbas */
	__u32 blocks_per_unit;	    /* Num disk blocks per unit of operation */
	__u32 dest_depth_per_unit;  /* Num member blocks each destMap
				     * member disk
				     * advances per unit-of-operation */
	__u32 ckpt_area_pba;	    /* Pba of first block of ckpt copy area */
	__u32 dest_1st_member_lba;  /* First member lba on first
				     * stripe of destination */
	__u32 num_migr_units;	    /* Total num migration units-of-op */
	__u32 post_migr_vol_cap;    /* Size of volume after
				     * migration completes */
	__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
	__u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
				     * migration ckpt record was read from
				     * (for recovered migrations) */
} __attribute__ ((__packed__));
231
232 static __u8 migr_type(struct imsm_dev *dev)
233 {
234 if (dev->vol.migr_type == MIGR_VERIFY &&
235 dev->status & DEV_VERIFY_AND_FIX)
236 return MIGR_REPAIR;
237 else
238 return dev->vol.migr_type;
239 }
240
241 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
242 {
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
245 */
246 if (migr_type == MIGR_REPAIR) {
247 dev->vol.migr_type = MIGR_VERIFY;
248 dev->status |= DEV_VERIFY_AND_FIX;
249 } else {
250 dev->vol.migr_type = migr_type;
251 dev->status &= ~DEV_VERIFY_AND_FIX;
252 }
253 }
254
/* Number of 512-byte sectors needed to hold @bytes (round up). */
static unsigned int sector_count(__u32 bytes)
{
	return (bytes + (512 - 1)) / 512;
}
259
/* Number of 512-byte sectors occupied by the on-disk MPB; mpb_size is
 * stored little-endian, hence the __le32_to_cpu conversion.
 */
static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}
264
/* one parsed raid device record plus its index within the anchor */
struct intel_dev {
	struct imsm_dev *dev;
	struct intel_dev *next;
	unsigned index;
};

/* one Intel HBA the member disks may be attached to */
struct intel_hba {
	enum sys_dev_type type;
	char *path;
	char *pci_id;		/* points into 'path': text after the last '/' */
	struct intel_hba *next;
};

/* pending disk-management operation for mdmon */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	union {
		void *migr_rec_buf; /* buffer for I/O operations */
		struct migr_record *migr_rec; /* migration record */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int current_vol; /* index of raid device undergoing creation */
	__u32 create_offset; /* common start for 'current_vol' */
	__u32 random; /* random data for seeding new family numbers */
	struct intel_dev *devlist;
	struct dl {
		struct dl *next;
		int index;	/* slot in the anchor's disk table, -1 for spares */
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
		int raiddisk; /* slot to fill in autolayout */
		enum action action;
	} *disks;
	struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
				      active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	struct intel_hba *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
	struct intel_super *next; /* (temp) list for disambiguating family_num */
};

/* disk plus the index of the family that owns it (container assembly) */
struct intel_disk {
	struct imsm_disk disk;
	#define IMSM_UNKNOWN_OWNER (-1)
	int owner;
	struct intel_disk *next;
};

/* a used region on a physical disk: [start, start + size) in blocks */
struct extent {
	unsigned long long start, size;
};

/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER,
	CH_MIGRATION,
};
338
/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_reshape_migration,
	update_takeover,
	update_general_migration_checkpoint,
};

/* Each update message below starts with 'enum imsm_update_type type'
 * so imsm_process_update can dispatch on the common first member.
 */

/* request to activate @dl as a spare in @slot of subarray @array */
struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};

/* user-requested geometry for a reshape operation */
struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};

enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10
};
struct imsm_update_takeover {
	enum imsm_update_type type;
	int subarray;
	enum takeover_direction direction;
};

struct imsm_update_reshape {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct imsm_update_reshape_migration {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;
	/* fields for array migration changes
	 */
	int subdev;
	int new_level;
	int new_layout;
	int new_chunksize;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

/* checkpoint update for a general migration (see curr_migr_unit) */
struct imsm_update_general_migration_checkpoint {
	enum imsm_update_type type;
	__u32 curr_migr_unit;
};

struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};

/* followed in memory by disk_info[] — see get_disk_info() */
struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

struct imsm_update_kill_array {
	enum imsm_update_type type;
	int dev_idx;
};

struct imsm_update_rename_array {
	enum imsm_update_type type;
	__u8 name[MAX_RAID_SERIAL_LEN];
	int dev_idx;
};

struct imsm_update_add_remove_disk {
	enum imsm_update_type type;
};
431
432
/* printable names for enum sys_dev_type values */
static const char *_sys_dev_type[] = {
	[SYS_DEV_UNKNOWN] = "Unknown",
	[SYS_DEV_SAS] = "SAS",
	[SYS_DEV_SATA] = "SATA"
};

/* Return a printable name for @type; out-of-range values map to
 * "Unknown" rather than indexing past the table.
 */
const char *get_sys_dev_type(enum sys_dev_type type)
{
	if (type >= SYS_DEV_MAX)
		type = SYS_DEV_UNKNOWN;

	return _sys_dev_type[type];
}
446
447 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
448 {
449 struct intel_hba *result = malloc(sizeof(*result));
450 if (result) {
451 result->type = device->type;
452 result->path = strdup(device->path);
453 result->next = NULL;
454 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
455 result->pci_id++;
456 }
457 return result;
458 }
459
460 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
461 {
462 struct intel_hba *result=NULL;
463 for (result = hba; result; result = result->next) {
464 if (result->type == device->type && strcmp(result->path, device->path) == 0)
465 break;
466 }
467 return result;
468 }
469
/* Register the HBA @device is attached to with @super.
 * Returns 1 on success (already known, or newly added) and 2 when the
 * device sits on a different HBA type than the one already recorded.
 */
static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
{
	struct intel_hba *hba;

	/* check if disk attached to Intel HBA */
	hba = find_intel_hba(super->hba, device);
	if (hba != NULL)
		return 1;
	/* Check if HBA is already attached to super */
	if (super->hba == NULL) {
		super->hba = alloc_intel_hba(device);
		return 1;
	}

	hba = super->hba;
	/* Intel metadata allows for all disks attached to the same type HBA.
	 * Mixing different HBA types is not supported.
	 */
	if (device->type != hba->type)
		return 2;

	/* append the new HBA to the end of the list */
	while (hba->next)
		hba = hba->next;

	hba->next = alloc_intel_hba(device);
	return 1;
}
497
/* Find the Intel HBA that the disk (open @fd, or named @devname when
 * fd < 0) is attached to.  On success the matching element is unlinked
 * from the device list (so freeing the rest does not free it) and
 * returned; the caller owns it.  Returns NULL when not found.
 */
static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
{
	struct sys_dev *list, *elem, *prev;
	char *disk_path;

	if ((list = find_intel_devices()) == NULL)
		return 0;

	if (fd < 0)
		disk_path  = (char *) devname;
	else
		disk_path = diskfd_to_devpath(fd);

	if (!disk_path) {
		free_sys_dev(&list);
		return 0;
	}

	for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
		if (path_attached_to_hba(disk_path, elem->path)) {
			/* splice 'elem' out of the list before freeing it */
			if (prev == NULL)
				list = list->next;
			else
				prev->next = elem->next;
			elem->next = NULL;
			/* only free disk_path if we allocated it above */
			if (disk_path != devname)
				free(disk_path);
			free_sys_dev(&list);
			return elem;
		}
	}
	if (disk_path != devname)
		free(disk_path);
	free_sys_dev(&list);

	return NULL;
}
535
536
537 static int find_intel_hba_capability(int fd, struct intel_super *super,
538 char *devname);
539
540 static struct supertype *match_metadata_desc_imsm(char *arg)
541 {
542 struct supertype *st;
543
544 if (strcmp(arg, "imsm") != 0 &&
545 strcmp(arg, "default") != 0
546 )
547 return NULL;
548
549 st = malloc(sizeof(*st));
550 if (!st)
551 return NULL;
552 memset(st, 0, sizeof(*st));
553 st->container_dev = NoMdDev;
554 st->ss = &super_imsm;
555 st->max_devs = IMSM_MAX_DEVICES;
556 st->minor_version = 0;
557 st->sb = NULL;
558 return st;
559 }
560
#ifndef MDASSEMBLE
/* the version string is stored in 'sig' directly after the signature */
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif

/* retrieve a disk directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load time
 */
static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
{
	if (index >= mpb->num_disks)
		return NULL;
	return &mpb->disk[index];
}
577
578 /* retrieve the disk description based on a index of the disk
579 * in the sub-array
580 */
581 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
582 {
583 struct dl *d;
584
585 for (d = super->disks; d; d = d->next)
586 if (d->index == index)
587 return d;
588
589 return NULL;
590 }
591 /* retrieve a disk from the parsed metadata */
592 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
593 {
594 struct dl *dl;
595
596 dl = get_imsm_dl_disk(super, index);
597 if (dl)
598 return &dl->disk;
599
600 return NULL;
601 }
602
/* generate a checksum directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load or write_super after coalescing
 *
 * Sums the MPB as 32-bit little-endian words, then subtracts the stored
 * check_sum field so the result is comparable to it.
 */
static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
{
	__u32 end = mpb->mpb_size / sizeof(end);
	__u32 *p = (__u32 *) mpb;
	__u32 sum = 0;

	while (end--) {
		sum += __le32_to_cpu(*p);
		p++;
	}

	return sum - __le32_to_cpu(mpb->check_sum);
}
619
620 static size_t sizeof_imsm_map(struct imsm_map *map)
621 {
622 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
623 }
624
struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
{
	/* A device can have 2 maps if it is in the middle of a migration.
	 * If second_map is:
	 *    0   - we return the first map
	 *    1   - we return the second map if it exists, else NULL
	 *   -1   - we return the second map if it exists, else the first
	 */
	struct imsm_map *map = &dev->vol.map[0];

	if (second_map == 1 && !dev->vol.migr_state)
		return NULL;
	else if (second_map == 1 ||
		 (second_map < 0 && dev->vol.migr_state)) {
		/* the second map starts directly after the (variable
		 * length) first map
		 */
		void *ptr = map;

		return ptr + sizeof_imsm_map(map);
	} else
		return map;

}
646
/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
	/* base struct minus the embedded one-map placeholder, plus the
	 * actual (variable length) first map
	 */
	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
		      sizeof_imsm_map(get_imsm_map(dev, 0));

	/* migrating means an additional map */
	if (dev->vol.migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 1));
	else if (migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 0));

	return size;
}
663
#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update
 *
 * The disk_info array sits directly after the (variable length)
 * imsm_dev embedded in the update message.
 */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
#endif
677
678 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
679 {
680 int offset;
681 int i;
682 void *_mpb = mpb;
683
684 if (index >= mpb->num_raid_devs)
685 return NULL;
686
687 /* devices start after all disks */
688 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
689
690 for (i = 0; i <= index; i++)
691 if (i == index)
692 return _mpb + offset;
693 else
694 offset += sizeof_imsm_dev(_mpb + offset, 0);
695
696 return NULL;
697 }
698
699 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
700 {
701 struct intel_dev *dv;
702
703 if (index >= super->anchor->num_raid_devs)
704 return NULL;
705 for (dv = super->devlist; dv; dv = dv->next)
706 if (dv->index == index)
707 return dv->dev;
708 return NULL;
709 }
710
/*
 * for second_map:
 *  == 0 get first map
 *  == 1 get second map
 *  == -1 than get map according to the current migr_state
 */
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
				  int slot,
				  int second_map)
{
	struct imsm_map *map;

	map = get_imsm_map(dev, second_map);

	/* top byte identifies disk under rebuild */
	return __le32_to_cpu(map->disk_ord_tbl[slot]);
}

/* strip the flag bits from the top byte, leaving the disk index */
#define ord_to_idx(ord) (((ord) << 8) >> 8)
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);

	return ord_to_idx(ord);
}

/* store an ordinal (disk index plus flags) in little-endian disk order */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
741
742 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
743 {
744 int slot;
745 __u32 ord;
746
747 for (slot = 0; slot < map->num_members; slot++) {
748 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
749 if (ord_to_idx(ord) == idx)
750 return slot;
751 }
752
753 return -1;
754 }
755
756 static int get_imsm_raid_level(struct imsm_map *map)
757 {
758 if (map->raid_level == 1) {
759 if (map->num_members == 2)
760 return 1;
761 else
762 return 10;
763 }
764
765 return map->raid_level;
766 }
767
768 static int cmp_extent(const void *av, const void *bv)
769 {
770 const struct extent *a = av;
771 const struct extent *b = bv;
772 if (a->start < b->start)
773 return -1;
774 if (a->start > b->start)
775 return 1;
776 return 0;
777 }
778
779 static int count_memberships(struct dl *dl, struct intel_super *super)
780 {
781 int memberships = 0;
782 int i;
783
784 for (i = 0; i < super->anchor->num_raid_devs; i++) {
785 struct imsm_dev *dev = get_imsm_dev(super, i);
786 struct imsm_map *map = get_imsm_map(dev, 0);
787
788 if (get_imsm_disk_slot(map, dl->index) >= 0)
789 memberships++;
790 }
791
792 return memberships;
793 }
794
/* Build a start-sorted list of the extents used by subarrays on @dl,
 * terminated by a size == 0 sentinel covering the metadata reservation
 * at the end of the disk.  Caller frees the returned array; NULL on
 * allocation failure.
 */
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i;
	int memberships = count_memberships(dl, super);
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* one extent per membership plus the sentinel entry */
	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		if (get_imsm_disk_slot(map, dl->index) >= 0) {
			e->start = __le32_to_cpu(map->pba_of_lba0);
			e->size = __le32_to_cpu(map->blocks_per_member);
			e++;
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		/* round down to 1k block to satisfy precision of the kernel
		 * 'size' interface
		 */
		remainder &= ~1UL;
		/* make sure remainder is still sane */
		if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
			remainder = ROUND_UP(super->len, 512) >> 9;
		if (reservation > remainder)
			reservation = remainder;
	}
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
845
/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry, otherwise fallback to the
 * default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
	struct extent *e;
	int i;
	__u32 rv;

	/* for spares just return a minimal reservation which will grow
	 * once the spare is picked up by an array
	 */
	if (dl->index == -1)
		return MPB_SECTOR_CNT;

	e = get_extents(super, dl);
	if (!e)
		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* scroll to last entry (the size == 0 sentinel) */
	for (i = 0; e[i].size; i++)
		continue;

	/* everything after the sentinel's start is the reservation */
	rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;

	free(e);

	return rv;
}
876
877 static int is_spare(struct imsm_disk *disk)
878 {
879 return (disk->status & SPARE_DISK) == SPARE_DISK;
880 }
881
882 static int is_configured(struct imsm_disk *disk)
883 {
884 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
885 }
886
887 static int is_failed(struct imsm_disk *disk)
888 {
889 return (disk->status & FAILED_DISK) == FAILED_DISK;
890 }
891
/* Return minimum size of a spare that can be used in this array (bytes).
 * Computed as the last lba used by any subarray on the first active
 * disk, plus the metadata reservation; 0 when it cannot be determined.
 */
static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
{
	struct intel_super *super = st->sb;
	struct dl *dl;
	struct extent *e;
	int i;
	unsigned long long rv = 0;

	if (!super)
		return rv;
	/* find first active disk in array */
	dl = super->disks;
	while (dl && (is_failed(&dl->disk) || dl->index == -1))
		dl = dl->next;
	if (!dl)
		return rv;
	/* find last lba used by subarrays */
	e = get_extents(super, dl);
	if (!e)
		return rv;
	for (i = 0; e[i].size; i++)
		continue;
	if (i > 0)
		rv = e[i-1].start + e[i-1].size;
	free(e);
	/* add the amount of space needed for metadata */
	rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
	/* convert sectors to bytes */
	return rv * 512;
}
922
923 #ifndef MDASSEMBLE
924 static __u64 blocks_per_migr_unit(struct intel_super *super,
925 struct imsm_dev *dev);
926
/* Pretty-print one raid device for --examine: level, members, slots,
 * sizes, and migration/checkpoint state.  map2 (the migration source
 * map) is shown after "<--" arrows when a migration is in progress.
 * disk_idx selects which member's slot is reported as "This Slot".
 */
static void print_imsm_dev(struct intel_super *super,
			   struct imsm_dev *dev,
			   char *uuid,
			   int disk_idx)
{
	__u64 sz;
	int slot, i;
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *map2 = get_imsm_map(dev, 1);
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf("           UUID : %s\n", uuid);
	printf("     RAID Level : %d", get_imsm_raid_level(map));
	if (map2)
		printf(" <-- %d", get_imsm_raid_level(map2));
	printf("\n");
	printf("        Members : %d", map->num_members);
	if (map2)
		printf(" <-- %d", map2->num_members);
	printf("\n");
	printf("          Slots : [");
	for (i = 0; i < map->num_members; i++) {
		ord = get_imsm_ord_tbl_ent(dev, i, 0);
		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
	}
	printf("]");
	if (map2) {
		printf(" <-- [");
		for (i = 0; i < map2->num_members; i++) {
			ord = get_imsm_ord_tbl_ent(dev, i, 1);
			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
		}
		printf("]");
	}
	printf("\n");
	printf("    Failed disk : ");
	if (map->failed_disk_num == 0xff)
		printf("none");
	else
		printf("%i", map->failed_disk_num);
	printf("\n");
	slot = get_imsm_disk_slot(map, disk_idx);
	if (slot >= 0) {
		ord = get_imsm_ord_tbl_ent(dev, slot, -1);
		printf("      This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf("      This Slot : ?\n");
	/* array size is a 64-bit block count split across two fields */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf("     Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf("   Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf("  Sector Offset : %u\n",
		__le32_to_cpu(map->pba_of_lba0));
	printf("    Num Stripes : %u\n",
		__le32_to_cpu(map->num_data_stripes));
	printf("     Chunk Size : %u KiB",
		__le16_to_cpu(map->blocks_per_strip) / 2);
	if (map2)
		printf(" <-- %u KiB",
			__le16_to_cpu(map2->blocks_per_strip) / 2);
	printf("\n");
	printf("       Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf("  Migrate State : ");
	if (dev->vol.migr_state) {
		if (migr_type(dev) == MIGR_INIT)
			printf("initialize\n");
		else if (migr_type(dev) == MIGR_REBUILD)
			printf("rebuild\n");
		else if (migr_type(dev) == MIGR_VERIFY)
			printf("check\n");
		else if (migr_type(dev) == MIGR_GEN_MIGR)
			printf("general migration\n");
		else if (migr_type(dev) == MIGR_STATE_CHANGE)
			printf("state change\n");
		else if (migr_type(dev) == MIGR_REPAIR)
			printf("repair\n");
		else
			printf("<unknown:%d>\n", migr_type(dev));
	} else
		printf("idle\n");
	printf("      Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		struct imsm_map *map = get_imsm_map(dev, 1);

		printf(" <-- %s", map_state_str[map->map_state]);
		printf("\n     Checkpoint : %u (%llu)",
		       __le32_to_cpu(dev->vol.curr_migr_unit),
		       (unsigned long long)blocks_per_migr_unit(super, dev));
	}
	printf("\n");
	printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
1026
/* Pretty-print one physical disk for --examine.  @reserved is the
 * number of sectors held back for metadata, subtracted from the
 * reported usable size.
 */
static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
{
	struct imsm_disk *disk = __get_imsm_disk(mpb, index);
	char str[MAX_RAID_SERIAL_LEN + 1];
	__u64 sz;

	if (index < 0 || !disk)
		return;

	printf("\n");
	/* serial is not NUL terminated on disk; bound the copy */
	snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
	printf("  Disk%02d Serial : %s\n", index, str);
	printf("          State :%s%s%s\n", is_spare(disk) ? " spare" : "",
					    is_configured(disk) ? " active" : "",
					    is_failed(disk) ? " failed" : "");
	printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
	sz = __le32_to_cpu(disk->total_blocks) - reserved;
	printf("    Usable Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
}
1047
1048 static int is_gen_migration(struct imsm_dev *dev);
1049
/* Print the migration checkpoint record for --examine.  Only
 * meaningful while some device is in general migration; the record is
 * only stored on the first two disks, so other disks report "Empty".
 */
void examine_migr_rec_imsm(struct intel_super *super)
{
	struct migr_record *migr_rec = super->migr_rec;
	struct imsm_super *mpb = super->anchor;
	int i;

	/* find the (at most one) device undergoing general migration */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		if (is_gen_migration(dev) == 0)
			continue;

		printf("\nMigration Record Information:");
		if (super->disks->index > 1) {
			printf(" Empty\n                              ");
			printf("Examine one of first two disks in array\n");
			break;
		}
		printf("\n                     Status : ");
		if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
			printf("Normal\n");
		else
			printf("Contains Data\n");
		printf("               Current Unit : %u\n",
		       __le32_to_cpu(migr_rec->curr_migr_unit));
		printf("                     Family : %u\n",
		       __le32_to_cpu(migr_rec->family_num));
		printf("                  Ascending : %u\n",
		       __le32_to_cpu(migr_rec->ascending_migr));
		printf("            Blocks Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->blocks_per_unit));
		printf("       Dest. Depth Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->dest_depth_per_unit));
		printf("        Checkpoint Area pba : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_area_pba));
		printf("           First member lba : %u\n",
		       __le32_to_cpu(migr_rec->dest_1st_member_lba));
		printf("      Total Number of Units : %u\n",
		       __le32_to_cpu(migr_rec->num_migr_units));
		printf("             Size of volume : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap));
		printf("  Expansion space for LBA64 : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
		printf("       Record was read from : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_read_disk_num));

		break;
	}
}
1098
1099 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
1100
1101 static void examine_super_imsm(struct supertype *st, char *homehost)
1102 {
1103 struct intel_super *super = st->sb;
1104 struct imsm_super *mpb = super->anchor;
1105 char str[MAX_SIGNATURE_LENGTH];
1106 int i;
1107 struct mdinfo info;
1108 char nbuf[64];
1109 __u32 sum;
1110 __u32 reserved = imsm_reserved_sectors(super, super->disks);
1111 struct dl *dl;
1112
1113 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
1114 printf(" Magic : %s\n", str);
1115 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1116 printf(" Version : %s\n", get_imsm_version(mpb));
1117 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
1118 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1119 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
1120 getinfo_super_imsm(st, &info, NULL);
1121 fname_from_uuid(st, &info, nbuf, ':');
1122 printf(" UUID : %s\n", nbuf + 5);
1123 sum = __le32_to_cpu(mpb->check_sum);
1124 printf(" Checksum : %08x %s\n", sum,
1125 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
1126 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
1127 printf(" Disks : %d\n", mpb->num_disks);
1128 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
1129 print_imsm_disk(mpb, super->disks->index, reserved);
1130 if (super->bbm_log) {
1131 struct bbm_log *log = super->bbm_log;
1132
1133 printf("\n");
1134 printf("Bad Block Management Log:\n");
1135 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1136 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1137 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1138 printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
1139 printf(" First Spare : %llx\n",
1140 (unsigned long long) __le64_to_cpu(log->first_spare_lba));
1141 }
1142 for (i = 0; i < mpb->num_raid_devs; i++) {
1143 struct mdinfo info;
1144 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1145
1146 super->current_vol = i;
1147 getinfo_super_imsm(st, &info, NULL);
1148 fname_from_uuid(st, &info, nbuf, ':');
1149 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
1150 }
1151 for (i = 0; i < mpb->num_disks; i++) {
1152 if (i == super->disks->index)
1153 continue;
1154 print_imsm_disk(mpb, i, reserved);
1155 }
1156 for (dl = super->disks ; dl; dl = dl->next) {
1157 struct imsm_disk *disk;
1158 char str[MAX_RAID_SERIAL_LEN + 1];
1159 __u64 sz;
1160
1161 if (dl->index >= 0)
1162 continue;
1163
1164 disk = &dl->disk;
1165 printf("\n");
1166 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1167 printf(" Disk Serial : %s\n", str);
1168 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1169 is_configured(disk) ? " active" : "",
1170 is_failed(disk) ? " failed" : "");
1171 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1172 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1173 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1174 human_size(sz * 512));
1175 }
1176
1177 examine_migr_rec_imsm(super);
1178 }
1179
1180 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1181 {
1182 /* We just write a generic IMSM ARRAY entry */
1183 struct mdinfo info;
1184 char nbuf[64];
1185 struct intel_super *super = st->sb;
1186
1187 if (!super->anchor->num_raid_devs) {
1188 printf("ARRAY metadata=imsm\n");
1189 return;
1190 }
1191
1192 getinfo_super_imsm(st, &info, NULL);
1193 fname_from_uuid(st, &info, nbuf, ':');
1194 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1195 }
1196
1197 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1198 {
1199 /* We just write a generic IMSM ARRAY entry */
1200 struct mdinfo info;
1201 char nbuf[64];
1202 char nbuf1[64];
1203 struct intel_super *super = st->sb;
1204 int i;
1205
1206 if (!super->anchor->num_raid_devs)
1207 return;
1208
1209 getinfo_super_imsm(st, &info, NULL);
1210 fname_from_uuid(st, &info, nbuf, ':');
1211 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1212 struct imsm_dev *dev = get_imsm_dev(super, i);
1213
1214 super->current_vol = i;
1215 getinfo_super_imsm(st, &info, NULL);
1216 fname_from_uuid(st, &info, nbuf1, ':');
1217 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1218 dev->volume, nbuf + 5, i, nbuf1 + 5);
1219 }
1220 }
1221
1222 static void export_examine_super_imsm(struct supertype *st)
1223 {
1224 struct intel_super *super = st->sb;
1225 struct imsm_super *mpb = super->anchor;
1226 struct mdinfo info;
1227 char nbuf[64];
1228
1229 getinfo_super_imsm(st, &info, NULL);
1230 fname_from_uuid(st, &info, nbuf, ':');
1231 printf("MD_METADATA=imsm\n");
1232 printf("MD_LEVEL=container\n");
1233 printf("MD_UUID=%s\n", nbuf+5);
1234 printf("MD_DEVICES=%u\n", mpb->num_disks);
1235 }
1236
1237 static void detail_super_imsm(struct supertype *st, char *homehost)
1238 {
1239 struct mdinfo info;
1240 char nbuf[64];
1241
1242 getinfo_super_imsm(st, &info, NULL);
1243 fname_from_uuid(st, &info, nbuf, ':');
1244 printf("\n UUID : %s\n", nbuf + 5);
1245 }
1246
1247 static void brief_detail_super_imsm(struct supertype *st)
1248 {
1249 struct mdinfo info;
1250 char nbuf[64];
1251 getinfo_super_imsm(st, &info, NULL);
1252 fname_from_uuid(st, &info, nbuf, ':');
1253 printf(" UUID=%s", nbuf + 5);
1254 }
1255
1256 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1257 static void fd2devname(int fd, char *name);
1258
1259 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
1260 {
1261 /* dump an unsorted list of devices attached to AHCI Intel storage
1262 * controller, as well as non-connected ports
1263 */
1264 int hba_len = strlen(hba_path) + 1;
1265 struct dirent *ent;
1266 DIR *dir;
1267 char *path = NULL;
1268 int err = 0;
1269 unsigned long port_mask = (1 << port_count) - 1;
1270
1271 if (port_count > (int)sizeof(port_mask) * 8) {
1272 if (verbose)
1273 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1274 return 2;
1275 }
1276
1277 /* scroll through /sys/dev/block looking for devices attached to
1278 * this hba
1279 */
1280 dir = opendir("/sys/dev/block");
1281 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1282 int fd;
1283 char model[64];
1284 char vendor[64];
1285 char buf[1024];
1286 int major, minor;
1287 char *device;
1288 char *c;
1289 int port;
1290 int type;
1291
1292 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1293 continue;
1294 path = devt_to_devpath(makedev(major, minor));
1295 if (!path)
1296 continue;
1297 if (!path_attached_to_hba(path, hba_path)) {
1298 free(path);
1299 path = NULL;
1300 continue;
1301 }
1302
1303 /* retrieve the scsi device type */
1304 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1305 if (verbose)
1306 fprintf(stderr, Name ": failed to allocate 'device'\n");
1307 err = 2;
1308 break;
1309 }
1310 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1311 if (load_sys(device, buf) != 0) {
1312 if (verbose)
1313 fprintf(stderr, Name ": failed to read device type for %s\n",
1314 path);
1315 err = 2;
1316 free(device);
1317 break;
1318 }
1319 type = strtoul(buf, NULL, 10);
1320
1321 /* if it's not a disk print the vendor and model */
1322 if (!(type == 0 || type == 7 || type == 14)) {
1323 vendor[0] = '\0';
1324 model[0] = '\0';
1325 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1326 if (load_sys(device, buf) == 0) {
1327 strncpy(vendor, buf, sizeof(vendor));
1328 vendor[sizeof(vendor) - 1] = '\0';
1329 c = (char *) &vendor[sizeof(vendor) - 1];
1330 while (isspace(*c) || *c == '\0')
1331 *c-- = '\0';
1332
1333 }
1334 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1335 if (load_sys(device, buf) == 0) {
1336 strncpy(model, buf, sizeof(model));
1337 model[sizeof(model) - 1] = '\0';
1338 c = (char *) &model[sizeof(model) - 1];
1339 while (isspace(*c) || *c == '\0')
1340 *c-- = '\0';
1341 }
1342
1343 if (vendor[0] && model[0])
1344 sprintf(buf, "%.64s %.64s", vendor, model);
1345 else
1346 switch (type) { /* numbers from hald/linux/device.c */
1347 case 1: sprintf(buf, "tape"); break;
1348 case 2: sprintf(buf, "printer"); break;
1349 case 3: sprintf(buf, "processor"); break;
1350 case 4:
1351 case 5: sprintf(buf, "cdrom"); break;
1352 case 6: sprintf(buf, "scanner"); break;
1353 case 8: sprintf(buf, "media_changer"); break;
1354 case 9: sprintf(buf, "comm"); break;
1355 case 12: sprintf(buf, "raid"); break;
1356 default: sprintf(buf, "unknown");
1357 }
1358 } else
1359 buf[0] = '\0';
1360 free(device);
1361
1362 /* chop device path to 'host%d' and calculate the port number */
1363 c = strchr(&path[hba_len], '/');
1364 if (!c) {
1365 if (verbose)
1366 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1367 err = 2;
1368 break;
1369 }
1370 *c = '\0';
1371 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1372 port -= host_base;
1373 else {
1374 if (verbose) {
1375 *c = '/'; /* repair the full string */
1376 fprintf(stderr, Name ": failed to determine port number for %s\n",
1377 path);
1378 }
1379 err = 2;
1380 break;
1381 }
1382
1383 /* mark this port as used */
1384 port_mask &= ~(1 << port);
1385
1386 /* print out the device information */
1387 if (buf[0]) {
1388 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1389 continue;
1390 }
1391
1392 fd = dev_open(ent->d_name, O_RDONLY);
1393 if (fd < 0)
1394 printf(" Port%d : - disk info unavailable -\n", port);
1395 else {
1396 fd2devname(fd, buf);
1397 printf(" Port%d : %s", port, buf);
1398 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1399 printf(" (%s)\n", buf);
1400 else
1401 printf("()\n");
1402 }
1403 close(fd);
1404 free(path);
1405 path = NULL;
1406 }
1407 if (path)
1408 free(path);
1409 if (dir)
1410 closedir(dir);
1411 if (err == 0) {
1412 int i;
1413
1414 for (i = 0; i < port_count; i++)
1415 if (port_mask & (1 << i))
1416 printf(" Port%d : - no device attached -\n", i);
1417 }
1418
1419 return err;
1420 }
1421
1422
1423
1424 static void print_found_intel_controllers(struct sys_dev *elem)
1425 {
1426 for (; elem; elem = elem->next) {
1427 fprintf(stderr, Name ": found Intel(R) ");
1428 if (elem->type == SYS_DEV_SATA)
1429 fprintf(stderr, "SATA ");
1430 else if (elem->type == SYS_DEV_SAS)
1431 fprintf(stderr, "SAS ");
1432 fprintf(stderr, "RAID controller");
1433 if (elem->pci_id)
1434 fprintf(stderr, " at %s", elem->pci_id);
1435 fprintf(stderr, ".\n");
1436 }
1437 fflush(stderr);
1438 }
1439
1440 static int ahci_get_port_count(const char *hba_path, int *port_count)
1441 {
1442 struct dirent *ent;
1443 DIR *dir;
1444 int host_base = -1;
1445
1446 *port_count = 0;
1447 if ((dir = opendir(hba_path)) == NULL)
1448 return -1;
1449
1450 for (ent = readdir(dir); ent; ent = readdir(dir)) {
1451 int host;
1452
1453 if (sscanf(ent->d_name, "host%d", &host) != 1)
1454 continue;
1455 if (*port_count == 0)
1456 host_base = host;
1457 else if (host < host_base)
1458 host_base = host;
1459
1460 if (host + 1 > *port_count + host_base)
1461 *port_count = host + 1 - host_base;
1462 }
1463 closedir(dir);
1464 return host_base;
1465 }
1466
1467 static void print_imsm_capability(const struct imsm_orom *orom)
1468 {
1469 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1470 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1471 orom->hotfix_ver, orom->build);
1472 printf(" RAID Levels :%s%s%s%s%s\n",
1473 imsm_orom_has_raid0(orom) ? " raid0" : "",
1474 imsm_orom_has_raid1(orom) ? " raid1" : "",
1475 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1476 imsm_orom_has_raid10(orom) ? " raid10" : "",
1477 imsm_orom_has_raid5(orom) ? " raid5" : "");
1478 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1479 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1480 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1481 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1482 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1483 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1484 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1485 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1486 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1487 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1488 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1489 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1490 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1491 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1492 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1493 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1494 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1495 printf(" Max Disks : %d\n", orom->tds);
1496 printf(" Max Volumes : %d\n", orom->vpa);
1497 return;
1498 }
1499
static int detail_platform_imsm(int verbose, int enumerate_only)
{
	/* There are two components to imsm platform support, the ahci SATA
	 * controller and the option-rom. To find the SATA controller we
	 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
	 * controller with the Intel vendor id is present. This approach
	 * allows mdadm to leverage the kernel's ahci detection logic, with the
	 * caveat that if ahci.ko is not loaded mdadm will not be able to
	 * detect platform raid capabilities. The option-rom resides in a
	 * platform "Adapter ROM". We scan for its signature to retrieve the
	 * platform capabilities. If raid support is disabled in the BIOS the
	 * option-rom capability structure will not be available.
	 *
	 * Returns 0 on success, 2 (possibly OR-ed into 'result') on failure.
	 */
	const struct imsm_orom *orom;
	struct sys_dev *list, *hba;
	int host_base = 0;
	int port_count = 0;
	int result=0;

	if (enumerate_only) {
		/* quiet mode: only the exit status matters -- succeed iff
		 * every detected intel hba has option-rom support
		 */
		if (check_env("IMSM_NO_PLATFORM"))
			return 0;
		list = find_intel_devices();
		if (!list)
			return 2;
		for (hba = list; hba; hba = hba->next) {
			orom = find_imsm_capability(hba->type);
			if (!orom) {
				result = 2;
				break;
			}
		}
		free_sys_dev(&list);
		return result;
	}

	list = find_intel_devices();
	if (!list) {
		if (verbose)
			fprintf(stderr, Name ": no active Intel(R) RAID "
				"controller found.\n");
		/* NOTE(review): list is NULL here; presumably free_sys_dev()
		 * tolerates a NULL list -- confirm before relying on it
		 */
		free_sys_dev(&list);
		return 2;
	} else if (verbose)
		print_found_intel_controllers(list);

	/* first pass: report option-rom capabilities per controller */
	for (hba = list; hba; hba = hba->next) {
		orom = find_imsm_capability(hba->type);
		if (!orom)
			fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
				hba->path, get_sys_dev_type(hba->type));
		else
			print_imsm_capability(orom);
	}

	/* second pass: enumerate the ports of each SATA controller */
	for (hba = list; hba; hba = hba->next) {
		printf(" I/O Controller : %s (%s)\n",
		       hba->path, get_sys_dev_type(hba->type));

		if (hba->type == SYS_DEV_SATA) {
			host_base = ahci_get_port_count(hba->path, &port_count);
			if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
				if (verbose)
					fprintf(stderr, Name ": failed to enumerate "
						"ports on SATA controller at %s.", hba->pci_id);
				result |= 2;
			}
		}
	}

	free_sys_dev(&list);
	return result;
}
1573 #endif
1574
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* imsm metadata carries no host identification, so membership in
	 * this host can be neither confirmed nor denied; always answer
	 * "unknown" (-1).  compare_super() plus the 'family_num' fields
	 * keep foreign member disks out, and mdadm.conf names the arrays
	 * to assemble -- though auto-assembly may still pick up "foreign"
	 * arrays.
	 */
	return -1;
}
1588
1589 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1590 {
1591 /* The uuid returned here is used for:
1592 * uuid to put into bitmap file (Create, Grow)
1593 * uuid for backup header when saving critical section (Grow)
1594 * comparing uuids when re-adding a device into an array
1595 * In these cases the uuid required is that of the data-array,
1596 * not the device-set.
1597 * uuid to recognise same set when adding a missing device back
1598 * to an array. This is a uuid for the device-set.
1599 *
1600 * For each of these we can make do with a truncated
1601 * or hashed uuid rather than the original, as long as
1602 * everyone agrees.
1603 * In each case the uuid required is that of the data-array,
1604 * not the device-set.
1605 */
1606 /* imsm does not track uuid's so we synthesis one using sha1 on
1607 * - The signature (Which is constant for all imsm array, but no matter)
1608 * - the orig_family_num of the container
1609 * - the index number of the volume
1610 * - the 'serial' number of the volume.
1611 * Hopefully these are all constant.
1612 */
1613 struct intel_super *super = st->sb;
1614
1615 char buf[20];
1616 struct sha1_ctx ctx;
1617 struct imsm_dev *dev = NULL;
1618 __u32 family_num;
1619
1620 /* some mdadm versions failed to set ->orig_family_num, in which
1621 * case fall back to ->family_num. orig_family_num will be
1622 * fixed up with the first metadata update.
1623 */
1624 family_num = super->anchor->orig_family_num;
1625 if (family_num == 0)
1626 family_num = super->anchor->family_num;
1627 sha1_init_ctx(&ctx);
1628 sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
1629 sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
1630 if (super->current_vol >= 0)
1631 dev = get_imsm_dev(super, super->current_vol);
1632 if (dev) {
1633 __u32 vol = super->current_vol;
1634 sha1_process_bytes(&vol, sizeof(vol), &ctx);
1635 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
1636 }
1637 sha1_finish_ctx(&ctx, buf);
1638 memcpy(uuid, buf, 4*4);
1639 }
1640
#if 0
/* Parse the dotted version string out of the mpb signature into numeric
 * components.  Currently compiled out (#if 0) and unused.
 * NOTE(review): three fields are collected but only minor ('m') and
 * patch ('p') are returned -- 'major' is parsed and discarded; confirm
 * the intended interface before re-enabling.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	/* split on '.', keeping at most two digits per component */
	i = j = 0;
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
1668
1669 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1670 {
1671 /* migr_strip_size when repairing or initializing parity */
1672 struct imsm_map *map = get_imsm_map(dev, 0);
1673 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1674
1675 switch (get_imsm_raid_level(map)) {
1676 case 5:
1677 case 10:
1678 return chunk;
1679 default:
1680 return 128*1024 >> 9;
1681 }
1682 }
1683
1684 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1685 {
1686 /* migr_strip_size when rebuilding a degraded disk, no idea why
1687 * this is different than migr_strip_size_resync(), but it's good
1688 * to be compatible
1689 */
1690 struct imsm_map *map = get_imsm_map(dev, 1);
1691 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1692
1693 switch (get_imsm_raid_level(map)) {
1694 case 1:
1695 case 10:
1696 if (map->num_members % map->num_domains == 0)
1697 return 128*1024 >> 9;
1698 else
1699 return chunk;
1700 case 5:
1701 return max((__u32) 64*1024 >> 9, chunk);
1702 default:
1703 return 128*1024 >> 9;
1704 }
1705 }
1706
1707 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1708 {
1709 struct imsm_map *lo = get_imsm_map(dev, 0);
1710 struct imsm_map *hi = get_imsm_map(dev, 1);
1711 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1712 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1713
1714 return max((__u32) 1, hi_chunk / lo_chunk);
1715 }
1716
1717 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1718 {
1719 struct imsm_map *lo = get_imsm_map(dev, 0);
1720 int level = get_imsm_raid_level(lo);
1721
1722 if (level == 1 || level == 10) {
1723 struct imsm_map *hi = get_imsm_map(dev, 1);
1724
1725 return hi->num_domains;
1726 } else
1727 return num_stripes_per_unit_resync(dev);
1728 }
1729
1730 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1731 {
1732 /* named 'imsm_' because raid0, raid1 and raid10
1733 * counter-intuitively have the same number of data disks
1734 */
1735 struct imsm_map *map = get_imsm_map(dev, second_map);
1736
1737 switch (get_imsm_raid_level(map)) {
1738 case 0:
1739 case 1:
1740 case 10:
1741 return map->num_members;
1742 case 5:
1743 return map->num_members - 1;
1744 default:
1745 dprintf("%s: unsupported raid level\n", __func__);
1746 return 0;
1747 }
1748 }
1749
1750 static __u32 parity_segment_depth(struct imsm_dev *dev)
1751 {
1752 struct imsm_map *map = get_imsm_map(dev, 0);
1753 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1754
1755 switch(get_imsm_raid_level(map)) {
1756 case 1:
1757 case 10:
1758 return chunk * map->num_domains;
1759 case 5:
1760 return chunk * map->num_members;
1761 default:
1762 return chunk;
1763 }
1764 }
1765
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	/* Translate a unit-relative block offset into a per-member block
	 * offset on the destination map (map 1) of a migration.
	 * NOTE(review): this arithmetic mirrors the firmware's layout; the
	 * '+ 1' on vol_strip in the mirrored case is not explained by
	 * anything visible here -- confirm before changing.
	 */
	struct imsm_map *map = get_imsm_map(dev, 1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		/* mirrored levels: scale strips by the number of copies */
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	} case 5: {
		/* raid5: num_members - 1 data strips per stripe */
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		return 0;
	}
}
1788
static __u64 blocks_per_migr_unit(struct intel_super *super,
				  struct imsm_dev *dev)
{
	/* calculate the conversion factor between per member 'blocks'
	 * (md/{resync,rebuild}_start) and imsm migration units, return
	 * 0 for the 'not migrating' and 'unsupported migration' cases
	 */
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_GEN_MIGR: {
		/* general migration: the factor is stored directly in the
		 * on-disk migration record
		 */
		struct migr_record *migr_rec = super->migr_rec;
		return __le32_to_cpu(migr_rec->blocks_per_unit);
	}
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, 0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8 disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev, 0);
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
		/* split the unit into whole stripes plus a remainder, then
		 * map the remainder through the destination layout and add
		 * the parity overhead of the whole stripes
		 */
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}
1845
1846 static int imsm_level_to_layout(int level)
1847 {
1848 switch (level) {
1849 case 0:
1850 case 1:
1851 return 0;
1852 case 5:
1853 case 6:
1854 return ALGORITHM_LEFT_ASYMMETRIC;
1855 case 10:
1856 return 0x102;
1857 }
1858 return UnSet;
1859 }
1860
1861 /*******************************************************************************
1862 * Function: read_imsm_migr_rec
1863 * Description: Function reads imsm migration record from last sector of disk
1864 * Parameters:
1865 * fd : disk descriptor
1866 * super : metadata info
1867 * Returns:
1868 * 0 : success,
1869 * -1 : fail
1870 ******************************************************************************/
1871 static int read_imsm_migr_rec(int fd, struct intel_super *super)
1872 {
1873 int ret_val = -1;
1874 unsigned long long dsize;
1875
1876 get_dev_size(fd, NULL, &dsize);
1877 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
1878 fprintf(stderr,
1879 Name ": Cannot seek to anchor block: %s\n",
1880 strerror(errno));
1881 goto out;
1882 }
1883 if (read(fd, super->migr_rec_buf, 512) != 512) {
1884 fprintf(stderr,
1885 Name ": Cannot read migr record block: %s\n",
1886 strerror(errno));
1887 goto out;
1888 }
1889 ret_val = 0;
1890
1891 out:
1892 return ret_val;
1893 }
1894
1895 /*******************************************************************************
1896 * Function: load_imsm_migr_rec
1897 * Description: Function reads imsm migration record (it is stored at the last
1898 * sector of disk)
1899 * Parameters:
1900 * super : imsm internal array info
1901 * info : general array info
1902 * Returns:
1903 * 0 : success
1904 * -1 : fail
1905 ******************************************************************************/
1906 static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
1907 {
1908 struct mdinfo *sd;
1909 struct dl *dl = NULL;
1910 char nm[30];
1911 int retval = -1;
1912 int fd = -1;
1913
1914 if (info) {
1915 for (sd = info->devs ; sd ; sd = sd->next) {
1916 /* read only from one of the first two slots */
1917 if ((sd->disk.raid_disk > 1) ||
1918 (sd->disk.raid_disk < 0))
1919 continue;
1920 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1921 fd = dev_open(nm, O_RDONLY);
1922 if (fd >= 0)
1923 break;
1924 }
1925 }
1926 if (fd < 0) {
1927 for (dl = super->disks; dl; dl = dl->next) {
1928 /* read only from one of the first two slots */
1929 if (dl->index > 1)
1930 continue;
1931 sprintf(nm, "%d:%d", dl->major, dl->minor);
1932 fd = dev_open(nm, O_RDONLY);
1933 if (fd >= 0)
1934 break;
1935 }
1936 }
1937 if (fd < 0)
1938 goto out;
1939 retval = read_imsm_migr_rec(fd, super);
1940
1941 out:
1942 if (fd >= 0)
1943 close(fd);
1944 return retval;
1945 }
1946
1947 /*******************************************************************************
1948 * function: imsm_create_metadata_checkpoint_update
1949 * Description: It creates update for checkpoint change.
1950 * Parameters:
1951 * super : imsm internal array info
1952 * u : pointer to prepared update
1953 * Returns:
1954 * Uptate length.
1955 * If length is equal to 0, input pointer u contains no update
1956 ******************************************************************************/
1957 static int imsm_create_metadata_checkpoint_update(
1958 struct intel_super *super,
1959 struct imsm_update_general_migration_checkpoint **u)
1960 {
1961
1962 int update_memory_size = 0;
1963
1964 dprintf("imsm_create_metadata_checkpoint_update(enter)\n");
1965
1966 if (u == NULL)
1967 return 0;
1968 *u = NULL;
1969
1970 /* size of all update data without anchor */
1971 update_memory_size =
1972 sizeof(struct imsm_update_general_migration_checkpoint);
1973
1974 *u = calloc(1, update_memory_size);
1975 if (*u == NULL) {
1976 dprintf("error: cannot get memory for "
1977 "imsm_create_metadata_checkpoint_update update\n");
1978 return 0;
1979 }
1980 (*u)->type = update_general_migration_checkpoint;
1981 (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
1982 dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n",
1983 (*u)->curr_migr_unit);
1984
1985 return update_memory_size;
1986 }
1987
1988
1989 static void imsm_update_metadata_locally(struct supertype *st,
1990 void *buf, int len);
1991
/*******************************************************************************
 * Function: write_imsm_migr_rec
 * Description: Function writes imsm migration record
 *		(at the last sector of disk)
 * Parameters:
 *	super : imsm internal array info
 * Returns:
 *	 0 : success
 *	-1 : if fail
 ******************************************************************************/
static int write_imsm_migr_rec(struct supertype *st)
{
	struct intel_super *super = st->sb;
	unsigned long long dsize;
	char nm[30];
	int fd = -1;
	int retval = -1;
	struct dl *sd;
	int len;
	struct imsm_update_general_migration_checkpoint *u;

	/* copy the record buffer onto the last sector of each of the
	 * first two member disks
	 */
	for (sd = super->disks ; sd ; sd = sd->next) {
		/* write to 2 first slots only */
		if ((sd->index < 0) || (sd->index > 1))
			continue;
		sprintf(nm, "%d:%d", sd->major, sd->minor);
		fd = dev_open(nm, O_RDWR);
		if (fd < 0)
			continue;
		get_dev_size(fd, NULL, &dsize);
		if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
			fprintf(stderr,
				Name ": Cannot seek to anchor block: %s\n",
				strerror(errno));
			goto out;
		}
		if (write(fd, super->migr_rec_buf, 512) != 512) {
			fprintf(stderr,
				Name ": Cannot write migr record block: %s\n",
				strerror(errno));
			goto out;
		}
		close(fd);
		fd = -1;
	}
	/* update checkpoint information in metadata */
	len = imsm_create_metadata_checkpoint_update(super, &u);

	if (len <= 0) {
		dprintf("imsm: Cannot prepare update\n");
		goto out;
	}
	/* update metadata locally */
	imsm_update_metadata_locally(st, u, len);
	/* and possibly remotely */
	if (st->update_tail) {
		append_metadata_update(st, u, len);
		/* during reshape we do all work inside metadata handler
		 * manage_reshape(), so metadata update has to be triggered
		 * inside it
		 */
		flush_metadata_updates(st);
		st->update_tail = &st->updates;
	} else
		free(u);

	retval = 0;
 out:
	/* error paths can leave the current descriptor open */
	if (fd >= 0)
		close(fd);
	return retval;
}
2064
/* getinfo_super_imsm_volume() - describe the currently selected volume
 * (super->current_vol) in the generic mdinfo format.
 * @st: supertype whose st->sb is a populated intel_super
 * @info: output; overwritten, except that info->array.raid_disks and
 *        info->disk.raid_disk are read BEFORE the memset, so callers
 *        are expected to pre-seed them
 * @dmap: optional per-slot output; dmap[i] set to 1 when slot i holds
 *        a CONFIGURED disk
 */
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
	struct intel_super *super = st->sb;
	struct migr_record *migr_rec = super->migr_rec;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);	/* current/target map */
	struct imsm_map *prev_map = get_imsm_map(dev, 1); /* pre-migration map, if any */
	struct imsm_map *map_to_analyse = map;
	struct dl *dl;
	char *devname;
	unsigned int component_size_alligment;
	int map_disks = info->array.raid_disks;	/* saved before memset below */

	memset(info, 0, sizeof(*info));
	/* while migrating, report the geometry of the 'before' map */
	if (prev_map)
		map_to_analyse = prev_map;

	/* locate the disk entry for the requested slot (zeroed above, so
	 * this finds the slot-0 disk)
	 */
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->raiddisk == info->disk.raid_disk)
			break;
	info->container_member = super->current_vol;
	info->array.raid_disks = map->num_members;
	info->array.level = get_imsm_raid_level(map_to_analyse);
	info->array.layout = imsm_level_to_layout(info->array.level);
	info->array.md_minor = -1;
	info->array.ctime = 0;
	info->array.utime = 0;
	/* blocks_per_strip is in 512-byte sectors; chunk_size in bytes */
	info->array.chunk_size =
		__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
	info->array.state = !dev->vol.dirty;
	/* 64-bit size is stored as two little-endian 32-bit halves */
	info->custom_array_size = __le32_to_cpu(dev->size_high);
	info->custom_array_size <<= 32;
	info->custom_array_size |= __le32_to_cpu(dev->size_low);
	if (prev_map && map->map_state == prev_map->map_state) {
		/* both maps present and in the same state: reshaping */
		info->reshape_active = 1;
		info->new_level = get_imsm_raid_level(map);
		info->new_layout = imsm_level_to_layout(info->new_level);
		info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
		info->delta_disks = map->num_members - prev_map->num_members;
		if (info->delta_disks) {
			/* this needs to be applied to every array
			 * in the container.
			 */
			info->reshape_active = 2;
		}
		/* We shape information that we give to md might have to be
		 * modify to cope with md's requirement for reshaping arrays.
		 * For example, when reshaping a RAID0, md requires it to be
		 * presented as a degraded RAID4.
		 * Also if a RAID0 is migrating to a RAID5 we need to specify
		 * the array as already being RAID5, but the 'before' layout
		 * is a RAID4-like layout.
		 */
		switch (info->array.level) {
		case 0:
			switch(info->new_level) {
			case 0:
				/* conversion is happening as RAID4 */
				info->array.level = 4;
				info->array.raid_disks += 1;
				break;
			case 5:
				/* conversion is happening as RAID5 */
				info->array.level = 5;
				info->array.layout = ALGORITHM_PARITY_N;
				info->array.raid_disks += 1;
				info->delta_disks -= 1;
				break;
			default:
				/* FIXME error message */
				info->array.level = UnSet;
				break;
			}
			break;
		}
	} else {
		/* not reshaping */
		info->new_level = UnSet;
		info->new_layout = UnSet;
		info->new_chunk = info->array.chunk_size;
		info->delta_disks = 0;
	}
	info->disk.major = 0;
	info->disk.minor = 0;
	if (dl) {
		info->disk.major = dl->major;
		info->disk.minor = dl->minor;
	}

	info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
	info->component_size =
		__le32_to_cpu(map_to_analyse->blocks_per_member);

	/* check component size aligment
	 */
	component_size_alligment =
		info->component_size % (info->array.chunk_size/512);

	if (component_size_alligment &&
	    (info->array.level != 1) && (info->array.level != UnSet)) {
		/* striped levels must report a chunk-aligned size; round down */
		dprintf("imsm: reported component size alligned from %llu ",
			info->component_size);
		info->component_size -= component_size_alligment;
		dprintf("to %llu (%i).\n",
			info->component_size, component_size_alligment);
	}

	memset(info->uuid, 0, sizeof(info->uuid));
	info->recovery_start = MaxSector;

	info->reshape_progress = 0;
	info->resync_start = MaxSector;
	if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
	    dev->vol.dirty) {
		/* dirty or never initialized: request a full resync */
		info->resync_start = 0;
	}
	if (dev->vol.migr_state) {
		switch (migr_type(dev)) {
		case MIGR_REPAIR:
		case MIGR_INIT: {
			/* resync/initialize: checkpoint kept in the volume */
			__u64 blocks_per_unit = blocks_per_migr_unit(super,
								     dev);
			__u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);

			info->resync_start = blocks_per_unit * units;
			break;
		}
		case MIGR_GEN_MIGR: {
			/* general migration: checkpoint lives in migr_rec */
			__u64 blocks_per_unit = blocks_per_migr_unit(super,
								     dev);
			__u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
			unsigned long long array_blocks;
			int used_disks;

			info->reshape_progress = blocks_per_unit * units;

			dprintf("IMSM: General Migration checkpoint : %llu "
			       "(%llu) -> read reshape progress : %llu\n",
				units, blocks_per_unit, info->reshape_progress);

			used_disks = imsm_num_data_members(dev, 1);
			if (used_disks > 0) {
				array_blocks = map->blocks_per_member *
					used_disks;
				/* round array size down to closest MB
				 */
				info->custom_array_size = (array_blocks
						>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
			}
		}
		/* fall through: resync_start is left at MaxSector for
		 * general migration (progress is in reshape_progress) --
		 * NOTE(review): looks intentional, confirm against callers
		 */
		case MIGR_VERIFY:
			/* we could emulate the checkpointing of
			 * 'sync_action=check' migrations, but for now
			 * we just immediately complete them
			 */
		case MIGR_REBUILD:
			/* this is handled by container_content_imsm() */
		case MIGR_STATE_CHANGE:
			/* FIXME handle other migrations */
		default:
			/* we are not dirty, so... */
			info->resync_start = MaxSector;
		}
	}

	/* dev->volume is a fixed-width field, not necessarily terminated */
	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	devname = devnum2devname(st->container_dev);
	*info->text_version = '\0';
	if (devname)
		sprintf(info->text_version, "/%s/%d", devname, info->container_member);
	free(devname);
	info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);

	if (dmap) {
		int i, j;
		for (i=0; i<map_disks; i++) {
			dmap[i] = 0;
			if (i < info->array.raid_disks) {
				struct imsm_disk *dsk;
				j = get_imsm_disk_idx(dev, i, -1);
				dsk = get_imsm_disk(super, j);
				if (dsk && (dsk->status & CONFIGURED_DISK))
					dmap[i] = 1;
			}
		}
	}
}
2257
2258 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
2259 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
2260
2261 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2262 {
2263 struct dl *d;
2264
2265 for (d = super->missing; d; d = d->next)
2266 if (d->index == index)
2267 return &d->disk;
2268 return NULL;
2269 }
2270
2271 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
2272 {
2273 struct intel_super *super = st->sb;
2274 struct imsm_disk *disk;
2275 int map_disks = info->array.raid_disks;
2276 int max_enough = -1;
2277 int i;
2278 struct imsm_super *mpb;
2279
2280 if (super->current_vol >= 0) {
2281 getinfo_super_imsm_volume(st, info, map);
2282 return;
2283 }
2284 memset(info, 0, sizeof(*info));
2285
2286 /* Set raid_disks to zero so that Assemble will always pull in valid
2287 * spares
2288 */
2289 info->array.raid_disks = 0;
2290 info->array.level = LEVEL_CONTAINER;
2291 info->array.layout = 0;
2292 info->array.md_minor = -1;
2293 info->array.ctime = 0; /* N/A for imsm */
2294 info->array.utime = 0;
2295 info->array.chunk_size = 0;
2296
2297 info->disk.major = 0;
2298 info->disk.minor = 0;
2299 info->disk.raid_disk = -1;
2300 info->reshape_active = 0;
2301 info->array.major_version = -1;
2302 info->array.minor_version = -2;
2303 strcpy(info->text_version, "imsm");
2304 info->safe_mode_delay = 0;
2305 info->disk.number = -1;
2306 info->disk.state = 0;
2307 info->name[0] = 0;
2308 info->recovery_start = MaxSector;
2309
2310 /* do we have the all the insync disks that we expect? */
2311 mpb = super->anchor;
2312
2313 for (i = 0; i < mpb->num_raid_devs; i++) {
2314 struct imsm_dev *dev = get_imsm_dev(super, i);
2315 int failed, enough, j, missing = 0;
2316 struct imsm_map *map;
2317 __u8 state;
2318
2319 failed = imsm_count_failed(super, dev);
2320 state = imsm_check_degraded(super, dev, failed);
2321 map = get_imsm_map(dev, dev->vol.migr_state);
2322
2323 /* any newly missing disks?
2324 * (catches single-degraded vs double-degraded)
2325 */
2326 for (j = 0; j < map->num_members; j++) {
2327 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
2328 __u32 idx = ord_to_idx(ord);
2329
2330 if (!(ord & IMSM_ORD_REBUILD) &&
2331 get_imsm_missing(super, idx)) {
2332 missing = 1;
2333 break;
2334 }
2335 }
2336
2337 if (state == IMSM_T_STATE_FAILED)
2338 enough = -1;
2339 else if (state == IMSM_T_STATE_DEGRADED &&
2340 (state != map->map_state || missing))
2341 enough = 0;
2342 else /* we're normal, or already degraded */
2343 enough = 1;
2344
2345 /* in the missing/failed disk case check to see
2346 * if at least one array is runnable
2347 */
2348 max_enough = max(max_enough, enough);
2349 }
2350 dprintf("%s: enough: %d\n", __func__, max_enough);
2351 info->container_enough = max_enough;
2352
2353 if (super->disks) {
2354 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2355
2356 disk = &super->disks->disk;
2357 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2358 info->component_size = reserved;
2359 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
2360 /* we don't change info->disk.raid_disk here because
2361 * this state will be finalized in mdmon after we have
2362 * found the 'most fresh' version of the metadata
2363 */
2364 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2365 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2366 }
2367
2368 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2369 * ->compare_super may have updated the 'num_raid_devs' field for spares
2370 */
2371 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
2372 uuid_from_super_imsm(st, info->uuid);
2373 else
2374 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
2375
2376 /* I don't know how to compute 'map' on imsm, so use safe default */
2377 if (map) {
2378 int i;
2379 for (i = 0; i < map_disks; i++)
2380 map[i] = 1;
2381 }
2382
2383 }
2384
2385 /* allocates memory and fills disk in mdinfo structure
2386 * for each disk in array */
2387 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2388 {
2389 struct mdinfo *mddev = NULL;
2390 struct intel_super *super = st->sb;
2391 struct imsm_disk *disk;
2392 int count = 0;
2393 struct dl *dl;
2394 if (!super || !super->disks)
2395 return NULL;
2396 dl = super->disks;
2397 mddev = malloc(sizeof(*mddev));
2398 if (!mddev) {
2399 fprintf(stderr, Name ": Failed to allocate memory.\n");
2400 return NULL;
2401 }
2402 memset(mddev, 0, sizeof(*mddev));
2403 while (dl) {
2404 struct mdinfo *tmp;
2405 disk = &dl->disk;
2406 tmp = malloc(sizeof(*tmp));
2407 if (!tmp) {
2408 fprintf(stderr, Name ": Failed to allocate memory.\n");
2409 if (mddev)
2410 sysfs_free(mddev);
2411 return NULL;
2412 }
2413 memset(tmp, 0, sizeof(*tmp));
2414 if (mddev->devs)
2415 tmp->next = mddev->devs;
2416 mddev->devs = tmp;
2417 tmp->disk.number = count++;
2418 tmp->disk.major = dl->major;
2419 tmp->disk.minor = dl->minor;
2420 tmp->disk.state = is_configured(disk) ?
2421 (1 << MD_DISK_ACTIVE) : 0;
2422 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2423 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2424 tmp->disk.raid_disk = -1;
2425 dl = dl->next;
2426 }
2427 return mddev;
2428 }
2429
2430 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2431 char *update, char *devname, int verbose,
2432 int uuid_set, char *homehost)
2433 {
2434 /* For 'assemble' and 'force' we need to return non-zero if any
2435 * change was made. For others, the return value is ignored.
2436 * Update options are:
2437 * force-one : This device looks a bit old but needs to be included,
2438 * update age info appropriately.
2439 * assemble: clear any 'faulty' flag to allow this device to
2440 * be assembled.
2441 * force-array: Array is degraded but being forced, mark it clean
2442 * if that will be needed to assemble it.
2443 *
2444 * newdev: not used ????
2445 * grow: Array has gained a new device - this is currently for
2446 * linear only
2447 * resync: mark as dirty so a resync will happen.
2448 * name: update the name - preserving the homehost
2449 * uuid: Change the uuid of the array to match watch is given
2450 *
2451 * Following are not relevant for this imsm:
2452 * sparc2.2 : update from old dodgey metadata
2453 * super-minor: change the preferred_minor number
2454 * summaries: update redundant counters.
2455 * homehost: update the recorded homehost
2456 * _reshape_progress: record new reshape_progress position.
2457 */
2458 int rv = 1;
2459 struct intel_super *super = st->sb;
2460 struct imsm_super *mpb;
2461
2462 /* we can only update container info */
2463 if (!super || super->current_vol >= 0 || !super->anchor)
2464 return 1;
2465
2466 mpb = super->anchor;
2467
2468 if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
2469 rv = -1;
2470 else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
2471 mpb->orig_family_num = *((__u32 *) info->update_private);
2472 rv = 0;
2473 } else if (strcmp(update, "uuid") == 0) {
2474 __u32 *new_family = malloc(sizeof(*new_family));
2475
2476 /* update orig_family_number with the incoming random
2477 * data, report the new effective uuid, and store the
2478 * new orig_family_num for future updates.
2479 */
2480 if (new_family) {
2481 memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
2482 uuid_from_super_imsm(st, info->uuid);
2483 *new_family = mpb->orig_family_num;
2484 info->update_private = new_family;
2485 rv = 0;
2486 }
2487 } else if (strcmp(update, "assemble") == 0)
2488 rv = 0;
2489 else
2490 rv = -1;
2491
2492 /* successful update? recompute checksum */
2493 if (rv == 0)
2494 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
2495
2496 return rv;
2497 }
2498
2499 static size_t disks_to_mpb_size(int disks)
2500 {
2501 size_t size;
2502
2503 size = sizeof(struct imsm_super);
2504 size += (disks - 1) * sizeof(struct imsm_disk);
2505 size += 2 * sizeof(struct imsm_dev);
2506 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2507 size += (4 - 2) * sizeof(struct imsm_map);
2508 /* 4 possible disk_ord_tbl's */
2509 size += 4 * (disks - 1) * sizeof(__u32);
2510
2511 return size;
2512 }
2513
2514 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2515 {
2516 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2517 return 0;
2518
2519 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
2520 }
2521
2522 static void free_devlist(struct intel_super *super)
2523 {
2524 struct intel_dev *dv;
2525
2526 while (super->devlist) {
2527 dv = super->devlist->next;
2528 free(super->devlist->dev);
2529 free(super->devlist);
2530 super->devlist = dv;
2531 }
2532 }
2533
/* Copy a raid device definition from src to dest.  The size comes from
 * sizeof_imsm_dev(src, 0) -- NOTE(review): the 0 appears to select the
 * non-migration size, so a migrating source's second map may not be
 * copied; confirm callers only use this where that is acceptable.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2538
2539 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2540 {
2541 /*
2542 * return:
2543 * 0 same, or first was empty, and second was copied
2544 * 1 second had wrong number
2545 * 2 wrong uuid
2546 * 3 wrong other info
2547 */
2548 struct intel_super *first = st->sb;
2549 struct intel_super *sec = tst->sb;
2550
2551 if (!first) {
2552 st->sb = tst->sb;
2553 tst->sb = NULL;
2554 return 0;
2555 }
2556 /* in platform dependent environment test if the disks
2557 * use the same Intel hba
2558 */
2559 if (!check_env("IMSM_NO_PLATFORM")) {
2560 if (!first->hba || !sec->hba ||
2561 (first->hba->type != sec->hba->type)) {
2562 fprintf(stderr,
2563 "HBAs of devices does not match %s != %s\n",
2564 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2565 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
2566 return 3;
2567 }
2568 }
2569
2570 /* if an anchor does not have num_raid_devs set then it is a free
2571 * floating spare
2572 */
2573 if (first->anchor->num_raid_devs > 0 &&
2574 sec->anchor->num_raid_devs > 0) {
2575 /* Determine if these disks might ever have been
2576 * related. Further disambiguation can only take place
2577 * in load_super_imsm_all
2578 */
2579 __u32 first_family = first->anchor->orig_family_num;
2580 __u32 sec_family = sec->anchor->orig_family_num;
2581
2582 if (memcmp(first->anchor->sig, sec->anchor->sig,
2583 MAX_SIGNATURE_LENGTH) != 0)
2584 return 3;
2585
2586 if (first_family == 0)
2587 first_family = first->anchor->family_num;
2588 if (sec_family == 0)
2589 sec_family = sec->anchor->family_num;
2590
2591 if (first_family != sec_family)
2592 return 3;
2593
2594 }
2595
2596
2597 /* if 'first' is a spare promote it to a populated mpb with sec's
2598 * family number
2599 */
2600 if (first->anchor->num_raid_devs == 0 &&
2601 sec->anchor->num_raid_devs > 0) {
2602 int i;
2603 struct intel_dev *dv;
2604 struct imsm_dev *dev;
2605
2606 /* we need to copy raid device info from sec if an allocation
2607 * fails here we don't associate the spare
2608 */
2609 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
2610 dv = malloc(sizeof(*dv));
2611 if (!dv)
2612 break;
2613 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2614 if (!dev) {
2615 free(dv);
2616 break;
2617 }
2618 dv->dev = dev;
2619 dv->index = i;
2620 dv->next = first->devlist;
2621 first->devlist = dv;
2622 }
2623 if (i < sec->anchor->num_raid_devs) {
2624 /* allocation failure */
2625 free_devlist(first);
2626 fprintf(stderr, "imsm: failed to associate spare\n");
2627 return 3;
2628 }
2629 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
2630 first->anchor->orig_family_num = sec->anchor->orig_family_num;
2631 first->anchor->family_num = sec->anchor->family_num;
2632 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
2633 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2634 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
2635 }
2636
2637 return 0;
2638 }
2639
2640 static void fd2devname(int fd, char *name)
2641 {
2642 struct stat st;
2643 char path[256];
2644 char dname[PATH_MAX];
2645 char *nm;
2646 int rv;
2647
2648 name[0] = '\0';
2649 if (fstat(fd, &st) != 0)
2650 return;
2651 sprintf(path, "/sys/dev/block/%d:%d",
2652 major(st.st_rdev), minor(st.st_rdev));
2653
2654 rv = readlink(path, dname, sizeof(dname));
2655 if (rv <= 0)
2656 return;
2657
2658 dname[rv] = '\0';
2659 nm = strrchr(dname, '/');
2660 nm++;
2661 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2662 }
2663
2664 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2665
/* imsm_read_serial() - obtain a normalized serial number for @fd.
 * Reads the SCSI serial; on failure with IMSM_DEVNAME_AS_SERIAL set in
 * the environment, the device name is used instead.  The serial is
 * compacted in place: whitespace and non-printable bytes are dropped
 * and ':' becomes ';' (':' is reserved for placeholder serials of
 * missing disks).  Serials longer than MAX_RAID_SERIAL_LEN keep only
 * their trailing characters.
 * Returns 0 on success, non-zero on failure.
 */
static int imsm_read_serial(int fd, char *devname,
			    __u8 serial[MAX_RAID_SERIAL_LEN])
{
	unsigned char scsi_serial[255];
	int rv;
	int rsp_len;
	int len;
	char *dest;
	char *src;
	char *rsp_buf;
	int i;

	memset(scsi_serial, 0, sizeof(scsi_serial));

	rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));

	/* fallback: use the device name as the serial when requested */
	if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
		memset(serial, 0, MAX_RAID_SERIAL_LEN);
		fd2devname(fd, (char *) serial);
		return 0;
	}

	if (rv != 0) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to retrieve serial for %s\n",
				devname);
		return rv;
	}

	/* byte 3 of the VPD response holds the payload length */
	rsp_len = scsi_serial[3];
	if (!rsp_len) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to retrieve serial for %s\n",
				devname);
		return 2;
	}
	rsp_buf = (char *) &scsi_serial[4];

	/* trim all whitespace and non-printable characters and convert
	 * ':' to ';'
	 */
	for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
		src = &rsp_buf[i];
		if (*src > 0x20) {
			/* ':' is reserved for use in placeholder serial
			 * numbers for missing disks
			 */
			if (*src == ':')
				*dest++ = ';';
			else
				*dest++ = *src;
		}
	}
	/* dest never outruns src, so this in-place compaction is safe */
	len = dest - rsp_buf;
	dest = rsp_buf;

	/* truncate leading characters */
	if (len > MAX_RAID_SERIAL_LEN) {
		dest += len - MAX_RAID_SERIAL_LEN;
		len = MAX_RAID_SERIAL_LEN;
	}

	memset(serial, 0, MAX_RAID_SERIAL_LEN);
	memcpy(serial, dest, len);

	return 0;
}
2735
/* Compare two serial numbers, bounded to the fixed on-disk field width.
 * Returns 0 when equal (strncmp semantics).
 */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}
2740
/* Copy a serial into a fixed MAX_RAID_SERIAL_LEN on-disk field.  The
 * strncpy semantics are intentional here: shorter serials are
 * zero-padded and a maximum-length serial is stored with no NUL
 * terminator, matching the metadata format.
 */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
2745
2746 #ifndef MDASSEMBLE
2747 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2748 {
2749 struct dl *dl;
2750
2751 for (dl = super->disks; dl; dl = dl->next)
2752 if (serialcmp(dl->serial, serial) == 0)
2753 break;
2754
2755 return dl;
2756 }
2757 #endif
2758
2759 static struct imsm_disk *
2760 __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2761 {
2762 int i;
2763
2764 for (i = 0; i < mpb->num_disks; i++) {
2765 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2766
2767 if (serialcmp(disk->serial, serial) == 0) {
2768 if (idx)
2769 *idx = i;
2770 return disk;
2771 }
2772 }
2773
2774 return NULL;
2775 }
2776
2777 static int
2778 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2779 {
2780 struct imsm_disk *disk;
2781 struct dl *dl;
2782 struct stat stb;
2783 int rv;
2784 char name[40];
2785 __u8 serial[MAX_RAID_SERIAL_LEN];
2786
2787 rv = imsm_read_serial(fd, devname, serial);
2788
2789 if (rv != 0)
2790 return 2;
2791
2792 dl = calloc(1, sizeof(*dl));
2793 if (!dl) {
2794 if (devname)
2795 fprintf(stderr,
2796 Name ": failed to allocate disk buffer for %s\n",
2797 devname);
2798 return 2;
2799 }
2800
2801 fstat(fd, &stb);
2802 dl->major = major(stb.st_rdev);
2803 dl->minor = minor(stb.st_rdev);
2804 dl->next = super->disks;
2805 dl->fd = keep_fd ? fd : -1;
2806 assert(super->disks == NULL);
2807 super->disks = dl;
2808 serialcpy(dl->serial, serial);
2809 dl->index = -2;
2810 dl->e = NULL;
2811 fd2devname(fd, name);
2812 if (devname)
2813 dl->devname = strdup(devname);
2814 else
2815 dl->devname = strdup(name);
2816
2817 /* look up this disk's index in the current anchor */
2818 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2819 if (disk) {
2820 dl->disk = *disk;
2821 /* only set index on disks that are a member of a
2822 * populated contianer, i.e. one with raid_devs
2823 */
2824 if (is_failed(&dl->disk))
2825 dl->index = -2;
2826 else if (is_spare(&dl->disk))
2827 dl->index = -1;
2828 }
2829
2830 return 0;
2831 }
2832
2833 #ifndef MDASSEMBLE
2834 /* When migrating map0 contains the 'destination' state while map1
2835 * contains the current state. When not migrating map0 contains the
2836 * current state. This routine assumes that map[0].map_state is set to
2837 * the current array state before being called.
2838 *
2839 * Migration is indicated by one of the following states
2840 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2841 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2842 * map1state=unitialized)
2843 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2844 * map1state=normal)
2845 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2846 * map1state=degraded)
2847 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2848 * map1state=normal)
2849 */
/* migrate() - begin a migration of type @migr_type on @dev.
 * The current map is duplicated into the map[1] slot and map[0] is
 * marked with the target end state @to_state (see the state table in
 * the comment above).
 */
static void migrate(struct imsm_dev *dev, struct intel_super *super,
		    __u8 to_state, int migr_type)
{
	struct imsm_map *dest;
	struct imsm_map *src = get_imsm_map(dev, 0);

	/* flag the volume as migrating and reset the checkpoint */
	dev->vol.migr_state = 1;
	set_migr_type(dev, migr_type);
	dev->vol.curr_migr_unit = 0;
	dest = get_imsm_map(dev, 1);

	/* duplicate and then set the target end state in map[0] */
	memcpy(dest, src, sizeof_imsm_map(src));
	if ((migr_type == MIGR_REBUILD) ||
	    (migr_type == MIGR_GEN_MIGR)) {
		/* strip IMSM_ORD_REBUILD markers from the source map's
		 * ordinal table before the new rebuild/reshape starts
		 */
		__u32 ord;
		int i;

		for (i = 0; i < src->num_members; i++) {
			ord = __le32_to_cpu(src->disk_ord_tbl[i]);
			set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
		}
	}

	if (migr_type == MIGR_GEN_MIGR)
		/* Clear migration record */
		memset(super->migr_rec, 0, sizeof(struct migr_record));

	src->map_state = to_state;
}
2880
/* end_migration() - finish the current migration on @dev: carry any
 * unfinished IMSM_ORD_REBUILD markers from the previous map into the
 * final map, clear the migration state and set map[0] to @map_state.
 */
static void end_migration(struct imsm_dev *dev, __u8 map_state)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	/* second map (pre-migration state) when migr_state is set */
	struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
	int i, j;

	/* merge any IMSM_ORD_REBUILD bits that were not successfully
	 * completed in the last migration.
	 *
	 * FIXME add support for raid-level-migration
	 */
	for (i = 0; i < prev->num_members; i++)
		for (j = 0; j < map->num_members; j++)
			/* during online capacity expansion
			 * disks position can be changed if takeover is used
			 */
			if (ord_to_idx(map->disk_ord_tbl[j]) ==
			    ord_to_idx(prev->disk_ord_tbl[i])) {
				map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
				break;
			}

	/* migration done: reset volume migration bookkeeping */
	dev->vol.migr_state = 0;
	dev->vol.migr_type = 0;
	dev->vol.curr_migr_unit = 0;
	map->map_state = map_state;
}
2908 #endif
2909
/* parse_raid_devices() - copy each raid device definition out of the
 * anchor into super->devlist (each sized for its worst-case migrating
 * layout) and grow super->buf so the mpb could hold every device
 * migrating at once.
 * Returns 0 on success, 1 on allocation failure.
 */
static int parse_raid_devices(struct intel_super *super)
{
	int i;
	struct imsm_dev *dev_new;
	size_t len, len_migr;
	size_t max_len = 0;
	size_t space_needed = 0;
	struct imsm_super *mpb = super->anchor;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
		struct intel_dev *dv;

		len = sizeof_imsm_dev(dev_iter, 0);
		len_migr = sizeof_imsm_dev(dev_iter, 1); /* with second map */
		if (len_migr > len)
			space_needed += len_migr - len;

		dv = malloc(sizeof(*dv));
		if (!dv)
			return 1;
		/* NOTE(review): max_len only ever grows, so the second
		 * branch below cannot fire immediately after the first;
		 * it adds padding when a later device is smaller than the
		 * largest seen so far -- TODO confirm intent
		 */
		if (max_len < len_migr)
			max_len = len_migr;
		if (max_len > len_migr)
			space_needed += max_len - len_migr;
		dev_new = malloc(max_len);
		if (!dev_new) {
			free(dv);
			return 1;
		}
		imsm_copy_dev(dev_new, dev_iter);
		dv->dev = dev_new;
		dv->index = i;
		dv->next = super->devlist;
		super->devlist = dv;
	}

	/* ensure that super->buf is large enough when all raid devices
	 * are migrating
	 */
	if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
		void *buf;

		/* reallocate, preserving the existing mpb contents and
		 * zeroing the newly added tail
		 */
		len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
		if (posix_memalign(&buf, 512, len) != 0)
			return 1;

		memcpy(buf, super->buf, super->len);
		memset(buf + super->len, 0, len - super->len);
		free(super->buf);
		super->buf = buf;
		super->len = len;
	}

	return 0;
}
2966
2967 /* retrieve a pointer to the bbm log which starts after all raid devices */
2968 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2969 {
2970 void *ptr = NULL;
2971
2972 if (__le32_to_cpu(mpb->bbm_log_size)) {
2973 ptr = mpb;
2974 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2975 }
2976
2977 return ptr;
2978 }
2979
2980 /*******************************************************************************
2981 * Function: check_mpb_migr_compatibility
2982 * Description: Function checks for unsupported migration features:
2983 * - migration optimization area (pba_of_lba0)
2984 * - descending reshape (ascending_migr)
2985 * Parameters:
2986 * super : imsm metadata information
2987 * Returns:
2988 * 0 : migration is compatible
2989 * -1 : migration is not compatible
2990 ******************************************************************************/
2991 int check_mpb_migr_compatibility(struct intel_super *super)
2992 {
2993 struct imsm_map *map0, *map1;
2994 struct migr_record *migr_rec = super->migr_rec;
2995 int i;
2996
2997 for (i = 0; i < super->anchor->num_raid_devs; i++) {
2998 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
2999
3000 if (dev_iter &&
3001 dev_iter->vol.migr_state == 1 &&
3002 dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
3003 /* This device is migrating */
3004 map0 = get_imsm_map(dev_iter, 0);
3005 map1 = get_imsm_map(dev_iter, 1);
3006 if (map0->pba_of_lba0 != map1->pba_of_lba0)
3007 /* migration optimization area was used */
3008 return -1;
3009 if (migr_rec->ascending_migr == 0
3010 && migr_rec->dest_depth_per_unit > 0)
3011 /* descending reshape not supported yet */
3012 return -1;
3013 }
3014 }
3015 return 0;
3016 }
3017
3018 static void __free_imsm(struct intel_super *super, int free_disks);
3019
3020 /* load_imsm_mpb - read matrix metadata
3021 * allocates super->mpb to be freed by free_imsm
3022 */
3023 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
3024 {
3025 unsigned long long dsize;
3026 unsigned long long sectors;
3027 struct stat;
3028 struct imsm_super *anchor;
3029 __u32 check_sum;
3030
3031 get_dev_size(fd, NULL, &dsize);
3032 if (dsize < 1024) {
3033 if (devname)
3034 fprintf(stderr,
3035 Name ": %s: device to small for imsm\n",
3036 devname);
3037 return 1;
3038 }
3039
3040 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
3041 if (devname)
3042 fprintf(stderr,
3043 Name ": Cannot seek to anchor block on %s: %s\n",
3044 devname, strerror(errno));
3045 return 1;
3046 }
3047
3048 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
3049 if (devname)
3050 fprintf(stderr,
3051 Name ": Failed to allocate imsm anchor buffer"
3052 " on %s\n", devname);
3053 return 1;
3054 }
3055 if (read(fd, anchor, 512) != 512) {
3056 if (devname)
3057 fprintf(stderr,
3058 Name ": Cannot read anchor block on %s: %s\n",
3059 devname, strerror(errno));
3060 free(anchor);
3061 return 1;
3062 }
3063
3064 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
3065 if (devname)
3066 fprintf(stderr,
3067 Name ": no IMSM anchor on %s\n", devname);
3068 free(anchor);
3069 return 2;
3070 }
3071
3072 __free_imsm(super, 0);
3073 /* reload capability and hba */
3074
3075 /* capability and hba must be updated with new super allocation */
3076 find_intel_hba_capability(fd, super, devname);
3077 super->len = ROUND_UP(anchor->mpb_size, 512);
3078 if (posix_memalign(&super->buf, 512, super->len) != 0) {
3079 if (devname)
3080 fprintf(stderr,
3081 Name ": unable to allocate %zu byte mpb buffer\n",
3082 super->len);
3083 free(anchor);
3084 return 2;
3085 }
3086 memcpy(super->buf, anchor, 512);
3087
3088 sectors = mpb_sectors(anchor) - 1;
3089 free(anchor);
3090
3091 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3092 fprintf(stderr, Name
3093 ": %s could not allocate migr_rec buffer\n", __func__);
3094 free(super->buf);
3095 return 2;
3096 }
3097
3098 if (!sectors) {
3099 check_sum = __gen_imsm_checksum(super->anchor);
3100 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3101 if (devname)
3102 fprintf(stderr,
3103 Name ": IMSM checksum %x != %x on %s\n",
3104 check_sum,
3105 __le32_to_cpu(super->anchor->check_sum),
3106 devname);
3107 return 2;
3108 }
3109
3110 return 0;
3111 }
3112
3113 /* read the extended mpb */
3114 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
3115 if (devname)
3116 fprintf(stderr,
3117 Name ": Cannot seek to extended mpb on %s: %s\n",
3118 devname, strerror(errno));
3119 return 1;
3120 }
3121
3122 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
3123 if (devname)
3124 fprintf(stderr,
3125 Name ": Cannot read extended mpb on %s: %s\n",
3126 devname, strerror(errno));
3127 return 2;
3128 }
3129
3130 check_sum = __gen_imsm_checksum(super->anchor);
3131 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3132 if (devname)
3133 fprintf(stderr,
3134 Name ": IMSM checksum %x != %x on %s\n",
3135 check_sum, __le32_to_cpu(super->anchor->check_sum),
3136 devname);
3137 return 3;
3138 }
3139
3140 /* FIXME the BBM log is disk specific so we cannot use this global
3141 * buffer for all disks. Ok for now since we only look at the global
3142 * bbm_log_size parameter to gate assembly
3143 */
3144 super->bbm_log = __get_imsm_bbm_log(super->anchor);
3145
3146 return 0;
3147 }
3148
3149 static int read_imsm_migr_rec(int fd, struct intel_super *super);
3150
/* Read the anchor/mpb, register the disk, then expand the per-device
 * records; stops at the first failing step and returns its error code.
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int rc = load_imsm_mpb(fd, super, devname);

	if (rc == 0)
		rc = load_imsm_disk(fd, super, devname, keep_fd);
	if (rc == 0)
		rc = parse_raid_devices(super);

	return rc;
}
3166
3167 static void __free_imsm_disk(struct dl *d)
3168 {
3169 if (d->fd >= 0)
3170 close(d->fd);
3171 if (d->devname)
3172 free(d->devname);
3173 if (d->e)
3174 free(d->e);
3175 free(d);
3176
3177 }
3178
3179 static void free_imsm_disks(struct intel_super *super)
3180 {
3181 struct dl *d;
3182
3183 while (super->disks) {
3184 d = super->disks;
3185 super->disks = d->next;
3186 __free_imsm_disk(d);
3187 }
3188 while (super->disk_mgmt_list) {
3189 d = super->disk_mgmt_list;
3190 super->disk_mgmt_list = d->next;
3191 __free_imsm_disk(d);
3192 }
3193 while (super->missing) {
3194 d = super->missing;
3195 super->missing = d->next;
3196 __free_imsm_disk(d);
3197 }
3198
3199 }
3200
3201 /* free all the pieces hanging off of a super pointer */
3202 static void __free_imsm(struct intel_super *super, int free_disks)
3203 {
3204 struct intel_hba *elem, *next;
3205
3206 if (super->buf) {
3207 free(super->buf);
3208 super->buf = NULL;
3209 }
3210 /* unlink capability description */
3211 super->orom = NULL;
3212 if (super->migr_rec_buf) {
3213 free(super->migr_rec_buf);
3214 super->migr_rec_buf = NULL;
3215 }
3216 if (free_disks)
3217 free_imsm_disks(super);
3218 free_devlist(super);
3219 elem = super->hba;
3220 while (elem) {
3221 if (elem->path)
3222 free((void *)elem->path);
3223 next = elem->next;
3224 free(elem);
3225 elem = next;
3226 }
3227 super->hba = NULL;
3228 }
3229
/* free_imsm - release everything hanging off 'super' (including its
 * disk lists), then 'super' itself.
 */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
3235
3236 static void free_super_imsm(struct supertype *st)
3237 {
3238 struct intel_super *super = st->sb;
3239
3240 if (!super)
3241 return;
3242
3243 free_imsm(super);
3244 st->sb = NULL;
3245 }
3246
3247 static struct intel_super *alloc_super(void)
3248 {
3249 struct intel_super *super = malloc(sizeof(*super));
3250
3251 if (super) {
3252 memset(super, 0, sizeof(*super));
3253 super->current_vol = -1;
3254 super->create_offset = ~((__u32 ) 0);
3255 }
3256 return super;
3257 }
3258
/*
 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
 *
 * Returns: 0 on success (or when platform checks are skipped via a closed
 * fd or IMSM_NO_PLATFORM), 1 when the disk is not attached to an Intel(R)
 * controller, 2 when the disk's controller conflicts with the one already
 * bound to 'super', 3 when no OROM/EFI capability exists for the
 * controller type.
 */
static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
{
	struct sys_dev *hba_name;
	int rv = 0;

	/* no fd or explicit override: skip all platform validation */
	if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
		super->orom = NULL;
		super->hba = NULL;
		return 0;
	}
	hba_name = find_disk_attached_hba(fd, NULL);
	if (!hba_name) {
		if (devname)
			fprintf(stderr,
				Name ": %s is not attached to Intel(R) RAID controller.\n",
				devname);
		return 1;
	}
	rv = attach_hba_to_super(super, hba_name);
	if (rv == 2) {
		/* disk and container live on different controllers: list
		 * every controller already bound to this container */
		if (devname) {
			struct intel_hba *hba = super->hba;

			fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
				"controller (%s),\n"
				"    but the container is assigned to Intel(R) "
				"%s RAID controller (",
				devname,
				hba_name->path,
				hba_name->pci_id ? : "Err!",
				get_sys_dev_type(hba_name->type));

			while (hba) {
				fprintf(stderr, "%s", hba->pci_id ? : "Err!");
				if (hba->next)
					fprintf(stderr, ", ");
				hba = hba->next;
			}

			fprintf(stderr, ").\n"
				"    Mixing devices attached to different controllers "
				"is not allowed.\n");
		}
		free_sys_dev(&hba_name);
		return 2;
	}
	super->orom = find_imsm_capability(hba_name->type);
	free_sys_dev(&hba_name);
	if (!super->orom)
		return 3;
	return 0;
}
3314
3315 #ifndef MDASSEMBLE
/* find_missing - helper routine for load_super_imsm_all that identifies
 * disks that have disappeared from the system. This routine relies on
 * the mpb being uptodate, which it is at load time.
 *
 * Every disk recorded in the mpb but absent from super->disks gets a
 * synthetic placeholder entry pushed onto super->missing.
 * Returns 0 on success, 1 on allocation failure.
 */
static int find_missing(struct intel_super *super)
{
	int i;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_disk *disk;

	for (i = 0; i < mpb->num_disks; i++) {
		disk = __get_imsm_disk(mpb, i);
		dl = serial_to_dl(disk->serial, super);
		if (dl)
			continue;	/* disk is present; nothing to do */

		dl = malloc(sizeof(*dl));
		if (!dl)
			return 1;
		dl->major = 0;
		dl->minor = 0;
		dl->fd = -1;	/* no backing device to open */
		/* NOTE(review): strdup() result is not checked; users of this
		 * list appear to tolerate a NULL devname - confirm */
		dl->devname = strdup("missing");
		dl->index = i;
		serialcpy(dl->serial, disk->serial);
		dl->disk = *disk;
		dl->e = NULL;
		dl->next = super->missing;
		super->missing = dl;
	}

	return 0;
}
3350
3351 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3352 {
3353 struct intel_disk *idisk = disk_list;
3354
3355 while (idisk) {
3356 if (serialcmp(idisk->disk.serial, serial) == 0)
3357 break;
3358 idisk = idisk->next;
3359 }
3360
3361 return idisk;
3362 }
3363
/* __prep_thunderdome - merge one freshly loaded 'super' into 'table', the
 * working set of candidate mpbs (at most one slot per metadata family),
 * and fold its imsm_disk records into the global 'disk_list'.
 *
 * Returns the (possibly grown) table size, or -1 on allocation failure.
 */
static int __prep_thunderdome(struct intel_super **table, int tbl_size,
			      struct intel_super *super,
			      struct intel_disk **disk_list)
{
	struct imsm_disk *d = &super->disks->disk;
	struct imsm_super *mpb = super->anchor;
	int i, j;

	for (i = 0; i < tbl_size; i++) {
		struct imsm_super *tbl_mpb = table[i]->anchor;
		struct imsm_disk *tbl_d = &table[i]->disks->disk;

		if (tbl_mpb->family_num == mpb->family_num) {
			if (tbl_mpb->check_sum == mpb->check_sum) {
				/* identical anchors: keep the table entry */
				dprintf("%s: mpb from %d:%d matches %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);
				break;
			}

			if (((is_configured(d) && !is_configured(tbl_d)) ||
			     is_configured(d) == is_configured(tbl_d)) &&
			    tbl_mpb->generation_num < mpb->generation_num) {
				/* current version of the mpb is a
				 * better candidate than the one in
				 * super_table, but copy over "cross
				 * generational" status
				 */
				struct intel_disk *idisk;

				dprintf("%s: mpb from %d:%d replaces %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				idisk = disk_list_get(tbl_d->serial, *disk_list);
				if (idisk && is_failed(&idisk->disk))
					tbl_d->status |= FAILED_DISK;
				break;
			} else {
				struct intel_disk *idisk;
				struct imsm_disk *disk;

				/* tbl_mpb is more up to date, but copy
				 * over cross generational status before
				 * returning
				 */
				disk = __serial_to_disk(d->serial, mpb, NULL);
				if (disk && is_failed(disk))
					d->status |= FAILED_DISK;

				idisk = disk_list_get(d->serial, *disk_list);
				if (idisk) {
					idisk->owner = i;
					if (disk && is_configured(disk))
						idisk->disk.status |= CONFIGURED_DISK;
				}

				dprintf("%s: mpb from %d:%d prefer %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				return tbl_size;
			}
		}
	}

	if (i >= tbl_size)
		table[tbl_size++] = super;	/* new family: append */
	else
		table[i] = super;	/* same family: this mpb won the slot */

	/* update/extend the merged list of imsm_disk records */
	for (j = 0; j < mpb->num_disks; j++) {
		struct imsm_disk *disk = __get_imsm_disk(mpb, j);
		struct intel_disk *idisk;

		idisk = disk_list_get(disk->serial, *disk_list);
		if (idisk) {
			idisk->disk.status |= disk->status;
			/* a disk seen configured or failed anywhere is no
			 * longer considered a spare */
			if (is_configured(&idisk->disk) ||
			    is_failed(&idisk->disk))
				idisk->disk.status &= ~(SPARE_DISK);
		} else {
			idisk = calloc(1, sizeof(*idisk));
			if (!idisk)
				return -1;
			idisk->owner = IMSM_UNKNOWN_OWNER;
			idisk->disk = *disk;
			idisk->next = *disk_list;
			*disk_list = idisk;
		}

		if (serialcmp(idisk->disk.serial, d->serial) == 0)
			idisk->owner = i;
	}

	return tbl_size;
}
3468
/* validate_members - verify that every disk recorded in 'super's mpb is
 * present in the merged 'disk_list' and is owned by table slot 'owner'
 * (or is unowned).  Returns 'super' when all members check out, NULL
 * otherwise.
 */
static struct intel_super *
validate_members(struct intel_super *super, struct intel_disk *disk_list,
		 const int owner)
{
	struct imsm_super *mpb = super->anchor;
	int ok_count = 0;
	int i;

	for (i = 0; i < mpb->num_disks; i++) {
		struct imsm_disk *disk = __get_imsm_disk(mpb, i);
		struct intel_disk *idisk;

		idisk = disk_list_get(disk->serial, disk_list);
		if (idisk) {
			if (idisk->owner == owner ||
			    idisk->owner == IMSM_UNKNOWN_OWNER)
				ok_count++;
			else
				dprintf("%s: '%.16s' owner %d != %d\n",
					__func__, disk->serial, idisk->owner,
					owner);
		} else {
			/* a member missing from the merged list is fatal */
			dprintf("%s: unknown disk %x [%d]: %.16s\n",
				__func__, __le32_to_cpu(mpb->family_num), i,
				disk->serial);
			break;
		}
	}

	if (ok_count == mpb->num_disks)
		return super;
	return NULL;
}
3502
3503 static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3504 {
3505 struct intel_super *s;
3506
3507 for (s = super_list; s; s = s->next) {
3508 if (family_num != s->anchor->family_num)
3509 continue;
3510 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3511 __le32_to_cpu(family_num), s->disks->devname);
3512 }
3513 }
3514
/* imsm_thunderdome - given 'len' freshly loaded supers, elect a single
 * "champion" mpb for the container, migrate every disk entry onto it,
 * and free the merged disk records.  The champion is unlinked from
 * *super_list and returned (NULL when no coherent family was found).
 */
static struct intel_super *
imsm_thunderdome(struct intel_super **super_list, int len)
{
	struct intel_super *super_table[len];
	struct intel_disk *disk_list = NULL;
	struct intel_super *champion, *spare;
	struct intel_super *s, **del;
	int tbl_size = 0;
	int conflict;
	int i;

	/* phase 1: reduce the candidates to one mpb per family */
	memset(super_table, 0, sizeof(super_table));
	for (s = *super_list; s; s = s->next)
		tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);

	/* phase 2: cross-check each surviving candidate against the merged
	 * disk list, offlining families that fail validation */
	for (i = 0; i < tbl_size; i++) {
		struct imsm_disk *d;
		struct intel_disk *idisk;
		struct imsm_super *mpb = super_table[i]->anchor;

		s = super_table[i];
		d = &s->disks->disk;

		/* 'd' must appear in merged disk list for its
		 * configuration to be valid
		 */
		idisk = disk_list_get(d->serial, disk_list);
		if (idisk && idisk->owner == i)
			s = validate_members(s, disk_list, i);
		else
			s = NULL;

		if (!s)
			dprintf("%s: marking family: %#x from %d:%d offline\n",
				__func__, mpb->family_num,
				super_table[i]->disks->major,
				super_table[i]->disks->minor);
		super_table[i] = s;
	}

	/* This is where the mdadm implementation differs from the Windows
	 * driver which has no strict concept of a container. We can only
	 * assemble one family from a container, so when returning a prodigal
	 * array member to this system the code will not be able to disambiguate
	 * the container contents that should be assembled ("foreign" versus
	 * "local"). It requires user intervention to set the orig_family_num
	 * to a new value to establish a new container. The Windows driver in
	 * this situation fixes up the volume name in place and manages the
	 * foreign array as an independent entity.
	 */
	s = NULL;
	spare = NULL;
	conflict = 0;
	for (i = 0; i < tbl_size; i++) {
		struct intel_super *tbl_ent = super_table[i];
		int is_spare = 0;

		if (!tbl_ent)
			continue;

		if (tbl_ent->anchor->num_raid_devs == 0) {
			spare = tbl_ent;
			is_spare = 1;
		}

		/* a second non-spare family is a conflict */
		if (s && !is_spare) {
			show_conflicts(tbl_ent->anchor->family_num, *super_list);
			conflict++;
		} else if (!s && !is_spare)
			s = tbl_ent;
	}

	/* fall back to a pure-spare record when no family has volumes */
	if (!s)
		s = spare;
	if (!s) {
		champion = NULL;
		goto out;
	}
	champion = s;

	if (conflict)
		fprintf(stderr, "Chose family %#x on '%s', "
			"assemble conflicts to new container with '--update=uuid'\n",
			__le32_to_cpu(s->anchor->family_num), s->disks->devname);

	/* collect all dl's onto 'champion', and update them to
	 * champion's version of the status
	 */
	for (s = *super_list; s; s = s->next) {
		struct imsm_super *mpb = champion->anchor;
		struct dl *dl = s->disks;

		if (s == champion)
			continue;

		for (i = 0; i < mpb->num_disks; i++) {
			struct imsm_disk *disk;

			disk = __serial_to_disk(dl->serial, mpb, &dl->index);
			if (disk) {
				dl->disk = *disk;
				/* only set index on disks that are a member of
				 * a populated contianer, i.e. one with
				 * raid_devs
				 */
				if (is_failed(&dl->disk))
					dl->index = -2;
				else if (is_spare(&dl->disk))
					dl->index = -1;
				break;
			}
		}

		if (i >= mpb->num_disks) {
			struct intel_disk *idisk;

			/* the champion does not know this disk; keep it only
			 * when the merged list says it is a healthy spare */
			idisk = disk_list_get(dl->serial, disk_list);
			if (idisk && is_spare(&idisk->disk) &&
			    !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
				dl->index = -1;
			else {
				dl->index = -2;
				continue;
			}
		}

		dl->next = champion->disks;
		champion->disks = dl;
		s->disks = NULL;
	}

	/* delete 'champion' from super_list */
	for (del = super_list; *del; ) {
		if (*del == champion) {
			*del = (*del)->next;
			break;
		} else
			del = &(*del)->next;
	}
	champion->next = NULL;

 out:
	/* the merged disk records are only needed during the election */
	while (disk_list) {
		struct intel_disk *idisk = disk_list;

		disk_list = disk_list->next;
		free(idisk);
	}

	return champion;
}
3666
3667 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
3668 char *devname)
3669 {
3670 struct mdinfo *sra;
3671 struct intel_super *super_list = NULL;
3672 struct intel_super *super = NULL;
3673 int devnum = fd2devnum(fd);
3674 struct mdinfo *sd;
3675 int retry;
3676 int err = 0;
3677 int i;
3678
3679 /* check if 'fd' an opened container */
3680 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
3681 if (!sra)
3682 return 1;
3683
3684 if (sra->array.major_version != -1 ||
3685 sra->array.minor_version != -2 ||
3686 strcmp(sra->text_version, "imsm") != 0) {
3687 err = 1;
3688 goto error;
3689 }
3690 /* load all mpbs */
3691 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
3692 struct intel_super *s = alloc_super();
3693 char nm[32];
3694 int dfd;
3695 int rv;
3696
3697 err = 1;
3698 if (!s)
3699 goto error;
3700 s->next = super_list;
3701 super_list = s;
3702
3703 err = 2;
3704 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3705 dfd = dev_open(nm, O_RDWR);
3706 if (dfd < 0)
3707 goto error;
3708
3709 rv = find_intel_hba_capability(dfd, s, devname);
3710 /* no orom/efi or non-intel hba of the disk */
3711 if (rv != 0)
3712 goto error;
3713
3714 err = load_and_parse_mpb(dfd, s, NULL, 1);
3715
3716 /* retry the load if we might have raced against mdmon */
3717 if (err == 3 && mdmon_running(devnum))
3718 for (retry = 0; retry < 3; retry++) {
3719 usleep(3000);
3720 err = load_and_parse_mpb(dfd, s, NULL, 1);
3721 if (err != 3)
3722 break;
3723 }
3724 if (err)
3725 goto error;
3726 }
3727
3728 /* all mpbs enter, maybe one leaves */
3729 super = imsm_thunderdome(&super_list, i);
3730 if (!super) {
3731 err = 1;
3732 goto error;
3733 }
3734
3735 if (find_missing(super) != 0) {
3736 free_imsm(super);
3737 err = 2;
3738 goto error;
3739 }
3740
3741 /* load migration record */
3742 err = load_imsm_migr_rec(super, NULL);
3743 if (err) {
3744 err = 4;
3745 goto error;
3746 }
3747
3748 /* Check migration compatibility */
3749 if (check_mpb_migr_compatibility(super) != 0) {
3750 fprintf(stderr, Name ": Unsupported migration detected");
3751 if (devname)
3752 fprintf(stderr, " on %s\n", devname);
3753 else
3754 fprintf(stderr, " (IMSM).\n");
3755
3756 err = 5;
3757 goto error;
3758 }
3759
3760 err = 0;
3761
3762 error:
3763 while (super_list) {
3764 struct intel_super *s = super_list;
3765
3766 super_list = super_list->next;
3767 free_imsm(s);
3768 }
3769 sysfs_free(sra);
3770
3771 if (err)
3772 return err;
3773
3774 *sbp = super;
3775 st->container_dev = devnum;
3776 if (err == 0 && st->ss == NULL) {
3777 st->ss = &super_imsm;
3778 st->minor_version = 0;
3779 st->max_devs = IMSM_MAX_DEVICES;
3780 }
3781 return 0;
3782 }
3783
/* load_container_imsm - supertype hook: load every member mpb of the
 * container open on 'fd' into st->sb.  Returns load_super_imsm_all()'s
 * error code (0 on success).
 */
static int load_container_imsm(struct supertype *st, int fd, char *devname)
{
	return load_super_imsm_all(st, fd, &st->sb, devname);
}
3788 #endif
3789
/* load_super_imsm - load imsm metadata from a single component device.
 *
 * Returns 0 on success (st->sb populated), 1 for a partition or allocation
 * failure, 2 when platform capability checks fail (and ignore_hw_compat is
 * unset), 3 for an unsupported migration, or load_and_parse_mpb()'s error
 * code.  Note that on return 3 st->sb remains set.
 */
static int load_super_imsm(struct supertype *st, int fd, char *devname)
{
	struct intel_super *super;
	int rv;

	if (test_partition(fd))
		/* IMSM not allowed on partitions */
		return 1;

	/* drop any previously loaded metadata */
	free_super_imsm(st);

	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		return 1;
	}
	/* Load hba and capabilities if they exist.
	 * But do not preclude loading metadata in case capabilities or hba are
	 * non-compliant and ignore_hw_compat is set.
	 */
	rv = find_intel_hba_capability(fd, super, devname);
	/* no orom/efi or non-intel hba of the disk */
	if ((rv != 0) && (st->ignore_hw_compat == 0)) {
		if (devname)
			fprintf(stderr,
				Name ": No OROM/EFI properties for %s\n", devname);
		free_imsm(super);
		return 2;
	}
	rv = load_and_parse_mpb(fd, super, devname, 0);

	if (rv) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to load all information "
				"sections on %s\n", devname);
		free_imsm(super);
		return rv;
	}

	st->sb = super;
	if (st->ss == NULL) {
		st->ss = &super_imsm;
		st->minor_version = 0;
		st->max_devs = IMSM_MAX_DEVICES;
	}

	/* load migration record */
	/* NOTE(review): the return value is ignored here, unlike in
	 * load_super_imsm_all() - confirm this asymmetry is intentional */
	load_imsm_migr_rec(super, NULL);

	/* Check for unsupported migration features */
	if (check_mpb_migr_compatibility(super) != 0) {
		fprintf(stderr, Name ": Unsupported migration detected");
		if (devname)
			fprintf(stderr, " on %s\n", devname);
		else
			fprintf(stderr, " (IMSM).\n");
		return 3;
	}

	return 0;
}
3854
3855 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3856 {
3857 if (info->level == 1)
3858 return 128;
3859 return info->chunk_size >> 9;
3860 }
3861
3862 static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
3863 {
3864 __u32 num_stripes;
3865
3866 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
3867 num_stripes /= num_domains;
3868
3869 return num_stripes;
3870 }
3871
3872 static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3873 {
3874 if (info->level == 1)
3875 return info->size * 2;
3876 else
3877 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
3878 }
3879
/* imsm_update_version_info - raise the mpb signature version (and the
 * attribute bits) to the minimum level required by the raid devices the
 * container now holds.
 */
static void imsm_update_version_info(struct intel_super *super)
{
	/* update the version and attributes */
	struct imsm_super *mpb = super->anchor;
	char *version;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		/* any volume with a non-zero high size word needs 2TB
		 * support advertised */
		if (__le32_to_cpu(dev->size_high) > 0)
			mpb->attributes |= MPB_ATTRIB_2TB;

		/* FIXME detect when an array spans a port multiplier */
#if 0
		mpb->attributes |= MPB_ATTRIB_PM;
#endif

		if (mpb->num_raid_devs > 1 ||
		    mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
			/* attribute-style metadata: record the raid level
			 * as a capability flag */
			version = MPB_VERSION_ATTRIBS;
			switch (get_imsm_raid_level(map)) {
			case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
			case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
			case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
			case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
			}
		} else {
			/* legacy versions, checked from the most to the
			 * least demanding feature */
			if (map->num_members >= 5)
				version = MPB_VERSION_5OR6_DISK_ARRAY;
			else if (dev->status == DEV_CLONE_N_GO)
				version = MPB_VERSION_CNG;
			else if (get_imsm_raid_level(map) == 5)
				version = MPB_VERSION_RAID5;
			else if (map->num_members >= 3)
				version = MPB_VERSION_3OR4_DISK_ARRAY;
			else if (get_imsm_raid_level(map) == 1)
				version = MPB_VERSION_RAID1;
			else
				version = MPB_VERSION_RAID0;
		}
		/* overwrite the version suffix that follows the signature */
		strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
	}
}
3926
/* check_name - validate a proposed volume name: it must fit in the
 * MAX_RAID_SERIAL_LEN byte on-disk field and must not collide with an
 * existing volume (a collision deliberately overrides the length message).
 * Returns non-zero when the name is acceptable.
 */
static int check_name(struct intel_super *super, char *name, int quiet)
{
	struct imsm_super *mpb = super->anchor;
	char *reason = NULL;
	int i;

	if (strlen(name) > MAX_RAID_SERIAL_LEN)
		reason = "must be 16 characters or less";

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);

		if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
			reason = "already exists";
			break;
		}
	}

	if (reason && !quiet)
		fprintf(stderr, Name ": imsm volume name %s\n", reason);

	return !reason;
}
3950
3951 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
3952 unsigned long long size, char *name,
3953 char *homehost, int *uuid)
3954 {
3955 /* We are creating a volume inside a pre-existing container.
3956 * so st->sb is already set.
3957 */
3958 struct intel_super *super = st->sb;
3959 struct imsm_super *mpb = super->anchor;
3960 struct intel_dev *dv;
3961 struct imsm_dev *dev;
3962 struct imsm_vol *vol;
3963 struct imsm_map *map;
3964 int idx = mpb->num_raid_devs;
3965 int i;
3966 unsigned long long array_blocks;
3967 size_t size_old, size_new;
3968 __u32 num_data_stripes;
3969
3970 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
3971 fprintf(stderr, Name": This imsm-container already has the "
3972 "maximum of %d volumes\n", super->orom->vpa);
3973 return 0;
3974 }
3975
3976 /* ensure the mpb is large enough for the new data */
3977 size_old = __le32_to_cpu(mpb->mpb_size);
3978 size_new = disks_to_mpb_size(info->nr_disks);
3979 if (size_new > size_old) {
3980 void *mpb_new;
3981 size_t size_round = ROUND_UP(size_new, 512);
3982
3983 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
3984 fprintf(stderr, Name": could not allocate new mpb\n");
3985 return 0;
3986 }
3987 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3988 fprintf(stderr, Name
3989 ": %s could not allocate migr_rec buffer\n",
3990 __func__);
3991 free(super->buf);
3992 free(super);
3993 return 0;
3994 }
3995 memcpy(mpb_new, mpb, size_old);
3996 free(mpb);
3997 mpb = mpb_new;
3998 super->anchor = mpb_new;
3999 mpb->mpb_size = __cpu_to_le32(size_new);
4000 memset(mpb_new + size_old, 0, size_round - size_old);
4001 }
4002 super->current_vol = idx;
4003 /* when creating the first raid device in this container set num_disks
4004 * to zero, i.e. delete this spare and add raid member devices in
4005 * add_to_super_imsm_volume()
4006 */
4007 if (super->current_vol == 0)
4008 mpb->num_disks = 0;
4009
4010 if (!check_name(super, name, 0))
4011 return 0;
4012 dv = malloc(sizeof(*dv));
4013 if (!dv) {
4014 fprintf(stderr, Name ": failed to allocate device list entry\n");
4015 return 0;
4016 }
4017 dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
4018 if (!dev) {
4019 free(dv);
4020 fprintf(stderr, Name": could not allocate raid device\n");
4021 return 0;
4022 }
4023
4024 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
4025 if (info->level == 1)
4026 array_blocks = info_to_blocks_per_member(info);
4027 else
4028 array_blocks = calc_array_size(info->level, info->raid_disks,
4029 info->layout, info->chunk_size,
4030 info->size*2);
4031 /* round array size down to closest MB */
4032 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
4033
4034 dev->size_low = __cpu_to_le32((__u32) array_blocks);
4035 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
4036 dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
4037 vol = &dev->vol;
4038 vol->migr_state = 0;
4039 set_migr_type(dev, MIGR_INIT);
4040 vol->dirty = 0;
4041 vol->curr_migr_unit = 0;
4042 map = get_imsm_map(dev, 0);
4043 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
4044 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
4045 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
4046 map->failed_disk_num = ~0;
4047 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
4048 IMSM_T_STATE_NORMAL;
4049 map->ddf = 1;
4050
4051 if (info->level == 1 && info->raid_disks > 2) {
4052 free(dev);
4053 free(dv);
4054 fprintf(stderr, Name": imsm does not support more than 2 disks"
4055 "in a raid1 volume\n");
4056 return 0;
4057 }
4058
4059 map->raid_level = info->level;
4060 if (info->level == 10) {
4061 map->raid_level = 1;
4062 map->num_domains = info->raid_disks / 2;
4063 } else if (info->level == 1)
4064 map->num_domains = info->raid_disks;
4065 else
4066 map->num_domains = 1;
4067
4068 num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
4069 map->num_data_stripes = __cpu_to_le32(num_data_stripes);
4070
4071 map->num_members = info->raid_disks;
4072 for (i = 0; i < map->num_members; i++) {
4073 /* initialized in add_to_super */
4074 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
4075 }
4076 mpb->num_raid_devs++;
4077
4078 dv->dev = dev;
4079 dv->index = super->current_vol;
4080 dv->next = super->devlist;
4081 super->devlist = dv;
4082
4083 imsm_update_version_info(super);
4084
4085 return 1;
4086 }
4087
/* init_super_imsm - supertype "create" hook for fresh drives.  Allocates
 * the in-memory super and a zeroed anchor sized for info->nr_disks (one
 * sector when 'info' is NULL, the superblock-zeroing case).  Returns 1 on
 * success (0 for the zeroing case), 0 on allocation failure.
 */
static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
			   unsigned long long size, char *name,
			   char *homehost, int *uuid)
{
	/* This is primarily called by Create when creating a new array.
	 * We will then get add_to_super called for each component, and then
	 * write_init_super called to write it out to each device.
	 * For IMSM, Create can create on fresh devices or on a pre-existing
	 * array.
	 * To create on a pre-existing array a different method will be called.
	 * This one is just for fresh drives.
	 */
	struct intel_super *super;
	struct imsm_super *mpb;
	size_t mpb_size;
	char *version;

	/* an existing container: delegate to the volume-create path */
	if (st->sb)
		return init_super_imsm_volume(st, info, size, name, homehost, uuid);

	if (info)
		mpb_size = disks_to_mpb_size(info->nr_disks);
	else
		mpb_size = 512;

	super = alloc_super();
	/* the anchor buffer must be sector aligned for raw writes */
	if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
		free(super);
		super = NULL;
	}
	if (!super) {
		fprintf(stderr, Name
			": %s could not allocate superblock\n", __func__);
		return 0;
	}
	if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
		fprintf(stderr, Name
			": %s could not allocate migr_rec buffer\n", __func__);
		free(super->buf);
		free(super);
		return 0;
	}
	memset(super->buf, 0, mpb_size);
	mpb = super->buf;
	mpb->mpb_size = __cpu_to_le32(mpb_size);
	st->sb = super;

	if (info == NULL) {
		/* zeroing superblock */
		return 0;
	}

	mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;

	/* the version suffix follows the signature string in mpb->sig */
	version = (char *) mpb->sig;
	strcpy(version, MPB_SIGNATURE);
	version += strlen(MPB_SIGNATURE);
	strcpy(version, MPB_VERSION_RAID0);

	return 1;
}
4149
4150 #ifndef MDASSEMBLE
/* add_to_super_imsm_volume - bind a container member disk (selected via
 * 'dk', or by pre-marked raid_disk when fd == -1, the autolayout case) to
 * the volume currently being created (super->current_vol).
 * Returns 0 on success, 1 on failure.
 */
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
				    int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int slot;

	dev = get_imsm_dev(super, super->current_vol);
	map = get_imsm_map(dev, 0);

	if (! (dk->state & (1<<MD_DISK_SYNC))) {
		fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
			devname);
		return 1;
	}

	if (fd == -1) {
		/* we're doing autolayout so grab the pre-marked (in
		 * validate_geometry) raid_disk
		 */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		/* explicit device: match by major:minor */
		for (dl = super->disks; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}

	if (!dl) {
		fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
		return 1;
	}

	/* add a pristine spare to the metadata */
	if (dl->index < 0) {
		dl->index = super->anchor->num_disks;
		super->anchor->num_disks++;
	}
	/* Check the device has not already been added */
	slot = get_imsm_disk_slot(map, dl->index);
	if (slot >= 0 &&
	    (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
		fprintf(stderr, Name ": %s has been included in this array twice\n",
			devname);
		return 1;
	}
	/* NOTE(review): dk->number is used as the target slot index here -
	 * confirm it always matches the slot intended for this disk */
	set_imsm_ord_tbl_ent(map, dk->number, dl->index);
	dl->disk.status = CONFIGURED_DISK;

	/* if we are creating the first raid device update the family number */
	if (super->current_vol == 0) {
		__u32 sum;
		struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
		struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);

		if (!_dev || !_disk) {
			fprintf(stderr, Name ": BUG mpb setup error\n");
			return 1;
		}
		*_dev = *dev;
		*_disk = dl->disk;
		/* family_num mixes a random value with the anchor checksum */
		sum = random32();
		sum += __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
		mpb->orig_family_num = mpb->family_num;
	}

	return 0;
}
4225
4226
/* add_to_super_imsm - add a fresh disk to the container as a spare, or
 * delegate to add_to_super_imsm_volume() when a volume create is in
 * progress (super->current_vol >= 0).  Returns 0 on success, 1 on failure;
 * aborts the process if the disk serial cannot be read.
 */
static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
			     int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct dl *dd;
	unsigned long long size;
	__u32 id;
	int rv;
	struct stat stb;

	/* If we are on an RAID enabled platform check that the disk is
	 * attached to the raid controller.
	 * We do not need to test disks attachment for container based additions,
	 * they shall be already tested when container was created/assembled.
	 */
	rv = find_intel_hba_capability(fd, super, devname);
	/* no orom/efi or non-intel hba of the disk */
	if (rv != 0) {
		dprintf("capability: %p fd: %d ret: %d\n",
			super->orom, fd, rv);
		return 1;
	}

	if (super->current_vol >= 0)
		return add_to_super_imsm_volume(st, dk, fd, devname);

	/* NOTE(review): fstat() result is unchecked - stb would be
	 * uninitialized on failure; confirm fd is always valid here */
	fstat(fd, &stb);
	dd = malloc(sizeof(*dd));
	if (!dd) {
		fprintf(stderr,
			Name ": malloc failed %s:%d.\n", __func__, __LINE__);
		return 1;
	}
	memset(dd, 0, sizeof(*dd));
	dd->major = major(stb.st_rdev);
	dd->minor = minor(stb.st_rdev);
	dd->index = -1;		/* new disk: no mpb slot yet */
	dd->devname = devname ? strdup(devname) : NULL;
	dd->fd = fd;
	dd->e = NULL;
	dd->action = DISK_ADD;
	rv = imsm_read_serial(fd, devname, dd->serial);
	if (rv) {
		/* a disk without a readable serial cannot be tracked by
		 * imsm metadata; treated as fatal */
		fprintf(stderr,
			Name ": failed to retrieve scsi serial, aborting\n");
		free(dd);
		abort();
	}

	get_dev_size(fd, NULL, &size);
	size /= 512;	/* bytes -> 512-byte sectors */
	serialcpy(dd->disk.serial, dd->serial);
	dd->disk.total_blocks = __cpu_to_le32(size);
	dd->disk.status = SPARE_DISK;
	if (sysfs_disk_to_scsi_id(fd, &id) == 0)
		dd->disk.scsi_id = __cpu_to_le32(id);
	else
		dd->disk.scsi_id = __cpu_to_le32(0);

	/* in mdmon context the addition is queued for the monitor;
	 * otherwise attach directly and mark the metadata dirty */
	if (st->update_tail) {
		dd->next = super->disk_mgmt_list;
		super->disk_mgmt_list = dd;
	} else {
		dd->next = super->disks;
		super->disks = dd;
		super->updates_pending++;
	}

	return 0;
}
4297
4298
4299 static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
4300 {
4301 struct intel_super *super = st->sb;
4302 struct dl *dd;
4303
4304 /* remove from super works only in mdmon - for communication
4305 * manager - monitor. Check if communication memory buffer
4306 * is prepared.
4307 */
4308 if (!st->update_tail) {
4309 fprintf(stderr,
4310 Name ": %s shall be used in mdmon context only"
4311 "(line %d).\n", __func__, __LINE__);
4312 return 1;
4313 }
4314 dd = malloc(sizeof(*dd));
4315 if (!dd) {
4316 fprintf(stderr,
4317 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4318 return 1;
4319 }
4320 memset(dd, 0, sizeof(*dd));
4321 dd->major = dk->major;
4322 dd->minor = dk->minor;
4323 dd->index = -1;
4324 dd->fd = -1;
4325 dd->disk.status = SPARE_DISK;
4326 dd->action = DISK_REMOVE;
4327
4328 dd->next = super->disk_mgmt_list;
4329 super->disk_mgmt_list = dd;
4330
4331
4332 return 0;
4333 }
4334
4335 static int store_imsm_mpb(int fd, struct imsm_super *mpb);
4336
/* one-sector staging buffer used to compose the anchor of a bare spare
 * before writing it out; 512-byte aligned so it can be passed straight
 * to sector-granular writes */
static union {
	char buf[512];
	struct imsm_super anchor;
} spare_record __attribute__ ((aligned(512)));
4341
4342 /* spare records have their own family number and do not have any defined raid
4343 * devices
4344 */
4345 static int write_super_imsm_spares(struct intel_super *super, int doclose)
4346 {
4347 struct imsm_super *mpb = super->anchor;
4348 struct imsm_super *spare = &spare_record.anchor;
4349 __u32 sum;
4350 struct dl *d;
4351
4352 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
4353 spare->generation_num = __cpu_to_le32(1UL),
4354 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4355 spare->num_disks = 1,
4356 spare->num_raid_devs = 0,
4357 spare->cache_size = mpb->cache_size,
4358 spare->pwr_cycle_count = __cpu_to_le32(1),
4359
4360 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
4361 MPB_SIGNATURE MPB_VERSION_RAID0);
4362
4363 for (d = super->disks; d; d = d->next) {
4364 if (d->index != -1)
4365 continue;
4366
4367 spare->disk[0] = d->disk;
4368 sum = __gen_imsm_checksum(spare);
4369 spare->family_num = __cpu_to_le32(sum);
4370 spare->orig_family_num = 0;
4371 sum = __gen_imsm_checksum(spare);
4372 spare->check_sum = __cpu_to_le32(sum);
4373
4374 if (store_imsm_mpb(d->fd, spare)) {
4375 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4376 __func__, d->major, d->minor, strerror(errno));
4377 return 1;
4378 }
4379 if (doclose) {
4380 close(d->fd);
4381 d->fd = -1;
4382 }
4383 }
4384
4385 return 0;
4386 }
4387
4388 static int is_gen_migration(struct imsm_dev *dev);
4389
4390 static int write_super_imsm(struct supertype *st, int doclose)
4391 {
4392 struct intel_super *super = st->sb;
4393 struct imsm_super *mpb = super->anchor;
4394 struct dl *d;
4395 __u32 generation;
4396 __u32 sum;
4397 int spares = 0;
4398 int i;
4399 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
4400 int num_disks = 0;
4401 int clear_migration_record = 1;
4402
4403 /* 'generation' is incremented everytime the metadata is written */
4404 generation = __le32_to_cpu(mpb->generation_num);
4405 generation++;
4406 mpb->generation_num = __cpu_to_le32(generation);
4407
4408 /* fix up cases where previous mdadm releases failed to set
4409 * orig_family_num
4410 */
4411 if (mpb->orig_family_num == 0)
4412 mpb->orig_family_num = mpb->family_num;
4413
4414 for (d = super->disks; d; d = d->next) {
4415 if (d->index == -1)
4416 spares++;
4417 else {
4418 mpb->disk[d->index] = d->disk;
4419 num_disks++;
4420 }
4421 }
4422 for (d = super->missing; d; d = d->next) {
4423 mpb->disk[d->index] = d->disk;
4424 num_disks++;
4425 }
4426 mpb->num_disks = num_disks;
4427 mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
4428
4429 for (i = 0; i < mpb->num_raid_devs; i++) {
4430 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
4431 struct imsm_dev *dev2 = get_imsm_dev(super, i);
4432 if (dev && dev2) {
4433 imsm_copy_dev(dev, dev2);
4434 mpb_size += sizeof_imsm_dev(dev, 0);
4435 }
4436 if (is_gen_migration(dev2))
4437 clear_migration_record = 0;
4438 }
4439 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
4440 mpb->mpb_size = __cpu_to_le32(mpb_size);
4441
4442 /* recalculate checksum */
4443 sum = __gen_imsm_checksum(mpb);
4444 mpb->check_sum = __cpu_to_le32(sum);
4445
4446 if (clear_migration_record)
4447 memset(super->migr_rec_buf, 0, 512);
4448
4449 /* write the mpb for disks that compose raid devices */
4450 for (d = super->disks; d ; d = d->next) {
4451 if (d->index < 0)
4452 continue;
4453 if (store_imsm_mpb(d->fd, mpb))
4454 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4455 __func__, d->major, d->minor, strerror(errno));
4456 if (clear_migration_record) {
4457 unsigned long long dsize;
4458
4459 get_dev_size(d->fd, NULL, &dsize);
4460 if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
4461 write(d->fd, super->migr_rec_buf, 512);
4462 }
4463 }
4464 if (doclose) {
4465 close(d->fd);
4466 d->fd = -1;
4467 }
4468 }
4469
4470 if (spares)
4471 return write_super_imsm_spares(super, doclose);
4472
4473 return 0;
4474 }
4475
4476
4477 static int create_array(struct supertype *st, int dev_idx)
4478 {
4479 size_t len;
4480 struct imsm_update_create_array *u;
4481 struct intel_super *super = st->sb;
4482 struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
4483 struct imsm_map *map = get_imsm_map(dev, 0);
4484 struct disk_info *inf;
4485 struct imsm_disk *disk;
4486 int i;
4487
4488 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
4489 sizeof(*inf) * map->num_members;
4490 u = malloc(len);
4491 if (!u) {
4492 fprintf(stderr, "%s: failed to allocate update buffer\n",
4493 __func__);
4494 return 1;
4495 }
4496
4497 u->type = update_create_array;
4498 u->dev_idx = dev_idx;
4499 imsm_copy_dev(&u->dev, dev);
4500 inf = get_disk_info(u);
4501 for (i = 0; i < map->num_members; i++) {
4502 int idx = get_imsm_disk_idx(dev, i, -1);
4503
4504 disk = get_imsm_disk(super, idx);
4505 serialcpy(inf[i].serial, disk->serial);
4506 }
4507 append_metadata_update(st, u, len);
4508
4509 return 0;
4510 }
4511
4512 static int mgmt_disk(struct supertype *st)
4513 {
4514 struct intel_super *super = st->sb;
4515 size_t len;
4516 struct imsm_update_add_remove_disk *u;
4517
4518 if (!super->disk_mgmt_list)
4519 return 0;
4520
4521 len = sizeof(*u);
4522 u = malloc(len);
4523 if (!u) {
4524 fprintf(stderr, "%s: failed to allocate update buffer\n",
4525 __func__);
4526 return 1;
4527 }
4528
4529 u->type = update_add_remove_disk;
4530 append_metadata_update(st, u, len);
4531
4532 return 0;
4533 }
4534
4535 static int write_init_super_imsm(struct supertype *st)
4536 {
4537 struct intel_super *super = st->sb;
4538 int current_vol = super->current_vol;
4539
4540 /* we are done with current_vol reset it to point st at the container */
4541 super->current_vol = -1;
4542
4543 if (st->update_tail) {
4544 /* queue the recently created array / added disk
4545 * as a metadata update */
4546 int rv;
4547
4548 /* determine if we are creating a volume or adding a disk */
4549 if (current_vol < 0) {
4550 /* in the mgmt (add/remove) disk case we are running
4551 * in mdmon context, so don't close fd's
4552 */
4553 return mgmt_disk(st);
4554 } else
4555 rv = create_array(st, current_vol);
4556
4557 return rv;
4558 } else {
4559 struct dl *d;
4560 for (d = super->disks; d; d = d->next)
4561 Kill(d->devname, NULL, 0, 1, 1);
4562 return write_super_imsm(st, 1);
4563 }
4564 }
4565 #endif
4566
4567 static int store_super_imsm(struct supertype *st, int fd)
4568 {
4569 struct intel_super *super = st->sb;
4570 struct imsm_super *mpb = super ? super->anchor : NULL;
4571
4572 if (!mpb)
4573 return 1;
4574
4575 #ifndef MDASSEMBLE
4576 return store_imsm_mpb(fd, mpb);
4577 #else
4578 return 1;
4579 #endif
4580 }
4581
4582 static int imsm_bbm_log_size(struct imsm_super *mpb)
4583 {
4584 return __le32_to_cpu(mpb->bbm_log_size);
4585 }
4586
4587 #ifndef MDASSEMBLE
/* Validate that 'dev' can start a new imsm container with 'raiddisks'
 * members: the device must open exclusively, report its size, and -
 * when an Intel HBA/OROM is found - stay within the platform's
 * supported disk count.  On success the usable size in 512-byte
 * sectors is returned via 'freesize'.
 * Returns 1 when acceptable, 0 otherwise.
 */
static int validate_geometry_imsm_container(struct supertype *st, int level,
					    int layout, int raiddisks, int chunk,
					    unsigned long long size, char *dev,
					    unsigned long long *freesize,
					    int verbose)
{
	int fd;
	unsigned long long ldsize;
	struct intel_super *super=NULL;
	int rv = 0;

	/* only container-level requests are handled here */
	if (level != LEVEL_CONTAINER)
		return 0;
	if (!dev)
		return 1;

	/* O_EXCL: refuse devices that are already claimed */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd < 0) {
		if (verbose)
			fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	if (!get_dev_size(fd, dev, &ldsize)) {
		close(fd);
		return 0;
	}

	/* capabilities retrieve could be possible
	 * note that there is no fd for the disks in array.
	 */
	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		close(fd);
		return 0;
	}

	rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
	if (rv != 0) {
#if DEBUG
		char str[256];
		fd2devname(fd, str);
		dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
			fd, str, super->orom, rv, raiddisks);
#endif
		/* no orom/efi or non-intel hba of the disk */
		close(fd);
		free_imsm(super);
		return 0;
	}
	close(fd);
	/* super->orom != NULL means platform limits apply */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);

		free_imsm(super);
		return 0;
	}

	/* report usable sectors after the metadata reservation */
	*freesize = avail_size_imsm(st, ldsize >> 9);
	free_imsm(super);

	return 1;
}
4657
/* Return the size of the merged extent run that begins at e[*idx] in a
 * sorted extent list: subsequent extents overlapping the growing
 * [base_start, end) span are folded in.  On return *idx points at the
 * first extent past the merged run.  A zero-sized extent acts as the
 * list terminator and makes the whole call yield 0.
 */
static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
{
	const unsigned long long base_start = e[*idx].start;
	unsigned long long end = base_start + e[*idx].size;
	int i;

	/* zero-sized first extent: hit the terminator */
	if (base_start == end)
		return 0;

	*idx = *idx + 1;
	for (i = *idx; i < num_extents; i++) {
		/* extend overlapping extents */
		if (e[i].start >= base_start &&
		    e[i].start <= end) {
			if (e[i].size == 0)
				return 0;
			if (e[i].start + e[i].size > end)
				end = e[i].start + e[i].size;
		} else if (e[i].start > end) {
			/* gap found - the merged run ends here */
			*idx = i;
			break;
		}
	}

	return end - base_start;
}
4684
/* Combine the per-disk extent lists (dl->e) into one view of busy
 * space, then return the size in sectors of the largest gap that is
 * free on every disk at a common start offset.  That start (plus an
 * IMSM_RESERVED_SECTORS gap when existing volumes precede it) is
 * recorded in super->create_offset.  Returns 0 when no usable region
 * exists or on allocation failure.
 */
static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
{
	/* build a composite disk with all known extents and generate a new
	 * 'maxsize' given the "all disks in an array must share a common start
	 * offset" constraint
	 */
	struct extent *e = calloc(sum_extents, sizeof(*e));
	struct dl *dl;
	int i, j;
	int start_extent;
	unsigned long long pos;
	unsigned long long start = 0;
	unsigned long long maxsize;
	unsigned long reserve;

	if (!e)
		return 0;

	/* coalesce and sort all extents. also, check to see if we need to
	 * reserve space between member arrays
	 */
	j = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		if (!dl->e)
			continue;
		for (i = 0; i < dl->extent_cnt; i++)
			e[j++] = dl->e[i];
	}
	qsort(e, sum_extents, sizeof(*e), cmp_extent);

	/* merge extents */
	i = 0;
	j = 0;
	while (i < sum_extents) {
		e[j].start = e[i].start;
		/* find_size() advances i past the merged run */
		e[j].size = find_size(e, &i, sum_extents);
		j++;
		/* a zero-sized entry terminates the merged list */
		if (e[j-1].size == 0)
			break;
	}

	/* scan the gaps between merged busy extents, remembering the
	 * largest one and the offset where it begins
	 */
	pos = 0;
	maxsize = 0;
	start_extent = 0;
	i = 0;
	do {
		unsigned long long esize;

		esize = e[i].start - pos;
		if (esize >= maxsize) {
			maxsize = esize;
			start = pos;
			start_extent = i;
		}
		pos = e[i].start + e[i].size;
		i++;
	} while (e[i-1].size);
	free(e);

	if (maxsize == 0)
		return 0;

	/* FIXME assumes volume at offset 0 is the first volume in a
	 * container
	 */
	if (start_extent > 0)
		reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
	else
		reserve = 0;

	if (maxsize < reserve)
		return 0;

	/* create_offset is 32-bit on disk; reject starts that overflow */
	super->create_offset = ~((__u32) 0);
	if (start + reserve > super->create_offset)
		return 0; /* start overflows create_offset */
	super->create_offset = start + reserve;

	return maxsize - reserve;
}
4765
/* Decide whether the platform allows the requested raid level with the
 * given member count.  raid4/raid6 are never supported; without an
 * orom there are no platform constraints.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	/* imsm never supports raid4, raid6, or negative levels */
	if (level == 4 || level == 6 || level < 0)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (!orom)
		return 1;

	switch (level) {
	case 0:
		return imsm_orom_has_raid0(orom);
	case 1:
		/* more than two members is the raid1e variant */
		if (raiddisks > 2)
			return imsm_orom_has_raid1e(orom);
		return imsm_orom_has_raid1(orom) && raiddisks == 2;
	case 5:
		return imsm_orom_has_raid5(orom) && raiddisks > 2;
	case 10:
		return imsm_orom_has_raid10(orom) && raiddisks == 4;
	}

	/* any other level is rejected when orom constraints apply */
	return 0;
}
4787
4788
4789 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
4790 /*
4791 * validate volume parameters with OROM/EFI capabilities
4792 */
4793 static int
4794 validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
4795 int raiddisks, int *chunk, int verbose)
4796 {
4797 #if DEBUG
4798 verbose = 1;
4799 #endif
4800 /* validate container capabilities */
4801 if (super->orom && raiddisks > super->orom->tds) {
4802 if (verbose)
4803 fprintf(stderr, Name ": %d exceeds maximum number of"
4804 " platform supported disks: %d\n",
4805 raiddisks, super->orom->tds);
4806 return 0;
4807 }
4808
4809 /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
4810 if (super->orom && (!is_raid_level_supported(super->orom, level,
4811 raiddisks))) {
4812 pr_vrb(": platform does not support raid%d with %d disk%s\n",
4813 level, raiddisks, raiddisks > 1 ? "s" : "");
4814 return 0;
4815 }
4816 if (super->orom && level != 1) {
4817 if (chunk && (*chunk == 0 || *chunk == UnSet))
4818 *chunk = imsm_orom_default_chunk(super->orom);
4819 else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
4820 pr_vrb(": platform does not support a chunk size of: "
4821 "%d\n", *chunk);
4822 return 0;
4823 }
4824 }
4825 if (layout != imsm_level_to_layout(level)) {
4826 if (level == 5)
4827 pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
4828 else if (level == 10)
4829 pr_vrb(": imsm raid 10 only supports the n2 layout\n");
4830 else
4831 pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
4832 layout, level);
4833 return 0;
4834 }
4835 return 1;
4836 }
4837
4838 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4839 * FIX ME add ahci details
4840 */
4841 static int validate_geometry_imsm_volume(struct supertype *st, int level,
4842 int layout, int raiddisks, int *chunk,
4843 unsigned long long size, char *dev,
4844 unsigned long long *freesize,
4845 int verbose)
4846 {
4847 struct stat stb;
4848 struct intel_super *super = st->sb;
4849 struct imsm_super *mpb = super->anchor;
4850 struct dl *dl;
4851 unsigned long long pos = 0;
4852 unsigned long long maxsize;
4853 struct extent *e;
4854 int i;
4855
4856 /* We must have the container info already read in. */
4857 if (!super)
4858 return 0;
4859
4860 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4861 fprintf(stderr, Name ": RAID gemetry validation failed. "
4862 "Cannot proceed with the action(s).\n");
4863 return 0;
4864 }
4865 if (!dev) {
4866 /* General test: make sure there is space for
4867 * 'raiddisks' device extents of size 'size' at a given
4868 * offset
4869 */
4870 unsigned long long minsize = size;
4871 unsigned long long start_offset = MaxSector;
4872 int dcnt = 0;
4873 if (minsize == 0)
4874 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4875 for (dl = super->disks; dl ; dl = dl->next) {
4876 int found = 0;
4877
4878 pos = 0;
4879 i = 0;
4880 e = get_extents(super, dl);
4881 if (!e) continue;
4882 do {
4883 unsigned long long esize;
4884 esize = e[i].start - pos;
4885 if (esize >= minsize)
4886 found = 1;
4887 if (found && start_offset == MaxSector) {
4888 start_offset = pos;
4889 break;
4890 } else if (found && pos != start_offset) {
4891 found = 0;
4892 break;
4893 }
4894 pos = e[i].start + e[i].size;
4895 i++;
4896 } while (e[i-1].size);
4897 if (found)
4898 dcnt++;
4899 free(e);
4900 }
4901 if (dcnt < raiddisks) {
4902 if (verbose)
4903 fprintf(stderr, Name ": imsm: Not enough "
4904 "devices with space for this array "
4905 "(%d < %d)\n",
4906 dcnt, raiddisks);
4907 return 0;
4908 }
4909 return 1;
4910 }
4911
4912 /* This device must be a member of the set */
4913 if (stat(dev, &stb) < 0)
4914 return 0;
4915 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4916 return 0;
4917 for (dl = super->disks ; dl ; dl = dl->next) {
4918 if (dl->major == (int)major(stb.st_rdev) &&
4919 dl->minor == (int)minor(stb.st_rdev))
4920 break;
4921 }
4922 if (!dl) {
4923 if (verbose)
4924 fprintf(stderr, Name ": %s is not in the "
4925 "same imsm set\n", dev);
4926 return 0;
4927 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4928 /* If a volume is present then the current creation attempt
4929 * cannot incorporate new spares because the orom may not
4930 * understand this configuration (all member disks must be
4931 * members of each array in the container).
4932 */
4933 fprintf(stderr, Name ": %s is a spare and a volume"
4934 " is already defined for this container\n", dev);
4935 fprintf(stderr, Name ": The option-rom requires all member"
4936 " disks to be a member of all volumes\n");
4937 return 0;
4938 }
4939
4940 /* retrieve the largest free space block */
4941 e = get_extents(super, dl);
4942 maxsize = 0;
4943 i = 0;
4944 if (e) {
4945 do {
4946 unsigned long long esize;
4947
4948 esize = e[i].start - pos;
4949 if (esize >= maxsize)
4950 maxsize = esize;
4951 pos = e[i].start + e[i].size;
4952 i++;
4953 } while (e[i-1].size);
4954 dl->e = e;
4955 dl->extent_cnt = i;
4956 } else {
4957 if (verbose)
4958 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4959 dev);
4960 return 0;
4961 }
4962 if (maxsize < size) {
4963 if (verbose)
4964 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4965 dev, maxsize, size);
4966 return 0;
4967 }
4968
4969 /* count total number of extents for merge */
4970 i = 0;
4971 for (dl = super->disks; dl; dl = dl->next)
4972 if (dl->e)
4973 i += dl->extent_cnt;
4974
4975 maxsize = merge_extents(super, i);
4976 if (maxsize < size || maxsize == 0) {
4977 if (verbose)
4978 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4979 maxsize, size);
4980 return 0;
4981 }
4982
4983 *freesize = maxsize;
4984
4985 return 1;
4986 }
4987
/* Autolayout helper: find the largest free region with a common start
 * across all eligible member disks, pick the volume size (rounded down
 * to whole chunks when 'size' is 0) and assign raiddisk slots to the
 * disks that will participate.  Returns 1 on success with the chosen
 * size in 'freesize', 0 when the request cannot be satisfied.
 */
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	int i;
	int extent_cnt;
	struct extent *e;
	unsigned long long maxsize;
	unsigned long long minsize;
	int cnt;
	int used;

	/* find the largest common start free region of the possible disks */
	used = 0;
	extent_cnt = 0;
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		dl->raiddisk = -1;

		if (dl->index >= 0)
			used++;

		/* don't activate new spares if we are orom constrained
		 * and there is already a volume active in the container
		 */
		if (super->orom && dl->index < 0 && mpb->num_raid_devs)
			continue;

		e = get_extents(super, dl);
		if (!e)
			continue;
		/* walk to the zero-sized terminator to count extents */
		for (i = 1; e[i-1].size; i++)
			;
		dl->e = e;
		dl->extent_cnt = i;
		extent_cnt += i;
		cnt++;
	}

	maxsize = merge_extents(super, extent_cnt);
	minsize = size;
	if (size == 0)
		/* chunk is in K */
		minsize = chunk * 2;

	/* all requested disks must be usable, an orom-constrained
	 * container must use every in-use disk, and the common free
	 * region must fit the (minimum) size
	 */
	if (cnt < raiddisks ||
	    (super->orom && used && used != raiddisks) ||
	    maxsize < minsize ||
	    maxsize == 0) {
		fprintf(stderr, Name ": not enough devices with space to create array.\n");
		return 0; /* No enough free spaces large enough */
	}

	if (size == 0) {
		size = maxsize;
		/* round down to a whole number of chunks (chunk is in K,
		 * size in 512-byte sectors)
		 */
		if (chunk) {
			size /= 2 * chunk;
			size *= 2 * chunk;
		}
	}

	/* assign consecutive raiddisk slots to the participating disks */
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->e)
			dl->raiddisk = cnt++;

	*freesize = size;

	return 1;
}
5061
/* Top-level geometry validation dispatcher:
 * - LEVEL_CONTAINER requests go to validate_geometry_imsm_container()
 * - with a loaded container (st->sb) and no dev, autolayout is handled
 *   by reserve_space(); with a dev, validate_geometry_imsm_volume()
 * - otherwise 'dev' must prove to be a busy member of an existing imsm
 *   container, which is then loaded before validating the volume.
 */
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int *chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	int fd, cfd;
	struct mdinfo *sra;
	int is_member = 0;

	/* load capability
	 * if given unused devices create a container
	 * if given given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks,
							chunk?*chunk:0, size,
							dev, freesize,
							verbose);
	}

	if (!dev) {
		if (st->sb && freesize) {
			/* we are being asked to automatically layout a
			 * new volume based on the current contents of
			 * the container. If the the parameters can be
			 * satisfied reserve_space will record the disks,
			 * start offset, and size of the volume to be
			 * created. add_to_super and getinfo_super
			 * detect when autolayout is in progress.
			 */
			if (!validate_geometry_imsm_orom(st->sb, level, layout,
							 raiddisks, chunk,
							 verbose))
				return 0;
			return reserve_space(st, raiddisks, size,
					     chunk?*chunk:0, freesize);
		}
		return 1;
	}
	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* an exclusive open succeeding means nobody owns the
		 * device, so it cannot already be a container member
		 */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	close(fd);
	if (cfd < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0)
		is_member = 1;
	sysfs_free(sra);
	if (is_member) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
	}

	if (verbose)
		fprintf(stderr, Name ": failed container membership check\n");

	close(cfd);
	return 0;
}
5163
5164 static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
5165 {
5166 struct intel_super *super = st->sb;
5167
5168 if (level && *level == UnSet)
5169 *level = LEVEL_CONTAINER;
5170
5171 if (level && layout && *layout == UnSet)
5172 *layout = imsm_level_to_layout(*level);
5173
5174 if (chunk && (*chunk == UnSet || *chunk == 0) &&
5175 super && super->orom)
5176 *chunk = imsm_orom_default_chunk(super->orom);
5177 }
5178
5179 static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
5180
/* Delete the subarray referenced by super->current_vol.
 * Refuses (returns 2) when no cursor is set or when the deletion would
 * change the UUID of a still-active subarray.  In mdmon context the
 * request is queued as a metadata update; otherwise the device list is
 * edited in place and, when the last volume goes away, all remaining
 * member disks are downgraded to spares.  Returns 0 on success.
 */
static int kill_subarray_imsm(struct supertype *st)
{
	/* remove the subarray currently referenced by ->current_vol */
	__u8 i;
	struct intel_dev **dp;
	struct intel_super *super = st->sb;
	__u8 current_vol = super->current_vol;
	struct imsm_super *mpb = super->anchor;

	if (super->current_vol < 0)
		return 2;
	super->current_vol = -1; /* invalidate subarray cursor */

	/* block deletions that would change the uuid of active subarrays
	 *
	 * FIXME when immutable ids are available, but note that we'll
	 * also need to fixup the invalidated/active subarray indexes in
	 * mdstat
	 */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		char subarray[4];

		/* only subarrays above current_vol shift index on delete */
		if (i < current_vol)
			continue;
		sprintf(subarray, "%u", i);
		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
				current_vol, i);

			return 2;
		}
	}

	/* mdmon context: queue the deletion for the monitor thread */
	if (st->update_tail) {
		struct imsm_update_kill_array *u = malloc(sizeof(*u));

		if (!u)
			return 2;
		u->type = update_kill_array;
		u->dev_idx = current_vol;
		append_metadata_update(st, u, sizeof(*u));

		return 0;
	}

	/* unlink the doomed volume and renumber the ones above it */
	for (dp = &super->devlist; *dp;)
		if ((*dp)->index == current_vol) {
			*dp = (*dp)->next;
		} else {
			handle_missing(super, (*dp)->dev);
			if ((*dp)->index > current_vol)
				(*dp)->index--;
			dp = &(*dp)->next;
		}

	/* no more raid devices, all active components are now spares,
	 * but of course failed are still failed
	 */
	if (--mpb->num_raid_devs == 0) {
		struct dl *d;

		for (d = super->disks; d; d = d->next)
			if (d->index > -2) {
				d->index = -1;
				d->disk.status = SPARE_DISK;
			}
	}

	super->updates_pending++;

	return 0;
}
5254
/* Apply an 'update' (currently only "name") to the given subarray.
 * Validates the new name and the subarray index; in mdmon context the
 * rename is queued as a metadata update, otherwise it is applied
 * directly to the anchor.  Returns 0 on success, 2 on any failure or
 * unsupported update type.
 */
static int update_subarray_imsm(struct supertype *st, char *subarray,
				char *update, struct mddev_ident *ident)
{
	/* update the subarray currently referenced by ->current_vol */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;

	if (strcmp(update, "name") == 0) {
		char *name = ident->name;
		char *ep;
		int vol;

		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": Unable to update name of active subarray\n");
			return 2;
		}

		if (!check_name(super, name, 0))
			return 2;

		/* subarray must parse as a decimal index of an existing
		 * volume
		 */
		vol = strtoul(subarray, &ep, 10);
		if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
			return 2;

		if (st->update_tail) {
			/* mdmon context: queue the rename */
			struct imsm_update_rename_array *u = malloc(sizeof(*u));

			if (!u)
				return 2;
			u->type = update_rename_array;
			u->dev_idx = vol;
			snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
			append_metadata_update(st, u, sizeof(*u));
		} else {
			struct imsm_dev *dev;
			int i;

			dev = get_imsm_dev(super, vol);
			snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
			/* refresh missing-disk bookkeeping for all volumes */
			for (i = 0; i < mpb->num_raid_devs; i++) {
				dev = get_imsm_dev(super, i);
				handle_missing(super, dev);
			}
			super->updates_pending++;
		}
	} else
		return 2;

	return 0;
}
5306
5307 static int is_gen_migration(struct imsm_dev *dev)
5308 {
5309 if (!dev->vol.migr_state)
5310 return 0;
5311
5312 if (migr_type(dev) == MIGR_GEN_MIGR)
5313 return 1;
5314
5315 return 0;
5316 }
5317 #endif /* MDASSEMBLE */
5318
5319 static int is_rebuilding(struct imsm_dev *dev)
5320 {
5321 struct imsm_map *migr_map;
5322
5323 if (!dev->vol.migr_state)
5324 return 0;
5325
5326 if (migr_type(dev) != MIGR_REBUILD)
5327 return 0;
5328
5329 migr_map = get_imsm_map(dev, 1);
5330
5331 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
5332 return 1;
5333 else
5334 return 0;
5335 }
5336
5337 static void update_recovery_start(struct intel_super *super,
5338 struct imsm_dev *dev,
5339 struct mdinfo *array)
5340 {
5341 struct mdinfo *rebuild = NULL;
5342 struct mdinfo *d;
5343 __u32 units;
5344
5345 if (!is_rebuilding(dev))
5346 return;
5347
5348 /* Find the rebuild target, but punt on the dual rebuild case */
5349 for (d = array->devs; d; d = d->next)
5350 if (d->recovery_start == 0) {
5351 if (rebuild)
5352 return;
5353 rebuild = d;
5354 }
5355
5356 if (!rebuild) {
5357 /* (?) none of the disks are marked with
5358 * IMSM_ORD_REBUILD, so assume they are missing and the
5359 * disk_ord_tbl was not correctly updated
5360 */
5361 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
5362 return;
5363 }
5364
5365 units = __le32_to_cpu(dev->vol.curr_migr_unit);
5366 rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
5367 }
5368
5369 static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
5370
5371 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
5372 {
5373 /* Given a container loaded by load_super_imsm_all,
5374 * extract information about all the arrays into
5375 * an mdinfo tree.
5376 * If 'subarray' is given, just extract info about that array.
5377 *
5378 * For each imsm_dev create an mdinfo, fill it in,
5379 * then look for matching devices in super->disks
5380 * and create appropriate device mdinfo.
5381 */
5382 struct intel_super *super = st->sb;
5383 struct imsm_super *mpb = super->anchor;
5384 struct mdinfo *rest = NULL;
5385 unsigned int i;
5386 int bbm_errors = 0;
5387 struct dl *d;
5388 int spare_disks = 0;
5389
5390 /* check for bad blocks */
5391 if (imsm_bbm_log_size(super->anchor))
5392 bbm_errors = 1;
5393
5394 /* count spare devices, not used in maps
5395 */
5396 for (d = super->disks; d; d = d->next)
5397 if (d->index == -1)
5398 spare_disks++;
5399
5400 for (i = 0; i < mpb->num_raid_devs; i++) {
5401 struct imsm_dev *dev;
5402 struct imsm_map *map;
5403 struct imsm_map *map2;
5404 struct mdinfo *this;
5405 int slot, chunk;
5406 char *ep;
5407
5408 if (subarray &&
5409 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
5410 continue;
5411
5412 dev = get_imsm_dev(super, i);
5413 map = get_imsm_map(dev, 0);
5414 map2 = get_imsm_map(dev, 1);
5415
5416 /* do not publish arrays that are in the middle of an
5417 * unsupported migration
5418 */
5419 if (dev->vol.migr_state &&
5420 (migr_type(dev) == MIGR_STATE_CHANGE)) {
5421 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
5422 " unsupported migration in progress\n",
5423 dev->volume);
5424 continue;
5425 }
5426 /* do not publish arrays that are not support by controller's
5427 * OROM/EFI
5428 */
5429
5430 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
5431 #ifndef MDASSEMBLE
5432 if (!validate_geometry_imsm_orom(super,
5433 get_imsm_raid_level(map), /* RAID level */
5434 imsm_level_to_layout(get_imsm_raid_level(map)),
5435 map->num_members, /* raid disks */
5436 &chunk,
5437 1 /* verbose */)) {
5438 fprintf(stderr, Name ": RAID gemetry validation failed. "
5439 "Cannot proceed with the action(s).\n");
5440 continue;
5441 }
5442 #endif /* MDASSEMBLE */
5443 this = malloc(sizeof(*this));
5444 if (!this) {
5445 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
5446 sizeof(*this));
5447 break;
5448 }
5449 memset(this, 0, sizeof(*this));
5450 this->next = rest;
5451
5452 super->current_vol = i;
5453 getinfo_super_imsm_volume(st, this, NULL);
5454 for (slot = 0 ; slot < map->num_members; slot++) {
5455 unsigned long long recovery_start;
5456 struct mdinfo *info_d;
5457 struct dl *d;
5458 int idx;
5459 int skip;
5460 __u32 ord;
5461
5462 skip = 0;
5463 idx = get_imsm_disk_idx(dev, slot, 0);
5464 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
5465 for (d = super->disks; d ; d = d->next)
5466 if (d->index == idx)
5467 break;
5468
5469 recovery_start = MaxSector;
5470 if (d == NULL)
5471 skip = 1;
5472 if (d && is_failed(&d->disk))
5473 skip = 1;
5474 if (ord & IMSM_ORD_REBUILD)
5475 recovery_start = 0;
5476
5477 /*
5478 * if we skip some disks the array will be assmebled degraded;
5479 * reset resync start to avoid a dirty-degraded
5480 * situation when performing the intial sync
5481 *
5482 * FIXME handle dirty degraded
5483 */
5484 if ((skip || recovery_start == 0) && !dev->vol.dirty)
5485 this->resync_start = MaxSector;
5486 if (skip)
5487 continue;
5488
5489 info_d = calloc(1, sizeof(*info_d));
5490 if (!info_d) {
5491 fprintf(stderr, Name ": failed to allocate disk"
5492 " for volume %.16s\n", dev->volume);
5493 info_d = this->devs;
5494 while (info_d) {
5495 struct mdinfo *d = info_d->next;
5496
5497 free(info_d);
5498 info_d = d;
5499 }
5500 free(this);
5501 this = rest;
5502 break;
5503 }
5504 info_d->next = this->devs;
5505 this->devs = info_d;
5506
5507 info_d->disk.number = d->index;
5508 info_d->disk.major = d->major;
5509 info_d->disk.minor = d->minor;
5510 info_d->disk.raid_disk = slot;
5511 info_d->recovery_start = recovery_start;
5512 if (map2) {
5513 if (slot < map2->num_members)
5514 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5515 else
5516 this->array.spare_disks++;
5517 } else {
5518 if (slot < map->num_members)
5519 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5520 else
5521 this->array.spare_disks++;
5522 }
5523 if (info_d->recovery_start == MaxSector)
5524 this->array.working_disks++;
5525
5526 info_d->events = __le32_to_cpu(mpb->generation_num);
5527 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5528 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
5529 }
5530 /* now that the disk list is up-to-date fixup recovery_start */
5531 update_recovery_start(super, dev, this);
5532 this->array.spare_disks += spare_disks;
5533
5534 /* check for reshape */
5535 if (this->reshape_active == 1)
5536 recover_backup_imsm(st, this);
5537
5538 rest = this;
5539 }
5540
5541 /* if array has bad blocks, set suitable bit in array status */
5542 if (bbm_errors)
5543 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5544
5545 return rest;
5546 }
5547
5548
5549 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
5550 {
5551 struct imsm_map *map = get_imsm_map(dev, 0);
5552
5553 if (!failed)
5554 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
5555 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
5556
5557 switch (get_imsm_raid_level(map)) {
5558 case 0:
5559 return IMSM_T_STATE_FAILED;
5560 break;
5561 case 1:
5562 if (failed < map->num_members)
5563 return IMSM_T_STATE_DEGRADED;
5564 else
5565 return IMSM_T_STATE_FAILED;
5566 break;
5567 case 10:
5568 {
5569 /**
5570 * check to see if any mirrors have failed, otherwise we
5571 * are degraded. Even numbered slots are mirrored on
5572 * slot+1
5573 */
5574 int i;
5575 /* gcc -Os complains that this is unused */
5576 int insync = insync;
5577
5578 for (i = 0; i < map->num_members; i++) {
5579 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
5580 int idx = ord_to_idx(ord);
5581 struct imsm_disk *disk;
5582
5583 /* reset the potential in-sync count on even-numbered
5584 * slots. num_copies is always 2 for imsm raid10
5585 */
5586 if ((i & 1) == 0)
5587 insync = 2;
5588
5589 disk = get_imsm_disk(super, idx);
5590 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5591 insync--;
5592
5593 /* no in-sync disks left in this mirror the
5594 * array has failed
5595 */
5596 if (insync == 0)
5597 return IMSM_T_STATE_FAILED;
5598 }
5599
5600 return IMSM_T_STATE_DEGRADED;
5601 }
5602 case 5:
5603 if (failed < 2)
5604 return IMSM_T_STATE_DEGRADED;
5605 else
5606 return IMSM_T_STATE_FAILED;
5607 break;
5608 default:
5609 break;
5610 }
5611
5612 return map->map_state;
5613 }
5614
5615 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
5616 {
5617 int i;
5618 int failed = 0;
5619 struct imsm_disk *disk;
5620 struct imsm_map *map = get_imsm_map(dev, 0);
5621 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5622 __u32 ord;
5623 int idx;
5624
5625 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5626 * disks that are being rebuilt. New failures are recorded to
5627 * map[0]. So we look through all the disks we started with and
5628 * see if any failures are still present, or if any new ones
5629 * have arrived
5630 *
5631 * FIXME add support for online capacity expansion and
5632 * raid-level-migration
5633 */
5634 for (i = 0; i < prev->num_members; i++) {
5635 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5636 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5637 idx = ord_to_idx(ord);
5638
5639 disk = get_imsm_disk(super, idx);
5640 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5641 failed++;
5642 }
5643
5644 return failed;
5645 }
5646
5647 #ifndef MDASSEMBLE
5648 static int imsm_open_new(struct supertype *c, struct active_array *a,
5649 char *inst)
5650 {
5651 struct intel_super *super = c->sb;
5652 struct imsm_super *mpb = super->anchor;
5653
5654 if (atoi(inst) >= mpb->num_raid_devs) {
5655 fprintf(stderr, "%s: subarry index %d, out of range\n",
5656 __func__, atoi(inst));
5657 return -ENODEV;
5658 }
5659
5660 dprintf("imsm: open_new %s\n", inst);
5661 a->info.container_member = atoi(inst);
5662 return 0;
5663 }
5664
5665 static int is_resyncing(struct imsm_dev *dev)
5666 {
5667 struct imsm_map *migr_map;
5668
5669 if (!dev->vol.migr_state)
5670 return 0;
5671
5672 if (migr_type(dev) == MIGR_INIT ||
5673 migr_type(dev) == MIGR_REPAIR)
5674 return 1;
5675
5676 if (migr_type(dev) == MIGR_GEN_MIGR)
5677 return 0;
5678
5679 migr_map = get_imsm_map(dev, 1);
5680
5681 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5682 (dev->vol.migr_type != MIGR_GEN_MIGR))
5683 return 1;
5684 else
5685 return 0;
5686 }
5687
5688 /* return true if we recorded new information */
5689 static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5690 {
5691 __u32 ord;
5692 int slot;
5693 struct imsm_map *map;
5694
5695 /* new failures are always set in map[0] */
5696 map = get_imsm_map(dev, 0);
5697
5698 slot = get_imsm_disk_slot(map, idx);
5699 if (slot < 0)
5700 return 0;
5701
5702 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
5703 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
5704 return 0;
5705
5706 disk->status |= FAILED_DISK;
5707 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
5708 if (map->failed_disk_num == 0xff)
5709 map->failed_disk_num = slot;
5710 return 1;
5711 }
5712
5713 static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5714 {
5715 mark_failure(dev, disk, idx);
5716
5717 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5718 return;
5719
5720 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5721 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5722 }
5723
5724 static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5725 {
5726 __u8 map_state;
5727 struct dl *dl;
5728 int failed;
5729
5730 if (!super->missing)
5731 return;
5732 failed = imsm_count_failed(super, dev);
5733 map_state = imsm_check_degraded(super, dev, failed);
5734
5735 dprintf("imsm: mark missing\n");
5736 end_migration(dev, map_state);
5737 for (dl = super->missing; dl; dl = dl->next)
5738 mark_missing(dev, &dl->disk, dl->index);
5739 super->updates_pending++;
5740 }
5741
5742 static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5743 {
5744 int used_disks = imsm_num_data_members(dev, 0);
5745 unsigned long long array_blocks;
5746 struct imsm_map *map;
5747
5748 if (used_disks == 0) {
5749 /* when problems occures
5750 * return current array_blocks value
5751 */
5752 array_blocks = __le32_to_cpu(dev->size_high);
5753 array_blocks = array_blocks << 32;
5754 array_blocks += __le32_to_cpu(dev->size_low);
5755
5756 return array_blocks;
5757 }
5758
5759 /* set array size in metadata
5760 */
5761 map = get_imsm_map(dev, 0);
5762 array_blocks = map->blocks_per_member * used_disks;
5763
5764 /* round array size down to closest MB
5765 */
5766 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5767 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5768 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5769
5770 return array_blocks;
5771 }
5772
5773 static void imsm_set_disk(struct active_array *a, int n, int state);
5774
static void imsm_progress_container_reshape(struct intel_super *super)
{
	/* if no device has a migr_state, but some device has a
	 * different number of members than the previous device, start
	 * changing the number of devices in this device to match
	 * previous.
	 */
	struct imsm_super *mpb = super->anchor;
	int prev_disks = -1;
	int i;
	int copy_map_size;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct imsm_map *map2;
		int prev_num_members;

		/* any device still migrating means the container reshape
		 * is in progress; bail out until it finishes
		 */
		if (dev->vol.migr_state)
			return;

		/* remember the member count of the first device; devices
		 * that already match need no work
		 */
		if (prev_disks == -1)
			prev_disks = map->num_members;
		if (prev_disks == map->num_members)
			continue;

		/* OK, this array needs to enter reshape mode.
		 * i.e it needs a migr_state
		 */

		copy_map_size = sizeof_imsm_map(map);
		prev_num_members = map->num_members;
		map->num_members = prev_disks;
		dev->vol.migr_state = 1;
		dev->vol.curr_migr_unit = 0;
		dev->vol.migr_type = MIGR_GEN_MIGR;
		/* seed identity ord-table entries for the newly added slots.
		 * NOTE(review): this inner loop reuses the outer loop
		 * counter 'i', so after starting one device the outer loop
		 * resumes from the new member count, effectively processing
		 * at most one device per call - verify this is intended.
		 */
		for (i = prev_num_members;
		     i < map->num_members; i++)
			set_imsm_ord_tbl_ent(map, i, i);
		map2 = get_imsm_map(dev, 1);
		/* Copy the current map */
		memcpy(map2, map, copy_map_size);
		map2->num_members = prev_num_members;

		imsm_set_array_size(dev);
		super->updates_pending++;
	}
}
5823
/* Handle dirty -> clean transitions, resync and reshape.  Degraded and rebuild
5825 * states are handled in imsm_set_disk() with one exception, when a
5826 * resync is stopped due to a new failure this routine will set the
5827 * 'degraded' state for the array.
5828 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);
	__u32 blocks_per_unit;

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR) {
		/* array state change is blocked due to reshape action
		 * We might need to
		 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
		 * - finish the reshape (if last_checkpoint is big and action != reshape)
		 * - update curr_migr_unit
		 */
		if (a->curr_action == reshape) {
			/* still reshaping, maybe update curr_migr_unit */
			goto mark_checkpoint;
		} else {
			if (a->last_checkpoint == 0 && a->prev_action == reshape) {
				/* for some reason we aborted the reshape.
				 * Better clean up
				 */
				struct imsm_map *map2 = get_imsm_map(dev, 1);
				/* restore the pre-reshape map and clear all
				 * migration state
				 */
				dev->vol.migr_state = 0;
				dev->vol.migr_type = 0;
				dev->vol.curr_migr_unit = 0;
				memcpy(map, map2, sizeof_imsm_map(map2));
				super->updates_pending++;
			}
			if (a->last_checkpoint >= a->info.component_size) {
				/* checkpoint passed the end of the component:
				 * the reshape has completed
				 */
				unsigned long long array_blocks;
				int used_disks;
				struct mdinfo *mdi;

				used_disks = imsm_num_data_members(dev, 0);
				if (used_disks > 0) {
					array_blocks =
						map->blocks_per_member *
						used_disks;
					/* round array size down to closest MB
					 */
					array_blocks = (array_blocks
							>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
					a->info.custom_array_size = array_blocks;
					/* encourage manager to update array
					 * size
					 */

					a->check_reshape = 1;
				}
				/* finalize online capacity expansion/reshape */
				for (mdi = a->info.devs; mdi; mdi = mdi->next)
					imsm_set_disk(a,
						      mdi->disk.raid_disk,
						      mdi->curr_state);

				imsm_progress_container_reshape(super);
			}
		}
	}

	/* before we activate this array handle any missing disks */
	if (consistent == 2)
		handle_missing(super, dev);

	/* consistent == 2 means "clean if possible"; downgrade to dirty
	 * if a resync is incomplete, the array is degraded/failed, or a
	 * migration is in flight
	 */
	if (consistent == 2 &&
	    (!is_resync_complete(&a->info) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(&a->info)) {
		/* complete initialization / resync,
		 * recovery and interrupted recovery is completed in
		 * ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
			a->last_checkpoint = 0;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start\n");
		if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
			migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
		else
			migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
		super->updates_pending++;
	}

mark_checkpoint:
	/* skip checkpointing for general migration,
	 * it is controlled in mdadm
	 */
	if (is_gen_migration(dev))
		goto skip_mark_checkpoint;

	/* check if we can update curr_migr_unit from resync_start, recovery_start */
	blocks_per_unit = blocks_per_migr_unit(super, dev);
	if (blocks_per_unit) {
		__u32 units32;
		__u64 units;

		units = a->last_checkpoint / blocks_per_unit;
		units32 = units;

		/* check that we did not overflow 32-bits, and that
		 * curr_migr_unit needs updating
		 */
		if (units32 == units &&
		    units32 != 0 &&
		    __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
			dprintf("imsm: mark checkpoint (%u)\n", units32);
			dev->vol.curr_migr_unit = __cpu_to_le32(units32);
			super->updates_pending++;
		}
	}

skip_mark_checkpoint:
	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}

	return consistent;
}
5967
5968 static void imsm_set_disk(struct active_array *a, int n, int state)
5969 {
5970 int inst = a->info.container_member;
5971 struct intel_super *super = a->container->sb;
5972 struct imsm_dev *dev = get_imsm_dev(super, inst);
5973 struct imsm_map *map = get_imsm_map(dev, 0);
5974 struct imsm_disk *disk;
5975 int failed;
5976 __u32 ord;
5977 __u8 map_state;
5978
5979 if (n > map->num_members)
5980 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5981 n, map->num_members - 1);
5982
5983 if (n < 0)
5984 return;
5985
5986 dprintf("imsm: set_disk %d:%x\n", n, state);
5987
5988 ord = get_imsm_ord_tbl_ent(dev, n, -1);
5989 disk = get_imsm_disk(super, ord_to_idx(ord));
5990
5991 /* check for new failures */
5992 if (state & DS_FAULTY) {
5993 if (mark_failure(dev, disk, ord_to_idx(ord)))
5994 super->updates_pending++;
5995 }
5996
5997 /* check if in_sync */
5998 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
5999 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6000
6001 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
6002 super->updates_pending++;
6003 }
6004
6005 failed = imsm_count_failed(super, dev);
6006 map_state = imsm_check_degraded(super, dev, failed);
6007
6008 /* check if recovery complete, newly degraded, or failed */
6009 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
6010 end_migration(dev, map_state);
6011 map = get_imsm_map(dev, 0);
6012 map->failed_disk_num = ~0;
6013 super->updates_pending++;
6014 a->last_checkpoint = 0;
6015 } else if (map_state == IMSM_T_STATE_DEGRADED &&
6016 map->map_state != map_state &&
6017 !dev->vol.migr_state) {
6018 dprintf("imsm: mark degraded\n");
6019 map->map_state = map_state;
6020 super->updates_pending++;
6021 a->last_checkpoint = 0;
6022 } else if (map_state == IMSM_T_STATE_FAILED &&
6023 map->map_state != map_state) {
6024 dprintf("imsm: mark failed\n");
6025 end_migration(dev, map_state);
6026 super->updates_pending++;
6027 a->last_checkpoint = 0;
6028 } else if (is_gen_migration(dev)) {
6029 dprintf("imsm: Detected General Migration in state: ");
6030 if (map_state == IMSM_T_STATE_NORMAL) {
6031 end_migration(dev, map_state);
6032 map = get_imsm_map(dev, 0);
6033 map->failed_disk_num = ~0;
6034 dprintf("normal\n");
6035 } else {
6036 if (map_state == IMSM_T_STATE_DEGRADED) {
6037 printf("degraded\n");
6038 end_migration(dev, map_state);
6039 } else {
6040 dprintf("failed\n");
6041 }
6042 map->map_state = map_state;
6043 }
6044 super->updates_pending++;
6045 }
6046 }
6047
6048 static int store_imsm_mpb(int fd, struct imsm_super *mpb)
6049 {
6050 void *buf = mpb;
6051 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
6052 unsigned long long dsize;
6053 unsigned long long sectors;
6054
6055 get_dev_size(fd, NULL, &dsize);
6056
6057 if (mpb_size > 512) {
6058 /* -1 to account for anchor */
6059 sectors = mpb_sectors(mpb) - 1;
6060
6061 /* write the extended mpb to the sectors preceeding the anchor */
6062 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
6063 return 1;
6064
6065 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
6066 != 512 * sectors)
6067 return 1;
6068 }
6069
6070 /* first block is stored on second to last sector of the disk */
6071 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
6072 return 1;
6073
6074 if (write(fd, buf, 512) != 512)
6075 return 1;
6076
6077 return 0;
6078 }
6079
6080 static void imsm_sync_metadata(struct supertype *container)
6081 {
6082 struct intel_super *super = container->sb;
6083
6084 dprintf("sync metadata: %d\n", super->updates_pending);
6085 if (!super->updates_pending)
6086 return;
6087
6088 write_super_imsm(container, 0);
6089
6090 super->updates_pending = 0;
6091 }
6092
6093 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
6094 {
6095 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
6096 int i = get_imsm_disk_idx(dev, idx, -1);
6097 struct dl *dl;
6098
6099 for (dl = super->disks; dl; dl = dl->next)
6100 if (dl->index == i)
6101 break;
6102
6103 if (dl && is_failed(&dl->disk))
6104 dl = NULL;
6105
6106 if (dl)
6107 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
6108
6109 return dl;
6110 }
6111
/* Find a container disk usable as a spare for slot @slot of array @a.
 *
 * @activate_new:         when non-zero, pure spares (index == -1) are
 *                        eligible; otherwise only partially-assimilated
 *                        drives are considered
 * @additional_test_list: extra disks to exclude from consideration
 *
 * A candidate must not already be a member of @a, must not be failed or
 * in use, and must have a free extent covering every member volume's
 * region.  Returns the chosen disk or NULL.
 */
static struct dl *imsm_add_spare(struct intel_super *super, int slot,
				 struct active_array *a, int activate_new,
				 struct mdinfo *additional_test_list)
{
	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
	int idx = get_imsm_disk_idx(dev, slot, -1);
	struct imsm_super *mpb = super->anchor;
	struct imsm_map *map;
	unsigned long long pos;
	struct mdinfo *d;
	struct extent *ex;
	int i, j;
	int found;
	__u32 array_start = 0;
	__u32 array_end = 0;
	struct dl *dl;
	struct mdinfo *test_list;

	for (dl = super->disks; dl; dl = dl->next) {
		/* If in this array, skip */
		for (d = a->info.devs ; d ; d = d->next)
			if (d->state_fd >= 0 &&
			    d->disk.major == dl->major &&
			    d->disk.minor == dl->minor) {
				dprintf("%x:%x already in array\n",
					dl->major, dl->minor);
				break;
			}
		if (d)
			continue;
		/* skip disks the caller asked us to exclude */
		test_list = additional_test_list;
		while (test_list) {
			if (test_list->disk.major == dl->major &&
			    test_list->disk.minor == dl->minor) {
				dprintf("%x:%x already in additional test list\n",
					dl->major, dl->minor);
				break;
			}
			test_list = test_list->next;
		}
		if (test_list)
			continue;

		/* skip in use or failed drives */
		if (is_failed(&dl->disk) || idx == dl->index ||
		    dl->index == -2) {
			dprintf("%x:%x status (failed: %d index: %d)\n",
				dl->major, dl->minor, is_failed(&dl->disk), idx);
			continue;
		}

		/* skip pure spares when we are looking for partially
		 * assimilated drives
		 */
		if (dl->index == -1 && !activate_new)
			continue;

		/* Does this unused device have the requisite free space?
		 * It needs to be able to cover all member volumes
		 */
		ex = get_extents(super, dl);
		if (!ex) {
			dprintf("cannot get extents\n");
			continue;
		}
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);

			/* check if this disk is already a member of
			 * this array
			 */
			if (get_imsm_disk_slot(map, dl->index) >= 0)
				continue;

			found = 0;
			j = 0;
			pos = 0;
			array_start = __le32_to_cpu(map->pba_of_lba0);
			array_end = array_start +
				    __le32_to_cpu(map->blocks_per_member) - 1;

			/* walk the (sorted) extent list looking for a gap
			 * that fully contains [array_start, array_end]
			 */
			do {
				/* check that we can start at pba_of_lba0 with
				 * blocks_per_member of space
				 */
				if (array_start >= pos && array_end < ex[j].start) {
					found = 1;
					break;
				}
				pos = ex[j].start + ex[j].size;
				j++;
			} while (ex[j-1].size);

			if (!found)
				break;
		}

		free(ex);
		/* inner loop broke early -> at least one volume's region
		 * is not free on this disk
		 */
		if (i < mpb->num_raid_devs) {
			dprintf("%x:%x does not have %u to %u available\n",
				dl->major, dl->minor, array_start, array_end);
			/* No room */
			continue;
		}
		return dl;
	}

	/* dl is NULL here (loop exhausted) */
	return dl;
}
6222
6223
6224 static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
6225 {
6226 struct imsm_dev *dev2;
6227 struct imsm_map *map;
6228 struct dl *idisk;
6229 int slot;
6230 int idx;
6231 __u8 state;
6232
6233 dev2 = get_imsm_dev(cont->sb, dev_idx);
6234 if (dev2) {
6235 state = imsm_check_degraded(cont->sb, dev2, failed);
6236 if (state == IMSM_T_STATE_FAILED) {
6237 map = get_imsm_map(dev2, 0);
6238 if (!map)
6239 return 1;
6240 for (slot = 0; slot < map->num_members; slot++) {
6241 /*
6242 * Check if failed disks are deleted from intel
6243 * disk list or are marked to be deleted
6244 */
6245 idx = get_imsm_disk_idx(dev2, slot, -1);
6246 idisk = get_imsm_dl_disk(cont->sb, idx);
6247 /*
6248 * Do not rebuild the array if failed disks
6249 * from failed sub-array are not removed from
6250 * container.
6251 */
6252 if (idisk &&
6253 is_failed(&idisk->disk) &&
6254 (idisk->action != DISK_REMOVE))
6255 return 0;
6256 }
6257 }
6258 }
6259 return 1;
6260 }
6261
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array. We replace failed regions one a
	 * array at a time. The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = a->info.array.raid_disks;	/* decremented per live member below */
	struct mdinfo *rv = NULL;	/* list of chosen spares to return */
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;
	int allowed;

	/* count missing members; a faulty member whose fd is still open
	 * must be removed first, so bail out until that happens
	 */
	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
		    d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR)
		/* No repair during migration */
		return NULL;

	if (a->info.array.level == 4)
		/* No repair for takeovered array
		 * imsm doesn't support raid4
		 */
		return NULL;

	/* only a DEGRADED array can accept a spare */
	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/*
	 * If there are any failed disks check state of the other volume.
	 * Block rebuild if the another one is failed until failed disks
	 * are removed from container.
	 */
	if (failed) {
		dprintf("found failed disks in %s, check if there another"
			"failed sub-array.\n",
			dev->volume);
		/* check if states of the other volumes allow for rebuild */
		for (i = 0; i < super->anchor->num_raid_devs; i++) {
			if (i != inst) {
				allowed = imsm_rebuild_allowed(a->container,
							       i, failed);
				if (!allowed)
					return NULL;
			}
		}
	}

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		/* slot is occupied and healthy */
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery.  Try to re-add the
		 * previous occupant of this slot, if this fails see if
		 * we can continue the assimilation of a spare that was
		 * partially assimilated, finally try to activate a new
		 * spare.
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 0, NULL);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 1, NULL);
		if (!dl)
			continue;

		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		if (!di)
			continue;
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare.  imsm_process_update() will create a
		 * new index in this case.  Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->recovery_start = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		super->random = random32();
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

		/* one replacement per call - see function comment */
		break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	if (mu) {
		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
		if (mu->buf == NULL) {
			free(mu);
			mu = NULL;
		}
	}
	/* allocation failed: free the chosen-spare list and give up */
	if (!mu) {
		while (rv) {
			struct mdinfo *n = rv->next;

			free(rv);
			rv = n;
		}
		return NULL;
	}

	mu->space = NULL;
	mu->space_list = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	/* one update record per chosen spare, chained via u->next */
	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
6443
6444 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
6445 {
6446 struct imsm_dev *dev = get_imsm_dev(super, idx);
6447 struct imsm_map *map = get_imsm_map(dev, 0);
6448 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
6449 struct disk_info *inf = get_disk_info(u);
6450 struct imsm_disk *disk;
6451 int i;
6452 int j;
6453
6454 for (i = 0; i < map->num_members; i++) {
6455 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
6456 for (j = 0; j < new_map->num_members; j++)
6457 if (serialcmp(disk->serial, inf[j].serial) == 0)
6458 return 1;
6459 }
6460
6461 return 0;
6462 }
6463
6464
6465 static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6466 {
6467 struct dl *dl = NULL;
6468 for (dl = super->disks; dl; dl = dl->next)
6469 if ((dl->major == major) && (dl->minor == minor))
6470 return dl;
6471 return NULL;
6472 }
6473
6474 static int remove_disk_super(struct intel_super *super, int major, int minor)
6475 {
6476 struct dl *prev = NULL;
6477 struct dl *dl;
6478
6479 prev = NULL;
6480 for (dl = super->disks; dl; dl = dl->next) {
6481 if ((dl->major == major) && (dl->minor == minor)) {
6482 /* remove */
6483 if (prev)
6484 prev->next = dl->next;
6485 else
6486 super->disks = dl->next;
6487 dl->next = NULL;
6488 __free_imsm_disk(dl);
6489 dprintf("%s: removed %x:%x\n",
6490 __func__, major, minor);
6491 break;
6492 }
6493 prev = dl;
6494 }
6495 return 0;
6496 }
6497
6498 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
6499
6500 static int add_remove_disk_update(struct intel_super *super)
6501 {
6502 int check_degraded = 0;
6503 struct dl *disk = NULL;
6504 /* add/remove some spares to/from the metadata/contrainer */
6505 while (super->disk_mgmt_list) {
6506 struct dl *disk_cfg;
6507
6508 disk_cfg = super->disk_mgmt_list;
6509 super->disk_mgmt_list = disk_cfg->next;
6510 disk_cfg->next = NULL;
6511
6512 if (disk_cfg->action == DISK_ADD) {
6513 disk_cfg->next = super->disks;
6514 super->disks = disk_cfg;
6515 check_degraded = 1;
6516 dprintf("%s: added %x:%x\n",
6517 __func__, disk_cfg->major,
6518 disk_cfg->minor);
6519 } else if (disk_cfg->action == DISK_REMOVE) {
6520 dprintf("Disk remove action processed: %x.%x\n",
6521 disk_cfg->major, disk_cfg->minor);
6522 disk = get_disk_super(super,
6523 disk_cfg->major,
6524 disk_cfg->minor);
6525 if (disk) {
6526 /* store action status */
6527 disk->action = DISK_REMOVE;
6528 /* remove spare disks only */
6529 if (disk->index == -1) {
6530 remove_disk_super(super,
6531 disk_cfg->major,
6532 disk_cfg->minor);
6533 }
6534 }
6535 /* release allocate disk structure */
6536 __free_imsm_disk(disk_cfg);
6537 }
6538 }
6539 return check_degraded;
6540 }
6541
6542
6543 static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6544 struct intel_super *super,
6545 void ***space_list)
6546 {
6547 struct intel_dev *id;
6548 void **tofree = NULL;
6549 int ret_val = 0;
6550
6551 dprintf("apply_reshape_migration_update()\n");
6552 if ((u->subdev < 0) ||
6553 (u->subdev > 1)) {
6554 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6555 return ret_val;
6556 }
6557 if ((space_list == NULL) || (*space_list == NULL)) {
6558 dprintf("imsm: Error: Memory is not allocated\n");
6559 return ret_val;
6560 }
6561
6562 for (id = super->devlist ; id; id = id->next) {
6563 if (id->index == (unsigned)u->subdev) {
6564 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6565 struct imsm_map *map;
6566 struct imsm_dev *new_dev =
6567 (struct imsm_dev *)*space_list;
6568 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6569 int to_state;
6570 struct dl *new_disk;
6571
6572 if (new_dev == NULL)
6573 return ret_val;
6574 *space_list = **space_list;
6575 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6576 map = get_imsm_map(new_dev, 0);
6577 if (migr_map) {
6578 dprintf("imsm: Error: migration in progress");
6579 return ret_val;
6580 }
6581
6582 to_state = map->map_state;
6583 if ((u->new_level == 5) && (map->raid_level == 0)) {
6584 map->num_members++;
6585 /* this should not happen */
6586 if (u->new_disks[0] < 0) {
6587 map->failed_disk_num =
6588 map->num_members - 1;
6589 to_state = IMSM_T_STATE_DEGRADED;
6590 } else
6591 to_state = IMSM_T_STATE_NORMAL;
6592 }
6593 migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
6594 if (u->new_level > -1)
6595 map->raid_level = u->new_level;
6596 migr_map = get_imsm_map(new_dev, 1);
6597 if ((u->new_level == 5) &&
6598 (migr_map->raid_level == 0)) {
6599 int ord = map->num_members - 1;
6600 migr_map->num_members--;
6601 if (u->new_disks[0] < 0)
6602 ord |= IMSM_ORD_REBUILD;
6603 set_imsm_ord_tbl_ent(map,
6604 map->num_members - 1,
6605 ord);
6606 }
6607 id->dev = new_dev;
6608 tofree = (void **)dev;
6609
6610 /* update chunk size
6611 */
6612 if (u->new_chunksize > 0)
6613 map->blocks_per_strip =
6614 __cpu_to_le16(u->new_chunksize * 2);
6615
6616 /* add disk
6617 */
6618 if ((u->new_level != 5) ||
6619 (migr_map->raid_level != 0) ||
6620 (migr_map->raid_level == map->raid_level))
6621 goto skip_disk_add;
6622
6623 if (u->new_disks[0] >= 0) {
6624 /* use passes spare
6625 */
6626 new_disk = get_disk_super(super,
6627 major(u->new_disks[0]),
6628 minor(u->new_disks[0]));
6629 dprintf("imsm: new disk for reshape is: %i:%i "
6630 "(%p, index = %i)\n",
6631 major(u->new_disks[0]),
6632 minor(u->new_disks[0]),
6633 new_disk, new_disk->index);
6634 if (new_disk == NULL)
6635 goto error_disk_add;
6636
6637 new_disk->index = map->num_members - 1;
6638 /* slot to fill in autolayout
6639 */
6640 new_disk->raiddisk = new_disk->index;
6641 new_disk->disk.status |= CONFIGURED_DISK;
6642 new_disk->disk.status &= ~SPARE_DISK;
6643 } else
6644 goto error_disk_add;
6645
6646 skip_disk_add:
6647 *tofree = *space_list;
6648 /* calculate new size
6649 */
6650 imsm_set_array_size(new_dev);
6651
6652 ret_val = 1;
6653 }
6654 }
6655
6656 if (tofree)
6657 *space_list = tofree;
6658 return ret_val;
6659
6660 error_disk_add:
6661 dprintf("Error: imsm: Cannot find disk.\n");
6662 return ret_val;
6663 }
6664
6665
6666 static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6667 struct intel_super *super,
6668 void ***space_list)
6669 {
6670 struct dl *new_disk;
6671 struct intel_dev *id;
6672 int i;
6673 int delta_disks = u->new_raid_disks - u->old_raid_disks;
6674 int disk_count = u->old_raid_disks;
6675 void **tofree = NULL;
6676 int devices_to_reshape = 1;
6677 struct imsm_super *mpb = super->anchor;
6678 int ret_val = 0;
6679 unsigned int dev_id;
6680
6681 dprintf("imsm: apply_reshape_container_disks_update()\n");
6682
6683 /* enable spares to use in array */
6684 for (i = 0; i < delta_disks; i++) {
6685 new_disk = get_disk_super(super,
6686 major(u->new_disks[i]),
6687 minor(u->new_disks[i]));
6688 dprintf("imsm: new disk for reshape is: %i:%i "
6689 "(%p, index = %i)\n",
6690 major(u->new_disks[i]), minor(u->new_disks[i]),
6691 new_disk, new_disk->index);
6692 if ((new_disk == NULL) ||
6693 ((new_disk->index >= 0) &&
6694 (new_disk->index < u->old_raid_disks)))
6695 goto update_reshape_exit;
6696 new_disk->index = disk_count++;
6697 /* slot to fill in autolayout
6698 */
6699 new_disk->raiddisk = new_disk->index;
6700 new_disk->disk.status |=
6701 CONFIGURED_DISK;
6702 new_disk->disk.status &= ~SPARE_DISK;
6703 }
6704
6705 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6706 mpb->num_raid_devs);
6707 /* manage changes in volume
6708 */
6709 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
6710 void **sp = *space_list;
6711 struct imsm_dev *newdev;
6712 struct imsm_map *newmap, *oldmap;
6713
6714 for (id = super->devlist ; id; id = id->next) {
6715 if (id->index == dev_id)
6716 break;
6717 }
6718 if (id == NULL)
6719 break;
6720 if (!sp)
6721 continue;
6722 *space_list = *sp;
6723 newdev = (void*)sp;
6724 /* Copy the dev, but not (all of) the map */
6725 memcpy(newdev, id->dev, sizeof(*newdev));
6726 oldmap = get_imsm_map(id->dev, 0);
6727 newmap = get_imsm_map(newdev, 0);
6728 /* Copy the current map */
6729 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6730 /* update one device only
6731 */
6732 if (devices_to_reshape) {
6733 dprintf("imsm: modifying subdev: %i\n",
6734 id->index);
6735 devices_to_reshape--;
6736 newdev->vol.migr_state = 1;
6737 newdev->vol.curr_migr_unit = 0;
6738 newdev->vol.migr_type = MIGR_GEN_MIGR;
6739 newmap->num_members = u->new_raid_disks;
6740 for (i = 0; i < delta_disks; i++) {
6741 set_imsm_ord_tbl_ent(newmap,
6742 u->old_raid_disks + i,
6743 u->old_raid_disks + i);
6744 }
6745 /* New map is correct, now need to save old map
6746 */
6747 newmap = get_imsm_map(newdev, 1);
6748 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6749
6750 imsm_set_array_size(newdev);
6751 }
6752
6753 sp = (void **)id->dev;
6754 id->dev = newdev;
6755 *sp = tofree;
6756 tofree = sp;
6757
6758 /* Clear migration record */
6759 memset(super->migr_rec, 0, sizeof(struct migr_record));
6760 }
6761 if (tofree)
6762 *space_list = tofree;
6763 ret_val = 1;
6764
6765 update_reshape_exit:
6766
6767 return ret_val;
6768 }
6769
6770 static int apply_takeover_update(struct imsm_update_takeover *u,
6771 struct intel_super *super,
6772 void ***space_list)
6773 {
6774 struct imsm_dev *dev = NULL;
6775 struct intel_dev *dv;
6776 struct imsm_dev *dev_new;
6777 struct imsm_map *map;
6778 struct dl *dm, *du;
6779 int i;
6780
6781 for (dv = super->devlist; dv; dv = dv->next)
6782 if (dv->index == (unsigned int)u->subarray) {
6783 dev = dv->dev;
6784 break;
6785 }
6786
6787 if (dev == NULL)
6788 return 0;
6789
6790 map = get_imsm_map(dev, 0);
6791
6792 if (u->direction == R10_TO_R0) {
6793 /* Number of failed disks must be half of initial disk number */
6794 if (imsm_count_failed(super, dev) != (map->num_members / 2))
6795 return 0;
6796
6797 /* iterate through devices to mark removed disks as spare */
6798 for (dm = super->disks; dm; dm = dm->next) {
6799 if (dm->disk.status & FAILED_DISK) {
6800 int idx = dm->index;
6801 /* update indexes on the disk list */
6802 /* FIXME this loop-with-the-loop looks wrong, I'm not convinced
6803 the index values will end up being correct.... NB */
6804 for (du = super->disks; du; du = du->next)
6805 if (du->index > idx)
6806 du->index--;
6807 /* mark as spare disk */
6808 dm->disk.status = SPARE_DISK;
6809 dm->index = -1;
6810 }
6811 }
6812 /* update map */
6813 map->num_members = map->num_members / 2;
6814 map->map_state = IMSM_T_STATE_NORMAL;
6815 map->num_domains = 1;
6816 map->raid_level = 0;
6817 map->failed_disk_num = -1;
6818 }
6819
6820 if (u->direction == R0_TO_R10) {
6821 void **space;
6822 /* update slots in current disk list */
6823 for (dm = super->disks; dm; dm = dm->next) {
6824 if (dm->index >= 0)
6825 dm->index *= 2;
6826 }
6827 /* create new *missing* disks */
6828 for (i = 0; i < map->num_members; i++) {
6829 space = *space_list;
6830 if (!space)
6831 continue;
6832 *space_list = *space;
6833 du = (void *)space;
6834 memcpy(du, super->disks, sizeof(*du));
6835 du->fd = -1;
6836 du->minor = 0;
6837 du->major = 0;
6838 du->index = (i * 2) + 1;
6839 sprintf((char *)du->disk.serial,
6840 " MISSING_%d", du->index);
6841 sprintf((char *)du->serial,
6842 "MISSING_%d", du->index);
6843 du->next = super->missing;
6844 super->missing = du;
6845 }
6846 /* create new dev and map */
6847 space = *space_list;
6848 if (!space)
6849 return 0;
6850 *space_list = *space;
6851 dev_new = (void *)space;
6852 memcpy(dev_new, dev, sizeof(*dev));
6853 /* update new map */
6854 map = get_imsm_map(dev_new, 0);
6855 map->num_members = map->num_members * 2;
6856 map->map_state = IMSM_T_STATE_DEGRADED;
6857 map->num_domains = 2;
6858 map->raid_level = 1;
6859 /* replace dev<->dev_new */
6860 dv->dev = dev_new;
6861 }
6862 /* update disk order table */
6863 for (du = super->disks; du; du = du->next)
6864 if (du->index >= 0)
6865 set_imsm_ord_tbl_ent(map, du->index, du->index);
6866 for (du = super->missing; du; du = du->next)
6867 if (du->index >= 0) {
6868 set_imsm_ord_tbl_ent(map, du->index, du->index);
6869 mark_missing(dev_new, &du->disk, du->index);
6870 }
6871
6872 return 1;
6873 }
6874
/* Monitor-side dispatcher: apply a metadata_update (previously sized by
 * imsm_prepare_update() in the manager) to the in-memory imsm metadata.
 * Bumps super->updates_pending when the anchor must be written back.
 */
static void imsm_process_update(struct supertype *st,
			        struct metadata_update *update)
{
	/**
	 * crack open the metadata_update envelope to find the update record
	 * update can be one of:
	 * 	update_reshape_container_disks - all the arrays in the container
	 *      are being reshaped to have more devices.  We need to mark
	 *      the arrays for general migration and convert selected spares
	 *      into active devices.
	 * 	update_activate_spare - a spare device has replaced a failed
	 * 	device in an array, update the disk_ord_tbl.  If this disk is
	 * 	present in all member arrays then also clear the SPARE_DISK
	 * 	flag
	 * 	update_create_array
	 * 	update_kill_array
	 * 	update_rename_array
	 * 	update_add_remove_disk
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;
	/* the update type tag is always the first field of the record */
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;

	/* update requires a larger buf but the allocation failed */
	if (super->next_len && !super->next_buf) {
		super->next_len = 0;
		return;
	}

	/* swap in the larger anchor buffer allocated by prepare_update();
	 * must happen before mpb is read below
	 */
	if (super->next_buf) {
		memcpy(super->next_buf, super->buf, super->len);
		free(super->buf);
		super->len = super->next_len;
		super->buf = super->next_buf;

		super->next_len = 0;
		super->next_buf = NULL;
	}

	mpb = super->anchor;

	switch (type) {
	case update_takeover: {
		struct imsm_update_takeover *u = (void *)update->buf;
		if (apply_takeover_update(u, super, &update->space_list)) {
			imsm_update_version_info(super);
			super->updates_pending++;
		}
		break;
	}

	case update_reshape_container_disks: {
		struct imsm_update_reshape *u = (void *)update->buf;
		if (apply_reshape_container_disks_update(
			    u, super, &update->space_list))
			super->updates_pending++;
		break;
	}
	case update_reshape_migration: {
		struct imsm_update_reshape_migration *u = (void *)update->buf;
		if (apply_reshape_migration_update(
			    u, super, &update->space_list))
			super->updates_pending++;
		break;
	}
	case update_activate_spare: {
		struct imsm_update_activate_spare *u = (void *) update->buf;
		struct imsm_dev *dev = get_imsm_dev(super, u->array);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct imsm_map *migr_map;
		struct active_array *a;
		struct imsm_disk *disk;
		__u8 to_state;
		struct dl *dl;
		unsigned int found;
		int failed;
		/* index of the disk being replaced */
		int victim = get_imsm_disk_idx(dev, u->slot, -1);
		int i;

		/* the spare must still be on the container disk list */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl == u->dl)
				break;

		if (!dl) {
			fprintf(stderr, "error: imsm_activate_spare passed "
				"an unknown disk (index: %d)\n",
				u->dl->index);
			return;
		}

		super->updates_pending++;
		/* count failures (excluding rebuilds and the victim)
		 * to determine map[0] state
		 */
		failed = 0;
		for (i = 0; i < map->num_members; i++) {
			if (i == u->slot)
				continue;
			disk = get_imsm_disk(super,
					     get_imsm_disk_idx(dev, i, -1));
			if (!disk || is_failed(disk))
				failed++;
		}

		/* adding a pristine spare, assign a new index */
		if (dl->index < 0) {
			dl->index = super->anchor->num_disks;
			super->anchor->num_disks++;
		}
		disk = &dl->disk;
		disk->status |= CONFIGURED_DISK;
		disk->status &= ~SPARE_DISK;

		/* mark rebuild: map[0] is the rebuild target state,
		 * map[1] keeps the source state with the rebuild flag
		 * on the new member's ord entry
		 */
		to_state = imsm_check_degraded(super, dev, failed);
		map->map_state = IMSM_T_STATE_DEGRADED;
		migrate(dev, super, to_state, MIGR_REBUILD);
		migr_map = get_imsm_map(dev, 1);
		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
		set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);

		/* update the family_num to mark a new container
		 * generation, being careful to record the existing
		 * family_num in orig_family_num to clean up after
		 * earlier mdadm versions that neglected to set it.
		 */
		if (mpb->orig_family_num == 0)
			mpb->orig_family_num = mpb->family_num;
		mpb->family_num += super->random;

		/* count arrays using the victim in the metadata */
		found = 0;
		for (a = st->arrays; a ; a = a->next) {
			dev = get_imsm_dev(super, a->info.container_member);
			map = get_imsm_map(dev, 0);

			if (get_imsm_disk_slot(map, victim) >= 0)
				found++;
		}

		/* delete the victim if it is no longer being
		 * utilized anywhere
		 */
		if (!found) {
			struct dl **dlp;

			/* We know that 'manager' isn't touching anything,
			 * so it is safe to delete
			 */
			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
				if ((*dlp)->index == victim)
					break;

			/* victim may be on the missing list */
			if (!*dlp)
				for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
					if ((*dlp)->index == victim)
						break;
			imsm_delete(super, dlp, victim);
		}
		break;
	}
	case update_create_array: {
		/* someone wants to create a new array, we need to be aware of
		 * a few races/collisions:
		 * 1/ 'Create' called by two separate instances of mdadm
		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
		 *     devices that have since been assimilated via
		 *     activate_spare.
		 * In the event this update can not be carried out mdadm will
		 * (FIX ME) notice that its update did not take hold.
		 */
		struct imsm_update_create_array *u = (void *) update->buf;
		struct intel_dev *dv;
		struct imsm_dev *dev;
		struct imsm_map *map, *new_map;
		unsigned long long start, end;
		unsigned long long new_start, new_end;
		int i;
		struct disk_info *inf;
		struct dl *dl;

		/* handle racing creates: first come first serve */
		if (u->dev_idx < mpb->num_raid_devs) {
			dprintf("%s: subarray %d already defined\n",
				__func__, u->dev_idx);
			goto create_error;
		}

		/* check update is next in sequence */
		if (u->dev_idx != mpb->num_raid_devs) {
			dprintf("%s: can not create array %d expected index %d\n",
				__func__, u->dev_idx, mpb->num_raid_devs);
			goto create_error;
		}

		new_map = get_imsm_map(&u->dev, 0);
		new_start = __le32_to_cpu(new_map->pba_of_lba0);
		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
		inf = get_disk_info(u);

		/* handle activate_spare versus create race:
		 * check to make sure that overlapping arrays do not include
		 * overalpping disks
		 */
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);
			start = __le32_to_cpu(map->pba_of_lba0);
			end = start + __le32_to_cpu(map->blocks_per_member);
			if ((new_start >= start && new_start <= end) ||
			    (start >= new_start && start <= new_end))
				/* overlap */;
			else
				continue;

			if (disks_overlap(super, i, u)) {
				dprintf("%s: arrays overlap\n", __func__);
				goto create_error;
			}
		}

		/* check that prepare update was successful */
		if (!update->space) {
			dprintf("%s: prepare update failed\n", __func__);
			goto create_error;
		}

		/* check that all disks are still active before committing
		 * changes.  FIXME: could we instead handle this by creating a
		 * degraded array?  That's probably not what the user expects,
		 * so better to drop this update on the floor.
		 */
		for (i = 0; i < new_map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				dprintf("%s: disk disappeared\n", __func__);
				goto create_error;
			}
		}

		super->updates_pending++;

		/* convert spares to members and fixup ord_tbl */
		for (i = 0; i < new_map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (dl->index == -1) {
				dl->index = mpb->num_disks;
				mpb->num_disks++;
				dl->disk.status |= CONFIGURED_DISK;
				dl->disk.status &= ~SPARE_DISK;
			}
			set_imsm_ord_tbl_ent(new_map, i, dl->index);
		}

		/* commit: splice the pre-allocated intel_dev into the
		 * devlist and grow the anchor's raid-device count
		 */
		dv = update->space;
		dev = dv->dev;
		update->space = NULL;
		imsm_copy_dev(dev, &u->dev);
		dv->index = u->dev_idx;
		dv->next = super->devlist;
		super->devlist = dv;
		mpb->num_raid_devs++;

		imsm_update_version_info(super);
		break;
 create_error:
		/* mdmon knows how to release update->space, but not
		 * ((struct intel_dev *) update->space)->dev
		 */
		if (update->space) {
			dv = update->space;
			free(dv->dev);
		}
		break;
	}
	case update_kill_array: {
		struct imsm_update_kill_array *u = (void *) update->buf;
		int victim = u->dev_idx;
		struct active_array *a;
		struct intel_dev **dp;
		struct imsm_dev *dev;

		/* sanity check that we are not affecting the uuid of
		 * active arrays, or deleting an active array
		 *
		 * FIXME when immutable ids are available, but note that
		 * we'll also need to fixup the invalidated/active
		 * subarray indexes in mdstat
		 */
		for (a = st->arrays; a; a = a->next)
			if (a->info.container_member >= victim)
				break;
		/* by definition if mdmon is running at least one array
		 * is active in the container, so checking
		 * mpb->num_raid_devs is just extra paranoia
		 */
		dev = get_imsm_dev(super, victim);
		if (a || !dev || mpb->num_raid_devs == 1) {
			dprintf("failed to delete subarray-%d\n", victim);
			break;
		}

		/* NOTE(review): this matches on super->current_vol, not
		 * on victim, even though every check above is against
		 * victim — confirm this is intentional (looks like it
		 * should compare against victim)
		 */
		for (dp = &super->devlist; *dp;)
			if ((*dp)->index == (unsigned)super->current_vol) {
				*dp = (*dp)->next;
			} else {
				if ((*dp)->index > (unsigned)victim)
					(*dp)->index--;
				dp = &(*dp)->next;
			}
		mpb->num_raid_devs--;
		super->updates_pending++;
		break;
	}
	case update_rename_array: {
		struct imsm_update_rename_array *u = (void *) update->buf;
		char name[MAX_RAID_SERIAL_LEN+1];
		int target = u->dev_idx;
		struct active_array *a;
		struct imsm_dev *dev;

		/* sanity check that we are not affecting the uuid of
		 * an active array
		 */
		snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
		name[MAX_RAID_SERIAL_LEN] = '\0';
		for (a = st->arrays; a; a = a->next)
			if (a->info.container_member == target)
				break;
		dev = get_imsm_dev(super, u->dev_idx);
		if (a || !dev || !check_name(super, name, 1)) {
			dprintf("failed to rename subarray-%d\n", target);
			break;
		}

		snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
		super->updates_pending++;
		break;
	}
	case update_add_remove_disk: {
		/* we may be able to repair some arrays if disks are
		 * being added, check teh status of add_remove_disk
		 * if discs has been added.
		 */
		if (add_remove_disk_update(super)) {
			struct active_array *a;

			super->updates_pending++;
			/* a newly added disk may let degraded arrays heal */
			for (a = st->arrays; a; a = a->next)
				a->check_degraded = 1;
		}
		break;
	}
	default:
		fprintf(stderr, "error: unsuported process update type:"
			"(type: %d)\n", type);
	}
}
7234
7235 static struct mdinfo *get_spares_for_grow(struct supertype *st);
7236
/* Manager-side counterpart of imsm_process_update(): pre-allocate every
 * buffer the monitor will need (update->space / update->space_list and,
 * if the anchor must grow, super->next_buf), because the monitor thread
 * must not allocate.
 */
static void imsm_prepare_update(struct supertype *st,
				struct metadata_update *update)
{
	/**
	 * Allocate space to hold new disk entries, raid-device entries or a new
	 * mpb if necessary.  The manager synchronously waits for updates to
	 * complete in the monitor, so new mpb buffers allocated here can be
	 * integrated by the monitor thread without worrying about live pointers
	 * in the manager thread.
	 */
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	size_t buf_len;
	/* extra anchor bytes this update will need (0 if none) */
	size_t len = 0;

	switch (type) {
	case update_takeover: {
		struct imsm_update_takeover *u = (void *)update->buf;
		if (u->direction == R0_TO_R10) {
			void **tail = (void **)&update->space_list;
			struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
			struct imsm_map *map = get_imsm_map(dev, 0);
			int num_members = map->num_members;
			void *space;
			int size, i;
			int err = 0;
			/* allocate memory for added disks: one struct dl
			 * per fabricated *missing* mirror partner, chained
			 * onto space_list via each buffer's first word
			 */
			for (i = 0; i < num_members; i++) {
				size = sizeof(struct dl);
				space = malloc(size);
				if (!space) {
					err++;
					break;
				}
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			/* allocate memory for new device */
			size = sizeof_imsm_dev(super->devlist->dev, 0) +
				(num_members * sizeof(__u32));
			space = malloc(size);
			if (!space)
				err++;
			else {
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			if (!err) {
				len = disks_to_mpb_size(num_members * 2);
			} else {
				/* if allocation didn't success, free buffer */
				while (update->space_list) {
					void **sp = update->space_list;
					update->space_list = *sp;
					free(sp);
				}
			}
		}

		break;
	}
	case update_reshape_container_disks: {
		/* Every raid device in the container is about to
		 * gain some more devices, and we will enter a
		 * reconfiguration.
		 * So each 'imsm_map' will be bigger, and the imsm_vol
		 * will now hold 2 of them.
		 * Thus we need new 'struct imsm_dev' allocations sized
		 * as sizeof_imsm_dev but with more devices in both maps.
		 */
		struct imsm_update_reshape *u = (void *)update->buf;
		struct intel_dev *dl;
		void **space_tail = (void**)&update->space_list;

		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		for (dl = super->devlist; dl; dl = dl->next) {
			int size = sizeof_imsm_dev(dl->dev, 1);
			void *s;
			/* sizeof(__u32)*2: one ord-table slot per added
			 * disk in each of the two maps
			 */
			if (u->new_raid_disks > u->old_raid_disks)
				size += sizeof(__u32)*2*
					(u->new_raid_disks - u->old_raid_disks);
			s = malloc(size);
			if (!s)
				break;
			*space_tail = s;
			space_tail = s;
			*space_tail = NULL;
		}

		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_reshape_migration: {
		/* for migration level 0->5 we need to add disks
		 * so the same as for container operation we will copy
		 * device to the bigger location.
		 * in memory prepared device and new disk area are prepared
		 * for usage in process update
		 */
		struct imsm_update_reshape_migration *u = (void *)update->buf;
		struct intel_dev *id;
		void **space_tail = (void **)&update->space_list;
		int size;
		void *s;
		int current_level = -1;

		/* note: debug text says update_reshape but this is the
		 * update_reshape_migration case
		 */
		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		/* add space for bigger array in update
		 */
		for (id = super->devlist; id; id = id->next) {
			if (id->index == (unsigned)u->subdev) {
				size = sizeof_imsm_dev(id->dev, 1);
				if (u->new_raid_disks > u->old_raid_disks)
					size += sizeof(__u32)*2*
					(u->new_raid_disks - u->old_raid_disks);
				s = malloc(size);
				if (!s)
					break;
				*space_tail = s;
				space_tail = s;
				*space_tail = NULL;
				break;
			}
		}
		if (update->space_list == NULL)
			break;

		/* add space for disk in update
		 */
		size = sizeof(struct dl);
		s = malloc(size);
		if (!s) {
			free(update->space_list);
			update->space_list = NULL;
			break;
		}
		*space_tail = s;
		space_tail = s;
		*space_tail = NULL;

		/* add spare device to update
		 */
		for (id = super->devlist ; id; id = id->next)
			if (id->index == (unsigned)u->subdev) {
				struct imsm_dev *dev;
				struct imsm_map *map;

				dev = get_imsm_dev(super, u->subdev);
				map = get_imsm_map(dev, 0);
				current_level = map->raid_level;
				break;
			}
		/* taking over a spare is only needed when the level
		 * actually changes to 5
		 */
		if ((u->new_level == 5) && (u->new_level != current_level)) {
			struct mdinfo *spares;

			spares = get_spares_for_grow(st);
			if (spares) {
				struct dl *dl;
				struct mdinfo *dev;

				dev = spares->devs;
				if (dev) {
					u->new_disks[0] =
						makedev(dev->disk.major,
							dev->disk.minor);
					dl = get_disk_super(super,
							    dev->disk.major,
							    dev->disk.minor);
					dl->index = u->old_raid_disks;
					dev = dev->next;
				}
				sysfs_free(spares);
			}
		}
		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_create_array: {
		struct imsm_update_create_array *u = (void *) update->buf;
		struct intel_dev *dv;
		struct imsm_dev *dev = &u->dev;
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct dl *dl;
		struct disk_info *inf;
		int i;
		int activate = 0;

		inf = get_disk_info(u);
		len = sizeof_imsm_dev(dev, 1);
		/* allocate a new super->devlist entry */
		dv = malloc(sizeof(*dv));
		if (dv) {
			dv->dev = malloc(len);
			if (dv->dev)
				update->space = dv;
			else {
				free(dv);
				update->space = NULL;
			}
		}

		/* count how many spares will be converted to members */
		for (i = 0; i < map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				/* hmm maybe it failed?, nothing we can do about
				 * it here
				 */
				continue;
			}
			if (count_memberships(dl, super) == 0)
				activate++;
		}
		len += activate * sizeof(struct imsm_disk);
		break;
	/* note: this default label sits inside the create_array case's
	 * braces; case labels are scoped to the whole switch, so it
	 * still serves as the switch default
	 */
	default:
		break;
	}
	}

	/* check if we need a larger metadata buffer */
	if (super->next_buf)
		buf_len = super->next_len;
	else
		buf_len = super->len;

	if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
		/* ok we need a larger buf than what is currently allocated
		 * if this allocation fails process_update will notice that
		 * ->next_len is set and ->next_buf is NULL
		 */
		buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
		if (super->next_buf)
			free(super->next_buf);

		super->next_len = buf_len;
		if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
			memset(super->next_buf, 0, buf_len);
		else
			super->next_buf = NULL;
	}
}
7486
/* Remove the disk at @index from the container metadata, compacting all
 * higher disk indexes and ord-table entries down by one.
 * Must be called while the manager is quiesced — the dl entry is freed
 * directly with no locking.
 * @dlp points at the list slot (in super->disks or super->missing) that
 * holds the victim; if the victim was not found it points at a NULL tail
 * and only the index compaction is performed.
 */
static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j, -1);

			if (ord_to_idx(ord) <= index)
				continue;

			/* map[0] gets the bare decremented index; map[1]
			 * (when a migration is recorded) keeps the flag
			 * bits from ord
			 */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	/* unlink and free the victim if it was found on a list */
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
7538
7539 /*******************************************************************************
7540 * Function: open_backup_targets
7541 * Description: Function opens file descriptors for all devices given in
7542 * info->devs
7543 * Parameters:
7544 * info : general array info
7545 * raid_disks : number of disks
7546 * raid_fds : table of device's file descriptors
7547 * Returns:
7548 * 0 : success
7549 * -1 : fail
7550 ******************************************************************************/
7551 int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
7552 {
7553 struct mdinfo *sd;
7554
7555 for (sd = info->devs ; sd ; sd = sd->next) {
7556 char *dn;
7557
7558 if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
7559 dprintf("disk is faulty!!\n");
7560 continue;
7561 }
7562
7563 if ((sd->disk.raid_disk >= raid_disks) ||
7564 (sd->disk.raid_disk < 0))
7565 continue;
7566
7567 dn = map_dev(sd->disk.major,
7568 sd->disk.minor, 1);
7569 raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
7570 if (raid_fds[sd->disk.raid_disk] < 0) {
7571 fprintf(stderr, "cannot open component\n");
7572 return -1;
7573 }
7574 }
7575 return 0;
7576 }
7577
/*******************************************************************************
 * Function: init_migr_record_imsm
 * Description: Function inits imsm migration record
 * Parameters:
 * 	super	: imsm internal array info
 * 	dev	: device under migration
 * 	info	: general array info to find the smallest device
 * Returns:
 * 	none
 ******************************************************************************/
void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
			   struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct migr_record *migr_rec = super->migr_rec;
	int new_data_disks;
	unsigned long long dsize, dev_sectors;
	/* start at the maximum so the first real device size replaces it */
	long long unsigned min_dev_sectors = -1LLU;
	struct mdinfo *sd;
	char nm[30];
	int fd;
	struct imsm_map *map_dest = get_imsm_map(dev, 0);
	struct imsm_map *map_src = get_imsm_map(dev, 1);
	unsigned long long num_migr_units;

	/* total volume size in blocks from the split 32-bit size fields */
	unsigned long long array_blocks =
		(((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) +
		__le32_to_cpu(dev->size_low);

	memset(migr_rec, 0, sizeof(struct migr_record));
	migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);

	/* only ascending reshape supported now */
	migr_rec->ascending_migr = __cpu_to_le32(1);

	/* dest_depth_per_unit: whole strips (of the larger of the source
	 * and destination strip sizes) that fit in the copy area.  It is
	 * computed and multiplied in CPU byte order first, and only
	 * converted to little-endian after blocks_per_unit is derived
	 * from it below.
	 */
	migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
		max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
	migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip;
	new_data_disks = imsm_num_data_members(dev, 0);
	migr_rec->blocks_per_unit =
		__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
	migr_rec->dest_depth_per_unit =
		__cpu_to_le32(migr_rec->dest_depth_per_unit);

	/* number of units, rounded up to cover a partial final unit */
	num_migr_units =
		array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);

	if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
		num_migr_units++;
	migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);

	/* copied without byte-swapping: dev->size_low/high are already
	 * stored little-endian
	 */
	migr_rec->post_migr_vol_cap =  dev->size_low;
	migr_rec->post_migr_vol_cap_hi = dev->size_high;


	/* Find the smallest dev */
	for (sd = info->devs ; sd ; sd = sd->next) {
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		fd = dev_open(nm, O_RDONLY);
		if (fd < 0)
			continue;
		get_dev_size(fd, NULL, &dsize);
		dev_sectors = dsize / 512;
		if (dev_sectors < min_dev_sectors)
			min_dev_sectors = dev_sectors;
		close(fd);
	}
	/* place the checkpoint area inside the reserved blocks at the
	 * end of the smallest member
	 */
	migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
					RAID_DISK_RESERVED_BLOCKS_IMSM_HI);

	write_imsm_migr_rec(st);

	return;
}
7652
7653 /*******************************************************************************
7654 * Function: save_backup_imsm
7655 * Description: Function saves critical data stripes to Migration Copy Area
7656 * and updates the current migration unit status.
7657 * Use restore_stripes() to form a destination stripe,
7658 * and to write it to the Copy Area.
7659 * Parameters:
7660 * st : supertype information
7661 * info : general array info
7662 * buf : input buffer
7663 * write_offset : address of data to backup
7664 * length : length of data to backup (blocks_per_unit)
7665 * Returns:
7666 * 0 : success
7667 *, -1 : fail
7668 ******************************************************************************/
7669 int save_backup_imsm(struct supertype *st,
7670 struct imsm_dev *dev,
7671 struct mdinfo *info,
7672 void *buf,
7673 int new_data,
7674 int length)
7675 {
7676 int rv = -1;
7677 struct intel_super *super = st->sb;
7678 unsigned long long *target_offsets = NULL;
7679 int *targets = NULL;
7680 int i;
7681 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7682 int new_disks = map_dest->num_members;
7683
7684 targets = malloc(new_disks * sizeof(int));
7685 if (!targets)
7686 goto abort;
7687
7688 target_offsets = malloc(new_disks * sizeof(unsigned long long));
7689 if (!target_offsets)
7690 goto abort;
7691
7692 for (i = 0; i < new_disks; i++) {
7693 targets[i] = -1;
7694 target_offsets[i] = (unsigned long long)
7695 __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
7696 }
7697
7698 if (open_backup_targets(info, new_disks, targets))
7699 goto abort;
7700
7701 if (restore_stripes(targets, /* list of dest devices */
7702 target_offsets, /* migration record offsets */
7703 new_disks,
7704 info->new_chunk,
7705 info->new_level,
7706 info->new_layout,
7707 -1, /* source backup file descriptor */
7708 0, /* input buf offset
7709 * always 0 buf is already offset */
7710 0,
7711 length,
7712 buf) != 0) {
7713 fprintf(stderr, Name ": Error restoring stripes\n");
7714 goto abort;
7715 }
7716
7717 rv = 0;
7718
7719 abort:
7720 if (targets) {
7721 for (i = 0; i < new_disks; i++)
7722 if (targets[i] >= 0)
7723 close(targets[i]);
7724 free(targets);
7725 }
7726 free(target_offsets);
7727
7728 return rv;
7729 }
7730
7731 /*******************************************************************************
7732 * Function: save_checkpoint_imsm
7733 * Description: Function called for current unit status update
7734 * in the migration record. It writes it to disk.
7735 * Parameters:
 * st : supertype containing imsm internal array info
7737 * info : general array info
7738 * Returns:
7739 * 0: success
7740 * 1: failure
7741 ******************************************************************************/
7742 int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
7743 {
7744 struct intel_super *super = st->sb;
7745 load_imsm_migr_rec(super, info);
7746 if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
7747 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7748 return 1;
7749 }
7750
7751 super->migr_rec->curr_migr_unit =
7752 __cpu_to_le32(info->reshape_progress /
7753 __le32_to_cpu(super->migr_rec->blocks_per_unit));
7754 super->migr_rec->rec_status = __cpu_to_le32(state);
7755 super->migr_rec->dest_1st_member_lba =
7756 __cpu_to_le32((__le32_to_cpu(super->migr_rec->curr_migr_unit))
7757 * __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
7758 if (write_imsm_migr_rec(st) < 0) {
7759 dprintf("imsm: Cannot write migration record "
7760 "outside backup area\n");
7761 return 1;
7762 }
7763
7764 return 0;
7765 }
7766
7767 static __u64 blocks_per_migr_unit(struct intel_super *super,
7768 struct imsm_dev *dev);
7769
7770 /*******************************************************************************
7771 * Function: recover_backup_imsm
7772 * Description: Function recovers critical data from the Migration Copy Area
7773 * while assembling an array.
7774 * Parameters:
 * st : supertype containing imsm internal array info
7776 * info : general array info
7777 * Returns:
7778 * 0 : success (or there is no data to recover)
7779 * 1 : fail
7780 ******************************************************************************/
7781 int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
7782 {
7783 struct intel_super *super = st->sb;
7784 struct migr_record *migr_rec = super->migr_rec;
7785 struct imsm_map *map_dest = NULL;
7786 struct intel_dev *id = NULL;
7787 unsigned long long read_offset;
7788 unsigned long long write_offset;
7789 unsigned unit_len;
7790 int *targets = NULL;
7791 int new_disks, i, err;
7792 char *buf = NULL;
7793 int retval = 1;
7794 unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
7795 unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
7796 int ascending = __le32_to_cpu(migr_rec->ascending_migr);
7797 char buffer[20];
7798
7799 err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
7800 if (err < 1)
7801 return 1;
7802
7803 /* recover data only during assemblation */
7804 if (strncmp(buffer, "inactive", 8) != 0)
7805 return 0;
7806 /* no data to recover */
7807 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
7808 return 0;
7809 if (curr_migr_unit >= num_migr_units)
7810 return 1;
7811
7812 /* find device during reshape */
7813 for (id = super->devlist; id; id = id->next)
7814 if (is_gen_migration(id->dev))
7815 break;
7816 if (id == NULL)
7817 return 1;
7818
7819 map_dest = get_imsm_map(id->dev, 0);
7820 new_disks = map_dest->num_members;
7821
7822 read_offset = (unsigned long long)
7823 __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
7824
7825 write_offset = ((unsigned long long)
7826 __le32_to_cpu(migr_rec->dest_1st_member_lba) +
7827 info->data_offset) * 512;
7828
7829 unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
7830 if (posix_memalign((void **)&buf, 512, unit_len) != 0)
7831 goto abort;
7832 targets = malloc(new_disks * sizeof(int));
7833 if (!targets)
7834 goto abort;
7835
7836 open_backup_targets(info, new_disks, targets);
7837
7838 for (i = 0; i < new_disks; i++) {
7839 if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
7840 fprintf(stderr,
7841 Name ": Cannot seek to block: %s\n",
7842 strerror(errno));
7843 goto abort;
7844 }
7845 if (read(targets[i], buf, unit_len) != unit_len) {
7846 fprintf(stderr,
7847 Name ": Cannot read copy area block: %s\n",
7848 strerror(errno));
7849 goto abort;
7850 }
7851 if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
7852 fprintf(stderr,
7853 Name ": Cannot seek to block: %s\n",
7854 strerror(errno));
7855 goto abort;
7856 }
7857 if (write(targets[i], buf, unit_len) != unit_len) {
7858 fprintf(stderr,
7859 Name ": Cannot restore block: %s\n",
7860 strerror(errno));
7861 goto abort;
7862 }
7863 }
7864
7865 if (ascending && curr_migr_unit < (num_migr_units-1))
7866 curr_migr_unit++;
7867
7868 migr_rec->curr_migr_unit = __le32_to_cpu(curr_migr_unit);
7869 super->migr_rec->rec_status = __cpu_to_le32(UNIT_SRC_NORMAL);
7870 if (write_imsm_migr_rec(st) == 0) {
7871 __u64 blocks_per_unit = blocks_per_migr_unit(super, id->dev);
7872 info->reshape_progress = curr_migr_unit * blocks_per_unit;
7873 retval = 0;
7874 }
7875
7876 abort:
7877 if (targets) {
7878 for (i = 0; i < new_disks; i++)
7879 if (targets[i])
7880 close(targets[i]);
7881 free(targets);
7882 }
7883 free(buf);
7884 return retval;
7885 }
7886
7887 static char disk_by_path[] = "/dev/disk/by-path/";
7888
7889 static const char *imsm_get_disk_controller_domain(const char *path)
7890 {
7891 char disk_path[PATH_MAX];
7892 char *drv=NULL;
7893 struct stat st;
7894
7895 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7896 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7897 if (stat(disk_path, &st) == 0) {
7898 struct sys_dev* hba;
7899 char *path=NULL;
7900
7901 path = devt_to_devpath(st.st_rdev);
7902 if (path == NULL)
7903 return "unknown";
7904 hba = find_disk_attached_hba(-1, path);
7905 if (hba && hba->type == SYS_DEV_SAS)
7906 drv = "isci";
7907 else if (hba && hba->type == SYS_DEV_SATA)
7908 drv = "ahci";
7909 else
7910 drv = "unknown";
7911 dprintf("path: %s hba: %s attached: %s\n",
7912 path, (hba) ? hba->path : "NULL", drv);
7913 free(path);
7914 if (hba)
7915 free_sys_dev(&hba);
7916 }
7917 return drv;
7918 }
7919
7920 static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7921 {
7922 char subdev_name[20];
7923 struct mdstat_ent *mdstat;
7924
7925 sprintf(subdev_name, "%d", subdev);
7926 mdstat = mdstat_by_subdev(subdev_name, container);
7927 if (!mdstat)
7928 return -1;
7929
7930 *minor = mdstat->devnum;
7931 free_mdstat(mdstat);
7932 return 0;
7933 }
7934
7935 static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7936 struct geo_params *geo,
7937 int *old_raid_disks)
7938 {
7939 /* currently we only support increasing the number of devices
7940 * for a container. This increases the number of device for each
7941 * member array. They must all be RAID0 or RAID5.
7942 */
7943 int ret_val = 0;
7944 struct mdinfo *info, *member;
7945 int devices_that_can_grow = 0;
7946
7947 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7948 "st->devnum = (%i)\n",
7949 st->devnum);
7950
7951 if (geo->size != -1 ||
7952 geo->level != UnSet ||
7953 geo->layout != UnSet ||
7954 geo->chunksize != 0 ||
7955 geo->raid_disks == UnSet) {
7956 dprintf("imsm: Container operation is allowed for "
7957 "raid disks number change only.\n");
7958 return ret_val;
7959 }
7960
7961 info = container_content_imsm(st, NULL);
7962 for (member = info; member; member = member->next) {
7963 int result;
7964 int minor;
7965
7966 dprintf("imsm: checking device_num: %i\n",
7967 member->container_member);
7968
7969 if (geo->raid_disks <= member->array.raid_disks) {
7970 /* we work on container for Online Capacity Expansion
7971 * only so raid_disks has to grow
7972 */
7973 dprintf("imsm: for container operation raid disks "
7974 "increase is required\n");
7975 break;
7976 }
7977
7978 if ((info->array.level != 0) &&
7979 (info->array.level != 5)) {
7980 /* we cannot use this container with other raid level
7981 */
7982 dprintf("imsm: for container operation wrong"
7983 " raid level (%i) detected\n",
7984 info->array.level);
7985 break;
7986 } else {
7987 /* check for platform support
7988 * for this raid level configuration
7989 */
7990 struct intel_super *super = st->sb;
7991 if (!is_raid_level_supported(super->orom,
7992 member->array.level,
7993 geo->raid_disks)) {
7994 dprintf("platform does not support raid%d with"
7995 " %d disk%s\n",
7996 info->array.level,
7997 geo->raid_disks,
7998 geo->raid_disks > 1 ? "s" : "");
7999 break;
8000 }
8001 /* check if component size is aligned to chunk size
8002 */
8003 if (info->component_size %
8004 (info->array.chunk_size/512)) {
8005 dprintf("Component size is not aligned to "
8006 "chunk size\n");
8007 break;
8008 }
8009 }
8010
8011 if (*old_raid_disks &&
8012 info->array.raid_disks != *old_raid_disks)
8013 break;
8014 *old_raid_disks = info->array.raid_disks;
8015
8016 /* All raid5 and raid0 volumes in container
8017 * have to be ready for Online Capacity Expansion
8018 * so they need to be assembled. We have already
8019 * checked that no recovery etc is happening.
8020 */
8021 result = imsm_find_array_minor_by_subdev(member->container_member,
8022 st->container_dev,
8023 &minor);
8024 if (result < 0) {
8025 dprintf("imsm: cannot find array\n");
8026 break;
8027 }
8028 devices_that_can_grow++;
8029 }
8030 sysfs_free(info);
8031 if (!member && devices_that_can_grow)
8032 ret_val = 1;
8033
8034 if (ret_val)
8035 dprintf("\tContainer operation allowed\n");
8036 else
8037 dprintf("\tError: %i\n", ret_val);
8038
8039 return ret_val;
8040 }
8041
8042 /* Function: get_spares_for_grow
8043 * Description: Allocates memory and creates list of spare devices
 * available in container. Checks if spare drive size is acceptable.
8045 * Parameters: Pointer to the supertype structure
8046 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
8047 * NULL if fail
8048 */
8049 static struct mdinfo *get_spares_for_grow(struct supertype *st)
8050 {
8051 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
8052 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
8053 }
8054
8055 /******************************************************************************
8056 * function: imsm_create_metadata_update_for_reshape
8057 * Function creates update for whole IMSM container.
8058 *
8059 ******************************************************************************/
8060 static int imsm_create_metadata_update_for_reshape(
8061 struct supertype *st,
8062 struct geo_params *geo,
8063 int old_raid_disks,
8064 struct imsm_update_reshape **updatep)
8065 {
8066 struct intel_super *super = st->sb;
8067 struct imsm_super *mpb = super->anchor;
8068 int update_memory_size = 0;
8069 struct imsm_update_reshape *u = NULL;
8070 struct mdinfo *spares = NULL;
8071 int i;
8072 int delta_disks = 0;
8073 struct mdinfo *dev;
8074
8075 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
8076 geo->raid_disks);
8077
8078 delta_disks = geo->raid_disks - old_raid_disks;
8079
8080 /* size of all update data without anchor */
8081 update_memory_size = sizeof(struct imsm_update_reshape);
8082
8083 /* now add space for spare disks that we need to add. */
8084 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
8085
8086 u = calloc(1, update_memory_size);
8087 if (u == NULL) {
8088 dprintf("error: "
8089 "cannot get memory for imsm_update_reshape update\n");
8090 return 0;
8091 }
8092 u->type = update_reshape_container_disks;
8093 u->old_raid_disks = old_raid_disks;
8094 u->new_raid_disks = geo->raid_disks;
8095
8096 /* now get spare disks list
8097 */
8098 spares = get_spares_for_grow(st);
8099
8100 if (spares == NULL
8101 || delta_disks > spares->array.spare_disks) {
8102 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
8103 "for %s.\n", geo->dev_name);
8104 goto abort;
8105 }
8106
8107 /* we have got spares
8108 * update disk list in imsm_disk list table in anchor
8109 */
8110 dprintf("imsm: %i spares are available.\n\n",
8111 spares->array.spare_disks);
8112
8113 dev = spares->devs;
8114 for (i = 0; i < delta_disks; i++) {
8115 struct dl *dl;
8116
8117 if (dev == NULL)
8118 break;
8119 u->new_disks[i] = makedev(dev->disk.major,
8120 dev->disk.minor);
8121 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
8122 dl->index = mpb->num_disks;
8123 mpb->num_disks++;
8124 dev = dev->next;
8125 }
8126
8127 abort:
8128 /* free spares
8129 */
8130 sysfs_free(spares);
8131
8132 dprintf("imsm: reshape update preparation :");
8133 if (i == delta_disks) {
8134 dprintf(" OK\n");
8135 *updatep = u;
8136 return update_memory_size;
8137 }
8138 free(u);
8139 dprintf(" Error\n");
8140
8141 return 0;
8142 }
8143
8144 /******************************************************************************
8145 * function: imsm_create_metadata_update_for_migration()
8146 * Creates update for IMSM array.
8147 *
8148 ******************************************************************************/
/* Build an imsm_update_reshape_migration update describing a volume-level
 * migration (level/layout/chunk-size change) requested in 'geo' for the
 * currently selected volume (super->current_vol).
 * On success *updatep receives the allocated update and its size in bytes
 * is returned; 0 is returned on failure (and nothing is allocated).
 */
static int imsm_create_metadata_update_for_migration(
	struct supertype *st,
	struct geo_params *geo,
	struct imsm_update_reshape_migration **updatep)
{
	struct intel_super *super = st->sb;
	int update_memory_size = 0;
	struct imsm_update_reshape_migration *u = NULL;
	struct imsm_dev *dev;
	int previous_level = -1;

	dprintf("imsm_create_metadata_update_for_migration(enter)"
		" New Level = %i\n", geo->level);

	/* size of all update data without anchor */
	update_memory_size = sizeof(struct imsm_update_reshape_migration);

	u = calloc(1, update_memory_size);
	if (u == NULL) {
		dprintf("error: cannot get memory for "
			"imsm_create_metadata_update_for_migration\n");
		return 0;
	}
	u->type = update_reshape_migration;
	u->subdev = super->current_vol;
	u->new_level = geo->level;
	u->new_layout = geo->layout;
	u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
	/* -1 sentinels: no spare chosen / no chunk-size change requested;
	 * presumably resolved later by the update handler - TODO confirm
	 */
	u->new_disks[0] = -1;
	u->new_chunksize = -1;

	dev = get_imsm_dev(super, u->subdev);
	if (dev) {
		struct imsm_map *map;

		map = get_imsm_map(dev, 0);
		if (map) {
			/* blocks_per_strip is in 512-byte sectors, so /2
			 * yields KiB.
			 * NOTE(review): geo->chunksize is compared against
			 * byte values elsewhere (see imsm_analyze_change),
			 * which would make this a KiB-vs-bytes comparison -
			 * confirm both sides use the same unit.
			 */
			int current_chunk_size =
				__le16_to_cpu(map->blocks_per_strip) / 2;

			if (geo->chunksize != current_chunk_size) {
				u->new_chunksize = geo->chunksize / 1024;
				dprintf("imsm: "
					"chunk size change from %i to %i\n",
					current_chunk_size, u->new_chunksize);
			}
			previous_level = map->raid_level;
		}
	}
	/* a 0 -> 5 migration needs one extra disk for parity: require a
	 * spare in the container before committing to the update
	 */
	if ((geo->level == 5) && (previous_level == 0)) {
		struct mdinfo *spares = NULL;

		u->new_raid_disks++;
		spares = get_spares_for_grow(st);
		if ((spares == NULL) || (spares->array.spare_disks < 1)) {
			free(u);
			sysfs_free(spares);
			update_memory_size = 0;
			dprintf("error: cannot get spare device "
				"for requested migration");
			return 0;
		}
		sysfs_free(spares);
	}
	dprintf("imsm: reshape update preparation : OK\n");
	*updatep = u;

	return update_memory_size;
}
8218
8219 static void imsm_update_metadata_locally(struct supertype *st,
8220 void *buf, int len)
8221 {
8222 struct metadata_update mu;
8223
8224 mu.buf = buf;
8225 mu.len = len;
8226 mu.space = NULL;
8227 mu.space_list = NULL;
8228 mu.next = NULL;
8229 imsm_prepare_update(st, &mu);
8230 imsm_process_update(st, &mu);
8231
8232 while (mu.space_list) {
8233 void **space = mu.space_list;
8234 mu.space_list = *space;
8235 free(space);
8236 }
8237 }
8238
8239 /***************************************************************************
8240 * Function: imsm_analyze_change
8241 * Description: Function analyze change for single volume
8242 * and validate if transition is supported
8243 * Parameters: Geometry parameters, supertype structure
8244 * Returns: Operation type code on success, -1 if fail
8245 ****************************************************************************/
enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
					   struct geo_params *geo)
{
	struct mdinfo info;
	int change = -1;	/* -1 means "transition not supported" */
	int check_devs = 0;
	int chunk;

	getinfo_super_imsm_volume(st, &info, NULL);

	/* level change requested: decide between an in-place migration
	 * (metadata-driven reshape) and a takeover (level remap)
	 */
	if ((geo->level != info.array.level) &&
	    (geo->level >= 0) &&
	    (geo->level != UnSet)) {
		switch (info.array.level) {
		case 0:
			if (geo->level == 5) {
				change = CH_MIGRATION;
				check_devs = 1;
			}
			if (geo->level == 10) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		case 1:
			if (geo->level == 0) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		case 10:
			if (geo->level == 0) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		}
		if (change == -1) {
			fprintf(stderr,
				Name " Error. Level Migration from %d to %d "
				"not supported!\n",
				info.array.level, geo->level);
			goto analyse_change_exit;
		}
	} else
		geo->level = info.array.level;

	/* layout change requested: only the raid5 parity-layout switch
	 * is accepted.
	 * NOTE(review): the numeric layout codes tested here and the
	 * "reshape 5 -> 4" / "reshape 4 -> 5" comments look inconsistent
	 * with each other - confirm against the md raid5 layout constants
	 * before relying on these branches.
	 */
	if ((geo->layout != info.array.layout)
	    && ((geo->layout != UnSet) && (geo->layout != -1))) {
		change = CH_MIGRATION;
		if ((info.array.layout == 0)
		    && (info.array.level == 5)
		    && (geo->layout == 5)) {
			/* reshape 5 -> 4 */
		} else if ((info.array.layout == 5)
			   && (info.array.level == 5)
			   && (geo->layout == 0)) {
			/* reshape 4 -> 5 */
			geo->layout = 0;
			geo->level = 5;
		} else {
			fprintf(stderr,
				Name " Error. Layout Migration from %d to %d "
				"not supported!\n",
				info.array.layout, geo->layout);
			change = -1;
			goto analyse_change_exit;
		}
	} else
		geo->layout = info.array.layout;

	/* a chunk size change is always a migration */
	if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
	    && (geo->chunksize != info.array.chunk_size))
		change = CH_MIGRATION;
	else
		geo->chunksize = info.array.chunk_size;

	/* presumably bytes -> KiB for validate_geometry_imsm - confirm */
	chunk = geo->chunksize / 1024;
	if (!validate_geometry_imsm(st,
				    geo->level,
				    geo->layout,
				    geo->raid_disks,
				    &chunk,
				    geo->size,
				    0, 0, 1))
		change = -1;

	/* takeover/migration with device checks is only supported when the
	 * container holds exactly one volume
	 */
	if (check_devs) {
		struct intel_super *super = st->sb;
		struct imsm_super *mpb = super->anchor;

		if (mpb->num_raid_devs > 1) {
			fprintf(stderr,
				Name " Error. Cannot perform operation on %s"
				"- for this operation it MUST be single "
				"array in container\n",
				geo->dev_name);
			change = -1;
		}
	}

analyse_change_exit:

	return change;
}
8351
8352 int imsm_takeover(struct supertype *st, struct geo_params *geo)
8353 {
8354 struct intel_super *super = st->sb;
8355 struct imsm_update_takeover *u;
8356
8357 u = malloc(sizeof(struct imsm_update_takeover));
8358 if (u == NULL)
8359 return 1;
8360
8361 u->type = update_takeover;
8362 u->subarray = super->current_vol;
8363
8364 /* 10->0 transition */
8365 if (geo->level == 0)
8366 u->direction = R10_TO_R0;
8367
8368 /* 0->10 transition */
8369 if (geo->level == 10)
8370 u->direction = R0_TO_R10;
8371
8372 /* update metadata locally */
8373 imsm_update_metadata_locally(st, u,
8374 sizeof(struct imsm_update_takeover));
8375 /* and possibly remotely */
8376 if (st->update_tail)
8377 append_metadata_update(st, u,
8378 sizeof(struct imsm_update_takeover));
8379 else
8380 free(u);
8381
8382 return 0;
8383 }
8384
/* Print the experimental-feature warning and ask for confirmation.
 * Returns the (non-zero for "yes") answer from ask().
 */
static int warn_user_about_risk(void)
{
	int answer;

	fprintf(stderr,
		"\nThis is an experimental feature. Data on the RAID volume(s) "
		"can be lost!!!\n\n"
		"To continue command execution please make sure that\n"
		"the grow process will not be interrupted. Use safe power\n"
		"supply to avoid unexpected system reboot. Make sure that\n"
		"reshaped container is not assembled automatically during\n"
		"system boot.\n"
		"If reshape is interrupted, assemble array manually\n"
		"using e.g. '-Ac' option and up to date mdadm.conf file.\n"
		"Assembly in scan mode is not possible in such case.\n"
		"Growing container with boot array is not possible.\n"
		"If boot array reshape is interrupted, whole file system\n"
		"can be lost.\n\n");
	answer = ask("Do you want to continue? ");
	fprintf(stderr, "\n");

	return answer;
}
8408
8409 static int imsm_reshape_super(struct supertype *st, long long size, int level,
8410 int layout, int chunksize, int raid_disks,
8411 int delta_disks, char *backup, char *dev,
8412 int verbose)
8413 {
8414 int ret_val = 1;
8415 struct geo_params geo;
8416
8417 dprintf("imsm: reshape_super called.\n");
8418
8419 memset(&geo, 0, sizeof(struct geo_params));
8420
8421 geo.dev_name = dev;
8422 geo.dev_id = st->devnum;
8423 geo.size = size;
8424 geo.level = level;
8425 geo.layout = layout;
8426 geo.chunksize = chunksize;
8427 geo.raid_disks = raid_disks;
8428 if (delta_disks != UnSet)
8429 geo.raid_disks += delta_disks;
8430
8431 dprintf("\tfor level : %i\n", geo.level);
8432 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
8433
8434 if (experimental() == 0)
8435 return ret_val;
8436
8437 if (st->container_dev == st->devnum) {
8438 /* On container level we can only increase number of devices. */
8439 dprintf("imsm: info: Container operation\n");
8440 int old_raid_disks = 0;
8441
8442 /* this warning will be removed when imsm checkpointing
8443 * will be implemented, and restoring from check-point
8444 * operation will be transparent for reboot process
8445 */
8446 if (warn_user_about_risk() == 0)
8447 return ret_val;
8448
8449 if (imsm_reshape_is_allowed_on_container(
8450 st, &geo, &old_raid_disks)) {
8451 struct imsm_update_reshape *u = NULL;
8452 int len;
8453
8454 len = imsm_create_metadata_update_for_reshape(
8455 st, &geo, old_raid_disks, &u);
8456
8457 if (len <= 0) {
8458 dprintf("imsm: Cannot prepare update\n");
8459 goto exit_imsm_reshape_super;
8460 }
8461
8462 ret_val = 0;
8463 /* update metadata locally */
8464 imsm_update_metadata_locally(st, u, len);
8465 /* and possibly remotely */
8466 if (st->update_tail)
8467 append_metadata_update(st, u, len);
8468 else
8469 free(u);
8470
8471 } else {
8472 fprintf(stderr, Name ": (imsm) Operation "
8473 "is not allowed on this container\n");
8474 }
8475 } else {
8476 /* On volume level we support following operations
8477 * - takeover: raid10 -> raid0; raid0 -> raid10
8478 * - chunk size migration
8479 * - migration: raid5 -> raid0; raid0 -> raid5
8480 */
8481 struct intel_super *super = st->sb;
8482 struct intel_dev *dev = super->devlist;
8483 int change, devnum;
8484 dprintf("imsm: info: Volume operation\n");
8485 /* find requested device */
8486 while (dev) {
8487 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
8488 if (devnum == geo.dev_id)
8489 break;
8490 dev = dev->next;
8491 }
8492 if (dev == NULL) {
8493 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
8494 geo.dev_name, geo.dev_id);
8495 goto exit_imsm_reshape_super;
8496 }
8497 super->current_vol = dev->index;
8498 change = imsm_analyze_change(st, &geo);
8499 switch (change) {
8500 case CH_TAKEOVER:
8501 ret_val = imsm_takeover(st, &geo);
8502 break;
8503 case CH_MIGRATION: {
8504 struct imsm_update_reshape_migration *u = NULL;
8505 int len =
8506 imsm_create_metadata_update_for_migration(
8507 st, &geo, &u);
8508 if (len < 1) {
8509 dprintf("imsm: "
8510 "Cannot prepare update\n");
8511 break;
8512 }
8513 ret_val = 0;
8514 /* update metadata locally */
8515 imsm_update_metadata_locally(st, u, len);
8516 /* and possibly remotely */
8517 if (st->update_tail)
8518 append_metadata_update(st, u, len);
8519 else
8520 free(u);
8521 }
8522 break;
8523 default:
8524 ret_val = 1;
8525 }
8526 }
8527
8528 exit_imsm_reshape_super:
8529 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
8530 return ret_val;
8531 }
8532
8533 /*******************************************************************************
8534 * Function: wait_for_reshape_imsm
8535 * Description: Function writes new sync_max value and waits until
8536 * reshape process reach new position
8537 * Parameters:
8538 * sra : general array info
8539 * to_complete : new sync_max position
8540 * ndata : number of disks in new array's layout
8541 * Returns:
8542 * 0 : success,
8543 * 1 : there is no reshape in progress,
8544 * -1 : fail
8545 ******************************************************************************/
8546 int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
8547 int ndata)
8548 {
8549 int fd = sysfs_get_fd(sra, NULL, "reshape_position");
8550 unsigned long long completed;
8551
8552 struct timeval timeout;
8553
8554 if (fd < 0)
8555 return 1;
8556
8557 sysfs_fd_get_ll(fd, &completed);
8558
8559 if (to_complete == 0) {/* reshape till the end of array */
8560 sysfs_set_str(sra, NULL, "sync_max", "max");
8561 to_complete = MaxSector;
8562 } else {
8563 if (completed > to_complete)
8564 return -1;
8565 if (sysfs_set_num(sra, NULL, "sync_max",
8566 to_complete / ndata) != 0) {
8567 close(fd);
8568 return -1;
8569 }
8570 }
8571
8572 /* FIXME should not need a timeout at all */
8573 timeout.tv_sec = 30;
8574 timeout.tv_usec = 0;
8575 do {
8576 char action[20];
8577 fd_set rfds;
8578 FD_ZERO(&rfds);
8579 FD_SET(fd, &rfds);
8580 select(fd+1, NULL, NULL, &rfds, &timeout);
8581 if (sysfs_fd_get_ll(fd, &completed) < 0) {
8582 close(fd);
8583 return 1;
8584 }
8585 if (sysfs_get_str(sra, NULL, "sync_action",
8586 action, 20) > 0 &&
8587 strncmp(action, "reshape", 7) != 0)
8588 break;
8589 } while (completed < to_complete);
8590 close(fd);
8591 return 0;
8592
8593 }
8594
8595 /*******************************************************************************
8596 * Function: check_degradation_change
8597 * Description: Check that array hasn't become failed.
8598 * Parameters:
8599 * info : for sysfs access
8600 * sources : source disks descriptors
8601 * degraded: previous degradation level
8602 * Returns:
8603 * degradation level
8604 ******************************************************************************/
8605 int check_degradation_change(struct mdinfo *info,
8606 int *sources,
8607 int degraded)
8608 {
8609 unsigned long long new_degraded;
8610 sysfs_get_ll(info, NULL, "degraded", &new_degraded);
8611 if (new_degraded != (unsigned long long)degraded) {
8612 /* check each device to ensure it is still working */
8613 struct mdinfo *sd;
8614 new_degraded = 0;
8615 for (sd = info->devs ; sd ; sd = sd->next) {
8616 if (sd->disk.state & (1<<MD_DISK_FAULTY))
8617 continue;
8618 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
8619 char sbuf[20];
8620 if (sysfs_get_str(info,
8621 sd, "state", sbuf, 20) < 0 ||
8622 strstr(sbuf, "faulty") ||
8623 strstr(sbuf, "in_sync") == NULL) {
8624 /* this device is dead */
8625 sd->disk.state = (1<<MD_DISK_FAULTY);
8626 if (sd->disk.raid_disk >= 0 &&
8627 sources[sd->disk.raid_disk] >= 0) {
8628 close(sources[
8629 sd->disk.raid_disk]);
8630 sources[sd->disk.raid_disk] =
8631 -1;
8632 }
8633 new_degraded++;
8634 }
8635 }
8636 }
8637 }
8638
8639 return new_degraded;
8640 }
8641
8642 /*******************************************************************************
8643 * Function: imsm_manage_reshape
8644 * Description: Function finds array under reshape and it manages reshape
8645 * process. It creates stripes backups (if required) and sets
 * checkpoints.
8647 * Parameters:
 * afd : Backup handle (native) - not used
8649 * sra : general array info
8650 * reshape : reshape parameters - not used
8651 * st : supertype structure
8652 * blocks : size of critical section [blocks]
8653 * fds : table of source device descriptor
 * offsets : start of array (offset per device)
8655 * dests : not used
8656 * destfd : table of destination device descriptor
8657 * destoffsets : table of destination offsets (per device)
8658 * Returns:
8659 * 1 : success, reshape is done
8660 * 0 : fail
8661 ******************************************************************************/
8662 static int imsm_manage_reshape(
8663 int afd, struct mdinfo *sra, struct reshape *reshape,
8664 struct supertype *st, unsigned long backup_blocks,
8665 int *fds, unsigned long long *offsets,
8666 int dests, int *destfd, unsigned long long *destoffsets)
8667 {
8668 int ret_val = 0;
8669 struct intel_super *super = st->sb;
8670 struct intel_dev *dv = NULL;
8671 struct imsm_dev *dev = NULL;
8672 struct imsm_map *map_src, *map_dest;
8673 int migr_vol_qan = 0;
8674 int ndata, odata; /* [bytes] */
8675 int chunk; /* [bytes] */
8676 struct migr_record *migr_rec;
8677 char *buf = NULL;
8678 unsigned int buf_size; /* [bytes] */
8679 unsigned long long max_position; /* array size [bytes] */
8680 unsigned long long next_step; /* [blocks]/[bytes] */
8681 unsigned long long old_data_stripe_length;
8682 unsigned long long new_data_stripe_length;
8683 unsigned long long start_src; /* [bytes] */
8684 unsigned long long start; /* [bytes] */
8685 unsigned long long start_buf_shift; /* [bytes] */
8686 int degraded = 0;
8687
8688 if (!fds || !offsets || !destfd || !destoffsets || !sra)
8689 goto abort;
8690
8691 /* Find volume during the reshape */
8692 for (dv = super->devlist; dv; dv = dv->next) {
8693 if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
8694 && dv->dev->vol.migr_state == 1) {
8695 dev = dv->dev;
8696 migr_vol_qan++;
8697 }
8698 }
8699 /* Only one volume can migrate at the same time */
8700 if (migr_vol_qan != 1) {
8701 fprintf(stderr, Name " : %s", migr_vol_qan ?
8702 "Number of migrating volumes greater than 1\n" :
8703 "There is no volume during migrationg\n");
8704 goto abort;
8705 }
8706
8707 map_src = get_imsm_map(dev, 1);
8708 if (map_src == NULL)
8709 goto abort;
8710 map_dest = get_imsm_map(dev, 0);
8711
8712 ndata = imsm_num_data_members(dev, 0);
8713 odata = imsm_num_data_members(dev, 1);
8714
8715 chunk = map_src->blocks_per_strip * 512;
8716 old_data_stripe_length = odata * chunk;
8717
8718 migr_rec = super->migr_rec;
8719
8720 /* [bytes] */
8721 sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
8722 sra->new_level = map_dest->raid_level;
8723 new_data_stripe_length = sra->new_chunk * ndata;
8724
8725 /* initialize migration record for start condition */
8726 if (sra->reshape_progress == 0)
8727 init_migr_record_imsm(st, dev, sra);
8728
8729 /* size for data */
8730 buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
8731 /* extend buffer size for parity disk */
8732 buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
8733 /* add space for stripe aligment */
8734 buf_size += old_data_stripe_length;
8735 if (posix_memalign((void **)&buf, 4096, buf_size)) {
8736 dprintf("imsm: Cannot allocate checpoint buffer\n");
8737 goto abort;
8738 }
8739
8740 max_position =
8741 __le32_to_cpu(migr_rec->post_migr_vol_cap) +
8742 ((unsigned long long)__le32_to_cpu(
8743 migr_rec->post_migr_vol_cap_hi) << 32);
8744
8745 while (__le32_to_cpu(migr_rec->curr_migr_unit) <
8746 __le32_to_cpu(migr_rec->num_migr_units)) {
8747 /* current reshape position [blocks] */
8748 unsigned long long current_position =
8749 __le32_to_cpu(migr_rec->blocks_per_unit)
8750 * __le32_to_cpu(migr_rec->curr_migr_unit);
8751 unsigned long long border;
8752
8753 /* Check that array hasn't become failed.
8754 */
8755 degraded = check_degradation_change(sra, fds, degraded);
8756 if (degraded > 1) {
8757 dprintf("imsm: Abort reshape due to degradation"
8758 " level (%i)\n", degraded);
8759 goto abort;
8760 }
8761
8762 next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
8763
8764 if ((current_position + next_step) > max_position)
8765 next_step = max_position - current_position;
8766
8767 start = (map_src->pba_of_lba0 + dev->reserved_blocks +
8768 current_position) * 512;
8769
8770 /* allign reading start to old geometry */
8771 start_buf_shift = start % old_data_stripe_length;
8772 start_src = start - start_buf_shift;
8773
8774 border = (start_src / odata) - (start / ndata);
8775 border /= 512;
8776 if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
8777 /* save critical stripes to buf
8778 * start - start address of current unit
8779 * to backup [bytes]
8780 * start_src - start address of current unit
8781 * to backup alligned to source array
8782 * [bytes]
8783 */
8784 unsigned long long next_step_filler = 0;
8785 unsigned long long copy_length = next_step * 512;
8786
8787 /* allign copy area length to stripe in old geometry */
8788 next_step_filler = ((copy_length + start_buf_shift)
8789 % old_data_stripe_length);
8790 if (next_step_filler)
8791 next_step_filler = (old_data_stripe_length
8792 - next_step_filler);
8793 dprintf("save_stripes() parameters: start = %llu,"
8794 "\tstart_src = %llu,\tnext_step*512 = %llu,"
8795 "\tstart_in_buf_shift = %llu,"
8796 "\tnext_step_filler = %llu\n",
8797 start, start_src, copy_length,
8798 start_buf_shift, next_step_filler);
8799
8800 if (save_stripes(fds, offsets, map_src->num_members,
8801 chunk, sra->array.level,
8802 sra->array.layout, 0, NULL, start_src,
8803 copy_length +
8804 next_step_filler + start_buf_shift,
8805 buf)) {
8806 dprintf("imsm: Cannot save stripes"
8807 " to buffer\n");
8808 goto abort;
8809 }
8810 /* Convert data to destination format and store it
8811 * in backup general migration area
8812 */
8813 if (save_backup_imsm(st, dev, sra,
8814 buf + start_buf_shift,
8815 ndata, copy_length)) {
8816 dprintf("imsm: Cannot save stripes to "
8817 "target devices\n");
8818 goto abort;
8819 }
8820 if (save_checkpoint_imsm(st, sra,
8821 UNIT_SRC_IN_CP_AREA)) {
8822 dprintf("imsm: Cannot write checkpoint to "
8823 "migration record (UNIT_SRC_IN_CP_AREA)\n");
8824 goto abort;
8825 }
8826 /* decrease backup_blocks */
8827 if (backup_blocks > (unsigned long)next_step)
8828 backup_blocks -= next_step;
8829 else
8830 backup_blocks = 0;
8831 }
8832 /* When data backed up, checkpoint stored,
8833 * kick the kernel to reshape unit of data
8834 */
8835 next_step = next_step + sra->reshape_progress;
8836 sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
8837 sysfs_set_num(sra, NULL, "suspend_hi", next_step);
8838
8839 /* wait until reshape finish */
8840 if (wait_for_reshape_imsm(sra, next_step, ndata) < 0) {
8841 dprintf("wait_for_reshape_imsm returned error!\n");
8842 goto abort;
8843 }
8844
8845 sra->reshape_progress = next_step;
8846
8847 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) {
8848 dprintf("imsm: Cannot write checkpoint to "
8849 "migration record (UNIT_SRC_NORMAL)\n");
8850 goto abort;
8851 }
8852
8853 }
8854
8855 /* return '1' if done */
8856 ret_val = 1;
8857 abort:
8858 free(buf);
8859 abort_reshape(sra);
8860
8861 return ret_val;
8862 }
8863 #endif /* MDASSEMBLE */
8864
8865 struct superswitch super_imsm = {
8866 #ifndef MDASSEMBLE
8867 .examine_super = examine_super_imsm,
8868 .brief_examine_super = brief_examine_super_imsm,
8869 .brief_examine_subarrays = brief_examine_subarrays_imsm,
8870 .export_examine_super = export_examine_super_imsm,
8871 .detail_super = detail_super_imsm,
8872 .brief_detail_super = brief_detail_super_imsm,
8873 .write_init_super = write_init_super_imsm,
8874 .validate_geometry = validate_geometry_imsm,
8875 .add_to_super = add_to_super_imsm,
8876 .remove_from_super = remove_from_super_imsm,
8877 .detail_platform = detail_platform_imsm,
8878 .kill_subarray = kill_subarray_imsm,
8879 .update_subarray = update_subarray_imsm,
8880 .load_container = load_container_imsm,
8881 .default_geometry = default_geometry_imsm,
8882 .get_disk_controller_domain = imsm_get_disk_controller_domain,
8883 .reshape_super = imsm_reshape_super,
8884 .manage_reshape = imsm_manage_reshape,
8885 #endif
8886 .match_home = match_home_imsm,
8887 .uuid_from_super= uuid_from_super_imsm,
8888 .getinfo_super = getinfo_super_imsm,
8889 .getinfo_super_disks = getinfo_super_disks_imsm,
8890 .update_super = update_super_imsm,
8891
8892 .avail_size = avail_size_imsm,
8893 .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
8894
8895 .compare_super = compare_super_imsm,
8896
8897 .load_super = load_super_imsm,
8898 .init_super = init_super_imsm,
8899 .store_super = store_super_imsm,
8900 .free_super = free_super_imsm,
8901 .match_metadata_desc = match_metadata_desc_imsm,
8902 .container_content = container_content_imsm,
8903
8904 .recover_backup = recover_backup_imsm,
8905
8906 .external = 1,
8907 .name = "imsm",
8908
8909 #ifndef MDASSEMBLE
8910 /* for mdmon */
8911 .open_new = imsm_open_new,
8912 .set_array_state= imsm_set_array_state,
8913 .set_disk = imsm_set_disk,
8914 .sync_metadata = imsm_sync_metadata,
8915 .activate_spare = imsm_activate_spare,
8916 .process_update = imsm_process_update,
8917 .prepare_update = imsm_prepare_update,
8918 #endif /* MDASSEMBLE */
8919 };